Files
kata-containers/virtcontainers/device.go
Archana Shinde da08a65de3 device: Assign pci address to block device for kata_agent
Store PCI address for a block device on hotplugging it via
virtio-blk. This address will be passed by kata agent in the
device "Id" field. The agent within the guest can then use this
to identify the PCI slot in the guest and create the device node
based on it.

Signed-off-by: Archana Shinde <archana.m.shinde@intel.com>
2018-05-03 10:59:09 -07:00

645 lines
17 KiB
Go

// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"encoding/hex"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/go-ini/ini"
"github.com/sirupsen/logrus"
)
// Values stored in the DeviceType field of the device structs in this file.
const (
// DeviceVFIO is the device type set by newVFIODevice.
DeviceVFIO = "vfio"
// DeviceBlock is the device type set by newBlockDevice.
DeviceBlock = "block"
// DeviceGeneric is the device type set by newGenericDevice.
DeviceGeneric = "generic"
)
// sysIOMMUPath is the sysfs root under which VFIODevice.attach lists
// <sysIOMMUPath>/<group>/devices; sysDevPrefix is the sysfs root GetHostPath
// uses to look a device up by its major:minor numbers.
// Both are variables instead of constants, to allow overriding them in the
// tests.
var sysIOMMUPath = "/sys/kernel/iommu_groups"
var sysDevPrefix = "/sys/dev"
const (
// vfioPath is the host path prefix isVFIO matches against to recognise a
// VFIO group device node.
vfioPath = "/dev/vfio/"
)
// Device is the virtcontainers device interface.
type Device interface {
// attach makes the device available through the given hypervisor on
// behalf of the given container.
attach(hypervisor, *Container) error
// detach releases the device from the given hypervisor. Several
// implementations in this file are no-ops.
detach(hypervisor) error
// deviceType returns the implementation's device type string.
deviceType() string
}
// DeviceInfo contains the device data common to all types of devices. The
// device structs in this file carry it in a field named DeviceInfo.
type DeviceInfo struct {
// HostPath is the device path on the host. newDevices fills it in with the
// result of GetHostPathFunc.
HostPath string
// ContainerPath is the device path inside the container.
ContainerPath string
// DevType is the type of device: c, b, u or p.
//   c, u - character (unbuffered) special file
//   p    - FIFO
//   b    - block (buffered) special file
// More info in mknod(1).
DevType string
// Major and Minor are the device numbers of the device.
Major int64
Minor int64
// FileMode holds the permission bits for the device.
FileMode os.FileMode
// UID is the id of the device owner.
UID uint32
// GID is the id of the device group.
GID uint32
// Hotplugged stores device state indicating if the device was hotplugged.
// BlockDevice.attach sets it after a successful hotplug and
// BlockDevice.detach only unplugs when it is set.
Hotplugged bool
// ID is the identifier for the device that is passed to the hypervisor.
// The VFIO and block attach methods set it to the hex encoding of 8 random
// bytes.
ID string
}
// deviceLogger returns a log entry derived from virtLog that carries the
// field subsystem=device, so every message emitted from this file is tagged
// with its origin.
func deviceLogger() *logrus.Entry {
	entry := virtLog.WithField("subsystem", "device")
	return entry
}
// VFIODevice is a vfio device meant to be passed to the hypervisor
// to be used by the Virtual Machine.
type VFIODevice struct {
// DeviceType is set to DeviceVFIO by newVFIODevice.
DeviceType string
// DeviceInfo holds the common device data; its HostPath names the VFIO
// group node and its ID is regenerated by attach.
DeviceInfo DeviceInfo
// BDF is the bus:slot.function string produced by getBDF for the device
// most recently processed by attach, e.g. 00:1c.0.
BDF string
}
// newVFIODevice wraps devInfo in a VFIODevice whose DeviceType is DeviceVFIO.
// The BDF field is left empty; attach fills it in.
func newVFIODevice(devInfo DeviceInfo) *VFIODevice {
	dev := new(VFIODevice)
	dev.DeviceType = DeviceVFIO
	dev.DeviceInfo = devInfo
	return dev
}
// attach hot-plugs every device of the IOMMU group named by the last path
// element of DeviceInfo.HostPath.
//
// For each entry found under <sysIOMMUPath>/<group>/devices the receiver's
// BDF and DeviceInfo.ID are overwritten (the ID with the hex encoding of 8
// fresh random bytes) and a copy of the receiver is handed to
// h.hotplugAddDevice. The first error aborts the loop and is returned;
// devices added before the failure are not removed here. The container
// argument is not used.
func (device *VFIODevice) attach(h hypervisor, c *Container) error {
	group := filepath.Base(device.DeviceInfo.HostPath)
	entries, err := ioutil.ReadDir(filepath.Join(sysIOMMUPath, group, "devices"))
	if err != nil {
		return err
	}

	// Pass all devices in the iommu group.
	for _, entry := range entries {
		// Entry names look like 0000:00:1c.0; getBDF strips the domain.
		bdf, err := getBDF(entry.Name())
		if err != nil {
			return err
		}
		device.BDF = bdf

		idBytes, err := generateRandomBytes(8)
		if err != nil {
			return err
		}
		device.DeviceInfo.ID = hex.EncodeToString(idBytes)

		err = h.hotplugAddDevice(*device, vfioDev)
		if err != nil {
			deviceLogger().WithError(err).Error("Failed to add device")
			return err
		}

		attached := logrus.Fields{
			"device-group": device.DeviceInfo.HostPath,
			"device-type":  "vfio-passthrough",
		}
		deviceLogger().WithFields(attached).Info("Device group attached")
	}

	return nil
}
// detach is a no-op for VFIO devices: it ignores the hypervisor and always
// returns nil.
func (device *VFIODevice) detach(_ hypervisor) error {
	return nil
}
// deviceType reports the DeviceType stored on the VFIO device.
func (device *VFIODevice) deviceType() string {
	kind := device.DeviceType
	return kind
}
// VhostUserDeviceType represents a vhost-user device type.
// NOTE(review): the VhostUserSCSI, VhostUserNet and VhostUserBlk constants in
// this file are untyped string constants rather than values of this type, and
// the Type methods return plain strings - confirm whether this type is still
// needed.
type VhostUserDeviceType string
// Type strings returned by the Type methods of the vhost-user devices.
const (
// VhostUserSCSI is the SCSI based vhost-user type, returned by
// VhostUserSCSIDevice.Type.
VhostUserSCSI = "vhost-user-scsi-pci"
// VhostUserNet is the net based vhost-user type, returned by
// VhostUserNetDevice.Type.
VhostUserNet = "virtio-net-pci"
// VhostUserBlk is the block based vhost-user type, returned by
// VhostUserBlkDevice.Type.
VhostUserBlk = "vhost-user-blk-pci"
)
// VhostUserDevice represents a vhost-user device. Shared
// attributes of a vhost-user device can be retrieved using
// the Attrs() method. Unique data can be obtained by type-asserting
// the value to its concrete type.
type VhostUserDevice interface {
// Attrs returns a pointer to the device's shared attributes, so callers
// can modify them in place (vhostUserAttach sets the ID this way).
Attrs() *VhostUserDeviceAttrs
// Type returns the vhost-user type string of the device.
Type() string
}
// VhostUserDeviceAttrs represents data shared by most vhost-user devices
type VhostUserDeviceAttrs struct {
// DeviceType is the string returned by the deviceType methods of the
// vhost-user devices.
DeviceType string
// DeviceInfo holds the common device data.
DeviceInfo DeviceInfo
// SocketPath is presumably the host path of the vhost-user socket; it is
// not read or written in this file - confirm against callers.
SocketPath string
// ID is set by vhostUserAttach to the hex encoding of 8 random bytes.
ID string
}
// VhostUserNetDevice is a network vhost-user based device
type VhostUserNetDevice struct {
// VhostUserDeviceAttrs is embedded, so the shared attributes are promoted
// onto the device.
VhostUserDeviceAttrs
// MacAddress is not used in this file; presumably the MAC address of the
// network interface - confirm against callers.
MacAddress string
}
// Attrs exposes the shared vhost-user attributes embedded in the network
// device. The returned pointer aliases the device's own data.
func (d *VhostUserNetDevice) Attrs() *VhostUserDeviceAttrs {
	shared := &d.VhostUserDeviceAttrs
	return shared
}
// Type reports the vhost-user type of a network device, which is always
// VhostUserNet.
func (d *VhostUserNetDevice) Type() string {
	const kind = VhostUserNet
	return kind
}
// VhostUserSCSIDevice is a SCSI vhost-user based device
type VhostUserSCSIDevice struct {
// VhostUserDeviceAttrs is embedded, so the shared attributes are promoted
// onto the device.
VhostUserDeviceAttrs
}
// Attrs exposes the shared vhost-user attributes embedded in the SCSI
// device. The returned pointer aliases the device's own data.
func (d *VhostUserSCSIDevice) Attrs() *VhostUserDeviceAttrs {
	shared := &d.VhostUserDeviceAttrs
	return shared
}
// Type reports the vhost-user type of a SCSI device, which is always
// VhostUserSCSI.
func (d *VhostUserSCSIDevice) Type() string {
	const kind = VhostUserSCSI
	return kind
}
// VhostUserBlkDevice is a block vhost-user based device
type VhostUserBlkDevice struct {
// VhostUserDeviceAttrs is embedded, so the shared attributes are promoted
// onto the device.
VhostUserDeviceAttrs
}
// Attrs exposes the shared vhost-user attributes embedded in the block
// device. The returned pointer aliases the device's own data.
func (d *VhostUserBlkDevice) Attrs() *VhostUserDeviceAttrs {
	shared := &d.VhostUserDeviceAttrs
	return shared
}
// Type reports the vhost-user type of a block device, which is always
// VhostUserBlk.
func (d *VhostUserBlkDevice) Type() string {
	const kind = VhostUserBlk
	return kind
}
// vhostUserAttach holds the attach logic shared by every vhost-user device.
//
// It stores the hex encoding of 8 random bytes in the device's shared
// attributes as its ID (used for hypervisor command-line fields) and then
// registers the device through h.addDevice. The container argument is not
// used. An error from the random source or from addDevice is returned as is.
func vhostUserAttach(device VhostUserDevice, h hypervisor, c *Container) (err error) {
	raw, err := generateRandomBytes(8)
	if err != nil {
		return err
	}

	device.Attrs().ID = hex.EncodeToString(raw)

	return h.addDevice(device, vhostuserDev)
}
//
// VhostUserNetDevice's implementation of the device interface:
//

// attach delegates to vhostUserAttach, the logic common to all vhost-user
// devices.
func (d *VhostUserNetDevice) attach(h hypervisor, c *Container) (err error) {
	err = vhostUserAttach(d, h, c)
	return err
}
// detach is a no-op for vhost-user network devices: it ignores the
// hypervisor and always returns nil.
func (d *VhostUserNetDevice) detach(_ hypervisor) error {
	return nil
}
// deviceType reports the DeviceType held in the device's shared attributes.
func (d *VhostUserNetDevice) deviceType() string {
	kind := d.DeviceType
	return kind
}
//
// VhostUserBlkDevice's implementation of the device interface:
//

// attach delegates to vhostUserAttach, the logic common to all vhost-user
// devices.
func (d *VhostUserBlkDevice) attach(h hypervisor, c *Container) (err error) {
	err = vhostUserAttach(d, h, c)
	return err
}
// detach is a no-op for vhost-user block devices: it ignores the hypervisor
// and always returns nil.
func (d *VhostUserBlkDevice) detach(_ hypervisor) error {
	return nil
}
// deviceType reports the DeviceType held in the device's shared attributes.
func (d *VhostUserBlkDevice) deviceType() string {
	kind := d.DeviceType
	return kind
}
//
// VhostUserSCSIDevice's implementation of the device interface:
//

// attach delegates to vhostUserAttach, the logic common to all vhost-user
// devices.
func (d *VhostUserSCSIDevice) attach(h hypervisor, c *Container) (err error) {
	err = vhostUserAttach(d, h, c)
	return err
}
// detach is a no-op for vhost-user SCSI devices: it ignores the hypervisor
// and always returns nil.
func (d *VhostUserSCSIDevice) detach(_ hypervisor) error {
	return nil
}
// deviceType reports the DeviceType held in the device's shared attributes.
func (d *VhostUserSCSIDevice) deviceType() string {
	kind := d.DeviceType
	return kind
}
// hostSocketSearchPath is the format string for the host location searched
// for a vhost-user socket; findVhostUserNetSocketPath substitutes the %s with
// an IP address of the interface.
//
// Long term, this should be made more configurable. For now it matches the
// path provided by the CNM VPP and OVS-DPDK plugins, available at
// github.com/clearcontainers/vpp and github.com/clearcontainers/ovsdpdk. The
// plugins create the socket on the host system using this path.
const hostSocketSearchPath = "/tmp/vhostuser_%s/vhu.sock"
// findVhostUserNetSocketPath checks whether an interface is a dummy
// placeholder for a vhost-user socket and, if so, returns the socket's path.
//
// The interface named "lo" is skipped. For every other interface each of its
// addresses is formatted into hostSocketSearchPath and the first resulting
// path that can be stat'ed is returned. An empty string means no socket was
// found; the returned error is always nil.
func findVhostUserNetSocketPath(netInfo NetworkInfo) (string, error) {
	if netInfo.Iface.Name == "lo" {
		return "", nil
	}

	// Probe the well-known location once per address of the interface.
	for _, address := range netInfo.Addrs {
		candidate := fmt.Sprintf(hostSocketSearchPath, address.IPNet.IP)
		_, statErr := os.Stat(candidate)
		if statErr != nil {
			continue
		}
		return candidate, nil
	}

	return "", nil
}
// vhostUserSocketPath returns the path of the vhost-user socket discovered
// for info. How discovery works depends on the dynamic type of info; today
// only a NetworkInfo value is understood and is resolved through
// findVhostUserNetSocketPath. Any other type yields an empty path and a nil
// error.
func vhostUserSocketPath(info interface{}) (string, error) {
	if netInfo, isNet := info.(NetworkInfo); isNet {
		return findVhostUserNetSocketPath(netInfo)
	}
	return "", nil
}
// BlockDevice refers to a block storage device implementation.
type BlockDevice struct {
// DeviceType is set to DeviceBlock by newBlockDevice.
DeviceType string
// DeviceInfo holds the common device data; attach sets its ID and
// Hotplugged fields.
DeviceInfo DeviceInfo
// SCSI Address of the block device, in case the device is attached using SCSI driver
// SCSI address is in the format SCSI-Id:LUN
// attach sets it when the configured block device driver is not
// VirtioBlock.
SCSIAddr string
// Path at which the device appears inside the VM, outside of the container mount namespace
// attach sets it to /dev/<drive name> when the driver is VirtioBlock.
VirtPath string
// PCI Slot of the block device
// attach copies it from the hot-plugged Drive when the driver is
// VirtioBlock.
PCIAddr string
}
// newBlockDevice wraps devInfo in a BlockDevice whose DeviceType is
// DeviceBlock. The address and path fields are left empty; attach fills them
// in.
func newBlockDevice(devInfo DeviceInfo) *BlockDevice {
	dev := new(BlockDevice)
	dev.DeviceType = DeviceBlock
	dev.DeviceInfo = devInfo
	return dev
}
// attach hot-plugs the block device at DeviceInfo.HostPath into the
// hypervisor as a raw drive and records how it can be located in the guest.
//
// On success DeviceInfo.ID holds a fresh random identifier,
// DeviceInfo.Hotplugged is true, and either VirtPath and PCIAddr (when the
// sandbox's block device driver is VirtioBlock) or SCSIAddr (otherwise) is
// set. On any failure after the sandbox block index was requested, the index
// is decremented again before the error is returned.
func (device *BlockDevice) attach(h hypervisor, c *Container) (err error) {
// The device ID is the hex encoding of 8 random bytes.
randBytes, err := generateRandomBytes(8)
if err != nil {
return err
}
device.DeviceInfo.ID = hex.EncodeToString(randBytes)
// Increment the block index for the sandbox. This is used to determine the name
// for the block device in the case where the block device is used as container
// rootfs and the predicted block device name needs to be provided to the agent.
index, err := c.sandbox.getAndSetSandboxBlockIndex()
// The rollback is registered before the error check below, so it also runs
// when getAndSetSandboxBlockIndex itself fails.
// NOTE(review): presumably the index may already have been advanced in that
// case - confirm against getAndSetSandboxBlockIndex. The result of
// decrementSandboxBlockIndex, if any, is discarded.
defer func() {
if err != nil {
c.sandbox.decrementSandboxBlockIndex()
}
}()
if err != nil {
return err
}
drive := Drive{
File: device.DeviceInfo.HostPath,
Format: "raw",
ID: makeNameID("drive", device.DeviceInfo.ID),
Index: index,
}
// Resolve the guest drive name up front so that a bad index fails before
// anything is hot-plugged.
driveName, err := getVirtDriveName(index)
if err != nil {
return err
}
deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Attaching block device")
// The drive is passed by pointer; drive.PCIAddr is read back below.
// NOTE(review): presumably the hypervisor fills PCIAddr in during hotplug -
// confirm.
if err = h.hotplugAddDevice(&drive, blockDev); err != nil {
return err
}
device.DeviceInfo.Hotplugged = true
if c.sandbox.config.HypervisorConfig.BlockDeviceDriver == VirtioBlock {
device.VirtPath = filepath.Join("/dev", driveName)
device.PCIAddr = drive.PCIAddr
} else {
// This err shadows the named result, but the explicit "return err" assigns
// it to the result before the deferred rollback runs, so the rollback still
// sees the failure.
// NOTE(review): on failure here the drive has already been hot-plugged and
// Hotplugged is true; nothing in this function unplugs it.
scsiAddr, err := getSCSIAddress(index)
if err != nil {
return err
}
device.SCSIAddr = scsiAddr
}
return nil
}
// detach unplugs the block device from the hypervisor if it was hot-plugged.
//
// A device whose DeviceInfo.Hotplugged flag is false is left alone and nil is
// returned. Otherwise the drive is identified by the same name ID that attach
// derived from DeviceInfo.ID, and a failure to remove it is logged and
// returned.
func (device BlockDevice) detach(h hypervisor) error {
	if !device.DeviceInfo.Hotplugged {
		return nil
	}

	deviceLogger().WithField("device", device.DeviceInfo.HostPath).Info("Unplugging block device")

	drive := &Drive{ID: makeNameID("drive", device.DeviceInfo.ID)}
	err := h.hotplugRemoveDevice(drive, blockDev)
	if err != nil {
		deviceLogger().WithError(err).Error("Failed to unplug block device")
		return err
	}

	return nil
}
// deviceType reports the DeviceType stored on the block device.
func (device *BlockDevice) deviceType() string {
	kind := device.DeviceType
	return kind
}
// GenericDevice refers to a device that is neither a VFIO device nor a block
// device. Its attach and detach methods do nothing.
type GenericDevice struct {
// DeviceType is set to DeviceGeneric by newGenericDevice.
DeviceType string
// DeviceInfo holds the common device data.
DeviceInfo DeviceInfo
}
// newGenericDevice wraps devInfo in a GenericDevice whose DeviceType is
// DeviceGeneric.
func newGenericDevice(devInfo DeviceInfo) *GenericDevice {
	dev := new(GenericDevice)
	dev.DeviceType = DeviceGeneric
	dev.DeviceInfo = devInfo
	return dev
}
// attach is a no-op for generic devices: it ignores both arguments and
// always returns nil.
func (device *GenericDevice) attach(_ hypervisor, _ *Container) error {
	return nil
}
// detach is a no-op for generic devices: it ignores the hypervisor and
// always returns nil.
func (device *GenericDevice) detach(_ hypervisor) error {
	return nil
}
// deviceType reports the DeviceType stored on the generic device.
func (device *GenericDevice) deviceType() string {
	kind := device.DeviceType
	return kind
}
// isVFIO reports whether hostPath names a vfio group.
//
// A path qualifies when it starts with vfioPath and has at least one more
// character after it. Any path that starts with <vfioPath>vfio - the
// /dev/vfio/vfio character device - is rejected.
func isVFIO(hostPath string) bool {
	// Ignore the /dev/vfio/vfio character device.
	controlNode := filepath.Join(vfioPath, "vfio")
	if strings.HasPrefix(hostPath, controlNode) {
		return false
	}

	return len(hostPath) > len(vfioPath) && strings.HasPrefix(hostPath, vfioPath)
}
// isBlock reports whether devInfo describes a block device, i.e. whether its
// DevType is "b".
func isBlock(devInfo DeviceInfo) bool {
	return devInfo.DevType == "b"
}
// createDevice picks the Device implementation for devInfo.
//
// A host path recognised by isVFIO yields a VFIODevice and a block device
// (per isBlock) yields a BlockDevice. Everything else is logged and wrapped
// in a GenericDevice.
func createDevice(devInfo DeviceInfo) Device {
	hostPath := devInfo.HostPath

	switch {
	case isVFIO(hostPath):
		return newVFIODevice(devInfo)
	case isBlock(devInfo):
		return newBlockDevice(devInfo)
	}

	deviceLogger().WithField("device", hostPath).Info("Device has not been passed to the container")
	return newGenericDevice(devInfo)
}
// GetHostPath is used to fetch the host path for the device.
// The path passed in the spec refers to the path that should appear inside
// the container, so the actual device path on the host is looked up from the
// device's major and minor numbers.
//
// The lookup reads the DEVNAME key of
// <sysDevPrefix>/{char,block}/<major>:<minor>/uevent and returns it joined
// onto /dev. Special cases:
//   - an empty ContainerPath is an error;
//   - a DevType other than "c", "u" or "b" returns an empty path and a nil
//     error, so that devices which cannot be handled are ignored;
//   - if the uevent file does not exist, ContainerPath is returned unchanged.
func GetHostPath(devInfo DeviceInfo) (string, error) {
	if devInfo.ContainerPath == "" {
		return "", fmt.Errorf("Empty path provided for device")
	}

	var sysClass string
	switch devInfo.DevType {
	case "b":
		sysClass = "block"
	case "c", "u":
		sysClass = "char"
	default:
		// Unsupported device type: report no path and no error.
		return "", nil
	}

	majorMinor := strconv.FormatInt(devInfo.Major, 10) + ":" + strconv.FormatInt(devInfo.Minor, 10)
	ueventPath := filepath.Join(sysDevPrefix, sysClass, majorMinor, "uevent")

	// Some devices (eg. /dev/fuse, /dev/cuse) do not always implement the
	// sysfs interface under /sys/dev, yet docker passes them by default. For
	// those, hand back the configured path as is; this means device renames
	// are not supported for them.
	if _, statErr := os.Stat(ueventPath); os.IsNotExist(statErr) {
		return devInfo.ContainerPath, nil
	} else if statErr != nil {
		return "", statErr
	}

	uevent, err := ini.Load(ueventPath)
	if err != nil {
		return "", err
	}

	nameKey, err := uevent.Section("").GetKey("DEVNAME")
	if err != nil {
		return "", err
	}

	return filepath.Join("/dev", nameKey.String()), nil
}
// GetHostPathFunc is the function newDevices calls to resolve a device's host
// path. It defaults to GetHostPath and is a variable so that tests can
// replace it with a mock.
var GetHostPathFunc = GetHostPath
// newDevices builds one Device per entry of devInfos.
//
// Each entry is copied, its HostPath is replaced with the result of
// GetHostPathFunc, and the copy is handed to createDevice; the caller's
// slice is not modified. The first lookup error aborts the conversion and is
// returned with a nil slice. An empty input yields a nil slice.
func newDevices(devInfos []DeviceInfo) ([]Device, error) {
	var devices []Device

	for i := range devInfos {
		info := devInfos[i]

		hostPath, err := GetHostPathFunc(info)
		if err != nil {
			return nil, err
		}
		info.HostPath = hostPath

		devices = append(devices, createDevice(info))
	}

	return devices, nil
}
// getBDF returns the BDF (bus:slot.function) of a pci device.
// The expected input string format is <domain>:<bus>:<slot>.<func>,
// eg. 0000:02:10.0, for which 02:10.0 is returned.
//
// The input must contain exactly two ':' separators; anything else is
// rejected with an error that quotes the input.
func getBDF(deviceSysStr string) (string, error) {
	if strings.Count(deviceSysStr, ":") != 2 {
		return "", fmt.Errorf("Incorrect number of tokens found while parsing bdf for device : %s", deviceSysStr)
	}

	// Drop the domain: keep everything after the first separator.
	domainEnd := strings.Index(deviceSysStr, ":")
	return deviceSysStr[domainEnd+1:], nil
}
// bind/unbind paths to aid in SRIOV VF bring-up/restore
// The %s in pciDriverUnbindPath is filled with the device BDF and the %s in
// pciDriverBindPath with a driver name; vfioRemoveIDPath and vfioNewIDPath
// receive the vendor/device ID.
// NOTE(review): presumably declared as variables so tests can redirect them
// away from the real sysfs - confirm.
var pciDriverUnbindPath = "/sys/bus/pci/devices/%s/driver/unbind"
var pciDriverBindPath = "/sys/bus/pci/drivers/%s/bind"
var vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
var vfioNewIDPath = "/sys/bus/pci/drivers/vfio-pci/new_id"
// bindDevicetoVFIO binds the device to the vfio driver after unbinding it
// from the host. It will be called by a network interface or a generic pcie
// device.
//
// Three sysfs writes are made in order: bdf to the device's driver unbind
// file, vendorDeviceID to the vfio-pci new_id file, and bdf to the vfio-pci
// bind file. A failure of either of the first two writes is returned; the
// result of the final bind write is deliberately discarded. hostDriver is
// not used.
func bindDevicetoVFIO(bdf, hostDriver, vendorDeviceID string) error {
	// Unbind from the host driver.
	unbindPath := fmt.Sprintf(pciDriverUnbindPath, bdf)
	unbindFields := logrus.Fields{
		"device-bdf":  bdf,
		"driver-path": unbindPath,
	}
	deviceLogger().WithFields(unbindFields).Info("Unbinding device from driver")

	err := writeToFile(unbindPath, []byte(bdf))
	if err != nil {
		return err
	}

	// Add device id to vfio driver.
	newIDFields := logrus.Fields{
		"vendor-device-id": vendorDeviceID,
		"vfio-new-id-path": vfioNewIDPath,
	}
	deviceLogger().WithFields(newIDFields).Info("Writing vendor-device-id to vfio new-id path")

	err = writeToFile(vfioNewIDPath, []byte(vendorDeviceID))
	if err != nil {
		return err
	}

	// Bind to vfio-pci driver.
	bindPath := fmt.Sprintf(pciDriverBindPath, "vfio-pci")
	bindFields := logrus.Fields{
		"device-bdf":  bdf,
		"driver-path": bindPath,
	}
	deviceLogger().WithFields(bindFields).Info("Binding device to vfio driver")

	// Device may be already bound at this time because of the earlier write
	// to new_id, so the error is ignored on purpose.
	_ = writeToFile(bindPath, []byte(bdf))

	return nil
}
// bindDevicetoHost binds the device to the host driver after unbinding it
// from vfio-pci.
//
// Three sysfs writes are made in order: bdf to the device's driver unbind
// file, vendorDeviceID to the vfio-pci remove_id file, and bdf to the bind
// file of hostDriver. The first write that fails stops the sequence and its
// error is returned.
func bindDevicetoHost(bdf, hostDriver, vendorDeviceID string) error {
	// Unbind from vfio-pci driver.
	unbindPath := fmt.Sprintf(pciDriverUnbindPath, bdf)
	unbindFields := logrus.Fields{
		"device-bdf":  bdf,
		"driver-path": unbindPath,
	}
	deviceLogger().WithFields(unbindFields).Info("Unbinding device from driver")

	err := writeToFile(unbindPath, []byte(bdf))
	if err != nil {
		return err
	}

	// To prevent new VFs from binding to VFIO-PCI, remove_id.
	err = writeToFile(vfioRemoveIDPath, []byte(vendorDeviceID))
	if err != nil {
		return err
	}

	// Bind back to host driver.
	bindPath := fmt.Sprintf(pciDriverBindPath, hostDriver)
	bindFields := logrus.Fields{
		"device-bdf":  bdf,
		"driver-path": bindPath,
	}
	deviceLogger().WithFields(bindFields).Info("Binding back device to host driver")

	return writeToFile(bindPath, []byte(bdf))
}