Merge pull request #4492 from zvonkok/pcie-topology

runtime: fix PCIe topology for GPUDirect use-case
This commit is contained in:
Peng Tao
2023-07-03 09:17:12 +08:00
committed by GitHub
35 changed files with 952 additions and 560 deletions

View File

@@ -20,7 +20,7 @@ import (
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
@@ -308,8 +308,9 @@ func TestCreateContainerConfigFail(t *testing.T) {
assert.Error(err)
}
func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err error) {
var coldPlugVFIO hv.PCIePort
func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string, err error) {
var hotPlugVFIO config.PCIePort
var coldPlugVFIO config.PCIePort
if dir == "" {
return "", fmt.Errorf("BUG: need directory")
}
@@ -330,11 +331,11 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
blockDeviceDriver := "virtio-scsi"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
disableNewNetNs := false
sharedFS := "virtio-9p"
virtioFSdaemon := path.Join(dir, "virtiofsd")
coldPlugVFIO = hv.RootPort
hotPlugVFIO = config.BridgePort
coldPlugVFIO = config.RootPort
configFileOptions := ktu.RuntimeConfigOptions{
Hypervisor: "qemu",
@@ -349,10 +350,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
BlockDeviceDriver: blockDeviceDriver,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
DisableNewNetNs: disableNewNetNs,
SharedFS: sharedFS,
VirtioFSDaemon: virtioFSdaemon,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
}

View File

@@ -125,14 +125,117 @@ const (
// SysDevPrefix is static string of /sys/dev
var SysDevPrefix = "/sys/dev"
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
var SysIOMMUPath = "/sys/kernel/iommu_groups"
// SysIOMMUGroupPath is static string of /sys/kernel/iommu_groups
var SysIOMMUGroupPath = "/sys/kernel/iommu_groups"
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
var getSysDevPath = getSysDevPathImpl
// PCIePortBusPrefix is the bus-name prefix that matches the kind of port a
// device is hot- or cold-plugged into.
type PCIePortBusPrefix string

// Known bus-name prefixes, one per supported kind of port.
const (
	PCIeRootPortPrefix              PCIePortBusPrefix = "rp"
	PCIeSwitchPortPrefix            PCIePortBusPrefix = "sw"
	PCIeSwitchUpstreamPortPrefix    PCIePortBusPrefix = "swup"
	PCIeSwitchhDownstreamPortPrefix PCIePortBusPrefix = "swdp"
	PCIBridgePortPrefix             PCIePortBusPrefix = "bp"
)

// String returns the prefix unchanged when it is one of the known prefixes
// and an "<unknown PCIePortBusPrefix: ...>" marker for any other value.
func (p PCIePortBusPrefix) String() string {
	switch p {
	case PCIeRootPortPrefix,
		PCIeSwitchPortPrefix,
		PCIeSwitchUpstreamPortPrefix,
		PCIeSwitchhDownstreamPortPrefix,
		PCIBridgePortPrefix:
		return string(p)
	default:
		return fmt.Sprintf("<unknown PCIePortBusPrefix: %s>", string(p))
	}
}
// PCIePort names the kind of port a VFIO device is attached to.
//
// NOTE(review): only RootPort is declared with the PCIePort type; the other
// constants in the block below are untyped string constants that convert
// implicitly wherever a PCIePort is expected.
type PCIePort string

const (
	// RootPort attaches VFIO devices to a root-port.
	RootPort PCIePort = "root-port"
	// SwitchPort attaches VFIO devices to a switch-port.
	SwitchPort = "switch-port"
	// BridgePort is the default.
	BridgePort = "bridge-port"
	// NoPort is for disabling VFIO hotplug/coldplug.
	NoPort = "no-port"
	// InvalidPort is for an invalid port.
	InvalidPort = "invalid-port"
)

// String returns the port name unchanged when it is one of the declared
// constants and an "<unknown PCIePort: ...>" marker for any other value.
func (p PCIePort) String() string {
	switch p {
	case RootPort, SwitchPort, BridgePort, NoPort, InvalidPort:
		return string(p)
	default:
		return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
	}
}
// PCIePortPrefixMapping maps a PCIePort to the bus-name prefix used for
// devices attached to that kind of port. Only RootPort, SwitchPort and
// BridgePort have an entry; looking up any other port yields the empty prefix.
// SwitchPort maps to the switch downstream-port prefix.
var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{
RootPort: PCIeRootPortPrefix,
SwitchPort: PCIeSwitchhDownstreamPortPrefix,
BridgePort: PCIBridgePortPrefix,
}
// Invalid reports whether p is none of RootPort, SwitchPort, BridgePort or
// NoPort. InvalidPort and any unknown value are therefore invalid.
func (p PCIePort) Invalid() bool {
	switch p {
	case RootPort, SwitchPort, BridgePort, NoPort:
		return false
	default:
		return true
	}
}
// Valid reports whether p is one of RootPort, SwitchPort, BridgePort or
// NoPort. InvalidPort and any unknown value are not valid.
func (p PCIePort) Valid() bool {
	switch p {
	case RootPort, SwitchPort, BridgePort, NoPort:
		return true
	default:
		return false
	}
}
// PCIePortMapping is a set of devices attached to one kind of port.
// VFIODevice.Attach keys it by the device's BDF.
type PCIePortMapping map[string]bool
var (
// PCIeDevices keeps track, per PCIe port type, of the devices attached to
// that kind of port. The bus number is deduced from the number of entries,
// which avoids assigning the same bus twice.
PCIeDevices = map[PCIePort]PCIePortMapping{}
)
// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
// DriverOptions is specific options for each device driver
@@ -178,6 +281,9 @@ type DeviceInfo struct {
// ColdPlug specifies whether the device must be cold plugged (true)
// or hot plugged (false).
ColdPlug bool
// Specifies the PCIe port type to which the device is attached
Port PCIePort
}
// BlockDrive represents a block storage drive which may be used in case the storage
@@ -279,14 +385,8 @@ const (
VFIOAPDeviceMediatedType
)
type VFIODev interface {
GetID() *string
GetType() VFIODeviceType
GetSysfsDev() *string
}
// VFIOPCIDev represents a VFIO PCI device used for hotplugging
type VFIOPCIDev struct {
// VFIODev represents a VFIO PCI device used for hotplugging
type VFIODev struct {
// ID is used to identify this drive in the hypervisor options.
ID string
@@ -316,44 +416,15 @@ type VFIOPCIDev struct {
// IsPCIe specifies device is PCIe or PCI
IsPCIe bool
}
func (d VFIOPCIDev) GetID() *string {
return &d.ID
}
func (d VFIOPCIDev) GetType() VFIODeviceType {
return d.Type
}
func (d VFIOPCIDev) GetSysfsDev() *string {
return &d.SysfsDev
}
type VFIOAPDev struct {
// ID is used to identify this drive in the hypervisor options.
ID string
// sysfsdev of VFIO mediated device
SysfsDev string
// APDevices are the Adjunct Processor devices assigned to the mdev
APDevices []string
// Type of VFIO device
Type VFIODeviceType
}
// Rank identifies a device in a IOMMU group
Rank int
func (d VFIOAPDev) GetID() *string {
return &d.ID
}
func (d VFIOAPDev) GetType() VFIODeviceType {
return d.Type
}
func (d VFIOAPDev) GetSysfsDev() *string {
return &d.SysfsDev
// Port is the PCIe port type to which the device is attached
Port PCIePort
}
// RNGDev represents a random number generator device

View File

@@ -47,9 +47,9 @@ func deviceLogger() *logrus.Entry {
return api.DeviceLogger()
}
// Identify PCIe device by reading the size of the PCI config space
// IsPCIeDevice identifies PCIe device by reading the size of the PCI config space
// Plain PCI devices have 256 bytes of config space, whereas PCIe devices have 4K
func isPCIeDevice(bdf string) bool {
func IsPCIeDevice(bdf string) bool {
if len(strings.Split(bdf, ":")) == 2 {
bdf = PCIDomain + ":" + bdf
}
@@ -157,14 +157,12 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
// We can reuse this function at various levels, sandbox, container.
// Only the VFIO module is allowed to do bus assignments, all other modules need to
// ignore it if used as helper function to get VFIO information.
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignment bool) ([]*config.VFIODev, error) {
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) {
vfioDevs := []*config.VFIODev{}
vfioGroup := filepath.Base(device.HostPath)
iommuDevicesPath := filepath.Join(config.SysIOMMUPath, vfioGroup, "devices")
iommuDevicesPath := filepath.Join(config.SysIOMMUGroupPath, vfioGroup, "devices")
deviceFiles, err := os.ReadDir(iommuDevicesPath)
if err != nil {
@@ -174,7 +172,7 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignme
// Pass all devices in iommu group
for i, deviceFile := range deviceFiles {
//Get bdf of device eg 0000:00:1c.0
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(deviceFile.Name(), iommuDevicesPath)
deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(deviceFile.Name(), iommuDevicesPath)
if err != nil {
return nil, err
}
@@ -196,27 +194,24 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignme
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
isPCIe := isPCIeDevice(deviceBDF)
// Do not directly assign to `vfio` -- need to access field still
vfioPCI := config.VFIOPCIDev{
vfio = config.VFIODev{
ID: id,
Type: vfioDeviceType,
BDF: deviceBDF,
SysfsDev: deviceSysfsDev,
IsPCIe: isPCIe,
IsPCIe: IsPCIeDevice(deviceBDF),
Class: pciClass,
Rank: -1,
Port: device.Port,
}
if isPCIe && !ignoreBusAssignment {
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
AllPCIeDevs[deviceBDF] = true
}
vfio = vfioPCI
case config.VFIOAPDeviceMediatedType:
devices, err := GetAPVFIODevices(deviceSysfsDev)
if err != nil {
return nil, err
}
vfio = config.VFIOAPDev{
vfio = config.VFIODev{
ID: id,
SysfsDev: deviceSysfsDev,
Type: config.VFIOAPDeviceMediatedType,

View File

@@ -28,14 +28,9 @@ const (
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
vfioDevPath = "/dev/vfio/%s"
pcieRootPortPrefix = "rp"
vfioAPSysfsDir = "/sys/devices/vfio_ap"
)
var (
AllPCIeDevs = map[string]bool{}
)
// VFIODevice is a vfio device meant to be passed to the hypervisor
// to be used by the Virtual Machine.
type VFIODevice struct {
@@ -70,10 +65,17 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
}
}()
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo, false)
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
if err != nil {
return err
}
for _, vfio := range device.VfioDevs {
if vfio.IsPCIe {
busIndex := len(config.PCIeDevices[vfio.Port])
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
config.PCIeDevices[vfio.Port][vfio.BDF] = true
}
}
coldPlug := device.DeviceInfo.ColdPlug
deviceLogger().WithField("cold-plug", coldPlug).Info("Attaching VFIO device")
@@ -169,23 +171,18 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
for _, dev := range ds.VFIODevs {
var vfio config.VFIODev
vfioDeviceType := (*device.VfioDevs[0]).GetType()
switch vfioDeviceType {
switch dev.Type {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
bdf := ""
if pciDev, ok := (*dev).(config.VFIOPCIDev); ok {
bdf = pciDev.BDF
}
vfio = config.VFIOPCIDev{
ID: *(*dev).GetID(),
Type: config.VFIODeviceType((*dev).GetType()),
BDF: bdf,
SysfsDev: *(*dev).GetSysfsDev(),
vfio = config.VFIODev{
ID: dev.ID,
Type: config.VFIODeviceType(dev.Type),
BDF: dev.BDF,
SysfsDev: dev.SysfsDev,
}
case config.VFIOAPDeviceMediatedType:
vfio = config.VFIOAPDev{
ID: *(*dev).GetID(),
SysfsDev: *(*dev).GetSysfsDev(),
vfio = config.VFIODev{
ID: dev.ID,
SysfsDev: dev.SysfsDev,
}
default:
deviceLogger().WithError(
@@ -200,7 +197,7 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
// here it shares function from *GenericDevice so we don't need duplicate codes
func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr)
if err != nil {
@@ -210,14 +207,18 @@ func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType:
// Get bdf of device eg. 0000:00:1c.0
deviceBDF = getBDF(deviceFileName)
// OLD IMPL: deviceBDF = getBDF(deviceFileName)
// The old implementation did not consider the case where
// vfio devices are located on different root busses. The
// kata-agent will handle the case now, here, use the full PCI addr
deviceBDF = deviceFileName
// Get sysfs path used by cloud-hypervisor
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
case config.VFIOPCIDeviceMediatedType:
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev))
case config.VFIOAPDeviceMediatedType:
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
@@ -240,7 +241,7 @@ func getMediatedBDF(deviceSysfsDev string) string {
// GetBDF returns the BDF of a PCI device
// Expected input string format is [<domain>]:[<bus>]:[<slot>].[<func>] eg. 0000:02:10.0
func getBDF(deviceSysStr string) string {
func GetBDF(deviceSysStr string) string {
tokens := strings.SplitN(deviceSysStr, ":", 2)
if len(tokens) == 1 {
return ""

View File

@@ -20,7 +20,7 @@ func TestGetVFIODetails(t *testing.T) {
}
data := []testData{
{"0000:02:10.0", "02:10.0"},
{"0000:02:10.0", "0000:02:10.0"},
{"0000:0210.0", ""},
{"f79944e4-5a3d-11e8-99ce-", ""},
{"f79944e4-5a3d-11e8-99ce", ""},
@@ -29,7 +29,7 @@ func TestGetVFIODetails(t *testing.T) {
}
for _, d := range data {
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "")
deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(d.deviceStr, "")
switch vfioDeviceType {
case config.VFIOPCIDeviceNormalType:

View File

@@ -71,7 +71,11 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS
dm.blockDriver = config.VirtioSCSI
}
drivers.AllPCIeDevs = make(map[string]bool)
config.PCIeDevices = make(map[config.PCIePort]config.PCIePortMapping)
config.PCIeDevices[config.RootPort] = make(map[string]bool)
config.PCIeDevices[config.SwitchPort] = make(map[string]bool)
config.PCIeDevices[config.BridgePort] = make(map[string]bool)
for _, dev := range devices {
dm.devices[dev.DeviceID()] = dev
@@ -118,7 +122,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device
}
if IsVFIO(devInfo.HostPath) {
return drivers.NewVFIODevice(&devInfo), nil
} else if isVhostUserBlk(devInfo) {
} else if IsVhostUserBlk(devInfo) {
if devInfo.DriverOptions == nil {
devInfo.DriverOptions = make(map[string]string)
}

View File

@@ -116,14 +116,14 @@ func TestAttachVFIODevice(t *testing.T) {
_, err = os.Create(deviceConfigFile)
assert.Nil(t, err)
savedIOMMUPath := config.SysIOMMUPath
config.SysIOMMUPath = tmpDir
savedIOMMUPath := config.SysIOMMUGroupPath
config.SysIOMMUGroupPath = tmpDir
savedSysBusPciDevicesPath := config.SysBusPciDevicesPath
config.SysBusPciDevicesPath = devicesDir
defer func() {
config.SysIOMMUPath = savedIOMMUPath
config.SysIOMMUGroupPath = savedIOMMUPath
config.SysBusPciDevicesPath = savedSysBusPciDevicesPath
}()

View File

@@ -37,7 +37,7 @@ func isBlock(devInfo config.DeviceInfo) bool {
}
// IsVhostUserBlk checks if the device is a VhostUserBlk device.
func isVhostUserBlk(devInfo config.DeviceInfo) bool {
func IsVhostUserBlk(devInfo config.DeviceInfo) bool {
return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor
}

View File

@@ -70,7 +70,7 @@ func TestIsVhostUserBlk(t *testing.T) {
}
for _, d := range data {
isVhostUserBlk := isVhostUserBlk(
isVhostUserBlk := IsVhostUserBlk(
config.DeviceInfo{
DevType: d.devType,
Major: d.major,

View File

@@ -123,6 +123,14 @@ const (
// PCIeRootPort is a PCIe Root Port, the PCIe device should be hotplugged to this port.
PCIeRootPort DeviceDriver = "pcie-root-port"
// PCIeSwitchUpstreamPort is a PCIe switch upstream port
// An upstream port connects to a PCIe Root Port
PCIeSwitchUpstreamPort DeviceDriver = "x3130-upstream"
// PCIeSwitchDownstreamPort is a PCIe switch downstream port
// PCIe devices can be hot-plugged to the downstream port.
PCIeSwitchDownstreamPort DeviceDriver = "xio3130-downstream"
// Loader is the Loader device driver.
Loader DeviceDriver = "loader"
@@ -236,6 +244,7 @@ const (
// SecExecGuest represents an s390x Secure Execution (Protected Virtualization in QEMU) object
SecExecGuest ObjectType = "s390-pv-guest"
// PEFGuest represent ppc64le PEF(Protected Execution Facility) object.
PEFGuest ObjectType = "pef-guest"
)
@@ -369,7 +378,6 @@ func (object Object) QemuParams(config *Config) []string {
deviceParams = append(deviceParams, string(object.Driver))
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID))
deviceParams = append(deviceParams, fmt.Sprintf("host-path=%s", object.File))
}
if len(deviceParams) > 0 {
@@ -1681,6 +1689,106 @@ func (b PCIeRootPortDevice) Valid() bool {
return true
}
// PCIeSwitchUpstreamPortDevice describes a PCIe switch upstream port, i.e. the
// switch port that connects to a root port. Both fields are required by Valid
// and are emitted by QemuParams as the id= and bus= device options.
type PCIeSwitchUpstreamPortDevice struct {
ID string // format: sup{n}, n>=0
Bus string // default is rp0
}
// QemuParams returns the qemu parameters built out of the
// PCIeSwitchUpstreamPortDevice: a "-device" flag followed by the
// comma-separated driver name, id= and bus= options. The config argument is
// not used.
func (b PCIeSwitchUpstreamPortDevice) QemuParams(config *Config) []string {
	device := strings.Join([]string{
		fmt.Sprintf("%s,id=%s", PCIeSwitchUpstreamPort, b.ID),
		fmt.Sprintf("bus=%s", b.Bus),
	}, ",")

	return []string{"-device", device}
}
// Valid returns true if the PCIeSwitchUpstreamPortDevice structure is valid
// and complete, i.e. both ID and Bus are non-empty.
func (b PCIeSwitchUpstreamPortDevice) Valid() bool {
	return b.ID != "" && b.Bus != ""
}
// PCIeSwitchDownstreamPortDevice describes a PCIe switch downstream port.
// ID, Bus, Chassis and Slot are required by Valid; the *Reserve fields are
// optional and only emitted by QemuParams when non-empty.
// NOTE(review): the ID and Bus field comments are word-for-word the same as on
// PCIeSwitchUpstreamPortDevice and may be copy-paste leftovers — confirm the
// intended ID format and default bus for a downstream port.
type PCIeSwitchDownstreamPortDevice struct {
ID string // format: sup{n}, n>=0
Bus string // default is rp0
Chassis string // (slot, chassis) pair is mandatory and must be unique for each downstream port, >=0, default is 0x00
Slot string // >=0, default is 0x00
// BusReserve needs patches to QEMU in order to work
BusReserve string
// Pref64 and Pref32 are not allowed to be set simultaneously
Pref64Reserve string // reserve prefetched MMIO aperture, 64-bit
Pref32Reserve string // reserve prefetched MMIO aperture, 32-bit
MemReserve string // reserve non-prefetched MMIO aperture, 32-bit *only*
IOReserve string // IO reservation
}
// QemuParams returns the qemu parameters built out of the
// PCIeSwitchDownstreamPortDevice: a "-device" flag followed by the
// comma-separated driver name, id=, bus=, chassis= and slot= options, plus any
// reservation option whose field is non-empty. The config argument is not used.
func (b PCIeSwitchDownstreamPortDevice) QemuParams(config *Config) []string {
	deviceParams := []string{
		fmt.Sprintf("%s,id=%s", PCIeSwitchDownstreamPort, b.ID),
		fmt.Sprintf("bus=%s", b.Bus),
		fmt.Sprintf("chassis=%s", b.Chassis),
		fmt.Sprintf("slot=%s", b.Slot),
	}

	// Optional reservation hints, appended in this fixed order when set.
	optional := []struct {
		format string
		value  string
	}{
		{"bus-reserve=%s", b.BusReserve},
		{"pref64-reserve=%s", b.Pref64Reserve},
		{"pref32-reserve=%s", b.Pref32Reserve},
		{"mem-reserve=%s", b.MemReserve},
		{"io-reserve=%s", b.IOReserve},
	}
	for _, opt := range optional {
		if opt.value == "" {
			continue
		}
		deviceParams = append(deviceParams, fmt.Sprintf(opt.format, opt.value))
	}

	return []string{"-device", strings.Join(deviceParams, ",")}
}
// Valid returns true if the PCIeSwitchDownstreamPortDevice structure is valid
// and complete: ID, Bus, Chassis and Slot must all be non-empty, and
// Pref64Reserve and Pref32Reserve must not both be set.
func (b PCIeSwitchDownstreamPortDevice) Valid() bool {
	// The struct documents the 64-bit and 32-bit prefetchable reservations
	// as mutually exclusive, so reject a device that sets both.
	if b.Pref64Reserve != "" && b.Pref32Reserve != "" {
		return false
	}
	if b.ID == "" {
		return false
	}
	if b.Bus == "" {
		return false
	}
	if b.Chassis == "" {
		return false
	}
	if b.Slot == "" {
		return false
	}
	return true
}
// VFIODevice represents a qemu vfio device meant for direct access by guest OS.
type VFIODevice struct {
// Bus-Device-Function of device

View File

@@ -5,7 +5,7 @@
package hypervisors
import "fmt"
import "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
// Bridge is a bridge where devices can be hot plugged
type Bridge struct {
@@ -28,37 +28,8 @@ type CPUDevice struct {
ID string
}
// PCIePort distinguish only between root and switch port
type PCIePort string
const (
// RootPort attach VFIO devices to a root-port
RootPort PCIePort = "root-port"
// SwitchPort attach VFIO devices to a switch-port
SwitchPort = "switch-port"
// BridgePort is the default
BridgePort = "bridge-port"
// NoPort is for disabling VFIO hotplug/coldplug
NoPort = "no-port"
)
func (p PCIePort) String() string {
switch p {
case RootPort:
return "root-port"
case SwitchPort:
return "switch-port"
case BridgePort:
return "bridge-port"
case NoPort:
return "no-port"
}
return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
}
type HypervisorState struct {
BlockIndexMap map[int]struct{}
// Type of hypervisor, E.g. qemu/firecracker/acrn.
Type string
UUID string
@@ -74,7 +45,7 @@ type HypervisorState struct {
HotpluggedMemory int
VirtiofsDaemonPid int
Pid int
PCIeRootPort int
ColdPlugVFIO PCIePort
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
HotplugVFIOOnRootBus bool
}

View File

@@ -14,7 +14,7 @@ import (
"strconv"
"testing"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
)
@@ -224,8 +224,8 @@ type RuntimeConfigOptions struct {
JaegerUser string
JaegerPassword string
PFlash []string
PCIeRootPort uint32
ColdPlugVFIO hv.PCIePort
HotPlugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
DefaultVCPUCount uint32
DefaultMaxVCPUCount uint32
DefaultMemSize uint32
@@ -318,7 +318,6 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `

View File

@@ -10,7 +10,7 @@
package katautils
import (
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
)
// name is the name of the runtime
@@ -82,7 +82,6 @@ const defaultEnableDebug bool = false
const defaultDisableNestingChecks bool = false
const defaultMsize9p uint32 = 8192
const defaultHotplugVFIOOnRootBus bool = false
const defaultPCIeRootPort = 0
const defaultEntropySource = "/dev/urandom"
const defaultGuestHookPath string = ""
const defaultVirtioFSCacheMode = "never"
@@ -108,4 +107,5 @@ const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock"
// Default config file used by stateless systems.
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
const defaultColdPlugVFIO = hv.NoPort
const defaultHotPlugVFIO = config.NoPort
const defaultColdPlugVFIO = config.NoPort

View File

@@ -20,7 +20,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
@@ -78,88 +77,88 @@ type factory struct {
}
type hypervisor struct {
Path string `toml:"path"`
JailerPath string `toml:"jailer_path"`
Kernel string `toml:"kernel"`
CtlPath string `toml:"ctlpath"`
Initrd string `toml:"initrd"`
Image string `toml:"image"`
RootfsType string `toml:"rootfs_type"`
Firmware string `toml:"firmware"`
FirmwareVolume string `toml:"firmware_volume"`
MachineAccelerators string `toml:"machine_accelerators"`
CPUFeatures string `toml:"cpu_features"`
KernelParams string `toml:"kernel_params"`
MachineType string `toml:"machine_type"`
BlockDeviceDriver string `toml:"block_device_driver"`
EntropySource string `toml:"entropy_source"`
SharedFS string `toml:"shared_fs"`
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
VirtioFSCache string `toml:"virtio_fs_cache"`
VhostUserStorePath string `toml:"vhost_user_store_path"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
BlockDeviceAIO string `toml:"block_device_aio"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
MemOffset uint64 `toml:"memory_offset"`
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
NumVCPUs int32 `toml:"default_vcpus"`
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
IOMMU bool `toml:"enable_iommu"`
IOMMUPlatform bool `toml:"enable_iommu_platform"`
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
ColdPlugVFIO hv.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
SevSnpGuest bool `toml:"sev_snp_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"`
DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"`
Path string `toml:"path"`
JailerPath string `toml:"jailer_path"`
Kernel string `toml:"kernel"`
CtlPath string `toml:"ctlpath"`
Initrd string `toml:"initrd"`
Image string `toml:"image"`
RootfsType string `toml:"rootfs_type"`
Firmware string `toml:"firmware"`
FirmwareVolume string `toml:"firmware_volume"`
MachineAccelerators string `toml:"machine_accelerators"`
CPUFeatures string `toml:"cpu_features"`
KernelParams string `toml:"kernel_params"`
MachineType string `toml:"machine_type"`
BlockDeviceDriver string `toml:"block_device_driver"`
EntropySource string `toml:"entropy_source"`
SharedFS string `toml:"shared_fs"`
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
VirtioFSCache string `toml:"virtio_fs_cache"`
VhostUserStorePath string `toml:"vhost_user_store_path"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
BlockDeviceAIO string `toml:"block_device_aio"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
MemOffset uint64 `toml:"memory_offset"`
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
NumVCPUs int32 `toml:"default_vcpus"`
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
IOMMU bool `toml:"enable_iommu"`
IOMMUPlatform bool `toml:"enable_iommu_platform"`
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
SevSnpGuest bool `toml:"sev_snp_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"`
DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"`
}
type runtime struct {
@@ -287,12 +286,18 @@ func (h hypervisor) firmware() (string, error) {
return ResolvePath(p)
}
func (h hypervisor) coldPlugVFIO() hv.PCIePort {
// coldPlugVFIO returns the PCIe port configured for VFIO cold-plug.
// When the cold_plug_vfio setting is empty, defaultColdPlugVFIO is returned.
func (h hypervisor) coldPlugVFIO() config.PCIePort {
	if port := h.ColdPlugVFIO; port != "" {
		return port
	}
	return defaultColdPlugVFIO
}
// hotPlugVFIO returns the PCIe port configured for VFIO hot-plug.
// When the hot_plug_vfio setting is empty, defaultHotPlugVFIO is returned.
func (h hypervisor) hotPlugVFIO() config.PCIePort {
	if port := h.HotPlugVFIO; port != "" {
		return port
	}
	return defaultHotPlugVFIO
}
func (h hypervisor) firmwareVolume() (string, error) {
p := h.FirmwareVolume
@@ -863,8 +868,8 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
@@ -1060,7 +1065,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.PCIeRootPort,
HotPlugVFIO: h.hotPlugVFIO(),
DisableVhostNet: true,
GuestHookPath: h.guestHookPath(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
@@ -1291,7 +1296,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
ColdPlugVFIO: defaultColdPlugVFIO,
PCIeRootPort: defaultPCIeRootPort,
HotPlugVFIO: defaultHotPlugVFIO,
GuestHookPath: defaultGuestHookPath,
VhostUserStorePath: defaultVhostUserStorePath,
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
@@ -1663,9 +1668,10 @@ func checkConfig(config oci.RuntimeConfig) error {
return err
}
hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
machineType := config.HypervisorConfig.HypervisorMachineType
if err := checkPCIeConfig(coldPlugVFIO, machineType); err != nil {
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType); err != nil {
return err
}
@@ -1675,18 +1681,32 @@ func checkConfig(config oci.RuntimeConfig) error {
// checkPCIeConfig ensures the PCIe configuration is valid.
// Only allow one of the following settings for cold-plug:
// no-port, root-port, switch-port
func checkPCIeConfig(vfioPort hv.PCIePort, machineType string) error {
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string) error {
// Currently only QEMU q35 supports advanced PCIe topologies
// firecracker, dragonball do not have right now any PCIe support
if machineType != "q35" {
return nil
}
if vfioPort == hv.NoPort || vfioPort == hv.RootPort || vfioPort == hv.SwitchPort {
if coldPlug != config.NoPort && hotPlug != config.NoPort {
return fmt.Errorf("invalid hot-plug=%s and cold-plug=%s settings, only one of them can be set", coldPlug, hotPlug)
}
if coldPlug == config.NoPort && hotPlug == config.NoPort {
return nil
}
var port config.PCIePort
if coldPlug != config.NoPort {
port = coldPlug
}
if hotPlug != config.NoPort {
port = hotPlug
}
if port == config.NoPort || port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
return nil
}
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s",
vfioPort, hv.NoPort, hv.RootPort, hv.SwitchPort)
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s",
coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort)
}
// checkNetNsConfig performs sanity checks on disable_new_netns config.

View File

@@ -18,8 +18,8 @@ import (
"syscall"
"testing"
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
@@ -63,15 +63,16 @@ func createConfig(configPath string, fileData string) error {
// createAllRuntimeConfigFiles creates all files necessary to call
// loadConfiguration().
func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConfig, err error) {
func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntimeConfig, err error) {
if dir == "" {
return config, fmt.Errorf("BUG: need directory")
return testConfig, fmt.Errorf("BUG: need directory")
}
if hypervisor == "" {
return config, fmt.Errorf("BUG: need hypervisor")
return testConfig, fmt.Errorf("BUG: need hypervisor")
}
var coldPlugVFIO hv.PCIePort
var hotPlugVFIO config.PCIePort
var coldPlugVFIO config.PCIePort
hypervisorPath := path.Join(dir, "hypervisor")
kernelPath := path.Join(dir, "kernel")
kernelParams := "foo=bar xyz"
@@ -85,8 +86,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
blockDeviceAIO := "io_uring"
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
coldPlugVFIO = hv.RootPort
hotPlugVFIO = config.NoPort
coldPlugVFIO = config.BridgePort
disableNewNetNs := false
sharedFS := "virtio-9p"
virtioFSdaemon := path.Join(dir, "virtiofsd")
@@ -108,7 +109,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
BlockDeviceAIO: blockDeviceAIO,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
DisableNewNetNs: disableNewNetNs,
DefaultVCPUCount: defaultVCPUCount,
@@ -134,7 +135,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
configPath := path.Join(dir, "runtime.toml")
err = createConfig(configPath, runtimeConfigFileData)
if err != nil {
return config, err
return testConfig, err
}
configPathLink := path.Join(filepath.Dir(configPath), "link-to-configuration.toml")
@@ -142,7 +143,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
// create a link to the config file
err = syscall.Symlink(configPath, configPathLink)
if err != nil {
return config, err
return testConfig, err
}
files := []string{hypervisorPath, kernelPath, imagePath}
@@ -151,7 +152,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
// create the resource (which must be >0 bytes)
err := WriteFile(file, "foo", testFileMode)
if err != nil {
return config, err
return testConfig, err
}
}
@@ -172,7 +173,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
DefaultBridges: defaultBridgesCount,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
HotPlugVFIO: hotPlugVFIO,
ColdPlugVFIO: coldPlugVFIO,
Msize9p: defaultMsize9p,
MemSlots: defaultMemSlots,
@@ -216,10 +217,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
err = SetKernelParams(&runtimeConfig)
if err != nil {
return config, err
return testConfig, err
}
config = testRuntimeConfig{
rtimeConfig := testRuntimeConfig{
RuntimeConfig: runtimeConfig,
RuntimeConfigFile: configPath,
ConfigPath: configPath,
@@ -228,7 +229,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
LogPath: logPath,
}
return config, nil
return rtimeConfig, nil
}
// testLoadConfiguration accepts an optional function that can be used
@@ -568,6 +569,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
VirtioFSCache: defaultVirtioFSCacheMode,
BlockDeviceAIO: defaultBlockDeviceAIO,
DisableGuestSeLinux: defaultDisableGuestSeLinux,
HotPlugVFIO: defaultHotPlugVFIO,
ColdPlugVFIO: defaultColdPlugVFIO,
}
@@ -602,7 +604,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
func TestNewQemuHypervisorConfig(t *testing.T) {
dir := t.TempDir()
var coldPlugVFIO hv.PCIePort
var coldPlugVFIO config.PCIePort
hypervisorPath := path.Join(dir, "hypervisor")
kernelPath := path.Join(dir, "kernel")
imagePath := path.Join(dir, "image")
@@ -610,8 +612,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
coldPlugVFIO = hv.RootPort
coldPlugVFIO = config.BridgePort
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
blockDeviceAIO := "io_uring"
defer func() {
@@ -630,7 +631,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
ColdPlugVFIO: coldPlugVFIO,
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
@@ -686,10 +686,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
}
if config.PCIeRootPort != pcieRootPort {
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
}
if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate {
t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate)
}
@@ -812,7 +808,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
disableBlock := true
enableIOThreads := true
hotplugVFIOOnRootBus := true
pcieRootPort := uint32(2)
hypervisor := hypervisor{
Path: hypervisorPath,
@@ -823,7 +818,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
DisableBlockDeviceUse: disableBlock,
EnableIOThreads: enableIOThreads,
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
PCIeRootPort: pcieRootPort,
}
_, err := newQemuHypervisorConfig(hypervisor)

View File

@@ -453,6 +453,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err
}
if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil {
return err
}
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
if value != "" {
config.HypervisorConfig.HypervisorMachineType = value
@@ -508,12 +512,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
return err
}
if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeRootPort).setUint(func(pcieRootPort uint64) {
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
}); err != nil {
return err
}
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
if !checkPathIsInGlobs(runtime.HypervisorConfig.EntropySourceList, value) {
return fmt.Errorf("entropy source %v required from annotation is not valid", value)
@@ -576,6 +574,37 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
return nil
}
// addHypervisorPCIePortOverride converts an annotation value into a PCIe port.
//
// An empty value maps to config.NoPort. A value for which PCIePort.Invalid()
// reports true yields config.InvalidPort together with an error; any other
// value is returned as a config.PCIePort unchanged.
func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) {
	if len(value) == 0 {
		return config.NoPort, nil
	}

	if candidate := config.PCIePort(value); !candidate.Invalid() {
		return candidate, nil
	}

	return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value)
}
// addHypervisorHotColdPlugVfioOverrides applies the hot-plug and cold-plug
// VFIO annotations from ocispec to sbConfig.HypervisorConfig.
//
// Each annotation that is present is parsed with
// addHypervisorPCIePortOverride and stored in the matching field; a parse
// error is returned immediately. After a successful parse the opposite field
// is reset to config.NoPort, so specifying one mode disables the other.
//
// The hot-plug annotation is handled first and the cold-plug annotation
// second; when both are present the cold-plug one therefore takes effect and
// HotPlugVFIO ends up as config.NoPort.
func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
	if hotValue, found := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; found {
		port, err := addHypervisorPCIePortOverride(hotValue)
		// The parsed result is stored even on failure, before the error is
		// propagated.
		sbConfig.HypervisorConfig.HotPlugVFIO = port
		if err != nil {
			return err
		}
		// Hot-plug was requested explicitly: switch cold-plug off.
		sbConfig.HypervisorConfig.ColdPlugVFIO = config.NoPort
	}

	if coldValue, found := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; found {
		port, err := addHypervisorPCIePortOverride(coldValue)
		// The parsed result is stored even on failure, before the error is
		// propagated.
		sbConfig.HypervisorConfig.ColdPlugVFIO = port
		if err != nil {
			return err
		}
		// Cold-plug was requested explicitly: switch hot-plug off.
		sbConfig.HypervisorConfig.HotPlugVFIO = config.NoPort
	}

	return nil
}
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {

View File

@@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) {
func TestAddHypervisorAnnotations(t *testing.T) {
assert := assert.New(t)
config := vc.SandboxConfig{
sbConfig := vc.SandboxConfig{
Annotations: make(map[string]string),
}
@@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"}
ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
addHypervisorConfigOverrides(ocispec, &config, runtimeConfig)
assert.Exactly(expectedHyperConfig, config.HypervisorConfig)
addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig)
assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
@@ -660,7 +660,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
@@ -668,55 +669,58 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
addAnnotations(ocispec, &config, runtimeConfig)
assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1))
assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(config.HypervisorConfig.MemOffset, uint64(512))
assert.Equal(config.HypervisorConfig.VirtioMem, true)
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
assert.Equal(config.HypervisorConfig.HugePages, true)
assert.Equal(config.HypervisorConfig.IOMMU, true)
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring")
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true)
assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true)
assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs")
assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false")
assert.Equal(config.HypervisorConfig.VirtioFSCache, "auto")
assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
assert.Equal(config.HypervisorConfig.Msize9p, uint32(512))
assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35")
assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw")
assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off")
assert.Equal(config.HypervisorConfig.DisableVhostNet, true)
assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
assert.Equal(config.HypervisorConfig.IOMMUPlatform, true)
assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864))
assert.Equal(config.HypervisorConfig.LegacySerial, true)
assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
err := addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.NoError(err)
assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1))
assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512))
assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true)
assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true)
assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
assert.Equal(sbConfig.HypervisorConfig.HugePages, true)
assert.Equal(sbConfig.HypervisorConfig.IOMMU, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring")
assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true)
assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true)
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true)
assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs")
assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false")
assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto")
assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512))
assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35")
assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw")
assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off")
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
// In case an absurdly large value is provided, the config value is not overridden
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
err := addAnnotations(ocispec, &config, runtimeConfig)
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
err = addAnnotations(ocispec, &config, runtimeConfig)
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
err = addAnnotations(ocispec, &config, runtimeConfig)
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
assert.Error(err)
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"