mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-18 23:04:20 +01:00
Merge pull request #4492 from zvonkok/pcie-topology
runtime: fix PCIe topology for GPUDirect use-case
This commit is contained in:
@@ -17,7 +17,7 @@ import (
|
|||||||
"github.com/prometheus/procfs"
|
"github.com/prometheus/procfs"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
|
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
|
||||||
@@ -113,8 +113,8 @@ type HypervisorInfo struct {
|
|||||||
SocketPath string
|
SocketPath string
|
||||||
Msize9p uint32
|
Msize9p uint32
|
||||||
MemorySlots uint32
|
MemorySlots uint32
|
||||||
PCIeRootPort uint32
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO hv.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
HotplugVFIOOnRootBus bool
|
HotplugVFIOOnRootBus bool
|
||||||
Debug bool
|
Debug bool
|
||||||
}
|
}
|
||||||
@@ -317,9 +317,9 @@ func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) {
|
|||||||
EntropySource: config.HypervisorConfig.EntropySource,
|
EntropySource: config.HypervisorConfig.EntropySource,
|
||||||
SharedFS: config.HypervisorConfig.SharedFS,
|
SharedFS: config.HypervisorConfig.SharedFS,
|
||||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||||
|
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
||||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
|
||||||
SocketPath: socketPath,
|
SocketPath: socketPath,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,12 +19,12 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/BurntSushi/toml"
|
"github.com/BurntSushi/toml"
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
vcUtils "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
|
|
||||||
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||||
@@ -74,8 +74,9 @@ func createConfig(configPath string, fileData string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeConfig, err error) {
|
func makeRuntimeConfig(prefixDir string) (configFile string, ociConfig oci.RuntimeConfig, err error) {
|
||||||
var coldPlugVFIO hv.PCIePort
|
var hotPlugVFIO config.PCIePort
|
||||||
|
var coldPlugVFIO config.PCIePort
|
||||||
const logPath = "/log/path"
|
const logPath = "/log/path"
|
||||||
hypervisorPath := filepath.Join(prefixDir, "hypervisor")
|
hypervisorPath := filepath.Join(prefixDir, "hypervisor")
|
||||||
kernelPath := filepath.Join(prefixDir, "kernel")
|
kernelPath := filepath.Join(prefixDir, "kernel")
|
||||||
@@ -87,8 +88,8 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
|||||||
blockStorageDriver := "virtio-scsi"
|
blockStorageDriver := "virtio-scsi"
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotplugVFIOOnRootBus := true
|
hotplugVFIOOnRootBus := true
|
||||||
pcieRootPort := uint32(2)
|
hotPlugVFIO = config.BridgePort
|
||||||
coldPlugVFIO = hv.NoPort
|
coldPlugVFIO = config.NoPort
|
||||||
disableNewNetNs := false
|
disableNewNetNs := false
|
||||||
sharedFS := "virtio-9p"
|
sharedFS := "virtio-9p"
|
||||||
virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd")
|
virtioFSdaemon := filepath.Join(prefixDir, "virtiofsd")
|
||||||
@@ -132,8 +133,8 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
|||||||
BlockDeviceDriver: blockStorageDriver,
|
BlockDeviceDriver: blockStorageDriver,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
PCIeRootPort: pcieRootPort,
|
|
||||||
DisableNewNetNs: disableNewNetNs,
|
DisableNewNetNs: disableNewNetNs,
|
||||||
DefaultVCPUCount: hypConfig.NumVCPUs,
|
DefaultVCPUCount: hypConfig.NumVCPUs,
|
||||||
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
|
DefaultMaxVCPUCount: hypConfig.DefaultMaxVCPUs,
|
||||||
@@ -156,12 +157,12 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC
|
|||||||
return "", oci.RuntimeConfig{}, err
|
return "", oci.RuntimeConfig{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, config, err = katautils.LoadConfiguration(configFile, true)
|
_, ociConfig, err = katautils.LoadConfiguration(configFile, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", oci.RuntimeConfig{}, err
|
return "", oci.RuntimeConfig{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return configFile, config, nil
|
return configFile, ociConfig, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getExpectedAgentDetails(config oci.RuntimeConfig) (AgentInfo, error) {
|
func getExpectedAgentDetails(config oci.RuntimeConfig) (AgentInfo, error) {
|
||||||
@@ -277,7 +278,7 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo {
|
|||||||
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
VirtioFSDaemon: config.HypervisorConfig.VirtioFSDaemon,
|
||||||
|
|
||||||
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: config.HypervisorConfig.PCIeRootPort,
|
HotPlugVFIO: config.HypervisorConfig.HotPlugVFIO,
|
||||||
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
ColdPlugVFIO: config.HypervisorConfig.ColdPlugVFIO,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -357,8 +357,15 @@ pflashes = []
|
|||||||
# Default false
|
# Default false
|
||||||
#hotplug_vfio_on_root_bus = true
|
#hotplug_vfio_on_root_bus = true
|
||||||
|
|
||||||
|
# Enable hot-plugging of VFIO devices to a bridge-port,
|
||||||
|
# root-port or switch-port.
|
||||||
|
# The default setting is "no-port"
|
||||||
|
#hot_plug_vfio = "root-port"
|
||||||
|
|
||||||
# In a confidential compute environment hot-plugging can compromise
|
# In a confidential compute environment hot-plugging can compromise
|
||||||
# security. Enable cold-plugging of VFIO devices to a root-port.
|
# security.
|
||||||
|
# Enable cold-plugging of VFIO devices to a bridge-port,
|
||||||
|
# root-port or switch-port.
|
||||||
# The default setting is "no-port", which means disabled.
|
# The default setting is "no-port", which means disabled.
|
||||||
#cold_plug_vfio = "root-port"
|
#cold_plug_vfio = "root-port"
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import (
|
|||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations"
|
||||||
@@ -308,8 +308,9 @@ func TestCreateContainerConfigFail(t *testing.T) {
|
|||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err error) {
|
func createAllRuntimeConfigFiles(dir, hypervisor string) (runtimeConfig string, err error) {
|
||||||
var coldPlugVFIO hv.PCIePort
|
var hotPlugVFIO config.PCIePort
|
||||||
|
var coldPlugVFIO config.PCIePort
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
return "", fmt.Errorf("BUG: need directory")
|
return "", fmt.Errorf("BUG: need directory")
|
||||||
}
|
}
|
||||||
@@ -330,11 +331,11 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
|||||||
blockDeviceDriver := "virtio-scsi"
|
blockDeviceDriver := "virtio-scsi"
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotplugVFIOOnRootBus := true
|
hotplugVFIOOnRootBus := true
|
||||||
pcieRootPort := uint32(2)
|
|
||||||
disableNewNetNs := false
|
disableNewNetNs := false
|
||||||
sharedFS := "virtio-9p"
|
sharedFS := "virtio-9p"
|
||||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||||
coldPlugVFIO = hv.RootPort
|
hotPlugVFIO = config.BridgePort
|
||||||
|
coldPlugVFIO = config.RootPort
|
||||||
|
|
||||||
configFileOptions := ktu.RuntimeConfigOptions{
|
configFileOptions := ktu.RuntimeConfigOptions{
|
||||||
Hypervisor: "qemu",
|
Hypervisor: "qemu",
|
||||||
@@ -349,10 +350,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err
|
|||||||
BlockDeviceDriver: blockDeviceDriver,
|
BlockDeviceDriver: blockDeviceDriver,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: pcieRootPort,
|
|
||||||
DisableNewNetNs: disableNewNetNs,
|
DisableNewNetNs: disableNewNetNs,
|
||||||
SharedFS: sharedFS,
|
SharedFS: sharedFS,
|
||||||
VirtioFSDaemon: virtioFSdaemon,
|
VirtioFSDaemon: virtioFSdaemon,
|
||||||
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -125,14 +125,117 @@ const (
|
|||||||
// SysDevPrefix is static string of /sys/dev
|
// SysDevPrefix is static string of /sys/dev
|
||||||
var SysDevPrefix = "/sys/dev"
|
var SysDevPrefix = "/sys/dev"
|
||||||
|
|
||||||
// SysIOMMUPath is static string of /sys/kernel/iommu_groups
|
// SysIOMMUGroupPath is static string of /sys/kernel/iommu_groups
|
||||||
var SysIOMMUPath = "/sys/kernel/iommu_groups"
|
var SysIOMMUGroupPath = "/sys/kernel/iommu_groups"
|
||||||
|
|
||||||
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
|
// SysBusPciDevicesPath is static string of /sys/bus/pci/devices
|
||||||
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
|
var SysBusPciDevicesPath = "/sys/bus/pci/devices"
|
||||||
|
|
||||||
var getSysDevPath = getSysDevPathImpl
|
var getSysDevPath = getSysDevPathImpl
|
||||||
|
|
||||||
|
// PCIePortBusPrefix gives us the correct bus naming depending on the port
|
||||||
|
// used to hot(cold)-plug the device
|
||||||
|
type PCIePortBusPrefix string
|
||||||
|
|
||||||
|
const (
|
||||||
|
PCIeRootPortPrefix PCIePortBusPrefix = "rp"
|
||||||
|
PCIeSwitchPortPrefix PCIePortBusPrefix = "sw"
|
||||||
|
PCIeSwitchUpstreamPortPrefix PCIePortBusPrefix = "swup"
|
||||||
|
PCIeSwitchhDownstreamPortPrefix PCIePortBusPrefix = "swdp"
|
||||||
|
PCIBridgePortPrefix PCIePortBusPrefix = "bp"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (p PCIePortBusPrefix) String() string {
|
||||||
|
switch p {
|
||||||
|
case PCIeRootPortPrefix:
|
||||||
|
fallthrough
|
||||||
|
case PCIeSwitchPortPrefix:
|
||||||
|
fallthrough
|
||||||
|
case PCIeSwitchUpstreamPortPrefix:
|
||||||
|
fallthrough
|
||||||
|
case PCIeSwitchhDownstreamPortPrefix:
|
||||||
|
fallthrough
|
||||||
|
case PCIBridgePortPrefix:
|
||||||
|
return string(p)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("<unknown PCIePortBusPrefix: %s>", string(p))
|
||||||
|
}
|
||||||
|
|
||||||
|
// PCIePort distinguishes between root, switch and bridge ports
|
||||||
|
type PCIePort string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// RootPort attach VFIO devices to a root-port
|
||||||
|
RootPort PCIePort = "root-port"
|
||||||
|
// SwitchPort attach VFIO devices to a switch-port
|
||||||
|
SwitchPort = "switch-port"
|
||||||
|
// BridgePort is the default
|
||||||
|
BridgePort = "bridge-port"
|
||||||
|
// NoPort is for disabling VFIO hotplug/coldplug
|
||||||
|
NoPort = "no-port"
|
||||||
|
// InvalidPort is for invalid port
|
||||||
|
InvalidPort = "invalid-port"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (p PCIePort) String() string {
|
||||||
|
switch p {
|
||||||
|
case RootPort:
|
||||||
|
fallthrough
|
||||||
|
case SwitchPort:
|
||||||
|
fallthrough
|
||||||
|
case BridgePort:
|
||||||
|
fallthrough
|
||||||
|
case NoPort:
|
||||||
|
fallthrough
|
||||||
|
case InvalidPort:
|
||||||
|
return string(p)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
|
||||||
|
}
|
||||||
|
|
||||||
|
var PCIePortPrefixMapping = map[PCIePort]PCIePortBusPrefix{
|
||||||
|
RootPort: PCIeRootPortPrefix,
|
||||||
|
SwitchPort: PCIeSwitchhDownstreamPortPrefix,
|
||||||
|
BridgePort: PCIBridgePortPrefix,
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p PCIePort) Invalid() bool {
|
||||||
|
switch p {
|
||||||
|
case RootPort:
|
||||||
|
fallthrough
|
||||||
|
case SwitchPort:
|
||||||
|
fallthrough
|
||||||
|
case BridgePort:
|
||||||
|
fallthrough
|
||||||
|
case NoPort:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p PCIePort) Valid() bool {
|
||||||
|
switch p {
|
||||||
|
case RootPort:
|
||||||
|
fallthrough
|
||||||
|
case SwitchPort:
|
||||||
|
fallthrough
|
||||||
|
case BridgePort:
|
||||||
|
fallthrough
|
||||||
|
case NoPort:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type PCIePortMapping map[string]bool
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Each of these structures keeps track of the devices attached to the
|
||||||
|
// different types of PCI ports. We can deduce the bus number from it
|
||||||
|
// and eliminate duplicates being assigned.
|
||||||
|
PCIeDevices = map[PCIePort]PCIePortMapping{}
|
||||||
|
)
|
||||||
|
|
||||||
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
// DeviceInfo is an embedded type that contains device data common to all types of devices.
|
||||||
type DeviceInfo struct {
|
type DeviceInfo struct {
|
||||||
// DriverOptions is specific options for each device driver
|
// DriverOptions is specific options for each device driver
|
||||||
@@ -178,6 +281,9 @@ type DeviceInfo struct {
|
|||||||
// ColdPlug specifies whether the device must be cold plugged (true)
|
// ColdPlug specifies whether the device must be cold plugged (true)
|
||||||
// or hot plugged (false).
|
// or hot plugged (false).
|
||||||
ColdPlug bool
|
ColdPlug bool
|
||||||
|
|
||||||
|
// Specifies the PCIe port type to which the device is attached
|
||||||
|
Port PCIePort
|
||||||
}
|
}
|
||||||
|
|
||||||
// BlockDrive represents a block storage drive which may be used in case the storage
|
// BlockDrive represents a block storage drive which may be used in case the storage
|
||||||
@@ -279,14 +385,8 @@ const (
|
|||||||
VFIOAPDeviceMediatedType
|
VFIOAPDeviceMediatedType
|
||||||
)
|
)
|
||||||
|
|
||||||
type VFIODev interface {
|
// VFIODev represents a VFIO PCI device used for hotplugging
|
||||||
GetID() *string
|
type VFIODev struct {
|
||||||
GetType() VFIODeviceType
|
|
||||||
GetSysfsDev() *string
|
|
||||||
}
|
|
||||||
|
|
||||||
// VFIOPCIDev represents a VFIO PCI device used for hotplugging
|
|
||||||
type VFIOPCIDev struct {
|
|
||||||
// ID is used to identify this drive in the hypervisor options.
|
// ID is used to identify this drive in the hypervisor options.
|
||||||
ID string
|
ID string
|
||||||
|
|
||||||
@@ -316,44 +416,15 @@ type VFIOPCIDev struct {
|
|||||||
|
|
||||||
// IsPCIe specifies device is PCIe or PCI
|
// IsPCIe specifies device is PCIe or PCI
|
||||||
IsPCIe bool
|
IsPCIe bool
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOPCIDev) GetID() *string {
|
|
||||||
return &d.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOPCIDev) GetType() VFIODeviceType {
|
|
||||||
return d.Type
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOPCIDev) GetSysfsDev() *string {
|
|
||||||
return &d.SysfsDev
|
|
||||||
}
|
|
||||||
|
|
||||||
type VFIOAPDev struct {
|
|
||||||
// ID is used to identify this drive in the hypervisor options.
|
|
||||||
ID string
|
|
||||||
|
|
||||||
// sysfsdev of VFIO mediated device
|
|
||||||
SysfsDev string
|
|
||||||
|
|
||||||
// APDevices are the Adjunct Processor devices assigned to the mdev
|
// APDevices are the Adjunct Processor devices assigned to the mdev
|
||||||
APDevices []string
|
APDevices []string
|
||||||
|
|
||||||
// Type of VFIO device
|
// Rank identifies a device in an IOMMU group
|
||||||
Type VFIODeviceType
|
Rank int
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOAPDev) GetID() *string {
|
// Port is the PCIe port type to which the device is attached
|
||||||
return &d.ID
|
Port PCIePort
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOAPDev) GetType() VFIODeviceType {
|
|
||||||
return d.Type
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d VFIOAPDev) GetSysfsDev() *string {
|
|
||||||
return &d.SysfsDev
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// RNGDev represents a random number generator device
|
// RNGDev represents a random number generator device
|
||||||
|
|||||||
@@ -47,9 +47,9 @@ func deviceLogger() *logrus.Entry {
|
|||||||
return api.DeviceLogger()
|
return api.DeviceLogger()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Identify PCIe device by reading the size of the PCI config space
|
// IsPCIeDevice identifies PCIe device by reading the size of the PCI config space
|
||||||
// Plain PCI device have 256 bytes of config space where PCIe devices have 4K
|
// Plain PCI device have 256 bytes of config space where PCIe devices have 4K
|
||||||
func isPCIeDevice(bdf string) bool {
|
func IsPCIeDevice(bdf string) bool {
|
||||||
if len(strings.Split(bdf, ":")) == 2 {
|
if len(strings.Split(bdf, ":")) == 2 {
|
||||||
bdf = PCIDomain + ":" + bdf
|
bdf = PCIDomain + ":" + bdf
|
||||||
}
|
}
|
||||||
@@ -157,14 +157,12 @@ func checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (boo
|
|||||||
|
|
||||||
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
|
// GetAllVFIODevicesFromIOMMUGroup returns all the VFIO devices in the IOMMU group
|
||||||
// We can reuse this function at various levels, sandbox, container.
|
// We can reuse this function at various levels, sandbox, container.
|
||||||
// Only the VFIO module is allowed to do bus assignments, all other modules need to
|
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo) ([]*config.VFIODev, error) {
|
||||||
// ignore it if used as helper function to get VFIO information.
|
|
||||||
func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignment bool) ([]*config.VFIODev, error) {
|
|
||||||
|
|
||||||
vfioDevs := []*config.VFIODev{}
|
vfioDevs := []*config.VFIODev{}
|
||||||
|
|
||||||
vfioGroup := filepath.Base(device.HostPath)
|
vfioGroup := filepath.Base(device.HostPath)
|
||||||
iommuDevicesPath := filepath.Join(config.SysIOMMUPath, vfioGroup, "devices")
|
iommuDevicesPath := filepath.Join(config.SysIOMMUGroupPath, vfioGroup, "devices")
|
||||||
|
|
||||||
deviceFiles, err := os.ReadDir(iommuDevicesPath)
|
deviceFiles, err := os.ReadDir(iommuDevicesPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -174,7 +172,7 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignme
|
|||||||
// Pass all devices in iommu group
|
// Pass all devices in iommu group
|
||||||
for i, deviceFile := range deviceFiles {
|
for i, deviceFile := range deviceFiles {
|
||||||
//Get bdf of device eg 0000:00:1c.0
|
//Get bdf of device eg 0000:00:1c.0
|
||||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(deviceFile.Name(), iommuDevicesPath)
|
deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(deviceFile.Name(), iommuDevicesPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -196,27 +194,24 @@ func GetAllVFIODevicesFromIOMMUGroup(device config.DeviceInfo, ignoreBusAssignme
|
|||||||
|
|
||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
isPCIe := isPCIeDevice(deviceBDF)
|
|
||||||
// Do not directly assign to `vfio` -- need to access field still
|
// Do not directly assign to `vfio` -- need to access field still
|
||||||
vfioPCI := config.VFIOPCIDev{
|
vfio = config.VFIODev{
|
||||||
ID: id,
|
ID: id,
|
||||||
Type: vfioDeviceType,
|
Type: vfioDeviceType,
|
||||||
BDF: deviceBDF,
|
BDF: deviceBDF,
|
||||||
SysfsDev: deviceSysfsDev,
|
SysfsDev: deviceSysfsDev,
|
||||||
IsPCIe: isPCIe,
|
IsPCIe: IsPCIeDevice(deviceBDF),
|
||||||
Class: pciClass,
|
Class: pciClass,
|
||||||
|
Rank: -1,
|
||||||
|
Port: device.Port,
|
||||||
}
|
}
|
||||||
if isPCIe && !ignoreBusAssignment {
|
|
||||||
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
|
|
||||||
AllPCIeDevs[deviceBDF] = true
|
|
||||||
}
|
|
||||||
vfio = vfioPCI
|
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
devices, err := GetAPVFIODevices(deviceSysfsDev)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
vfio = config.VFIOAPDev{
|
vfio = config.VFIODev{
|
||||||
ID: id,
|
ID: id,
|
||||||
SysfsDev: deviceSysfsDev,
|
SysfsDev: deviceSysfsDev,
|
||||||
Type: config.VFIOAPDeviceMediatedType,
|
Type: config.VFIOAPDeviceMediatedType,
|
||||||
|
|||||||
@@ -28,14 +28,9 @@ const (
|
|||||||
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
|
vfioRemoveIDPath = "/sys/bus/pci/drivers/vfio-pci/remove_id"
|
||||||
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
iommuGroupPath = "/sys/bus/pci/devices/%s/iommu_group"
|
||||||
vfioDevPath = "/dev/vfio/%s"
|
vfioDevPath = "/dev/vfio/%s"
|
||||||
pcieRootPortPrefix = "rp"
|
|
||||||
vfioAPSysfsDir = "/sys/devices/vfio_ap"
|
vfioAPSysfsDir = "/sys/devices/vfio_ap"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
|
||||||
AllPCIeDevs = map[string]bool{}
|
|
||||||
)
|
|
||||||
|
|
||||||
// VFIODevice is a vfio device meant to be passed to the hypervisor
|
// VFIODevice is a vfio device meant to be passed to the hypervisor
|
||||||
// to be used by the Virtual Machine.
|
// to be used by the Virtual Machine.
|
||||||
type VFIODevice struct {
|
type VFIODevice struct {
|
||||||
@@ -70,10 +65,17 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo, false)
|
device.VfioDevs, err = GetAllVFIODevicesFromIOMMUGroup(*device.DeviceInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
for _, vfio := range device.VfioDevs {
|
||||||
|
if vfio.IsPCIe {
|
||||||
|
busIndex := len(config.PCIeDevices[vfio.Port])
|
||||||
|
vfio.Bus = fmt.Sprintf("%s%d", config.PCIePortPrefixMapping[vfio.Port], busIndex)
|
||||||
|
config.PCIeDevices[vfio.Port][vfio.BDF] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
coldPlug := device.DeviceInfo.ColdPlug
|
coldPlug := device.DeviceInfo.ColdPlug
|
||||||
deviceLogger().WithField("cold-plug", coldPlug).Info("Attaching VFIO device")
|
deviceLogger().WithField("cold-plug", coldPlug).Info("Attaching VFIO device")
|
||||||
@@ -169,23 +171,18 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
|
|||||||
for _, dev := range ds.VFIODevs {
|
for _, dev := range ds.VFIODevs {
|
||||||
var vfio config.VFIODev
|
var vfio config.VFIODev
|
||||||
|
|
||||||
vfioDeviceType := (*device.VfioDevs[0]).GetType()
|
switch dev.Type {
|
||||||
switch vfioDeviceType {
|
|
||||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
||||||
bdf := ""
|
vfio = config.VFIODev{
|
||||||
if pciDev, ok := (*dev).(config.VFIOPCIDev); ok {
|
ID: dev.ID,
|
||||||
bdf = pciDev.BDF
|
Type: config.VFIODeviceType(dev.Type),
|
||||||
}
|
BDF: dev.BDF,
|
||||||
vfio = config.VFIOPCIDev{
|
SysfsDev: dev.SysfsDev,
|
||||||
ID: *(*dev).GetID(),
|
|
||||||
Type: config.VFIODeviceType((*dev).GetType()),
|
|
||||||
BDF: bdf,
|
|
||||||
SysfsDev: *(*dev).GetSysfsDev(),
|
|
||||||
}
|
}
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
vfio = config.VFIOAPDev{
|
vfio = config.VFIODev{
|
||||||
ID: *(*dev).GetID(),
|
ID: dev.ID,
|
||||||
SysfsDev: *(*dev).GetSysfsDev(),
|
SysfsDev: dev.SysfsDev,
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
deviceLogger().WithError(
|
deviceLogger().WithError(
|
||||||
@@ -200,7 +197,7 @@ func (device *VFIODevice) Load(ds config.DeviceState) {
|
|||||||
|
|
||||||
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
|
// It should implement GetAttachCount() and DeviceID() as api.Device implementation
|
||||||
// here it shares function from *GenericDevice so we don't need duplicate codes
|
// here it shares function from *GenericDevice so we don't need duplicate codes
|
||||||
func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
|
func GetVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceSysfsDev string, vfioDeviceType config.VFIODeviceType, err error) {
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr)
|
vfioDeviceType, err = GetVFIODeviceType(sysfsDevStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -210,14 +207,18 @@ func getVFIODetails(deviceFileName, iommuDevicesPath string) (deviceBDF, deviceS
|
|||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIOPCIDeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
// Get bdf of device eg. 0000:00:1c.0
|
// Get bdf of device eg. 0000:00:1c.0
|
||||||
deviceBDF = getBDF(deviceFileName)
|
// OLD IMPL: deviceBDF = getBDF(deviceFileName)
|
||||||
|
// The old implementation did not consider the case where
|
||||||
|
// vfio devices are located on different root busses. The
|
||||||
|
// kata-agent now handles that case, so use the full PCI address here
|
||||||
|
deviceBDF = deviceFileName
|
||||||
// Get sysfs path used by cloud-hypervisor
|
// Get sysfs path used by cloud-hypervisor
|
||||||
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
deviceSysfsDev = filepath.Join(config.SysBusPciDevicesPath, deviceFileName)
|
||||||
case config.VFIOPCIDeviceMediatedType:
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
// Get sysfsdev of device eg. /sys/devices/pci0000:00/0000:00:02.0/f79944e4-5a3d-11e8-99ce-479cbab002e4
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
deviceBDF = getBDF(getMediatedBDF(deviceSysfsDev))
|
deviceBDF = GetBDF(getMediatedBDF(deviceSysfsDev))
|
||||||
case config.VFIOAPDeviceMediatedType:
|
case config.VFIOAPDeviceMediatedType:
|
||||||
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
sysfsDevStr := filepath.Join(iommuDevicesPath, deviceFileName)
|
||||||
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
deviceSysfsDev, err = GetSysfsDev(sysfsDevStr)
|
||||||
@@ -240,7 +241,7 @@ func getMediatedBDF(deviceSysfsDev string) string {
|
|||||||
|
|
||||||
// getBDF returns the BDF of pci device
|
// GetBDF returns the BDF of pci device
|
||||||
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
|
// Expected input string format is [<domain>]:[<bus>][<slot>].[<func>] eg. 0000:02:10.0
|
||||||
func getBDF(deviceSysStr string) string {
|
func GetBDF(deviceSysStr string) string {
|
||||||
tokens := strings.SplitN(deviceSysStr, ":", 2)
|
tokens := strings.SplitN(deviceSysStr, ":", 2)
|
||||||
if len(tokens) == 1 {
|
if len(tokens) == 1 {
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ func TestGetVFIODetails(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
data := []testData{
|
data := []testData{
|
||||||
{"0000:02:10.0", "02:10.0"},
|
{"0000:02:10.0", "0000:02:10.0"},
|
||||||
{"0000:0210.0", ""},
|
{"0000:0210.0", ""},
|
||||||
{"f79944e4-5a3d-11e8-99ce-", ""},
|
{"f79944e4-5a3d-11e8-99ce-", ""},
|
||||||
{"f79944e4-5a3d-11e8-99ce", ""},
|
{"f79944e4-5a3d-11e8-99ce", ""},
|
||||||
@@ -29,7 +29,7 @@ func TestGetVFIODetails(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, d := range data {
|
for _, d := range data {
|
||||||
deviceBDF, deviceSysfsDev, vfioDeviceType, err := getVFIODetails(d.deviceStr, "")
|
deviceBDF, deviceSysfsDev, vfioDeviceType, err := GetVFIODetails(d.deviceStr, "")
|
||||||
|
|
||||||
switch vfioDeviceType {
|
switch vfioDeviceType {
|
||||||
case config.VFIOPCIDeviceNormalType:
|
case config.VFIOPCIDeviceNormalType:
|
||||||
|
|||||||
@@ -71,7 +71,11 @@ func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserS
|
|||||||
dm.blockDriver = config.VirtioSCSI
|
dm.blockDriver = config.VirtioSCSI
|
||||||
}
|
}
|
||||||
|
|
||||||
drivers.AllPCIeDevs = make(map[string]bool)
|
config.PCIeDevices = make(map[config.PCIePort]config.PCIePortMapping)
|
||||||
|
|
||||||
|
config.PCIeDevices[config.RootPort] = make(map[string]bool)
|
||||||
|
config.PCIeDevices[config.SwitchPort] = make(map[string]bool)
|
||||||
|
config.PCIeDevices[config.BridgePort] = make(map[string]bool)
|
||||||
|
|
||||||
for _, dev := range devices {
|
for _, dev := range devices {
|
||||||
dm.devices[dev.DeviceID()] = dev
|
dm.devices[dev.DeviceID()] = dev
|
||||||
@@ -118,7 +122,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device
|
|||||||
}
|
}
|
||||||
if IsVFIO(devInfo.HostPath) {
|
if IsVFIO(devInfo.HostPath) {
|
||||||
return drivers.NewVFIODevice(&devInfo), nil
|
return drivers.NewVFIODevice(&devInfo), nil
|
||||||
} else if isVhostUserBlk(devInfo) {
|
} else if IsVhostUserBlk(devInfo) {
|
||||||
if devInfo.DriverOptions == nil {
|
if devInfo.DriverOptions == nil {
|
||||||
devInfo.DriverOptions = make(map[string]string)
|
devInfo.DriverOptions = make(map[string]string)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -116,14 +116,14 @@ func TestAttachVFIODevice(t *testing.T) {
|
|||||||
_, err = os.Create(deviceConfigFile)
|
_, err = os.Create(deviceConfigFile)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
||||||
savedIOMMUPath := config.SysIOMMUPath
|
savedIOMMUPath := config.SysIOMMUGroupPath
|
||||||
config.SysIOMMUPath = tmpDir
|
config.SysIOMMUGroupPath = tmpDir
|
||||||
|
|
||||||
savedSysBusPciDevicesPath := config.SysBusPciDevicesPath
|
savedSysBusPciDevicesPath := config.SysBusPciDevicesPath
|
||||||
config.SysBusPciDevicesPath = devicesDir
|
config.SysBusPciDevicesPath = devicesDir
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
config.SysIOMMUPath = savedIOMMUPath
|
config.SysIOMMUGroupPath = savedIOMMUPath
|
||||||
config.SysBusPciDevicesPath = savedSysBusPciDevicesPath
|
config.SysBusPciDevicesPath = savedSysBusPciDevicesPath
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ func isBlock(devInfo config.DeviceInfo) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// isVhostUserBlk checks if the device is a VhostUserBlk device.
|
// isVhostUserBlk checks if the device is a VhostUserBlk device.
|
||||||
func isVhostUserBlk(devInfo config.DeviceInfo) bool {
|
func IsVhostUserBlk(devInfo config.DeviceInfo) bool {
|
||||||
return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor
|
return devInfo.DevType == "b" && devInfo.Major == config.VhostUserBlkMajor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ func TestIsVhostUserBlk(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, d := range data {
|
for _, d := range data {
|
||||||
isVhostUserBlk := isVhostUserBlk(
|
isVhostUserBlk := IsVhostUserBlk(
|
||||||
config.DeviceInfo{
|
config.DeviceInfo{
|
||||||
DevType: d.devType,
|
DevType: d.devType,
|
||||||
Major: d.major,
|
Major: d.major,
|
||||||
|
|||||||
@@ -123,6 +123,14 @@ const (
|
|||||||
// PCIeRootPort is a PCIe Root Port, the PCIe device should be hotplugged to this port.
|
// PCIeRootPort is a PCIe Root Port, the PCIe device should be hotplugged to this port.
|
||||||
PCIeRootPort DeviceDriver = "pcie-root-port"
|
PCIeRootPort DeviceDriver = "pcie-root-port"
|
||||||
|
|
||||||
|
// PCIeSwitchUpstreamPort is a PCIe switch upstream port
|
||||||
|
// An upstream port connects to a PCIe Root Port
|
||||||
|
PCIeSwitchUpstreamPort DeviceDriver = "x3130-upstream"
|
||||||
|
|
||||||
|
// PCIeSwitchDownstreamPort is a PCIe switch downstream port
|
||||||
|
// PCIe devices can be hot-plugged to the downstream port.
|
||||||
|
PCIeSwitchDownstreamPort DeviceDriver = "xio3130-downstream"
|
||||||
|
|
||||||
// Loader is the Loader device driver.
|
// Loader is the Loader device driver.
|
||||||
Loader DeviceDriver = "loader"
|
Loader DeviceDriver = "loader"
|
||||||
|
|
||||||
@@ -236,6 +244,7 @@ const (
|
|||||||
|
|
||||||
// SecExecGuest represents an s390x Secure Execution (Protected Virtualization in QEMU) object
|
// SecExecGuest represents an s390x Secure Execution (Protected Virtualization in QEMU) object
|
||||||
SecExecGuest ObjectType = "s390-pv-guest"
|
SecExecGuest ObjectType = "s390-pv-guest"
|
||||||
|
|
||||||
// PEFGuest represent ppc64le PEF(Protected Execution Facility) object.
|
// PEFGuest represent ppc64le PEF(Protected Execution Facility) object.
|
||||||
PEFGuest ObjectType = "pef-guest"
|
PEFGuest ObjectType = "pef-guest"
|
||||||
)
|
)
|
||||||
@@ -369,7 +378,6 @@ func (object Object) QemuParams(config *Config) []string {
|
|||||||
deviceParams = append(deviceParams, string(object.Driver))
|
deviceParams = append(deviceParams, string(object.Driver))
|
||||||
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID))
|
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID))
|
||||||
deviceParams = append(deviceParams, fmt.Sprintf("host-path=%s", object.File))
|
deviceParams = append(deviceParams, fmt.Sprintf("host-path=%s", object.File))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(deviceParams) > 0 {
|
if len(deviceParams) > 0 {
|
||||||
@@ -1681,6 +1689,106 @@ func (b PCIeRootPortDevice) Valid() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PCIeSwitchUpstreamPortDevice is the port connecting to the root port
|
||||||
|
type PCIeSwitchUpstreamPortDevice struct {
|
||||||
|
ID string // format: sup{n}, n>=0
|
||||||
|
Bus string // default is rp0
|
||||||
|
}
|
||||||
|
|
||||||
|
// QemuParams returns the qemu parameters built out of the PCIeSwitchUpstreamPortDevice.
|
||||||
|
func (b PCIeSwitchUpstreamPortDevice) QemuParams(config *Config) []string {
|
||||||
|
var qemuParams []string
|
||||||
|
var deviceParams []string
|
||||||
|
|
||||||
|
driver := PCIeSwitchUpstreamPort
|
||||||
|
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("%s,id=%s", driver, b.ID))
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", b.Bus))
|
||||||
|
|
||||||
|
qemuParams = append(qemuParams, "-device")
|
||||||
|
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
|
||||||
|
return qemuParams
|
||||||
|
}
|
||||||
|
|
||||||
|
// Valid returns true if the PCIeSwitchUpstreamPortDevice structure is valid and complete.
|
||||||
|
func (b PCIeSwitchUpstreamPortDevice) Valid() bool {
|
||||||
|
if b.ID == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if b.Bus == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// PCIeSwitchDownstreamPortDevice is the port connecting to the root port
|
||||||
|
type PCIeSwitchDownstreamPortDevice struct {
|
||||||
|
ID string // format: sup{n}, n>=0
|
||||||
|
Bus string // default is rp0
|
||||||
|
Chassis string // (slot, chassis) pair is mandatory and must be unique for each downstream port, >=0, default is 0x00
|
||||||
|
Slot string // >=0, default is 0x00
|
||||||
|
// This to work needs patches to QEMU
|
||||||
|
BusReserve string
|
||||||
|
// Pref64 and Pref32 are not allowed to be set simultaneously
|
||||||
|
Pref64Reserve string // reserve prefetched MMIO aperture, 64-bit
|
||||||
|
Pref32Reserve string // reserve prefetched MMIO aperture, 32-bit
|
||||||
|
MemReserve string // reserve non-prefetched MMIO aperture, 32-bit *only*
|
||||||
|
IOReserve string // IO reservation
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// QemuParams returns the qemu parameters built out of the PCIeSwitchUpstreamPortDevice.
|
||||||
|
func (b PCIeSwitchDownstreamPortDevice) QemuParams(config *Config) []string {
|
||||||
|
var qemuParams []string
|
||||||
|
var deviceParams []string
|
||||||
|
driver := PCIeSwitchDownstreamPort
|
||||||
|
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("%s,id=%s", driver, b.ID))
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("bus=%s", b.Bus))
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("chassis=%s", b.Chassis))
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("slot=%s", b.Slot))
|
||||||
|
if b.BusReserve != "" {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("bus-reserve=%s", b.BusReserve))
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.Pref64Reserve != "" {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("pref64-reserve=%s", b.Pref64Reserve))
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.Pref32Reserve != "" {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("pref32-reserve=%s", b.Pref32Reserve))
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.MemReserve != "" {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("mem-reserve=%s", b.MemReserve))
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.IOReserve != "" {
|
||||||
|
deviceParams = append(deviceParams, fmt.Sprintf("io-reserve=%s", b.IOReserve))
|
||||||
|
}
|
||||||
|
|
||||||
|
qemuParams = append(qemuParams, "-device")
|
||||||
|
qemuParams = append(qemuParams, strings.Join(deviceParams, ","))
|
||||||
|
return qemuParams
|
||||||
|
}
|
||||||
|
|
||||||
|
// Valid returns true if the PCIeSwitchUpstremPortDevice structure is valid and complete.
|
||||||
|
func (b PCIeSwitchDownstreamPortDevice) Valid() bool {
|
||||||
|
if b.ID == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if b.Bus == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if b.Chassis == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if b.Slot == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// VFIODevice represents a qemu vfio device meant for direct access by guest OS.
|
// VFIODevice represents a qemu vfio device meant for direct access by guest OS.
|
||||||
type VFIODevice struct {
|
type VFIODevice struct {
|
||||||
// Bus-Device-Function of device
|
// Bus-Device-Function of device
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
package hypervisors
|
package hypervisors
|
||||||
|
|
||||||
import "fmt"
|
import "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
|
|
||||||
// Bridge is a bridge where devices can be hot plugged
|
// Bridge is a bridge where devices can be hot plugged
|
||||||
type Bridge struct {
|
type Bridge struct {
|
||||||
@@ -28,37 +28,8 @@ type CPUDevice struct {
|
|||||||
ID string
|
ID string
|
||||||
}
|
}
|
||||||
|
|
||||||
// PCIePort names the kind of PCIe port a VFIO device is attached to.
type PCIePort string

// All constants carry the PCIePort type explicitly. Giving a constant its own
// expression without a type makes it an untyped string constant, which has no
// String method and is not a PCIePort.
const (
	// RootPort attaches VFIO devices to a root-port
	RootPort PCIePort = "root-port"
	// SwitchPort attaches VFIO devices to a switch-port
	SwitchPort PCIePort = "switch-port"
	// BridgePort is the default
	BridgePort PCIePort = "bridge-port"
	// NoPort is for disabling VFIO hotplug/coldplug
	NoPort PCIePort = "no-port"
)

// String returns the textual port name for the four known ports and
// "<unknown PCIePort: VALUE>" for any other value.
func (p PCIePort) String() string {
	switch p {
	case RootPort, SwitchPort, BridgePort, NoPort:
		// For known ports the name is the underlying string itself.
		return string(p)
	}
	return fmt.Sprintf("<unknown PCIePort: %s>", string(p))
}
|
|
||||||
|
|
||||||
type HypervisorState struct {
|
type HypervisorState struct {
|
||||||
BlockIndexMap map[int]struct{}
|
BlockIndexMap map[int]struct{}
|
||||||
|
|
||||||
// Type of hypervisor, E.g. qemu/firecracker/acrn.
|
// Type of hypervisor, E.g. qemu/firecracker/acrn.
|
||||||
Type string
|
Type string
|
||||||
UUID string
|
UUID string
|
||||||
@@ -74,7 +45,7 @@ type HypervisorState struct {
|
|||||||
HotpluggedMemory int
|
HotpluggedMemory int
|
||||||
VirtiofsDaemonPid int
|
VirtiofsDaemonPid int
|
||||||
Pid int
|
Pid int
|
||||||
PCIeRootPort int
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
HotplugVFIOOnRootBus bool
|
HotplugVFIOOnRootBus bool
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
@@ -224,8 +224,8 @@ type RuntimeConfigOptions struct {
|
|||||||
JaegerUser string
|
JaegerUser string
|
||||||
JaegerPassword string
|
JaegerPassword string
|
||||||
PFlash []string
|
PFlash []string
|
||||||
PCIeRootPort uint32
|
HotPlugVFIO config.PCIePort
|
||||||
ColdPlugVFIO hv.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
DefaultVCPUCount uint32
|
DefaultVCPUCount uint32
|
||||||
DefaultMaxVCPUCount uint32
|
DefaultMaxVCPUCount uint32
|
||||||
DefaultMemSize uint32
|
DefaultMemSize uint32
|
||||||
@@ -318,7 +318,6 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string {
|
|||||||
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
|
disable_block_device_use = ` + strconv.FormatBool(config.DisableBlock) + `
|
||||||
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
enable_iothreads = ` + strconv.FormatBool(config.EnableIOThreads) + `
|
||||||
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
|
hotplug_vfio_on_root_bus = ` + strconv.FormatBool(config.HotplugVFIOOnRootBus) + `
|
||||||
pcie_root_port = ` + strconv.FormatUint(uint64(config.PCIeRootPort), 10) + `
|
|
||||||
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
|
cold_plug_vfio = "` + config.ColdPlugVFIO.String() + `"
|
||||||
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
msize_9p = ` + strconv.FormatUint(uint64(config.DefaultMsize9p), 10) + `
|
||||||
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
enable_debug = ` + strconv.FormatBool(config.HypervisorDebug) + `
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
package katautils
|
package katautils
|
||||||
|
|
||||||
import (
|
import (
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
config "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
// name is the name of the runtime
|
// name is the name of the runtime
|
||||||
@@ -82,7 +82,6 @@ const defaultEnableDebug bool = false
|
|||||||
const defaultDisableNestingChecks bool = false
|
const defaultDisableNestingChecks bool = false
|
||||||
const defaultMsize9p uint32 = 8192
|
const defaultMsize9p uint32 = 8192
|
||||||
const defaultHotplugVFIOOnRootBus bool = false
|
const defaultHotplugVFIOOnRootBus bool = false
|
||||||
const defaultPCIeRootPort = 0
|
|
||||||
const defaultEntropySource = "/dev/urandom"
|
const defaultEntropySource = "/dev/urandom"
|
||||||
const defaultGuestHookPath string = ""
|
const defaultGuestHookPath string = ""
|
||||||
const defaultVirtioFSCacheMode = "never"
|
const defaultVirtioFSCacheMode = "never"
|
||||||
@@ -108,4 +107,5 @@ const defaultVMCacheEndpoint string = "/var/run/kata-containers/cache.sock"
|
|||||||
// Default config file used by stateless systems.
|
// Default config file used by stateless systems.
|
||||||
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
|
var defaultRuntimeConfiguration = "@CONFIG_PATH@"
|
||||||
|
|
||||||
const defaultColdPlugVFIO = hv.NoPort
|
const defaultHotPlugVFIO = config.NoPort
|
||||||
|
const defaultColdPlugVFIO = config.NoPort
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ import (
|
|||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
||||||
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
|
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
@@ -131,7 +130,6 @@ type hypervisor struct {
|
|||||||
MemSlots uint32 `toml:"memory_slots"`
|
MemSlots uint32 `toml:"memory_slots"`
|
||||||
DefaultBridges uint32 `toml:"default_bridges"`
|
DefaultBridges uint32 `toml:"default_bridges"`
|
||||||
Msize9p uint32 `toml:"msize_9p"`
|
Msize9p uint32 `toml:"msize_9p"`
|
||||||
PCIeRootPort uint32 `toml:"pcie_root_port"`
|
|
||||||
NumVCPUs int32 `toml:"default_vcpus"`
|
NumVCPUs int32 `toml:"default_vcpus"`
|
||||||
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
|
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
|
||||||
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
|
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
|
||||||
@@ -149,7 +147,8 @@ type hypervisor struct {
|
|||||||
EnableIOThreads bool `toml:"enable_iothreads"`
|
EnableIOThreads bool `toml:"enable_iothreads"`
|
||||||
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
|
||||||
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
|
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
|
||||||
ColdPlugVFIO hv.PCIePort `toml:"cold_plug_vfio"`
|
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
|
||||||
|
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
|
||||||
DisableVhostNet bool `toml:"disable_vhost_net"`
|
DisableVhostNet bool `toml:"disable_vhost_net"`
|
||||||
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
|
||||||
ConfidentialGuest bool `toml:"confidential_guest"`
|
ConfidentialGuest bool `toml:"confidential_guest"`
|
||||||
@@ -287,12 +286,18 @@ func (h hypervisor) firmware() (string, error) {
|
|||||||
return ResolvePath(p)
|
return ResolvePath(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h hypervisor) coldPlugVFIO() hv.PCIePort {
|
func (h hypervisor) coldPlugVFIO() config.PCIePort {
|
||||||
if h.ColdPlugVFIO == "" {
|
if h.ColdPlugVFIO == "" {
|
||||||
return defaultColdPlugVFIO
|
return defaultColdPlugVFIO
|
||||||
}
|
}
|
||||||
return h.ColdPlugVFIO
|
return h.ColdPlugVFIO
|
||||||
}
|
}
|
||||||
|
func (h hypervisor) hotPlugVFIO() config.PCIePort {
|
||||||
|
if h.HotPlugVFIO == "" {
|
||||||
|
return defaultHotPlugVFIO
|
||||||
|
}
|
||||||
|
return h.HotPlugVFIO
|
||||||
|
}
|
||||||
|
|
||||||
func (h hypervisor) firmwareVolume() (string, error) {
|
func (h hypervisor) firmwareVolume() (string, error) {
|
||||||
p := h.FirmwareVolume
|
p := h.FirmwareVolume
|
||||||
@@ -863,8 +868,8 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
Msize9p: h.msize9p(),
|
Msize9p: h.msize9p(),
|
||||||
DisableImageNvdimm: h.DisableImageNvdimm,
|
DisableImageNvdimm: h.DisableImageNvdimm,
|
||||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||||
|
HotPlugVFIO: h.hotPlugVFIO(),
|
||||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||||
PCIeRootPort: h.PCIeRootPort,
|
|
||||||
DisableVhostNet: h.DisableVhostNet,
|
DisableVhostNet: h.DisableVhostNet,
|
||||||
EnableVhostUserStore: h.EnableVhostUserStore,
|
EnableVhostUserStore: h.EnableVhostUserStore,
|
||||||
VhostUserStorePath: h.vhostUserStorePath(),
|
VhostUserStorePath: h.vhostUserStorePath(),
|
||||||
@@ -1060,7 +1065,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
Msize9p: h.msize9p(),
|
Msize9p: h.msize9p(),
|
||||||
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
|
||||||
ColdPlugVFIO: h.coldPlugVFIO(),
|
ColdPlugVFIO: h.coldPlugVFIO(),
|
||||||
PCIeRootPort: h.PCIeRootPort,
|
HotPlugVFIO: h.hotPlugVFIO(),
|
||||||
DisableVhostNet: true,
|
DisableVhostNet: true,
|
||||||
GuestHookPath: h.guestHookPath(),
|
GuestHookPath: h.guestHookPath(),
|
||||||
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
|
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
|
||||||
@@ -1291,7 +1296,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
|||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
|
||||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||||
PCIeRootPort: defaultPCIeRootPort,
|
HotPlugVFIO: defaultHotPlugVFIO,
|
||||||
GuestHookPath: defaultGuestHookPath,
|
GuestHookPath: defaultGuestHookPath,
|
||||||
VhostUserStorePath: defaultVhostUserStorePath,
|
VhostUserStorePath: defaultVhostUserStorePath,
|
||||||
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
|
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
|
||||||
@@ -1663,9 +1668,10 @@ func checkConfig(config oci.RuntimeConfig) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO
|
||||||
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
|
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
|
||||||
machineType := config.HypervisorConfig.HypervisorMachineType
|
machineType := config.HypervisorConfig.HypervisorMachineType
|
||||||
if err := checkPCIeConfig(coldPlugVFIO, machineType); err != nil {
|
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1675,18 +1681,32 @@ func checkConfig(config oci.RuntimeConfig) error {
|
|||||||
// checkPCIeConfig ensures the PCIe configuration is valid.
|
// checkPCIeConfig ensures the PCIe configuration is valid.
|
||||||
// Only allow one of the following settings for cold-plug:
|
// Only allow one of the following settings for cold-plug:
|
||||||
// no-port, root-port, switch-port
|
// no-port, root-port, switch-port
|
||||||
func checkPCIeConfig(vfioPort hv.PCIePort, machineType string) error {
|
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string) error {
|
||||||
// Currently only QEMU q35 supports advanced PCIe topologies
|
// Currently only QEMU q35 supports advanced PCIe topologies
|
||||||
// firecracker, dragonball do not have right now any PCIe support
|
// firecracker, dragonball do not have right now any PCIe support
|
||||||
if machineType != "q35" {
|
if machineType != "q35" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if vfioPort == hv.NoPort || vfioPort == hv.RootPort || vfioPort == hv.SwitchPort {
|
|
||||||
|
if coldPlug != config.NoPort && hotPlug != config.NoPort {
|
||||||
|
return fmt.Errorf("invalid hot-plug=%s and cold-plug=%s settings, only one of them can be set", coldPlug, hotPlug)
|
||||||
|
}
|
||||||
|
if coldPlug == config.NoPort && hotPlug == config.NoPort {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var port config.PCIePort
|
||||||
|
if coldPlug != config.NoPort {
|
||||||
|
port = coldPlug
|
||||||
|
}
|
||||||
|
if hotPlug != config.NoPort {
|
||||||
|
port = hotPlug
|
||||||
|
}
|
||||||
|
if port == config.NoPort || port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s",
|
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s",
|
||||||
vfioPort, hv.NoPort, hv.RootPort, hv.SwitchPort)
|
coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort)
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkNetNsConfig performs sanity checks on disable_new_netns config.
|
// checkNetNsConfig performs sanity checks on disable_new_netns config.
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
|
||||||
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
|
||||||
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
|
||||||
@@ -63,15 +63,16 @@ func createConfig(configPath string, fileData string) error {
|
|||||||
|
|
||||||
// createAllRuntimeConfigFiles creates all files necessary to call
|
// createAllRuntimeConfigFiles creates all files necessary to call
|
||||||
// loadConfiguration().
|
// loadConfiguration().
|
||||||
func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConfig, err error) {
|
func createAllRuntimeConfigFiles(dir, hypervisor string) (testConfig testRuntimeConfig, err error) {
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
return config, fmt.Errorf("BUG: need directory")
|
return testConfig, fmt.Errorf("BUG: need directory")
|
||||||
}
|
}
|
||||||
|
|
||||||
if hypervisor == "" {
|
if hypervisor == "" {
|
||||||
return config, fmt.Errorf("BUG: need hypervisor")
|
return testConfig, fmt.Errorf("BUG: need hypervisor")
|
||||||
}
|
}
|
||||||
var coldPlugVFIO hv.PCIePort
|
var hotPlugVFIO config.PCIePort
|
||||||
|
var coldPlugVFIO config.PCIePort
|
||||||
hypervisorPath := path.Join(dir, "hypervisor")
|
hypervisorPath := path.Join(dir, "hypervisor")
|
||||||
kernelPath := path.Join(dir, "kernel")
|
kernelPath := path.Join(dir, "kernel")
|
||||||
kernelParams := "foo=bar xyz"
|
kernelParams := "foo=bar xyz"
|
||||||
@@ -85,8 +86,8 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
blockDeviceAIO := "io_uring"
|
blockDeviceAIO := "io_uring"
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotplugVFIOOnRootBus := true
|
hotplugVFIOOnRootBus := true
|
||||||
pcieRootPort := uint32(2)
|
hotPlugVFIO = config.NoPort
|
||||||
coldPlugVFIO = hv.RootPort
|
coldPlugVFIO = config.BridgePort
|
||||||
disableNewNetNs := false
|
disableNewNetNs := false
|
||||||
sharedFS := "virtio-9p"
|
sharedFS := "virtio-9p"
|
||||||
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
virtioFSdaemon := path.Join(dir, "virtiofsd")
|
||||||
@@ -108,7 +109,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
BlockDeviceAIO: blockDeviceAIO,
|
BlockDeviceAIO: blockDeviceAIO,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: pcieRootPort,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
DisableNewNetNs: disableNewNetNs,
|
DisableNewNetNs: disableNewNetNs,
|
||||||
DefaultVCPUCount: defaultVCPUCount,
|
DefaultVCPUCount: defaultVCPUCount,
|
||||||
@@ -134,7 +135,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
configPath := path.Join(dir, "runtime.toml")
|
configPath := path.Join(dir, "runtime.toml")
|
||||||
err = createConfig(configPath, runtimeConfigFileData)
|
err = createConfig(configPath, runtimeConfigFileData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return config, err
|
return testConfig, err
|
||||||
}
|
}
|
||||||
|
|
||||||
configPathLink := path.Join(filepath.Dir(configPath), "link-to-configuration.toml")
|
configPathLink := path.Join(filepath.Dir(configPath), "link-to-configuration.toml")
|
||||||
@@ -142,7 +143,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
// create a link to the config file
|
// create a link to the config file
|
||||||
err = syscall.Symlink(configPath, configPathLink)
|
err = syscall.Symlink(configPath, configPathLink)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return config, err
|
return testConfig, err
|
||||||
}
|
}
|
||||||
|
|
||||||
files := []string{hypervisorPath, kernelPath, imagePath}
|
files := []string{hypervisorPath, kernelPath, imagePath}
|
||||||
@@ -151,7 +152,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
// create the resource (which must be >0 bytes)
|
// create the resource (which must be >0 bytes)
|
||||||
err := WriteFile(file, "foo", testFileMode)
|
err := WriteFile(file, "foo", testFileMode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return config, err
|
return testConfig, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,7 +173,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
DefaultBridges: defaultBridgesCount,
|
DefaultBridges: defaultBridgesCount,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: pcieRootPort,
|
HotPlugVFIO: hotPlugVFIO,
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
MemSlots: defaultMemSlots,
|
MemSlots: defaultMemSlots,
|
||||||
@@ -216,10 +217,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
|
|
||||||
err = SetKernelParams(&runtimeConfig)
|
err = SetKernelParams(&runtimeConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return config, err
|
return testConfig, err
|
||||||
}
|
}
|
||||||
|
|
||||||
config = testRuntimeConfig{
|
rtimeConfig := testRuntimeConfig{
|
||||||
RuntimeConfig: runtimeConfig,
|
RuntimeConfig: runtimeConfig,
|
||||||
RuntimeConfigFile: configPath,
|
RuntimeConfigFile: configPath,
|
||||||
ConfigPath: configPath,
|
ConfigPath: configPath,
|
||||||
@@ -228,7 +229,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
|
|||||||
LogPath: logPath,
|
LogPath: logPath,
|
||||||
}
|
}
|
||||||
|
|
||||||
return config, nil
|
return rtimeConfig, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// testLoadConfiguration accepts an optional function that can be used
|
// testLoadConfiguration accepts an optional function that can be used
|
||||||
@@ -568,6 +569,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
|||||||
VirtioFSCache: defaultVirtioFSCacheMode,
|
VirtioFSCache: defaultVirtioFSCacheMode,
|
||||||
BlockDeviceAIO: defaultBlockDeviceAIO,
|
BlockDeviceAIO: defaultBlockDeviceAIO,
|
||||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
|
HotPlugVFIO: defaultHotPlugVFIO,
|
||||||
ColdPlugVFIO: defaultColdPlugVFIO,
|
ColdPlugVFIO: defaultColdPlugVFIO,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -602,7 +604,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
|||||||
|
|
||||||
func TestNewQemuHypervisorConfig(t *testing.T) {
|
func TestNewQemuHypervisorConfig(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
var coldPlugVFIO hv.PCIePort
|
var coldPlugVFIO config.PCIePort
|
||||||
hypervisorPath := path.Join(dir, "hypervisor")
|
hypervisorPath := path.Join(dir, "hypervisor")
|
||||||
kernelPath := path.Join(dir, "kernel")
|
kernelPath := path.Join(dir, "kernel")
|
||||||
imagePath := path.Join(dir, "image")
|
imagePath := path.Join(dir, "image")
|
||||||
@@ -610,8 +612,7 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
|||||||
disableBlock := true
|
disableBlock := true
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotplugVFIOOnRootBus := true
|
hotplugVFIOOnRootBus := true
|
||||||
pcieRootPort := uint32(2)
|
coldPlugVFIO = config.BridgePort
|
||||||
coldPlugVFIO = hv.RootPort
|
|
||||||
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
orgVHostVSockDevicePath := utils.VHostVSockDevicePath
|
||||||
blockDeviceAIO := "io_uring"
|
blockDeviceAIO := "io_uring"
|
||||||
defer func() {
|
defer func() {
|
||||||
@@ -630,7 +631,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
|||||||
DisableBlockDeviceUse: disableBlock,
|
DisableBlockDeviceUse: disableBlock,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: pcieRootPort,
|
|
||||||
ColdPlugVFIO: coldPlugVFIO,
|
ColdPlugVFIO: coldPlugVFIO,
|
||||||
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
|
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
|
||||||
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
||||||
@@ -686,10 +686,6 @@ func TestNewQemuHypervisorConfig(t *testing.T) {
|
|||||||
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
|
t.Errorf("Expected value for HotplugVFIOOnRootBus %v, got %v", hotplugVFIOOnRootBus, config.HotplugVFIOOnRootBus)
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.PCIeRootPort != pcieRootPort {
|
|
||||||
t.Errorf("Expected value for PCIeRootPort %v, got %v", pcieRootPort, config.PCIeRootPort)
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate {
|
if config.RxRateLimiterMaxRate != rxRateLimiterMaxRate {
|
||||||
t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate)
|
t.Errorf("Expected value for rx rate limiter %v, got %v", rxRateLimiterMaxRate, config.RxRateLimiterMaxRate)
|
||||||
}
|
}
|
||||||
@@ -812,7 +808,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
|
|||||||
disableBlock := true
|
disableBlock := true
|
||||||
enableIOThreads := true
|
enableIOThreads := true
|
||||||
hotplugVFIOOnRootBus := true
|
hotplugVFIOOnRootBus := true
|
||||||
pcieRootPort := uint32(2)
|
|
||||||
|
|
||||||
hypervisor := hypervisor{
|
hypervisor := hypervisor{
|
||||||
Path: hypervisorPath,
|
Path: hypervisorPath,
|
||||||
@@ -823,7 +818,6 @@ func TestNewQemuHypervisorConfigImageAndInitrd(t *testing.T) {
|
|||||||
DisableBlockDeviceUse: disableBlock,
|
DisableBlockDeviceUse: disableBlock,
|
||||||
EnableIOThreads: enableIOThreads,
|
EnableIOThreads: enableIOThreads,
|
||||||
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: pcieRootPort,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := newQemuHypervisorConfig(hypervisor)
|
_, err := newQemuHypervisorConfig(hypervisor)
|
||||||
|
|||||||
@@ -453,6 +453,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := addHypervisorHotColdPlugVfioOverrides(ocispec, config); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok {
|
||||||
if value != "" {
|
if value != "" {
|
||||||
config.HypervisorConfig.HypervisorMachineType = value
|
config.HypervisorConfig.HypervisorMachineType = value
|
||||||
@@ -508,12 +512,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.PCIeRootPort).setUint(func(pcieRootPort uint64) {
|
|
||||||
config.HypervisorConfig.PCIeRootPort = uint32(pcieRootPort)
|
|
||||||
}); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
|
if value, ok := ocispec.Annotations[vcAnnotations.EntropySource]; ok {
|
||||||
if !checkPathIsInGlobs(runtime.HypervisorConfig.EntropySourceList, value) {
|
if !checkPathIsInGlobs(runtime.HypervisorConfig.EntropySourceList, value) {
|
||||||
return fmt.Errorf("entropy source %v required from annotation is not valid", value)
|
return fmt.Errorf("entropy source %v required from annotation is not valid", value)
|
||||||
@@ -576,6 +574,37 @@ func addHypervisorPathOverrides(ocispec specs.Spec, config *vc.SandboxConfig, ru
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func addHypervisorPCIePortOverride(value string) (config.PCIePort, error) {
|
||||||
|
if value == "" {
|
||||||
|
return config.NoPort, nil
|
||||||
|
}
|
||||||
|
port := config.PCIePort(value)
|
||||||
|
if port.Invalid() {
|
||||||
|
return config.InvalidPort, fmt.Errorf("Invalid PCIe port \"%v\" specified in annotation", value)
|
||||||
|
}
|
||||||
|
return port, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func addHypervisorHotColdPlugVfioOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error {
|
||||||
|
|
||||||
|
var err error
|
||||||
|
if value, ok := ocispec.Annotations[vcAnnotations.HotPlugVFIO]; ok {
|
||||||
|
if sbConfig.HypervisorConfig.HotPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If hot-plug is specified disable cold-plug and vice versa
|
||||||
|
sbConfig.HypervisorConfig.ColdPlugVFIO = config.NoPort
|
||||||
|
}
|
||||||
|
if value, ok := ocispec.Annotations[vcAnnotations.ColdPlugVFIO]; ok {
|
||||||
|
if sbConfig.HypervisorConfig.ColdPlugVFIO, err = addHypervisorPCIePortOverride(value); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If cold-plug is specified disable hot-plug and vice versa
|
||||||
|
sbConfig.HypervisorConfig.HotPlugVFIO = config.NoPort
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error {
|
||||||
|
|
||||||
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
if err := newAnnotationConfiguration(ocispec, vcAnnotations.DefaultMemory).setUintWithCheck(func(memorySz uint64) error {
|
||||||
|
|||||||
@@ -599,7 +599,7 @@ func TestContainerPipeSizeAnnotation(t *testing.T) {
|
|||||||
func TestAddHypervisorAnnotations(t *testing.T) {
|
func TestAddHypervisorAnnotations(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
|
|
||||||
config := vc.SandboxConfig{
|
sbConfig := vc.SandboxConfig{
|
||||||
Annotations: make(map[string]string),
|
Annotations: make(map[string]string),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -628,8 +628,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
|||||||
runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"}
|
runtimeConfig.HypervisorConfig.VirtioFSDaemonList = []string{"/bin/*ls*"}
|
||||||
|
|
||||||
ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
|
ocispec.Annotations[vcAnnotations.KernelParams] = "vsyscall=emulate iommu=on"
|
||||||
addHypervisorConfigOverrides(ocispec, &config, runtimeConfig)
|
addHypervisorConfigOverrides(ocispec, &sbConfig, runtimeConfig)
|
||||||
assert.Exactly(expectedHyperConfig, config.HypervisorConfig)
|
assert.Exactly(expectedHyperConfig, sbConfig.HypervisorConfig)
|
||||||
|
|
||||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
||||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
||||||
@@ -660,7 +660,8 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
|||||||
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
|
ocispec.Annotations[vcAnnotations.GuestHookPath] = "/usr/bin/"
|
||||||
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
ocispec.Annotations[vcAnnotations.DisableImageNvdimm] = "true"
|
||||||
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
|
ocispec.Annotations[vcAnnotations.HotplugVFIOOnRootBus] = "true"
|
||||||
ocispec.Annotations[vcAnnotations.PCIeRootPort] = "2"
|
ocispec.Annotations[vcAnnotations.ColdPlugVFIO] = config.BridgePort
|
||||||
|
ocispec.Annotations[vcAnnotations.HotPlugVFIO] = config.NoPort
|
||||||
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
|
ocispec.Annotations[vcAnnotations.IOMMUPlatform] = "true"
|
||||||
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
|
ocispec.Annotations[vcAnnotations.SGXEPC] = "64Mi"
|
||||||
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
|
ocispec.Annotations[vcAnnotations.UseLegacySerial] = "true"
|
||||||
@@ -668,55 +669,58 @@ func TestAddHypervisorAnnotations(t *testing.T) {
|
|||||||
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
|
ocispec.Annotations[vcAnnotations.RxRateLimiterMaxRate] = "10000000"
|
||||||
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
|
ocispec.Annotations[vcAnnotations.TxRateLimiterMaxRate] = "10000000"
|
||||||
|
|
||||||
addAnnotations(ocispec, &config, runtimeConfig)
|
err := addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||||
assert.Equal(config.HypervisorConfig.NumVCPUs, uint32(1))
|
assert.NoError(err)
|
||||||
assert.Equal(config.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
|
|
||||||
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
|
assert.Equal(sbConfig.HypervisorConfig.NumVCPUs, uint32(1))
|
||||||
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
|
assert.Equal(sbConfig.HypervisorConfig.DefaultMaxVCPUs, uint32(1))
|
||||||
assert.Equal(config.HypervisorConfig.MemOffset, uint64(512))
|
assert.Equal(sbConfig.HypervisorConfig.MemorySize, uint32(1024))
|
||||||
assert.Equal(config.HypervisorConfig.VirtioMem, true)
|
assert.Equal(sbConfig.HypervisorConfig.MemSlots, uint32(20))
|
||||||
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
|
assert.Equal(sbConfig.HypervisorConfig.MemOffset, uint64(512))
|
||||||
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
assert.Equal(sbConfig.HypervisorConfig.VirtioMem, true)
|
||||||
assert.Equal(config.HypervisorConfig.HugePages, true)
|
assert.Equal(sbConfig.HypervisorConfig.MemPrealloc, true)
|
||||||
assert.Equal(config.HypervisorConfig.IOMMU, true)
|
assert.Equal(sbConfig.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
|
||||||
assert.Equal(config.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
|
assert.Equal(sbConfig.HypervisorConfig.HugePages, true)
|
||||||
assert.Equal(config.HypervisorConfig.BlockDeviceAIO, "io_uring")
|
assert.Equal(sbConfig.HypervisorConfig.IOMMU, true)
|
||||||
assert.Equal(config.HypervisorConfig.DisableBlockDeviceUse, true)
|
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceDriver, "virtio-scsi")
|
||||||
assert.Equal(config.HypervisorConfig.EnableIOThreads, true)
|
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceAIO, "io_uring")
|
||||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheSet, true)
|
assert.Equal(sbConfig.HypervisorConfig.DisableBlockDeviceUse, true)
|
||||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheDirect, true)
|
assert.Equal(sbConfig.HypervisorConfig.EnableIOThreads, true)
|
||||||
assert.Equal(config.HypervisorConfig.BlockDeviceCacheNoflush, true)
|
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheSet, true)
|
||||||
assert.Equal(config.HypervisorConfig.SharedFS, "virtio-fs")
|
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheDirect, true)
|
||||||
assert.Equal(config.HypervisorConfig.VirtioFSDaemon, "/bin/false")
|
assert.Equal(sbConfig.HypervisorConfig.BlockDeviceCacheNoflush, true)
|
||||||
assert.Equal(config.HypervisorConfig.VirtioFSCache, "auto")
|
assert.Equal(sbConfig.HypervisorConfig.SharedFS, "virtio-fs")
|
||||||
assert.ElementsMatch(config.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
|
assert.Equal(sbConfig.HypervisorConfig.VirtioFSDaemon, "/bin/false")
|
||||||
assert.Equal(config.HypervisorConfig.Msize9p, uint32(512))
|
assert.Equal(sbConfig.HypervisorConfig.VirtioFSCache, "auto")
|
||||||
assert.Equal(config.HypervisorConfig.HypervisorMachineType, "q35")
|
assert.ElementsMatch(sbConfig.HypervisorConfig.VirtioFSExtraArgs, [2]string{"arg0", "arg1"})
|
||||||
assert.Equal(config.HypervisorConfig.MachineAccelerators, "nofw")
|
assert.Equal(sbConfig.HypervisorConfig.Msize9p, uint32(512))
|
||||||
assert.Equal(config.HypervisorConfig.CPUFeatures, "pmu=off")
|
assert.Equal(sbConfig.HypervisorConfig.HypervisorMachineType, "q35")
|
||||||
assert.Equal(config.HypervisorConfig.DisableVhostNet, true)
|
assert.Equal(sbConfig.HypervisorConfig.MachineAccelerators, "nofw")
|
||||||
assert.Equal(config.HypervisorConfig.GuestHookPath, "/usr/bin/")
|
assert.Equal(sbConfig.HypervisorConfig.CPUFeatures, "pmu=off")
|
||||||
assert.Equal(config.HypervisorConfig.DisableImageNvdimm, true)
|
assert.Equal(sbConfig.HypervisorConfig.DisableVhostNet, true)
|
||||||
assert.Equal(config.HypervisorConfig.HotplugVFIOOnRootBus, true)
|
assert.Equal(sbConfig.HypervisorConfig.GuestHookPath, "/usr/bin/")
|
||||||
assert.Equal(config.HypervisorConfig.PCIeRootPort, uint32(2))
|
assert.Equal(sbConfig.HypervisorConfig.DisableImageNvdimm, true)
|
||||||
assert.Equal(config.HypervisorConfig.IOMMUPlatform, true)
|
assert.Equal(sbConfig.HypervisorConfig.HotplugVFIOOnRootBus, true)
|
||||||
assert.Equal(config.HypervisorConfig.SGXEPCSize, int64(67108864))
|
assert.Equal(string(sbConfig.HypervisorConfig.ColdPlugVFIO), string(config.BridgePort))
|
||||||
assert.Equal(config.HypervisorConfig.LegacySerial, true)
|
assert.Equal(string(sbConfig.HypervisorConfig.HotPlugVFIO), string(config.NoPort))
|
||||||
assert.Equal(config.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
|
assert.Equal(sbConfig.HypervisorConfig.IOMMUPlatform, true)
|
||||||
assert.Equal(config.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
|
assert.Equal(sbConfig.HypervisorConfig.SGXEPCSize, int64(67108864))
|
||||||
|
assert.Equal(sbConfig.HypervisorConfig.LegacySerial, true)
|
||||||
|
assert.Equal(sbConfig.HypervisorConfig.RxRateLimiterMaxRate, uint64(10000000))
|
||||||
|
assert.Equal(sbConfig.HypervisorConfig.TxRateLimiterMaxRate, uint64(10000000))
|
||||||
|
|
||||||
// In case an absurd large value is provided, the config value if not over-ridden
|
// In case an absurd large value is provided, the config value if not over-ridden
|
||||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
|
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "655536"
|
||||||
err := addAnnotations(ocispec, &config, runtimeConfig)
|
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
|
|
||||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
|
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "-1"
|
||||||
err = addAnnotations(ocispec, &config, runtimeConfig)
|
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
|
|
||||||
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
ocispec.Annotations[vcAnnotations.DefaultVCPUs] = "1"
|
||||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
|
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "-1"
|
||||||
err = addAnnotations(ocispec, &config, runtimeConfig)
|
err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
|
||||||
assert.Error(err)
|
assert.Error(err)
|
||||||
|
|
||||||
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
ocispec.Annotations[vcAnnotations.DefaultMaxVCPUs] = "1"
|
||||||
|
|||||||
@@ -864,12 +864,12 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
|||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Create the clh device config via the constructor to ensure default values are properly assigned
|
// Create the clh device config via the constructor to ensure default values are properly assigned
|
||||||
clhDevice := *chclient.NewDeviceConfig(*(*device).GetSysfsDev())
|
clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
|
||||||
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
|
||||||
}
|
}
|
||||||
clh.devicesIds[*(*device).GetID()] = pciInfo.GetId()
|
clh.devicesIds[device.ID] = pciInfo.GetId()
|
||||||
|
|
||||||
// clh doesn't use bridges, so the PCI path is simply the slot
|
// clh doesn't use bridges, so the PCI path is simply the slot
|
||||||
// number of the device. This will break if clh starts using
|
// number of the device. This will break if clh starts using
|
||||||
@@ -886,14 +886,11 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
|
|||||||
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
return fmt.Errorf("Unexpected PCI address %q from clh hotplug", pciInfo.Bdf)
|
||||||
}
|
}
|
||||||
|
|
||||||
guestPciPath, err := types.PciPathFromString(tokens[0])
|
if device.Type == config.VFIOAPDeviceMediatedType {
|
||||||
|
|
||||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device)
|
return fmt.Errorf("VFIO device %+v is not PCI, only PCI is supported in Cloud Hypervisor", device)
|
||||||
}
|
}
|
||||||
pciDevice.GuestPciPath = guestPciPath
|
|
||||||
*device = pciDevice
|
device.GuestPciPath, err = types.PciPathFromString(tokens[0])
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -937,7 +934,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int
|
|||||||
case BlockDev:
|
case BlockDev:
|
||||||
deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
|
deviceID = clhDriveIndexToID(devInfo.(*config.BlockDrive).Index)
|
||||||
case VfioDev:
|
case VfioDev:
|
||||||
deviceID = *devInfo.(config.VFIODev).GetID()
|
deviceID = devInfo.(*config.VFIODev).ID
|
||||||
default:
|
default:
|
||||||
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
|
clh.Logger().WithFields(log.Fields{"devInfo": devInfo,
|
||||||
"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
|
"deviceType": devType}).Error("HotplugRemoveDevice: unsupported device")
|
||||||
|
|||||||
@@ -673,7 +673,7 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) {
|
|||||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), &config.BlockDrive{}, BlockDev)
|
||||||
assert.NoError(err, "Hotplug remove block device expected no error")
|
assert.NoError(err, "Hotplug remove block device expected no error")
|
||||||
|
|
||||||
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIOPCIDev{}, VfioDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), &config.VFIODev{}, VfioDev)
|
||||||
assert.NoError(err, "Hotplug remove vfio block device expected no error")
|
assert.NoError(err, "Hotplug remove vfio block device expected no error")
|
||||||
|
|
||||||
_, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev)
|
_, err = clh.HotplugRemoveDevice(context.Background(), nil, NetDev)
|
||||||
|
|||||||
@@ -288,12 +288,12 @@ type HypervisorConfig struct {
|
|||||||
// root bus instead of a bridge.
|
// root bus instead of a bridge.
|
||||||
HotplugVFIOOnRootBus bool
|
HotplugVFIOOnRootBus bool
|
||||||
|
|
||||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the
|
||||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
// root port, switch, bridge or no port
|
||||||
PCIeRootPort uint32
|
HotPlugVFIO hv.PCIePort
|
||||||
|
|
||||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||||
// root port, switch or no port
|
// root port, switch, bridge or no port
|
||||||
ColdPlugVFIO hv.PCIePort
|
ColdPlugVFIO hv.PCIePort
|
||||||
|
|
||||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||||
|
|||||||
@@ -505,13 +505,21 @@ type HypervisorConfig struct {
|
|||||||
// MemOffset specifies memory space for nvdimm device
|
// MemOffset specifies memory space for nvdimm device
|
||||||
MemOffset uint64
|
MemOffset uint64
|
||||||
|
|
||||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
// VFIODevices are used to get PCIe device info early before the sandbox
|
||||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
// is started to make better PCIe topology decisions
|
||||||
PCIeRootPort uint32
|
VFIODevices []config.DeviceInfo
|
||||||
|
// VhostUserBlkDevices are handled differently in Q35 and Virt machine
|
||||||
|
// type. capture them early before the sandbox to make better PCIe topology
|
||||||
|
// decisions
|
||||||
|
VhostUserBlkDevices []config.DeviceInfo
|
||||||
|
|
||||||
|
// HotplugVFIO is used to indicate if devices need to be hotplugged on the
|
||||||
|
// root port or a switch
|
||||||
|
HotPlugVFIO config.PCIePort
|
||||||
|
|
||||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||||
// root port, switch or no port
|
// root port, switch or no port
|
||||||
ColdPlugVFIO hv.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
|
||||||
// NumVCPUs specifies default number of vCPUs for the VM.
|
// NumVCPUs specifies default number of vCPUs for the VM.
|
||||||
NumVCPUs uint32
|
NumVCPUs uint32
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import (
|
|||||||
"github.com/docker/go-units"
|
"github.com/docker/go-units"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/api"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
||||||
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
|
volume "github.com/kata-containers/kata-containers/src/runtime/pkg/direct-volume"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/uuid"
|
||||||
@@ -1137,7 +1138,7 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
|||||||
ContainerPath: dev.ContainerPath,
|
ContainerPath: dev.ContainerPath,
|
||||||
Type: kataVfioPciDevType,
|
Type: kataVfioPciDevType,
|
||||||
Id: groupNum,
|
Id: groupNum,
|
||||||
Options: nil,
|
Options: make([]string, len(devList)),
|
||||||
}
|
}
|
||||||
|
|
||||||
// We always pass the device information to the agent, since
|
// We always pass the device information to the agent, since
|
||||||
@@ -1147,16 +1148,16 @@ func (k *kataAgent) appendVfioDevice(dev ContainerDevice, device api.Device, c *
|
|||||||
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
if c.sandbox.config.VfioMode == config.VFIOModeGuestKernel {
|
||||||
kataDevice.Type = kataVfioPciGuestKernelDevType
|
kataDevice.Type = kataVfioPciGuestKernelDevType
|
||||||
}
|
}
|
||||||
|
for i, dev := range devList {
|
||||||
if (*devList[0]).GetType() == config.VFIOAPDeviceMediatedType {
|
if dev.Type == config.VFIOAPDeviceMediatedType {
|
||||||
kataDevice.Type = kataVfioApDevType
|
kataDevice.Type = kataVfioApDevType
|
||||||
kataDevice.Options = (*devList[0]).(config.VFIOAPDev).APDevices
|
kataDevice.Options = dev.APDevices
|
||||||
} else {
|
} else {
|
||||||
kataDevice.Options = make([]string, len(devList))
|
|
||||||
for i, device := range devList {
|
devBDF := drivers.GetBDF(dev.BDF)
|
||||||
pciDevice := (*device).(config.VFIOPCIDev)
|
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", devBDF, dev.GuestPciPath)
|
||||||
kataDevice.Options[i] = fmt.Sprintf("0000:%s=%s", pciDevice.BDF, pciDevice.GuestPciPath)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return kataDevice
|
return kataDevice
|
||||||
@@ -1342,7 +1343,6 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
|
|||||||
if _, err = k.sendReq(ctx, req); err != nil {
|
if _, err = k.sendReq(ctx, req); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return buildProcessFromExecID(req.ExecId)
|
return buildProcessFromExecID(req.ExecId)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -245,7 +245,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
|||||||
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
|
DisableNestingChecks: sconfig.HypervisorConfig.DisableNestingChecks,
|
||||||
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
|
DisableImageNvdimm: sconfig.HypervisorConfig.DisableImageNvdimm,
|
||||||
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: sconfig.HypervisorConfig.HotplugVFIOOnRootBus,
|
||||||
PCIeRootPort: sconfig.HypervisorConfig.PCIeRootPort,
|
|
||||||
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
|
BootToBeTemplate: sconfig.HypervisorConfig.BootToBeTemplate,
|
||||||
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
|
BootFromTemplate: sconfig.HypervisorConfig.BootFromTemplate,
|
||||||
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
|
DisableVhostNet: sconfig.HypervisorConfig.DisableVhostNet,
|
||||||
@@ -487,8 +486,8 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
|||||||
DisableNestingChecks: hconf.DisableNestingChecks,
|
DisableNestingChecks: hconf.DisableNestingChecks,
|
||||||
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
DisableImageNvdimm: hconf.DisableImageNvdimm,
|
||||||
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
|
HotplugVFIOOnRootBus: hconf.HotplugVFIOOnRootBus,
|
||||||
|
HotPlugVFIO: hconf.HotPlugVFIO,
|
||||||
ColdPlugVFIO: hconf.ColdPlugVFIO,
|
ColdPlugVFIO: hconf.ColdPlugVFIO,
|
||||||
PCIeRootPort: hconf.PCIeRootPort,
|
|
||||||
BootToBeTemplate: hconf.BootToBeTemplate,
|
BootToBeTemplate: hconf.BootToBeTemplate,
|
||||||
BootFromTemplate: hconf.BootFromTemplate,
|
BootFromTemplate: hconf.BootFromTemplate,
|
||||||
DisableVhostNet: hconf.DisableVhostNet,
|
DisableVhostNet: hconf.DisableVhostNet,
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
package persistapi
|
package persistapi
|
||||||
|
|
||||||
import (
|
import (
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
)
|
)
|
||||||
@@ -131,10 +131,6 @@ type HypervisorConfig struct {
|
|||||||
// Enable SGX. Hardware-based isolation and memory encryption.
|
// Enable SGX. Hardware-based isolation and memory encryption.
|
||||||
SGXEPCSize int64
|
SGXEPCSize int64
|
||||||
|
|
||||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
|
||||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
|
||||||
PCIeRootPort uint32
|
|
||||||
|
|
||||||
// NumVCPUs specifies default number of vCPUs for the VM.
|
// NumVCPUs specifies default number of vCPUs for the VM.
|
||||||
NumVCPUs uint32
|
NumVCPUs uint32
|
||||||
|
|
||||||
@@ -199,9 +195,13 @@ type HypervisorConfig struct {
|
|||||||
// root bus instead of a bridge.
|
// root bus instead of a bridge.
|
||||||
HotplugVFIOOnRootBus bool
|
HotplugVFIOOnRootBus bool
|
||||||
|
|
||||||
|
// HotPlugVFIO is used to indicate if devices need to be hotplugged on the
|
||||||
|
// root, switch, bridge or no-port
|
||||||
|
HotPlugVFIO config.PCIePort
|
||||||
|
|
||||||
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
// ColdPlugVFIO is used to indicate if devices need to be coldplugged on the
|
||||||
// root port or a switch or no-port
|
// root, bridge, switch or no-port
|
||||||
ColdPlugVFIO hv.PCIePort
|
ColdPlugVFIO config.PCIePort
|
||||||
|
|
||||||
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
// BootToBeTemplate used to indicate if the VM is created to be a template VM
|
||||||
BootToBeTemplate bool
|
BootToBeTemplate bool
|
||||||
|
|||||||
@@ -126,9 +126,11 @@ const (
|
|||||||
// root bus instead of a bridge.
|
// root bus instead of a bridge.
|
||||||
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
|
HotplugVFIOOnRootBus = kataAnnotHypervisorPrefix + "hotplug_vfio_on_root_bus"
|
||||||
|
|
||||||
// PCIeRootPort is used to indicate the number of PCIe Root Port devices
|
// ColdPlugVFIO is a sandbox annotation used to indicate if devices need to be coldplugged.
|
||||||
// The PCIe Root Port device is used to hot-plug the PCIe device
|
ColdPlugVFIO = kataAnnotHypervisorPrefix + "cold_plug_vfio"
|
||||||
PCIeRootPort = kataAnnotHypervisorPrefix + "pcie_root_port"
|
|
||||||
|
// HotPlugVFIO is a sandbox annotation used to indicate if devices need to be hotplugged.
|
||||||
|
HotPlugVFIO = kataAnnotHypervisorPrefix + "hot_plug_vfio"
|
||||||
|
|
||||||
// EntropySource is a sandbox annotation to specify the path to a host source of
|
// EntropySource is a sandbox annotation to specify the path to a host source of
|
||||||
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
// entropy (/dev/random, /dev/urandom or real hardware RNG device)
|
||||||
|
|||||||
@@ -65,6 +65,11 @@ const romFile = ""
|
|||||||
// Default value is false.
|
// Default value is false.
|
||||||
const defaultDisableModern = false
|
const defaultDisableModern = false
|
||||||
|
|
||||||
|
// A deeper PCIe topology than 5 is already not advisable just for the sake
|
||||||
|
// of having enough buffer we limit ourselves to 10 and exit if we reach
|
||||||
|
// the root bus
|
||||||
|
const maxPCIeTopoDepth = 10
|
||||||
|
|
||||||
type qmpChannel struct {
|
type qmpChannel struct {
|
||||||
qmp *govmmQemu.QMP
|
qmp *govmmQemu.QMP
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
@@ -76,14 +81,14 @@ type qmpChannel struct {
|
|||||||
// QemuState keeps Qemu's state
|
// QemuState keeps Qemu's state
|
||||||
type QemuState struct {
|
type QemuState struct {
|
||||||
UUID string
|
UUID string
|
||||||
|
HotPlugVFIO config.PCIePort
|
||||||
Bridges []types.Bridge
|
Bridges []types.Bridge
|
||||||
// HotpluggedCPUs is the list of CPUs that were hot-added
|
|
||||||
HotpluggedVCPUs []hv.CPUDevice
|
HotpluggedVCPUs []hv.CPUDevice
|
||||||
HotpluggedMemory int
|
HotpluggedMemory int
|
||||||
VirtiofsDaemonPid int
|
VirtiofsDaemonPid int
|
||||||
PCIeRootPort int
|
|
||||||
HotplugVFIOOnRootBus bool
|
HotplugVFIOOnRootBus bool
|
||||||
ColdPlugVFIO hv.PCIePort
|
HotplugVFIO config.PCIePort
|
||||||
|
ColdPlugVFIO config.PCIePort
|
||||||
}
|
}
|
||||||
|
|
||||||
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
|
||||||
@@ -282,10 +287,10 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
|
|||||||
|
|
||||||
q.Logger().Debug("Creating UUID")
|
q.Logger().Debug("Creating UUID")
|
||||||
q.state.UUID = uuid.Generate().String()
|
q.state.UUID = uuid.Generate().String()
|
||||||
|
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||||
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
|
||||||
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
|
||||||
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
|
q.state.HotPlugVFIO = q.config.HotPlugVFIO
|
||||||
|
|
||||||
// The path might already exist, but in case of VM templating,
|
// The path might already exist, but in case of VM templating,
|
||||||
// we have to create it since the sandbox has not created it yet.
|
// we have to create it since the sandbox has not created it yet.
|
||||||
@@ -701,27 +706,12 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add PCIe Root Port devices to hypervisor
|
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
|
||||||
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged into PCIe Root Port.
|
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil {
|
||||||
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
|
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
|
||||||
memSize32bit, memSize64bit := q.arch.getBARsMaxAddressableMemory()
|
return err
|
||||||
|
|
||||||
if hypervisorConfig.PCIeRootPort > 0 {
|
|
||||||
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort, memSize32bit, memSize64bit)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The default OVMF MMIO aperture is too small for some PCIe devices
|
|
||||||
// with huge BARs so we need to increase it.
|
|
||||||
// memSize64bit is in bytes, convert to MB, OVMF expects MB as a string
|
|
||||||
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
|
|
||||||
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
|
|
||||||
fwCfg := govmmQemu.FwCfg{
|
|
||||||
Name: "opt/ovmf/X-PciMmio64Mb",
|
|
||||||
Str: pciMmio64Mb,
|
|
||||||
}
|
}
|
||||||
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
|
|
||||||
}
|
|
||||||
|
|
||||||
q.qemuConfig = qemuConfig
|
q.qemuConfig = qemuConfig
|
||||||
|
|
||||||
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
|
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
|
||||||
@@ -747,6 +737,101 @@ func (q *qemu) checkBpfEnabled() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If a user uses 8 GPUs with 4 devices in each IOMMU Group that means we need
|
||||||
|
// to hotplug 32 devices. We do not have enough PCIe root bus slots to
|
||||||
|
// accomplish this task. Kata will use already some slots for vfio-xxxx-pci
|
||||||
|
// devices.
|
||||||
|
// Max PCI slots per root bus is 32
|
||||||
|
// Max PCIe root ports is 16
|
||||||
|
// Max PCIe switch ports is 16
|
||||||
|
// There is only 64kB of IO memory each root,switch port will consume 4k hence
|
||||||
|
// only 16 ports possible.
|
||||||
|
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error {
|
||||||
|
|
||||||
|
// If no-port set just return no need to add PCIe Root Port or PCIe Switches
|
||||||
|
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add PCIe Root Port or PCIe Switches to the hypervisor
|
||||||
|
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged
|
||||||
|
// into a PCIe Root Port or PCIe Switch.
|
||||||
|
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
|
||||||
|
|
||||||
|
// Deduce the right values for mem-reserve and pref-64-reserve memory regions
|
||||||
|
memSize32bit, memSize64bit := q.arch.getBARsMaxAddressableMemory()
|
||||||
|
|
||||||
|
// The default OVMF MMIO aperture is too small for some PCIe devices
|
||||||
|
// with huge BARs so we need to increase it.
|
||||||
|
// memSize64bit is in bytes, convert to MB, OVMF expects MB as a string
|
||||||
|
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
|
||||||
|
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
|
||||||
|
fwCfg := govmmQemu.FwCfg{
|
||||||
|
Name: "opt/ovmf/X-PciMmio64Mb",
|
||||||
|
Str: pciMmio64Mb,
|
||||||
|
}
|
||||||
|
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the number of hot(cold)-pluggable ports needed from the provided
|
||||||
|
// VFIO devices and VhostUserBlockDevices
|
||||||
|
var numOfPluggablePorts uint32 = 0
|
||||||
|
for _, dev := range hypervisorConfig.VFIODevices {
|
||||||
|
var err error
|
||||||
|
dev.HostPath, err = config.GetHostPath(dev, false, "")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
|
||||||
|
}
|
||||||
|
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
|
||||||
|
}
|
||||||
|
for _, vfioDevice := range devicesPerIOMMUGroup {
|
||||||
|
if drivers.IsPCIeDevice(vfioDevice.BDF) {
|
||||||
|
numOfPluggablePorts = numOfPluggablePorts + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus)
|
||||||
|
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
|
||||||
|
|
||||||
|
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
|
||||||
|
|
||||||
|
// If number of PCIe root ports > 16 then bail out otherwise we may
|
||||||
|
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
|
||||||
|
// cannot be added which are crucial for Kata max slots on root bus is 32
|
||||||
|
// max slots on the complete pci(e) topology is 256 in QEMU
|
||||||
|
if vfioOnRootPort {
|
||||||
|
// On Arm the vhost-user-block device is a PCIe device we need
|
||||||
|
// to account for it in the number of pluggable ports
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
|
||||||
|
}
|
||||||
|
if numOfPluggablePorts > maxPCIeRootPort {
|
||||||
|
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||||
|
}
|
||||||
|
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if vfioOnSwitchPort {
|
||||||
|
// On Arm the vhost-user-block device is a PCIe device we need
|
||||||
|
// to account for it in the number of pluggable ports
|
||||||
|
if machineType == QemuVirt {
|
||||||
|
numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
|
||||||
|
if numOfPluggableRootPorts > maxPCIeRootPort {
|
||||||
|
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
|
||||||
|
}
|
||||||
|
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
|
||||||
|
}
|
||||||
|
if numOfPluggablePorts > maxPCIeSwitchPort {
|
||||||
|
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
|
||||||
|
}
|
||||||
|
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (q *qemu) vhostFSSocketPath(id string) (string, error) {
|
func (q *qemu) vhostFSSocketPath(id string) (string, error) {
|
||||||
return utils.BuildSocketPath(q.config.VMStorePath, id, vhostFSSocket)
|
return utils.BuildSocketPath(q.config.VMStorePath, id, vhostFSSocket)
|
||||||
}
|
}
|
||||||
@@ -1527,6 +1612,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.VhostUserDeviceAttrs, op Operation, devID string) (err error) {
|
func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.VhostUserDeviceAttrs, op Operation, devID string) (err error) {
|
||||||
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false, vAttr.ReconnectTime)
|
err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false, vAttr.ReconnectTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -1544,18 +1630,14 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
|||||||
|
|
||||||
switch machineType {
|
switch machineType {
|
||||||
case QemuVirt:
|
case QemuVirt:
|
||||||
if q.state.PCIeRootPort <= 0 {
|
|
||||||
return fmt.Errorf("Vhost-user-blk device is a PCIe device if machine type is virt. Need to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for virt")
|
|
||||||
}
|
|
||||||
|
|
||||||
//The addr of a dev is corresponding with device:function for PCIe in qemu which starting from 0
|
//The addr of a dev is corresponding with device:function for PCIe in qemu which starting from 0
|
||||||
//Since the dev is the first and only one on this bus(root port), it should be 0.
|
//Since the dev is the first and only one on this bus(root port), it should be 0.
|
||||||
addr := "00"
|
addr := "00"
|
||||||
|
|
||||||
bridgeId := fmt.Sprintf("%s%d", pcieRootPortPrefix, len(drivers.AllPCIeDevs))
|
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
|
||||||
drivers.AllPCIeDevs[devID] = true
|
config.PCIeDevices[config.RootPort][devID] = true
|
||||||
|
|
||||||
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeId)
|
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
|
||||||
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
|
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -1571,7 +1653,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = q.qmpMonitorCh.qmp.ExecutePCIVhostUserDevAdd(q.qmpMonitorCh.ctx, driver, devID, vAttr.DevID, addr, bridgeId); err != nil {
|
if err = q.qmpMonitorCh.qmp.ExecutePCIVhostUserDevAdd(q.qmpMonitorCh.ctx, driver, devID, vAttr.DevID, addr, bridgeID); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1685,41 +1767,108 @@ func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
|
|||||||
|
|
||||||
// Query QMP to find a device's PCI path given its QOM path or ID
|
// Query QMP to find a device's PCI path given its QOM path or ID
|
||||||
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
|
||||||
// XXX: For now we assume there's exactly one bridge, since
|
|
||||||
// that's always how we configure qemu from Kata for now. It
|
var slots []types.PciSlot
|
||||||
// would be good to generalize this to different PCI
|
|
||||||
// topologies
|
|
||||||
devSlot, err := q.qomGetSlot(qemuID)
|
devSlot, err := q.qomGetSlot(qemuID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return types.PciPath{}, err
|
return types.PciPath{}, err
|
||||||
}
|
}
|
||||||
|
slots = append(slots, devSlot)
|
||||||
|
|
||||||
busq, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qemuID, "parent_bus")
|
// This only works for Q35 and Virt
|
||||||
|
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
|
||||||
|
|
||||||
|
var parentPath = qemuID
|
||||||
|
// We do not want to use a forever loop here, a deeper PCIe topology
|
||||||
|
// than 5 is already not advisable just for the sake of having enough
|
||||||
|
// buffer we limit ourselves to 10 and leave the loop early if we hit
|
||||||
|
// the root bus.
|
||||||
|
for i := 1; i <= maxPCIeTopoDepth; i++ {
|
||||||
|
parenBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return types.PciPath{}, err
|
return types.PciPath{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
bus, ok := busq.(string)
|
busQOM, ok := parenBusQOM.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, busq)
|
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we hit /machine/q35/pcie.0 we're done this is the root bus
|
||||||
|
// we climbed the complete hierarchy
|
||||||
|
if r.Match([]byte(busQOM)) {
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// `bus` is the QOM path of the QOM bus object, but we need
|
// `bus` is the QOM path of the QOM bus object, but we need
|
||||||
// the PCI bridge which manages that bus. There doesn't seem
|
// the PCI parent_bus which manages that bus. There doesn't seem
|
||||||
// to be a way to get that other than to simply drop the last
|
// to be a way to get that other than to simply drop the last
|
||||||
// path component.
|
// path component.
|
||||||
idx := strings.LastIndex(bus, "/")
|
idx := strings.LastIndex(busQOM, "/")
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", bus)
|
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
|
||||||
}
|
}
|
||||||
bridge := bus[:idx]
|
parentBus := busQOM[:idx]
|
||||||
|
|
||||||
bridgeSlot, err := q.qomGetSlot(bridge)
|
parentSlot, err := q.qomGetSlot(parentBus)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return types.PciPath{}, err
|
return types.PciPath{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return types.PciPathFromSlots(bridgeSlot, devSlot)
|
// Prepend the slots, since we're climbing the hierarchy
|
||||||
|
slots = append([]types.PciSlot{parentSlot}, slots...)
|
||||||
|
parentPath = parentBus
|
||||||
|
}
|
||||||
|
return types.PciPathFromSlots(slots...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
|
||||||
|
return q.executeVFIODeviceAdd(device)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) hotplugVFIODeviceSwitchPort(ctx context.Context, device *config.VFIODev) (err error) {
|
||||||
|
return q.executeVFIODeviceAdd(device)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) hotplugVFIODeviceBridgePort(ctx context.Context, device *config.VFIODev) (err error) {
|
||||||
|
addr, bridge, err := q.arch.addDeviceToBridge(ctx, device.ID, types.PCI)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
q.arch.removeDeviceFromBridge(device.ID)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return q.executePCIVFIODeviceAdd(device, addr, bridge.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, bridgeID string) error {
|
||||||
|
switch device.Type {
|
||||||
|
case config.VFIOPCIDeviceNormalType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.BDF, addr, bridgeID, romFile)
|
||||||
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("Incorrect VFIO device type found")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
|
||||||
|
switch device.Type {
|
||||||
|
case config.VFIOPCIDeviceNormalType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.BDF, device.Bus, romFile)
|
||||||
|
case config.VFIOPCIDeviceMediatedType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
|
||||||
|
case config.VFIOAPDeviceMediatedType:
|
||||||
|
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("Incorrect VFIO device type found")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op Operation) (err error) {
|
func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op Operation) (err error) {
|
||||||
@@ -1727,85 +1876,32 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
devID := *(*device).GetID()
|
|
||||||
machineType := q.HypervisorConfig().HypervisorMachineType
|
|
||||||
|
|
||||||
if op == AddDevice {
|
if op == AddDevice {
|
||||||
|
|
||||||
buf, _ := json.Marshal(device)
|
buf, _ := json.Marshal(device)
|
||||||
q.Logger().WithFields(logrus.Fields{
|
q.Logger().WithFields(logrus.Fields{
|
||||||
"machine-type": machineType,
|
"machine-type": q.HypervisorConfig().HypervisorMachineType,
|
||||||
"hotplug-vfio-on-root-bus": q.state.HotplugVFIOOnRootBus,
|
"hot-plug-vfio": q.state.HotPlugVFIO,
|
||||||
"pcie-root-port": q.state.PCIeRootPort,
|
|
||||||
"device-info": string(buf),
|
"device-info": string(buf),
|
||||||
}).Info("Start hot-plug VFIO device")
|
}).Info("Start hot-plug VFIO device")
|
||||||
|
// In case MachineType is q35, a PCIe device is hotplugged on
|
||||||
|
// a PCIe Root Port or alternatively on a PCIe Switch Port
|
||||||
|
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
|
||||||
|
device.Bus = ""
|
||||||
|
} else {
|
||||||
|
var err error
|
||||||
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
|
||||||
// for pc machine type instead of bridge. This is useful for devices that require
|
// for pc machine type instead of bridge. This is useful for devices that require
|
||||||
// a large PCI BAR which is a currently a limitation with PCI bridges.
|
// a large PCI BAR which is a currently a limitation with PCI bridges.
|
||||||
if q.state.HotplugVFIOOnRootBus {
|
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
|
||||||
switch (*device).GetType() {
|
err = q.hotplugVFIODeviceRootPort(ctx, device)
|
||||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
} else if q.state.HotPlugVFIO == config.SwitchPort {
|
||||||
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
|
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
|
||||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
|
||||||
}
|
|
||||||
switch machineType {
|
|
||||||
case QemuQ35:
|
|
||||||
if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 {
|
|
||||||
q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
|
|
||||||
pciDevice.Bus = ""
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
pciDevice.Bus = ""
|
|
||||||
}
|
|
||||||
*device = pciDevice
|
|
||||||
|
|
||||||
if pciDevice.Type == config.VFIOPCIDeviceNormalType {
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile)
|
|
||||||
} else {
|
} else {
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile)
|
err = q.hotplugVFIODeviceBridgePort(ctx, device)
|
||||||
}
|
|
||||||
case config.VFIOAPDeviceMediatedType:
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
q.arch.removeDeviceFromBridge(devID)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
switch (*device).GetType() {
|
|
||||||
case config.VFIOPCIDeviceNormalType:
|
|
||||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
|
||||||
}
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile)
|
|
||||||
case config.VFIOPCIDeviceMediatedType:
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile)
|
|
||||||
case config.VFIOAPDeviceMediatedType:
|
|
||||||
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("Incorrect VFIO device type found")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (*device).GetType() {
|
|
||||||
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
|
|
||||||
pciDevice, ok := (*device).(config.VFIOPCIDev)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
|
|
||||||
}
|
}
|
||||||
// XXX: Depending on whether we're doing root port or
|
// XXX: Depending on whether we're doing root port or
|
||||||
// bridge hotplug, and how the bridge is set up in
|
// bridge hotplug, and how the bridge is set up in
|
||||||
@@ -1813,23 +1909,20 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
|
|||||||
// have information about the slot number of the
|
// have information about the slot number of the
|
||||||
// bridge and or the device. For simplicity, just
|
// bridge and or the device. For simplicity, just
|
||||||
// query both of them back from qemu
|
// query both of them back from qemu
|
||||||
guestPciPath, err := q.qomGetPciPath(devID)
|
device.GuestPciPath, err = q.qomGetPciPath(device.ID)
|
||||||
pciDevice.GuestPciPath = guestPciPath
|
|
||||||
*device = pciDevice
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return err
|
|
||||||
} else {
|
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
|
||||||
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
|
|
||||||
|
|
||||||
if !q.state.HotplugVFIOOnRootBus {
|
if !q.state.HotplugVFIOOnRootBus {
|
||||||
if err := q.arch.removeDeviceFromBridge(devID); err != nil {
|
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, devID)
|
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
|
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
|
||||||
@@ -2527,7 +2620,7 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
|
|||||||
for i := uint32(0); i < number; i++ {
|
for i := uint32(0); i < number; i++ {
|
||||||
devices = append(devices,
|
devices = append(devices,
|
||||||
govmmQemu.PCIeRootPortDevice{
|
govmmQemu.PCIeRootPortDevice{
|
||||||
ID: fmt.Sprintf("%s%d", pcieRootPortPrefix, i),
|
ID: fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, i),
|
||||||
Bus: bus,
|
Bus: bus,
|
||||||
Chassis: chassis,
|
Chassis: chassis,
|
||||||
Slot: strconv.FormatUint(uint64(i), 10),
|
Slot: strconv.FormatUint(uint64(i), 10),
|
||||||
@@ -2541,6 +2634,79 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
|
|||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// golangci-lint enforces multi-line comments to be a block comment
|
||||||
|
// not multiple single line comments ...
|
||||||
|
/* pcie.0 bus
|
||||||
|
// -------------------------------------------------
|
||||||
|
// |
|
||||||
|
// -------------
|
||||||
|
// | Root Port |
|
||||||
|
// -------------
|
||||||
|
// -------------------------|------------------------
|
||||||
|
// | ----------------- |
|
||||||
|
// | PCI Express | Upstream Port | |
|
||||||
|
// | Switch ----------------- |
|
||||||
|
// | | | |
|
||||||
|
// | ------------------- ------------------- |
|
||||||
|
// | | Downstream Port | | Downstream Port | |
|
||||||
|
// | ------------------- ------------------- |
|
||||||
|
// -------------|-----------------------|------------
|
||||||
|
// ------------- --------------
|
||||||
|
// | GPU/ACCEL | | IB/ETH NIC |
|
||||||
|
// ------------- --------------
|
||||||
|
*/
|
||||||
|
// genericAppendPCIeSwitch adds a PCIe Swtich
|
||||||
|
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
|
||||||
|
|
||||||
|
// Q35, Virt have the correct PCIe support,
|
||||||
|
// hence ignore all other machines
|
||||||
|
if machineType != QemuQ35 && machineType != QemuVirt {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// Using an own ID for the root port, so we do not clash with already
|
||||||
|
// existing root ports adding "s" for switch prefix
|
||||||
|
pcieRootPort := govmmQemu.PCIeRootPortDevice{
|
||||||
|
ID: fmt.Sprintf("%s%s%d", config.PCIeSwitchPortPrefix, config.PCIeRootPortPrefix, 0),
|
||||||
|
Bus: defaultBridgeBus,
|
||||||
|
Chassis: "1",
|
||||||
|
Slot: strconv.FormatUint(uint64(0), 10),
|
||||||
|
Multifunction: false,
|
||||||
|
Addr: "0",
|
||||||
|
MemReserve: fmt.Sprintf("%dB", memSize32bit),
|
||||||
|
Pref64Reserve: fmt.Sprintf("%dB", memSize64bit),
|
||||||
|
}
|
||||||
|
|
||||||
|
devices = append(devices, pcieRootPort)
|
||||||
|
|
||||||
|
pcieSwitchUpstreamPort := govmmQemu.PCIeSwitchUpstreamPortDevice{
|
||||||
|
ID: fmt.Sprintf("%s%d", config.PCIeSwitchUpstreamPortPrefix, 0),
|
||||||
|
Bus: pcieRootPort.ID,
|
||||||
|
}
|
||||||
|
devices = append(devices, pcieSwitchUpstreamPort)
|
||||||
|
|
||||||
|
currentChassis, err := strconv.Atoi(pcieRootPort.Chassis)
|
||||||
|
if err != nil {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
nextChassis := currentChassis + 1
|
||||||
|
|
||||||
|
for i := uint32(0); i < number; i++ {
|
||||||
|
|
||||||
|
pcieSwitchDownstreamPort := govmmQemu.PCIeSwitchDownstreamPortDevice{
|
||||||
|
ID: fmt.Sprintf("%s%d", config.PCIeSwitchhDownstreamPortPrefix, i),
|
||||||
|
Bus: pcieSwitchUpstreamPort.ID,
|
||||||
|
Chassis: fmt.Sprintf("%d", nextChassis),
|
||||||
|
Slot: strconv.FormatUint(uint64(i), 10),
|
||||||
|
// TODO: MemReserve: fmt.Sprintf("%dB", memSize32bit),
|
||||||
|
// TODO: Pref64Reserve: fmt.Sprintf("%dB", memSize64bit),
|
||||||
|
}
|
||||||
|
devices = append(devices, pcieSwitchDownstreamPort)
|
||||||
|
}
|
||||||
|
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
func (q *qemu) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {
|
func (q *qemu) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {
|
||||||
span, _ := katatrace.Trace(ctx, q.Logger(), "GetThreadIDs", qemuTracingTags, map[string]string{"sandbox_id": q.id})
|
span, _ := katatrace.Trace(ctx, q.Logger(), "GetThreadIDs", qemuTracingTags, map[string]string{"sandbox_id": q.id})
|
||||||
defer span.End()
|
defer span.End()
|
||||||
@@ -2716,7 +2882,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
|
|||||||
s.UUID = q.state.UUID
|
s.UUID = q.state.UUID
|
||||||
s.HotpluggedMemory = q.state.HotpluggedMemory
|
s.HotpluggedMemory = q.state.HotpluggedMemory
|
||||||
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
|
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
|
||||||
s.PCIeRootPort = q.state.PCIeRootPort
|
|
||||||
|
|
||||||
for _, bridge := range q.arch.getBridges() {
|
for _, bridge := range q.arch.getBridges() {
|
||||||
s.Bridges = append(s.Bridges, hv.Bridge{
|
s.Bridges = append(s.Bridges, hv.Bridge{
|
||||||
@@ -2740,7 +2905,6 @@ func (q *qemu) Load(s hv.HypervisorState) {
|
|||||||
q.state.HotpluggedMemory = s.HotpluggedMemory
|
q.state.HotpluggedMemory = s.HotpluggedMemory
|
||||||
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
|
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
|
||||||
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
|
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
|
||||||
q.state.PCIeRootPort = s.PCIeRootPort
|
|
||||||
|
|
||||||
for _, bridge := range s.Bridges {
|
for _, bridge := range s.Bridges {
|
||||||
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))
|
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))
|
||||||
|
|||||||
@@ -124,7 +124,7 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) {
|
|||||||
legacySerial: config.LegacySerial,
|
legacySerial: config.LegacySerial,
|
||||||
},
|
},
|
||||||
vmFactory: factory,
|
vmFactory: factory,
|
||||||
snpGuest: config.SevSnpGuest,
|
snpGuest: config.ConfidentialGuest,
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.ConfidentialGuest {
|
if config.ConfidentialGuest {
|
||||||
@@ -308,6 +308,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware,
|
|||||||
ReducedPhysBits: 1,
|
ReducedPhysBits: 1,
|
||||||
}), "", nil
|
}), "", nil
|
||||||
case noneProtection:
|
case noneProtection:
|
||||||
|
|
||||||
return devices, firmware, nil
|
return devices, firmware, nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -140,6 +140,9 @@ type qemuArch interface {
|
|||||||
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
|
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
|
||||||
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device
|
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device
|
||||||
|
|
||||||
|
// appendPCIeSwitchPortDevice appends an ioh3420 device to a pcie-root-port
|
||||||
|
appendPCIeSwitchPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device
|
||||||
|
|
||||||
// append vIOMMU device
|
// append vIOMMU device
|
||||||
appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
||||||
|
|
||||||
@@ -183,7 +186,8 @@ const (
|
|||||||
defaultBridgeBus = "pcie.0"
|
defaultBridgeBus = "pcie.0"
|
||||||
defaultPCBridgeBus = "pci.0"
|
defaultPCBridgeBus = "pci.0"
|
||||||
maxDevIDSize = 31
|
maxDevIDSize = 31
|
||||||
pcieRootPortPrefix = "rp"
|
maxPCIeRootPort = 16 // Limitation from QEMU
|
||||||
|
maxPCIeSwitchPort = 16 // Limitation from QEMU
|
||||||
)
|
)
|
||||||
|
|
||||||
// This is the PCI start address assigned to the first bridge that
|
// This is the PCI start address assigned to the first bridge that
|
||||||
@@ -677,17 +681,17 @@ func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govm
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
||||||
pciDevice := vfioDev.(config.VFIOPCIDev)
|
|
||||||
if pciDevice.BDF == "" {
|
if vfioDev.BDF == "" {
|
||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
devices = append(devices,
|
devices = append(devices,
|
||||||
govmmQemu.VFIODevice{
|
govmmQemu.VFIODevice{
|
||||||
BDF: pciDevice.BDF,
|
BDF: vfioDev.BDF,
|
||||||
VendorID: pciDevice.VendorID,
|
VendorID: vfioDev.VendorID,
|
||||||
DeviceID: pciDevice.DeviceID,
|
DeviceID: vfioDev.DeviceID,
|
||||||
Bus: pciDevice.Bus,
|
Bus: vfioDev.Bus,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -803,6 +807,13 @@ func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, numb
|
|||||||
return genericAppendPCIeRootPort(devices, number, q.qemuMachine.Type, memSize32bit, memSize64bit)
|
return genericAppendPCIeRootPort(devices, number, q.qemuMachine.Type, memSize32bit, memSize64bit)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// appendPCIeSwitchPortDevice appends a PCIe Switch with <number> ports
|
||||||
|
func (q *qemuArchBase) appendPCIeSwitchPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
|
||||||
|
return genericAppendPCIeSwitchPort(devices, number, q.qemuMachine.Type, memSize32bit, memSize64bit)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBARsMaxAddressableMemory we need to know the BAR sizes to configure the
|
||||||
|
// PCIe Root Port or PCIe Downstream Port attaching a device with huge BARs.
|
||||||
func (q *qemuArchBase) getBARsMaxAddressableMemory() (uint64, uint64) {
|
func (q *qemuArchBase) getBARsMaxAddressableMemory() (uint64, uint64) {
|
||||||
|
|
||||||
pci := nvpci.New()
|
pci := nvpci.New()
|
||||||
|
|||||||
@@ -470,7 +470,7 @@ func TestQemuArchBaseAppendVFIODevice(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
vfDevice := config.VFIOPCIDev{
|
vfDevice := config.VFIODev{
|
||||||
BDF: bdf,
|
BDF: bdf,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -490,7 +490,7 @@ func TestQemuArchBaseAppendVFIODeviceWithVendorDeviceID(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
vfDevice := config.VFIOPCIDev{
|
vfDevice := config.VFIODev{
|
||||||
BDF: bdf,
|
BDF: bdf,
|
||||||
VendorID: vendorID,
|
VendorID: vendorID,
|
||||||
DeviceID: deviceID,
|
DeviceID: deviceID,
|
||||||
|
|||||||
@@ -111,9 +111,6 @@ func TestQemuCreateVM(t *testing.T) {
|
|||||||
config6 := newQemuConfig()
|
config6 := newQemuConfig()
|
||||||
config6.DisableGuestSeLinux = false
|
config6.DisableGuestSeLinux = false
|
||||||
|
|
||||||
config7 := newQemuConfig()
|
|
||||||
config7.PCIeRootPort = 1
|
|
||||||
|
|
||||||
config8 := newQemuConfig()
|
config8 := newQemuConfig()
|
||||||
config8.EnableVhostUserStore = true
|
config8.EnableVhostUserStore = true
|
||||||
config8.HugePages = true
|
config8.HugePages = true
|
||||||
@@ -161,7 +158,6 @@ func TestQemuCreateVM(t *testing.T) {
|
|||||||
{config3, false, true},
|
{config3, false, true},
|
||||||
{config5, false, true},
|
{config5, false, true},
|
||||||
{config6, false, false},
|
{config6, false, false},
|
||||||
{config7, false, true},
|
|
||||||
{config8, false, true},
|
{config8, false, true},
|
||||||
{config9, true, false},
|
{config9, true, false},
|
||||||
{config10, false, true},
|
{config10, false, true},
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ import (
|
|||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/drivers"
|
||||||
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
|
deviceManager "github.com/kata-containers/kata-containers/src/runtime/pkg/device/manager"
|
||||||
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
|
|
||||||
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
|
||||||
resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol"
|
resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol"
|
||||||
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
|
||||||
@@ -101,15 +100,10 @@ type HypervisorPidKey struct{}
|
|||||||
|
|
||||||
// SandboxStatus describes a sandbox status.
|
// SandboxStatus describes a sandbox status.
|
||||||
type SandboxStatus struct {
|
type SandboxStatus struct {
|
||||||
ContainersStatus []ContainerStatus
|
|
||||||
|
|
||||||
// Annotations allow clients to store arbitrary values,
|
|
||||||
// for example to add additional status values required
|
|
||||||
// to support particular specifications.
|
|
||||||
Annotations map[string]string
|
Annotations map[string]string
|
||||||
|
|
||||||
ID string
|
ID string
|
||||||
Hypervisor HypervisorType
|
Hypervisor HypervisorType
|
||||||
|
ContainersStatus []ContainerStatus
|
||||||
State types.SandboxState
|
State types.SandboxState
|
||||||
HypervisorConfig HypervisorConfig
|
HypervisorConfig HypervisorConfig
|
||||||
}
|
}
|
||||||
@@ -176,10 +170,8 @@ type SandboxConfig struct {
|
|||||||
|
|
||||||
// SharePidNs sets all containers to share the same sandbox level pid namespace.
|
// SharePidNs sets all containers to share the same sandbox level pid namespace.
|
||||||
SharePidNs bool
|
SharePidNs bool
|
||||||
|
|
||||||
// SystemdCgroup enables systemd cgroup support
|
// SystemdCgroup enables systemd cgroup support
|
||||||
SystemdCgroup bool
|
SystemdCgroup bool
|
||||||
|
|
||||||
// SandboxCgroupOnly enables cgroup only at podlevel in the host
|
// SandboxCgroupOnly enables cgroup only at podlevel in the host
|
||||||
SandboxCgroupOnly bool
|
SandboxCgroupOnly bool
|
||||||
|
|
||||||
@@ -623,22 +615,49 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
|
|
||||||
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
|
// If we have a confidential guest we need to cold-plug the PCIe VFIO devices
|
||||||
// until we have TDISP/IDE PCIe support.
|
// until we have TDISP/IDE PCIe support.
|
||||||
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != hv.NoPort)
|
coldPlugVFIO := (sandboxConfig.HypervisorConfig.ColdPlugVFIO != config.NoPort)
|
||||||
var devs []config.DeviceInfo
|
// Aggregate all the container devices for hot-plug and use them to deduce
|
||||||
|
// the correct amount of ports to reserve for the hypervisor.
|
||||||
|
hotPlugVFIO := (sandboxConfig.HypervisorConfig.HotPlugVFIO != config.NoPort)
|
||||||
|
|
||||||
|
var vfioDevices []config.DeviceInfo
|
||||||
|
// vhost-user-block device is a PCIe device in Virt, keep track of it
|
||||||
|
// for correct number of PCIe root ports.
|
||||||
|
var vhostUserBlkDevices []config.DeviceInfo
|
||||||
|
|
||||||
for cnt, containers := range sandboxConfig.Containers {
|
for cnt, containers := range sandboxConfig.Containers {
|
||||||
for dev, device := range containers.DeviceInfos {
|
for dev, device := range containers.DeviceInfos {
|
||||||
if coldPlugVFIO && deviceManager.IsVFIO(device.ContainerPath) {
|
|
||||||
|
if deviceManager.IsVhostUserBlk(device) {
|
||||||
|
vhostUserBlkDevices = append(vhostUserBlkDevices, device)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
isVFIO := deviceManager.IsVFIO(device.ContainerPath)
|
||||||
|
if hotPlugVFIO && isVFIO {
|
||||||
|
vfioDevices = append(vfioDevices, device)
|
||||||
|
sandboxConfig.Containers[cnt].DeviceInfos[dev].Port = sandboxConfig.HypervisorConfig.HotPlugVFIO
|
||||||
|
}
|
||||||
|
if coldPlugVFIO && isVFIO {
|
||||||
device.ColdPlug = true
|
device.ColdPlug = true
|
||||||
devs = append(devs, device)
|
device.Port = sandboxConfig.HypervisorConfig.ColdPlugVFIO
|
||||||
|
vfioDevices = append(vfioDevices, device)
|
||||||
// We need to remove the devices marked for cold-plug
|
// We need to remove the devices marked for cold-plug
|
||||||
// otherwise at the container level the kata-agent
|
// otherwise at the container level the kata-agent
|
||||||
// will try to hot-plug them.
|
// will try to hot-plug them.
|
||||||
infos := sandboxConfig.Containers[cnt].DeviceInfos
|
sandboxConfig.Containers[cnt].DeviceInfos[dev].ID = "remove-we-are-cold-plugging"
|
||||||
infos = append(infos[:dev], infos[dev+1:]...)
|
|
||||||
sandboxConfig.Containers[cnt].DeviceInfos = infos
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var filteredDevices []config.DeviceInfo
|
||||||
|
for _, device := range containers.DeviceInfos {
|
||||||
|
if device.ID != "remove-we-are-cold-plugging" {
|
||||||
|
filteredDevices = append(filteredDevices, device)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
sandboxConfig.Containers[cnt].DeviceInfos = filteredDevices
|
||||||
|
|
||||||
|
}
|
||||||
|
sandboxConfig.HypervisorConfig.VFIODevices = vfioDevices
|
||||||
|
sandboxConfig.HypervisorConfig.VhostUserBlkDevices = vhostUserBlkDevices
|
||||||
|
|
||||||
// store doesn't require hypervisor to be stored immediately
|
// store doesn't require hypervisor to be stored immediately
|
||||||
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
|
if err = s.hypervisor.CreateVM(ctx, s.id, s.network, &sandboxConfig.HypervisorConfig); err != nil {
|
||||||
@@ -653,7 +672,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor
|
|||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, dev := range devs {
|
for _, dev := range vfioDevices {
|
||||||
_, err := s.AddDevice(ctx, dev)
|
_, err := s.AddDevice(ctx, dev)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
|
s.Logger().WithError(err).Debug("Cannot cold-plug add device")
|
||||||
@@ -1709,7 +1728,6 @@ func (s *Sandbox) createContainers(ctx context.Context) error {
|
|||||||
defer span.End()
|
defer span.End()
|
||||||
|
|
||||||
for i := range s.config.Containers {
|
for i := range s.config.Containers {
|
||||||
|
|
||||||
c, err := newContainer(ctx, s, &s.config.Containers[i])
|
c, err := newContainer(ctx, s, &s.config.Containers[i])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -1728,7 +1746,6 @@ func (s *Sandbox) createContainers(ctx context.Context) error {
|
|||||||
if err := s.updateResources(ctx); err != nil {
|
if err := s.updateResources(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.resourceControllerUpdate(ctx); err != nil {
|
if err := s.resourceControllerUpdate(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1740,7 +1757,6 @@ func (s *Sandbox) createContainers(ctx context.Context) error {
|
|||||||
if err := s.storeSandbox(ctx); err != nil {
|
if err := s.storeSandbox(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1904,15 +1920,11 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
|
|||||||
// adding a group of VFIO devices
|
// adding a group of VFIO devices
|
||||||
for _, dev := range vfioDevices {
|
for _, dev := range vfioDevices {
|
||||||
if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil {
|
if _, err := s.hypervisor.HotplugAddDevice(ctx, dev, VfioDev); err != nil {
|
||||||
bdf := ""
|
|
||||||
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
|
||||||
bdf = pciDevice.BDF
|
|
||||||
}
|
|
||||||
s.Logger().
|
s.Logger().
|
||||||
WithFields(logrus.Fields{
|
WithFields(logrus.Fields{
|
||||||
"sandbox": s.id,
|
"sandbox": s.id,
|
||||||
"vfio-device-ID": (*dev).GetID(),
|
"vfio-device-ID": dev.ID,
|
||||||
"vfio-device-BDF": bdf,
|
"vfio-device-BDF": dev.BDF,
|
||||||
}).WithError(err).Error("failed to hotplug VFIO device")
|
}).WithError(err).Error("failed to hotplug VFIO device")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -1927,6 +1939,7 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy
|
|||||||
return err
|
return err
|
||||||
case config.VhostUserBlk:
|
case config.VhostUserBlk:
|
||||||
vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice)
|
vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice)
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
|
return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
|
||||||
}
|
}
|
||||||
@@ -1961,15 +1974,11 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de
|
|||||||
// remove a group of VFIO devices
|
// remove a group of VFIO devices
|
||||||
for _, dev := range vfioDevices {
|
for _, dev := range vfioDevices {
|
||||||
if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil {
|
if _, err := s.hypervisor.HotplugRemoveDevice(ctx, dev, VfioDev); err != nil {
|
||||||
bdf := ""
|
|
||||||
if pciDevice, ok := (*dev).(config.VFIOPCIDev); ok {
|
|
||||||
bdf = pciDevice.BDF
|
|
||||||
}
|
|
||||||
s.Logger().WithError(err).
|
s.Logger().WithError(err).
|
||||||
WithFields(logrus.Fields{
|
WithFields(logrus.Fields{
|
||||||
"sandbox": s.id,
|
"sandbox": s.id,
|
||||||
"vfio-device-ID": (*dev).GetID(),
|
"vfio-device-ID": dev.ID,
|
||||||
"vfio-device-BDF": bdf,
|
"vfio-device-BDF": dev.BDF,
|
||||||
}).Error("failed to hot unplug VFIO device")
|
}).Error("failed to hot unplug VFIO device")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -593,11 +593,11 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) {
|
|||||||
_, err = os.Create(deviceFile)
|
_, err = os.Create(deviceFile)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
||||||
savedIOMMUPath := config.SysIOMMUPath
|
savedIOMMUPath := config.SysIOMMUGroupPath
|
||||||
config.SysIOMMUPath = tmpDir
|
config.SysIOMMUGroupPath = tmpDir
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
config.SysIOMMUPath = savedIOMMUPath
|
config.SysIOMMUGroupPath = savedIOMMUPath
|
||||||
}()
|
}()
|
||||||
|
|
||||||
dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil)
|
dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil)
|
||||||
|
|||||||
Reference in New Issue
Block a user