Merge pull request #4492 from zvonkok/pcie-topology

runtime: fix PCIe topology for GPUDirect use-case
This commit is contained in:
Peng Tao
2023-07-03 09:17:12 +08:00
committed by GitHub
35 changed files with 952 additions and 560 deletions

View File

@@ -20,7 +20,6 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
"github.com/kata-containers/kata-containers/src/runtime/pkg/govmm"
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
@@ -78,88 +77,88 @@ type factory struct {
}
type hypervisor struct {
Path string `toml:"path"`
JailerPath string `toml:"jailer_path"`
Kernel string `toml:"kernel"`
CtlPath string `toml:"ctlpath"`
Initrd string `toml:"initrd"`
Image string `toml:"image"`
RootfsType string `toml:"rootfs_type"`
Firmware string `toml:"firmware"`
FirmwareVolume string `toml:"firmware_volume"`
MachineAccelerators string `toml:"machine_accelerators"`
CPUFeatures string `toml:"cpu_features"`
KernelParams string `toml:"kernel_params"`
MachineType string `toml:"machine_type"`
BlockDeviceDriver string `toml:"block_device_driver"`
EntropySource string `toml:"entropy_source"`
SharedFS string `toml:"shared_fs"`
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
VirtioFSCache string `toml:"virtio_fs_cache"`
VhostUserStorePath string `toml:"vhost_user_store_path"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
BlockDeviceAIO string `toml:"block_device_aio"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
MemOffset uint64 `toml:"memory_offset"`
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
NumVCPUs int32 `toml:"default_vcpus"`
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
IOMMU bool `toml:"enable_iommu"`
IOMMUPlatform bool `toml:"enable_iommu_platform"`
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
ColdPlugVFIO hv.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
SevSnpGuest bool `toml:"sev_snp_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"`
DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"`
Path string `toml:"path"`
JailerPath string `toml:"jailer_path"`
Kernel string `toml:"kernel"`
CtlPath string `toml:"ctlpath"`
Initrd string `toml:"initrd"`
Image string `toml:"image"`
RootfsType string `toml:"rootfs_type"`
Firmware string `toml:"firmware"`
FirmwareVolume string `toml:"firmware_volume"`
MachineAccelerators string `toml:"machine_accelerators"`
CPUFeatures string `toml:"cpu_features"`
KernelParams string `toml:"kernel_params"`
MachineType string `toml:"machine_type"`
BlockDeviceDriver string `toml:"block_device_driver"`
EntropySource string `toml:"entropy_source"`
SharedFS string `toml:"shared_fs"`
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
VirtioFSCache string `toml:"virtio_fs_cache"`
VhostUserStorePath string `toml:"vhost_user_store_path"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
SeccompSandbox string `toml:"seccompsandbox"`
BlockDeviceAIO string `toml:"block_device_aio"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
MemOffset uint64 `toml:"memory_offset"`
DefaultMaxMemorySize uint64 `toml:"default_maxmemory"`
DiskRateLimiterBwMaxRate int64 `toml:"disk_rate_limiter_bw_max_rate"`
DiskRateLimiterBwOneTimeBurst int64 `toml:"disk_rate_limiter_bw_one_time_burst"`
DiskRateLimiterOpsMaxRate int64 `toml:"disk_rate_limiter_ops_max_rate"`
DiskRateLimiterOpsOneTimeBurst int64 `toml:"disk_rate_limiter_ops_one_time_burst"`
NetRateLimiterBwMaxRate int64 `toml:"net_rate_limiter_bw_max_rate"`
NetRateLimiterBwOneTimeBurst int64 `toml:"net_rate_limiter_bw_one_time_burst"`
NetRateLimiterOpsMaxRate int64 `toml:"net_rate_limiter_ops_max_rate"`
NetRateLimiterOpsOneTimeBurst int64 `toml:"net_rate_limiter_ops_one_time_burst"`
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
VirtioFSQueueSize uint32 `toml:"virtio_fs_queue_size"`
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
NumVCPUs int32 `toml:"default_vcpus"`
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
IOMMU bool `toml:"enable_iommu"`
IOMMUPlatform bool `toml:"enable_iommu_platform"`
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
HotPlugVFIO config.PCIePort `toml:"hot_plug_vfio"`
ColdPlugVFIO config.PCIePort `toml:"cold_plug_vfio"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
SevSnpGuest bool `toml:"sev_snp_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"`
DisableSeLinux bool `toml:"disable_selinux"`
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
LegacySerial bool `toml:"use_legacy_serial"`
}
type runtime struct {
@@ -287,12 +286,18 @@ func (h hypervisor) firmware() (string, error) {
return ResolvePath(p)
}
func (h hypervisor) coldPlugVFIO() hv.PCIePort {
func (h hypervisor) coldPlugVFIO() config.PCIePort {
if h.ColdPlugVFIO == "" {
return defaultColdPlugVFIO
}
return h.ColdPlugVFIO
}
func (h hypervisor) hotPlugVFIO() config.PCIePort {
if h.HotPlugVFIO == "" {
return defaultHotPlugVFIO
}
return h.HotPlugVFIO
}
func (h hypervisor) firmwareVolume() (string, error) {
p := h.FirmwareVolume
@@ -863,8 +868,8 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
HotPlugVFIO: h.hotPlugVFIO(),
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
@@ -1060,7 +1065,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
ColdPlugVFIO: h.coldPlugVFIO(),
PCIeRootPort: h.PCIeRootPort,
HotPlugVFIO: h.hotPlugVFIO(),
DisableVhostNet: true,
GuestHookPath: h.guestHookPath(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
@@ -1291,7 +1296,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
ColdPlugVFIO: defaultColdPlugVFIO,
PCIeRootPort: defaultPCIeRootPort,
HotPlugVFIO: defaultHotPlugVFIO,
GuestHookPath: defaultGuestHookPath,
VhostUserStorePath: defaultVhostUserStorePath,
VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect,
@@ -1663,9 +1668,10 @@ func checkConfig(config oci.RuntimeConfig) error {
return err
}
hotPlugVFIO := config.HypervisorConfig.HotPlugVFIO
coldPlugVFIO := config.HypervisorConfig.ColdPlugVFIO
machineType := config.HypervisorConfig.HypervisorMachineType
if err := checkPCIeConfig(coldPlugVFIO, machineType); err != nil {
if err := checkPCIeConfig(coldPlugVFIO, hotPlugVFIO, machineType); err != nil {
return err
}
@@ -1675,18 +1681,32 @@ func checkConfig(config oci.RuntimeConfig) error {
// checkPCIeConfig ensures the PCIe configuration is valid.
// Only allow one of the following settings for cold-plug:
// no-port, root-port, switch-port
func checkPCIeConfig(vfioPort hv.PCIePort, machineType string) error {
func checkPCIeConfig(coldPlug config.PCIePort, hotPlug config.PCIePort, machineType string) error {
// Currently only QEMU q35 supports advanced PCIe topologies
// firecracker, dragonball do not have right now any PCIe support
if machineType != "q35" {
return nil
}
if vfioPort == hv.NoPort || vfioPort == hv.RootPort || vfioPort == hv.SwitchPort {
if coldPlug != config.NoPort && hotPlug != config.NoPort {
return fmt.Errorf("invalid hot-plug=%s and cold-plug=%s settings, only one of them can be set", coldPlug, hotPlug)
}
if coldPlug == config.NoPort && hotPlug == config.NoPort {
return nil
}
var port config.PCIePort
if coldPlug != config.NoPort {
port = coldPlug
}
if hotPlug != config.NoPort {
port = hotPlug
}
if port == config.NoPort || port == config.BridgePort || port == config.RootPort || port == config.SwitchPort {
return nil
}
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s",
vfioPort, hv.NoPort, hv.RootPort, hv.SwitchPort)
return fmt.Errorf("invalid vfio_port=%s setting, allowed values %s, %s, %s, %s",
coldPlug, config.NoPort, config.BridgePort, config.RootPort, config.SwitchPort)
}
// checkNetNsConfig performs sanity checks on disable_new_netns config.