From 3b5c5bcfa4eda9343e32d0967f1fc903c1f1458f Mon Sep 17 00:00:00 2001
From: Jeremi Piotrowski
Date: Wed, 30 Aug 2023 16:40:55 +0000
Subject: [PATCH] runtime: clh: Support enabling iommu

Support enabling the IOMMU by enabling it on the default PCI segment.
For hotplug to work we need a virtualized IOMMU, and clh exposes one
when some device or PCI segment requests it. I would have preferred to
add a separate PCI segment for hotplugging VFIO devices, but
unfortunately kata assumes there is only one segment all over the
place; see create_pci_root_bus_path(), split_vfio_pci_option() and grep
for '0000'. Enabling the IOMMU on the default PCI segment requires
enabling the IOMMU on every device that is attached to it, which is why
the setting is sprinkled all over this patch.

CLH does not support IOMMU for VirtioFs, so I've added a non-IOMMU
segment for that device.

Signed-off-by: Jeremi Piotrowski
(cherry picked from commit 3a1db7a86ba1ab0dd3a659f29612535087c0922e)
---
 src/runtime/config/configuration-clh.toml.in |  6 ++++++
 src/runtime/virtcontainers/clh.go            | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in
index a7c203d20..4531a4e65 100644
--- a/src/runtime/config/configuration-clh.toml.in
+++ b/src/runtime/config/configuration-clh.toml.in
@@ -188,6 +188,12 @@ block_device_driver = "virtio-blk"
 # Disable the 'seccomp' feature from Cloud Hypervisor, default false
 # disable_seccomp = true
 
+# Enable vIOMMU, default false
+# Enabling this will result in the VM having a vIOMMU device
+# This will also add the following options to the kernel's
+# command line: iommu=pt
+#enable_iommu = true
+
 # This option changes the default hypervisor and kernel parameters
 # to enable debug output where available.
 #
diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go
index 10a1fc7ab..f031811e9 100644
--- a/src/runtime/virtcontainers/clh.go
+++ b/src/runtime/virtcontainers/clh.go
@@ -490,6 +490,13 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 	}
 	clh.vmconfig.Payload.SetKernel(kernelPath)
 
+	clh.vmconfig.Platform = chclient.NewPlatformConfig()
+	platform := clh.vmconfig.Platform
+	platform.SetNumPciSegments(2)
+	if clh.config.IOMMU {
+		platform.SetIommuSegments([]int32{0})
+	}
+
 	if clh.config.ConfidentialGuest {
 		if err := clh.enableProtection(); err != nil {
 			return err
@@ -528,6 +535,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 		// start the guest kernel with 'quiet' in non-debug mode
 		params = append(params, Param{"quiet", ""})
 	}
+	if clh.config.IOMMU {
+		params = append(params, Param{"iommu", "pt"})
+	}
 
 	// Followed by extra kernel parameters defined in the configuration file
 	params = append(params, clh.config.KernelParams...)
@@ -536,6 +546,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 
 	// set random device generator to hypervisor
 	clh.vmconfig.Rng = chclient.NewRngConfig(clh.config.EntropySource)
+	clh.vmconfig.Rng.SetIommu(clh.config.IOMMU)
 
 	// set the initial root/boot disk of hypervisor
 	imagePath, err := clh.config.ImageAssetPath()
@@ -561,6 +572,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 	} else {
 		pmem := chclient.NewPmemConfig(imagePath)
 		*pmem.DiscardWrites = true
+		pmem.SetIommu(clh.config.IOMMU)
 
 		if clh.vmconfig.Pmem != nil {
 			*clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem)
@@ -598,6 +610,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net
 
 		clh.vmconfig.Console = chclient.NewConsoleConfig(cctOFF)
 	}
+	clh.vmconfig.Console.SetIommu(clh.config.IOMMU)
 
 	cpu_topology := chclient.NewCpuTopology()
 	cpu_topology.ThreadsPerCore = func(i int32) *int32 { return &i }(1)
@@ -840,6 +853,7 @@ func (clh *cloudHypervisor) hotplugAddBlockDevice(drive *config.BlockDrive) erro
 	queueSize := int32(1024)
 	clhDisk.NumQueues = &queues
 	clhDisk.QueueSize = &queueSize
+	clhDisk.SetIommu(clh.config.IOMMU)
 
 	diskRateLimiterConfig := clh.getDiskRateLimiterConfig()
 	if diskRateLimiterConfig != nil {
@@ -865,6 +879,7 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
 
 	// Create the clh device config via the constructor to ensure default values are properly assigned
 	clhDevice := *chclient.NewDeviceConfig(device.SysfsDev)
+	clhDevice.SetIommu(clh.config.IOMMU)
 	pciInfo, _, err := cl.VmAddDevicePut(ctx, clhDevice)
 	if err != nil {
 		return fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err))
@@ -1538,6 +1553,7 @@ func (clh *cloudHypervisor) addVSock(cid int64, path string) {
 	}).Info("Adding HybridVSock")
 
 	clh.vmconfig.Vsock = chclient.NewVsockConfig(cid, path)
+	clh.vmconfig.Vsock.SetIommu(clh.config.IOMMU)
 }
 
 func (clh *cloudHypervisor) getRateLimiterConfig(bwSize, bwOneTimeBurst, opsSize, opsOneTimeBurst int64) *chclient.RateLimiterConfig {
@@ -1607,6 +1623,7 @@ func (clh *cloudHypervisor) addNet(e Endpoint) error {
 	if netRateLimiterConfig != nil {
 		net.SetRateLimiterConfig(*netRateLimiterConfig)
 	}
+	net.SetIommu(clh.config.IOMMU)
 
 	if clh.netDevices != nil {
 		*clh.netDevices = append(*clh.netDevices, *net)
@@ -1639,6 +1656,7 @@ func (clh *cloudHypervisor) addVolume(volume types.Volume) error {
 	}
 
 	fs := chclient.NewFsConfig(volume.MountTag, vfsdSockPath, numQueues, queueSize)
+	fs.SetPciSegment(1)
 	clh.vmconfig.Fs = &[]chclient.FsConfig{*fs}
 
 	clh.Logger().Debug("Adding share volume to hypervisor: ", volume.MountTag)
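
Usage note: with this patch applied, the vIOMMU stays off by default and is
opted into through the new knob in the Cloud Hypervisor section of the
runtime configuration rendered from configuration-clh.toml.in. A minimal
sketch follows; the install path is an assumption (it varies by
distribution), while the section name and option come from the config
template above:

    # /etc/kata-containers/configuration-clh.toml (path is an assumption)
    [hypervisor.clh]
    # Gives the VM a vIOMMU device and appends iommu=pt to the guest
    # kernel command line, as documented in the template above
    enable_iommu = true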