Merge pull request #4492 from zvonkok/pcie-topology

runtime: fix PCIe topology for GPUDirect use-case
This commit is contained in:
Peng Tao
2023-07-03 09:17:12 +08:00
committed by GitHub
35 changed files with 952 additions and 560 deletions

View File

@@ -65,6 +65,11 @@ const romFile = ""
// Default value is false.
const defaultDisableModern = false
// A deeper PCIe topology than 5 is already not advisable just for the sake
// of having enough buffer we limit ourselves to 10 and exit if we reach
// the root bus
const maxPCIeTopoDepth = 10
type qmpChannel struct {
qmp *govmmQemu.QMP
ctx context.Context
@@ -75,15 +80,15 @@ type qmpChannel struct {
// QemuState keeps Qemu's state
type QemuState struct {
UUID string
Bridges []types.Bridge
// HotpluggedCPUs is the list of CPUs that were hot-added
UUID string
HotPlugVFIO config.PCIePort
Bridges []types.Bridge
HotpluggedVCPUs []hv.CPUDevice
HotpluggedMemory int
VirtiofsDaemonPid int
PCIeRootPort int
HotplugVFIOOnRootBus bool
ColdPlugVFIO hv.PCIePort
HotplugVFIO config.PCIePort
ColdPlugVFIO config.PCIePort
}
// qemu is an Hypervisor interface implementation for the Linux qemu hypervisor.
@@ -282,10 +287,10 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso
q.Logger().Debug("Creating UUID")
q.state.UUID = uuid.Generate().String()
q.state.HotPlugVFIO = q.config.HotPlugVFIO
q.state.ColdPlugVFIO = q.config.ColdPlugVFIO
q.state.HotplugVFIOOnRootBus = q.config.HotplugVFIOOnRootBus
q.state.PCIeRootPort = int(q.config.PCIeRootPort)
q.state.HotPlugVFIO = q.config.HotPlugVFIO
// The path might already exist, but in case of VM templating,
// we have to create it since the sandbox has not created it yet.
@@ -701,27 +706,12 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
}
}
// Add PCIe Root Port devices to hypervisor
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged into PCIe Root Port.
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
memSize32bit, memSize64bit := q.arch.getBARsMaxAddressableMemory()
if hypervisorConfig.PCIeRootPort > 0 {
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, hypervisorConfig.PCIeRootPort, memSize32bit, memSize64bit)
}
// The default OVMF MMIO aperture is too small for some PCIe devices
// with huge BARs so we need to increase it.
// memSize64bit is in bytes, convert to MB, OVMF expects MB as a string
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
fwCfg := govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: pciMmio64Mb,
if machine.Type == QemuQ35 || machine.Type == QemuVirt {
if err := q.createPCIeTopology(&qemuConfig, hypervisorConfig, machine.Type); err != nil {
q.Logger().WithError(err).Errorf("Cannot create PCIe topology")
return err
}
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
}
q.qemuConfig = qemuConfig
q.virtiofsDaemon, err = q.createVirtiofsDaemon(hypervisorConfig.SharedPath)
@@ -747,6 +737,101 @@ func (q *qemu) checkBpfEnabled() {
}
}
// If a user uses 8 GPUs with 4 devices in each IOMMU Group that means we need
// to hotplug 32 devices. We do not have enough PCIe root bus slots to
// accomplish this task. Kata will use already some slots for vfio-xxxx-pci
// devices.
// Max PCI slots per root bus is 32
// Max PCIe root ports is 16
// Max PCIe switch ports is 16
// There is only 64kB of IO memory each root,switch port will consume 4k hence
// only 16 ports possible.
func (q *qemu) createPCIeTopology(qemuConfig *govmmQemu.Config, hypervisorConfig *HypervisorConfig, machineType string) error {
// If no-port set just return no need to add PCIe Root Port or PCIe Switches
if hypervisorConfig.HotPlugVFIO == config.NoPort && hypervisorConfig.ColdPlugVFIO == config.NoPort && machineType == QemuQ35 {
return nil
}
// Add PCIe Root Port or PCIe Switches to the hypervisor
// The pcie.0 bus do not support hot-plug, but PCIe device can be hot-plugged
// into a PCIe Root Port or PCIe Switch.
// For more details, please see https://github.com/qemu/qemu/blob/master/docs/pcie.txt
// Deduce the right values for mem-reserve and pref-64-reserve memory regions
memSize32bit, memSize64bit := q.arch.getBARsMaxAddressableMemory()
// The default OVMF MMIO aperture is too small for some PCIe devices
// with huge BARs so we need to increase it.
// memSize64bit is in bytes, convert to MB, OVMF expects MB as a string
if strings.Contains(strings.ToLower(hypervisorConfig.FirmwarePath), "ovmf") {
pciMmio64Mb := fmt.Sprintf("%d", (memSize64bit / 1024 / 1024))
fwCfg := govmmQemu.FwCfg{
Name: "opt/ovmf/X-PciMmio64Mb",
Str: pciMmio64Mb,
}
qemuConfig.FwCfg = append(qemuConfig.FwCfg, fwCfg)
}
// Get the number of hot(cold)-pluggable ports needed from the provided
// VFIO devices and VhostUserBlockDevices
var numOfPluggablePorts uint32 = 0
for _, dev := range hypervisorConfig.VFIODevices {
var err error
dev.HostPath, err = config.GetHostPath(dev, false, "")
if err != nil {
return fmt.Errorf("Cannot get host path for device: %v err: %v", dev, err)
}
devicesPerIOMMUGroup, err := drivers.GetAllVFIODevicesFromIOMMUGroup(dev)
if err != nil {
return fmt.Errorf("Cannot get all VFIO devices from IOMMU group with device: %v err: %v", dev, err)
}
for _, vfioDevice := range devicesPerIOMMUGroup {
if drivers.IsPCIeDevice(vfioDevice.BDF) {
numOfPluggablePorts = numOfPluggablePorts + 1
}
}
}
vfioOnRootPort := (q.state.HotPlugVFIO == config.RootPort || q.state.ColdPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus)
vfioOnSwitchPort := (q.state.HotPlugVFIO == config.SwitchPort || q.state.ColdPlugVFIO == config.SwitchPort)
numOfVhostUserBlockDevices := len(hypervisorConfig.VhostUserBlkDevices)
// If number of PCIe root ports > 16 then bail out otherwise we may
// use up all slots or IO memory on the root bus and vfio-XXX-pci devices
// cannot be added which are crucial for Kata max slots on root bus is 32
// max slots on the complete pci(e) topology is 256 in QEMU
if vfioOnRootPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggablePorts = numOfPluggablePorts + uint32(numOfVhostUserBlockDevices)
}
if numOfPluggablePorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
return nil
}
if vfioOnSwitchPort {
// On Arm the vhost-user-block device is a PCIe device we need
// to account for it in the number of pluggable ports
if machineType == QemuVirt {
numOfPluggableRootPorts := uint32(numOfVhostUserBlockDevices)
if numOfPluggableRootPorts > maxPCIeRootPort {
return fmt.Errorf("Number of PCIe Root Ports exceeed allowed max of %d", maxPCIeRootPort)
}
qemuConfig.Devices = q.arch.appendPCIeRootPortDevice(qemuConfig.Devices, numOfPluggableRootPorts, memSize32bit, memSize64bit)
}
if numOfPluggablePorts > maxPCIeSwitchPort {
return fmt.Errorf("Number of PCIe Switch Ports exceeed allowed max of %d", maxPCIeSwitchPort)
}
qemuConfig.Devices = q.arch.appendPCIeSwitchPortDevice(qemuConfig.Devices, numOfPluggablePorts, memSize32bit, memSize64bit)
return nil
}
return nil
}
func (q *qemu) vhostFSSocketPath(id string) (string, error) {
return utils.BuildSocketPath(q.config.VMStorePath, id, vhostFSSocket)
}
@@ -1527,6 +1612,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri
}
func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.VhostUserDeviceAttrs, op Operation, devID string) (err error) {
err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false, vAttr.ReconnectTime)
if err != nil {
return err
@@ -1544,18 +1630,14 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
switch machineType {
case QemuVirt:
if q.state.PCIeRootPort <= 0 {
return fmt.Errorf("Vhost-user-blk device is a PCIe device if machine type is virt. Need to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for virt")
}
//The addr of a dev is corresponding with device:function for PCIe in qemu which starting from 0
//Since the dev is the first and only one on this bus(root port), it should be 0.
addr := "00"
bridgeId := fmt.Sprintf("%s%d", pcieRootPortPrefix, len(drivers.AllPCIeDevs))
drivers.AllPCIeDevs[devID] = true
bridgeID := fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, len(config.PCIeDevices[config.RootPort]))
config.PCIeDevices[config.RootPort][devID] = true
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeId)
bridgeQomPath := fmt.Sprintf("%s%s", qomPathPrefix, bridgeID)
bridgeSlot, err := q.qomGetSlot(bridgeQomPath)
if err != nil {
return err
@@ -1571,7 +1653,7 @@ func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.V
return err
}
if err = q.qmpMonitorCh.qmp.ExecutePCIVhostUserDevAdd(q.qmpMonitorCh.ctx, driver, devID, vAttr.DevID, addr, bridgeId); err != nil {
if err = q.qmpMonitorCh.qmp.ExecutePCIVhostUserDevAdd(q.qmpMonitorCh.ctx, driver, devID, vAttr.DevID, addr, bridgeID); err != nil {
return err
}
@@ -1685,41 +1767,108 @@ func (q *qemu) qomGetSlot(qomPath string) (types.PciSlot, error) {
// Query QMP to find a device's PCI path given its QOM path or ID
func (q *qemu) qomGetPciPath(qemuID string) (types.PciPath, error) {
// XXX: For now we assume there's exactly one bridge, since
// that's always how we configure qemu from Kata for now. It
// would be good to generalize this to different PCI
// topologies
var slots []types.PciSlot
devSlot, err := q.qomGetSlot(qemuID)
if err != nil {
return types.PciPath{}, err
}
slots = append(slots, devSlot)
busq, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, qemuID, "parent_bus")
// This only works for Q35 and Virt
r, _ := regexp.Compile(`^/machine/.*/pcie.0`)
var parentPath = qemuID
// We do not want to use a forever loop here, a deeper PCIe topology
// than 5 is already not advisable just for the sake of having enough
// buffer we limit ourselves to 10 and leave the loop early if we hit
// the root bus.
for i := 1; i <= maxPCIeTopoDepth; i++ {
parenBusQOM, err := q.qmpMonitorCh.qmp.ExecQomGet(q.qmpMonitorCh.ctx, parentPath, "parent_bus")
if err != nil {
return types.PciPath{}, err
}
busQOM, ok := parenBusQOM.(string)
if !ok {
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, parenBusQOM)
}
// If we hit /machine/q35/pcie.0 we're done this is the root bus
// we climbed the complete hierarchy
if r.Match([]byte(busQOM)) {
break
}
// `bus` is the QOM path of the QOM bus object, but we need
// the PCI parent_bus which manages that bus. There doesn't seem
// to be a way to get that other than to simply drop the last
// path component.
idx := strings.LastIndex(busQOM, "/")
if idx == -1 {
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", busQOM)
}
parentBus := busQOM[:idx]
parentSlot, err := q.qomGetSlot(parentBus)
if err != nil {
return types.PciPath{}, err
}
// Prepend the slots, since we're climbing the hierarchy
slots = append([]types.PciSlot{parentSlot}, slots...)
parentPath = parentBus
}
return types.PciPathFromSlots(slots...)
}
func (q *qemu) hotplugVFIODeviceRootPort(ctx context.Context, device *config.VFIODev) (err error) {
return q.executeVFIODeviceAdd(device)
}
func (q *qemu) hotplugVFIODeviceSwitchPort(ctx context.Context, device *config.VFIODev) (err error) {
return q.executeVFIODeviceAdd(device)
}
func (q *qemu) hotplugVFIODeviceBridgePort(ctx context.Context, device *config.VFIODev) (err error) {
addr, bridge, err := q.arch.addDeviceToBridge(ctx, device.ID, types.PCI)
if err != nil {
return types.PciPath{}, err
return err
}
bus, ok := busq.(string)
if !ok {
return types.PciPath{}, fmt.Errorf("parent_bus QOM property of %s is %t not a string", qemuID, busq)
}
defer func() {
if err != nil {
q.arch.removeDeviceFromBridge(device.ID)
}
}()
return q.executePCIVFIODeviceAdd(device, addr, bridge.ID)
}
// `bus` is the QOM path of the QOM bus object, but we need
// the PCI bridge which manages that bus. There doesn't seem
// to be a way to get that other than to simply drop the last
// path component.
idx := strings.LastIndex(bus, "/")
if idx == -1 {
return types.PciPath{}, fmt.Errorf("Bus has unexpected QOM path %s", bus)
func (q *qemu) executePCIVFIODeviceAdd(device *config.VFIODev, addr string, bridgeID string) error {
switch device.Type {
case config.VFIOPCIDeviceNormalType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.BDF, addr, bridgeID, romFile)
case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, addr, bridgeID, romFile)
case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
bridge := bus[:idx]
}
bridgeSlot, err := q.qomGetSlot(bridge)
if err != nil {
return types.PciPath{}, err
func (q *qemu) executeVFIODeviceAdd(device *config.VFIODev) error {
switch device.Type {
case config.VFIOPCIDeviceNormalType:
return q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.BDF, device.Bus, romFile)
case config.VFIOPCIDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.ID, device.SysfsDev, "", device.Bus, romFile)
case config.VFIOAPDeviceMediatedType:
return q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, device.SysfsDev)
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
return types.PciPathFromSlots(bridgeSlot, devSlot)
}
func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op Operation) (err error) {
@@ -1727,109 +1876,53 @@ func (q *qemu) hotplugVFIODevice(ctx context.Context, device *config.VFIODev, op
return err
}
devID := *(*device).GetID()
machineType := q.HypervisorConfig().HypervisorMachineType
if op == AddDevice {
buf, _ := json.Marshal(device)
q.Logger().WithFields(logrus.Fields{
"machine-type": machineType,
"hotplug-vfio-on-root-bus": q.state.HotplugVFIOOnRootBus,
"pcie-root-port": q.state.PCIeRootPort,
"device-info": string(buf),
"machine-type": q.HypervisorConfig().HypervisorMachineType,
"hot-plug-vfio": q.state.HotPlugVFIO,
"device-info": string(buf),
}).Info("Start hot-plug VFIO device")
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is a currently a limitation with PCI bridges.
if q.state.HotplugVFIOOnRootBus {
switch (*device).GetType() {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
// In case MachineType is q35, a PCIe device is hotplugged on a PCIe Root Port.
pciDevice, ok := (*device).(config.VFIOPCIDev)
if !ok {
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
}
switch machineType {
case QemuQ35:
if pciDevice.IsPCIe && q.state.PCIeRootPort <= 0 {
q.Logger().WithField("dev-id", (*device).GetID()).Warn("VFIO device is a PCIe device. It's recommended to add the PCIe Root Port by setting the pcie_root_port parameter in the configuration for q35")
pciDevice.Bus = ""
}
default:
pciDevice.Bus = ""
}
*device = pciDevice
if pciDevice.Type == config.VFIOPCIDeviceNormalType {
err = q.qmpMonitorCh.qmp.ExecuteVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, pciDevice.Bus, romFile)
} else {
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), "", pciDevice.Bus, romFile)
}
case config.VFIOAPDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
}
// In case MachineType is q35, a PCIe device is hotplugged on
// a PCIe Root Port or alternatively on a PCIe Switch Port
if q.HypervisorConfig().HypervisorMachineType != QemuQ35 && q.HypervisorConfig().HypervisorMachineType != QemuVirt {
device.Bus = ""
} else {
addr, bridge, err := q.arch.addDeviceToBridge(ctx, devID, types.PCI)
var err error
// In case HotplugVFIOOnRootBus is true, devices are hotplugged on the root bus
// for pc machine type instead of bridge. This is useful for devices that require
// a large PCI BAR which is a currently a limitation with PCI bridges.
if q.state.HotPlugVFIO == config.RootPort || q.state.HotplugVFIOOnRootBus {
err = q.hotplugVFIODeviceRootPort(ctx, device)
} else if q.state.HotPlugVFIO == config.SwitchPort {
err = q.hotplugVFIODeviceSwitchPort(ctx, device)
} else {
err = q.hotplugVFIODeviceBridgePort(ctx, device)
}
if err != nil {
return err
}
defer func() {
if err != nil {
q.arch.removeDeviceFromBridge(devID)
}
}()
switch (*device).GetType() {
case config.VFIOPCIDeviceNormalType:
pciDevice, ok := (*device).(config.VFIOPCIDev)
if !ok {
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
}
err = q.qmpMonitorCh.qmp.ExecutePCIVFIODeviceAdd(q.qmpMonitorCh.ctx, devID, pciDevice.BDF, addr, bridge.ID, romFile)
case config.VFIOPCIDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecutePCIVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, devID, *(*device).GetSysfsDev(), addr, bridge.ID, romFile)
case config.VFIOAPDeviceMediatedType:
err = q.qmpMonitorCh.qmp.ExecuteAPVFIOMediatedDeviceAdd(q.qmpMonitorCh.ctx, *(*device).GetSysfsDev())
default:
return fmt.Errorf("Incorrect VFIO device type found")
}
}
if err != nil {
return err
}
switch (*device).GetType() {
case config.VFIOPCIDeviceNormalType, config.VFIOPCIDeviceMediatedType:
pciDevice, ok := (*device).(config.VFIOPCIDev)
if !ok {
return fmt.Errorf("VFIO device %+v is not PCI, but its Type said otherwise", device)
}
// XXX: Depending on whether we're doing root port or
// bridge hotplug, and how the bridge is set up in
// other parts of the code, we may or may not already
// have information about the slot number of the
// bridge and or the device. For simplicity, just
// query both of them back from qemu
guestPciPath, err := q.qomGetPciPath(devID)
pciDevice.GuestPciPath = guestPciPath
*device = pciDevice
return err
}
// XXX: Depending on whether we're doing root port or
// bridge hotplug, and how the bridge is set up in
// other parts of the code, we may or may not already
// have information about the slot number of the
// bridge and or the device. For simplicity, just
// query both of them back from qemu
device.GuestPciPath, err = q.qomGetPciPath(device.ID)
return err
} else {
q.Logger().WithField("dev-id", devID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus {
if err := q.arch.removeDeviceFromBridge(devID); err != nil {
return err
}
}
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, devID)
}
q.Logger().WithField("dev-id", device.ID).Info("Start hot-unplug VFIO device")
if !q.state.HotplugVFIOOnRootBus {
if err := q.arch.removeDeviceFromBridge(device.ID); err != nil {
return err
}
}
return q.qmpMonitorCh.qmp.ExecuteDeviceDel(q.qmpMonitorCh.ctx, device.ID)
}
func (q *qemu) hotAddNetDevice(name, hardAddr string, VMFds, VhostFds []*os.File) error {
@@ -2527,7 +2620,7 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
for i := uint32(0); i < number; i++ {
devices = append(devices,
govmmQemu.PCIeRootPortDevice{
ID: fmt.Sprintf("%s%d", pcieRootPortPrefix, i),
ID: fmt.Sprintf("%s%d", config.PCIeRootPortPrefix, i),
Bus: bus,
Chassis: chassis,
Slot: strconv.FormatUint(uint64(i), 10),
@@ -2541,6 +2634,79 @@ func genericAppendPCIeRootPort(devices []govmmQemu.Device, number uint32, machin
return devices
}
// gollangci-lint enforces multi-line comments to be a block comment
// not multiple single line comments ...
/* pcie.0 bus
// -------------------------------------------------
// |
// -------------
// | Root Port |
// -------------
// -------------------------|------------------------
// | ----------------- |
// | PCI Express | Upstream Port | |
// | Switch ----------------- |
// | | | |
// | ------------------- ------------------- |
// | | Downstream Port | | Downstream Port | |
// | ------------------- ------------------- |
// -------------|-----------------------|------------
// ------------- --------------
// | GPU/ACCEL | | IB/ETH NIC |
// ------------- --------------
*/
// genericAppendPCIeSwitch adds a PCIe Swtich
func genericAppendPCIeSwitchPort(devices []govmmQemu.Device, number uint32, machineType string, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
// Q35, Virt have the correct PCIe support,
// hence ignore all other machines
if machineType != QemuQ35 && machineType != QemuVirt {
return devices
}
// Using an own ID for the root port, so we do not clash with already
// existing root ports adding "s" for switch prefix
pcieRootPort := govmmQemu.PCIeRootPortDevice{
ID: fmt.Sprintf("%s%s%d", config.PCIeSwitchPortPrefix, config.PCIeRootPortPrefix, 0),
Bus: defaultBridgeBus,
Chassis: "1",
Slot: strconv.FormatUint(uint64(0), 10),
Multifunction: false,
Addr: "0",
MemReserve: fmt.Sprintf("%dB", memSize32bit),
Pref64Reserve: fmt.Sprintf("%dB", memSize64bit),
}
devices = append(devices, pcieRootPort)
pcieSwitchUpstreamPort := govmmQemu.PCIeSwitchUpstreamPortDevice{
ID: fmt.Sprintf("%s%d", config.PCIeSwitchUpstreamPortPrefix, 0),
Bus: pcieRootPort.ID,
}
devices = append(devices, pcieSwitchUpstreamPort)
currentChassis, err := strconv.Atoi(pcieRootPort.Chassis)
if err != nil {
return devices
}
nextChassis := currentChassis + 1
for i := uint32(0); i < number; i++ {
pcieSwitchDownstreamPort := govmmQemu.PCIeSwitchDownstreamPortDevice{
ID: fmt.Sprintf("%s%d", config.PCIeSwitchhDownstreamPortPrefix, i),
Bus: pcieSwitchUpstreamPort.ID,
Chassis: fmt.Sprintf("%d", nextChassis),
Slot: strconv.FormatUint(uint64(i), 10),
// TODO: MemReserve: fmt.Sprintf("%dB", memSize32bit),
// TODO: Pref64Reserve: fmt.Sprintf("%dB", memSize64bit),
}
devices = append(devices, pcieSwitchDownstreamPort)
}
return devices
}
func (q *qemu) GetThreadIDs(ctx context.Context) (VcpuThreadIDs, error) {
span, _ := katatrace.Trace(ctx, q.Logger(), "GetThreadIDs", qemuTracingTags, map[string]string{"sandbox_id": q.id})
defer span.End()
@@ -2716,7 +2882,6 @@ func (q *qemu) Save() (s hv.HypervisorState) {
s.UUID = q.state.UUID
s.HotpluggedMemory = q.state.HotpluggedMemory
s.HotplugVFIOOnRootBus = q.state.HotplugVFIOOnRootBus
s.PCIeRootPort = q.state.PCIeRootPort
for _, bridge := range q.arch.getBridges() {
s.Bridges = append(s.Bridges, hv.Bridge{
@@ -2740,7 +2905,6 @@ func (q *qemu) Load(s hv.HypervisorState) {
q.state.HotpluggedMemory = s.HotpluggedMemory
q.state.HotplugVFIOOnRootBus = s.HotplugVFIOOnRootBus
q.state.VirtiofsDaemonPid = s.VirtiofsDaemonPid
q.state.PCIeRootPort = s.PCIeRootPort
for _, bridge := range s.Bridges {
q.state.Bridges = append(q.state.Bridges, types.NewBridge(types.Type(bridge.Type), bridge.ID, bridge.DeviceAddr, bridge.Addr))