mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-18 23:04:20 +01:00
For kata containers, rootfs is used in the read-only way. EROFS can noticably decrease metadata overhead. On the basis of supporting the EROFS file system, it supports using the config parameter to switch the file system used by rootfs. Fixes: #6063 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Signed-off-by: yaoyinnan <yaoyinnan@foxmail.com>
870 lines
25 KiB
Go
870 lines
25 KiB
Go
//go:build linux
|
|
|
|
// Copyright (c) 2018 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
package virtcontainers
|
|
|
|
import (
|
|
"context"
|
|
"encoding/hex"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"strings"
|
|
|
|
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
|
|
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
|
|
|
|
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
|
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
|
|
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
|
|
)
|
|
|
|
type qemuArch interface {
|
|
// enableNestingChecks nesting checks will be honoured
|
|
enableNestingChecks()
|
|
|
|
// disableNestingChecks nesting checks will be ignored
|
|
disableNestingChecks()
|
|
|
|
// runNested indicates if the hypervisor runs in a nested environment
|
|
runNested() bool
|
|
|
|
// enableVhostNet vhost will be enabled
|
|
enableVhostNet()
|
|
|
|
// disableVhostNet vhost will be disabled
|
|
disableVhostNet()
|
|
|
|
// machine returns the machine type
|
|
machine() govmmQemu.Machine
|
|
|
|
// qemuPath returns the path to the QEMU binary
|
|
qemuPath() string
|
|
|
|
// kernelParameters returns the kernel parameters
|
|
// if debug is true then kernel debug parameters are included
|
|
kernelParameters(debug bool) []Param
|
|
|
|
//capabilities returns the capabilities supported by QEMU
|
|
capabilities() types.Capabilities
|
|
|
|
// bridges sets the number bridges for the machine type
|
|
bridges(number uint32)
|
|
|
|
// cpuTopology returns the CPU topology for the given amount of vcpus
|
|
cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP
|
|
|
|
// cpuModel returns the CPU model for the machine type
|
|
cpuModel() string
|
|
|
|
// memoryTopology returns the memory topology using the given amount of memoryMb and hostMemoryMb
|
|
memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory
|
|
|
|
// appendConsole appends a console to devices
|
|
appendConsole(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
|
|
|
|
// appendImage appends an image to devices
|
|
appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
|
|
|
|
// appendBlockImage appends an image as block device
|
|
appendBlockImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
|
|
|
|
// appendNvdimmImage appends an image as nvdimm device
|
|
appendNvdimmImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
|
|
|
|
// appendSCSIController appens a SCSI controller to devices
|
|
appendSCSIController(context context.Context, devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread, error)
|
|
|
|
// appendBridges appends bridges to devices
|
|
appendBridges(devices []govmmQemu.Device) []govmmQemu.Device
|
|
|
|
// append9PVolume appends a 9P volume to devices
|
|
append9PVolume(ctx context.Context, devices []govmmQemu.Device, volume types.Volume) ([]govmmQemu.Device, error)
|
|
|
|
// appendSocket appends a socket to devices
|
|
appendSocket(devices []govmmQemu.Device, socket types.Socket) []govmmQemu.Device
|
|
|
|
// appendVSock appends a vsock PCI to devices
|
|
appendVSock(ctx context.Context, devices []govmmQemu.Device, vsock types.VSock) ([]govmmQemu.Device, error)
|
|
|
|
// appendNetwork appends a endpoint device to devices
|
|
appendNetwork(ctx context.Context, devices []govmmQemu.Device, endpoint Endpoint) ([]govmmQemu.Device, error)
|
|
|
|
// appendBlockDevice appends a block drive to devices
|
|
appendBlockDevice(ctx context.Context, devices []govmmQemu.Device, drive config.BlockDrive) ([]govmmQemu.Device, error)
|
|
|
|
// appendVhostUserDevice appends a vhost user device to devices
|
|
appendVhostUserDevice(ctx context.Context, devices []govmmQemu.Device, drive config.VhostUserDeviceAttrs) ([]govmmQemu.Device, error)
|
|
|
|
// appendVFIODevice appends a VFIO device to devices
|
|
appendVFIODevice(devices []govmmQemu.Device, vfioDevice config.VFIODev) []govmmQemu.Device
|
|
|
|
// appendRNGDevice appends a RNG device to devices
|
|
appendRNGDevice(ctx context.Context, devices []govmmQemu.Device, rngDevice config.RNGDev) ([]govmmQemu.Device, error)
|
|
|
|
// addDeviceToBridge adds devices to the bus
|
|
addDeviceToBridge(ctx context.Context, ID string, t types.Type) (string, types.Bridge, error)
|
|
|
|
// removeDeviceFromBridge removes devices to the bus
|
|
removeDeviceFromBridge(ID string) error
|
|
|
|
// getBridges grants access to Bridges
|
|
getBridges() []types.Bridge
|
|
|
|
// setBridges grants access to Bridges
|
|
setBridges(bridges []types.Bridge)
|
|
|
|
// addBridge adds a new Bridge to the list of Bridges
|
|
addBridge(types.Bridge)
|
|
|
|
// getPFlash() get pflash from configuration
|
|
getPFlash() ([]string, error)
|
|
|
|
// setPFlash() grants access to pflash
|
|
setPFlash([]string)
|
|
|
|
// handleImagePath handles the Hypervisor Config image path
|
|
handleImagePath(config HypervisorConfig) error
|
|
|
|
// supportGuestMemoryHotplug returns if the guest supports memory hotplug
|
|
supportGuestMemoryHotplug() bool
|
|
|
|
// setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration
|
|
setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error
|
|
|
|
// appendPCIeRootPortDevice appends a pcie-root-port device to pcie.0 bus
|
|
appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device
|
|
|
|
// append vIOMMU device
|
|
appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
|
|
|
// append pvpanic device
|
|
appendPVPanicDevice(devices []govmmQemu.Device) ([]govmmQemu.Device, error)
|
|
|
|
// append protection device.
|
|
// This implementation is architecture specific, some archs may need
|
|
// a firmware, returns a string containing the path to the firmware that should
|
|
// be used with the -bios option, ommit -bios option if the path is empty.
|
|
appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error)
|
|
|
|
// scans the PCIe space and returns the biggest BAR sizes for 32-bit
|
|
// and 64-bit addressable memory
|
|
getBARsMaxAddressableMemory() (uint64, uint64)
|
|
}
|
|
|
|
type qemuArchBase struct {
|
|
qemuExePath string
|
|
qemuMachine govmmQemu.Machine
|
|
PFlash []string
|
|
kernelParamsNonDebug []Param
|
|
kernelParamsDebug []Param
|
|
kernelParams []Param
|
|
Bridges []types.Bridge
|
|
memoryOffset uint64
|
|
networkIndex int
|
|
// Exclude from lint checking for it is ultimately only used in architecture-specific code
|
|
protection guestProtection //nolint:structcheck
|
|
nestedRun bool
|
|
vhost bool
|
|
disableNvdimm bool
|
|
dax bool
|
|
legacySerial bool
|
|
}
|
|
|
|
const (
|
|
defaultCores uint32 = 1
|
|
defaultThreads uint32 = 1
|
|
defaultCPUModel = "host"
|
|
defaultBridgeBus = "pcie.0"
|
|
defaultPCBridgeBus = "pci.0"
|
|
maxDevIDSize = 31
|
|
pcieRootPortPrefix = "rp"
|
|
)
|
|
|
|
// This is the PCI start address assigned to the first bridge that
|
|
// is added on the qemu command line. In case of x86_64, the first two PCI
|
|
// addresses (0 and 1) are used by the platform while in case of ARM, address
|
|
// 0 is reserved.
|
|
const bridgePCIStartAddr = 2
|
|
|
|
const (
|
|
// QemuQ35 is the QEMU Q35 machine type for amd64
|
|
QemuQ35 = "q35"
|
|
|
|
// QemuMicrovm is the QEMU microvm machine type for amd64
|
|
QemuMicrovm = "microvm"
|
|
|
|
// QemuVirt is the QEMU virt machine type for aarch64 or amd64
|
|
QemuVirt = "virt"
|
|
|
|
// QemuPseries is a QEMU virt machine type for ppc64le
|
|
QemuPseries = "pseries"
|
|
|
|
// QemuCCWVirtio is a QEMU virt machine type for for s390x
|
|
QemuCCWVirtio = "s390-ccw-virtio"
|
|
|
|
qmpCapMigrationIgnoreShared = "x-ignore-shared"
|
|
|
|
qemuNvdimmOption = "nvdimm=on"
|
|
)
|
|
|
|
// kernelParamsNonDebug is a list of the default kernel
|
|
// parameters that will be used in standard (non-debug) mode.
|
|
var kernelParamsNonDebug = []Param{
|
|
{"quiet", ""},
|
|
}
|
|
|
|
// kernelParamsSystemdNonDebug is a list of the default systemd related
|
|
// kernel parameters that will be used in standard (non-debug) mode.
|
|
var kernelParamsSystemdNonDebug = []Param{
|
|
{"systemd.show_status", "false"},
|
|
}
|
|
|
|
// kernelParamsDebug is a list of the default kernel
|
|
// parameters that will be used in debug mode (as much boot output as
|
|
// possible).
|
|
var kernelParamsDebug = []Param{
|
|
{"debug", ""},
|
|
}
|
|
|
|
// kernelParamsSystemdDebug is a list of the default systemd related kernel
|
|
// parameters that will be used in debug mode (as much boot output as
|
|
// possible).
|
|
var kernelParamsSystemdDebug = []Param{
|
|
{"systemd.show_status", "true"},
|
|
{"systemd.log_level", "debug"},
|
|
}
|
|
|
|
func (q *qemuArchBase) enableNestingChecks() {
|
|
q.nestedRun = true
|
|
}
|
|
|
|
func (q *qemuArchBase) disableNestingChecks() {
|
|
q.nestedRun = false
|
|
}
|
|
|
|
func (q *qemuArchBase) runNested() bool {
|
|
return q.nestedRun
|
|
}
|
|
|
|
func (q *qemuArchBase) enableVhostNet() {
|
|
q.vhost = true
|
|
}
|
|
|
|
func (q *qemuArchBase) disableVhostNet() {
|
|
q.vhost = false
|
|
}
|
|
|
|
func (q *qemuArchBase) machine() govmmQemu.Machine {
|
|
return q.qemuMachine
|
|
}
|
|
|
|
func (q *qemuArchBase) qemuPath() string {
|
|
return q.qemuExePath
|
|
}
|
|
|
|
func (q *qemuArchBase) kernelParameters(debug bool) []Param {
|
|
params := q.kernelParams
|
|
|
|
if debug {
|
|
params = append(params, q.kernelParamsDebug...)
|
|
} else {
|
|
params = append(params, q.kernelParamsNonDebug...)
|
|
}
|
|
|
|
return params
|
|
}
|
|
|
|
func (q *qemuArchBase) capabilities() types.Capabilities {
|
|
var caps types.Capabilities
|
|
caps.SetBlockDeviceHotplugSupport()
|
|
caps.SetMultiQueueSupport()
|
|
caps.SetFsSharingSupport()
|
|
return caps
|
|
}
|
|
|
|
func (q *qemuArchBase) bridges(number uint32) {
|
|
for i := uint32(0); i < number; i++ {
|
|
q.Bridges = append(q.Bridges, types.NewBridge(types.PCI, fmt.Sprintf("%s-bridge-%d", types.PCI, i), make(map[uint32]string), 0))
|
|
}
|
|
}
|
|
|
|
func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP {
|
|
smp := govmmQemu.SMP{
|
|
CPUs: vcpus,
|
|
Sockets: maxvcpus,
|
|
Cores: defaultCores,
|
|
Threads: defaultThreads,
|
|
MaxCPUs: maxvcpus,
|
|
}
|
|
|
|
return smp
|
|
}
|
|
|
|
func (q *qemuArchBase) cpuModel() string {
|
|
return defaultCPUModel
|
|
}
|
|
|
|
func (q *qemuArchBase) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
|
|
memMax := fmt.Sprintf("%dM", hostMemoryMb)
|
|
mem := fmt.Sprintf("%dM", memoryMb)
|
|
memory := govmmQemu.Memory{
|
|
Size: mem,
|
|
Slots: slots,
|
|
MaxMem: memMax,
|
|
}
|
|
|
|
return memory
|
|
}
|
|
|
|
func (q *qemuArchBase) appendConsole(_ context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
|
var serial, console govmmQemu.Device
|
|
var consoleKernelParams []Param
|
|
|
|
if q.legacySerial {
|
|
serial = govmmQemu.LegacySerialDevice{
|
|
Chardev: "charconsole0",
|
|
}
|
|
|
|
console = govmmQemu.CharDevice{
|
|
Driver: govmmQemu.LegacySerial,
|
|
Backend: govmmQemu.Socket,
|
|
DeviceID: "console0",
|
|
ID: "charconsole0",
|
|
Path: path,
|
|
}
|
|
|
|
consoleKernelParams = []Param{
|
|
{"console", "ttyS0"},
|
|
}
|
|
} else {
|
|
serial = govmmQemu.SerialDevice{
|
|
Driver: govmmQemu.VirtioSerial,
|
|
ID: "serial0",
|
|
DisableModern: q.nestedRun,
|
|
MaxPorts: uint(2),
|
|
}
|
|
|
|
console = govmmQemu.CharDevice{
|
|
Driver: govmmQemu.Console,
|
|
Backend: govmmQemu.Socket,
|
|
DeviceID: "console0",
|
|
ID: "charconsole0",
|
|
Path: path,
|
|
}
|
|
|
|
consoleKernelParams = []Param{
|
|
{"console", "hvc0"},
|
|
{"console", "hvc1"},
|
|
}
|
|
}
|
|
|
|
devices = append(devices, serial)
|
|
devices = append(devices, console)
|
|
q.kernelParams = append(q.kernelParams, consoleKernelParams...)
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func genericImage(path string) (config.BlockDrive, error) {
|
|
if _, err := os.Stat(path); os.IsNotExist(err) {
|
|
return config.BlockDrive{}, err
|
|
}
|
|
|
|
randBytes, err := utils.GenerateRandomBytes(8)
|
|
if err != nil {
|
|
return config.BlockDrive{}, err
|
|
}
|
|
|
|
id := utils.MakeNameID("image", hex.EncodeToString(randBytes), maxDevIDSize)
|
|
|
|
drive := config.BlockDrive{
|
|
File: path,
|
|
Format: "raw",
|
|
ID: id,
|
|
ShareRW: true,
|
|
ReadOnly: true,
|
|
}
|
|
|
|
return drive, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendNvdimmImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
|
imageFile, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer imageFile.Close()
|
|
|
|
imageStat, err := imageFile.Stat()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
object := govmmQemu.Object{
|
|
Driver: govmmQemu.NVDIMM,
|
|
Type: govmmQemu.MemoryBackendFile,
|
|
DeviceID: "nv0",
|
|
ID: "mem0",
|
|
MemPath: path,
|
|
Size: (uint64)(imageStat.Size()),
|
|
ReadOnly: true,
|
|
}
|
|
|
|
devices = append(devices, object)
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
|
return q.appendBlockImage(ctx, devices, path)
|
|
}
|
|
|
|
func (q *qemuArchBase) appendBlockImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
|
drive, err := genericImage(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
devices, err = q.appendBlockDevice(ctx, devices, drive)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return devices, nil
|
|
}
|
|
|
|
func genericSCSIController(enableIOThreads, nestedRun bool) (govmmQemu.SCSIController, *govmmQemu.IOThread) {
|
|
scsiController := govmmQemu.SCSIController{
|
|
ID: scsiControllerID,
|
|
DisableModern: nestedRun,
|
|
}
|
|
|
|
var t *govmmQemu.IOThread
|
|
|
|
if enableIOThreads {
|
|
randBytes, _ := utils.GenerateRandomBytes(8)
|
|
|
|
t = &govmmQemu.IOThread{
|
|
ID: fmt.Sprintf("%s-%s", "iothread", hex.EncodeToString(randBytes)),
|
|
}
|
|
|
|
scsiController.IOThread = t.ID
|
|
}
|
|
|
|
return scsiController, t
|
|
}
|
|
|
|
func (q *qemuArchBase) appendSCSIController(_ context.Context, devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread, error) {
|
|
d, t := genericSCSIController(enableIOThreads, q.nestedRun)
|
|
devices = append(devices, d)
|
|
return devices, t, nil
|
|
}
|
|
|
|
// appendBridges appends to devices the given bridges
|
|
func (q *qemuArchBase) appendBridges(devices []govmmQemu.Device) []govmmQemu.Device {
|
|
return genericAppendBridges(devices, q.Bridges, q.qemuMachine.Type)
|
|
}
|
|
|
|
func generic9PVolume(volume types.Volume, nestedRun bool) govmmQemu.FSDevice {
|
|
devID := fmt.Sprintf("extra-9p-%s", volume.MountTag)
|
|
if len(devID) > maxDevIDSize {
|
|
devID = devID[:maxDevIDSize]
|
|
}
|
|
|
|
return govmmQemu.FSDevice{
|
|
Driver: govmmQemu.Virtio9P,
|
|
FSDriver: govmmQemu.Local,
|
|
ID: devID,
|
|
Path: volume.HostPath,
|
|
MountTag: volume.MountTag,
|
|
SecurityModel: govmmQemu.None,
|
|
DisableModern: nestedRun,
|
|
Multidev: govmmQemu.Remap,
|
|
}
|
|
}
|
|
|
|
func genericAppend9PVolume(devices []govmmQemu.Device, volume types.Volume, nestedRun bool) (govmmQemu.FSDevice, error) {
|
|
d := generic9PVolume(volume, nestedRun)
|
|
return d, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) append9PVolume(_ context.Context, devices []govmmQemu.Device, volume types.Volume) ([]govmmQemu.Device, error) {
|
|
if volume.MountTag == "" || volume.HostPath == "" {
|
|
return devices, nil
|
|
}
|
|
|
|
d, err := genericAppend9PVolume(devices, volume, q.nestedRun)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
devices = append(devices, d)
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendSocket(devices []govmmQemu.Device, socket types.Socket) []govmmQemu.Device {
|
|
devID := socket.ID
|
|
if len(devID) > maxDevIDSize {
|
|
devID = devID[:maxDevIDSize]
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.CharDevice{
|
|
Driver: govmmQemu.VirtioSerialPort,
|
|
Backend: govmmQemu.Socket,
|
|
DeviceID: socket.DeviceID,
|
|
ID: devID,
|
|
Path: socket.HostPath,
|
|
Name: socket.Name,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendVSock(_ context.Context, devices []govmmQemu.Device, vsock types.VSock) ([]govmmQemu.Device, error) {
|
|
devices = append(devices,
|
|
govmmQemu.VSOCKDevice{
|
|
ID: fmt.Sprintf("vsock-%d", vsock.ContextID),
|
|
ContextID: vsock.ContextID,
|
|
VHostFD: vsock.VhostFd,
|
|
DisableModern: q.nestedRun,
|
|
},
|
|
)
|
|
|
|
return devices, nil
|
|
|
|
}
|
|
|
|
func networkModelToQemuType(model NetInterworkingModel) govmmQemu.NetDeviceType {
|
|
switch model {
|
|
case NetXConnectMacVtapModel:
|
|
return govmmQemu.MACVTAP
|
|
default:
|
|
//TAP should work for most other cases
|
|
return govmmQemu.TAP
|
|
}
|
|
}
|
|
|
|
func genericNetwork(endpoint Endpoint, vhost, nestedRun bool, index int) (govmmQemu.NetDevice, error) {
|
|
var d govmmQemu.NetDevice
|
|
switch ep := endpoint.(type) {
|
|
case *VethEndpoint, *MacvlanEndpoint, *IPVlanEndpoint:
|
|
netPair := ep.NetworkPair()
|
|
d = govmmQemu.NetDevice{
|
|
Type: networkModelToQemuType(netPair.NetInterworkingModel),
|
|
Driver: govmmQemu.VirtioNet,
|
|
ID: fmt.Sprintf("network-%d", index),
|
|
IFName: netPair.TAPIface.Name,
|
|
MACAddress: netPair.TAPIface.HardAddr,
|
|
DownScript: "no",
|
|
Script: "no",
|
|
VHost: vhost,
|
|
DisableModern: nestedRun,
|
|
FDs: netPair.VMFds,
|
|
VhostFDs: netPair.VhostFds,
|
|
}
|
|
case *MacvtapEndpoint:
|
|
d = govmmQemu.NetDevice{
|
|
Type: govmmQemu.MACVTAP,
|
|
Driver: govmmQemu.VirtioNet,
|
|
ID: fmt.Sprintf("network-%d", index),
|
|
IFName: ep.Name(),
|
|
MACAddress: ep.HardwareAddr(),
|
|
DownScript: "no",
|
|
Script: "no",
|
|
VHost: vhost,
|
|
DisableModern: nestedRun,
|
|
FDs: ep.VMFds,
|
|
VhostFDs: ep.VhostFds,
|
|
}
|
|
case *TuntapEndpoint:
|
|
netPair := ep.NetworkPair()
|
|
d = govmmQemu.NetDevice{
|
|
Type: govmmQemu.NetDeviceType("tap"),
|
|
Driver: govmmQemu.VirtioNet,
|
|
ID: fmt.Sprintf("network-%d", index),
|
|
IFName: netPair.TAPIface.Name,
|
|
MACAddress: netPair.TAPIface.HardAddr,
|
|
DownScript: "no",
|
|
Script: "no",
|
|
VHost: vhost,
|
|
DisableModern: nestedRun,
|
|
FDs: netPair.VMFds,
|
|
VhostFDs: netPair.VhostFds,
|
|
}
|
|
default:
|
|
return govmmQemu.NetDevice{}, fmt.Errorf("Unknown type for endpoint")
|
|
}
|
|
|
|
return d, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendNetwork(_ context.Context, devices []govmmQemu.Device, endpoint Endpoint) ([]govmmQemu.Device, error) {
|
|
d, err := genericNetwork(endpoint, q.vhost, q.nestedRun, q.networkIndex)
|
|
if err != nil {
|
|
return devices, fmt.Errorf("Failed to append network %v", err)
|
|
}
|
|
q.networkIndex++
|
|
devices = append(devices, d)
|
|
return devices, nil
|
|
}
|
|
|
|
func genericBlockDevice(drive config.BlockDrive, nestedRun bool) (govmmQemu.BlockDevice, error) {
|
|
if drive.File == "" || drive.ID == "" || drive.Format == "" {
|
|
return govmmQemu.BlockDevice{}, fmt.Errorf("Empty File, ID or Format for drive %v", drive)
|
|
}
|
|
|
|
if len(drive.ID) > maxDevIDSize {
|
|
drive.ID = drive.ID[:maxDevIDSize]
|
|
}
|
|
|
|
return govmmQemu.BlockDevice{
|
|
Driver: govmmQemu.VirtioBlock,
|
|
ID: drive.ID,
|
|
File: drive.File,
|
|
AIO: govmmQemu.Threads,
|
|
Format: govmmQemu.BlockDeviceFormat(drive.Format),
|
|
Interface: "none",
|
|
DisableModern: nestedRun,
|
|
ShareRW: drive.ShareRW,
|
|
ReadOnly: drive.ReadOnly,
|
|
}, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendBlockDevice(_ context.Context, devices []govmmQemu.Device, drive config.BlockDrive) ([]govmmQemu.Device, error) {
|
|
d, err := genericBlockDevice(drive, q.nestedRun)
|
|
if err != nil {
|
|
return devices, fmt.Errorf("Failed to append block device %v", err)
|
|
}
|
|
devices = append(devices, d)
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendVhostUserDevice(ctx context.Context, devices []govmmQemu.Device, attr config.VhostUserDeviceAttrs) ([]govmmQemu.Device, error) {
|
|
qemuVhostUserDevice := govmmQemu.VhostUserDevice{}
|
|
|
|
switch attr.Type {
|
|
case config.VhostUserNet:
|
|
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("net", attr.DevID, maxDevIDSize)
|
|
qemuVhostUserDevice.Address = attr.MacAddress
|
|
qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserNet
|
|
case config.VhostUserSCSI:
|
|
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("scsi", attr.DevID, maxDevIDSize)
|
|
qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserSCSI
|
|
case config.VhostUserBlk:
|
|
qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserBlk
|
|
case config.VhostUserFS:
|
|
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("fs", attr.DevID, maxDevIDSize)
|
|
qemuVhostUserDevice.Tag = attr.Tag
|
|
qemuVhostUserDevice.CacheSize = attr.CacheSize
|
|
qemuVhostUserDevice.QueueSize = attr.QueueSize
|
|
qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserFS
|
|
}
|
|
|
|
qemuVhostUserDevice.SocketPath = attr.SocketPath
|
|
qemuVhostUserDevice.CharDevID = utils.MakeNameID("char", attr.DevID, maxDevIDSize)
|
|
|
|
devices = append(devices, qemuVhostUserDevice)
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
|
|
if vfioDev.BDF == "" {
|
|
return devices
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.VFIODevice{
|
|
BDF: vfioDev.BDF,
|
|
VendorID: vfioDev.VendorID,
|
|
DeviceID: vfioDev.DeviceID,
|
|
Bus: vfioDev.Bus,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendRNGDevice(_ context.Context, devices []govmmQemu.Device, rngDev config.RNGDev) ([]govmmQemu.Device, error) {
|
|
devices = append(devices,
|
|
govmmQemu.RngDevice{
|
|
ID: rngDev.ID,
|
|
Filename: rngDev.Filename,
|
|
},
|
|
)
|
|
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) handleImagePath(config HypervisorConfig) error {
|
|
if config.ImagePath != "" {
|
|
kernelRootParams, err := GetKernelRootParams(config.RootfsType, q.disableNvdimm, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !q.disableNvdimm {
|
|
q.qemuMachine.Options = strings.Join([]string{
|
|
q.qemuMachine.Options, qemuNvdimmOption,
|
|
}, ",")
|
|
kernelRootParams, err = GetKernelRootParams(config.RootfsType, q.disableNvdimm, q.dax)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
q.kernelParams = append(q.kernelParams, kernelRootParams...)
|
|
q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...)
|
|
q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (q *qemuArchBase) supportGuestMemoryHotplug() bool {
|
|
return q.protection == noneProtection
|
|
}
|
|
|
|
func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error {
|
|
err := qmp.ExecSetMigrationCaps(ctx, []map[string]interface{}{
|
|
{
|
|
"capability": qmpCapMigrationIgnoreShared,
|
|
"state": true,
|
|
},
|
|
})
|
|
return err
|
|
}
|
|
|
|
func (q *qemuArchBase) addDeviceToBridge(ctx context.Context, ID string, t types.Type) (string, types.Bridge, error) {
|
|
addr, b, err := genericAddDeviceToBridge(ctx, q.Bridges, ID, t)
|
|
if err != nil {
|
|
return "", b, err
|
|
}
|
|
|
|
return fmt.Sprintf("%02x", addr), b, nil
|
|
}
|
|
|
|
func genericAddDeviceToBridge(ctx context.Context, bridges []types.Bridge, ID string, t types.Type) (uint32, types.Bridge, error) {
|
|
var err error
|
|
var addr uint32
|
|
|
|
if len(bridges) == 0 {
|
|
return 0, types.Bridge{}, errors.New("failed to get available address from bridges")
|
|
}
|
|
|
|
// looking for an empty address in the bridges
|
|
for _, b := range bridges {
|
|
if t != b.Type {
|
|
continue
|
|
}
|
|
addr, err = b.AddDevice(ctx, ID)
|
|
if err == nil {
|
|
return addr, b, nil
|
|
}
|
|
}
|
|
|
|
return 0, types.Bridge{}, fmt.Errorf("no more bridge slots available")
|
|
}
|
|
|
|
func (q *qemuArchBase) removeDeviceFromBridge(ID string) error {
|
|
var err error
|
|
for _, b := range q.Bridges {
|
|
err = b.RemoveDevice(ID)
|
|
if err == nil {
|
|
// device was removed correctly
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (q *qemuArchBase) getBridges() []types.Bridge {
|
|
return q.Bridges
|
|
}
|
|
|
|
func (q *qemuArchBase) setBridges(bridges []types.Bridge) {
|
|
q.Bridges = bridges
|
|
}
|
|
|
|
func (q *qemuArchBase) addBridge(b types.Bridge) {
|
|
q.Bridges = append(q.Bridges, b)
|
|
}
|
|
|
|
// appendPCIeRootPortDevice appends to devices the given pcie-root-port
|
|
func (q *qemuArchBase) appendPCIeRootPortDevice(devices []govmmQemu.Device, number uint32, memSize32bit uint64, memSize64bit uint64) []govmmQemu.Device {
|
|
return genericAppendPCIeRootPort(devices, number, q.qemuMachine.Type, memSize32bit, memSize64bit)
|
|
}
|
|
|
|
func (q *qemuArchBase) getBARsMaxAddressableMemory() (uint64, uint64) {
|
|
|
|
pci := nvpci.New()
|
|
devs, _ := pci.GetAllDevices()
|
|
|
|
// Since we do not know which devices are going to be hotplugged,
|
|
// we're going to use the GPU with the biggest BARs to initialize the
|
|
// root port, this should work for all other devices as well.
|
|
// defaults are 2MB for both, if no suitable devices found
|
|
max32bit := uint64(2 * 1024 * 1024)
|
|
max64bit := uint64(2 * 1024 * 1024)
|
|
|
|
for _, dev := range devs {
|
|
if !dev.IsGPU() {
|
|
continue
|
|
}
|
|
memSize32bit, memSize64bit := dev.Resources.GetTotalAddressableMemory(true)
|
|
if max32bit < memSize32bit {
|
|
max32bit = memSize32bit
|
|
}
|
|
if max64bit < memSize64bit {
|
|
max64bit = memSize64bit
|
|
}
|
|
}
|
|
// The actual 32bit is most of the time a power of 2 but we need some
|
|
// buffer so double that to leave space for other IO functions.
|
|
// The 64bit size is not a power of 2 and hence is already rounded up
|
|
// to the higher value.
|
|
return max32bit * 2, max64bit
|
|
}
|
|
|
|
// appendIOMMU appends a virtual IOMMU device
|
|
func (q *qemuArchBase) appendIOMMU(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
|
switch q.qemuMachine.Type {
|
|
case QemuQ35:
|
|
iommu := govmmQemu.IommuDev{
|
|
Intremap: true,
|
|
DeviceIotlb: true,
|
|
CachingMode: true,
|
|
}
|
|
|
|
devices = append(devices, iommu)
|
|
return devices, nil
|
|
default:
|
|
return devices, fmt.Errorf("Machine Type %s does not support vIOMMU", q.qemuMachine.Type)
|
|
}
|
|
}
|
|
|
|
// appendPVPanicDevice appends a pvpanic device
|
|
func (q *qemuArchBase) appendPVPanicDevice(devices []govmmQemu.Device) ([]govmmQemu.Device, error) {
|
|
devices = append(devices, govmmQemu.PVPanicDevice{NoShutdown: true})
|
|
return devices, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) getPFlash() ([]string, error) {
|
|
return q.PFlash, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) setPFlash(p []string) {
|
|
q.PFlash = p
|
|
}
|
|
|
|
// append protection device
|
|
func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) {
|
|
hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture")
|
|
return devices, firmware, nil
|
|
}
|