mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-22 16:54:25 +01:00
512 lines
14 KiB
Go
512 lines
14 KiB
Go
// Copyright (c) 2018 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
package virtcontainers
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"fmt"
|
|
"os"
|
|
"strconv"
|
|
|
|
govmmQemu "github.com/intel/govmm/qemu"
|
|
|
|
"github.com/kata-containers/runtime/virtcontainers/device/api"
|
|
"github.com/kata-containers/runtime/virtcontainers/device/drivers"
|
|
"github.com/kata-containers/runtime/virtcontainers/utils"
|
|
)
|
|
|
|
type qemuArch interface {
|
|
// enableNestingChecks nesting checks will be honoured
|
|
enableNestingChecks()
|
|
|
|
// disableNestingChecks nesting checks will be ignored
|
|
disableNestingChecks()
|
|
|
|
// machine returns the machine type
|
|
machine() (govmmQemu.Machine, error)
|
|
|
|
// qemuPath returns the path to the QEMU binary
|
|
qemuPath() (string, error)
|
|
|
|
// kernelParameters returns the kernel parameters
|
|
// if debug is true then kernel debug parameters are included
|
|
kernelParameters(debug bool) []Param
|
|
|
|
//capabilities returns the capabilities supported by QEMU
|
|
capabilities() capabilities
|
|
|
|
// bridges returns the number bridges for the machine type
|
|
bridges(number uint32) []Bridge
|
|
|
|
// cpuTopology returns the CPU topology for the given amount of vcpus
|
|
cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP
|
|
|
|
// cpuModel returns the CPU model for the machine type
|
|
cpuModel() string
|
|
|
|
// memoryTopology returns the memory topology using the given amount of memoryMb and hostMemoryMb
|
|
memoryTopology(memoryMb, hostMemoryMb uint64) govmmQemu.Memory
|
|
|
|
// append9PVolumes appends volumes to devices
|
|
append9PVolumes(devices []govmmQemu.Device, volumes []Volume) []govmmQemu.Device
|
|
|
|
// appendConsole appends a console to devices
|
|
appendConsole(devices []govmmQemu.Device, path string) []govmmQemu.Device
|
|
|
|
// appendImage appends an image to devices
|
|
appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
|
|
|
|
// appendSCSIController appens a SCSI controller to devices
|
|
appendSCSIController(devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread)
|
|
|
|
// appendBridges appends bridges to devices
|
|
appendBridges(devices []govmmQemu.Device, bridges []Bridge) []govmmQemu.Device
|
|
|
|
// append9PVolume appends a 9P volume to devices
|
|
append9PVolume(devices []govmmQemu.Device, volume Volume) []govmmQemu.Device
|
|
|
|
// appendSocket appends a socket to devices
|
|
appendSocket(devices []govmmQemu.Device, socket Socket) []govmmQemu.Device
|
|
|
|
// appendNetwork appends a endpoint device to devices
|
|
appendNetwork(devices []govmmQemu.Device, endpoint Endpoint) []govmmQemu.Device
|
|
|
|
// appendBlockDevice appends a block drive to devices
|
|
appendBlockDevice(devices []govmmQemu.Device, drive drivers.Drive) []govmmQemu.Device
|
|
|
|
// appendVhostUserDevice appends a vhost user device to devices
|
|
appendVhostUserDevice(devices []govmmQemu.Device, vhostUserDevice api.VhostUserDevice) []govmmQemu.Device
|
|
|
|
// appendVFIODevice appends a VFIO device to devices
|
|
appendVFIODevice(devices []govmmQemu.Device, vfioDevice drivers.VFIODevice) []govmmQemu.Device
|
|
|
|
// handleImagePath handles the Hypervisor Config image path
|
|
handleImagePath(config HypervisorConfig)
|
|
}
|
|
|
|
type qemuArchBase struct {
|
|
machineType string
|
|
nestedRun bool
|
|
networkIndex int
|
|
qemuPaths map[string]string
|
|
supportedQemuMachines []govmmQemu.Machine
|
|
kernelParamsNonDebug []Param
|
|
kernelParamsDebug []Param
|
|
kernelParams []Param
|
|
}
|
|
|
|
const (
|
|
defaultCores uint32 = 1
|
|
defaultThreads uint32 = 1
|
|
defaultMemSlots uint8 = 2
|
|
defaultCPUModel = "host"
|
|
defaultBridgeBus = "pcie.0"
|
|
maxDevIDSize = 31
|
|
defaultMsize9p = 8192
|
|
)
|
|
|
|
// This is the PCI start address assigned to the first bridge that
|
|
// is added on the qemu command line. In case of x86_64, the first two PCI
|
|
// addresses (0 and 1) are used by the platform while in case of ARM, address
|
|
// 0 is reserved.
|
|
const bridgePCIStartAddr = 2
|
|
|
|
const (
|
|
// VirtioBlock means use virtio-blk for hotplugging drives
|
|
VirtioBlock = "virtio-blk"
|
|
|
|
// VirtioSCSI means use virtio-scsi for hotplugging drives
|
|
VirtioSCSI = "virtio-scsi"
|
|
)
|
|
|
|
const (
|
|
// QemuPCLite is the QEMU pc-lite machine type for amd64
|
|
QemuPCLite = "pc-lite"
|
|
|
|
// QemuPC is the QEMU pc machine type for amd64
|
|
QemuPC = "pc"
|
|
|
|
// QemuQ35 is the QEMU Q35 machine type for amd64
|
|
QemuQ35 = "q35"
|
|
|
|
// QemuVirt is the QEMU virt machine type for aarch64
|
|
QemuVirt = "virt"
|
|
|
|
// QemuPseries is a QEMU virt machine type for for ppc64le
|
|
QemuPseries = "pseries"
|
|
)
|
|
|
|
// kernelParamsNonDebug is a list of the default kernel
|
|
// parameters that will be used in standard (non-debug) mode.
|
|
var kernelParamsNonDebug = []Param{
|
|
{"quiet", ""},
|
|
}
|
|
|
|
// kernelParamsSystemdNonDebug is a list of the default systemd related
|
|
// kernel parameters that will be used in standard (non-debug) mode.
|
|
var kernelParamsSystemdNonDebug = []Param{
|
|
{"systemd.show_status", "false"},
|
|
}
|
|
|
|
// kernelParamsDebug is a list of the default kernel
|
|
// parameters that will be used in debug mode (as much boot output as
|
|
// possible).
|
|
var kernelParamsDebug = []Param{
|
|
{"debug", ""},
|
|
}
|
|
|
|
// kernelParamsSystemdDebug is a list of the default systemd related kernel
|
|
// parameters that will be used in debug mode (as much boot output as
|
|
// possible).
|
|
var kernelParamsSystemdDebug = []Param{
|
|
{"systemd.show_status", "true"},
|
|
{"systemd.log_level", "debug"},
|
|
}
|
|
|
|
func (q *qemuArchBase) enableNestingChecks() {
|
|
q.nestedRun = true
|
|
}
|
|
|
|
func (q *qemuArchBase) disableNestingChecks() {
|
|
q.nestedRun = false
|
|
}
|
|
|
|
func (q *qemuArchBase) machine() (govmmQemu.Machine, error) {
|
|
for _, m := range q.supportedQemuMachines {
|
|
if m.Type == q.machineType {
|
|
return m, nil
|
|
}
|
|
}
|
|
|
|
return govmmQemu.Machine{}, fmt.Errorf("unrecognised machine type: %v", q.machineType)
|
|
}
|
|
|
|
func (q *qemuArchBase) qemuPath() (string, error) {
|
|
p, ok := q.qemuPaths[q.machineType]
|
|
if !ok {
|
|
return "", fmt.Errorf("Unknown machine type: %s", q.machineType)
|
|
}
|
|
|
|
return p, nil
|
|
}
|
|
|
|
func (q *qemuArchBase) kernelParameters(debug bool) []Param {
|
|
params := q.kernelParams
|
|
|
|
if debug {
|
|
params = append(params, q.kernelParamsDebug...)
|
|
} else {
|
|
params = append(params, q.kernelParamsNonDebug...)
|
|
}
|
|
|
|
return params
|
|
}
|
|
|
|
func (q *qemuArchBase) capabilities() capabilities {
|
|
var caps capabilities
|
|
caps.setBlockDeviceHotplugSupport()
|
|
return caps
|
|
}
|
|
|
|
func (q *qemuArchBase) bridges(number uint32) []Bridge {
|
|
var bridges []Bridge
|
|
|
|
for i := uint32(0); i < number; i++ {
|
|
bridges = append(bridges, Bridge{
|
|
Type: pciBridge,
|
|
ID: fmt.Sprintf("%s-bridge-%d", pciBridge, i),
|
|
Address: make(map[uint32]string),
|
|
})
|
|
}
|
|
|
|
return bridges
|
|
}
|
|
|
|
func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP {
|
|
smp := govmmQemu.SMP{
|
|
CPUs: vcpus,
|
|
Sockets: vcpus,
|
|
Cores: defaultCores,
|
|
Threads: defaultThreads,
|
|
MaxCPUs: maxvcpus,
|
|
}
|
|
|
|
return smp
|
|
}
|
|
|
|
func (q *qemuArchBase) cpuModel() string {
|
|
return defaultCPUModel
|
|
}
|
|
|
|
func (q *qemuArchBase) memoryTopology(memoryMb, hostMemoryMb uint64) govmmQemu.Memory {
|
|
memMax := fmt.Sprintf("%dM", hostMemoryMb)
|
|
mem := fmt.Sprintf("%dM", memoryMb)
|
|
memory := govmmQemu.Memory{
|
|
Size: mem,
|
|
Slots: defaultMemSlots,
|
|
MaxMem: memMax,
|
|
}
|
|
|
|
return memory
|
|
}
|
|
|
|
func (q *qemuArchBase) append9PVolumes(devices []govmmQemu.Device, volumes []Volume) []govmmQemu.Device {
|
|
// Add the shared volumes
|
|
for _, v := range volumes {
|
|
devices = q.append9PVolume(devices, v)
|
|
}
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendConsole(devices []govmmQemu.Device, path string) []govmmQemu.Device {
|
|
serial := govmmQemu.SerialDevice{
|
|
Driver: govmmQemu.VirtioSerial,
|
|
ID: "serial0",
|
|
DisableModern: q.nestedRun,
|
|
}
|
|
|
|
devices = append(devices, serial)
|
|
|
|
var console govmmQemu.CharDevice
|
|
|
|
console = govmmQemu.CharDevice{
|
|
Driver: govmmQemu.Console,
|
|
Backend: govmmQemu.Socket,
|
|
DeviceID: "console0",
|
|
ID: "charconsole0",
|
|
Path: path,
|
|
}
|
|
|
|
devices = append(devices, console)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
|
|
if _, err := os.Stat(path); os.IsNotExist(err) {
|
|
return nil, err
|
|
}
|
|
|
|
randBytes, err := utils.GenerateRandomBytes(8)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
id := utils.MakeNameID("image", hex.EncodeToString(randBytes), maxDevIDSize)
|
|
|
|
drive := drivers.Drive{
|
|
File: path,
|
|
Format: "raw",
|
|
ID: id,
|
|
}
|
|
|
|
return q.appendBlockDevice(devices, drive), nil
|
|
}
|
|
|
|
func (q *qemuArchBase) appendSCSIController(devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread) {
|
|
scsiController := govmmQemu.SCSIController{
|
|
ID: scsiControllerID,
|
|
DisableModern: q.nestedRun,
|
|
}
|
|
|
|
var t *govmmQemu.IOThread
|
|
|
|
if enableIOThreads {
|
|
randBytes, _ := utils.GenerateRandomBytes(8)
|
|
|
|
t = &govmmQemu.IOThread{
|
|
ID: fmt.Sprintf("%s-%s", "iothread", hex.EncodeToString(randBytes)),
|
|
}
|
|
|
|
scsiController.IOThread = t.ID
|
|
}
|
|
|
|
devices = append(devices, scsiController)
|
|
|
|
return devices, t
|
|
}
|
|
|
|
// appendBridges appends to devices the given bridges
|
|
func (q *qemuArchBase) appendBridges(devices []govmmQemu.Device, bridges []Bridge) []govmmQemu.Device {
|
|
for idx, b := range bridges {
|
|
t := govmmQemu.PCIBridge
|
|
if b.Type == pcieBridge {
|
|
t = govmmQemu.PCIEBridge
|
|
}
|
|
|
|
bridges[idx].Addr = bridgePCIStartAddr + idx
|
|
|
|
devices = append(devices,
|
|
govmmQemu.BridgeDevice{
|
|
Type: t,
|
|
Bus: defaultBridgeBus,
|
|
ID: b.ID,
|
|
// Each bridge is required to be assigned a unique chassis id > 0
|
|
Chassis: (idx + 1),
|
|
SHPC: true,
|
|
Addr: strconv.FormatInt(int64(bridges[idx].Addr), 10),
|
|
},
|
|
)
|
|
}
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) append9PVolume(devices []govmmQemu.Device, volume Volume) []govmmQemu.Device {
|
|
if volume.MountTag == "" || volume.HostPath == "" {
|
|
return devices
|
|
}
|
|
|
|
devID := fmt.Sprintf("extra-9p-%s", volume.MountTag)
|
|
if len(devID) > maxDevIDSize {
|
|
devID = devID[:maxDevIDSize]
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.FSDevice{
|
|
Driver: govmmQemu.Virtio9P,
|
|
FSDriver: govmmQemu.Local,
|
|
ID: devID,
|
|
Path: volume.HostPath,
|
|
MountTag: volume.MountTag,
|
|
SecurityModel: govmmQemu.None,
|
|
DisableModern: q.nestedRun,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendSocket(devices []govmmQemu.Device, socket Socket) []govmmQemu.Device {
|
|
devID := socket.ID
|
|
if len(devID) > maxDevIDSize {
|
|
devID = devID[:maxDevIDSize]
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.CharDevice{
|
|
Driver: govmmQemu.VirtioSerialPort,
|
|
Backend: govmmQemu.Socket,
|
|
DeviceID: socket.DeviceID,
|
|
ID: devID,
|
|
Path: socket.HostPath,
|
|
Name: socket.Name,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func networkModelToQemuType(model NetInterworkingModel) govmmQemu.NetDeviceType {
|
|
switch model {
|
|
case NetXConnectBridgedModel:
|
|
return govmmQemu.MACVTAP //TODO: We should rename MACVTAP to .NET_FD
|
|
case NetXConnectMacVtapModel:
|
|
return govmmQemu.MACVTAP
|
|
//case ModelEnlightened:
|
|
// Here the Network plugin will create a VM native interface
|
|
// which could be MacVtap, IpVtap, SRIOV, veth-tap, vhost-user
|
|
// In these cases we will determine the interface type here
|
|
// and pass in the native interface through
|
|
default:
|
|
//TAP should work for most other cases
|
|
return govmmQemu.TAP
|
|
}
|
|
}
|
|
|
|
func (q *qemuArchBase) appendNetwork(devices []govmmQemu.Device, endpoint Endpoint) []govmmQemu.Device {
|
|
switch ep := endpoint.(type) {
|
|
case *VirtualEndpoint:
|
|
devices = append(devices,
|
|
govmmQemu.NetDevice{
|
|
Type: networkModelToQemuType(ep.NetPair.NetInterworkingModel),
|
|
Driver: govmmQemu.VirtioNetPCI,
|
|
ID: fmt.Sprintf("network-%d", q.networkIndex),
|
|
IFName: ep.NetPair.TAPIface.Name,
|
|
MACAddress: ep.NetPair.TAPIface.HardAddr,
|
|
DownScript: "no",
|
|
Script: "no",
|
|
VHost: true,
|
|
DisableModern: q.nestedRun,
|
|
FDs: ep.NetPair.VMFds,
|
|
VhostFDs: ep.NetPair.VhostFds,
|
|
},
|
|
)
|
|
q.networkIndex++
|
|
}
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendBlockDevice(devices []govmmQemu.Device, drive drivers.Drive) []govmmQemu.Device {
|
|
if drive.File == "" || drive.ID == "" || drive.Format == "" {
|
|
return devices
|
|
}
|
|
|
|
if len(drive.ID) > maxDevIDSize {
|
|
drive.ID = drive.ID[:maxDevIDSize]
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.BlockDevice{
|
|
Driver: govmmQemu.VirtioBlock,
|
|
ID: drive.ID,
|
|
File: drive.File,
|
|
AIO: govmmQemu.Threads,
|
|
Format: govmmQemu.BlockDeviceFormat(drive.Format),
|
|
Interface: "none",
|
|
DisableModern: q.nestedRun,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendVhostUserDevice(devices []govmmQemu.Device, vhostUserDevice api.VhostUserDevice) []govmmQemu.Device {
|
|
qemuVhostUserDevice := govmmQemu.VhostUserDevice{}
|
|
|
|
// TODO: find a way to remove dependency of drivers package
|
|
switch vhostUserDevice := vhostUserDevice.(type) {
|
|
case *drivers.VhostUserNetDevice:
|
|
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("net", vhostUserDevice.ID, maxDevIDSize)
|
|
qemuVhostUserDevice.Address = vhostUserDevice.MacAddress
|
|
case *drivers.VhostUserSCSIDevice:
|
|
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("scsi", vhostUserDevice.ID, maxDevIDSize)
|
|
case *drivers.VhostUserBlkDevice:
|
|
}
|
|
|
|
qemuVhostUserDevice.VhostUserType = govmmQemu.VhostUserDeviceType(vhostUserDevice.Type())
|
|
qemuVhostUserDevice.SocketPath = vhostUserDevice.Attrs().SocketPath
|
|
qemuVhostUserDevice.CharDevID = utils.MakeNameID("char", vhostUserDevice.Attrs().ID, maxDevIDSize)
|
|
|
|
devices = append(devices, qemuVhostUserDevice)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDevice drivers.VFIODevice) []govmmQemu.Device {
|
|
if vfioDevice.BDF == "" {
|
|
return devices
|
|
}
|
|
|
|
devices = append(devices,
|
|
govmmQemu.VFIODevice{
|
|
BDF: vfioDevice.BDF,
|
|
},
|
|
)
|
|
|
|
return devices
|
|
}
|
|
|
|
func (q *qemuArchBase) handleImagePath(config HypervisorConfig) {
|
|
if config.ImagePath != "" {
|
|
q.kernelParams = append(q.kernelParams, kernelRootParams...)
|
|
q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...)
|
|
q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...)
|
|
}
|
|
}
|