Files
kata-containers/virtcontainers/qemu_arch_base.go
Sebastien Boeuf 0ae5b142a6 qemu: Disable the default romfile used by virtio-pci
As we try to make sure we don't pull unneeded dependency when using
QEMU or NEMU as the hypervisor, and because SeaBIOS and OVMF firmware
already handle what's done by the default efi-virtio.rom binary, this
commit gets rid of this dependency by providing a default empty one.

Fixes #812

Signed-off-by: Sebastien Boeuf <sebastien.boeuf@intel.com>
2018-10-16 18:29:49 -07:00

562 lines
15 KiB
Go

// Copyright (c) 2018 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"encoding/hex"
"fmt"
"os"
"strconv"
govmmQemu "github.com/intel/govmm/qemu"
"github.com/kata-containers/runtime/virtcontainers/device/config"
"github.com/kata-containers/runtime/virtcontainers/utils"
)
type qemuArch interface {
// enableNestingChecks nesting checks will be honoured
enableNestingChecks()
// disableNestingChecks nesting checks will be ignored
disableNestingChecks()
// enableVhostNet vhost will be enabled
enableVhostNet()
// disableVhostNet vhost will be disabled
disableVhostNet()
// machine returns the machine type
machine() (govmmQemu.Machine, error)
// qemuPath returns the path to the QEMU binary
qemuPath() (string, error)
// kernelParameters returns the kernel parameters
// if debug is true then kernel debug parameters are included
kernelParameters(debug bool) []Param
//capabilities returns the capabilities supported by QEMU
capabilities() capabilities
// bridges returns the number bridges for the machine type
bridges(number uint32) []Bridge
// cpuTopology returns the CPU topology for the given amount of vcpus
cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP
// cpuModel returns the CPU model for the machine type
cpuModel() string
// memoryTopology returns the memory topology using the given amount of memoryMb and hostMemoryMb
memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory
// appendConsole appends a console to devices
appendConsole(devices []govmmQemu.Device, path string) []govmmQemu.Device
// appendImage appends an image to devices
appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error)
// appendSCSIController appens a SCSI controller to devices
appendSCSIController(devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread)
// appendBridges appends bridges to devices
appendBridges(devices []govmmQemu.Device, bridges []Bridge) []govmmQemu.Device
// append9PVolume appends a 9P volume to devices
append9PVolume(devices []govmmQemu.Device, volume Volume) []govmmQemu.Device
// appendSocket appends a socket to devices
appendSocket(devices []govmmQemu.Device, socket Socket) []govmmQemu.Device
// appendVSockPCI appends a vsock PCI to devices
appendVSockPCI(devices []govmmQemu.Device, vsock kataVSOCK) []govmmQemu.Device
// appendNetwork appends a endpoint device to devices
appendNetwork(devices []govmmQemu.Device, endpoint Endpoint) []govmmQemu.Device
// appendBlockDevice appends a block drive to devices
appendBlockDevice(devices []govmmQemu.Device, drive config.BlockDrive) []govmmQemu.Device
// appendVhostUserDevice appends a vhost user device to devices
appendVhostUserDevice(devices []govmmQemu.Device, drive config.VhostUserDeviceAttrs) []govmmQemu.Device
// appendVFIODevice appends a VFIO device to devices
appendVFIODevice(devices []govmmQemu.Device, vfioDevice config.VFIODev) []govmmQemu.Device
// appendRNGDevice appends a RNG device to devices
appendRNGDevice(devices []govmmQemu.Device, rngDevice config.RNGDev) []govmmQemu.Device
// handleImagePath handles the Hypervisor Config image path
handleImagePath(config HypervisorConfig)
}
type qemuArchBase struct {
machineType string
nestedRun bool
vhost bool
networkIndex int
qemuPaths map[string]string
supportedQemuMachines []govmmQemu.Machine
kernelParamsNonDebug []Param
kernelParamsDebug []Param
kernelParams []Param
}
const (
defaultCores uint32 = 1
defaultThreads uint32 = 1
defaultCPUModel = "host"
defaultBridgeBus = "pcie.0"
maxDevIDSize = 31
defaultMsize9p = 8192
)
// This is the PCI start address assigned to the first bridge that
// is added on the qemu command line. In case of x86_64, the first two PCI
// addresses (0 and 1) are used by the platform while in case of ARM, address
// 0 is reserved.
const bridgePCIStartAddr = 2
const (
// VirtioBlock means use virtio-blk for hotplugging drives
VirtioBlock = "virtio-blk"
// VirtioSCSI means use virtio-scsi for hotplugging drives
VirtioSCSI = "virtio-scsi"
)
const (
// QemuPCLite is the QEMU pc-lite machine type for amd64
QemuPCLite = "pc-lite"
// QemuPC is the QEMU pc machine type for amd64
QemuPC = "pc"
// QemuQ35 is the QEMU Q35 machine type for amd64
QemuQ35 = "q35"
// QemuVirt is the QEMU virt machine type for aarch64 or amd64
QemuVirt = "virt"
// QemuPseries is a QEMU virt machine type for ppc64le
QemuPseries = "pseries"
)
// kernelParamsNonDebug is a list of the default kernel
// parameters that will be used in standard (non-debug) mode.
var kernelParamsNonDebug = []Param{
{"quiet", ""},
}
// kernelParamsSystemdNonDebug is a list of the default systemd related
// kernel parameters that will be used in standard (non-debug) mode.
var kernelParamsSystemdNonDebug = []Param{
{"systemd.show_status", "false"},
}
// kernelParamsDebug is a list of the default kernel
// parameters that will be used in debug mode (as much boot output as
// possible).
var kernelParamsDebug = []Param{
{"debug", ""},
}
// kernelParamsSystemdDebug is a list of the default systemd related kernel
// parameters that will be used in debug mode (as much boot output as
// possible).
var kernelParamsSystemdDebug = []Param{
{"systemd.show_status", "true"},
{"systemd.log_level", "debug"},
}
func (q *qemuArchBase) enableNestingChecks() {
q.nestedRun = true
}
func (q *qemuArchBase) disableNestingChecks() {
q.nestedRun = false
}
func (q *qemuArchBase) enableVhostNet() {
q.vhost = true
}
func (q *qemuArchBase) disableVhostNet() {
q.vhost = false
}
func (q *qemuArchBase) machine() (govmmQemu.Machine, error) {
for _, m := range q.supportedQemuMachines {
if m.Type == q.machineType {
return m, nil
}
}
return govmmQemu.Machine{}, fmt.Errorf("unrecognised machine type: %v", q.machineType)
}
func (q *qemuArchBase) qemuPath() (string, error) {
p, ok := q.qemuPaths[q.machineType]
if !ok {
return "", fmt.Errorf("Unknown machine type: %s", q.machineType)
}
return p, nil
}
func (q *qemuArchBase) kernelParameters(debug bool) []Param {
params := q.kernelParams
if debug {
params = append(params, q.kernelParamsDebug...)
} else {
params = append(params, q.kernelParamsNonDebug...)
}
return params
}
func (q *qemuArchBase) capabilities() capabilities {
var caps capabilities
caps.setBlockDeviceHotplugSupport()
return caps
}
func (q *qemuArchBase) bridges(number uint32) []Bridge {
var bridges []Bridge
for i := uint32(0); i < number; i++ {
bridges = append(bridges, Bridge{
Type: pciBridge,
ID: fmt.Sprintf("%s-bridge-%d", pciBridge, i),
Address: make(map[uint32]string),
})
}
return bridges
}
func (q *qemuArchBase) cpuTopology(vcpus, maxvcpus uint32) govmmQemu.SMP {
smp := govmmQemu.SMP{
CPUs: vcpus,
Sockets: vcpus,
Cores: defaultCores,
Threads: defaultThreads,
MaxCPUs: maxvcpus,
}
return smp
}
func (q *qemuArchBase) cpuModel() string {
return defaultCPUModel
}
func (q *qemuArchBase) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) govmmQemu.Memory {
memMax := fmt.Sprintf("%dM", hostMemoryMb)
mem := fmt.Sprintf("%dM", memoryMb)
memory := govmmQemu.Memory{
Size: mem,
Slots: slots,
MaxMem: memMax,
}
return memory
}
func (q *qemuArchBase) appendConsole(devices []govmmQemu.Device, path string) []govmmQemu.Device {
serial := govmmQemu.SerialDevice{
Driver: govmmQemu.VirtioSerial,
ID: "serial0",
DisableModern: q.nestedRun,
}
devices = append(devices, serial)
var console govmmQemu.CharDevice
console = govmmQemu.CharDevice{
Driver: govmmQemu.Console,
Backend: govmmQemu.Socket,
DeviceID: "console0",
ID: "charconsole0",
Path: path,
}
devices = append(devices, console)
return devices
}
func (q *qemuArchBase) appendImage(devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) {
if _, err := os.Stat(path); os.IsNotExist(err) {
return nil, err
}
randBytes, err := utils.GenerateRandomBytes(8)
if err != nil {
return nil, err
}
id := utils.MakeNameID("image", hex.EncodeToString(randBytes), maxDevIDSize)
drive := config.BlockDrive{
File: path,
Format: "raw",
ID: id,
}
return q.appendBlockDevice(devices, drive), nil
}
func (q *qemuArchBase) appendSCSIController(devices []govmmQemu.Device, enableIOThreads bool) ([]govmmQemu.Device, *govmmQemu.IOThread) {
scsiController := govmmQemu.SCSIController{
ID: scsiControllerID,
DisableModern: q.nestedRun,
}
var t *govmmQemu.IOThread
if enableIOThreads {
randBytes, _ := utils.GenerateRandomBytes(8)
t = &govmmQemu.IOThread{
ID: fmt.Sprintf("%s-%s", "iothread", hex.EncodeToString(randBytes)),
}
scsiController.IOThread = t.ID
}
devices = append(devices, scsiController)
return devices, t
}
// appendBridges appends to devices the given bridges
func (q *qemuArchBase) appendBridges(devices []govmmQemu.Device, bridges []Bridge) []govmmQemu.Device {
for idx, b := range bridges {
t := govmmQemu.PCIBridge
if b.Type == pcieBridge {
t = govmmQemu.PCIEBridge
}
bridges[idx].Addr = bridgePCIStartAddr + idx
devices = append(devices,
govmmQemu.BridgeDevice{
Type: t,
Bus: defaultBridgeBus,
ID: b.ID,
// Each bridge is required to be assigned a unique chassis id > 0
Chassis: idx + 1,
SHPC: true,
Addr: strconv.FormatInt(int64(bridges[idx].Addr), 10),
},
)
}
return devices
}
func (q *qemuArchBase) append9PVolume(devices []govmmQemu.Device, volume Volume) []govmmQemu.Device {
if volume.MountTag == "" || volume.HostPath == "" {
return devices
}
devID := fmt.Sprintf("extra-9p-%s", volume.MountTag)
if len(devID) > maxDevIDSize {
devID = devID[:maxDevIDSize]
}
devices = append(devices,
govmmQemu.FSDevice{
Driver: govmmQemu.Virtio9P,
FSDriver: govmmQemu.Local,
ID: devID,
Path: volume.HostPath,
MountTag: volume.MountTag,
SecurityModel: govmmQemu.None,
DisableModern: q.nestedRun,
},
)
return devices
}
func (q *qemuArchBase) appendSocket(devices []govmmQemu.Device, socket Socket) []govmmQemu.Device {
devID := socket.ID
if len(devID) > maxDevIDSize {
devID = devID[:maxDevIDSize]
}
devices = append(devices,
govmmQemu.CharDevice{
Driver: govmmQemu.VirtioSerialPort,
Backend: govmmQemu.Socket,
DeviceID: socket.DeviceID,
ID: devID,
Path: socket.HostPath,
Name: socket.Name,
},
)
return devices
}
func (q *qemuArchBase) appendVSockPCI(devices []govmmQemu.Device, vsock kataVSOCK) []govmmQemu.Device {
devices = append(devices,
govmmQemu.VSOCKDevice{
ID: fmt.Sprintf("vsock-%d", vsock.contextID),
ContextID: vsock.contextID,
VHostFD: vsock.vhostFd,
DisableModern: q.nestedRun,
},
)
return devices
}
func networkModelToQemuType(model NetInterworkingModel) govmmQemu.NetDeviceType {
switch model {
case NetXConnectBridgedModel:
return govmmQemu.MACVTAP //TODO: We should rename MACVTAP to .NET_FD
case NetXConnectMacVtapModel:
return govmmQemu.MACVTAP
//case ModelEnlightened:
// Here the Network plugin will create a VM native interface
// which could be MacVtap, IpVtap, SRIOV, veth-tap, vhost-user
// In these cases we will determine the interface type here
// and pass in the native interface through
default:
//TAP should work for most other cases
return govmmQemu.TAP
}
}
func (q *qemuArchBase) appendNetwork(devices []govmmQemu.Device, endpoint Endpoint) []govmmQemu.Device {
switch ep := endpoint.(type) {
case *VethEndpoint, *BridgedMacvlanEndpoint:
netPair := ep.NetworkPair()
devices = append(devices,
govmmQemu.NetDevice{
Type: networkModelToQemuType(netPair.NetInterworkingModel),
Driver: govmmQemu.VirtioNetPCI,
ID: fmt.Sprintf("network-%d", q.networkIndex),
IFName: netPair.TAPIface.Name,
MACAddress: netPair.TAPIface.HardAddr,
DownScript: "no",
Script: "no",
VHost: q.vhost,
DisableModern: q.nestedRun,
FDs: netPair.VMFds,
VhostFDs: netPair.VhostFds,
},
)
q.networkIndex++
case *MacvtapEndpoint:
devices = append(devices,
govmmQemu.NetDevice{
Type: govmmQemu.MACVTAP,
Driver: govmmQemu.VirtioNetPCI,
ID: fmt.Sprintf("network-%d", q.networkIndex),
IFName: ep.Name(),
MACAddress: ep.HardwareAddr(),
DownScript: "no",
Script: "no",
VHost: q.vhost,
DisableModern: q.nestedRun,
FDs: ep.VMFds,
VhostFDs: ep.VhostFds,
},
)
q.networkIndex++
}
return devices
}
func (q *qemuArchBase) appendBlockDevice(devices []govmmQemu.Device, drive config.BlockDrive) []govmmQemu.Device {
if drive.File == "" || drive.ID == "" || drive.Format == "" {
return devices
}
if len(drive.ID) > maxDevIDSize {
drive.ID = drive.ID[:maxDevIDSize]
}
devices = append(devices,
govmmQemu.BlockDevice{
Driver: govmmQemu.VirtioBlock,
ID: drive.ID,
File: drive.File,
AIO: govmmQemu.Threads,
Format: govmmQemu.BlockDeviceFormat(drive.Format),
Interface: "none",
DisableModern: q.nestedRun,
},
)
return devices
}
func (q *qemuArchBase) appendVhostUserDevice(devices []govmmQemu.Device, attr config.VhostUserDeviceAttrs) []govmmQemu.Device {
qemuVhostUserDevice := govmmQemu.VhostUserDevice{}
switch attr.Type {
case config.VhostUserNet:
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("net", attr.DevID, maxDevIDSize)
qemuVhostUserDevice.Address = attr.MacAddress
case config.VhostUserSCSI:
qemuVhostUserDevice.TypeDevID = utils.MakeNameID("scsi", attr.DevID, maxDevIDSize)
case config.VhostUserBlk:
}
qemuVhostUserDevice.VhostUserType = govmmQemu.DeviceDriver(attr.Type)
qemuVhostUserDevice.SocketPath = attr.SocketPath
qemuVhostUserDevice.CharDevID = utils.MakeNameID("char", attr.DevID, maxDevIDSize)
devices = append(devices, qemuVhostUserDevice)
return devices
}
func (q *qemuArchBase) appendVFIODevice(devices []govmmQemu.Device, vfioDev config.VFIODev) []govmmQemu.Device {
if vfioDev.BDF == "" {
return devices
}
devices = append(devices,
govmmQemu.VFIODevice{
BDF: vfioDev.BDF,
},
)
return devices
}
func (q *qemuArchBase) appendRNGDevice(devices []govmmQemu.Device, rngDev config.RNGDev) []govmmQemu.Device {
devices = append(devices,
govmmQemu.RngDevice{
ID: rngDev.ID,
Filename: rngDev.Filename,
},
)
return devices
}
func (q *qemuArchBase) handleImagePath(config HypervisorConfig) {
if config.ImagePath != "" {
q.kernelParams = append(q.kernelParams, kernelRootParams...)
q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...)
q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...)
}
}