Files
kata-containers/src/runtime/pkg/katautils/config.go
Eric Ernst 8cde54131a runtime: introduce static sandbox resource management
There are software and hardware architectures which do not support
dynamically adjusting the CPU and memory resources associated with a
sandbox. For these, today, they rely on "default CPU" and "default
memory" configuration options for the runtime, either set by annotation
or by the configuration toml on disk.

In the case of a single container (launched by ctr, or something like
"docker run"), we could allow for sizing the VM correctly, since all of
the information is already available to us at creation time.

In the sandbox / pod container case, it is possible for the upper layer
container runtime (ie, containerd or crio) could send a specific
annotation indicating the total workload resource requirements
associated with the sandbox creation request.

In the case of sizing information not being provided, we will follow
same behavior as today: start the VM with (just) the default CPU/memory.

If this information is provided, we'll track this as Workload specific
resources, and track default sizing information as Base resources. We
will update the hypervisor configuration to utilize Base+Workload
resources, thus starting the VM with the appropriate amount of CPU and
memory.

In this scenario (we start the VM with the "right" amount of
CPU/Memory), we do not want to update the VM resources when containers
are added, or adjusted in size.

This functionality is introduced behind a configuration flag,
`static_sandbox_resource_mgmt`. This is defaulted to false for all
configurations except Firecracker, which is set to true.

This'll greatly improve UX for folks who are utilizing
Kata with a VMM or hardware architecture that doesn't support hotplug.

Note, users will still be unable to do in place vertical pod autoscaling
or other dynamic container/pod sizing with this enabled.

Fixes: #3264

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
2022-01-26 09:04:38 -08:00

1366 lines
38 KiB
Go

// Copyright (c) 2018-2021 Intel Corporation
// Copyright (c) 2018 HyperHQ Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package katautils
import (
"errors"
"fmt"
"os"
"path/filepath"
goruntime "runtime"
"strings"
"github.com/BurntSushi/toml"
govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/pkg/oci"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config"
exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"github.com/sirupsen/logrus"
)
const (
defaultHypervisor = vc.QemuHypervisor
)
// The TOML configuration file contains a number of sections (or
// tables). The names of these tables are in dotted ("nested table")
// form:
//
// [<component>.<type>]
//
// The components are hypervisor, and agent. For example,
//
// [agent.kata]
//
// The currently supported types are listed below:
const (
// supported hypervisor component types
firecrackerHypervisorTableType = "firecracker"
clhHypervisorTableType = "clh"
qemuHypervisorTableType = "qemu"
acrnHypervisorTableType = "acrn"
// the maximum amount of PCI bridges that can be cold plugged in a VM
maxPCIBridges uint32 = 5
)
type tomlConfig struct {
Hypervisor map[string]hypervisor
Agent map[string]agent
Runtime runtime
Image image
Factory factory
}
type image struct {
Provision string `toml:"provision"`
ServiceOffload bool `toml:"service_offload"`
}
type factory struct {
TemplatePath string `toml:"template_path"`
VMCacheEndpoint string `toml:"vm_cache_endpoint"`
VMCacheNumber uint `toml:"vm_cache_number"`
Template bool `toml:"enable_template"`
}
type hypervisor struct {
Path string `toml:"path"`
JailerPath string `toml:"jailer_path"`
Kernel string `toml:"kernel"`
CtlPath string `toml:"ctlpath"`
Initrd string `toml:"initrd"`
Image string `toml:"image"`
Firmware string `toml:"firmware"`
MachineAccelerators string `toml:"machine_accelerators"`
CPUFeatures string `toml:"cpu_features"`
KernelParams string `toml:"kernel_params"`
MachineType string `toml:"machine_type"`
BlockDeviceDriver string `toml:"block_device_driver"`
EntropySource string `toml:"entropy_source"`
SharedFS string `toml:"shared_fs"`
VirtioFSDaemon string `toml:"virtio_fs_daemon"`
VirtioFSCache string `toml:"virtio_fs_cache"`
VhostUserStorePath string `toml:"vhost_user_store_path"`
FileBackedMemRootDir string `toml:"file_mem_backend"`
GuestHookPath string `toml:"guest_hook_path"`
GuestMemoryDumpPath string `toml:"guest_memory_dump_path"`
HypervisorPathList []string `toml:"valid_hypervisor_paths"`
JailerPathList []string `toml:"valid_jailer_paths"`
CtlPathList []string `toml:"valid_ctlpaths"`
VirtioFSDaemonList []string `toml:"valid_virtio_fs_daemon_paths"`
VirtioFSExtraArgs []string `toml:"virtio_fs_extra_args"`
PFlashList []string `toml:"pflashes"`
VhostUserStorePathList []string `toml:"valid_vhost_user_store_paths"`
FileBackedMemRootList []string `toml:"valid_file_mem_backends"`
EntropySourceList []string `toml:"valid_entropy_sources"`
EnableAnnotations []string `toml:"enable_annotations"`
RxRateLimiterMaxRate uint64 `toml:"rx_rate_limiter_max_rate"`
TxRateLimiterMaxRate uint64 `toml:"tx_rate_limiter_max_rate"`
MemOffset uint64 `toml:"memory_offset"`
VirtioFSCacheSize uint32 `toml:"virtio_fs_cache_size"`
DefaultMaxVCPUs uint32 `toml:"default_maxvcpus"`
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
PCIeRootPort uint32 `toml:"pcie_root_port"`
NumVCPUs int32 `toml:"default_vcpus"`
BlockDeviceCacheSet bool `toml:"block_device_cache_set"`
BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"`
BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"`
EnableVhostUserStore bool `toml:"enable_vhost_user_store"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
MemPrealloc bool `toml:"enable_mem_prealloc"`
HugePages bool `toml:"enable_hugepages"`
VirtioMem bool `toml:"enable_virtio_mem"`
IOMMU bool `toml:"enable_iommu"`
IOMMUPlatform bool `toml:"enable_iommu_platform"`
Debug bool `toml:"enable_debug"`
DisableNestingChecks bool `toml:"disable_nesting_checks"`
EnableIOThreads bool `toml:"enable_iothreads"`
DisableImageNvdimm bool `toml:"disable_image_nvdimm"`
HotplugVFIOOnRootBus bool `toml:"hotplug_vfio_on_root_bus"`
DisableVhostNet bool `toml:"disable_vhost_net"`
GuestMemoryDumpPaging bool `toml:"guest_memory_dump_paging"`
ConfidentialGuest bool `toml:"confidential_guest"`
GuestSwap bool `toml:"enable_guest_swap"`
Rootless bool `toml:"rootless"`
DisableSeccomp bool `toml:"disable_seccomp"`
}
type runtime struct {
InterNetworkModel string `toml:"internetworking_model"`
JaegerEndpoint string `toml:"jaeger_endpoint"`
JaegerUser string `toml:"jaeger_user"`
JaegerPassword string `toml:"jaeger_password"`
VfioMode string `toml:"vfio_mode"`
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
Experimental []string `toml:"experimental"`
Debug bool `toml:"enable_debug"`
Tracing bool `toml:"enable_tracing"`
DisableNewNetNs bool `toml:"disable_new_netns"`
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
EnablePprof bool `toml:"enable_pprof"`
}
type agent struct {
KernelModules []string `toml:"kernel_modules"`
Debug bool `toml:"enable_debug"`
Tracing bool `toml:"enable_tracing"`
DebugConsoleEnabled bool `toml:"debug_console_enabled"`
DialTimeout uint32 `toml:"dial_timeout"`
}
func (h hypervisor) path() (string, error) {
p := h.Path
if h.Path == "" {
p = defaultHypervisorPath
}
return ResolvePath(p)
}
func (h hypervisor) ctlpath() (string, error) {
p := h.CtlPath
if h.CtlPath == "" {
p = defaultHypervisorCtlPath
}
return ResolvePath(p)
}
func (h hypervisor) jailerPath() (string, error) {
p := h.JailerPath
if h.JailerPath == "" {
return "", nil
}
return ResolvePath(p)
}
func (h hypervisor) kernel() (string, error) {
p := h.Kernel
if p == "" {
p = defaultKernelPath
}
return ResolvePath(p)
}
func (h hypervisor) initrd() (string, error) {
p := h.Initrd
if p == "" {
return "", errors.New("initrd is not set")
}
return ResolvePath(p)
}
func (h hypervisor) image() (string, error) {
p := h.Image
if p == "" {
return "", errors.New("image is not set")
}
return ResolvePath(p)
}
func (h hypervisor) firmware() (string, error) {
p := h.Firmware
if p == "" {
if defaultFirmwarePath == "" {
return "", nil
}
p = defaultFirmwarePath
}
return ResolvePath(p)
}
func (h hypervisor) PFlash() ([]string, error) {
pflashes := h.PFlashList
if len(pflashes) == 0 {
return []string{}, nil
}
for _, pflash := range pflashes {
_, err := ResolvePath(pflash)
if err != nil {
return []string{}, fmt.Errorf("failed to resolve path: %s: %v", pflash, err)
}
}
return pflashes, nil
}
func (h hypervisor) machineAccelerators() string {
var machineAccelerators string
for _, accelerator := range strings.Split(h.MachineAccelerators, ",") {
if accelerator != "" {
machineAccelerators += strings.TrimSpace(accelerator) + ","
}
}
machineAccelerators = strings.Trim(machineAccelerators, ",")
return machineAccelerators
}
func (h hypervisor) cpuFeatures() string {
var cpuFeatures string
for _, feature := range strings.Split(h.CPUFeatures, ",") {
if feature != "" {
cpuFeatures += strings.TrimSpace(feature) + ","
}
}
cpuFeatures = strings.Trim(cpuFeatures, ",")
return cpuFeatures
}
func (h hypervisor) kernelParams() string {
if h.KernelParams == "" {
return defaultKernelParams
}
return h.KernelParams
}
func (h hypervisor) machineType() string {
if h.MachineType == "" {
return defaultMachineType
}
return h.MachineType
}
func (h hypervisor) GetEntropySource() string {
if h.EntropySource == "" {
return defaultEntropySource
}
return h.EntropySource
}
// Current cpu number should not larger than defaultMaxVCPUs()
func getCurrentCpuNum() uint32 {
var cpu uint32
h := hypervisor{}
cpu = uint32(goruntime.NumCPU())
if cpu > h.defaultMaxVCPUs() {
cpu = h.defaultMaxVCPUs()
}
return cpu
}
func (h hypervisor) defaultVCPUs() uint32 {
numCPUs := getCurrentCpuNum()
if h.NumVCPUs < 0 || h.NumVCPUs > int32(numCPUs) {
return numCPUs
}
if h.NumVCPUs == 0 { // or unspecified
return defaultVCPUCount
}
return uint32(h.NumVCPUs)
}
func (h hypervisor) defaultMaxVCPUs() uint32 {
numcpus := uint32(goruntime.NumCPU())
maxvcpus := vc.MaxQemuVCPUs()
reqVCPUs := h.DefaultMaxVCPUs
//don't exceed the number of physical CPUs. If a default is not provided, use the
// numbers of physical CPUs
if reqVCPUs >= numcpus || reqVCPUs == 0 {
reqVCPUs = numcpus
}
// Don't exceed the maximum number of vCPUs supported by hypervisor
if reqVCPUs > maxvcpus {
return maxvcpus
}
return reqVCPUs
}
func (h hypervisor) defaultMemSz() uint32 {
if h.MemorySize < vc.MinHypervisorMemory {
return defaultMemSize // MiB
}
return h.MemorySize
}
func (h hypervisor) defaultMemSlots() uint32 {
slots := h.MemSlots
if slots == 0 {
slots = defaultMemSlots
}
return slots
}
func (h hypervisor) defaultMemOffset() uint64 {
offset := h.MemOffset
if offset == 0 {
offset = defaultMemOffset
}
return offset
}
func (h hypervisor) defaultBridges() uint32 {
if h.DefaultBridges == 0 {
return defaultBridgesCount
}
if h.DefaultBridges > maxPCIBridges {
return maxPCIBridges
}
return h.DefaultBridges
}
func (h hypervisor) defaultVirtioFSCache() string {
if h.VirtioFSCache == "" {
return defaultVirtioFSCacheMode
}
return h.VirtioFSCache
}
func (h hypervisor) blockDeviceDriver() (string, error) {
supportedBlockDrivers := []string{config.VirtioSCSI, config.VirtioBlock, config.VirtioMmio, config.Nvdimm, config.VirtioBlockCCW}
if h.BlockDeviceDriver == "" {
return defaultBlockDeviceDriver, nil
}
for _, b := range supportedBlockDrivers {
if b == h.BlockDeviceDriver {
return h.BlockDeviceDriver, nil
}
}
return "", fmt.Errorf("Invalid hypervisor block storage driver %v specified (supported drivers: %v)", h.BlockDeviceDriver, supportedBlockDrivers)
}
func (h hypervisor) sharedFS() (string, error) {
supportedSharedFS := []string{config.Virtio9P, config.VirtioFS}
if h.SharedFS == "" {
return config.Virtio9P, nil
}
for _, fs := range supportedSharedFS {
if fs == h.SharedFS {
return h.SharedFS, nil
}
}
return "", fmt.Errorf("Invalid hypervisor shared file system %v specified (supported file systems: %v)", h.SharedFS, supportedSharedFS)
}
func (h hypervisor) msize9p() uint32 {
if h.Msize9p == 0 {
return defaultMsize9p
}
return h.Msize9p
}
func (h hypervisor) guestHookPath() string {
if h.GuestHookPath == "" {
return defaultGuestHookPath
}
return h.GuestHookPath
}
func (h hypervisor) vhostUserStorePath() string {
if h.VhostUserStorePath == "" {
return defaultVhostUserStorePath
}
return h.VhostUserStorePath
}
func (h hypervisor) getInitrdAndImage() (initrd string, image string, err error) {
initrd, errInitrd := h.initrd()
image, errImage := h.image()
if image != "" && initrd != "" {
return "", "", errors.New("having both an image and an initrd defined in the configuration file is not supported")
}
if errInitrd != nil && errImage != nil {
return "", "", fmt.Errorf("Either initrd or image must be set to a valid path (initrd: %v) (image: %v)", errInitrd, errImage)
}
return
}
func (h hypervisor) getRxRateLimiterCfg() uint64 {
return h.RxRateLimiterMaxRate
}
func (h hypervisor) getTxRateLimiterCfg() uint64 {
return h.TxRateLimiterMaxRate
}
func (h hypervisor) getIOMMUPlatform() bool {
if h.IOMMUPlatform {
kataUtilsLogger.Info("IOMMUPlatform is enabled by default.")
} else {
kataUtilsLogger.Info("IOMMUPlatform is disabled by default.")
}
return h.IOMMUPlatform
}
func (a agent) debugConsoleEnabled() bool {
return a.DebugConsoleEnabled
}
func (a agent) dialTimout() uint32 {
return a.DialTimeout
}
func (a agent) debug() bool {
return a.Debug
}
func (a agent) trace() bool {
return a.Tracing
}
func (a agent) kernelModules() []string {
return a.KernelModules
}
func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
hypervisor, err := h.path()
if err != nil {
return vc.HypervisorConfig{}, err
}
jailer, err := h.jailerPath()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernel, err := h.kernel()
if err != nil {
return vc.HypervisorConfig{}, err
}
initrd, image, err := h.getInitrdAndImage()
if err != nil {
return vc.HypervisorConfig{}, err
}
firmware, err := h.firmware()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernelParams := h.kernelParams()
blockDriver, err := h.blockDeviceDriver()
if err != nil {
return vc.HypervisorConfig{}, err
}
rxRateLimiterMaxRate := h.getRxRateLimiterCfg()
txRateLimiterMaxRate := h.getTxRateLimiterCfg()
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
JailerPath: jailer,
JailerPathList: h.JailerPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
FirmwarePath: firmware,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
NumVCPUs: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
HugePages: h.HugePages,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
EnableIOThreads: h.EnableIOThreads,
DisableVhostNet: true, // vhost-net backend is not supported in Firecracker
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
EnableAnnotations: h.EnableAnnotations,
}, nil
}
func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
hypervisor, err := h.path()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernel, err := h.kernel()
if err != nil {
return vc.HypervisorConfig{}, err
}
initrd, image, err := h.getInitrdAndImage()
if err != nil {
return vc.HypervisorConfig{}, err
}
pflashes, err := h.PFlash()
if err != nil {
return vc.HypervisorConfig{}, err
}
if image != "" && initrd != "" {
return vc.HypervisorConfig{},
errors.New("having both an image and an initrd defined in the configuration file is not supported")
}
if image == "" && initrd == "" {
return vc.HypervisorConfig{},
errors.New("either image or initrd must be defined in the configuration file")
}
firmware, err := h.firmware()
if err != nil {
return vc.HypervisorConfig{}, err
}
machineAccelerators := h.machineAccelerators()
cpuFeatures := h.cpuFeatures()
kernelParams := h.kernelParams()
machineType := h.machineType()
// The "microvm" machine type doesn't support NVDIMM so override the
// config setting to explicitly disable it (i.e. don't require the
// user to add 'disable_image_nvdimm = true' in the .toml file).
if machineType == govmmQemu.MachineTypeMicrovm && !h.DisableImageNvdimm {
h.DisableImageNvdimm = true
kataUtilsLogger.Info("Setting 'disable_image_nvdimm = true' as microvm does not support NVDIMM")
}
blockDriver, err := h.blockDeviceDriver()
if err != nil {
return vc.HypervisorConfig{}, err
}
sharedFS, err := h.sharedFS()
if err != nil {
return vc.HypervisorConfig{}, err
}
if sharedFS == config.VirtioFS && h.VirtioFSDaemon == "" {
return vc.HypervisorConfig{},
errors.New("cannot enable virtio-fs without daemon path in configuration file")
}
if vSock, err := utils.SupportsVsocks(); !vSock {
return vc.HypervisorConfig{}, err
}
rxRateLimiterMaxRate := h.getRxRateLimiterCfg()
txRateLimiterMaxRate := h.getTxRateLimiterCfg()
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
FirmwarePath: firmware,
PFlash: pflashes,
MachineAccelerators: machineAccelerators,
CPUFeatures: cpuFeatures,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
HypervisorMachineType: machineType,
NumVCPUs: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.defaultVirtioFSCache(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
MemPrealloc: h.MemPrealloc,
HugePages: h.HugePages,
IOMMU: h.IOMMU,
IOMMUPlatform: h.getIOMMUPlatform(),
FileBackedMemRootDir: h.FileBackedMemRootDir,
FileBackedMemRootList: h.FileBackedMemRootList,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
DisableImageNvdimm: h.DisableImageNvdimm,
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: h.DisableVhostNet,
EnableVhostUserStore: h.EnableVhostUserStore,
VhostUserStorePath: h.vhostUserStorePath(),
VhostUserStorePathList: h.VhostUserStorePathList,
GuestHookPath: h.guestHookPath(),
RxRateLimiterMaxRate: rxRateLimiterMaxRate,
TxRateLimiterMaxRate: txRateLimiterMaxRate,
EnableAnnotations: h.EnableAnnotations,
GuestMemoryDumpPath: h.GuestMemoryDumpPath,
GuestMemoryDumpPaging: h.GuestMemoryDumpPaging,
ConfidentialGuest: h.ConfidentialGuest,
GuestSwap: h.GuestSwap,
Rootless: h.Rootless,
}, nil
}
func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
hypervisor, err := h.path()
if err != nil {
return vc.HypervisorConfig{}, err
}
hypervisorctl, err := h.ctlpath()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernel, err := h.kernel()
if err != nil {
return vc.HypervisorConfig{}, err
}
image, err := h.image()
if err != nil {
return vc.HypervisorConfig{}, err
}
if image == "" {
return vc.HypervisorConfig{},
errors.New("image must be defined in the configuration file")
}
firmware, err := h.firmware()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernelParams := h.kernelParams()
blockDriver, err := h.blockDeviceDriver()
if err != nil {
return vc.HypervisorConfig{}, err
}
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
ImagePath: image,
HypervisorCtlPath: hypervisorctl,
HypervisorCtlPathList: h.CtlPathList,
FirmwarePath: firmware,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
NumVCPUs: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
HugePages: h.HugePages,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
DisableVhostNet: h.DisableVhostNet,
GuestHookPath: h.guestHookPath(),
EnableAnnotations: h.EnableAnnotations,
}, nil
}
func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
hypervisor, err := h.path()
if err != nil {
return vc.HypervisorConfig{}, err
}
kernel, err := h.kernel()
if err != nil {
return vc.HypervisorConfig{}, err
}
initrd, image, err := h.getInitrdAndImage()
if err != nil {
return vc.HypervisorConfig{}, err
}
if initrd != "" {
return vc.HypervisorConfig{},
errors.New("having an initrd defined in the configuration file is not supported")
}
if image == "" {
return vc.HypervisorConfig{},
errors.New("image must be defined in the configuration file")
}
firmware, err := h.firmware()
if err != nil {
return vc.HypervisorConfig{}, err
}
machineAccelerators := h.machineAccelerators()
kernelParams := h.kernelParams()
machineType := h.machineType()
blockDriver, err := h.blockDeviceDriver()
if err != nil {
return vc.HypervisorConfig{}, err
}
sharedFS := config.VirtioFS
if h.VirtioFSDaemon == "" {
return vc.HypervisorConfig{},
errors.New("virtio-fs daemon path is missing in configuration file")
}
return vc.HypervisorConfig{
HypervisorPath: hypervisor,
HypervisorPathList: h.HypervisorPathList,
KernelPath: kernel,
InitrdPath: initrd,
ImagePath: image,
FirmwarePath: firmware,
MachineAccelerators: machineAccelerators,
KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)),
HypervisorMachineType: machineType,
NumVCPUs: h.defaultVCPUs(),
DefaultMaxVCPUs: h.defaultMaxVCPUs(),
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
EntropySourceList: h.EntropySourceList,
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
SharedFS: sharedFS,
VirtioFSDaemon: h.VirtioFSDaemon,
VirtioFSDaemonList: h.VirtioFSDaemonList,
VirtioFSCacheSize: h.VirtioFSCacheSize,
VirtioFSCache: h.VirtioFSCache,
MemPrealloc: h.MemPrealloc,
HugePages: h.HugePages,
FileBackedMemRootDir: h.FileBackedMemRootDir,
FileBackedMemRootList: h.FileBackedMemRootList,
Debug: h.Debug,
DisableNestingChecks: h.DisableNestingChecks,
BlockDeviceDriver: blockDriver,
BlockDeviceCacheSet: h.BlockDeviceCacheSet,
BlockDeviceCacheDirect: h.BlockDeviceCacheDirect,
BlockDeviceCacheNoflush: h.BlockDeviceCacheNoflush,
EnableIOThreads: h.EnableIOThreads,
Msize9p: h.msize9p(),
HotplugVFIOOnRootBus: h.HotplugVFIOOnRootBus,
PCIeRootPort: h.PCIeRootPort,
DisableVhostNet: true,
GuestHookPath: h.guestHookPath(),
VirtioFSExtraArgs: h.VirtioFSExtraArgs,
SGXEPCSize: defaultSGXEPCSize,
EnableAnnotations: h.EnableAnnotations,
DisableSeccomp: h.DisableSeccomp,
}, nil
}
func newFactoryConfig(f factory) (oci.FactoryConfig, error) {
if f.TemplatePath == "" {
f.TemplatePath = defaultTemplatePath
}
if f.VMCacheEndpoint == "" {
f.VMCacheEndpoint = defaultVMCacheEndpoint
}
return oci.FactoryConfig{
Template: f.Template,
TemplatePath: f.TemplatePath,
VMCacheNumber: f.VMCacheNumber,
VMCacheEndpoint: f.VMCacheEndpoint,
}, nil
}
func updateRuntimeConfigHypervisor(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
for k, hypervisor := range tomlConf.Hypervisor {
var err error
var hConfig vc.HypervisorConfig
switch k {
case firecrackerHypervisorTableType:
config.HypervisorType = vc.FirecrackerHypervisor
hConfig, err = newFirecrackerHypervisorConfig(hypervisor)
case qemuHypervisorTableType:
config.HypervisorType = vc.QemuHypervisor
hConfig, err = newQemuHypervisorConfig(hypervisor)
case acrnHypervisorTableType:
config.HypervisorType = vc.AcrnHypervisor
hConfig, err = newAcrnHypervisorConfig(hypervisor)
case clhHypervisorTableType:
config.HypervisorType = vc.ClhHypervisor
hConfig, err = newClhHypervisorConfig(hypervisor)
}
if err != nil {
return fmt.Errorf("%v: %v", configPath, err)
}
config.HypervisorConfig = hConfig
}
return nil
}
func updateRuntimeConfigAgent(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
for _, agent := range tomlConf.Agent {
config.AgentConfig = vc.KataAgentConfig{
LongLiveConn: true,
Debug: agent.debug(),
Trace: agent.trace(),
KernelModules: agent.kernelModules(),
EnableDebugConsole: agent.debugConsoleEnabled(),
DialTimeout: agent.dialTimout(),
}
}
return nil
}
// SetKernelParams adds the user-specified kernel parameters (from the
// configuration file) to the defaults so that the former take priority.
func SetKernelParams(runtimeConfig *oci.RuntimeConfig) error {
defaultKernelParams := GetKernelParamsFunc(needSystemd(runtimeConfig.HypervisorConfig), runtimeConfig.Trace)
if runtimeConfig.HypervisorConfig.Debug {
strParams := vc.SerializeParams(defaultKernelParams, "=")
formatted := strings.Join(strParams, " ")
kataUtilsLogger.WithField("default-kernel-parameters", formatted).Debug()
}
// retrieve the parameters specified in the config file
userKernelParams := runtimeConfig.HypervisorConfig.KernelParams
// reset
runtimeConfig.HypervisorConfig.KernelParams = []vc.Param{}
// first, add default values
for _, p := range defaultKernelParams {
if err := runtimeConfig.AddKernelParam(p); err != nil {
return err
}
}
// set the scsi scan mode to none for virtio-scsi
if runtimeConfig.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI {
p := vc.Param{
Key: "scsi_mod.scan",
Value: "none",
}
if err := runtimeConfig.AddKernelParam(p); err != nil {
return err
}
}
// next, check for agent specific kernel params
params := vc.KataAgentKernelParams(runtimeConfig.AgentConfig)
for _, p := range params {
if err := runtimeConfig.AddKernelParam(p); err != nil {
return err
}
}
// now re-add the user-specified values so that they take priority.
for _, p := range userKernelParams {
if err := runtimeConfig.AddKernelParam(p); err != nil {
return err
}
}
return nil
}
func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.RuntimeConfig) error {
if err := updateRuntimeConfigHypervisor(configPath, tomlConf, config); err != nil {
return err
}
if err := updateRuntimeConfigAgent(configPath, tomlConf, config); err != nil {
return err
}
fConfig, err := newFactoryConfig(tomlConf.Factory)
if err != nil {
return fmt.Errorf("%v: %v", configPath, err)
}
config.FactoryConfig = fConfig
err = SetKernelParams(config)
if err != nil {
return err
}
return nil
}
func GetDefaultHypervisorConfig() vc.HypervisorConfig {
return vc.HypervisorConfig{
HypervisorPath: defaultHypervisorPath,
JailerPath: defaultJailerPath,
KernelPath: defaultKernelPath,
ImagePath: defaultImagePath,
InitrdPath: defaultInitrdPath,
FirmwarePath: defaultFirmwarePath,
MachineAccelerators: defaultMachineAccelerators,
CPUFeatures: defaultCPUFeatures,
HypervisorMachineType: defaultMachineType,
NumVCPUs: defaultVCPUCount,
DefaultMaxVCPUs: defaultMaxVCPUCount,
MemorySize: defaultMemSize,
MemOffset: defaultMemOffset,
VirtioMem: defaultVirtioMem,
DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
DefaultBridges: defaultBridgesCount,
MemPrealloc: defaultEnableMemPrealloc,
HugePages: defaultEnableHugePages,
IOMMU: defaultEnableIOMMU,
IOMMUPlatform: defaultEnableIOMMUPlatform,
FileBackedMemRootDir: defaultFileBackedMemRootDir,
Debug: defaultEnableDebug,
DisableNestingChecks: defaultDisableNestingChecks,
BlockDeviceDriver: defaultBlockDeviceDriver,
BlockDeviceCacheSet: defaultBlockDeviceCacheSet,
BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect,
BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush,
EnableIOThreads: defaultEnableIOThreads,
Msize9p: defaultMsize9p,
HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus,
PCIeRootPort: defaultPCIeRootPort,
GuestHookPath: defaultGuestHookPath,
VhostUserStorePath: defaultVhostUserStorePath,
VirtioFSCache: defaultVirtioFSCacheMode,
DisableImageNvdimm: defaultDisableImageNvdimm,
RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate,
TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate,
SGXEPCSize: defaultSGXEPCSize,
ConfidentialGuest: defaultConfidentialGuest,
GuestSwap: defaultGuestSwap,
Rootless: defaultRootlessHypervisor,
DisableSeccomp: defaultDisableSeccomp,
}
}
func initConfig() (config oci.RuntimeConfig, err error) {
err = config.InterNetworkModel.SetModel(defaultInterNetworkingModel)
if err != nil {
return oci.RuntimeConfig{}, err
}
err = config.VfioMode.VFIOSetMode(defaultVfioMode)
if err != nil {
return oci.RuntimeConfig{}, err
}
config = oci.RuntimeConfig{
HypervisorType: defaultHypervisor,
HypervisorConfig: GetDefaultHypervisorConfig(),
AgentConfig: vc.KataAgentConfig{},
}
return config, nil
}
// LoadConfiguration loads the configuration file and converts it into a
// runtime configuration.
//
// If ignoreLogging is true, the system logger will not be initialised nor
// will this function make any log calls.
//
// All paths are resolved fully meaning if this function does not return an
// error, all paths are valid at the time of the call.
func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPath string, config oci.RuntimeConfig, err error) {
config, err = initConfig()
if err != nil {
return "", oci.RuntimeConfig{}, err
}
tomlConf, resolved, err := decodeConfig(configPath)
if err != nil {
return "", oci.RuntimeConfig{}, err
}
config.Debug = tomlConf.Runtime.Debug
if !tomlConf.Runtime.Debug {
// If debug is not required, switch back to the original
// default log priority, otherwise continue in debug mode.
kataUtilsLogger.Logger.Level = originalLoggerLevel
}
config.Trace = tomlConf.Runtime.Tracing
katatrace.SetTracing(config.Trace)
if tomlConf.Runtime.InterNetworkModel != "" {
err = config.InterNetworkModel.SetModel(tomlConf.Runtime.InterNetworkModel)
if err != nil {
return "", config, err
}
}
if tomlConf.Runtime.VfioMode != "" {
err = config.VfioMode.VFIOSetMode(tomlConf.Runtime.VfioMode)
if err != nil {
return "", config, err
}
}
if !ignoreLogging {
err := handleSystemLog("", "")
if err != nil {
return "", config, err
}
kataUtilsLogger.WithFields(
logrus.Fields{
"format": "TOML",
"file": resolved,
}).Info("loaded configuration")
}
if err := updateRuntimeConfig(resolved, tomlConf, &config); err != nil {
return "", config, err
}
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
config.EnablePprof = tomlConf.Runtime.EnablePprof
config.JaegerEndpoint = tomlConf.Runtime.JaegerEndpoint
config.JaegerUser = tomlConf.Runtime.JaegerUser
config.JaegerPassword = tomlConf.Runtime.JaegerPassword
for _, f := range tomlConf.Runtime.Experimental {
feature := exp.Get(f)
if feature == nil {
return "", config, fmt.Errorf("Unsupported experimental feature %q", f)
}
config.Experimental = append(config.Experimental, *feature)
}
if err = validateBindMounts(tomlConf.Runtime.SandboxBindMounts); err != nil {
return "", config, err
}
config.SandboxBindMounts = tomlConf.Runtime.SandboxBindMounts
if err := checkConfig(config); err != nil {
return "", config, err
}
return resolved, config, nil
}
// Verify that bind mounts exist
func validateBindMounts(mounts []string) error {
if len(mounts) == 0 {
return nil
}
bases := make(map[string]struct{})
for _, m := range mounts {
path, err := ResolvePath(m)
if err != nil {
return fmt.Errorf("sandbox-bindmounts: Failed to resolve path: %s: %v", m, err)
}
base := filepath.Base(path)
// check to make sure the base does not already exists.
if _, ok := bases[base]; !ok {
bases[base] = struct{}{}
} else {
return fmt.Errorf("sandbox-bindmounts: File %s has base that matches already specified bindmount", path)
}
}
return nil
}
func decodeConfig(configPath string) (tomlConfig, string, error) {
var (
resolved string
tomlConf tomlConfig
err error
)
if configPath == "" {
resolved, err = getDefaultConfigFile()
} else {
resolved, err = ResolvePath(configPath)
}
if err != nil {
return tomlConf, "", fmt.Errorf("Cannot find usable config file (%v)", err)
}
configData, err := os.ReadFile(resolved)
if err != nil {
return tomlConf, resolved, err
}
_, err = toml.Decode(string(configData), &tomlConf)
if err != nil {
return tomlConf, resolved, err
}
return tomlConf, resolved, nil
}
// checkConfig checks the validity of the specified config.
func checkConfig(config oci.RuntimeConfig) error {
if err := checkNetNsConfig(config); err != nil {
return err
}
if err := checkHypervisorConfig(config.HypervisorConfig); err != nil {
return err
}
if err := checkFactoryConfig(config); err != nil {
return err
}
return nil
}
// checkNetNsConfig performs sanity checks on disable_new_netns config.
// Because it is an expert option and conflicts with some other common configs.
func checkNetNsConfig(config oci.RuntimeConfig) error {
if config.DisableNewNetNs {
if config.InterNetworkModel != vc.NetXConnectNoneModel {
return fmt.Errorf("config disable_new_netns only works with 'none' internetworking_model")
}
}
return nil
}
// checkFactoryConfig ensures the VM factory configuration is valid.
func checkFactoryConfig(config oci.RuntimeConfig) error {
if config.FactoryConfig.Template {
if config.HypervisorConfig.InitrdPath == "" {
return errors.New("Factory option enable_template requires an initrd image")
}
}
if config.FactoryConfig.VMCacheNumber > 0 {
if config.HypervisorType != vc.QemuHypervisor {
return errors.New("VM cache just support qemu")
}
}
return nil
}
// checkHypervisorConfig performs basic "sanity checks" on the hypervisor
// config.
func checkHypervisorConfig(config vc.HypervisorConfig) error {
type image struct {
path string
initrd bool
}
images := []image{
{
path: config.ImagePath,
initrd: false,
},
{
path: config.InitrdPath,
initrd: true,
},
}
memSizeMB := int64(config.MemorySize)
if memSizeMB == 0 {
return errors.New("VM memory cannot be zero")
}
mb := int64(1024 * 1024)
for _, image := range images {
if image.path == "" {
continue
}
imageSizeBytes, err := fileSize(image.path)
if err != nil {
return err
}
if imageSizeBytes == 0 {
return fmt.Errorf("image %q is empty", image.path)
}
if imageSizeBytes > mb {
imageSizeMB := imageSizeBytes / mb
msg := fmt.Sprintf("VM memory (%dMB) smaller than image %q size (%dMB)",
memSizeMB, image.path, imageSizeMB)
if imageSizeMB >= memSizeMB {
if image.initrd {
// Initrd's need to be fully read into memory
return errors.New(msg)
}
// Images do not need to be fully read
// into memory, but it would be highly
// unusual to have an image larger
// than the amount of memory assigned
// to the VM.
kataUtilsLogger.Warn(msg)
}
}
}
return nil
}
// GetDefaultConfigFilePaths returns a list of paths that will be
// considered as configuration files in priority order.
func GetDefaultConfigFilePaths() []string {
return []string{
// normally below "/etc"
DEFAULTSYSCONFRUNTIMECONFIGURATION,
// normally below "/usr/share"
defaultRuntimeConfiguration,
}
}
// getDefaultConfigFile looks in multiple default locations for a
// configuration file and returns the resolved path for the first file
// found, or an error if no config files can be found.
func getDefaultConfigFile() (string, error) {
var errs []string
for _, file := range GetDefaultConfigFilePaths() {
resolved, err := ResolvePath(file)
if err == nil {
return resolved, nil
}
s := fmt.Sprintf("config file %q unresolvable: %v", file, err)
errs = append(errs, s)
}
return "", errors.New(strings.Join(errs, ", "))
}
// SetConfigOptions will override some of the defaults settings.
func SetConfigOptions(n, runtimeConfig, sysRuntimeConfig string) {
if n != "" {
NAME = n
}
if runtimeConfig != "" {
defaultRuntimeConfiguration = runtimeConfig
}
if sysRuntimeConfig != "" {
DEFAULTSYSCONFRUNTIMECONFIGURATION = sysRuntimeConfig
}
}