Mirror of https://github.com/aljazceru/kata-containers.git (synced 2026-01-20 14:54:21 +01:00)
CCv0: Merge main into CCv0 branch

Merge remote-tracking branch 'upstream/main' into CCv0

Fixes: #6558

Signed-off-by: stevenhorsman <steven@uk.ibm.com>
@@ -97,6 +97,8 @@ GENERATED_VARS = \
VERSION \
CONFIG_ACRN_IN \
CONFIG_QEMU_IN \
CONFIG_QEMU_TDX_IN \
CONFIG_QEMU_GPU_IN \
CONFIG_CLH_IN \
CONFIG_FC_IN \
CONFIG_CLH_TDX_IN \
@@ -130,8 +132,6 @@ DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4)

FIRMWAREPATH :=
FIRMWAREVOLUMEPATH :=
TDVFFIRMWAREPATH := $(PREFIXDEPS)/share/tdvf/OVMF_CODE.fd
TDVFFIRMWAREVOLUMEPATH := $(PREFIXDEPS)/share/tdvf/OVMF_VARS.fd
TDSHIMFIRMWAREPATH := ${PREFIXDEPS}/share/td-shim/td-shim.bin
SEVFIRMWAREPATH := $(PREFIXDEPS)/share/ovmf/AMDSEV.fd
SNPFIRMWAREPATH := $(PREFIXDEPS)/share/ovmf/OVMF.fd
@@ -146,12 +146,15 @@ AGENT_AA_KBC_PARAMS_TDX ?= ""
AGENT_AA_KBC_PARAMS_SEV ?= ""
AGENT_AA_KBC_PARAMS_SNP ?= ""
TDXKERNELPARAMS := tdx_disable_filter agent.enable_signature_verification=false $(AGENT_AA_KBC_PARAMS_TDX)
TDXKERNELPARAMS_QEMU += $(TDXKERNELPARAMS) $(ROOTMEASURECONFIGTDX)
KERNELTDXPARAMS += $(TDXKERNELPARAMS) $(ROOTMEASURECONFIGTDX)
TDXKERNELPARAMS_CLH += $(TDXKERNELPARAMS) $(ROOTMEASURECONFIG)
SEVKERNELPARAMS := $(AGENTCONFIGFILEKERNELPARAM) agent.enable_signature_verification=false $(AGENT_AA_KBC_PARAMS_SEV)
SNPKERNELPARAMS := $(AGENTCONFIGFILEKERNELPARAM) agent.enable_signature_verification=false $(AGENT_AA_KBC_PARAMS_SNP)
KERNELPARAMS += $(ROOTMEASURECONFIG) agent.enable_signature_verification=false $(AGENT_AA_KBC_PARAMS)

FIRMWARETDVFPATH := $(PREFIXDEPS)/share/tdvf/OVMF.fd
FIRMWARETDVFVOLUMEPATH := $(PREFIXDEPS)/share/tdvf/OVMF_VARS.fd

# Name of default configuration file the runtime will use.
CONFIG_FILE = configuration.toml
@@ -233,6 +236,7 @@ DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"]
DEFDISABLEBLOCK := false
DEFSHAREDFS_CLH_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_VIRTIOFS := virtio-fs
DEFSHAREDFS_QEMU_TDX_VIRTIOFS := virtio-9p
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/virtiofsd
ifeq ($(ARCH),ppc64le)
DEFVIRTIOFSDAEMON := $(LIBEXECDIR)/qemu/virtiofsd
@@ -328,7 +332,7 @@ ifneq (,$(QEMUCMD))
CONFIG_PATHS += $(CONFIG_PATH_QEMU_TDX)

SYSCONFIG_QEMU_TDX = $(abspath $(SYSCONFDIR)/$(CONFIG_FILE_QEMU_TDX))
SYSCONFIG_PATHS += $(SYSCONFIG_QEMU_TDX)
SYSCONFIG_PATHS_TDX += $(SYSCONFIG_QEMU_TDX)

CONFIGS += $(CONFIG_QEMU_TDX)
@@ -380,10 +384,18 @@ ifneq (,$(QEMUCMD))

CONFIGS += $(CONFIG_REMOTE)

CONFIG_FILE_QEMU_GPU = configuration-qemu-gpu.toml
CONFIG_QEMU_GPU = config/$(CONFIG_FILE_QEMU_GPU)
CONFIG_QEMU_GPU_IN = $(CONFIG_QEMU_GPU).in

CONFIGS += $(CONFIG_QEMU_GPU)

# qemu-specific options (all should be suffixed by "_QEMU")
DEFBLOCKSTORAGEDRIVER_QEMU := virtio-scsi
DEFBLOCKDEVICEAIO_QEMU := io_uring
DEFNETWORKMODEL_QEMU := tcfilter

KERNELTYPE = uncompressed
KERNELNAME = $(call MAKE_KERNEL_NAME,$(KERNELTYPE))
KERNELPATH = $(KERNELDIR)/$(KERNELNAME)
@@ -582,10 +594,10 @@ USER_VARS += KERNELTDXPATH_CLH
USER_VARS += KERNELPATH_FC
USER_VARS += KERNELVIRTIOFSPATH
USER_VARS += FIRMWAREPATH
USER_VARS += FIRMWARETDVFPATH
USER_VARS += FIRMWAREVOLUMEPATH
USER_VARS += TDSHIMFIRMWAREPATH
USER_VARS += TDVFFIRMWAREPATH
USER_VARS += TDVFFIRMWAREVOLUMEPATH
USER_VARS += FIRMWARETDVFVOLUMEPATH
USER_VARS += SEVFIRMWAREPATH
USER_VARS += SNPFIRMWAREPATH
USER_VARS += MACHINEACCELERATORS
@@ -598,6 +610,7 @@ USER_VARS += TDXKERNELPARAMS_QEMU
USER_VARS += TDXKERNELPARAMS_CLH
USER_VARS += SEVKERNELPARAMS
USER_VARS += SNPKERNELPARAMS
USER_VARS += KERNELTDXPARAMS
USER_VARS += LIBEXECDIR
USER_VARS += LOCALSTATEDIR
USER_VARS += PKGDATADIR
@@ -613,8 +626,11 @@ USER_VARS += PROJECT_TYPE
USER_VARS += PROJECT_URL
USER_VARS += QEMUBINDIR
USER_VARS += QEMUCMD
USER_VARS += QEMUTDXCMD
USER_VARS += QEMUPATH
USER_VARS += QEMUTDXPATH
USER_VARS += QEMUVALIDHYPERVISORPATHS
USER_VARS += QEMUTDXVALIDHYPERVISORPATHS
USER_VARS += QEMUVIRTIOFSCMD
USER_VARS += QEMUVIRTIOFSPATH
USER_VARS += QEMUSNPPATH
@@ -648,6 +664,7 @@ USER_VARS += DEFBLOCKSTORAGEDRIVER_QEMU
USER_VARS += DEFBLOCKDEVICEAIO_QEMU
USER_VARS += DEFSHAREDFS_CLH_VIRTIOFS
USER_VARS += DEFSHAREDFS_QEMU_VIRTIOFS
USER_VARS += DEFSHAREDFS_QEMU_TDX_VIRTIOFS
USER_VARS += DEFVIRTIOFSDAEMON
USER_VARS += DEFVALIDVIRTIOFSDAEMONPATHS
USER_VARS += DEFVIRTIOFSCACHESIZE
@@ -770,6 +787,10 @@ define MAKE_KERNEL_VIRTIOFS_NAME
$(if $(findstring uncompressed,$1),vmlinux-virtiofs.container,vmlinuz-virtiofs.container)
endef

define MAKE_KERNEL_TDX_NAME
$(if $(findstring uncompressed,$1),vmlinux-tdx.container,vmlinuz-tdx.container)
endef

GENERATED_FILES += pkg/katautils/config-settings.go

$(RUNTIME_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) | show-summary
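The GENERATED_VARS and USER_VARS lists above feed the config-generation step: every @NAME@ placeholder in a *.toml.in template (such as the configuration-qemu-gpu.toml.in added below) is replaced with the corresponding Makefile value. The following minimal Go sketch only illustrates that substitution idea; it is not the project's actual build code, and the variable values are made up.

package main

import (
	"fmt"
	"strings"
)

// expandTemplate replaces @NAME@ placeholders with supplied values,
// mimicking how USER_VARS are injected into the *.toml.in templates.
func expandTemplate(tmpl string, vars map[string]string) string {
	out := tmpl
	for name, value := range vars {
		out = strings.ReplaceAll(out, "@"+name+"@", value)
	}
	return out
}

func main() {
	tmpl := `kernel = "@KERNELPATH@"
firmware = "@FIRMWARETDVFPATH@"
shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@"`

	// Illustrative values only.
	vars := map[string]string{
		"KERNELPATH":                    "/usr/share/kata-containers/vmlinux.container",
		"FIRMWARETDVFPATH":              "/opt/confidential-containers/share/tdvf/OVMF.fd",
		"DEFSHAREDFS_QEMU_TDX_VIRTIOFS": "virtio-9p",
	}
	fmt.Println(expandTemplate(tmpl, vars))
}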
@@ -9,10 +9,10 @@ MACHINETYPE := q35
KERNELPARAMS :=
MACHINEACCELERATORS :=
CPUFEATURES := pmu=off
TDXCPUFEATURES := -vmx-rdseed-exit,pmu=off

QEMUCMD := qemu-system-x86_64
QEMUTDXCMD := qemu-system-x86_64-tdx
QEMUTDXCMD := qemu-system-x86_64-tdx-experimental
TDXCPUFEATURES := -vmx-rdseed-exit,pmu=off
QEMUSNPCMD := qemu-system-x86_64-snp

# Firecracker binary name
@@ -52,6 +52,8 @@ The **log-level** allows the chose how verbose the logs should be. The default i

**NOTE: The debug endpoints are available only if the [Kata Containers configuration file](https://github.com/kata-containers/kata-containers/blob/9d5b03a1b70bbd175237ec4b9f821d6ccee0a1f6/src/runtime/config/configuration-qemu.toml.in#L590-L592) includes** `enable_pprof = true` **in the** `[runtime]` **section**.

The `/metrics` endpoint accepts a query parameter, `filter_family`, which filters Kata sandbox metrics by name. If `filter_family` is set to `A` (and `B`, separated by `,`), only metrics whose names start with the prefix `A` (or `B`) are returned.

The `/sandboxes` endpoint lists the _sandbox ID_ of all the detected Kata runtimes. If accessed via a web browser, it provides HTML links to the endpoints available for each sandbox.

In order to retrieve data for a specific Kata workload, the _sandbox ID_ should be passed in the query string using the _sandbox_ key. The `/agent-url` and all the `/debug/*` endpoints require `sandbox_id` to be specified in the query string.
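As a concrete illustration of the `filter_family` parameter described above, a minimal Go client is sketched below; the listen address 127.0.0.1:8090 and the metric prefixes are assumptions for the example, not values fixed by this document.

package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
)

func main() {
	// Assumed kata-monitor listen address; adjust to your deployment.
	base := "http://127.0.0.1:8090/metrics"

	// Only return metric families whose names start with one of these prefixes.
	q := url.Values{}
	q.Set("filter_family", "kata_shim,kata_hypervisor")

	resp, err := http.Get(base + "?" + q.Encode())
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%s", body)
}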
@@ -154,8 +154,8 @@ disable_selinux=@DEFDISABLESELINUX@
#debug_console_enabled = true

# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45

[runtime]
# If enabled, the runtime will log additional debug messages to the
@@ -41,6 +41,11 @@ rootfs_type=@DEFROOTFSTYPE@
# Default false
# confidential_guest = true

# Enable running clh VMM as a non-root user.
# By default clh VMM run as root. When this is set to true, clh VMM process runs as
# a non-root random user. See documentation for the limitations of this mode.
# rootless = true

# disable applying SELinux on the VMM process (default false)
disable_selinux=@DEFDISABLESELINUX@
@@ -300,8 +305,8 @@ block_device_driver = "virtio-blk"
#debug_console_enabled = true

# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45

[runtime]
# If enabled, the runtime will log additional debug messages to the
@@ -284,8 +284,8 @@ kernel_modules=[]
#debug_console_enabled = true

# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45

[runtime]
# If enabled, the runtime will log additional debug messages to the
692 src/runtime/config/configuration-qemu-gpu.toml.in (new file)
@@ -0,0 +1,692 @@
# Copyright (c) 2017-2019 Intel Corporation
|
||||
# Copyright (c) 2021 Adobe Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# XXX: WARNING: this file is auto-generated.
|
||||
# XXX:
|
||||
# XXX: Source file: "@CONFIG_QEMU_IN@"
|
||||
# XXX: Project:
|
||||
# XXX: Name: @PROJECT_NAME@
|
||||
# XXX: Type: @PROJECT_TYPE@
|
||||
|
||||
[hypervisor.qemu]
|
||||
path = "@QEMUPATH@"
|
||||
kernel = "@KERNELPATH@"
|
||||
image = "@IMAGEPATH@"
|
||||
# initrd = "@INITRDPATH@"
|
||||
machine_type = "@MACHINETYPE@"
|
||||
|
||||
# rootfs filesystem type:
|
||||
# - ext4 (default)
|
||||
# - xfs
|
||||
# - erofs
|
||||
rootfs_type=@DEFROOTFSTYPE@
|
||||
|
||||
# Enable confidential guest support.
|
||||
# Toggling that setting may trigger different hardware features, ranging
|
||||
# from memory encryption to both memory and CPU-state encryption and integrity.
|
||||
# The Kata Containers runtime dynamically detects the available feature set and
|
||||
# aims at enabling the largest possible one, returning an error if none is
|
||||
# available, or none is supported by the hypervisor.
|
||||
#
|
||||
# Known limitations:
|
||||
# * Does not work by design:
|
||||
# - CPU Hotplug
|
||||
# - Memory Hotplug
|
||||
# - NVDIMM devices
|
||||
#
|
||||
# Default false
|
||||
# confidential_guest = true
|
||||
|
||||
# Choose AMD SEV-SNP confidential guests
|
||||
# In case of using confidential guests on AMD hardware that supports both SEV
|
||||
# and SEV-SNP, the following enables SEV-SNP guests. SEV guests are default.
|
||||
# Default false
|
||||
# sev_snp_guest = true
|
||||
|
||||
# Enable running QEMU VMM as a non-root user.
|
||||
# By default QEMU VMM run as root. When this is set to true, QEMU VMM process runs as
|
||||
# a non-root random user. See documentation for the limitations of this mode.
|
||||
# rootless = true
|
||||
|
||||
# List of valid annotation names for the hypervisor
|
||||
# Each member of the list is a regular expression, which is the base name
|
||||
# of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path"
|
||||
enable_annotations = @DEFENABLEANNOTATIONS@
|
||||
|
||||
# List of valid annotations values for the hypervisor
|
||||
# Each member of the list is a path pattern as described by glob(3).
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @QEMUVALIDHYPERVISORPATHS@
|
||||
valid_hypervisor_paths = @QEMUVALIDHYPERVISORPATHS@
|
||||
|
||||
# Optional space-separated list of options to pass to the guest kernel.
|
||||
# For example, use `kernel_params = "vsyscall=emulate"` if you are having
|
||||
# trouble running pre-2.15 glibc.
|
||||
#
|
||||
# WARNING: - any parameter specified here will take priority over the default
|
||||
# parameter value of the same name used to start the virtual machine.
|
||||
# Do not set values here unless you understand the impact of doing so as you
|
||||
# may stop the virtual machine from booting.
|
||||
# To see the list of default parameters, enable hypervisor debug, create a
|
||||
# container and look for 'default-kernel-parameters' log entries.
|
||||
kernel_params = "@KERNELPARAMS@"
|
||||
|
||||
# Path to the firmware.
|
||||
# If you want that qemu uses the default firmware leave this option empty
|
||||
firmware = "@FIRMWAREPATH@"
|
||||
|
||||
# Path to the firmware volume.
|
||||
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
|
||||
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
|
||||
# can be customized per each user while UEFI code is kept same.
|
||||
firmware_volume = "@FIRMWAREVOLUMEPATH@"
|
||||
|
||||
# Machine accelerators
|
||||
# comma-separated list of machine accelerators to pass to the hypervisor.
|
||||
# For example, `machine_accelerators = "nosmm,nosmbus,nosata,nopit,static-prt,nofw"`
|
||||
machine_accelerators="@MACHINEACCELERATORS@"
|
||||
|
||||
# Qemu seccomp sandbox feature
|
||||
# comma-separated list of seccomp sandbox features to control the syscall access.
|
||||
# For example, `seccompsandbox= "on,obsolete=deny,spawn=deny,resourcecontrol=deny"`
|
||||
# Note: "elevateprivileges=deny" doesn't work with daemonize option, so it's removed from the seccomp sandbox
|
||||
# Another note: enabling this feature may reduce performance, you may enable
|
||||
# /proc/sys/net/core/bpf_jit_enable to reduce the impact. see https://man7.org/linux/man-pages/man8/bpfc.8.html
|
||||
#seccompsandbox="@DEFSECCOMPSANDBOXPARAM@"
|
||||
|
||||
# CPU features
|
||||
# comma-separated list of cpu features to pass to the cpu
|
||||
# For example, `cpu_features = "pmu=off,vmx=off"
|
||||
cpu_features="@CPUFEATURES@"
|
||||
|
||||
# Default number of vCPUs per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFVCPUS@
|
||||
# < 0 --> will be set to the actual number of physical cores
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores
|
||||
default_vcpus = 1
|
||||
|
||||
# Default maximum number of vCPUs per SB/VM:
|
||||
# unspecified or == 0 --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# > 0 <= number of physical cores --> will be set to the specified number
|
||||
# > number of physical cores --> will be set to the actual number of physical cores or to the maximum number
|
||||
# of vCPUs supported by KVM if that number is exceeded
|
||||
# WARNING: Depending of the architecture, the maximum number of vCPUs supported by KVM is used when
|
||||
# the actual number of physical cores is greater than it.
|
||||
# WARNING: Be aware that this value impacts the virtual machine's memory footprint and CPU
|
||||
# the hotplug functionality. For example, `default_maxvcpus = 240` specifies that until 240 vCPUs
|
||||
# can be added to a SB/VM, but the memory footprint will be big. Another example, with
|
||||
# `default_maxvcpus = 8` the memory footprint will be small, but 8 will be the maximum number of
|
||||
# vCPUs supported by the SB/VM. In general, we recommend that you do not edit this variable,
|
||||
# unless you know what are you doing.
|
||||
# NOTICE: on arm platform with gicv2 interrupt controller, set it to 8.
|
||||
default_maxvcpus = @DEFMAXVCPUS@
|
||||
|
||||
# Bridges can be used to hot plug devices.
|
||||
# Limitations:
|
||||
# * Currently only pci bridges are supported
|
||||
# * Until 30 devices per bridge can be hot plugged.
|
||||
# * Until 5 PCI bridges can be cold plugged per VM.
|
||||
# This limitation could be a bug in qemu or in the kernel
|
||||
# Default number of bridges per SB/VM:
|
||||
# unspecified or 0 --> will be set to @DEFBRIDGES@
|
||||
# > 1 <= 5 --> will be set to the specified number
|
||||
# > 5 --> will be set to 5
|
||||
default_bridges = @DEFBRIDGES@
|
||||
|
||||
# Default memory size in MiB for SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSZ@ MiB.
|
||||
default_memory = @DEFMEMSZ@
|
||||
#
|
||||
# Default memory slots per SB/VM.
|
||||
# If unspecified then it will be set @DEFMEMSLOTS@.
|
||||
# This is will determine the times that memory will be hotadded to sandbox/VM.
|
||||
#memory_slots = @DEFMEMSLOTS@
|
||||
|
||||
# Default maximum memory in MiB per SB / VM
|
||||
# unspecified or == 0 --> will be set to the actual amount of physical RAM
|
||||
# > 0 <= amount of physical RAM --> will be set to the specified number
|
||||
# > amount of physical RAM --> will be set to the actual amount of physical RAM
|
||||
default_maxmemory = @DEFMAXMEMSZ@
|
||||
|
||||
# The size in MiB will be plused to max memory of hypervisor.
|
||||
# It is the memory address space for the NVDIMM devie.
|
||||
# If set block storage driver (block_device_driver) to "nvdimm",
|
||||
# should set memory_offset to the size of block device.
|
||||
# Default 0
|
||||
#memory_offset = 0
|
||||
|
||||
# Specifies virtio-mem will be enabled or not.
|
||||
# Please note that this option should be used with the command
|
||||
# "echo 1 > /proc/sys/vm/overcommit_memory".
|
||||
# Default false
|
||||
#enable_virtio_mem = true
|
||||
|
||||
# Disable block device from being used for a container's rootfs.
|
||||
# In case of a storage driver like devicemapper where a container's
|
||||
# root file system is backed by a block device, the block device is passed
|
||||
# directly to the hypervisor for performance reasons.
|
||||
# This flag prevents the block device from being passed to the hypervisor,
|
||||
# virtio-fs is used instead to pass the rootfs.
|
||||
disable_block_device_use = @DEFDISABLEBLOCK@
|
||||
|
||||
# Shared file system type:
|
||||
# - virtio-fs (default)
|
||||
# - virtio-9p
|
||||
# - virtio-fs-nydus
|
||||
shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@"
|
||||
|
||||
# Path to vhost-user-fs daemon.
|
||||
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
|
||||
|
||||
# List of valid annotations values for the virtiofs daemon
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
|
||||
|
||||
# Default size of DAX cache in MiB
|
||||
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@
|
||||
|
||||
# Default size of virtqueues
|
||||
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@
|
||||
|
||||
# Extra args for virtiofsd daemon
|
||||
#
|
||||
# Format example:
|
||||
# ["-o", "arg1=xxx,arg2", "-o", "hello world", "--arg3=yyy"]
|
||||
# Examples:
|
||||
# Set virtiofsd log level to debug : ["-o", "log_level=debug"] or ["-d"]
|
||||
#
|
||||
# see `virtiofsd -h` for possible options.
|
||||
virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@
|
||||
|
||||
# Cache mode:
|
||||
#
|
||||
# - never
|
||||
# Metadata, data, and pathname lookup are not cached in guest. They are
|
||||
# always fetched from host and any changes are immediately pushed to host.
|
||||
#
|
||||
# - auto
|
||||
# Metadata and pathname lookup cache expires after a configured amount of
|
||||
# time (default is 1 second). Data is cached while the file is open (close
|
||||
# to open consistency).
|
||||
#
|
||||
# - always
|
||||
# Metadata, data, and pathname lookup are cached in guest and never expire.
|
||||
virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
|
||||
|
||||
# Block storage driver to be used for the hypervisor in case the container
|
||||
# rootfs is backed by a block device. This is virtio-scsi, virtio-blk
|
||||
# or nvdimm.
|
||||
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"
|
||||
|
||||
# aio is the I/O mechanism used by qemu
|
||||
# Options:
|
||||
#
|
||||
# - threads
|
||||
# Pthread based disk I/O.
|
||||
#
|
||||
# - native
|
||||
# Native Linux I/O.
|
||||
#
|
||||
# - io_uring
|
||||
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
|
||||
# qemu >=5.0.
|
||||
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"
|
||||
|
||||
# Specifies cache-related options will be set to block devices or not.
|
||||
# Default false
|
||||
#block_device_cache_set = true
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
|
||||
# Default false
|
||||
#block_device_cache_direct = true
|
||||
|
||||
# Specifies cache-related options for block devices.
|
||||
# Denotes whether flush requests for the device are ignored.
|
||||
# Default false
|
||||
#block_device_cache_noflush = true
|
||||
|
||||
# Enable iothreads (data-plane) to be used. This causes IO to be
|
||||
# handled in a separate IO thread. This is currently only implemented
|
||||
# for SCSI.
|
||||
#
|
||||
enable_iothreads = @DEFENABLEIOTHREADS@
|
||||
|
||||
# Enable pre allocation of VM RAM, default false
|
||||
# Enabling this will result in lower container density
|
||||
# as all of the memory will be allocated and locked
|
||||
# This is useful when you want to reserve all the memory
|
||||
# upfront or in the cases where you want memory latencies
|
||||
# to be very predictable
|
||||
# Default false
|
||||
#enable_mem_prealloc = true
|
||||
|
||||
# Enable huge pages for VM RAM, default false
|
||||
# Enabling this will result in the VM memory
|
||||
# being allocated using huge pages.
|
||||
# This is useful when you want to use vhost-user network
|
||||
# stacks within the container. This will automatically
|
||||
# result in memory pre allocation
|
||||
#enable_hugepages = true
|
||||
|
||||
# Enable vhost-user storage device, default false
|
||||
# Enabling this will result in some Linux reserved block type
|
||||
# major range 240-254 being chosen to represent vhost-user devices.
|
||||
enable_vhost_user_store = @DEFENABLEVHOSTUSERSTORE@
|
||||
|
||||
# The base directory specifically used for vhost-user devices.
|
||||
# Its sub-path "block" is used for block devices; "block/sockets" is
|
||||
# where we expect vhost-user sockets to live; "block/devices" is where
|
||||
# simulated block device nodes for vhost-user devices to live.
|
||||
vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
|
||||
|
||||
# Enable vIOMMU, default false
|
||||
# Enabling this will result in the VM having a vIOMMU device
|
||||
# This will also add the following options to the kernel's
|
||||
# command line: intel_iommu=on,iommu=pt
|
||||
#enable_iommu = true
|
||||
|
||||
# Enable IOMMU_PLATFORM, default false
|
||||
# Enabling this will result in the VM device having iommu_platform=on set
|
||||
#enable_iommu_platform = true
|
||||
|
||||
# List of valid annotations values for the vhost user store path
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@
|
||||
|
||||
# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
|
||||
# qemu will delay this many seconds and then attempt to reconnect.
|
||||
# Zero disables reconnecting, and the default is zero.
|
||||
vhost_user_reconnect_timeout_sec = 0
|
||||
|
||||
# Enable file based guest memory support. The default is an empty string which
|
||||
# will disable this feature. In the case of virtio-fs, this is enabled
|
||||
# automatically and '/dev/shm' is used as the backing folder.
|
||||
# This option will be ignored if VM templating is enabled.
|
||||
#file_mem_backend = "@DEFFILEMEMBACKEND@"
|
||||
|
||||
# List of valid annotations values for the file_mem_backend annotation
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDFILEMEMBACKENDS@
|
||||
valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
|
||||
|
||||
# -pflash can add image file to VM. The arguments of it should be in format
|
||||
# of ["/path/to/flash0.img", "/path/to/flash1.img"]
|
||||
pflashes = []
|
||||
|
||||
# This option changes the default hypervisor and kernel parameters
|
||||
# to enable debug output where available. And Debug also enable the hmp socket.
|
||||
#
|
||||
# Default false
|
||||
#enable_debug = true
|
||||
|
||||
# Disable the customizations done in the runtime when it detects
|
||||
# that it is running on top a VMM. This will result in the runtime
|
||||
# behaving as it would when running on bare metal.
|
||||
#
|
||||
#disable_nesting_checks = true
|
||||
|
||||
# This is the msize used for 9p shares. It is the number of bytes
|
||||
# used for 9p packet payload.
|
||||
#msize_9p = @DEFMSIZE9P@
|
||||
|
||||
# If false and nvdimm is supported, use nvdimm device to plug guest image.
|
||||
# Otherwise virtio-block device is used.
|
||||
#
|
||||
# nvdimm is not supported when `confidential_guest = true`.
|
||||
#
|
||||
# Default is false
|
||||
#disable_image_nvdimm = true
|
||||
|
||||
# VFIO devices are hotplugged on a bridge by default.
|
||||
# Enable hotplugging on root bus. This may be required for devices with
|
||||
# a large PCI bar, as this is a current limitation with hotplugging on
|
||||
# a bridge.
|
||||
# Default false
|
||||
hotplug_vfio_on_root_bus = true
|
||||
|
||||
# Before hot plugging a PCIe device, you need to add a pcie_root_port device.
|
||||
# Use this parameter when using some large PCI bar devices, such as Nvidia GPU
|
||||
# The value means the number of pcie_root_port
|
||||
# This value is valid when hotplug_vfio_on_root_bus is true and machine_type is "q35"
|
||||
# Default 0
|
||||
pcie_root_port = 1
|
||||
|
||||
# If vhost-net backend for virtio-net is not desired, set to true. Default is false, which trades off
|
||||
# security (vhost-net runs ring0) for network I/O performance.
|
||||
#disable_vhost_net = true
|
||||
|
||||
#
|
||||
# Default entropy source.
|
||||
# The path to a host source of entropy (including a real hardware RNG)
|
||||
# /dev/urandom and /dev/random are two main options.
|
||||
# Be aware that /dev/random is a blocking source of entropy. If the host
|
||||
# runs out of entropy, the VMs boot time will increase leading to get startup
|
||||
# timeouts.
|
||||
# The source of entropy /dev/urandom is non-blocking and provides a
|
||||
# generally acceptable source of entropy. It should work well for pretty much
|
||||
# all practical purposes.
|
||||
#entropy_source= "@DEFENTROPYSOURCE@"
|
||||
|
||||
# List of valid annotations values for entropy_source
|
||||
# The default if not set is empty (all annotations rejected.)
|
||||
# Your distribution recommends: @DEFVALIDENTROPYSOURCES@
|
||||
valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
|
||||
# Path to OCI hook binaries in the *guest rootfs*.
|
||||
# This does not affect host-side hooks which must instead be added to
|
||||
# the OCI spec passed to the runtime.
|
||||
#
|
||||
# You can create a rootfs with hooks by customizing the osbuilder scripts:
|
||||
# https://github.com/kata-containers/kata-containers/tree/main/tools/osbuilder
|
||||
#
|
||||
# Hooks must be stored in a subdirectory of guest_hook_path according to their
|
||||
# hook type, i.e. "guest_hook_path/{prestart,poststart,poststop}".
|
||||
# The agent will scan these directories for executable files and add them, in
|
||||
# lexicographical order, to the lifecycle of the guest container.
|
||||
# Hooks are executed in the runtime namespace of the guest. See the official documentation:
|
||||
# https://github.com/opencontainers/runtime-spec/blob/v1.0.1/config.md#posix-platform-hooks
|
||||
# Warnings will be logged if any error is encountered while scanning for hooks,
|
||||
# but it will not abort container execution.
|
||||
guest_hook_path = "/etc/oci/hooks.d"
|
||||
#
|
||||
# Use rx Rate Limiter to control network I/O inbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
#rx_rate_limiter_max_rate = 0
|
||||
# Use tx Rate Limiter to control network I/O outbound bandwidth(size in bits/sec for SB/VM).
|
||||
# In Qemu, we use classful qdiscs HTB(Hierarchy Token Bucket) and ifb(Intermediate Functional Block)
|
||||
# to discipline traffic.
|
||||
# Default 0-sized value means unlimited rate.
|
||||
#tx_rate_limiter_max_rate = 0
|
||||
|
||||
# Set where to save the guest memory dump file.
|
||||
# If set, when GUEST_PANICKED event occurred,
|
||||
# guest memeory will be dumped to host filesystem under guest_memory_dump_path,
|
||||
# This directory will be created automatically if it does not exist.
|
||||
#
|
||||
# The dumped file(also called vmcore) can be processed with crash or gdb.
|
||||
#
|
||||
# WARNING:
|
||||
# Dump guest’s memory can take very long depending on the amount of guest memory
|
||||
# and use much disk space.
|
||||
#guest_memory_dump_path="/var/crash/kata"
|
||||
|
||||
# If enable paging.
|
||||
# Basically, if you want to use "gdb" rather than "crash",
|
||||
# or need the guest-virtual addresses in the ELF vmcore,
|
||||
# then you should enable paging.
|
||||
#
|
||||
# See: https://www.qemu.org/docs/master/qemu-qmp-ref.html#Dump-guest-memory for details
|
||||
#guest_memory_dump_paging=false
|
||||
|
||||
# Enable swap in the guest. Default false.
|
||||
# When enable_guest_swap is enabled, insert a raw file to the guest as the swap device
|
||||
# if the swappiness of a container (set by annotation "io.katacontainers.container.resource.swappiness")
|
||||
# is bigger than 0.
|
||||
# The size of the swap device should be
|
||||
# swap_in_bytes (set by annotation "io.katacontainers.container.resource.swap_in_bytes") - memory_limit_in_bytes.
|
||||
# If swap_in_bytes is not set, the size should be memory_limit_in_bytes.
|
||||
# If swap_in_bytes and memory_limit_in_bytes is not set, the size should
|
||||
# be default_memory.
|
||||
#enable_guest_swap = true
|
||||
|
||||
# use legacy serial for guest console if available and implemented for architecture. Default false
|
||||
#use_legacy_serial = true
|
||||
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux=@DEFDISABLESELINUX@
|
||||
|
||||
# disable applying SELinux on the container process
|
||||
# If set to false, the type `container_t` is applied to the container process by default.
|
||||
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||
# with `SELINUX=yes`.
|
||||
# (default: true)
|
||||
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
|
||||
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
# agent memory by mapping it readonly. It helps speeding up new container
|
||||
# creation and saves a lot of memory if there are many kata containers running
|
||||
# on the same host.
|
||||
#
|
||||
# When disabled, new VMs are created from scratch.
|
||||
#
|
||||
# Note: Requires "initrd=" to be set ("image=" is not supported).
|
||||
#
|
||||
# Default false
|
||||
#enable_template = true
|
||||
|
||||
# Specifies the path of template.
|
||||
#
|
||||
# Default "/run/vc/vm/template"
|
||||
#template_path = "/run/vc/vm/template"
|
||||
|
||||
# The number of caches of VMCache:
|
||||
# unspecified or == 0 --> VMCache is disabled
|
||||
# > 0 --> will be set to the specified number
|
||||
#
|
||||
# VMCache is a function that creates VMs as caches before using it.
|
||||
# It helps speed up new container creation.
|
||||
# The function consists of a server and some clients communicating
|
||||
# through Unix socket. The protocol is gRPC in protocols/cache/cache.proto.
|
||||
# The VMCache server will create some VMs and cache them by factory cache.
|
||||
# It will convert the VM to gRPC format and transport it when gets
|
||||
# requestion from clients.
|
||||
# Factory grpccache is the VMCache client. It will request gRPC format
|
||||
# VM and convert it back to a VM. If VMCache function is enabled,
|
||||
# kata-runtime will request VM from factory grpccache when it creates
|
||||
# a new sandbox.
|
||||
#
|
||||
# Default 0
|
||||
#vm_cache_number = 0
|
||||
|
||||
# Specify the address of the Unix socket that is used by VMCache.
|
||||
#
|
||||
# Default /var/run/kata-containers/cache.sock
|
||||
#vm_cache_endpoint = "/var/run/kata-containers/cache.sock"
|
||||
|
||||
[agent.@PROJECT_TYPE@]
|
||||
# If enabled, make the agent display debug-level messages.
|
||||
# (default: disabled)
|
||||
#enable_debug = true
|
||||
|
||||
# Enable agent tracing.
|
||||
#
|
||||
# If enabled, the agent will generate OpenTelemetry trace spans.
|
||||
#
|
||||
# Notes:
|
||||
#
|
||||
# - If the runtime also has tracing enabled, the agent spans will be
|
||||
# associated with the appropriate runtime parent span.
|
||||
# - If enabled, the runtime will wait for the container to shutdown,
|
||||
# increasing the container shutdown time slightly.
|
||||
#
|
||||
# (default: disabled)
|
||||
#enable_tracing = true
|
||||
|
||||
# Comma separated list of kernel modules and their parameters.
|
||||
# These modules will be loaded in the guest kernel using modprobe(8).
|
||||
# The following example can be used to load two kernel modules with parameters
|
||||
# - kernel_modules=["e1000e InterruptThrottleRate=3000,3000,3000 EEE=1", "i915 enable_ppgtt=0"]
|
||||
# The first word is considered as the module name and the rest as its parameters.
|
||||
# Container will not be started when:
|
||||
# * A kernel module is specified and the modprobe command is not installed in the guest
|
||||
# or it fails loading the module.
|
||||
# * The module is not available in the guest or it doesn't met the guest kernel
|
||||
# requirements, like architecture and version.
|
||||
#
|
||||
kernel_modules=[]
|
||||
|
||||
# Enable debug console.
|
||||
|
||||
# If enabled, user can connect guest OS running inside hypervisor
|
||||
# through "kata-runtime exec <sandbox-id>" command
|
||||
|
||||
#debug_console_enabled = true
|
||||
|
||||
# Agent connection dialing timeout value in seconds
|
||||
# (default: 30)
|
||||
#dial_timeout = 30
|
||||
|
||||
[runtime]
|
||||
# If enabled, the runtime will log additional debug messages to the
|
||||
# system log
|
||||
# (default: disabled)
|
||||
#enable_debug = true
|
||||
#
|
||||
# Internetworking model
|
||||
# Determines how the VM should be connected to the
|
||||
# the container network interface
|
||||
# Options:
|
||||
#
|
||||
# - macvtap
|
||||
# Used when the Container network interface can be bridged using
|
||||
# macvtap.
|
||||
#
|
||||
# - none
|
||||
# Used when customize network. Only creates a tap device. No veth pair.
|
||||
#
|
||||
# - tcfilter
|
||||
# Uses tc filter rules to redirect traffic from the network interface
|
||||
# provided by plugin to a tap interface connected to the VM.
|
||||
#
|
||||
internetworking_model="@DEFNETWORKMODEL_QEMU@"
|
||||
|
||||
# disable guest seccomp
|
||||
# Determines whether container seccomp profiles are passed to the virtual
|
||||
# machine and applied by the kata agent. If set to true, seccomp is not applied
|
||||
# within the guest
|
||||
# (default: true)
|
||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# vCPUs pinning settings
|
||||
# if enabled, each vCPU thread will be scheduled to a fixed CPU
|
||||
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
|
||||
# enable_vcpus_pinning = false
|
||||
|
||||
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||
# This is used when you want to apply a type other than the default `container_t`,
|
||||
# so general users should not uncomment and apply it.
|
||||
# (format: "user:role:type")
|
||||
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
#enable_tracing = true
|
||||
|
||||
# Set the full url to the Jaeger HTTP Thrift collector.
|
||||
# The default if not set will be "http://localhost:14268/api/traces"
|
||||
#jaeger_endpoint = ""
|
||||
|
||||
# Sets the username to be used if basic auth is required for Jaeger.
|
||||
#jaeger_user = ""
|
||||
|
||||
# Sets the password to be used if basic auth is required for Jaeger.
|
||||
#jaeger_password = ""
|
||||
|
||||
# If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
|
||||
# This option may have some potential impacts to your host. It should only be used when you know what you're doing.
|
||||
# `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
|
||||
# with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
|
||||
# (like OVS) directly.
|
||||
# (default: false)
|
||||
#disable_new_netns = true
|
||||
|
||||
# if enabled, the runtime will add all the kata processes inside one dedicated cgroup.
|
||||
# The container cgroups in the host are not created, just one single cgroup per sandbox.
|
||||
# The runtime caller is free to restrict or collect cgroup stats of the overall Kata sandbox.
|
||||
# The sandbox cgroup path is the parent cgroup of a container with the PodSandbox annotation.
|
||||
# The sandbox cgroup is constrained if there is no container type annotation.
|
||||
# See: https://pkg.go.dev/github.com/kata-containers/kata-containers/src/runtime/virtcontainers#ContainerType
|
||||
sandbox_cgroup_only=@DEFSANDBOXCGROUPONLY@
|
||||
|
||||
# If enabled, the runtime will attempt to determine appropriate sandbox size (memory, CPU) before booting the virtual machine. In
|
||||
# this case, the runtime will not dynamically update the amount of memory and CPU in the virtual machine. This is generally helpful
|
||||
# when a hardware architecture or hypervisor solutions is utilized which does not support CPU and/or memory hotplug.
|
||||
# Compatibility for determining appropriate sandbox (VM) size:
|
||||
# - When running with pods, sandbox sizing information will only be available if using Kubernetes >= 1.23 and containerd >= 1.6. CRI-O
|
||||
# does not yet support sandbox sizing annotations.
|
||||
# - When running single containers using a tool like ctr, container sizing information will be available.
|
||||
static_sandbox_resource_mgmt=@DEFSTATICRESOURCEMGMT@
|
||||
|
||||
# If specified, sandbox_bind_mounts identifieds host paths to be mounted (ro) into the sandboxes shared path.
|
||||
# This is only valid if filesystem sharing is utilized. The provided path(s) will be bindmounted into the shared fs directory.
|
||||
# If defaults are utilized, these mounts should be available in the guest at `/run/kata-containers/shared/containers/sandbox-mounts`
|
||||
# These will not be exposed to the container workloads, and are only provided for potential guest services.
|
||||
sandbox_bind_mounts=@DEFBINDMOUNTS@
|
||||
|
||||
# VFIO Mode
|
||||
# Determines how VFIO devices should be be presented to the container.
|
||||
# Options:
|
||||
#
|
||||
# - vfio
|
||||
# Matches behaviour of OCI runtimes (e.g. runc) as much as
|
||||
# possible. VFIO devices will appear in the container as VFIO
|
||||
# character devices under /dev/vfio. The exact names may differ
|
||||
# from the host (they need to match the VM's IOMMU group numbers
|
||||
# rather than the host's)
|
||||
#
|
||||
# - guest-kernel
|
||||
# This is a Kata-specific behaviour that's useful in certain cases.
|
||||
# The VFIO device is managed by whatever driver in the VM kernel
|
||||
# claims it. This means it will appear as one or more device nodes
|
||||
# or network interfaces depending on the nature of the device.
|
||||
# Using this mode requires specially built workloads that know how
|
||||
# to locate the relevant device interfaces within the VM.
|
||||
#
|
||||
vfio_mode="@DEFVFIOMODE@"
|
||||
|
||||
# If enabled, the runtime will not create Kubernetes emptyDir mounts on the guest filesystem. Instead, emptyDir mounts will
|
||||
# be created on the host and shared via virtio-fs. This is potentially slower, but allows sharing of files from host to guest.
|
||||
disable_guest_empty_dir=@DEFDISABLEGUESTEMPTYDIR@
|
||||
|
||||
# Enabled experimental feature list, format: ["a", "b"].
|
||||
# Experimental features are features not stable enough for production,
|
||||
# they may break compatibility, and are prepared for a big version bump.
|
||||
# Supported experimental features:
|
||||
# (default: [])
|
||||
experimental=@DEFAULTEXPFEATURES@
|
||||
|
||||
# If enabled, user can run pprof tools with shim v2 process through kata-monitor.
|
||||
# (default: false)
|
||||
# enable_pprof = true
|
||||
|
||||
# WARNING: All the options in the following section have not been implemented yet.
|
||||
# This section was added as a placeholder. DO NOT USE IT!
|
||||
[image]
|
||||
# Container image service.
|
||||
#
|
||||
# Offload the CRI image management service to the Kata agent.
|
||||
# (default: false)
|
||||
#service_offload = true
|
||||
|
||||
# Container image decryption keys provisioning.
|
||||
# Applies only if service_offload is true.
|
||||
# Keys can be provisioned locally (e.g. through a special command or
|
||||
# a local file) or remotely (usually after the guest is remotely attested).
|
||||
# The provision setting is a complete URL that lets the Kata agent decide
|
||||
# which method to use in order to fetch the keys.
|
||||
#
|
||||
# Keys can be stored in a local file, in a measured and attested initrd:
|
||||
#provision=data:///local/key/file
|
||||
#
|
||||
# Keys could be fetched through a special command or binary from the
|
||||
# initrd (guest) image, e.g. a firmware call:
|
||||
#provision=file:///path/to/bin/fetcher/in/guest
|
||||
#
|
||||
# Keys can be remotely provisioned. The Kata agent fetches them from e.g.
|
||||
# a HTTPS URL:
|
||||
#provision=https://my-key-broker.foo/tenant/<tenant-id>
|
||||
@@ -14,9 +14,16 @@
[hypervisor.qemu]
path = "@QEMUTDXPATH@"
kernel = "@KERNELTDXPATH@"
image = "@IMAGETDXPATH@"
image = "@IMAGEPATH@"
# initrd = "@INITRDPATH@"
machine_type = "@MACHINETYPE@"

# rootfs filesystem type:
# - ext4 (default)
# - xfs
# - erofs
rootfs_type=@DEFROOTFSTYPE@

# Enable confidential guest support.
# Toggling that setting may trigger different hardware features, ranging
# from memory encryption to both memory and CPU-state encryption and integrity.
@@ -59,17 +66,17 @@ valid_hypervisor_paths = @QEMUTDXVALIDHYPERVISORPATHS@
# may stop the virtual machine from booting.
# To see the list of default parameters, enable hypervisor debug, create a
# container and look for 'default-kernel-parameters' log entries.
kernel_params = "@TDXKERNELPARAMS_QEMU@"
kernel_params = "@KERNELTDXPARAMS@"

# Path to the firmware.
# If you want that qemu uses the default firmware leave this option empty
firmware = "@TDVFFIRMWAREPATH@"
firmware = "@FIRMWARETDVFPATH@"

# Path to the firmware volume.
# firmware TDVF or OVMF can be split into FIRMWARE_VARS.fd (UEFI variables
# as configuration) and FIRMWARE_CODE.fd (UEFI program image). UEFI variables
# can be customized per each user while UEFI code is kept same.
firmware_volume = "@TDVFFIRMWAREVOLUMEPATH@"
firmware_volume = "@FIRMWARETDVFVOLUMEPATH@"

# Machine accelerators
# comma-separated list of machine accelerators to pass to the hypervisor.
@@ -165,7 +172,7 @@ disable_block_device_use = @DEFDISABLEBLOCK@
# - virtio-fs (default)
# - virtio-9p
# - virtio-fs-nydus
shared_fs = "@DEFSHAREDFS_QEMU_VIRTIOFS@"
shared_fs = "@DEFSHAREDFS_QEMU_TDX_VIRTIOFS@"

# Path to vhost-user-fs daemon.
virtio_fs_daemon = "@DEFVIRTIOFSDAEMON@"
@@ -178,6 +185,9 @@ valid_virtio_fs_daemon_paths = @DEFVALIDVIRTIOFSDAEMONPATHS@
# Default size of DAX cache in MiB
virtio_fs_cache_size = @DEFVIRTIOFSCACHESIZE@

# Default size of virtqueues
virtio_fs_queue_size = @DEFVIRTIOFSQUEUESIZE@

# Extra args for virtiofsd daemon
#
# Format example:
@@ -190,7 +200,7 @@ virtio_fs_extra_args = @DEFVIRTIOFSEXTRAARGS@

# Cache mode:
#
# - none
# - never
# Metadata, data, and pathname lookup are not cached in guest. They are
# always fetched from host and any changes are immediately pushed to host.
#
@@ -208,6 +218,20 @@ virtio_fs_cache = "@DEFVIRTIOFSCACHE@"
# or nvdimm.
block_device_driver = "@DEFBLOCKSTORAGEDRIVER_QEMU@"

# aio is the I/O mechanism used by qemu
# Options:
#
# - threads
# Pthread based disk I/O.
#
# - native
# Native Linux I/O.
#
# - io_uring
# Linux io_uring API. This provides the fastest I/O operations on Linux, requires kernel>5.1 and
# qemu >=5.0.
block_device_aio = "@DEFBLOCKDEVICEAIO_QEMU@"

# Specifies cache-related options will be set to block devices or not.
# Default false
#block_device_cache_set = true
@@ -271,6 +295,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@"
# Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@
valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@

# The timeout for reconnecting on non-server spdk sockets when the remote end goes away.
# qemu will delay this many seconds and then attempt to reconnect.
# Zero disables reconnecting, and the default is zero.
vhost_user_reconnect_timeout_sec = 0

# Enable file based guest memory support. The default is an empty string which
# will disable this feature. In the case of virtio-fs, this is enabled
# automatically and '/dev/shm' is used as the backing folder.
@@ -287,7 +316,7 @@ valid_file_mem_backends = @DEFVALIDFILEMEMBACKENDS@
pflashes = []

# This option changes the default hypervisor and kernel parameters
# to enable debug output where available.
# to enable debug output where available. And Debug also enable the hmp socket.
#
# Default false
#enable_debug = true
@@ -499,8 +528,8 @@ kernel_modules=[]
#debug_console_enabled = true

# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 60)
dial_timeout = 60

[runtime]
# If enabled, the runtime will log additional debug messages to the
@@ -533,6 +562,11 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
# (default: true)
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@

# vCPUs pinning settings
# if enabled, each vCPU thread will be scheduled to a fixed CPU
# qualified condition: num(vCPU threads) == num(CPUs in sandbox's CPUSet)
# enable_vcpus_pinning = false

# Apply a custom SELinux security policy to the container process inside the VM.
# This is used when you want to apply a type other than the default `container_t`,
# so general users should not uncomment and apply it.
@@ -630,7 +664,7 @@ experimental=@DEFAULTEXPFEATURES@
#
# Offload the CRI image management service to the Kata agent.
# (default: false)
service_offload = @DEFSERVICEOFFLOAD@
#service_offload = true

# Container image decryption keys provisioning.
# Applies only if service_offload is true.
@@ -563,8 +563,8 @@ kernel_modules=[]
#debug_console_enabled = true

# Agent connection dialing timeout value in seconds
# (default: 30)
#dial_timeout = 30
# (default: 45)
dial_timeout = 45

[runtime]
# If enabled, the runtime will log additional debug messages to the
@@ -243,7 +243,7 @@ func (s *service) genericIPTablesHandler(w http.ResponseWriter, r *http.Request,

func (s *service) startManagementServer(ctx context.Context, ociSpec *specs.Spec) {
// metrics socket will under sandbox's bundle path
metricsAddress := SocketAddress(s.id)
metricsAddress := ServerSocketAddress(s.id)

listener, err := cdshim.NewSocket(metricsAddress)
if err != nil {
@@ -312,14 +312,38 @@ func GetSandboxesStoragePathRust() string {
return "/run/kata"
}

// SocketAddress returns the address of the unix domain socket for communicating with the
// SocketPath returns the path of the socket using the given storagePath
func SocketPath(id string, storagePath string) string {
return filepath.Join(string(filepath.Separator), storagePath, id, "shim-monitor.sock")
}

// SocketPathGo returns the path of the socket to be used with the go runtime
func SocketPathGo(id string) string {
return SocketPath(id, GetSandboxesStoragePath())
}

// SocketPathRust returns the path of the socket to be used with the rust runtime
func SocketPathRust(id string) string {
return SocketPath(id, GetSandboxesStoragePathRust())
}

// ServerSocketAddress returns the address of the unix domain socket the shim management endpoint
// should listen.
// NOTE: this code is only called by the go shim management implementation.
func ServerSocketAddress(id string) string {
return fmt.Sprintf("unix://%s", SocketPathGo(id))
}

// ClientSocketAddress returns the address of the unix domain socket for communicating with the
// shim management endpoint
func SocketAddress(id string) string {
// NOTE: this code allows various go clients, e.g. kata-runtime or kata-monitor commands, to
// connect to the rust shim management implementation.
func ClientSocketAddress(id string) string {
// get the go runtime uds path
socketPath := filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock")
socketPath := SocketPathGo(id)
// if the path not exist, use the rust runtime uds path instead
if _, err := os.Stat(socketPath); err != nil {
return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePathRust(), id, "shim-monitor.sock"))
socketPath = SocketPathRust(id)
}
return fmt.Sprintf("unix://%s", socketPath)
}
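A short, hypothetical usage sketch of the refactored helpers: a Go client such as kata-monitor resolves the management socket with ClientSocketAddress (which falls back to the rust runtime path, as implemented above) and then dials the resulting unix:// address. The hard-coded address below is illustrative only.

package main

import (
	"fmt"
	"net"
	"strings"
)

// resolveAndDial shows the intended client-side flow: take the "unix://" URL
// returned by ClientSocketAddress and open the underlying unix socket.
func resolveAndDial(addr string) (net.Conn, error) {
	return net.Dial("unix", strings.TrimPrefix(addr, "unix://"))
}

func main() {
	addr := "unix:///run/vc/sbs/<sandbox-id>/shim-monitor.sock" // illustrative value
	conn, err := resolveAndDial(addr)
	if err != nil {
		fmt.Println("dial failed:", err)
		return
	}
	defer conn.Close()
}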
@@ -54,6 +54,25 @@ func NewVFIODevice(devInfo *config.DeviceInfo) *VFIODevice {
}
}

// Ignore specific PCI devices, supply the pciClass and the bitmask to check
// against the device class, deviceBDF for meaningfull info message
func (device *VFIODevice) checkIgnorePCIClass(pciClass string, deviceBDF string, bitmask uint64) (bool, error) {
if pciClass == "" {
return false, nil
}
pciClassID, err := strconv.ParseUint(pciClass, 0, 32)
if err != nil {
return false, err
}
// ClassID is 16 bits, remove the two trailing zeros
pciClassID = pciClassID >> 8
if pciClassID&bitmask == bitmask {
deviceLogger().Infof("Ignoring PCI (Host) Bridge deviceBDF %v Class %x", deviceBDF, pciClassID)
return true, nil
}
return false, nil
}
// Attach is standard interface of api.Device, it's used to add device to some
// DeviceReceiver
func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceReceiver) (retErr error) {
@@ -88,6 +107,18 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
}
id := utils.MakeNameID("vfio", device.DeviceInfo.ID+strconv.Itoa(i), maxDevIDSize)

pciClass := getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass)
// We need to ignore Host or PCI Bridges that are in the same IOMMU group as the
// passed-through devices. One CANNOT pass-through a PCI bridge or Host bridge.
// Class 0x0604 is PCI bridge, 0x0600 is Host bridge
ignorePCIDevice, err := device.checkIgnorePCIClass(pciClass, deviceBDF, 0x0600)
if err != nil {
return err
}
if ignorePCIDevice {
continue
}

var vfio config.VFIODev

switch vfioDeviceType {
@@ -100,7 +131,7 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
BDF: deviceBDF,
SysfsDev: deviceSysfsDev,
IsPCIe: isPCIe,
Class: getPCIDeviceProperty(deviceBDF, PCISysFsDevicesClass),
Class: pciClass,
}
if isPCIe {
vfioPCI.Bus = fmt.Sprintf("%s%d", pcieRootPortPrefix, len(AllPCIeDevs))
@@ -121,6 +152,7 @@ func (device *VFIODevice) Attach(ctx context.Context, devReceiver api.DeviceRece
default:
return fmt.Errorf("Failed to append device: VFIO device type unrecognized")
}

device.VfioDevs = append(device.VfioDevs, &vfio)
}
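To make the bitmask check concrete, the following self-contained Go sketch repeats the same arithmetic: the sysfs class string is parsed, the low 8 bits (programming interface) are dropped, and the result is tested against the 0x0600 bridge mask, so PCI and host bridges are skipped while, say, a display-class device is not. The sample class values are illustrative.

package main

import (
	"fmt"
	"strconv"
)

// shouldIgnore mirrors checkIgnorePCIClass: parse the sysfs class string,
// drop the 8-bit programming-interface field, and test the bridge mask.
func shouldIgnore(pciClass string, bitmask uint64) (bool, error) {
	if pciClass == "" {
		return false, nil
	}
	classID, err := strconv.ParseUint(pciClass, 0, 32)
	if err != nil {
		return false, err
	}
	classID >>= 8
	return classID&bitmask == bitmask, nil
}

func main() {
	// 0x0604xx = PCI-to-PCI bridge, 0x0600xx = host bridge, 0x0300xx = display controller.
	for _, class := range []string{"0x060400", "0x060000", "0x030000"} {
		ignore, _ := shouldIgnore(class, 0x0600)
		fmt.Printf("class %s -> ignore=%v\n", class, ignore)
	}
}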
@@ -363,16 +363,12 @@ func (object Object) QemuParams(config *Config) []string {

case TDXGuest:
objectParams = append(objectParams, string(object.Type))
objectParams = append(objectParams, "sept-ve-disable=on")
objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID))
if object.Debug {
objectParams = append(objectParams, "debug=on")
}
deviceParams = append(deviceParams, string(object.Driver))
deviceParams = append(deviceParams, fmt.Sprintf("id=%s", object.DeviceID))
deviceParams = append(deviceParams, fmt.Sprintf("file=%s", object.File))
if object.FirmwareVolume != "" {
deviceParams = append(deviceParams, fmt.Sprintf("config-firmware-volume=%s", object.FirmwareVolume))
}
config.Bios = object.File
case SEVGuest:
objectParams = append(objectParams, string(object.Type))
objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID))
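For orientation, the TDXGuest branch above effectively assembles comma-joined -object and -device argument lists for QEMU. The sketch below only shows how such pieces are joined; the concrete type ("tdx-guest"), driver ("loader") and file paths are assumptions made for the example, not values taken from this diff.

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Illustrative values only; the real ones come from the govmm Object fields.
	objectParams := []string{"tdx-guest", "sept-ve-disable=on", "id=tdx", "debug=on"}
	deviceParams := []string{"loader", "id=rom", "file=/path/to/OVMF.fd", "config-firmware-volume=/path/to/OVMF_VARS.fd"}

	// QEMU ultimately receives the comma-joined parameter lists:
	fmt.Println("-object", strings.Join(objectParams, ","))
	fmt.Println("-device", strings.Join(deviceParams, ","))
}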
@@ -114,25 +114,32 @@ func (km *KataMonitor) ProcessMetricsRequest(w http.ResponseWriter, r *http.Requ
writer = gz
}

// create encoder to encode metrics.
encoder := expfmt.NewEncoder(writer, contentType)

// gather metrics collected for management agent.
mfs, err := prometheus.DefaultGatherer.Gather()
filterFamilies, err := getFilterFamilyFromReq(r)
if err != nil {
monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
return
}

// encode metric gathered in current process
if err := encodeMetricFamily(mfs, encoder); err != nil {
monitorLog.WithError(err).Warnf("failed to encode metrics")
// create encoder to encode metrics.
encoder := expfmt.NewEncoder(writer, contentType)

if len(filterFamilies) == 0 {
// gather metrics collected for management agent.
mfs, err := prometheus.DefaultGatherer.Gather()
if err != nil {
monitorLog.WithError(err).Error("failed to Gather metrics from prometheus.DefaultGatherer")
w.WriteHeader(http.StatusInternalServerError)
w.Write([]byte(err.Error()))
return
}

// encode metric gathered in current process
if err := encodeMetricFamily(mfs, encoder); err != nil {
monitorLog.WithError(err).Warnf("failed to encode metrics")
}
}

// aggregate sandboxes metrics and write to response by encoder
if err := km.aggregateSandboxMetrics(encoder); err != nil {
if err := km.aggregateSandboxMetrics(encoder, filterFamilies); err != nil {
monitorLog.WithError(err).Errorf("failed aggregateSandboxMetrics")
scrapeFailedCount.Inc()
}
@@ -155,7 +162,7 @@ func encodeMetricFamily(mfs []*dto.MetricFamily, encoder expfmt.Encoder) error {
}

// aggregateSandboxMetrics will get metrics from one sandbox and do some process
func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder, filterFamilies []string) error {
// get all kata sandboxes from cache
sandboxes := km.sandboxCache.getSandboxList()
// save running kata pods as a metrics.
@@ -230,9 +237,21 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error {
}

// write metrics to response.
for _, mf := range metricsMap {
if err := encoder.Encode(mf); err != nil {
return err
if len(filterFamilies) > 0 {
for _, filterName := range filterFamilies {
for fullName, mf := range metricsMap {
if strings.HasPrefix(fullName, filterName) {
if err := encoder.Encode(mf); err != nil {
return err
}
}
}
}
} else {
for _, mf := range metricsMap {
if err := encoder.Encode(mf); err != nil {
return err
}
}
}
return nil

@@ -32,7 +32,7 @@ func (km *KataMonitor) composeSocketAddress(r *http.Request) (string, error) {
return "", err
}

return shim.SocketAddress(sandbox), nil
return shim.ClientSocketAddress(sandbox), nil
}

func (km *KataMonitor) proxyRequest(w http.ResponseWriter, r *http.Request,

@@ -8,6 +8,7 @@ package katamonitor
import (
"fmt"
"net/http"
"strings"
"time"

shim "github.com/kata-containers/kata-containers/src/runtime/pkg/containerd-shim-v2"
@@ -36,3 +37,11 @@ func getSandboxIDFromReq(r *http.Request) (string, error) {
func getSandboxFS() string {
return shim.GetSandboxesStoragePath()
}

func getFilterFamilyFromReq(r *http.Request) ([]string, error) {
filterFamilies := r.URL.Query().Get("filter_family")
if filterFamilies != "" {
return strings.Split(filterFamilies, ","), nil
}
return nil, nil
}

@@ -786,6 +786,16 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
kataUtilsLogger.Info("Setting 'disable_image_nvdimm = true' as microvm does not support NVDIMM")
}

// Nvdimm can only be support when UEFI/ACPI is enabled on arm64, otherwise disable it.
if goruntime.GOARCH == "arm64" && firmware == "" {
if p, err := h.PFlash(); err == nil {
if len(p) == 0 {
h.DisableImageNvdimm = true
kataUtilsLogger.Info("Setting 'disable_image_nvdimm = true' if there is no firmware specified")
}
}
}

blockDriver, err := h.blockDeviceDriver()
if err != nil {
return vc.HypervisorConfig{}, err
@@ -1071,6 +1081,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
EnableAnnotations: h.EnableAnnotations,
DisableSeccomp: h.DisableSeccomp,
ConfidentialGuest: h.ConfidentialGuest,
Rootless: h.Rootless,
DisableSeLinux: h.DisableSeLinux,
DisableGuestSeLinux: h.DisableGuestSeLinux,
NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(),

@@ -13,6 +13,7 @@ import (
"path"
"path/filepath"
"reflect"
goruntime "runtime"
"strings"
"syscall"
"testing"
@@ -182,6 +183,10 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf
SNPGuestPolicy: defaultSNPGuestPolicy,
}

if goruntime.GOARCH == "arm64" && len(hypervisorConfig.PFlash) == 0 && hypervisorConfig.FirmwarePath == "" {
hypervisorConfig.DisableImageNvdimm = true
}

agentConfig := vc.KataAgentConfig{
LongLiveConn: true,
}

@@ -19,7 +19,7 @@ import (

// BuildShimClient builds and returns an http client for communicating with the provided sandbox
func BuildShimClient(sandboxID string, timeout time.Duration) (*http.Client, error) {
return buildUnixSocketClient(shim.SocketAddress(sandboxID), timeout)
return buildUnixSocketClient(shim.ClientSocketAddress(sandboxID), timeout)
}

// buildUnixSocketClient build http client for Unix socket

@@ -19,6 +19,7 @@ import (
"net/http/httputil"
"os"
"os/exec"
"os/user"
"path/filepath"
"regexp"
"strconv"
@@ -37,6 +38,8 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/device/config"
hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
pkgUtils "github.com/kata-containers/kata-containers/src/runtime/pkg/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
)
@@ -653,7 +656,7 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error {
clh.Logger().WithField("function", "StartVM").Info("starting Sandbox")

vmPath := filepath.Join(clh.config.VMStorePath, clh.id)
err := os.MkdirAll(vmPath, DirMode)
err := utils.MkdirAllWithInheritedOwner(vmPath, DirMode)
if err != nil {
return err
}
@@ -1364,9 +1367,16 @@ func (clh *cloudHypervisor) launchClh() (int, error) {
cmdHypervisor.Stdout = clh.console
}
}

cmdHypervisor.Stderr = cmdHypervisor.Stdout

attr := syscall.SysProcAttr{}
attr.Credential = &syscall.Credential{
Uid: clh.config.Uid,
Gid: clh.config.Gid,
Groups: clh.config.Groups,
}
cmdHypervisor.SysProcAttr = &attr

err = utils.StartCmd(cmdHypervisor)
if err != nil {
return -1, err
@@ -1691,6 +1701,30 @@ func (clh *cloudHypervisor) cleanupVM(force bool) error {
clh.Logger().WithError(err).WithField("path", dir).Warnf("failed to remove vm path")
}
}
if rootless.IsRootless() {
if _, err := user.Lookup(clh.config.User); err != nil {
clh.Logger().WithError(err).WithFields(
log.Fields{
"user": clh.config.User,
"uid": clh.config.Uid,
}).Warn("failed to find the user, it might have been removed")
return nil
}

if err := pkgUtils.RemoveVmmUser(clh.config.User); err != nil {
clh.Logger().WithError(err).WithFields(
log.Fields{
"user": clh.config.User,
"uid": clh.config.Uid,
}).Warn("failed to delete the user")
return nil
}
clh.Logger().WithFields(
log.Fields{
"user": clh.config.User,
"uid": clh.config.Uid,
}).Debug("successfully removed the non root user")
}

clh.reset()

@@ -8,15 +8,28 @@ package virtcontainers
import "os"

const (
tdxSysFirmwareDir = "/sys/firmware/tdx_seam/"
tdxSeamSysFirmwareDir = "/sys/firmware/tdx_seam/"

tdxCPUFlag = "tdx"
tdxSysFirmwareDir = "/sys/firmware/tdx/"

sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev"

snpKvmParameterPath = "/sys/module/kvm_amd/parameters/sev_snp"
)

// TDX is supported and properly loaded when the firmware directory (either tdx or tdx_seam) exists or `tdx` is part of the CPU flag
func checkTdxGuestProtection(flags map[string]bool) bool {
if d, err := os.Stat(tdxSysFirmwareDir); err == nil && d.IsDir() {
return true
}

if d, err := os.Stat(tdxSeamSysFirmwareDir); err == nil && d.IsDir() {
return true
}

return false
}

// Implementation of this function is architecture specific
func availableGuestProtection() (guestProtection, error) {
flags, err := CPUFlags(procCPUInfo)
@@ -24,10 +37,10 @@ func availableGuestProtection() (guestProtection, error) {
return noneProtection, err
}

// TDX is supported and properly loaded when the firmware directory exists or `tdx` is part of the CPU flags
if d, err := os.Stat(tdxSysFirmwareDir); (err == nil && d.IsDir()) || flags[tdxCPUFlag] {
if checkTdxGuestProtection(flags) {
return tdxProtection, nil
}

// SEV-SNP is supported and enabled when the kvm module `sev_snp` parameter is set to `Y`
// SEV-SNP support infers SEV (-ES) support
if _, err := os.Stat(snpKvmParameterPath); err == nil {

@@ -157,7 +157,7 @@ func (nd *nydusd) args() ([]string, error) {
logLevel = "debug"
}
args := []string{
"virtiofs", "--hybrid-mode",
"virtiofs",
"--log-level", logLevel,
"--apisock", nd.apiSockPath,
"--sock", nd.sockPath,

@@ -99,13 +99,13 @@ func TestNydusdArgs(t *testing.T) {
apiSockPath: "/var/lib/api.sock",
debug: true,
}
expected := "virtiofs --hybrid-mode --log-level debug --apisock /var/lib/api.sock --sock /var/lib/vhost-user.sock"
expected := "virtiofs --log-level debug --apisock /var/lib/api.sock --sock /var/lib/vhost-user.sock"
args, err := nd.args()
assert.NoError(err)
assert.Equal(expected, strings.Join(args, " "))

nd.debug = false
expected = "virtiofs --hybrid-mode --log-level info --apisock /var/lib/api.sock --sock /var/lib/vhost-user.sock"
expected = "virtiofs --log-level info --apisock /var/lib/api.sock --sock /var/lib/vhost-user.sock"
args, err = nd.args()
assert.NoError(err)
assert.Equal(expected, strings.Join(args, " "))

@@ -1267,6 +1267,7 @@ func (q *qemu) cleanupVM() error {
"user": q.config.User,
"uid": q.config.Uid,
}).Warn("failed to delete the user")
return nil
}
q.Logger().WithFields(
logrus.Fields{

@@ -260,7 +260,7 @@ func (q *qemuAmd64) enableProtection() error {
if q.qemuMachine.Options != "" {
q.qemuMachine.Options += ","
}
q.qemuMachine.Options += "kvm-type=tdx,confidential-guest-support=tdx"
q.qemuMachine.Options += "confidential-guest-support=tdx"
logger.Info("Enabling TDX guest protection")
return nil
case sevProtection: