From 7ffe9e519837a0503b1cb42ee6f2f189371ce6dd Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Wed, 2 Feb 2022 17:06:20 +0100 Subject: [PATCH 01/71] virtcontainers: Do not add a virtio-rng-ccw device On s390x, skip adding a virtio-rng device. The on-chip CPACF provides entropy instead. For Confidential Containers, when using Secure Execution, entropy attacks on virtio-rng are mitigated. Fixes: #3598 Signed-off-by: Jakob Naucke --- src/runtime/virtcontainers/qemu.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index b522928a7..6e584628f 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -619,13 +619,16 @@ func (q *qemu) CreateVM(ctx context.Context, id string, networkNS NetworkNamespa qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread} } // Add RNG device to hypervisor - rngDev := config.RNGDev{ - ID: rngID, - Filename: q.config.EntropySource, - } - qemuConfig.Devices, err = q.arch.appendRNGDevice(ctx, qemuConfig.Devices, rngDev) - if err != nil { - return err + // Skip for s390x as CPACF is used + if machine.Type != QemuCCWVirtio { + rngDev := config.RNGDev{ + ID: rngID, + Filename: q.config.EntropySource, + } + qemuConfig.Devices, err = q.arch.appendRNGDevice(ctx, qemuConfig.Devices, rngDev) + if err != nil { + return err + } } // Add PCIe Root Port devices to hypervisor From f10642c82beacd8f04d21d9f8116796d74bdc087 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Wed, 22 Dec 2021 19:06:26 +0100 Subject: [PATCH 02/71] osbuilder: Source .cargo/env before checking Rust We install Rust in the build containers, but we also install Rust in `rootfs.sh` if it is missing. It makes sense to install Rust in the build containers so it does not have to be installed every time, but for that check to work on non-login shells, we should source `.cargo/env` before running it. 
Signed-off-by: Jakob Naucke --- tools/osbuilder/rootfs-builder/rootfs.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/osbuilder/rootfs-builder/rootfs.sh b/tools/osbuilder/rootfs-builder/rootfs.sh index 20c14b2e8..b13dc3cbc 100755 --- a/tools/osbuilder/rootfs-builder/rootfs.sh +++ b/tools/osbuilder/rootfs-builder/rootfs.sh @@ -554,6 +554,7 @@ EOT echo "WARNING: Forcing LIBC=gnu because $ARCH has no musl Rust target" fi [ "$LIBC" == "musl" ] && bash ${script_dir}/../../../ci/install_musl.sh + test -r "${HOME}/.cargo/env" && source "${HOME}/.cargo/env" # rust agent needs ${arch}-unknown-linux-${LIBC} if ! (rustup show | grep -v linux-${LIBC} > /dev/null); then if [ "$RUST_VERSION" == "null" ]; then From 573a37b33b4100deaf1026af61aadd03fbc37abe Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Wed, 22 Dec 2021 19:06:26 +0100 Subject: [PATCH 03/71] osbuilder: Add CentOS Stream rootfs to cover a Red Hat (adjacent) rootfs with great cross-platform compatibility and a workable release cadence. The previous CentOS & Fedora workflows are simplified. Also remove unnecessary `/usr/share` files as on Ubuntu and mark Alpine as unsupported on ppc64le (due to musl, for a while already). 
Fixes: #3340 Signed-off-by: Jakob Naucke --- tools/osbuilder/.gitignore | 2 +- tools/osbuilder/README.md | 12 +++--- .../rootfs-builder/centos/Dockerfile.in | 18 ++++++++ .../osbuilder/rootfs-builder/centos/config.sh | 17 ++++++++ tools/osbuilder/rootfs-builder/rootfs.sh | 9 ---- tools/osbuilder/scripts/lib.sh | 43 ++++--------------- 6 files changed, 51 insertions(+), 50 deletions(-) create mode 100644 tools/osbuilder/rootfs-builder/centos/Dockerfile.in create mode 100644 tools/osbuilder/rootfs-builder/centos/config.sh diff --git a/tools/osbuilder/.gitignore b/tools/osbuilder/.gitignore index b70d4aa8c..5936792d5 100644 --- a/tools/osbuilder/.gitignore +++ b/tools/osbuilder/.gitignore @@ -3,7 +3,7 @@ dracut/Dockerfile dracut/dracut.conf.d/15-extra-libs.conf /.*.done /*_rootfs -/kata-Centos-dnf.conf +/kata-centos-dnf.conf /kata-containers-initrd.img /kata-containers.img rootfs-builder/centos/RPM-GPG-KEY-* diff --git a/tools/osbuilder/README.md b/tools/osbuilder/README.md index b97839361..343d2bf60 100644 --- a/tools/osbuilder/README.md +++ b/tools/osbuilder/README.md @@ -209,9 +209,9 @@ of the the osbuilder distributions. > Note: this table is not relevant for the dracut build method, since it supports any Linux distribution and architecture where dracut is available. 
-| |Alpine |Clear Linux |Debian/Ubuntu | -|-- |-- |-- |-- | -|**ARM64** |:heavy_check_mark:| | | -|**PPC64le**|:heavy_check_mark:| |:heavy_check_mark:| -|**s390x** | | |:heavy_check_mark:| -|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| +| |Alpine |CentOS Stream |Clear Linux |Debian/Ubuntu | +|-- |-- |-- |-- |-- | +|**ARM64** |:heavy_check_mark:|:heavy_check_mark:| | | +|**PPC64le**| |:heavy_check_mark:| |:heavy_check_mark:| +|**s390x** | |:heavy_check_mark:| |:heavy_check_mark:| +|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| diff --git a/tools/osbuilder/rootfs-builder/centos/Dockerfile.in b/tools/osbuilder/rootfs-builder/centos/Dockerfile.in new file mode 100644 index 000000000..fce805994 --- /dev/null +++ b/tools/osbuilder/rootfs-builder/centos/Dockerfile.in @@ -0,0 +1,18 @@ +# Copyright (c) 2018 Intel Corporation, 2021 IBM Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +FROM quay.io/centos/centos:@OS_VERSION@ +@SET_PROXY@ + +RUN dnf -y update && \ + dnf -y install dnf-plugins-core && \ + dnf config-manager --set-enabled crb && \ + dnf -y install \ + diffutils \ + file \ + g++ \ + git \ + protobuf-compiler + +@INSTALL_RUST@ diff --git a/tools/osbuilder/rootfs-builder/centos/config.sh b/tools/osbuilder/rootfs-builder/centos/config.sh new file mode 100644 index 000000000..d9d51e5d7 --- /dev/null +++ b/tools/osbuilder/rootfs-builder/centos/config.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# Copyright (c) 2018 Intel Corporation, 2021 IBM Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +OS_NAME=centos +OS_VERSION=${OS_VERSION:-stream9} +PACKAGES=chrony +[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd" +[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp" + +# Container registry tag is different from metalink repo, e.g. 
"stream9" => "9-stream" +os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")" + +METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch" +GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official +GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE" diff --git a/tools/osbuilder/rootfs-builder/rootfs.sh b/tools/osbuilder/rootfs-builder/rootfs.sh index b13dc3cbc..80633a045 100755 --- a/tools/osbuilder/rootfs-builder/rootfs.sh +++ b/tools/osbuilder/rootfs-builder/rootfs.sh @@ -45,9 +45,6 @@ ARCH=$(uname -m) # distro-specific config file typeset -r CONFIG_SH="config.sh" -# optional arch-specific config file -typeset -r CONFIG_ARCH_SH="config_${ARCH}.sh" - # Name of an optional distro-specific file which, if it exists, must implement the # build_rootfs() function. typeset -r LIB_SH="rootfs_lib.sh" @@ -308,12 +305,6 @@ build_rootfs_distro() rootfs_config="${distro_config_dir}/${CONFIG_SH}" source "${rootfs_config}" - # Source arch-specific config file - rootfs_arch_config="${distro_config_dir}/${CONFIG_ARCH_SH}" - if [ -f "${rootfs_arch_config}" ]; then - source "${rootfs_arch_config}" - fi - if [ -z "$ROOTFS_DIR" ]; then ROOTFS_DIR="${script_dir}/rootfs-${OS_NAME}" fi diff --git a/tools/osbuilder/scripts/lib.sh b/tools/osbuilder/scripts/lib.sh index 857eed57c..9f806d039 100644 --- a/tools/osbuilder/scripts/lib.sh +++ b/tools/osbuilder/scripts/lib.sh @@ -57,36 +57,18 @@ check_root() generate_dnf_config() { - REPO_NAME=${REPO_NAME:-"base"} - CACHE_DIR=${CACHE_DIR:-"/var/cache/dnf"} cat > "${DNF_CONF}" << EOF [main] -cachedir=${CACHE_DIR} -logfile=${LOG_FILE} -keepcache=0 -debuglevel=2 -exactarch=1 -obsoletes=1 -plugins=0 -installonly_limit=3 reposdir=/root/mash -retries=5 + +[base] +name=${OS_NAME}-${OS_VERSION} base +releasever=${OS_VERSION} EOF if [ "$BASE_URL" != "" ]; then - cat >> "${DNF_CONF}" << EOF -[base] -name=${OS_NAME}-${OS_VERSION} ${REPO_NAME} -failovermethod=priority -baseurl=${BASE_URL} -enabled=1 -EOF - elif [ 
"$MIRROR_LIST" != "" ]; then - cat >> "${DNF_CONF}" << EOF -[base] -name=${OS_NAME}-${OS_VERSION} ${REPO_NAME} -mirrorlist=${MIRROR_LIST} -enabled=1 -EOF + echo "baseurl=$BASE_URL" >> "$DNF_CONF" + elif [ "$METALINK" != "" ]; then + echo "metalink=$METALINK" >> "$DNF_CONF" fi if [ -n "$GPG_KEY_URL" ]; then @@ -99,15 +81,6 @@ gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE} EOF fi - if [ -n "$GPG_KEY_ARCH_URL" ]; then - if [ ! -f "${CONFIG_DIR}/${GPG_KEY_ARCH_FILE}" ]; then - curl -L "${GPG_KEY_ARCH_URL}" -o "${CONFIG_DIR}/${GPG_KEY_ARCH_FILE}" - fi - cat >> "${DNF_CONF}" << EOF - file://${CONFIG_DIR}/${GPG_KEY_ARCH_FILE} -EOF - fi - } build_rootfs() @@ -151,6 +124,8 @@ build_rootfs() info "install packages for rootfs" $DNF install ${EXTRA_PKGS} ${PACKAGES} + + rm -rf ${ROOTFS_DIR}/usr/share/{bash-completion,cracklib,doc,info,locale,man,misc,pixmaps,terminfo,zoneinfo,zsh} } # Create a YAML metadata file inside the rootfs. From 37df1678ae8e3c913b23e6bcfce3ccb5975241e0 Mon Sep 17 00:00:00 2001 From: Yu Li Date: Thu, 20 Jan 2022 10:53:22 +0800 Subject: [PATCH 04/71] build: always reset ARCH after getting it When building with `ARCH=x86_64`, the previous `Makefile` will use it without checking and cause: Makefile:319: *** "ERROR: No hypervisors known for architecture x86_64 (looked for: acrn firecracker qemu cloud-hypervisor)". Stop. This commit fix the above issue by checking `ARCH` no matter where it is assigned. Fixes: #3444 Signed-off-by: James O. D. Hunt Signed-off-by: Yu Li --- src/runtime/Makefile | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index f936bd796..db2c5446c 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -8,20 +8,19 @@ SKIP_GO_VERSION_CHECK= include golang.mk #Get ARCH. 
-ifneq (,$(golang_version_raw)) - GOARCH=$(shell go env GOARCH) - ifeq ($(ARCH),) - ARCH = $(GOARCH) - endif -else - ARCH = $(shell uname -m) - ifeq ($(ARCH),x86_64) - ARCH = amd64 - endif - ifeq ($(ARCH),aarch64) - ARCH = arm64 +ifeq ($(ARCH),) + ifneq (,$(golang_version_raw)) + override ARCH = $(shell go env GOARCH) + else + override ARCH = $(shell uname -m) endif endif +ifeq ($(ARCH),x86_64) + override ARCH = amd64 +endif +ifeq ($(ARCH),aarch64) + override ARCH = arm64 +endif ARCH_DIR = arch ARCH_FILE_SUFFIX = -options.mk @@ -29,8 +28,12 @@ ARCH_FILE = $(ARCH_DIR)/$(ARCH)$(ARCH_FILE_SUFFIX) ARCH_FILES = $(wildcard arch/*$(ARCH_FILE_SUFFIX)) ALL_ARCHES = $(patsubst $(ARCH_DIR)/%$(ARCH_FILE_SUFFIX),%,$(ARCH_FILES)) -# Load architecture-dependent settings -include $(ARCH_FILE) +ifeq (,$(realpath $(ARCH_FILE))) + $(error "ERROR: invalid architecture: '$(ARCH)'") +else + # Load architecture-dependent settings + include $(ARCH_FILE) +endif PROJECT_TYPE = kata PROJECT_NAME = Kata Containers From 6be6d0a3b3b4d38410e4329c92c4e197d5de2abc Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 14 Feb 2022 09:40:42 +0100 Subject: [PATCH 05/71] katautils: Pass the OCI annotations back to the called OCI hooks That allows us to amend those annotations with information that could be used when running those hooks. For example nerdctl will use those annotations to resolve the networking namespace path in where to run the CNI plugin, i.e. the created pod networking namespace. 
Fixes #3629 Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/hook.go | 19 ++++++++++--------- src/runtime/pkg/katautils/hook_test.go | 9 +++++---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/runtime/pkg/katautils/hook.go b/src/runtime/pkg/katautils/hook.go index 62eb81169..16c316fea 100644 --- a/src/runtime/pkg/katautils/hook.go +++ b/src/runtime/pkg/katautils/hook.go @@ -32,15 +32,16 @@ func hookLogger() *logrus.Entry { return kataUtilsLogger.WithField("subsystem", "hook") } -func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error { +func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundlePath string) error { span, _ := katatrace.Trace(ctx, hookLogger(), "runHook", hookTracingTags) defer span.End() katatrace.AddTags(span, "path", hook.Path, "args", hook.Args) state := specs.State{ - Pid: syscall.Gettid(), - Bundle: bundlePath, - ID: cid, + Pid: syscall.Gettid(), + Bundle: bundlePath, + ID: cid, + Annotations: spec.Annotations, } stateJSON, err := json.Marshal(state) @@ -90,13 +91,13 @@ func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error return nil } -func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType string) error { +func runHooks(ctx context.Context, spec specs.Spec, hooks []specs.Hook, cid, bundlePath, hookType string) error { span, ctx := katatrace.Trace(ctx, hookLogger(), "runHooks", hookTracingTags) katatrace.AddTags(span, "type", hookType) defer span.End() for _, hook := range hooks { - if err := runHook(ctx, hook, cid, bundlePath); err != nil { + if err := runHook(ctx, spec, hook, cid, bundlePath); err != nil { hookLogger().WithFields(logrus.Fields{ "hook-type": hookType, "error": err, @@ -116,7 +117,7 @@ func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) return nil } - return runHooks(ctx, spec.Hooks.Prestart, cid, bundlePath, "pre-start") + return runHooks(ctx, spec, spec.Hooks.Prestart, 
cid, bundlePath, "pre-start") } // PostStartHooks run the hooks just after start container @@ -126,7 +127,7 @@ func PostStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string return nil } - return runHooks(ctx, spec.Hooks.Poststart, cid, bundlePath, "post-start") + return runHooks(ctx, spec, spec.Hooks.Poststart, cid, bundlePath, "post-start") } // PostStopHooks run the hooks after stop container @@ -136,5 +137,5 @@ func PostStopHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) return nil } - return runHooks(ctx, spec.Hooks.Poststop, cid, bundlePath, "post-stop") + return runHooks(ctx, spec, spec.Hooks.Poststop, cid, bundlePath, "post-stop") } diff --git a/src/runtime/pkg/katautils/hook_test.go b/src/runtime/pkg/katautils/hook_test.go index 54acfecc0..6109b5549 100644 --- a/src/runtime/pkg/katautils/hook_test.go +++ b/src/runtime/pkg/katautils/hook_test.go @@ -57,26 +57,27 @@ func TestRunHook(t *testing.T) { assert := assert.New(t) ctx := context.Background() + spec := specs.Spec{} // Run with timeout 0 hook := createHook(0) - err := runHook(ctx, hook, testSandboxID, testBundlePath) + err := runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.NoError(err) // Run with timeout 1 hook = createHook(1) - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.NoError(err) // Run timeout failure hook = createHook(1) hook.Args = append(hook.Args, "2") - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.Error(err) // Failure due to wrong hook hook = createWrongHook() - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.Error(err) } From d9dfce14536aad0540c219fa37f7fa44ca6a9e20 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 14 Feb 2022 09:50:36 +0100 Subject: [PATCH 06/71] katautils: Run the preStart 
hook in the host namespace The OCI spec is very specific about it: "The prestart hooks MUST be executed in the runtime namespace." Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/create.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index bf38acb05..541e481b4 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -155,11 +155,8 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo } }() - // Run pre-start OCI hooks. - err = EnterNetNS(sandboxConfig.NetworkConfig.NetworkID, func() error { - return PreStartHooks(ctx, ociSpec, containerID, bundlePath) - }) - if err != nil { + // Run pre-start OCI hooks, in the runtime namespace. + if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil { return nil, vc.Process{}, err } From a871a33b6578eb84f19c7e75fd1de27041cef7bb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 14 Feb 2022 09:53:05 +0100 Subject: [PATCH 07/71] katautils: Run the createRuntime hooks The preStart hooks are being deprecated in favor of the createRuntime ones. Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/create.go | 5 +++++ src/runtime/pkg/katautils/hook.go | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index 541e481b4..7ecc86301 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -160,6 +160,11 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo return nil, vc.Process{}, err } + // Run create runtime OCI hooks, in the runtime namespace. 
+ if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil { + return nil, vc.Process{}, err + } + sandbox, err := vci.CreateSandbox(ctx, sandboxConfig) if err != nil { return nil, vc.Process{}, err diff --git a/src/runtime/pkg/katautils/hook.go b/src/runtime/pkg/katautils/hook.go index 16c316fea..303773e60 100644 --- a/src/runtime/pkg/katautils/hook.go +++ b/src/runtime/pkg/katautils/hook.go @@ -110,6 +110,15 @@ func runHooks(ctx context.Context, spec specs.Spec, hooks []specs.Hook, cid, bun return nil } +func CreateRuntimeHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error { + // If no hook available, nothing needs to be done. + if spec.Hooks == nil { + return nil + } + + return runHooks(ctx, spec, spec.Hooks.CreateRuntime, cid, bundlePath, "createRuntime") +} + // PreStartHooks run the hooks before start container func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error { // If no hook available, nothing needs to be done. From 4f96e3eae37c165ba41e7e49f1b5f4996cc27641 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 14 Feb 2022 10:23:34 +0100 Subject: [PATCH 08/71] katautils: Pass the nerdctl netns annotation to the OCI hooks We need to let nerdctl know which namespace to use when calling the selected CNI plugin. 
See https://github.com/containerd/nerdctl/issues/787 Fixes: #1935 Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/create.go | 6 ++++ src/runtime/pkg/katautils/create_test.go | 40 ++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index 7ecc86301..d429558f4 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -155,6 +155,12 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo } }() + if ociSpec.Annotations == nil { + ociSpec.Annotations = make(map[string]string) + } + ociSpec.Annotations["nerdctl/network-namespace"] = sandboxConfig.NetworkConfig.NetworkID + sandboxConfig.Annotations["nerdctl/network-namespace"] = ociSpec.Annotations["nerdctl/network-namespace"] + // Run pre-start OCI hooks, in the runtime namespace. if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil { return nil, vc.Process{}, err diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 804b4318e..dab665dce 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -264,6 +264,46 @@ func TestCreateSandboxFail(t *testing.T) { assert.True(vcmock.IsMockError(err)) } +func TestCreateSandboxAnnotations(t *testing.T) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(ktu.TestDisabledNeedRoot) + } + + assert := assert.New(t) + + tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) + defer os.RemoveAll(tmpdir) + + runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + assert.NoError(err) + + spec, err := compatoci.ParseConfigJSON(bundlePath) + assert.NoError(err) + + rootFs := vc.RootFs{Mounted: true} + + testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) { + return &vcmock.Sandbox{ + MockID: testSandboxID, + MockContainers: []*vcmock.Container{ + 
+ {MockID: testContainerID}, + }, + MockAnnotations: sandboxConfig.Annotations, + }, nil + } + + defer func() { + testingImpl.CreateSandboxFunc = nil + }() + + sandbox, _, err := CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true) + assert.NoError(err) + + netNsPath, err := sandbox.Annotations("nerdctl/network-namespace") + assert.NoError(err) + assert.Equal(path.Dir(netNsPath), "/var/run/netns") +} + func TestCheckForFips(t *testing.T) { assert := assert.New(t) From a5f6df6a49856035e3e64ed2480792e6086d6882 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 8 Feb 2022 21:25:08 +0000 Subject: [PATCH 09/71] govmm: Define the number of supported vCPUs per architecture Based on what QEMU supports on those architectures. Signed-off-by: Samuel Ortiz --- src/runtime/pkg/govmm/vmm_amd64.go | 12 ++++++++++++ src/runtime/pkg/govmm/vmm_arm64.go | 25 +++++++++++++++++++++++++ src/runtime/pkg/govmm/vmm_ppc64le.go | 12 ++++++++++++ src/runtime/pkg/govmm/vmm_s390x.go | 15 +++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 src/runtime/pkg/govmm/vmm_amd64.go create mode 100644 src/runtime/pkg/govmm/vmm_arm64.go create mode 100644 src/runtime/pkg/govmm/vmm_ppc64le.go create mode 100644 src/runtime/pkg/govmm/vmm_s390x.go diff --git a/src/runtime/pkg/govmm/vmm_amd64.go b/src/runtime/pkg/govmm/vmm_amd64.go new file mode 100644 index 000000000..37c7a7c52 --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_amd64.go @@ -0,0 +1,12 @@ +// +// Copyright (c) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return uint32(240) +} diff --git a/src/runtime/pkg/govmm/vmm_arm64.go b/src/runtime/pkg/govmm/vmm_arm64.go new file mode 100644 index 000000000..a01cd683f --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_arm64.go @@ -0,0 +1,25 @@ +// +// Copyright (c) 2018 Intel Corporation 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +//In qemu, maximum number of vCPUs depends on the GIC version, or on how +//many redistributors we can fit into the memory map. +//related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) +//for now, qemu only supports v2 and v3, we treat v4 as v3 based on +//backward compatibility. +var gicList = map[uint32]uint32{ + uint32(2): uint32(8), + uint32(3): uint32(123), + uint32(4): uint32(123), +} + +var defaultGICVersion = uint32(3) + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return gicList[defaultGICVersion] +} diff --git a/src/runtime/pkg/govmm/vmm_ppc64le.go b/src/runtime/pkg/govmm/vmm_ppc64le.go new file mode 100644 index 000000000..91ef70f86 --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_ppc64le.go @@ -0,0 +1,12 @@ +// +// Copyright (c) 2018 IBM +// +// SPDX-License-Identifier: Apache-2.0 +// + +package qemu + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return uint32(128) +} diff --git a/src/runtime/pkg/govmm/vmm_s390x.go b/src/runtime/pkg/govmm/vmm_s390x.go new file mode 100644 index 000000000..50f1735dc --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_s390x.go @@ -0,0 +1,15 @@ +// +// Copyright (c) 2018 IBM +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + // Max number of virtual Cpu defined in qemu. See + // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 + // #define S390_MAX_CPUS 248 + return uint32(248) +} From b28d0274ffba82d5119ec2c96de42e00724972cd Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 24 Nov 2021 18:48:57 +0100 Subject: [PATCH 10/71] virtcontainers: Make max vCPU config less QEMU specific Even though it's still actually defined as the QEMU upper bound, it's now abstracted away through govmm. 
Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/config.go | 3 ++- src/runtime/pkg/katautils/config_test.go | 3 ++- src/runtime/pkg/oci/utils.go | 5 +++-- src/runtime/virtcontainers/hypervisor.go | 7 ++++--- src/runtime/virtcontainers/hypervisor_test.go | 2 +- src/runtime/virtcontainers/qemu_amd64.go | 5 ----- .../virtcontainers/qemu_arch_base_test.go | 6 +++--- src/runtime/virtcontainers/qemu_arm64.go | 18 ------------------ src/runtime/virtcontainers/qemu_arm64_test.go | 5 +++-- src/runtime/virtcontainers/qemu_ppc64le.go | 5 ----- src/runtime/virtcontainers/qemu_s390x.go | 8 -------- src/runtime/virtcontainers/qemu_test.go | 5 +++-- 12 files changed, 21 insertions(+), 51 deletions(-) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 06f6a6df0..e699c0c55 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -15,6 +15,7 @@ import ( "strings" "github.com/BurntSushi/toml" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" @@ -343,7 +344,7 @@ func (h hypervisor) defaultVCPUs() uint32 { func (h hypervisor) defaultMaxVCPUs() uint32 { numcpus := uint32(goruntime.NumCPU()) - maxvcpus := vc.MaxQemuVCPUs() + maxvcpus := govmm.MaxVCPUs() reqVCPUs := h.DefaultMaxVCPUs //don't exceed the number of physical CPUs. 
If a default is not provided, use the diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 5bfd61250..74a776745 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -17,6 +17,7 @@ import ( "syscall" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" @@ -917,7 +918,7 @@ func TestHypervisorDefaults(t *testing.T) { h.DefaultMaxVCPUs = numCPUs + 1 assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") - maxvcpus := vc.MaxQemuVCPUs() + maxvcpus := govmm.MaxVCPUs() h.DefaultMaxVCPUs = maxvcpus + 1 assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index a7b3860ef..66172dcba 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -23,6 +23,7 @@ import ( "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/api/resource" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" @@ -637,8 +638,8 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than the number of CPUs %d on the system", max, numCPUs) } - if sbConfig.HypervisorType == vc.QemuHypervisor && max > vc.MaxQemuVCPUs() { - return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, vc.MaxQemuVCPUs()) + if sbConfig.HypervisorType == vc.QemuHypervisor && max > govmm.MaxVCPUs() { + 
return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, govmm.MaxVCPUs()) } sbConfig.HypervisorConfig.DefaultMaxVCPUs = max return nil diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 175342016..140f0b006 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -76,7 +77,7 @@ var ( ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. -var defaultMaxQemuVCPUs = MaxQemuVCPUs() +var defaultMaxVCPUs = govmm.MaxVCPUs() // agnostic list of kernel root parameters for NVDIMM var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck @@ -574,8 +575,8 @@ func (conf *HypervisorConfig) Valid() error { conf.BlockDeviceDriver = config.VirtioBlockCCW } - if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxQemuVCPUs { - conf.DefaultMaxVCPUs = defaultMaxQemuVCPUs + if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxVCPUs { + conf.DefaultMaxVCPUs = defaultMaxVCPUs } if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index f8bbbd330..86aefde69 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -185,7 +185,7 @@ func TestHypervisorConfigDefaults(t *testing.T) { MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxQemuVCPUs, + 
DefaultMaxVCPUs: defaultMaxVCPUs, Msize9p: defaultMsize9p, } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 757065961..fe0650dbc 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -81,11 +81,6 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return uint32(240) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index b4b808685..5611c9539 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -173,13 +173,13 @@ func TestQemuArchBaseCPUTopology(t *testing.T) { expectedSMP := govmmQemu.SMP{ CPUs: vcpus, - Sockets: defaultMaxQemuVCPUs, + Sockets: defaultMaxVCPUs, Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: defaultMaxVCPUs, } - smp := qemuArchBase.cpuTopology(vcpus, defaultMaxQemuVCPUs) + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs) assert.Equal(expectedSMP, smp) } diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index ee07b3241..250e709f6 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -29,8 +29,6 @@ const qmpMigrationWaitTimeout = 10 * time.Second const defaultQemuMachineOptions = "usb=off,accel=kvm,gic-version=host" -var defaultGICVersion = uint32(3) - var kernelParams = []Param{ {"console", "hvc0"}, {"console", "hvc1"}, @@ -42,22 +40,6 @@ var supportedQemuMachine = govmmQemu.Machine{ Options: defaultQemuMachineOptions, } -//In qemu, maximum number of vCPUs depends on the GIC version, or on how -//many redistributors we can fit into the memory 
map. -//related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) -//for now, qemu only supports v2 and v3, we treat v4 as v3 based on -//backward compatibility. -var gicList = map[uint32]uint32{ - uint32(2): uint32(8), - uint32(3): uint32(123), - uint32(4): uint32(123), -} - -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return gicList[defaultGICVersion] -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index 2766ae44a..fb43f00b1 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -11,6 +11,7 @@ import ( "os" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/stretchr/testify/assert" ) @@ -54,10 +55,10 @@ func TestQemuArm64MemoryTopology(t *testing.T) { assert.Equal(expectedMemory, m) } -func TestMaxQemuVCPUs(t *testing.T) { +func TestMaxVCPUs(t *testing.T) { assert := assert.New(t) - vCPUs := MaxQemuVCPUs() + vCPUs := govmm.MaxVCPUs() assert.Equal(uint32(123), vCPUs) } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index eef94bde9..a1154fe33 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -54,11 +54,6 @@ func (q *qemuPPC64le) Logger() *logrus.Entry { return hvLogger.WithField("subsystem", "qemuPPC64le") } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return uint32(128) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { diff --git a/src/runtime/virtcontainers/qemu_s390x.go 
b/src/runtime/virtcontainers/qemu_s390x.go index fa15c759f..131c68385 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -47,14 +47,6 @@ var supportedQemuMachine = govmmQemu.Machine{ Options: defaultQemuMachineOptions, } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - // Max number of virtual Cpu defined in qemu. See - // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 - // #define S390_MAX_CPUS 248 - return uint32(248) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 5feecaabe..b1f9aab5e 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -12,6 +12,7 @@ import ( "path/filepath" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist" @@ -30,7 +31,7 @@ func newQemuConfig() HypervisorConfig { MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxQemuVCPUs, + DefaultMaxVCPUs: defaultMaxVCPUs, Msize9p: defaultMsize9p, } } @@ -54,7 +55,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", MaxQemuVCPUs()) + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", From 10ae05914c0e01a5988bdd2b8139c204fb16c304 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz 
Date: Wed, 24 Nov 2021 19:10:53 +0100 Subject: [PATCH 11/71] virtcontainers: Move guest protection definitions to hypervisor.go They're not QEMU specific, other VMMs may implement support for it. Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/hypervisor.go | 51 +++++++++++++++++++ .../virtcontainers/hypervisor_amd64.go | 8 +++ src/runtime/virtcontainers/qemu_amd64.go | 6 --- src/runtime/virtcontainers/qemu_arch_base.go | 51 ------------------- 4 files changed, 59 insertions(+), 57 deletions(-) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 140f0b006..abea7e58f 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -908,6 +908,57 @@ func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { }, nil } +// Kind of guest protection +type guestProtection uint8 + +const ( + noneProtection guestProtection = iota + + //Intel Trust Domain Extensions + //https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html + // Exclude from lint checking for it won't be used on arm64 code + tdxProtection + + // AMD Secure Encrypted Virtualization + // https://developer.amd.com/sev/ + // Exclude from lint checking for it won't be used on arm64 code + sevProtection + + // IBM POWER 9 Protected Execution Facility + // https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html + // Exclude from lint checking for it won't be used on arm64 code + pefProtection + + // IBM Secure Execution (IBM Z & LinuxONE) + // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html + // Exclude from lint checking for it won't be used on arm64 code + seProtection +) + +var guestProtectionStr = [...]string{ + noneProtection: "none", + pefProtection: "pef", + seProtection: "se", + sevProtection: "sev", + tdxProtection: "tdx", +} + +func (gp guestProtection) String() string { + return guestProtectionStr[gp] +} + +func 
genericAvailableGuestProtections() (protections []string) { + return +} + +func AvailableGuestProtections() (protections []string) { + gp, err := availableGuestProtection() + if err != nil || gp == noneProtection { + return genericAvailableGuestProtections() + } + return []string{gp.String()} +} + // hypervisor is the virtcontainers hypervisor interface. // The default hypervisor implementation is Qemu. type Hypervisor interface { diff --git a/src/runtime/virtcontainers/hypervisor_amd64.go b/src/runtime/virtcontainers/hypervisor_amd64.go index 97a30ea01..38313d05b 100644 --- a/src/runtime/virtcontainers/hypervisor_amd64.go +++ b/src/runtime/virtcontainers/hypervisor_amd64.go @@ -7,6 +7,14 @@ package virtcontainers import "os" +const ( + tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" + + tdxCPUFlag = "tdx" + + sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" +) + // Implementation of this function is architecture specific func availableGuestProtection() (guestProtection, error) { flags, err := CPUFlags(procCPUInfo) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index fe0650dbc..ba8aeeffd 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -36,12 +36,6 @@ const ( defaultQemuMachineOptions = "accel=kvm,kernel_irqchip=on" qmpMigrationWaitTimeout = 5 * time.Second - - tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" - - tdxCPUFlag = "tdx" - - sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" ) var qemuPaths = map[string]string{ diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index e800199eb..bbed76e2a 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -150,57 +150,6 @@ type qemuArch interface { appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) } -// Kind of guest 
protection -type guestProtection uint8 - -const ( - noneProtection guestProtection = iota - - //Intel Trust Domain Extensions - //https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html - // Exclude from lint checking for it won't be used on arm64 code - tdxProtection - - // AMD Secure Encrypted Virtualization - // https://developer.amd.com/sev/ - // Exclude from lint checking for it won't be used on arm64 code - sevProtection - - // IBM POWER 9 Protected Execution Facility - // https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html - // Exclude from lint checking for it won't be used on arm64 code - pefProtection - - // IBM Secure Execution (IBM Z & LinuxONE) - // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html - // Exclude from lint checking for it won't be used on arm64 code - seProtection -) - -var guestProtectionStr = [...]string{ - noneProtection: "none", - pefProtection: "pef", - seProtection: "se", - sevProtection: "sev", - tdxProtection: "tdx", -} - -func (gp guestProtection) String() string { - return guestProtectionStr[gp] -} - -func genericAvailableGuestProtections() (protections []string) { - return -} - -func AvailableGuestProtections() (protections []string) { - gp, err := availableGuestProtection() - if err != nil || gp == noneProtection { - return genericAvailableGuestProtections() - } - return []string{gp.String()} -} - type qemuArchBase struct { qemuExePath string qemuMachine govmmQemu.Machine From c91035d0e139e4c5dcad02bc6bf5196f56880f4b Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 24 Nov 2021 19:16:17 +0100 Subject: [PATCH 12/71] virtcontainers: Move non QEMU specific constants to hypervisor.go Hotplugging errors and 9pfs size are not particularly QEMU specific. 
Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/hypervisor.go | 7 ++++++- src/runtime/virtcontainers/qemu.go | 2 -- src/runtime/virtcontainers/qemu_arch_base.go | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index abea7e58f..67c725442 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -14,6 +14,8 @@ import ( "strconv" "strings" + "github.com/pkg/errors" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" @@ -70,10 +72,13 @@ const ( // MinHypervisorMemory is the minimum memory required for a VM. MinHypervisorMemory = 256 + + defaultMsize9p = 8192 ) var ( - hvLogger = logrus.WithField("source", "virtcontainers/hypervisor") + hvLogger = logrus.WithField("source", "virtcontainers/hypervisor") + noGuestMemHotplugErr error = errors.New("guest memory hotplug not supported") ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. 
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 055281cb7..b020cdb94 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -128,8 +128,6 @@ const ( qemuStopSandboxTimeoutSecs = 15 ) -var noGuestMemHotplugErr error = errors.New("guest memory hotplug not supported") - // agnostic list of kernel parameters var defaultKernelParameters = []Param{ {"panic", "1"}, diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index bbed76e2a..b9daab22e 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -175,7 +175,6 @@ const ( defaultBridgeBus = "pcie.0" defaultPCBridgeBus = "pci.0" maxDevIDSize = 31 - defaultMsize9p = 8192 pcieRootPortPrefix = "rp" ) From fa0e9dc6b1ad057ab63a8b4efae4c610d5d2befb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 24 Nov 2021 19:22:57 +0100 Subject: [PATCH 13/71] virtcontainers: Make all Linux VMMs only build on Linux Some of them (e.g. QEMU) can run on other OSes (e.g. Darwin) but the current virtcontainers implementation is Linux specific. 
Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/acrn.go | 3 +++ src/runtime/virtcontainers/acrn_arch_base.go | 3 +++ src/runtime/virtcontainers/acrn_test.go | 3 +++ src/runtime/virtcontainers/clh.go | 3 +++ src/runtime/virtcontainers/clh_test.go | 3 +++ src/runtime/virtcontainers/fc.go | 3 +++ src/runtime/virtcontainers/fc_metrics.go | 3 +++ src/runtime/virtcontainers/fc_test.go | 3 +++ src/runtime/virtcontainers/qemu.go | 3 +++ src/runtime/virtcontainers/qemu_amd64.go | 3 +++ src/runtime/virtcontainers/qemu_amd64_test.go | 3 +++ src/runtime/virtcontainers/qemu_arch_base.go | 3 +++ src/runtime/virtcontainers/qemu_arch_base_test.go | 3 +++ src/runtime/virtcontainers/qemu_arm64.go | 3 +++ src/runtime/virtcontainers/qemu_arm64_test.go | 3 +++ src/runtime/virtcontainers/qemu_ppc64le.go | 3 +++ src/runtime/virtcontainers/qemu_ppc64le_test.go | 3 +++ src/runtime/virtcontainers/qemu_s390x.go | 3 +++ src/runtime/virtcontainers/qemu_s390x_test.go | 3 +++ src/runtime/virtcontainers/qemu_test.go | 3 +++ 20 files changed, 60 insertions(+) diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 244b78d66..5e709da42 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 5d258c42c..52f6ea339 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 45d85fb4e..974b1fe1b 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -1,3 
+1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index f08eba67f..3501a1ae1 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index d350dd9e9..622e2f6fd 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Ericsson Eurolab Deutschland G.m.b.H. // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 34a44692d..39985907f 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc_metrics.go b/src/runtime/virtcontainers/fc_metrics.go index f97501fc7..814d81936 100644 --- a/src/runtime/virtcontainers/fc_metrics.go +++ b/src/runtime/virtcontainers/fc_metrics.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index 215dfdb74..02d9d7144 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index b020cdb94..26801825d 
100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index ba8aeeffd..f3ae89b59 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index a992b1497..1071a7051 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index b9daab22e..b65cb61e3 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index 5611c9539..f8d07bc58 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index 250e709f6..fb0e7a9d7 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -1,3 +1,6 @@ +//go:build 
linux +// +build linux +// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index fb43f00b1..893c46e66 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index a1154fe33..f78ed2429 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 978e1b0ec..6c046b620 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 131c68385..aeddb982a 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go b/src/runtime/virtcontainers/qemu_s390x_test.go index f207a0c91..07eaa8517 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_test.go 
b/src/runtime/virtcontainers/qemu_test.go index b1f9aab5e..985c15f1c 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux +// // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 From 26b3f0017c6f4756cd5280949695104cd6e7424c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 25 Nov 2021 08:55:08 +0100 Subject: [PATCH 14/71] virtcontainers: Split hypervisor into Linux and OS agnostic bits Keep all the OS agnostic bits in the hypervisor.go and hypervisor_ARCH.go files. Fixes #3614 Signed-off-by: Eric Ernst Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/hypervisor.go | 33 -------------- .../virtcontainers/hypervisor_linux.go | 45 +++++++++++++++++++ ...sor_amd64.go => hypervisor_linux_amd64.go} | 0 ...sor_arm64.go => hypervisor_linux_arm64.go} | 2 +- ...test.go => hypervisor_linux_arm64_test.go} | 0 5 files changed, 46 insertions(+), 34 deletions(-) create mode 100644 src/runtime/virtcontainers/hypervisor_linux.go rename src/runtime/virtcontainers/{hypervisor_amd64.go => hypervisor_linux_amd64.go} (100%) rename src/runtime/virtcontainers/{hypervisor_arm64.go => hypervisor_linux_arm64.go} (76%) rename src/runtime/virtcontainers/{hypervisor_arm64_test.go => hypervisor_linux_arm64_test.go} (100%) diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 67c725442..713280be7 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -20,7 +20,6 @@ import ( hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/sirupsen/logrus" ) @@ -198,25 +197,6 @@ func (hType 
*HypervisorType) String() string { } } -// NewHypervisor returns an hypervisor from a hypervisor type. -func NewHypervisor(hType HypervisorType) (Hypervisor, error) { - - switch hType { - case QemuHypervisor: - return &qemu{}, nil - case FirecrackerHypervisor: - return &firecracker{}, nil - case AcrnHypervisor: - return &Acrn{}, nil - case ClhHypervisor: - return &cloudHypervisor{}, nil - case MockHypervisor: - return &mockHypervisor{}, nil - default: - return nil, fmt.Errorf("Unknown hypervisor type %s", hType) - } -} - // GetHypervisorSocketTemplate returns the full "template" path to the // hypervisor socket. If the specified hypervisor doesn't use a socket, // an empty string is returned. @@ -900,19 +880,6 @@ func GetHypervisorPid(h Hypervisor) int { return pids[0] } -func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { - vhostFd, contextID, err := utils.FindContextID() - if err != nil { - return nil, err - } - - return types.VSock{ - VhostFd: vhostFd, - ContextID: contextID, - Port: uint32(vSockPort), - }, nil -} - // Kind of guest protection type guestProtection uint8 diff --git a/src/runtime/virtcontainers/hypervisor_linux.go b/src/runtime/virtcontainers/hypervisor_linux.go new file mode 100644 index 000000000..1fd0375b8 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_linux.go @@ -0,0 +1,45 @@ +// Copyright (c) 2021 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" +) + +func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { + vhostFd, contextID, err := utils.FindContextID() + if err != nil { + return nil, err + } + + return types.VSock{ + VhostFd: vhostFd, + ContextID: contextID, + Port: uint32(vSockPort), + }, nil +} + +// NewHypervisor returns an hypervisor from a hypervisor type. 
+func NewHypervisor(hType HypervisorType) (Hypervisor, error) { + + switch hType { + case QemuHypervisor: + return &qemu{}, nil + case FirecrackerHypervisor: + return &firecracker{}, nil + case AcrnHypervisor: + return &Acrn{}, nil + case ClhHypervisor: + return &cloudHypervisor{}, nil + case MockHypervisor: + return &mockHypervisor{}, nil + default: + return nil, fmt.Errorf("Unknown hypervisor type %s", hType) + } +} diff --git a/src/runtime/virtcontainers/hypervisor_amd64.go b/src/runtime/virtcontainers/hypervisor_linux_amd64.go similarity index 100% rename from src/runtime/virtcontainers/hypervisor_amd64.go rename to src/runtime/virtcontainers/hypervisor_linux_amd64.go diff --git a/src/runtime/virtcontainers/hypervisor_arm64.go b/src/runtime/virtcontainers/hypervisor_linux_arm64.go similarity index 76% rename from src/runtime/virtcontainers/hypervisor_arm64.go rename to src/runtime/virtcontainers/hypervisor_linux_arm64.go index cdf38165a..9c6a13ea9 100644 --- a/src/runtime/virtcontainers/hypervisor_arm64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_arm64.go @@ -4,7 +4,7 @@ package virtcontainers -//Returns pefProtection if the firmware directory exists +// Guest protection is not supported on ARM64. func availableGuestProtection() (guestProtection, error) { return noneProtection, nil } diff --git a/src/runtime/virtcontainers/hypervisor_arm64_test.go b/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go similarity index 100% rename from src/runtime/virtcontainers/hypervisor_arm64_test.go rename to src/runtime/virtcontainers/hypervisor_linux_arm64_test.go From 94b831ebf8ae33592076cbb37e1641ca757d4575 Mon Sep 17 00:00:00 2001 From: zhanghj Date: Sat, 19 Feb 2022 16:36:31 +0800 Subject: [PATCH 15/71] virtcontainers: remove temp dir created for vsock in test code remove temp dir generated by mock.GenerateKataMockHybridVSock(). 
Fixes: #3186 Signed-off-by: zhanghj --- src/runtime/virtcontainers/api_test.go | 4 ++-- .../virtcontainers/factory/factory_test.go | 7 ++++--- .../factory/template/template_test.go | 3 ++- src/runtime/virtcontainers/kata_agent_test.go | 6 ++++++ src/runtime/virtcontainers/pkg/mock/mock.go | 15 ++++++++++++++- 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index 3366041c6..97ee76144 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -170,10 +170,10 @@ func TestCreateSandboxKataAgentSuccessful(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) - MockHybridVSockPath = url + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/factory/factory_test.go b/src/runtime/virtcontainers/factory/factory_test.go index 9cd5568b7..71af52d71 100644 --- a/src/runtime/virtcontainers/factory/factory_test.go +++ b/src/runtime/virtcontainers/factory/factory_test.go @@ -7,7 +7,6 @@ package factory import ( "context" - "fmt" "os" "testing" @@ -61,10 +60,11 @@ func TestNewFactory(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() @@ -179,10 +179,11 @@ func TestFactoryGetVM(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := 
mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/factory/template/template_test.go b/src/runtime/virtcontainers/factory/template/template_test.go index 9c8d5805a..55c7bc596 100644 --- a/src/runtime/virtcontainers/factory/template/template_test.go +++ b/src/runtime/virtcontainers/factory/template/template_test.go @@ -51,10 +51,11 @@ func TestTemplateFactory(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 6475e8b06..13dd9e1c9 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -50,6 +50,7 @@ func TestKataAgentConnect(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -73,6 +74,7 @@ func TestKataAgentDisconnect(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -110,6 +112,7 @@ func TestKataAgentSendReq(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -843,6 +846,7 @@ func TestAgentCreateContainer(t 
*testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -873,6 +877,7 @@ func TestAgentNetworkOperation(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -921,6 +926,7 @@ func TestKataCopyFile(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) diff --git a/src/runtime/virtcontainers/pkg/mock/mock.go b/src/runtime/virtcontainers/pkg/mock/mock.go index 4efdcb0ad..de6f7aec3 100644 --- a/src/runtime/virtcontainers/pkg/mock/mock.go +++ b/src/runtime/virtcontainers/pkg/mock/mock.go @@ -16,9 +16,13 @@ import ( gpb "github.com/gogo/protobuf/types" aTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols" pb "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" + "path" + "strings" ) -var testKataMockHybridVSockURLTempl = "mock://%s/kata-mock-hybrid-vsock.sock" +const VSockPrefix = "mock://" + +var testKataMockHybridVSockURLTempl = VSockPrefix + "%s/kata-mock-hybrid-vsock.sock" func GenerateKataMockHybridVSock() (string, error) { dir, err := os.MkdirTemp("", "kata-mock-hybrid-vsock-test") @@ -29,6 +33,15 @@ func GenerateKataMockHybridVSock() (string, error) { return fmt.Sprintf(testKataMockHybridVSockURLTempl, dir), nil } +func RemoveKataMockHybridVSock(sockAddress string) error { + if !strings.HasPrefix(sockAddress, VSockPrefix) { + return fmt.Errorf("Invalid socket address: %s", sockAddress) + } + + sockPath := strings.TrimPrefix(sockAddress, VSockPrefix) + return os.RemoveAll(path.Dir(sockPath)) +} + 
// HybridVSockTTRPCMock is the ttrpc-based mock hybrid-vsock backend implementation type HybridVSockTTRPCMock struct { // HybridVSockTTRPCMockImp is the structure implementing From 9123fc098dc9e09761dda913316fff3602d8c706 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Fri, 18 Feb 2022 15:00:08 +0100 Subject: [PATCH 16/71] kata-deploy: Simplify Dockerfile and support s390x The kata-deploy Dockerfile is based on CentOS 7, which has no s390x support. Add an `IMAGE` argument to specify the registry, which still defaults to CentOS, but e.g. ClefOS can be selected instead. Other x86_64 assumptions are also removed. Other general simplifications are made. This does not address the more general issue of #3723 -- what we're doing here does not seem to be working with systemd >= something between 235-237. Fixes: #3722 Signed-off-by: Jakob Naucke --- tools/packaging/kata-deploy/Dockerfile | 42 ++++++++------------------ 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index e89d24292..94533a906 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -1,27 +1,10 @@ -# Copyright Intel Corporation. +# Copyright Intel Corporation, 2022 IBM Corp. 
# # SPDX-License-Identifier: Apache-2.0 -FROM registry.centos.org/centos:7 AS base - -ENV container docker - -RUN (cd /lib/systemd/system/sysinit.target.wants/ && for i in *; do [ "$i" = systemd-tmpfiles-setup.service ] || rm -f "$i"; done); \ -rm -f /lib/systemd/system/multi-user.target.wants/*; \ -rm -f /etc/systemd/system/*.wants/*; \ -rm -f /lib/systemd/system/local-fs.target.wants/*; \ -rm -f /lib/systemd/system/sockets.target.wants/*udev*; \ -rm -f /lib/systemd/system/sockets.target.wants/*initctl*; \ -rm -f /lib/systemd/system/basic.target.wants/*; \ -rm -f /lib/systemd/system/anaconda.target.wants/*; - -VOLUME [ "/sys/fs/cgroup" ] - -CMD ["/usr/sbin/init"] - -FROM base - -ARG KUBE_ARCH=amd64 +# Specify alternative base image, e.g. clefos for s390x +ARG IMAGE +FROM ${IMAGE:-registry.centos.org/centos}:7 ARG KATA_ARTIFACTS=./kata-static.tar.xz ARG DESTINATION=/opt/kata-artifacts @@ -29,17 +12,18 @@ COPY ${KATA_ARTIFACTS} ${WORKDIR} RUN \ yum -y update && \ -yum install -y epel-release && \ -yum install -y bzip2 jq && \ +yum -y install xz && \ yum clean all && \ mkdir -p ${DESTINATION} && \ -tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION}/ && \ -chown -R root:root ${DESTINATION}/ +tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION} +# hadolint will deny echo -e, heredocs don't work in Dockerfiles, shell substitution doesn't work with $'...' 
RUN \ -curl -Lso /bin/kubectl "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/${KUBE_ARCH}/kubectl" && \ -chmod +x /bin/kubectl +echo "[kubernetes]" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "name=Kubernetes" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$(uname -m)" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg" >> /etc/yum.repos.d/kubernetes.repo && \ +yum -y install kubectl && \ +yum clean all COPY scripts ${DESTINATION}/scripts -RUN \ -ln -s ${DESTINATION}/scripts/kata-deploy.sh /usr/bin/kata-deploy From 1d68a08f4bc03788fd5771f7b64a70e793d189af Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 21 Feb 2022 17:01:09 +0000 Subject: [PATCH 17/71] docs: Update contributing link This PR updates the contributing documentation link to the one that is using kata 2.0 Fixes #3740 Signed-off-by: Gabriela Cervantes --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8a3af744a..c14210fdb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,4 +2,4 @@ ## This repo is part of [Kata Containers](https://katacontainers.io) -For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md). \ No newline at end of file +For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md). 
From f6fc1621f7bf915ddd8d0e44dd08e29a9ff723fd Mon Sep 17 00:00:00 2001 From: bin Date: Mon, 21 Feb 2022 17:25:00 +0800 Subject: [PATCH 18/71] shim: log events for CRI-O CRI-O start shim process without setting TTRPC_ADDRESS, that the forwarding events goroutine will get errors. For CRI-O runtime, we can log the events to log file. Fixes: #3733 Signed-off-by: bin --- .../pkg/containerd-shim-v2/event_forwarder.go | 88 +++++++++++++++++++ .../event_forwarder_test.go | 45 ++++++++++ src/runtime/pkg/containerd-shim-v2/service.go | 19 +--- src/runtime/virtcontainers/container.go | 1 + 4 files changed, 136 insertions(+), 17 deletions(-) create mode 100644 src/runtime/pkg/containerd-shim-v2/event_forwarder.go create mode 100644 src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go diff --git a/src/runtime/pkg/containerd-shim-v2/event_forwarder.go b/src/runtime/pkg/containerd-shim-v2/event_forwarder.go new file mode 100644 index 000000000..a77645a1f --- /dev/null +++ b/src/runtime/pkg/containerd-shim-v2/event_forwarder.go @@ -0,0 +1,88 @@ +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +package containerdshim + +import ( + "context" + "os" + "time" + + "github.com/containerd/containerd/events" +) + +type forwarderType string + +const ( + forwarderTypeLog forwarderType = "log" + forwarderTypeContainerd forwarderType = "containerd" + + // A time span used to wait for publish a containerd event, + // once it costs a longer time than timeOut, it will be canceld. + timeOut = 5 * time.Second + + // ttrpc address passed from container runtime. 
+ // For now containerd will pass the address, and CRI-O will not + ttrpcAddressEnv = "TTRPC_ADDRESS" +) + +type eventsForwarder interface { + forward() + forwarderType() forwarderType +} + +type logForwarder struct { + s *service +} + +func (lf *logForwarder) forward() { + for e := range lf.s.events { + shimLog.WithField("topic", getTopic(e)).Infof("post event: %+v", e) + } +} + +func (lf *logForwarder) forwarderType() forwarderType { + return forwarderTypeLog +} + +type containerdForwarder struct { + s *service + ctx context.Context + publisher events.Publisher +} + +func (cf *containerdForwarder) forward() { + for e := range cf.s.events { + ctx, cancel := context.WithTimeout(cf.ctx, timeOut) + err := cf.publisher.Publish(ctx, getTopic(e), e) + cancel() + if err != nil { + shimLog.WithError(err).Error("post event") + } + } +} + +func (cf *containerdForwarder) forwarderType() forwarderType { + return forwarderTypeContainerd +} + +func (s *service) newEventsForwarder(ctx context.Context, publisher events.Publisher) eventsForwarder { + var forwarder eventsForwarder + ttrpcAddress := os.Getenv(ttrpcAddressEnv) + if ttrpcAddress == "" { + // non containerd will use log forwarder to write events to log + forwarder = &logForwarder{ + s: s, + } + } else { + forwarder = &containerdForwarder{ + s: s, + ctx: ctx, + publisher: publisher, + } + } + + return forwarder +} diff --git a/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go b/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go new file mode 100644 index 000000000..979ce5f1e --- /dev/null +++ b/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go @@ -0,0 +1,45 @@ +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +package containerdshim + +import ( + "context" + "os" + "testing" + + "github.com/containerd/containerd/events" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock" + + "github.com/stretchr/testify/assert" +) + +func 
TestNewEventsForwarder(t *testing.T) { + assert := assert.New(t) + + sandbox := &vcmock.Sandbox{ + MockID: testSandboxID, + } + + s := &service{ + id: testSandboxID, + sandbox: sandbox, + containers: make(map[string]*container), + } + + // newEventsForwarder will not call publisher to publish events + // so here we can use a nil pointer to test newEventsForwarder + var publisher events.Publisher + + // check log forwarder + forwarder := s.newEventsForwarder(context.Background(), publisher) + assert.Equal(forwarderTypeLog, forwarder.forwarderType()) + + // check containerd forwarder + os.Setenv(ttrpcAddressEnv, "/foo/bar.sock") + defer os.Setenv(ttrpcAddressEnv, "") + forwarder = s.newEventsForwarder(context.Background(), publisher) + assert.Equal(forwarderTypeContainerd, forwarder.forwarderType()) +} diff --git a/src/runtime/pkg/containerd-shim-v2/service.go b/src/runtime/pkg/containerd-shim-v2/service.go index 0cca3ebf8..8e20ae82f 100644 --- a/src/runtime/pkg/containerd-shim-v2/service.go +++ b/src/runtime/pkg/containerd-shim-v2/service.go @@ -17,7 +17,6 @@ import ( eventstypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/api/types/task" "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" "github.com/containerd/containerd/namespaces" cdruntime "github.com/containerd/containerd/runtime" cdshim "github.com/containerd/containerd/runtime/v2/shim" @@ -51,10 +50,6 @@ const ( chSize = 128 exitCode255 = 255 - - // A time span used to wait for publish a containerd event, - // once it costs a longer time than timeOut, it will be canceld. 
- timeOut = 5 * time.Second ) var ( @@ -100,7 +95,8 @@ func New(ctx context.Context, id string, publisher cdshim.Publisher, shutdown fu go s.processExits() - go s.forward(ctx, publisher) + forwarder := s.newEventsForwarder(ctx, publisher) + go forwarder.forward() return s, nil } @@ -256,17 +252,6 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin return address, nil } -func (s *service) forward(ctx context.Context, publisher events.Publisher) { - for e := range s.events { - ctx, cancel := context.WithTimeout(ctx, timeOut) - err := publisher.Publish(ctx, getTopic(e), e) - cancel() - if err != nil { - shimLog.WithError(err).Error("post event") - } - } -} - func (s *service) send(evt interface{}) { // for unit test, it will not initialize s.events if s.events != nil { diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 13ec54855..ee0a38079 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -356,6 +356,7 @@ func (c *Container) Logger() *logrus.Entry { return virtLog.WithFields(logrus.Fields{ "subsystem": "container", "sandbox": c.sandboxID, + "container": c.id, }) } From 1a3381b096a779bcd3885ef63d91c43109dc562a Mon Sep 17 00:00:00 2001 From: "wangyongchao.bj" Date: Mon, 21 Feb 2022 14:45:29 +0800 Subject: [PATCH 19/71] docs: Developer-Guide build a custom Kata agent with musl The Developer-Guide.md build a custom kata agent with `x86_64-unknown-linux-musl`. The `musl` should be changed by the system arch. The system arch is aarch64, ppc64le and s390x, the musl should be changed. When the arch is ppc64le or s390x, the musl should be replaced by the gnu. 
Fixes: #3731 Signed-off-by: wangyongchao.bj --- docs/Developer-Guide.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index f4ec4ff7b..8435c23da 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -212,11 +212,13 @@ $ sudo systemctl restart systemd-journald > > - You should only do this step if you are testing with the latest version of the agent. -The rust-agent is built with a static linked `musl.` To configure this: +The agent is built with a statically linked `musl.` The default `libc` used is `musl`, but on `ppc64le` and `s390x`, `gnu` should be used. To configure this: ``` -rustup target add x86_64-unknown-linux-musl -sudo ln -s /usr/bin/g++ /bin/musl-g++ +$ export ARCH=$(uname -m) +$ if [ "$ARCH" = "ppc64le" -o "$ARCH" = "s390x" ]; then export LIBC=gnu; else export LIBC=musl; fi +$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le +$ rustup target add ${ARCH}-unknown-linux-${LIBC} ``` To build the agent: From 160bb621382a53d0ccd4d6287977d144cd71d429 Mon Sep 17 00:00:00 2001 From: Francesco Giudici Date: Wed, 23 Feb 2022 11:17:02 +0100 Subject: [PATCH 20/71] kata-monitor: bump version to 0.3.0 Since kata-monitor now: - relies on fs events *only* to update the sandbox cache - adds CRI meta-data as labels (CRI pod name, namespace and uid) it deserves a version bump. Note that while we could let kata-monitor match the runtime version, kata-monitor will usually work flawlessly with different kata shim releases: so it makes sense to keep kata-monitor version separated. 
Signed-off-by: Francesco Giudici --- src/runtime/cmd/kata-monitor/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/cmd/kata-monitor/main.go b/src/runtime/cmd/kata-monitor/main.go index 4a52671b6..c0001cdc2 100644 --- a/src/runtime/cmd/kata-monitor/main.go +++ b/src/runtime/cmd/kata-monitor/main.go @@ -26,7 +26,7 @@ var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug var ( appName = "kata-monitor" // version is the kata monitor version. - version = "0.2.0" + version = "0.3.0" GitCommit = "unknown-commit" ) From 3ac52e81937cf0fe1cbbb4b2cb1b4132e7030877 Mon Sep 17 00:00:00 2001 From: Francesco Giudici Date: Wed, 23 Feb 2022 11:21:06 +0100 Subject: [PATCH 21/71] kata-monitor: fix updating sandbox cache at startup We now rely on fs events only to update the sandbox cache. This is not true anyway for sandboxes already present at kata-monitor startup: we just retrieve the list and add them in the cache only when we get their CRI metadata. If CRI metadata is not available we will never add them to the sandbox cache. Fix this by immediately adding the sandboxes we find at startup time to the sandbox cache. 
Fixes: #3705 Signed-off-by: Francesco Giudici --- src/runtime/pkg/kata-monitor/monitor.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/runtime/pkg/kata-monitor/monitor.go b/src/runtime/pkg/kata-monitor/monitor.go index caacfb222..4a1a2d2f5 100644 --- a/src/runtime/pkg/kata-monitor/monitor.go +++ b/src/runtime/pkg/kata-monitor/monitor.go @@ -104,6 +104,10 @@ func (km *KataMonitor) startPodCacheUpdater() { monitorLog.WithError(err).Fatal("cannot read sandboxes fs") os.Exit(1) } + for _, sandbox := range sandboxList { + km.sandboxCache.putIfNotExists(sandbox, sandboxKubeData{}) + } + monitorLog.Debug("initial sync of sbs directory completed") monitorLog.Tracef("pod list from sbs: %v", sandboxList) From d49d0b6f39d26622b34372ba91ba36f52e94ccf6 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 15 Nov 2021 07:50:03 +0100 Subject: [PATCH 22/71] virtcontainers: cgroups: Define a cgroup interface And move the current, Linux-specific implementation into cgroups_linux.go Signed-off-by: Samuel Ortiz --- .../virtcontainers/pkg/cgroups/cgroups.go | 313 ++---------------- .../pkg/cgroups/cgroups_linux.go | 310 +++++++++++++++++ .../pkg/cgroups/{utils.go => utils_linux.go} | 20 -- .../{utils_test.go => utils_linux_test.go} | 0 src/runtime/virtcontainers/sandbox.go | 26 +- 5 files changed, 351 insertions(+), 318 deletions(-) create mode 100644 src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go rename src/runtime/virtcontainers/pkg/cgroups/{utils.go => utils_linux.go} (88%) rename src/runtime/virtcontainers/pkg/cgroups/{utils_test.go => utils_linux_test.go} (100%) diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go index ea912db3d..bc12cb2b2 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go +++ b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go @@ -6,24 +6,22 @@ package cgroups import ( - "os" + "fmt" "path/filepath" - "sync" - "github.com/containerd/cgroups" v1 
"github.com/containerd/cgroups/stats/v1" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) -type Cgroup struct { - cgroup cgroups.Cgroup - path string - cpusets *specs.LinuxCPU - devices []specs.LinuxDeviceCgroup +// prepend a kata specific string to oci cgroup path to +// form a different cgroup path, thus cAdvisor couldn't +// find kata containers cgroup path on host to prevent it +// from grabbing the stats data. +const CgroupKataPrefix = "kata" - sync.Mutex -} +// DefaultCgroupPath runtime-determined location in the cgroups hierarchy. +const DefaultCgroupPath = "/vc" var ( cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") @@ -36,286 +34,27 @@ func SetLogger(logger *logrus.Entry) { cgroupsLogger = logger.WithFields(fields) } -func sandboxDevices() []specs.LinuxDeviceCgroup { - devices := []specs.LinuxDeviceCgroup{} - - defaultDevices := []string{ - "/dev/null", - "/dev/random", - "/dev/full", - "/dev/tty", - "/dev/zero", - "/dev/urandom", - "/dev/console", +func RenameCgroupPath(path string) (string, error) { + if path == "" { + path = DefaultCgroupPath } - // Processes running in a device-cgroup are constrained, they have acccess - // only to the devices listed in the devices.list file. - // In order to run Virtual Machines and create virtqueues, hypervisors - // need access to certain character devices in the host, like kvm and vhost-net. - hypervisorDevices := []string{ - "/dev/kvm", // To run virtual machines - "/dev/vhost-net", // To create virtqueues - "/dev/vfio/vfio", // To access VFIO devices - "/dev/vhost-vsock", // To interact with vsock if - } + cgroupPathDir := filepath.Dir(path) + cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path)) + return filepath.Join(cgroupPathDir, cgroupPathName), nil - defaultDevices = append(defaultDevices, hypervisorDevices...) 
- - for _, device := range defaultDevices { - ldevice, err := DeviceToLinuxDevice(device) - if err != nil { - cgroupsLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) - continue - } - devices = append(devices, ldevice) - } - - wildcardMajor := int64(-1) - wildcardMinor := int64(-1) - ptsMajor := int64(136) - tunMajor := int64(10) - tunMinor := int64(200) - - wildcardDevices := []specs.LinuxDeviceCgroup{ - // allow mknod for any device - { - Allow: true, - Type: "c", - Major: &wildcardMajor, - Minor: &wildcardMinor, - Access: "m", - }, - { - Allow: true, - Type: "b", - Major: &wildcardMajor, - Minor: &wildcardMinor, - Access: "m", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Allow: true, - Type: "c", - Major: &ptsMajor, - Minor: &wildcardMinor, - Access: "rwm", - }, - // tuntap - { - Allow: true, - Type: "c", - Major: &tunMajor, - Minor: &tunMinor, - Access: "rwm", - }, - } - - devices = append(devices, wildcardDevices...) - - return devices } -func NewCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { - var err error - - cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) - if err != nil { - return nil, err - } - - cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) - if err != nil { - return nil, err - } - - return &Cgroup{ - path: cgroupPath, - devices: resources.Devices, - cpusets: resources.CPU, - cgroup: cgroup, - }, nil -} - -func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (*Cgroup, error) { - var cgroup cgroups.Cgroup - sandboxResources := *resources - sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) 
- - // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, - // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect - if !IsSystemdCgroup(path) || !sandboxCgroupOnly { - return NewCgroup(path, &sandboxResources) - } - - slice, unit, err := getSliceAndUnit(path) - if err != nil { - return nil, err - } - // github.com/containerd/cgroups doesn't support creating a scope unit with - // v1 cgroups against systemd, the following interacts directly with systemd - // to create the cgroup and then load it using containerd's api - err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant - if err != nil { - return nil, err - } - - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return nil, err - } - - // load created cgroup and update with resources - if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil { - err = cgroup.Update(&sandboxResources) - } - - if err != nil { - return nil, err - } - - return &Cgroup{ - path: path, - devices: sandboxResources.Devices, - cpusets: sandboxResources.CPU, - cgroup: cgroup, - }, nil -} - -func Load(path string) (*Cgroup, error) { - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return nil, err - } - - cgroup, err := cgroups.Load(cgHierarchy, cgPath) - if err != nil { - return nil, err - } - - return &Cgroup{ - path: path, - cgroup: cgroup, - }, nil -} - -func (c *Cgroup) Logger() *logrus.Entry { - return cgroupsLogger.WithField("source", "cgroups") -} - -func (c *Cgroup) Delete() error { - return c.cgroup.Delete() -} - -func (c *Cgroup) Stat() (*v1.Metrics, error) { - return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) -} - -func (c *Cgroup) AddProcess(pid int, subsystems ...string) error { - return c.cgroup.Add(cgroups.Process{Pid: pid}) -} - -func (c *Cgroup) AddTask(pid int, subsystems 
...string) error { - return c.cgroup.AddTask(cgroups.Process{Pid: pid}) -} - -func (c *Cgroup) Update(resources *specs.LinuxResources) error { - return c.cgroup.Update(resources) -} - -func (c *Cgroup) MoveTo(path string) error { - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return err - } - - newCgroup, err := cgroups.Load(cgHierarchy, cgPath) - if err != nil { - return err - } - - return c.cgroup.MoveTo(newCgroup) -} - -func (c *Cgroup) MoveToParent() error { - parentPath := filepath.Dir(c.path) - - return c.MoveTo(parentPath) -} - -func (c *Cgroup) AddDevice(deviceHostPath string) error { - deviceResource, err := DeviceToLinuxDevice(deviceHostPath) - if err != nil { - return err - } - - c.Lock() - defer c.Unlock() - - c.devices = append(c.devices, deviceResource) - - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err - } - - return nil -} - -func (c *Cgroup) RemoveDevice(deviceHostPath string) error { - deviceResource, err := DeviceToLinuxDevice(deviceHostPath) - if err != nil { - return err - } - - c.Lock() - defer c.Unlock() - - for i, d := range c.devices { - if d.Type == deviceResource.Type && - d.Major == deviceResource.Major && - d.Minor == deviceResource.Minor { - c.devices = append(c.devices[:i], c.devices[i+1:]...) 
- } - } - - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err - } - - return nil -} - -func (c *Cgroup) UpdateCpuSet(cpuset, memset string) error { - c.Lock() - defer c.Unlock() - - if len(cpuset) > 0 { - // If we didn't have a cpuset defined, let's create: - if c.cpusets == nil { - c.cpusets = &specs.LinuxCPU{} - } - - c.cpusets.Cpus = cpuset - } - - if len(memset) > 0 { - // If we didn't have a cpuset defined, let's now create: - if c.cpusets == nil { - c.cpusets = &specs.LinuxCPU{} - } - - c.cpusets.Mems = memset - } - - return c.cgroup.Update(&specs.LinuxResources{ - CPU: c.cpusets, - }) -} - -func (c *Cgroup) Path() string { - return c.path +type Cgroup interface { + Delete() error + Stat() (*v1.Metrics, error) + AddProcess(int, ...string) error + AddTask(int, ...string) error + Update(*specs.LinuxResources) error + MoveTo(string) error + MoveToParent() error + AddDevice(string) error + RemoveDevice(string) error + UpdateCpuSet(string, string) error + Path() string } diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go new file mode 100644 index 000000000..667539683 --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go @@ -0,0 +1,310 @@ +// Copyright (c) 2021-2022 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package cgroups + +import ( + "os" + "path/filepath" + "sync" + + "github.com/containerd/cgroups" + v1 "github.com/containerd/cgroups/stats/v1" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +type LinuxCgroup struct { + cgroup cgroups.Cgroup + path string + cpusets *specs.LinuxCPU + devices []specs.LinuxDeviceCgroup + + sync.Mutex +} + +func sandboxDevices() []specs.LinuxDeviceCgroup { + devices := []specs.LinuxDeviceCgroup{} + + defaultDevices := []string{ + "/dev/null", + "/dev/random", + "/dev/full", + "/dev/tty", + "/dev/zero", + "/dev/urandom", + "/dev/console", + } + + // Processes running in a device-cgroup are constrained, they have acccess + // only to the devices listed in the devices.list file. + // In order to run Virtual Machines and create virtqueues, hypervisors + // need access to certain character devices in the host, like kvm and vhost-net. + hypervisorDevices := []string{ + "/dev/kvm", // To run virtual machines + "/dev/vhost-net", // To create virtqueues + "/dev/vfio/vfio", // To access VFIO devices + "/dev/vhost-vsock", // To interact with vsock if + } + + defaultDevices = append(defaultDevices, hypervisorDevices...) 
+ + for _, device := range defaultDevices { + ldevice, err := DeviceToLinuxDevice(device) + if err != nil { + cgroupsLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) + continue + } + devices = append(devices, ldevice) + } + + wildcardMajor := int64(-1) + wildcardMinor := int64(-1) + ptsMajor := int64(136) + tunMajor := int64(10) + tunMinor := int64(200) + + wildcardDevices := []specs.LinuxDeviceCgroup{ + // allow mknod for any device + { + Allow: true, + Type: "c", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + { + Allow: true, + Type: "b", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Allow: true, + Type: "c", + Major: &ptsMajor, + Minor: &wildcardMinor, + Access: "rwm", + }, + // tuntap + { + Allow: true, + Type: "c", + Major: &tunMajor, + Minor: &tunMinor, + Access: "rwm", + }, + } + + devices = append(devices, wildcardDevices...) + + return devices +} + +func NewCgroup(path string, resources *specs.LinuxResources) (Cgroup, error) { + var err error + + cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) + if err != nil { + return nil, err + } + + cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: cgroupPath, + devices: resources.Devices, + cpusets: resources.CPU, + cgroup: cgroup, + }, nil +} + +func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (Cgroup, error) { + var cgroup cgroups.Cgroup + sandboxResources := *resources + sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) 
+ + // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, + // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect + if !IsSystemdCgroup(path) || !sandboxCgroupOnly { + return NewCgroup(path, &sandboxResources) + } + + slice, unit, err := getSliceAndUnit(path) + if err != nil { + return nil, err + } + // github.com/containerd/cgroups doesn't support creating a scope unit with + // v1 cgroups against systemd, the following interacts directly with systemd + // to create the cgroup and then load it using containerd's api + err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant + if err != nil { + return nil, err + } + + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return nil, err + } + + // load created cgroup and update with resources + if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil { + err = cgroup.Update(&sandboxResources) + } + + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: path, + devices: sandboxResources.Devices, + cpusets: sandboxResources.CPU, + cgroup: cgroup, + }, nil +} + +func LoadCgroup(path string) (Cgroup, error) { + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return nil, err + } + + cgroup, err := cgroups.Load(cgHierarchy, cgPath) + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: path, + cgroup: cgroup, + }, nil +} + +func (c *LinuxCgroup) Logger() *logrus.Entry { + return cgroupsLogger.WithField("source", "cgroups") +} + +func (c *LinuxCgroup) Delete() error { + return c.cgroup.Delete() +} + +func (c *LinuxCgroup) Stat() (*v1.Metrics, error) { + return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) +} + +func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error { + return c.cgroup.Add(cgroups.Process{Pid: pid}) +} + +func (c 
*LinuxCgroup) AddTask(pid int, subsystems ...string) error { + return c.cgroup.AddTask(cgroups.Process{Pid: pid}) +} + +func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error { + return c.cgroup.Update(resources) +} + +func (c *LinuxCgroup) MoveTo(path string) error { + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return err + } + + newCgroup, err := cgroups.Load(cgHierarchy, cgPath) + if err != nil { + return err + } + + return c.cgroup.MoveTo(newCgroup) +} + +func (c *LinuxCgroup) MoveToParent() error { + parentPath := filepath.Dir(c.path) + + return c.MoveTo(parentPath) +} + +func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + c.devices = append(c.devices, deviceResource) + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + for i, d := range c.devices { + if d.Type == deviceResource.Type && + d.Major == deviceResource.Major && + d.Minor == deviceResource.Minor { + c.devices = append(c.devices[:i], c.devices[i+1:]...) 
+ } + } + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error { + c.Lock() + defer c.Unlock() + + if len(cpuset) > 0 { + // If we didn't have a cpuset defined, let's create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Cpus = cpuset + } + + if len(memset) > 0 { + // If we didn't have a cpuset defined, let's now create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Mems = memset + } + + return c.cgroup.Update(&specs.LinuxResources{ + CPU: c.cpusets, + }) +} + +func (c *LinuxCgroup) Path() string { + return c.path +} diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils.go b/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go similarity index 88% rename from src/runtime/virtcontainers/pkg/cgroups/utils.go rename to src/runtime/virtcontainers/pkg/cgroups/utils_linux.go index fd70b880e..afc095bba 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils.go +++ b/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go @@ -20,26 +20,6 @@ import ( "golang.org/x/sys/unix" ) -// prepend a kata specific string to oci cgroup path to -// form a different cgroup path, thus cAdvisor couldn't -// find kata containers cgroup path on host to prevent it -// from grabbing the stats data. -const CgroupKataPrefix = "kata" - -// DefaultCgroupPath runtime-determined location in the cgroups hierarchy. -const DefaultCgroupPath = "/vc" - -func RenameCgroupPath(path string) (string, error) { - if path == "" { - path = DefaultCgroupPath - } - - cgroupPathDir := filepath.Dir(path) - cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path)) - return filepath.Join(cgroupPathDir, cgroupPathName), nil - -} - // validCgroupPath returns a valid cgroup path. 
// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_test.go b/src/runtime/virtcontainers/pkg/cgroups/utils_linux_test.go similarity index 100% rename from src/runtime/virtcontainers/pkg/cgroups/utils_test.go rename to src/runtime/virtcontainers/pkg/cgroups/utils_linux_test.go diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 623ccb8dc..37a36346d 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -199,8 +199,8 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - sandboxCgroup *cgroups.Cgroup - overheadCgroup *cgroups.Cgroup + sandboxCgroup cgroups.Cgroup + overheadCgroup cgroups.Cgroup cw *consoleWatcher containers map[string]*Container @@ -1752,9 +1752,11 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy span, ctx := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() - if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil { - s.Logger().WithError(err).WithField("device", device). - Warn("Could not add device to cgroup") + if s.sandboxCgroup != nil { + if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). 
+ Warn("Could not add device to cgroup") + } } switch devType { @@ -1801,10 +1803,12 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // HotplugRemoveDevice is used for removing a device from sandbox // Sandbox implement DeviceReceiver interface from device/api/interface.go func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, devType config.DeviceType) error { - defer func() { - if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil { - s.Logger().WithError(err).WithField("device", device). - Warn("Could not add device to cgroup") + defer func() { + if s.sandboxCgroup != nil { + if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). + Warn("Could not add device to cgroup") + } } }() @@ -2127,7 +2131,7 @@ func (s *Sandbox) cgroupsDelete() error { return nil } - sandboxCgroup, err := cgroups.Load(s.state.SandboxCgroupPath) + sandboxCgroup, err := cgroups.LoadCgroup(s.state.SandboxCgroupPath) if err != nil { return err } @@ -2141,7 +2145,7 @@ func (s *Sandbox) cgroupsDelete() error { } if s.state.OverheadCgroupPath != "" { - overheadCgroup, err := cgroups.Load(s.state.OverheadCgroupPath) + overheadCgroup, err := cgroups.LoadCgroup(s.state.OverheadCgroupPath) if err != nil { return err } From ad10e201e17e6e551df0ad9ee837ee8e56311c55 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 24 Nov 2021 17:13:31 +0100 Subject: [PATCH 23/71] virtcontainers: cgroups: Move non Linux routine to utils.go Have an OS agnostic file for sharing routines. 
Signed-off-by: Samuel Ortiz --- .../virtcontainers/pkg/cgroups/utils.go | 59 +++++++++++++++++++ .../virtcontainers/pkg/cgroups/utils_linux.go | 48 --------------- 2 files changed, 59 insertions(+), 48 deletions(-) create mode 100644 src/runtime/virtcontainers/pkg/cgroups/utils.go diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils.go b/src/runtime/virtcontainers/pkg/cgroups/utils.go new file mode 100644 index 000000000..383bfb11d --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cgroups/utils.go @@ -0,0 +1,59 @@ +// Copyright (c) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package cgroups + +import ( + "fmt" + + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" +) + +func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { + var st unix.Stat_t + deviceRule := devices.Rule{ + Allow: true, + Permissions: "rwm", + } + + if err := unix.Stat(device, &st); err != nil { + return nil, err + } + + devType := st.Mode & unix.S_IFMT + + switch devType { + case unix.S_IFCHR: + deviceRule.Type = 'c' + case unix.S_IFBLK: + deviceRule.Type = 'b' + default: + return nil, fmt.Errorf("unsupported device type: %v", devType) + } + + major := int64(unix.Major(st.Rdev)) + minor := int64(unix.Minor(st.Rdev)) + deviceRule.Major = major + deviceRule.Minor = minor + + return &deviceRule, nil +} + +func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) { + dev, err := DeviceToCgroupDeviceRule(device) + if err != nil { + return specs.LinuxDeviceCgroup{}, err + } + + return specs.LinuxDeviceCgroup{ + Allow: dev.Allow, + Type: string(dev.Type), + Major: &dev.Major, + Minor: &dev.Minor, + Access: string(dev.Permissions), + }, nil +} diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go b/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go index afc095bba..6bb7bdec4 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go 
+++ b/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go @@ -15,9 +15,6 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" ) // validCgroupPath returns a valid cgroup path. @@ -129,48 +126,3 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } - -func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { - var st unix.Stat_t - deviceRule := devices.Rule{ - Allow: true, - Permissions: "rwm", - } - - if err := unix.Stat(device, &st); err != nil { - return nil, err - } - - devType := st.Mode & unix.S_IFMT - - switch devType { - case unix.S_IFCHR: - deviceRule.Type = 'c' - case unix.S_IFBLK: - deviceRule.Type = 'b' - default: - return nil, fmt.Errorf("unsupported device type: %v", devType) - } - - major := int64(unix.Major(st.Rdev)) - minor := int64(unix.Minor(st.Rdev)) - deviceRule.Major = major - deviceRule.Minor = minor - - return &deviceRule, nil -} - -func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) { - dev, err := DeviceToCgroupDeviceRule(device) - if err != nil { - return specs.LinuxDeviceCgroup{}, err - } - - return specs.LinuxDeviceCgroup{ - Allow: dev.Allow, - Type: string(dev.Type), - Major: &dev.Major, - Minor: &dev.Minor, - Access: string(dev.Permissions), - }, nil -} From 0d1a7da6823aba57fbf68d9adfeb61db3932ca11 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 2 Feb 2022 15:49:06 +0000 Subject: [PATCH 24/71] virtcontainers: Rename and clean the cgroup interface We call it a ResourceController, and we make it not so Linux specific. Now the Linux implementation is the cgroups one.
Signed-off-by: Samuel Ortiz --- .../virtcontainers/pkg/cgroups/cgroups.go | 323 ++++++++++++++++-- .../pkg/cgroups/cgroups_linux.go | 310 ----------------- .../virtcontainers/pkg/cgroups/controller.go | 82 +++++ src/runtime/virtcontainers/sandbox.go | 24 +- 4 files changed, 394 insertions(+), 345 deletions(-) delete mode 100644 src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go create mode 100644 src/runtime/virtcontainers/pkg/cgroups/controller.go diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go index bc12cb2b2..19aeb8ced 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go +++ b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go @@ -1,4 +1,6 @@ -// Copyright (c) 2021 Apple Inc. +// +build linux +// +// Copyright (c) 2021-2022 Apple Inc. // // SPDX-License-Identifier: Apache-2.0 // @@ -7,8 +9,11 @@ package cgroups import ( "fmt" + "os" "path/filepath" + "sync" + "github.com/containerd/cgroups" v1 "github.com/containerd/cgroups/stats/v1" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" @@ -23,17 +28,6 @@ const CgroupKataPrefix = "kata" // DefaultCgroupPath runtime-determined location in the cgroups hierarchy. 
const DefaultCgroupPath = "/vc" -var ( - cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") -) - -// SetLogger sets up a logger for this pkg -func SetLogger(logger *logrus.Entry) { - fields := cgroupsLogger.Data - - cgroupsLogger = logger.WithFields(fields) -} - func RenameCgroupPath(path string) (string, error) { if path == "" { path = DefaultCgroupPath @@ -45,16 +39,297 @@ func RenameCgroupPath(path string) (string, error) { } -type Cgroup interface { - Delete() error - Stat() (*v1.Metrics, error) - AddProcess(int, ...string) error - AddTask(int, ...string) error - Update(*specs.LinuxResources) error - MoveTo(string) error - MoveToParent() error - AddDevice(string) error - RemoveDevice(string) error - UpdateCpuSet(string, string) error - Path() string +type LinuxCgroup struct { + cgroup cgroups.Cgroup + path string + cpusets *specs.LinuxCPU + devices []specs.LinuxDeviceCgroup + + sync.Mutex +} + +func sandboxDevices() []specs.LinuxDeviceCgroup { + devices := []specs.LinuxDeviceCgroup{} + + defaultDevices := []string{ + "/dev/null", + "/dev/random", + "/dev/full", + "/dev/tty", + "/dev/zero", + "/dev/urandom", + "/dev/console", + } + + // Processes running in a device-cgroup are constrained, they have acccess + // only to the devices listed in the devices.list file. + // In order to run Virtual Machines and create virtqueues, hypervisors + // need access to certain character devices in the host, like kvm and vhost-net. + hypervisorDevices := []string{ + "/dev/kvm", // To run virtual machines + "/dev/vhost-net", // To create virtqueues + "/dev/vfio/vfio", // To access VFIO devices + "/dev/vhost-vsock", // To interact with vsock if + } + + defaultDevices = append(defaultDevices, hypervisorDevices...) 
+ + for _, device := range defaultDevices { + ldevice, err := DeviceToLinuxDevice(device) + if err != nil { + controllerLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) + continue + } + devices = append(devices, ldevice) + } + + wildcardMajor := int64(-1) + wildcardMinor := int64(-1) + ptsMajor := int64(136) + tunMajor := int64(10) + tunMinor := int64(200) + + wildcardDevices := []specs.LinuxDeviceCgroup{ + // allow mknod for any device + { + Allow: true, + Type: "c", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + { + Allow: true, + Type: "b", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Allow: true, + Type: "c", + Major: &ptsMajor, + Minor: &wildcardMinor, + Access: "rwm", + }, + // tuntap + { + Allow: true, + Type: "c", + Major: &tunMajor, + Minor: &tunMinor, + Access: "rwm", + }, + } + + devices = append(devices, wildcardDevices...) + + return devices +} + +func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) { + var err error + + cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) + if err != nil { + return nil, err + } + + cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: cgroupPath, + devices: resources.Devices, + cpusets: resources.CPU, + cgroup: cgroup, + }, nil +} + +func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) { + var cgroup cgroups.Cgroup + sandboxResources := *resources + sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) 
+ + // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, + // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect + if !IsSystemdCgroup(path) || !sandboxCgroupOnly { + return NewResourceController(path, &sandboxResources) + } + + slice, unit, err := getSliceAndUnit(path) + if err != nil { + return nil, err + } + // github.com/containerd/cgroups doesn't support creating a scope unit with + // v1 cgroups against systemd, the following interacts directly with systemd + // to create the cgroup and then load it using containerd's api + err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant + if err != nil { + return nil, err + } + + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return nil, err + } + + // load created cgroup and update with resources + if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil { + err = cgroup.Update(&sandboxResources) + } + + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: path, + devices: sandboxResources.Devices, + cpusets: sandboxResources.CPU, + cgroup: cgroup, + }, nil +} + +func LoadResourceController(path string) (ResourceController, error) { + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return nil, err + } + + cgroup, err := cgroups.Load(cgHierarchy, cgPath) + if err != nil { + return nil, err + } + + return &LinuxCgroup{ + path: path, + cgroup: cgroup, + }, nil +} + +func (c *LinuxCgroup) Logger() *logrus.Entry { + return controllerLogger.WithField("source", "cgroups") +} + +func (c *LinuxCgroup) Delete() error { + return c.cgroup.Delete() +} + +func (c *LinuxCgroup) Stat() (*v1.Metrics, error) { + return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) +} + +func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error { + return 
c.cgroup.Add(cgroups.Process{Pid: pid}) +} + +func (c *LinuxCgroup) AddThread(pid int, subsystems ...string) error { + return c.cgroup.AddTask(cgroups.Process{Pid: pid}) +} + +func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error { + return c.cgroup.Update(resources) +} + +func (c *LinuxCgroup) MoveTo(path string) error { + cgHierarchy, cgPath, err := cgroupHierarchy(path) + if err != nil { + return err + } + + newCgroup, err := cgroups.Load(cgHierarchy, cgPath) + if err != nil { + return err + } + + return c.cgroup.MoveTo(newCgroup) +} + +func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + c.devices = append(c.devices, deviceResource) + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + for i, d := range c.devices { + if d.Type == deviceResource.Type && + d.Major == deviceResource.Major && + d.Minor == deviceResource.Minor { + c.devices = append(c.devices[:i], c.devices[i+1:]...) 
+ } + } + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error { + c.Lock() + defer c.Unlock() + + if len(cpuset) > 0 { + // If we didn't have a cpuset defined, let's create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Cpus = cpuset + } + + if len(memset) > 0 { + // If we didn't have a cpuset defined, let's now create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Mems = memset + } + + return c.cgroup.Update(&specs.LinuxResources{ + CPU: c.cpusets, + }) +} + +func (c *LinuxCgroup) Type() ResourceControllerType { + return LinuxCgroups +} + +func (c *LinuxCgroup) ID() string { + return c.path +} + +func (c *LinuxCgroup) Parent() string { + return filepath.Dir(c.path) } diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go deleted file mode 100644 index 667539683..000000000 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups_linux.go +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright (c) 2021-2022 Apple Inc. -// -// SPDX-License-Identifier: Apache-2.0 -// - -package cgroups - -import ( - "os" - "path/filepath" - "sync" - - "github.com/containerd/cgroups" - v1 "github.com/containerd/cgroups/stats/v1" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" -) - -type LinuxCgroup struct { - cgroup cgroups.Cgroup - path string - cpusets *specs.LinuxCPU - devices []specs.LinuxDeviceCgroup - - sync.Mutex -} - -func sandboxDevices() []specs.LinuxDeviceCgroup { - devices := []specs.LinuxDeviceCgroup{} - - defaultDevices := []string{ - "/dev/null", - "/dev/random", - "/dev/full", - "/dev/tty", - "/dev/zero", - "/dev/urandom", - "/dev/console", - } - - // Processes running in a device-cgroup are constrained, they have acccess - // only to the devices listed in the devices.list file. 
- // In order to run Virtual Machines and create virtqueues, hypervisors - // need access to certain character devices in the host, like kvm and vhost-net. - hypervisorDevices := []string{ - "/dev/kvm", // To run virtual machines - "/dev/vhost-net", // To create virtqueues - "/dev/vfio/vfio", // To access VFIO devices - "/dev/vhost-vsock", // To interact with vsock if - } - - defaultDevices = append(defaultDevices, hypervisorDevices...) - - for _, device := range defaultDevices { - ldevice, err := DeviceToLinuxDevice(device) - if err != nil { - cgroupsLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) - continue - } - devices = append(devices, ldevice) - } - - wildcardMajor := int64(-1) - wildcardMinor := int64(-1) - ptsMajor := int64(136) - tunMajor := int64(10) - tunMinor := int64(200) - - wildcardDevices := []specs.LinuxDeviceCgroup{ - // allow mknod for any device - { - Allow: true, - Type: "c", - Major: &wildcardMajor, - Minor: &wildcardMinor, - Access: "m", - }, - { - Allow: true, - Type: "b", - Major: &wildcardMajor, - Minor: &wildcardMinor, - Access: "m", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Allow: true, - Type: "c", - Major: &ptsMajor, - Minor: &wildcardMinor, - Access: "rwm", - }, - // tuntap - { - Allow: true, - Type: "c", - Major: &tunMajor, - Minor: &tunMinor, - Access: "rwm", - }, - } - - devices = append(devices, wildcardDevices...) 
- - return devices -} - -func NewCgroup(path string, resources *specs.LinuxResources) (Cgroup, error) { - var err error - - cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) - if err != nil { - return nil, err - } - - cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) - if err != nil { - return nil, err - } - - return &LinuxCgroup{ - path: cgroupPath, - devices: resources.Devices, - cpusets: resources.CPU, - cgroup: cgroup, - }, nil -} - -func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (Cgroup, error) { - var cgroup cgroups.Cgroup - sandboxResources := *resources - sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) - - // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, - // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect - if !IsSystemdCgroup(path) || !sandboxCgroupOnly { - return NewCgroup(path, &sandboxResources) - } - - slice, unit, err := getSliceAndUnit(path) - if err != nil { - return nil, err - } - // github.com/containerd/cgroups doesn't support creating a scope unit with - // v1 cgroups against systemd, the following interacts directly with systemd - // to create the cgroup and then load it using containerd's api - err = createCgroupsSystemd(slice, unit, uint32(os.Getpid())) // adding runtime process, it makes calling setupCgroups redundant - if err != nil { - return nil, err - } - - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return nil, err - } - - // load created cgroup and update with resources - if cgroup, err = cgroups.Load(cgHierarchy, cgPath); err == nil { - err = cgroup.Update(&sandboxResources) - } - - if err != nil { - return nil, err - } - - return &LinuxCgroup{ - path: path, - devices: sandboxResources.Devices, - cpusets: sandboxResources.CPU, - cgroup: cgroup, - }, nil -} - -func 
LoadCgroup(path string) (Cgroup, error) { - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return nil, err - } - - cgroup, err := cgroups.Load(cgHierarchy, cgPath) - if err != nil { - return nil, err - } - - return &LinuxCgroup{ - path: path, - cgroup: cgroup, - }, nil -} - -func (c *LinuxCgroup) Logger() *logrus.Entry { - return cgroupsLogger.WithField("source", "cgroups") -} - -func (c *LinuxCgroup) Delete() error { - return c.cgroup.Delete() -} - -func (c *LinuxCgroup) Stat() (*v1.Metrics, error) { - return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) -} - -func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error { - return c.cgroup.Add(cgroups.Process{Pid: pid}) -} - -func (c *LinuxCgroup) AddTask(pid int, subsystems ...string) error { - return c.cgroup.AddTask(cgroups.Process{Pid: pid}) -} - -func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error { - return c.cgroup.Update(resources) -} - -func (c *LinuxCgroup) MoveTo(path string) error { - cgHierarchy, cgPath, err := cgroupHierarchy(path) - if err != nil { - return err - } - - newCgroup, err := cgroups.Load(cgHierarchy, cgPath) - if err != nil { - return err - } - - return c.cgroup.MoveTo(newCgroup) -} - -func (c *LinuxCgroup) MoveToParent() error { - parentPath := filepath.Dir(c.path) - - return c.MoveTo(parentPath) -} - -func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { - deviceResource, err := DeviceToLinuxDevice(deviceHostPath) - if err != nil { - return err - } - - c.Lock() - defer c.Unlock() - - c.devices = append(c.devices, deviceResource) - - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err - } - - return nil -} - -func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error { - deviceResource, err := DeviceToLinuxDevice(deviceHostPath) - if err != nil { - return err - } - - c.Lock() - defer c.Unlock() - - for i, d := range c.devices { - if d.Type == deviceResource.Type 
&& - d.Major == deviceResource.Major && - d.Minor == deviceResource.Minor { - c.devices = append(c.devices[:i], c.devices[i+1:]...) - } - } - - if err := c.cgroup.Update(&specs.LinuxResources{ - Devices: c.devices, - }); err != nil { - return err - } - - return nil -} - -func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error { - c.Lock() - defer c.Unlock() - - if len(cpuset) > 0 { - // If we didn't have a cpuset defined, let's create: - if c.cpusets == nil { - c.cpusets = &specs.LinuxCPU{} - } - - c.cpusets.Cpus = cpuset - } - - if len(memset) > 0 { - // If we didn't have a cpuset defined, let's now create: - if c.cpusets == nil { - c.cpusets = &specs.LinuxCPU{} - } - - c.cpusets.Mems = memset - } - - return c.cgroup.Update(&specs.LinuxResources{ - CPU: c.cpusets, - }) -} - -func (c *LinuxCgroup) Path() string { - return c.path -} diff --git a/src/runtime/virtcontainers/pkg/cgroups/controller.go b/src/runtime/virtcontainers/pkg/cgroups/controller.go new file mode 100644 index 000000000..fbdc4e4ab --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cgroups/controller.go @@ -0,0 +1,82 @@ +// Copyright (c) 2021 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package cgroups + +import ( + v1 "github.com/containerd/cgroups/stats/v1" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +var ( + controllerLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") +) + +// SetLogger sets up a logger for this pkg +func SetLogger(logger *logrus.Entry) { + fields := controllerLogger.Data + + controllerLogger = logger.WithFields(fields) +} + +// HypervisorType describes an hypervisor type. +type ResourceControllerType string + +const ( + LinuxCgroups ResourceControllerType = "cgroups" +) + +// String converts an hypervisor type to a string. 
+func (rType *ResourceControllerType) String() string { + switch *rType { + case LinuxCgroups: + return string(LinuxCgroups) + default: + return "Unknown controller type" + } +} + +// ResourceController represents a system resources controller. +// On Linux this interface is implemented through the cgroups API. +type ResourceController interface { + // Type returns the resource controller implementation type. + Type() ResourceControllerType + + // The controller identifier, e.g. a Linux cgroups path. + ID() string + + // Parent returns the parent controller, on hierarchically + // defined resource (e.g. Linux cgroups). + Parent() string + + // Delete the controller. + Delete() error + + // Stat returns the statistics for the controller. + Stat() (*v1.Metrics, error) + + // AddProcess adds a process to a set of controllers. + AddProcess(int, ...string) error + + // AddThread adds a process thread to a set of controllers. + AddThread(int, ...string) error + + // Update updates the set of resources controlled, based on + // an OCI resources description. + Update(*specs.LinuxResources) error + + // MoveTo moves a controller to another one. + MoveTo(string) error + + // AddDevice adds a device resource to the controller. + AddDevice(string) error + + // RemoveDevice removes a device resource to the controller. + RemoveDevice(string) error + + // UpdateCpuSet updates the set of controlled CPUs and memory nodes. 
+ UpdateCpuSet(string, string) error +} diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 37a36346d..6b4ff14ec 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -199,8 +199,8 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - sandboxCgroup cgroups.Cgroup - overheadCgroup cgroups.Cgroup + sandboxCgroup cgroups.ResourceController + overheadCgroup cgroups.ResourceController cw *consoleWatcher containers map[string]*Container @@ -672,13 +672,13 @@ func (s *Sandbox) createCgroups() error { // Depending on the SandboxCgroupOnly value, this cgroup // will either hold all the pod threads (SandboxCgroupOnly is true) // or only the virtual CPU ones (SandboxCgroupOnly is false). - s.sandboxCgroup, err = cgroups.NewSandboxCgroup(cgroupPath, &resources, s.config.SandboxCgroupOnly) + s.sandboxCgroup, err = cgroups.NewSandboxResourceController(cgroupPath, &resources, s.config.SandboxCgroupOnly) if err != nil { return fmt.Errorf("Could not create the sandbox cgroup %v", err) } // Now that the sandbox cgroup is created, we can set the state cgroup root paths. - s.state.SandboxCgroupPath = s.sandboxCgroup.Path() + s.state.SandboxCgroupPath = s.sandboxCgroup.ID() s.state.OverheadCgroupPath = "" if s.config.SandboxCgroupOnly { @@ -688,14 +688,14 @@ func (s *Sandbox) createCgroups() error { // into the sandbox cgroup. // We're creating an overhead cgroup, with no constraints. Everything but // the vCPU threads will eventually make it there. 
- overheadCgroup, err := cgroups.NewCgroup(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{}) + overheadCgroup, err := cgroups.NewResourceController(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{}) // TODO: support systemd cgroups overhead cgroup // https://github.com/kata-containers/kata-containers/issues/2963 if err != nil { return err } s.overheadCgroup = overheadCgroup - s.state.OverheadCgroupPath = s.overheadCgroup.Path() + s.state.OverheadCgroupPath = s.overheadCgroup.ID() } return nil @@ -2131,12 +2131,13 @@ func (s *Sandbox) cgroupsDelete() error { return nil } - sandboxCgroup, err := cgroups.LoadCgroup(s.state.SandboxCgroupPath) + sandboxCgroup, err := cgroups.LoadResourceController(s.state.SandboxCgroupPath) if err != nil { return err } - if err := sandboxCgroup.MoveToParent(); err != nil { + resCtrlParent := sandboxCgroup.Parent() + if err := sandboxCgroup.MoveTo(resCtrlParent); err != nil { return err } @@ -2145,12 +2146,13 @@ func (s *Sandbox) cgroupsDelete() error { } if s.state.OverheadCgroupPath != "" { - overheadCgroup, err := cgroups.LoadCgroup(s.state.OverheadCgroupPath) + overheadCgroup, err := cgroups.LoadResourceController(s.state.OverheadCgroupPath) if err != nil { return err } - if err := s.overheadCgroup.MoveToParent(); err != nil { + resCtrlParent := overheadCgroup.Parent() + if err := s.overheadCgroup.MoveTo(resCtrlParent); err != nil { return err } @@ -2171,7 +2173,7 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error { // All vCPU threads move to the sandbox cgroup. 
for _, i := range tids.vcpus { - if err := s.sandboxCgroup.AddTask(i); err != nil { + if err := s.sandboxCgroup.AddThread(i); err != nil { return err } } From 823faee83a139a09b5eca66f53cf308ae9ebe482 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 2 Feb 2022 16:11:14 +0000 Subject: [PATCH 25/71] virtcontainers: Rename the cgroups package To resourcecontrol, and make it consistent with the fact that cgroups are a Linux implementation of the ResourceController interface. Fixes: #3601 Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/api.go | 6 +- src/runtime/virtcontainers/api_test.go | 4 +- src/runtime/virtcontainers/kata_agent.go | 4 +- .../virtcontainers/physical_endpoint.go | 4 +- .../{cgroups => resourcecontrol}/cgroups.go | 7 +- .../controller.go | 4 +- .../pkg/{cgroups => resourcecontrol}/utils.go | 2 +- .../utils_linux.go | 9 +- .../utils_linux_test.go | 6 +- src/runtime/virtcontainers/sandbox.go | 169 +++++++++--------- src/runtime/virtcontainers/sandbox_test.go | 4 +- 11 files changed, 110 insertions(+), 109 deletions(-) rename src/runtime/virtcontainers/pkg/{cgroups => resourcecontrol}/cgroups.go (98%) rename src/runtime/virtcontainers/pkg/{cgroups => resourcecontrol}/controller.go (97%) rename src/runtime/virtcontainers/pkg/{cgroups => resourcecontrol}/utils.go (98%) rename src/runtime/virtcontainers/pkg/{cgroups => resourcecontrol}/utils_linux.go (92%) rename src/runtime/virtcontainers/pkg/{cgroups => resourcecontrol}/utils_linux_test.go (95%) diff --git a/src/runtime/virtcontainers/api.go b/src/runtime/virtcontainers/api.go index 14cfbb109..4275626e8 100644 --- a/src/runtime/virtcontainers/api.go +++ b/src/runtime/virtcontainers/api.go @@ -12,8 +12,8 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" deviceApi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" deviceConfig "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" - 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/sirupsen/logrus" ) @@ -39,7 +39,7 @@ func SetLogger(ctx context.Context, logger *logrus.Entry) { deviceApi.SetLogger(virtLog) compatoci.SetLogger(virtLog) deviceConfig.SetLogger(virtLog) - cgroups.SetLogger(virtLog) + resCtrl.SetLogger(virtLog) } // CreateSandbox is the virtcontainers sandbox creation entry point. @@ -83,7 +83,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f }() // Set the sandbox host cgroups. - if err := s.setupCgroups(); err != nil { + if err := s.setupResourceController(); err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index 3366041c6..d793ba43e 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -17,8 +17,8 @@ import ( ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/mock" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -49,7 +49,7 @@ func newEmptySpec() *specs.Spec { return 
&specs.Spec{ Linux: &specs.Linux{ Resources: &specs.LinuxResources{}, - CgroupsPath: vccgroups.DefaultCgroupPath, + CgroupsPath: resCtrl.DefaultResourceControllerID, }, Process: &specs.Process{ Capabilities: &specs.LinuxCapabilities{}, diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 28ff3c73a..e95362f58 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -27,7 +27,7 @@ import ( kataclient "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -1061,7 +1061,7 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str // - Initrd image doesn't have systemd. // - Nobody will be able to modify the resources of a specific container by using systemctl set-property. // - docker is not running in the VM. 
- if vccgroups.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) { + if resCtrl.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) { // Convert systemd cgroup to cgroupfs slice := strings.Split(grpcSpec.Linux.CgroupsPath, ":") // 0 - slice: system.slice diff --git a/src/runtime/virtcontainers/physical_endpoint.go b/src/runtime/virtcontainers/physical_endpoint.go index 1ab193006..845400e09 100644 --- a/src/runtime/virtcontainers/physical_endpoint.go +++ b/src/runtime/virtcontainers/physical_endpoint.go @@ -17,7 +17,7 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/safchain/ethtool" ) @@ -89,7 +89,7 @@ func (endpoint *PhysicalEndpoint) Attach(ctx context.Context, s *Sandbox) error return err } - c, err := cgroups.DeviceToCgroupDeviceRule(vfioPath) + c, err := resCtrl.DeviceToCgroupDeviceRule(vfioPath) if err != nil { return err } diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/virtcontainers/pkg/resourcecontrol/cgroups.go similarity index 98% rename from src/runtime/virtcontainers/pkg/cgroups/cgroups.go rename to src/runtime/virtcontainers/pkg/resourcecontrol/cgroups.go index 19aeb8ced..49c6562af 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go +++ b/src/runtime/virtcontainers/pkg/resourcecontrol/cgroups.go @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "fmt" @@ -25,12 +25,9 @@ import ( // from grabbing the stats data. 
const CgroupKataPrefix = "kata" -// DefaultCgroupPath runtime-determined location in the cgroups hierarchy. -const DefaultCgroupPath = "/vc" - func RenameCgroupPath(path string) (string, error) { if path == "" { - path = DefaultCgroupPath + path = DefaultResourceControllerID } cgroupPathDir := filepath.Dir(path) diff --git a/src/runtime/virtcontainers/pkg/cgroups/controller.go b/src/runtime/virtcontainers/pkg/resourcecontrol/controller.go similarity index 97% rename from src/runtime/virtcontainers/pkg/cgroups/controller.go rename to src/runtime/virtcontainers/pkg/resourcecontrol/controller.go index fbdc4e4ab..1ffba476b 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/controller.go +++ b/src/runtime/virtcontainers/pkg/resourcecontrol/controller.go @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( v1 "github.com/containerd/cgroups/stats/v1" @@ -12,7 +12,7 @@ import ( ) var ( - controllerLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") + controllerLogger = logrus.WithField("source", "virtcontainers/pkg/resourcecontrol") ) // SetLogger sets up a logger for this pkg diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils.go b/src/runtime/virtcontainers/pkg/resourcecontrol/utils.go similarity index 98% rename from src/runtime/virtcontainers/pkg/cgroups/utils.go rename to src/runtime/virtcontainers/pkg/resourcecontrol/utils.go index 383bfb11d..9ab592be1 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils.go +++ b/src/runtime/virtcontainers/pkg/resourcecontrol/utils.go @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "fmt" diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go b/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux.go similarity index 92% rename from src/runtime/virtcontainers/pkg/cgroups/utils_linux.go rename to src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux.go index 
6bb7bdec4..7a83db8f0 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils_linux.go +++ b/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux.go @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "context" @@ -17,6 +17,9 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups/systemd" ) +// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. +const DefaultResourceControllerID = "/vc" + // validCgroupPath returns a valid cgroup path. // see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { @@ -36,8 +39,8 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { // In the case of a relative path (not starting with /), the runtime MAY // interpret the path relative to a runtime-determined location in the cgroups hierarchy. - // clean up path and return a new path relative to DefaultCgroupPath - return filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path)), nil + // clean up path and return a new path relative to DefaultResourceControllerID + return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil } func IsSystemdCgroup(cgroupPath string) bool { diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_linux_test.go b/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux_test.go similarity index 95% rename from src/runtime/virtcontainers/pkg/cgroups/utils_linux_test.go rename to src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux_test.go index 381af8165..2e40c4d83 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils_linux_test.go +++ b/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux_test.go @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "os" @@ -99,8 +99,8 @@ func TestValidCgroupPath(t *testing.T) { assert.Equal(t.path, 
path) } else { assert.True( - strings.HasPrefix(path, DefaultCgroupPath), - "%v should have prefix /%v", path, DefaultCgroupPath) + strings.HasPrefix(path, DefaultResourceControllerID), + "%v should have prefix /%v", path, DefaultResourceControllerID) } } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 6b4ff14ec..94b94a2e7 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -36,9 +36,9 @@ import ( pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cpuset" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -64,13 +64,13 @@ const ( rwm = "rwm" // When the Kata overhead threads (I/O, VMM, etc) are not - // placed in the sandbox cgroup, they are moved to a specific, - // unconstrained cgroup hierarchy. - // Assuming the cgroup mount point is at /sys/fs/cgroup/, on a - // cgroup v1 system, the Kata overhead memory cgroup will be at + // placed in the sandbox resource controller (A cgroup on Linux), + // they are moved to a specific, unconstrained resource controller. 
+ // On Linux, assuming the cgroup mount point is at /sys/fs/cgroup/, + // on a cgroup v1 system, the Kata overhead memory cgroup will be at // /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is // defined by the orchestrator. - cgroupKataOverheadPath = "/kata_overhead/" + resCtrlKataOverheadID = "/kata_overhead/" ) var ( @@ -199,10 +199,11 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - sandboxCgroup cgroups.ResourceController - overheadCgroup cgroups.ResourceController cw *consoleWatcher + sandboxController resCtrl.ResourceController + overheadController resCtrl.ResourceController + containers map[string]*Container id string @@ -565,8 +566,8 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor sandboxConfig.HypervisorConfig.EnableVhostUserStore, sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) - // Create the sandbox cgroups - if err := s.createCgroups(); err != nil { + // Create the sandbox resource controllers. + if err := s.createResourceController(); err != nil { return nil, err } @@ -587,7 +588,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return s, nil } -func (s *Sandbox) createCgroups() error { +func (s *Sandbox) createResourceController() error { var err error cgroupPath := "" @@ -596,20 +597,19 @@ func (s *Sandbox) createCgroups() error { resources := specs.LinuxResources{} if s.config == nil { - return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration") + return fmt.Errorf("Could not create %s resource controller manager: empty sandbox configuration", s.sandboxController) } spec := s.GetPatchedOCISpec() if spec != nil && spec.Linux != nil { cgroupPath = spec.Linux.CgroupsPath - // Kata relies on the cgroup parent created and configured by the container - // engine by default. The exception is for devices whitelist as well as sandbox-level - // CPUSet. 
- // For the sandbox cgroups we create and manage, rename the base of the cgroup path to + // Kata relies on the resource controller (cgroups on Linux) parent created and configured by the + // container engine by default. The exception is for devices whitelist as well as sandbox-level CPUSet. + // For the sandbox controllers we create and manage, rename the base of the controller ID to // include "kata_" - if !cgroups.IsSystemdCgroup(cgroupPath) { // don't add prefix when cgroups are managed by systemd - cgroupPath, err = cgroups.RenameCgroupPath(cgroupPath) + if !resCtrl.IsSystemdCgroup(cgroupPath) { // don't add prefix when cgroups are managed by systemd + cgroupPath, err = resCtrl.RenameCgroupPath(cgroupPath) if err != nil { return err } @@ -659,7 +659,7 @@ func (s *Sandbox) createCgroups() error { if s.devManager != nil { for _, d := range s.devManager.GetAllDevices() { - dev, err := cgroups.DeviceToLinuxDevice(d.GetHostPath()) + dev, err := resCtrl.DeviceToLinuxDevice(d.GetHostPath()) if err != nil { s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources") continue @@ -668,34 +668,34 @@ func (s *Sandbox) createCgroups() error { } } - // Create the sandbox cgroup. + // Create the sandbox resource controller (cgroups on Linux). // Depending on the SandboxCgroupOnly value, this cgroup // will either hold all the pod threads (SandboxCgroupOnly is true) // or only the virtual CPU ones (SandboxCgroupOnly is false). - s.sandboxCgroup, err = cgroups.NewSandboxResourceController(cgroupPath, &resources, s.config.SandboxCgroupOnly) + s.sandboxController, err = resCtrl.NewSandboxResourceController(cgroupPath, &resources, s.config.SandboxCgroupOnly) if err != nil { - return fmt.Errorf("Could not create the sandbox cgroup %v", err) + return fmt.Errorf("Could not create the sandbox resource controller %v", err) } - // Now that the sandbox cgroup is created, we can set the state cgroup root paths. 
- s.state.SandboxCgroupPath = s.sandboxCgroup.ID() + // Now that the sandbox resource controller is created, we can set the state controller paths.. + s.state.SandboxCgroupPath = s.sandboxController.ID() s.state.OverheadCgroupPath = "" if s.config.SandboxCgroupOnly { - s.overheadCgroup = nil + s.overheadController = nil } else { // The shim configuration is requesting that we do not put all threads - // into the sandbox cgroup. - // We're creating an overhead cgroup, with no constraints. Everything but + // into the sandbox resource controller. + // We're creating an overhead controller, with no constraints. Everything but // the vCPU threads will eventually make it there. - overheadCgroup, err := cgroups.NewResourceController(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{}) + overheadController, err := resCtrl.NewResourceController(fmt.Sprintf("/%s/%s", resCtrlKataOverheadID, s.id), &specs.LinuxResources{}) // TODO: support systemd cgroups overhead cgroup // https://github.com/kata-containers/kata-containers/issues/2963 if err != nil { return err } - s.overheadCgroup = overheadCgroup - s.state.OverheadCgroupPath = s.overheadCgroup.ID() + s.overheadController = overheadController + s.state.OverheadCgroupPath = s.overheadController.ID() } return nil @@ -778,8 +778,8 @@ func (s *Sandbox) Delete(ctx context.Context) error { } if !rootless.IsRootless() { - if err := s.cgroupsDelete(); err != nil { - s.Logger().WithError(err).Error("failed to Cleanup cgroups") + if err := s.resourceControllerDelete(); err != nil { + s.Logger().WithError(err).Errorf("failed to cleanup the %s resource controllers", s.sandboxController) } } @@ -1318,7 +1318,7 @@ func (s *Sandbox) CreateContainer(ctx context.Context, contConfig ContainerConfi return nil, err } - if err = s.cgroupsUpdate(ctx); err != nil { + if err = s.resourceControllerUpdate(ctx); err != nil { return nil, err } @@ -1421,8 +1421,8 @@ func (s *Sandbox) DeleteContainer(ctx context.Context, 
containerID string) (VCCo } } - // update the sandbox cgroup - if err = s.cgroupsUpdate(ctx); err != nil { + // update the sandbox resource controller + if err = s.resourceControllerUpdate(ctx); err != nil { return nil, err } @@ -1487,7 +1487,7 @@ func (s *Sandbox) UpdateContainer(ctx context.Context, containerID string, resou return err } - if err := s.cgroupsUpdate(ctx); err != nil { + if err := s.resourceControllerUpdate(ctx); err != nil { return err } @@ -1515,7 +1515,7 @@ func (s *Sandbox) StatsContainer(ctx context.Context, containerID string) (Conta // Stats returns the stats of a running sandbox func (s *Sandbox) Stats(ctx context.Context) (SandboxStats, error) { - metrics, err := s.sandboxCgroup.Stat() + metrics, err := s.sandboxController.Stat() if err != nil { return SandboxStats{}, err } @@ -1599,7 +1599,7 @@ func (s *Sandbox) createContainers(ctx context.Context) error { return err } - if err := s.cgroupsUpdate(ctx); err != nil { + if err := s.resourceControllerUpdate(ctx); err != nil { return err } if err := s.storeSandbox(ctx); err != nil { @@ -1752,10 +1752,10 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy span, ctx := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() - if s.sandboxCgroup != nil { - if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil { + if s.sandboxController != nil { + if err := s.sandboxController.AddDevice(device.GetHostPath()); err != nil { s.Logger().WithError(err).WithField("device", device). 
- Warn("Could not add device to cgroup") + Warnf("Could not add device to the %s controller", s.sandboxController) } } @@ -1803,11 +1803,11 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // HotplugRemoveDevice is used for removing a device from sandbox // Sandbox implement DeviceReceiver interface from device/api/interface.go func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, devType config.DeviceType) error { - defer func() { - if s.sandboxCgroup != nil { - if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil { + defer func() { + if s.sandboxController != nil { + if err := s.sandboxController.RemoveDevice(device.GetHostPath()); err != nil { s.Logger().WithError(err).WithField("device", device). - Warn("Could not add device to cgroup") + Warnf("Could not add device to the %s controller", s.sandboxController) } } }() @@ -2095,25 +2095,26 @@ func (s *Sandbox) GetHypervisorType() string { return string(s.config.HypervisorType) } -// cgroupsUpdate updates the sandbox cpuset cgroup subsystem. -// Also, if the sandbox has an overhead cgroup, it updates the hypervisor +// resourceControllerUpdate updates the sandbox cpuset resource controller +// (Linux cgroup) subsystem. +// Also, if the sandbox has an overhead controller, it updates the hypervisor // constraints by moving the potentially new vCPU threads back to the sandbox -// cgroup. -func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { +// controller. +func (s *Sandbox) resourceControllerUpdate(ctx context.Context) error { cpuset, memset, err := s.getSandboxCPUSet() if err != nil { return err } - // We update the sandbox cgroup with potentially new virtual CPUs. - if err := s.sandboxCgroup.UpdateCpuSet(cpuset, memset); err != nil { + // We update the sandbox controller with potentially new virtual CPUs. 
+ if err := s.sandboxController.UpdateCpuSet(cpuset, memset); err != nil { return err } - if s.overheadCgroup != nil { - // If we have an overhead cgroup, new vCPU threads would start there, + if s.overheadController != nil { + // If we have an overhead controller, new vCPU threads would start there, // as being children of the VMM PID. - // We need to constrain them by moving them into the sandbox cgroup. + // We need to constrain them by moving them into the sandbox controller. if err := s.constrainHypervisor(ctx); err != nil { return err } @@ -2122,41 +2123,41 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { return nil } -// cgroupsDelete will move the running processes in the sandbox cgroup -// to the parent and then delete the sandbox cgroup -func (s *Sandbox) cgroupsDelete() error { - s.Logger().Debug("Deleting sandbox cgroup") +// resourceControllerDelete will move the running processes in the sandbox resource +// controller to the parent and then delete the sandbox controller. 
+func (s *Sandbox) resourceControllerDelete() error { + s.Logger().Debugf("Deleting sandbox %s resource controler", s.sandboxController) if s.state.SandboxCgroupPath == "" { - s.Logger().Warnf("sandbox cgroup path is empty") + s.Logger().Warnf("sandbox %s resource controler path is empty", s.sandboxController) return nil } - sandboxCgroup, err := cgroups.LoadResourceController(s.state.SandboxCgroupPath) + sandboxController, err := resCtrl.LoadResourceController(s.state.SandboxCgroupPath) if err != nil { return err } - resCtrlParent := sandboxCgroup.Parent() - if err := sandboxCgroup.MoveTo(resCtrlParent); err != nil { + resCtrlParent := sandboxController.Parent() + if err := sandboxController.MoveTo(resCtrlParent); err != nil { return err } - if err := sandboxCgroup.Delete(); err != nil { + if err := sandboxController.Delete(); err != nil { return err } if s.state.OverheadCgroupPath != "" { - overheadCgroup, err := cgroups.LoadResourceController(s.state.OverheadCgroupPath) + overheadController, err := resCtrl.LoadResourceController(s.state.OverheadCgroupPath) if err != nil { return err } - resCtrlParent := overheadCgroup.Parent() - if err := s.overheadCgroup.MoveTo(resCtrlParent); err != nil { + resCtrlParent := overheadController.Parent() + if err := s.overheadController.MoveTo(resCtrlParent); err != nil { return err } - if err := overheadCgroup.Delete(); err != nil { + if err := overheadController.Delete(); err != nil { return err } } @@ -2164,16 +2165,16 @@ func (s *Sandbox) cgroupsDelete() error { return nil } -// constrainHypervisor will place the VMM and vCPU threads into cgroups. +// constrainHypervisor will place the VMM and vCPU threads into resource controllers (cgroups on Linux). func (s *Sandbox) constrainHypervisor(ctx context.Context) error { tids, err := s.hypervisor.GetThreadIDs(ctx) if err != nil { return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) } - // All vCPU threads move to the sandbox cgroup. 
+ // All vCPU threads move to the sandbox controller. for _, i := range tids.vcpus { - if err := s.sandboxCgroup.AddThread(i); err != nil { + if err := s.sandboxController.AddThread(i); err != nil { return err } } @@ -2181,22 +2182,22 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error { return nil } -// setupCgroups adds the runtime process to either the sandbox cgroup or the overhead one, -// depending on the sandbox_cgroup_only configuration setting. -func (s *Sandbox) setupCgroups() error { - vmmCgroup := s.sandboxCgroup - if s.overheadCgroup != nil { - vmmCgroup = s.overheadCgroup +// setupResourceController adds the runtime process to either the sandbox resource controller or the +// overhead one, depending on the sandbox_cgroup_only configuration setting. +func (s *Sandbox) setupResourceController() error { + vmmController := s.sandboxController + if s.overheadController != nil { + vmmController = s.overheadController } - // By adding the runtime process to either the sandbox or overhead cgroup, we are making + // By adding the runtime process to either the sandbox or overhead controller, we are making // sure that any child process of the runtime (i.e. *all* processes serving a Kata pod) - // will initially live in this cgroup. Depending on the sandbox_cgroup settings, we will - // then move the vCPU threads between cgroups. + // will initially live in this controller. Depending on the sandbox_cgroup settings, we will + // then move the vCPU threads between resource controllers. 
runtimePid := os.Getpid() - // Add the runtime to the VMM sandbox cgroup - if err := vmmCgroup.AddProcess(runtimePid); err != nil { - return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) + // Add the runtime to the VMM sandbox resource controller + if err := vmmController.AddProcess(runtimePid); err != nil { + return fmt.Errorf("Could not add runtime PID %d to the sandbox %s resource controller: %v", runtimePid, s.sandboxController, err) } return nil @@ -2217,8 +2218,8 @@ func (s *Sandbox) GetPatchedOCISpec() *specs.Spec { } // get the container associated with the PodSandbox annotation. In Kubernetes, this - // represents the pause container. In Docker, this is the container. We derive the - // cgroup path from this container. + // represents the pause container. In Docker, this is the container. + // On Linux, we derive the cgroup path from this container. for _, cConfig := range s.config.Containers { if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) { return cConfig.CustomSpec diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 9c64c8b22..344551e3b 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -1502,14 +1502,14 @@ func TestSandbox_Cgroups(t *testing.T) { } t.Run(tt.name, func(t *testing.T) { - err := tt.s.createCgroups() + err := tt.s.createResourceController() t.Logf("create groups error %v", err) if (err != nil) != tt.wantErr { t.Errorf("Sandbox.CreateCgroups() error = %v, wantErr %v", err, tt.wantErr) } if err == nil { - if err := tt.s.setupCgroups(); (err != nil) != tt.wantErr { + if err := tt.s.setupResourceController(); (err != nil) != tt.wantErr { t.Errorf("Sandbox.SetupCgroups() error = %v, wantErr %v", err, tt.wantErr) } } From 9fd4e5514fcf012d6cdb92d0fd8b4c3de665c4ee Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 8 Feb 2022 12:54:44 +0000 Subject: [PATCH 26/71] runtime: 
Move the resourcecontrol package one layer up And try to reduce the number of virtcontainers packages, step by step. Signed-off-by: Samuel Ortiz --- src/runtime/{virtcontainers => }/pkg/resourcecontrol/cgroups.go | 0 .../{virtcontainers => }/pkg/resourcecontrol/controller.go | 0 src/runtime/{virtcontainers => }/pkg/resourcecontrol/utils.go | 0 .../{virtcontainers => }/pkg/resourcecontrol/utils_linux.go | 0 .../pkg/resourcecontrol/utils_linux_test.go | 0 src/runtime/virtcontainers/api.go | 2 +- src/runtime/virtcontainers/api_test.go | 2 +- src/runtime/virtcontainers/kata_agent.go | 2 +- src/runtime/virtcontainers/physical_endpoint.go | 2 +- src/runtime/virtcontainers/sandbox.go | 2 +- 10 files changed, 5 insertions(+), 5 deletions(-) rename src/runtime/{virtcontainers => }/pkg/resourcecontrol/cgroups.go (100%) rename src/runtime/{virtcontainers => }/pkg/resourcecontrol/controller.go (100%) rename src/runtime/{virtcontainers => }/pkg/resourcecontrol/utils.go (100%) rename src/runtime/{virtcontainers => }/pkg/resourcecontrol/utils_linux.go (100%) rename src/runtime/{virtcontainers => }/pkg/resourcecontrol/utils_linux_test.go (100%) diff --git a/src/runtime/virtcontainers/pkg/resourcecontrol/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go similarity index 100% rename from src/runtime/virtcontainers/pkg/resourcecontrol/cgroups.go rename to src/runtime/pkg/resourcecontrol/cgroups.go diff --git a/src/runtime/virtcontainers/pkg/resourcecontrol/controller.go b/src/runtime/pkg/resourcecontrol/controller.go similarity index 100% rename from src/runtime/virtcontainers/pkg/resourcecontrol/controller.go rename to src/runtime/pkg/resourcecontrol/controller.go diff --git a/src/runtime/virtcontainers/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go similarity index 100% rename from src/runtime/virtcontainers/pkg/resourcecontrol/utils.go rename to src/runtime/pkg/resourcecontrol/utils.go diff --git 
a/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go similarity index 100% rename from src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux.go rename to src/runtime/pkg/resourcecontrol/utils_linux.go diff --git a/src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux_test.go b/src/runtime/pkg/resourcecontrol/utils_linux_test.go similarity index 100% rename from src/runtime/virtcontainers/pkg/resourcecontrol/utils_linux_test.go rename to src/runtime/pkg/resourcecontrol/utils_linux_test.go diff --git a/src/runtime/virtcontainers/api.go b/src/runtime/virtcontainers/api.go index 4275626e8..4b85851cf 100644 --- a/src/runtime/virtcontainers/api.go +++ b/src/runtime/virtcontainers/api.go @@ -10,10 +10,10 @@ import ( "runtime" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" deviceApi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" deviceConfig "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" - resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/sirupsen/logrus" ) diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index d793ba43e..731e982f5 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -15,10 +15,10 @@ import ( "testing" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs" 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/mock" - resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" specs "github.com/opencontainers/runtime-spec/specs-go" diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index e95362f58..b3f68a88d 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -19,6 +19,7 @@ import ( "github.com/docker/go-units" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" @@ -27,7 +28,6 @@ import ( kataclient "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" diff --git a/src/runtime/virtcontainers/physical_endpoint.go b/src/runtime/virtcontainers/physical_endpoint.go index 845400e09..48ff18219 
100644 --- a/src/runtime/virtcontainers/physical_endpoint.go +++ b/src/runtime/virtcontainers/physical_endpoint.go @@ -14,10 +14,10 @@ import ( "path/filepath" "strings" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api" - resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/safchain/ethtool" ) diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 94b94a2e7..f37316844 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -26,6 +26,7 @@ import ( "github.com/vishvananda/netlink" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" @@ -38,7 +39,6 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cpuset" - resCtrl "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" vcTypes 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" From fec26f8e512bf191cf23fc4725e67085c9a77c13 Mon Sep 17 00:00:00 2001 From: Francesco Giudici Date: Wed, 23 Feb 2022 18:10:37 +0100 Subject: [PATCH 27/71] kata-monitor: trivial: rename symbols & labels We introduced collection of sandboxes metadata from the CRI that will be attached to the sandbox metrics: this will allow to immediately match sandboxes metrics with CRI workloads. Rename the symbols from *Kube* to *CRI* as the metadata will be there every time pods are created through CRI, also if kubernetes is not installed (e.g., 'crictl runp'). Signed-off-by: Francesco Giudici --- src/runtime/pkg/kata-monitor/cri.go | 8 ++++---- src/runtime/pkg/kata-monitor/metrics.go | 14 +++++++------- src/runtime/pkg/kata-monitor/metrics_test.go | 8 ++++---- src/runtime/pkg/kata-monitor/monitor.go | 8 ++++---- src/runtime/pkg/kata-monitor/sandbox_cache.go | 12 ++++++------ src/runtime/pkg/kata-monitor/sandbox_cache_test.go | 6 +++--- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/runtime/pkg/kata-monitor/cri.go b/src/runtime/pkg/kata-monitor/cri.go index 0e9e06884..f32fae1a9 100644 --- a/src/runtime/pkg/kata-monitor/cri.go +++ b/src/runtime/pkg/kata-monitor/cri.go @@ -141,7 +141,7 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) { for _, pod := range r.Items { for _, sandbox := range sandboxList { if pod.Id == sandbox { - km.sandboxCache.setMetadata(sandbox, sandboxKubeData{ + km.sandboxCache.setCRIMetadata(sandbox, sandboxCRIMetadata{ uid: pod.Metadata.Uid, name: pod.Metadata.Name, namespace: pod.Metadata.Namespace, @@ -150,9 +150,9 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) { sandboxList = removeFromSandboxList(sandboxList, sandbox) monitorLog.WithFields(logrus.Fields{ - "Pod Name": pod.Metadata.Name, - "Pod Namespace": pod.Metadata.Namespace, - "Pod UID": pod.Metadata.Uid, + "cri-name": pod.Metadata.Name, + 
"cri-namespace": pod.Metadata.Namespace, + "cri-uid": pod.Metadata.Uid, }).Debugf("Synced KATA POD %s", pod.Id) break diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index 0216969cb..84098c88f 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -160,12 +160,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { // get metrics from sandbox's shim for _, sandboxID := range sandboxes { - sandboxMetadata, ok := km.sandboxCache.getMetadata(sandboxID) + sandboxMetadata, ok := km.sandboxCache.getCRIMetadata(sandboxID) if !ok { // likely the sandbox has been just removed continue } wg.Add(1) - go func(sandboxID string, sandboxMetadata sandboxKubeData, results chan<- []*dto.MetricFamily) { + go func(sandboxID string, sandboxMetadata sandboxCRIMetadata, results chan<- []*dto.MetricFamily) { sandboxMetrics, err := getParsedMetrics(sandboxID, sandboxMetadata) if err != nil { monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox") @@ -223,7 +223,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } -func getParsedMetrics(sandboxID string, sandboxMetadata sandboxKubeData) ([]*dto.MetricFamily, error) { +func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*dto.MetricFamily, error) { body, err := doGet(sandboxID, defaultTimeout, "metrics") if err != nil { return nil, err @@ -244,7 +244,7 @@ func GetSandboxMetrics(sandboxID string) (string, error) { // parsePrometheusMetrics will decode metrics from Prometheus text format // and return array of *dto.MetricFamily with an ASC order -func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, body []byte) ([]*dto.MetricFamily, error) { +func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata, body []byte) ([]*dto.MetricFamily, error) { reader := 
bytes.NewReader(body) decoder := expfmt.NewDecoder(reader, expfmt.FmtText) @@ -268,15 +268,15 @@ func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, b Value: mutils.String2Pointer(sandboxID), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_uid"), + Name: mutils.String2Pointer("cri_uid"), Value: mutils.String2Pointer(sandboxMetadata.uid), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_name"), + Name: mutils.String2Pointer("cri_name"), Value: mutils.String2Pointer(sandboxMetadata.name), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_namespace"), + Name: mutils.String2Pointer("cri_namespace"), Value: mutils.String2Pointer(sandboxMetadata.namespace), }, ) diff --git a/src/runtime/pkg/kata-monitor/metrics_test.go b/src/runtime/pkg/kata-monitor/metrics_test.go index 1055a6d36..4402911d9 100644 --- a/src/runtime/pkg/kata-monitor/metrics_test.go +++ b/src/runtime/pkg/kata-monitor/metrics_test.go @@ -40,7 +40,7 @@ ttt 999 func TestParsePrometheusMetrics(t *testing.T) { assert := assert.New(t) sandboxID := "sandboxID-abc" - sandboxMetadata := sandboxKubeData{"123", "pod-name", "pod-namespace"} + sandboxMetadata := sandboxCRIMetadata{"123", "pod-name", "pod-namespace"} // parse metrics list, err := parsePrometheusMetrics(sandboxID, sandboxMetadata, []byte(shimMetricBody)) @@ -60,12 +60,12 @@ func TestParsePrometheusMetrics(t *testing.T) { assert.Equal(4, len(m.Label), "should have 4 labels") assert.Equal("sandbox_id", *m.Label[0].Name, "label name should be sandbox_id") assert.Equal(sandboxID, *m.Label[0].Value, "label value should be", sandboxID) - assert.Equal("kube_uid", *m.Label[1].Name, "label name should be kube_uid") + assert.Equal("cri_uid", *m.Label[1].Name, "label name should be cri_uid") assert.Equal(sandboxMetadata.uid, *m.Label[1].Value, "label value should be", sandboxMetadata.uid) - assert.Equal("kube_name", *m.Label[2].Name, "label name should be kube_name") + assert.Equal("cri_name", *m.Label[2].Name, "label 
name should be cri_name") assert.Equal(sandboxMetadata.name, *m.Label[2].Value, "label value should be", sandboxMetadata.name) - assert.Equal("kube_namespace", *m.Label[3].Name, "label name should be kube_namespace") + assert.Equal("cri_namespace", *m.Label[3].Name, "label name should be cri_namespace") assert.Equal(sandboxMetadata.namespace, *m.Label[3].Value, "label value should be", sandboxMetadata.namespace) summary := m.Summary diff --git a/src/runtime/pkg/kata-monitor/monitor.go b/src/runtime/pkg/kata-monitor/monitor.go index 4a1a2d2f5..9004cf103 100644 --- a/src/runtime/pkg/kata-monitor/monitor.go +++ b/src/runtime/pkg/kata-monitor/monitor.go @@ -53,7 +53,7 @@ func NewKataMonitor(runtimeEndpoint string) (*KataMonitor, error) { runtimeEndpoint: runtimeEndpoint, sandboxCache: &sandboxCache{ Mutex: &sync.Mutex{}, - sandboxes: make(map[string]sandboxKubeData), + sandboxes: make(map[string]sandboxCRIMetadata), }, } @@ -105,13 +105,13 @@ func (km *KataMonitor) startPodCacheUpdater() { os.Exit(1) } for _, sandbox := range sandboxList { - km.sandboxCache.putIfNotExists(sandbox, sandboxKubeData{}) + km.sandboxCache.putIfNotExists(sandbox, sandboxCRIMetadata{}) } monitorLog.Debug("initial sync of sbs directory completed") monitorLog.Tracef("pod list from sbs: %v", sandboxList) - // We should get kubernetes metadata from the container manager for each new kata sandbox we detect. + // We try to get CRI (kubernetes) metadata from the container manager for each new kata sandbox we detect. // It may take a while for data to be available, so we always wait podCacheRefreshDelaySeconds before checking. 
cacheUpdateTimer := time.NewTimer(podCacheRefreshDelaySeconds * time.Second) cacheUpdateTimerIsSet := true @@ -127,7 +127,7 @@ func (km *KataMonitor) startPodCacheUpdater() { case fsnotify.Create: splitPath := strings.Split(event.Name, string(os.PathSeparator)) id := splitPath[len(splitPath)-1] - if !km.sandboxCache.putIfNotExists(id, sandboxKubeData{}) { + if !km.sandboxCache.putIfNotExists(id, sandboxCRIMetadata{}) { monitorLog.WithField("pod", id).Warn( "CREATE event but pod already present in the sandbox cache") } diff --git a/src/runtime/pkg/kata-monitor/sandbox_cache.go b/src/runtime/pkg/kata-monitor/sandbox_cache.go index ba98a121f..5557aa26e 100644 --- a/src/runtime/pkg/kata-monitor/sandbox_cache.go +++ b/src/runtime/pkg/kata-monitor/sandbox_cache.go @@ -9,15 +9,15 @@ import ( "sync" ) -type sandboxKubeData struct { +type sandboxCRIMetadata struct { uid string name string namespace string } type sandboxCache struct { *sync.Mutex - // the sandboxKubeData links the sandbox id from the container manager to the pod metadata of kubernetes - sandboxes map[string]sandboxKubeData + // the sandboxCRIMetadata links the sandbox id from the container manager to the pod metadata of kubernetes + sandboxes map[string]sandboxCRIMetadata } func (sc *sandboxCache) getSandboxList() []string { @@ -43,7 +43,7 @@ func (sc *sandboxCache) deleteIfExists(id string) bool { return false } -func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool { +func (sc *sandboxCache) putIfNotExists(id string, value sandboxCRIMetadata) bool { sc.Lock() defer sc.Unlock() @@ -56,14 +56,14 @@ func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool { return false } -func (sc *sandboxCache) setMetadata(id string, value sandboxKubeData) { +func (sc *sandboxCache) setCRIMetadata(id string, value sandboxCRIMetadata) { sc.Lock() defer sc.Unlock() sc.sandboxes[id] = value } -func (sc *sandboxCache) getMetadata(id string) (sandboxKubeData, bool) { +func (sc 
*sandboxCache) getCRIMetadata(id string) (sandboxCRIMetadata, bool) { sc.Lock() defer sc.Unlock() diff --git a/src/runtime/pkg/kata-monitor/sandbox_cache_test.go b/src/runtime/pkg/kata-monitor/sandbox_cache_test.go index 4eedf778a..d63925573 100644 --- a/src/runtime/pkg/kata-monitor/sandbox_cache_test.go +++ b/src/runtime/pkg/kata-monitor/sandbox_cache_test.go @@ -16,19 +16,19 @@ func TestSandboxCache(t *testing.T) { assert := assert.New(t) sc := &sandboxCache{ Mutex: &sync.Mutex{}, - sandboxes: map[string]sandboxKubeData{"111": {"1-2-3", "test-name", "test-namespace"}}, + sandboxes: map[string]sandboxCRIMetadata{"111": {"1-2-3", "test-name", "test-namespace"}}, } assert.Equal(1, len(sc.getSandboxList())) // put new item id := "new-id" - b := sc.putIfNotExists(id, sandboxKubeData{}) + b := sc.putIfNotExists(id, sandboxCRIMetadata{}) assert.Equal(true, b) assert.Equal(2, len(sc.getSandboxList())) // put key that alreay exists - b = sc.putIfNotExists(id, sandboxKubeData{}) + b = sc.putIfNotExists(id, sandboxCRIMetadata{}) assert.Equal(false, b) b = sc.deleteIfExists(id) From cb4230e60eca3e5c49cde6d3551babc6d8ed562e Mon Sep 17 00:00:00 2001 From: Amulyam24 Date: Thu, 24 Feb 2022 13:34:37 +0530 Subject: [PATCH 28/71] runtime: fix package declaration for ppc64le Incorrect package name causes build to fail. 
Fix it in vmm_ppc64le.go Fixes: #3761 Signed-off-by: Amulyam24 --- src/runtime/pkg/govmm/vmm_ppc64le.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/pkg/govmm/vmm_ppc64le.go b/src/runtime/pkg/govmm/vmm_ppc64le.go index 91ef70f86..600b083b7 100644 --- a/src/runtime/pkg/govmm/vmm_ppc64le.go +++ b/src/runtime/pkg/govmm/vmm_ppc64le.go @@ -4,7 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package qemu +package govmm // MaxVCPUs returns the maximum number of vCPUs supported func MaxVCPUs() uint32 { From b2a65f9031368f5932807c61a018394f5804e96c Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Thu, 24 Feb 2022 12:15:47 +0100 Subject: [PATCH 29/71] virtcontainers: Use available s390x hugepages in TestHandleHugepages. On s390x, hugepage sizes must be set at boot, so test with any that are present (default is 1M). Depends-on: github.com/kata-containers/kata-containers#3770 Fixes: #3763 Signed-off-by: Jakob Naucke --- src/runtime/go.mod | 1 + src/runtime/go.sum | 5 +- .../code.cloudfoundry.org/bytefmt/LICENSE | 201 ++++++++++++++++++ .../code.cloudfoundry.org/bytefmt/NOTICE | 20 ++ .../code.cloudfoundry.org/bytefmt/README.md | 19 ++ .../code.cloudfoundry.org/bytefmt/bytes.go | 121 +++++++++++ .../code.cloudfoundry.org/bytefmt/go.mod | 8 + .../code.cloudfoundry.org/bytefmt/go.sum | 93 ++++++++ .../code.cloudfoundry.org/bytefmt/package.go | 1 + src/runtime/vendor/modules.txt | 3 + src/runtime/virtcontainers/kata_agent_test.go | 47 ++-- 11 files changed, 500 insertions(+), 19 deletions(-) create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod create mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum create
mode 100644 src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go diff --git a/src/runtime/go.mod b/src/runtime/go.mod index 321a5eb1f..2cf60759c 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -3,6 +3,7 @@ module github.com/kata-containers/kata-containers/src/runtime go 1.14 require ( + code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 github.com/BurntSushi/toml v0.3.1 github.com/blang/semver v3.5.1+incompatible github.com/blang/semver/v4 v4.0.0 diff --git a/src/runtime/go.sum b/src/runtime/go.sum index 3417ff96a..919790fe1 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -33,6 +33,8 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 h1:tM5+dn2C9xZw1RzgI6WTQW1rGqdUimKB3RFbyu4h6Hc= +code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5/go.mod h1:v4VVB6oBMz/c9fRY6vZrwr5xKRWOH5NPDjQZlPk0Gbs= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20210715213245-6c3934b029d8/go.mod h1:CzsSbkDixRphAF5hS6wbMKq0eI6ccJRb7/A0M6JBnwg= github.com/Azure/azure-sdk-for-go v16.2.1+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= @@ -686,8 +688,9 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.10.3/go.mod h1:V9xEwhxec5O8UDM77eCW8vLymOMltsqPVYWrpDsH8xc= -github.com/onsi/gomega v1.15.0 
h1:WjP/FQ/sk43MRmnEcT+MlDw2TFvkrXlprrPST/IudjU= github.com/onsi/gomega v1.15.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+tEHG0= +github.com/onsi/gomega v1.16.0 h1:6gjqkI8iiRHMvdccRJM8rVKjCWk6ZIm6FTm3ddIe4/c= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/opencontainers/go-digest v0.0.0-20170106003457-a6d0ee40d420/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v0.0.0-20180430190053-c9281466c8b2/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE new file mode 100644 index 000000000..f49a4e16e --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE new file mode 100644 index 000000000..8625a7f41 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE @@ -0,0 +1,20 @@ +Copyright (c) 2015-Present CloudFoundry.org Foundation, Inc. All Rights Reserved. + +This project contains software that is Copyright (c) 2013-2015 Pivotal Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This project may include a number of subcomponents with separate +copyright notices and license terms. Your use of these subcomponents +is subject to the terms and conditions of each subcomponent's license, +as noted in the LICENSE file. 
diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md new file mode 100644 index 000000000..778fd386d --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md @@ -0,0 +1,19 @@ +bytefmt +======= + +**Note**: This repository should be imported as `code.cloudfoundry.org/bytefmt`. + +Human-readable byte formatter. + +Example: + +```go +bytefmt.ByteSize(100.5*bytefmt.MEGABYTE) // returns "100.5M" +bytefmt.ByteSize(uint64(1024)) // returns "1K" +``` + +For documentation, please see http://godoc.org/code.cloudfoundry.org/bytefmt + +## Reporting issues and requesting features + +Please report all issues and feature requests in [cloudfoundry/diego-release](https://github.com/cloudfoundry/diego-release/issues). diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go new file mode 100644 index 000000000..739560477 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go @@ -0,0 +1,121 @@ +// Package bytefmt contains helper methods and constants for converting to and from a human-readable byte format. +// +// bytefmt.ByteSize(100.5*bytefmt.MEGABYTE) // "100.5M" +// bytefmt.ByteSize(uint64(1024)) // "1K" +// +package bytefmt + +import ( + "errors" + "strconv" + "strings" + "unicode" +) + +const ( + BYTE = 1 << (10 * iota) + KILOBYTE + MEGABYTE + GIGABYTE + TERABYTE + PETABYTE + EXABYTE +) + +var invalidByteQuantityError = errors.New("byte quantity must be a positive integer with a unit of measurement like M, MB, MiB, G, GiB, or GB") + +// ByteSize returns a human-readable byte string of the form 10M, 12.5K, and so forth. The following units are available: +// E: Exabyte +// P: Petabyte +// T: Terabyte +// G: Gigabyte +// M: Megabyte +// K: Kilobyte +// B: Byte +// The unit that results in the smallest number greater than or equal to 1 is always chosen. 
+func ByteSize(bytes uint64) string { + unit := "" + value := float64(bytes) + + switch { + case bytes >= EXABYTE: + unit = "E" + value = value / EXABYTE + case bytes >= PETABYTE: + unit = "P" + value = value / PETABYTE + case bytes >= TERABYTE: + unit = "T" + value = value / TERABYTE + case bytes >= GIGABYTE: + unit = "G" + value = value / GIGABYTE + case bytes >= MEGABYTE: + unit = "M" + value = value / MEGABYTE + case bytes >= KILOBYTE: + unit = "K" + value = value / KILOBYTE + case bytes >= BYTE: + unit = "B" + case bytes == 0: + return "0B" + } + + result := strconv.FormatFloat(value, 'f', 1, 64) + result = strings.TrimSuffix(result, ".0") + return result + unit +} + +// ToMegabytes parses a string formatted by ByteSize as megabytes. +func ToMegabytes(s string) (uint64, error) { + bytes, err := ToBytes(s) + if err != nil { + return 0, err + } + + return bytes / MEGABYTE, nil +} + +// ToBytes parses a string formatted by ByteSize as bytes. Note binary-prefixed and SI prefixed units both mean a base-2 units +// KB = K = KiB = 1024 +// MB = M = MiB = 1024 * K +// GB = G = GiB = 1024 * M +// TB = T = TiB = 1024 * G +// PB = P = PiB = 1024 * T +// EB = E = EiB = 1024 * P +func ToBytes(s string) (uint64, error) { + s = strings.TrimSpace(s) + s = strings.ToUpper(s) + + i := strings.IndexFunc(s, unicode.IsLetter) + + if i == -1 { + return 0, invalidByteQuantityError + } + + bytesString, multiple := s[:i], s[i:] + bytes, err := strconv.ParseFloat(bytesString, 64) + if err != nil || bytes < 0 { + return 0, invalidByteQuantityError + } + + switch multiple { + case "E", "EB", "EIB": + return uint64(bytes * EXABYTE), nil + case "P", "PB", "PIB": + return uint64(bytes * PETABYTE), nil + case "T", "TB", "TIB": + return uint64(bytes * TERABYTE), nil + case "G", "GB", "GIB": + return uint64(bytes * GIGABYTE), nil + case "M", "MB", "MIB": + return uint64(bytes * MEGABYTE), nil + case "K", "KB", "KIB": + return uint64(bytes * KILOBYTE), nil + case "B": + return uint64(bytes), 
nil + default: + return 0, invalidByteQuantityError + } +} diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod new file mode 100644 index 000000000..dfc6cce21 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod @@ -0,0 +1,8 @@ +module code.cloudfoundry.org/bytefmt + +go 1.16 + +require ( + github.com/onsi/ginkgo v1.16.4 + github.com/onsi/gomega v1.16.0 +) diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum new file mode 100644 index 000000000..a881b8539 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum @@ -0,0 +1,93 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod 
h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.16.0 h1:6gjqkI8iiRHMvdccRJM8rVKjCWk6ZIm6FTm3ddIe4/c= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/yuin/goldmark v1.2.1/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781 h1:DzZ89McO9/gWPsQXS/FVKAlG02ZjaQ6AlZRBimEYOd0= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys 
v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go 
b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go new file mode 100644 index 000000000..03429300b --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go @@ -0,0 +1 @@ +package bytefmt // import "code.cloudfoundry.org/bytefmt" diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index 13b3ae350..77090fae8 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -1,3 +1,6 @@ +# code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 +## explicit +code.cloudfoundry.org/bytefmt # github.com/BurntSushi/toml v0.3.1 ## explicit github.com/BurntSushi/toml diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 6475e8b06..085823b7e 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -14,6 +14,7 @@ import ( "path" "path/filepath" "reflect" + "runtime" "strings" "syscall" "testing" @@ -21,6 +22,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" + "code.cloudfoundry.org/bytefmt" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" @@ -35,6 +37,8 @@ import ( vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ) +const sysHugepagesDir = "/sys/kernel/mm/hugepages" + var ( testBlkDriveFormat = "testBlkDriveFormat" testBlockDeviceCtrPath = "testBlockDeviceCtrPath" @@ -1244,18 +1248,36 @@ func TestHandleHugepages(t *testing.T) { defer os.RemoveAll(dir) k := kataAgent{} + var formattedSizes []string var mounts []specs.Mount var hugepageLimits []specs.LinuxHugepageLimit - hugepageDirs := [2]string{"hugepages-1Gi", "hugepages-2Mi"} - options := [2]string{"pagesize=1024M", "pagesize=2M"} 
+ // On s390x, hugepage sizes must be set at boot and cannot be created ad hoc. Use any that + // are present (default is 1M, can only be changed on LPAR). See + // https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf, p. 345 for more information. + if runtime.GOARCH == "s390x" { + dirs, err := ioutil.ReadDir(sysHugepagesDir) + assert.Nil(err) + for _, dir := range dirs { + formattedSizes = append(formattedSizes, strings.TrimPrefix(dir.Name(), "hugepages-")) + } + } else { + formattedSizes = []string{"1G", "2M"} + } - for i := 0; i < 2; i++ { - target := path.Join(dir, hugepageDirs[i]) - err := os.MkdirAll(target, 0777) + for _, formattedSize := range formattedSizes { + bytes, err := bytefmt.ToBytes(formattedSize) + assert.Nil(err) + hugepageLimits = append(hugepageLimits, specs.LinuxHugepageLimit{ + Pagesize: formattedSize, + Limit: 1_000_000 * bytes, + }) + + target := path.Join(dir, fmt.Sprintf("hugepages-%s", formattedSize)) + err = os.MkdirAll(target, 0777) assert.NoError(err, "Unable to create dir %s", target) - err = syscall.Mount("nodev", target, "hugetlbfs", uintptr(0), options[i]) + err = syscall.Mount("nodev", target, "hugetlbfs", uintptr(0), fmt.Sprintf("pagesize=%s", formattedSize)) assert.NoError(err, "Unable to mount %s", target) defer syscall.Unmount(target, 0) @@ -1267,21 +1289,10 @@ func TestHandleHugepages(t *testing.T) { mounts = append(mounts, mount) } - hugepageLimits = []specs.LinuxHugepageLimit{ - { - Pagesize: "1GB", - Limit: 1073741824, - }, - { - Pagesize: "2MB", - Limit: 134217728, - }, - } - hugepages, err := k.handleHugepages(mounts, hugepageLimits) assert.NoError(err, "Unable to handle hugepages %v", hugepageLimits) assert.NotNil(hugepages) - assert.Equal(len(hugepages), 2) + assert.Equal(len(hugepages), len(formattedSizes)) } From 4c164afbac3df0611a03f7dc774833ee51184178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 09:39:51 +0100 Subject: [PATCH 30/71] versions: Update Cloud Hypervisor 
to 5343e09e7b8db MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's bump the Cloud Hypervisor version to 5343e09e7b8db, as that brings a few fixes we're interested in, such as: * hypervisor, vmm: Handle TDX hypercalls with INVALID_OPERAND - https://github.com/cloud-hypervisor/cloud-hypervisor/pull/3723 - This is needed for the TDX support on the cloud hypervisor driver, which is part of this very same series. * openapi: Update the PciBdf types - https://github.com/cloud-hypervisor/cloud-hypervisor/pull/3748 - This is needed due to a change in a DeviceNode field, which would cause a marshalling / demarshalling error when running with a version of cloud-hypervisor that includes the TDX fixes mentioned above. * scripts: dev_cli: Don't quote $features_build * scripts: dev_cli: Add --features option - https://github.com/cloud-hypervisor/cloud-hypervisor/pull/3773 - This is needed due to changes in the scripts used to build Cloud Hypervisor, which are used as part of Kata Containers CIs and github actions. Due to this change, we're also adapting the build scripts as part of this very same commit. 
Signed-off-by: Fabiano Fidêncio --- .../cloud-hypervisor/client/api/openapi.yaml | 23 +++++++++--- .../client/docs/DefaultApi.md | 4 +-- .../client/docs/DeviceNode.md | 8 ++--- .../client/docs/ReceiveMigrationData.md | 9 ++--- .../client/docs/SendMigrationData.md | 9 ++--- .../client/model_device_node.go | 12 +++---- .../client/model_receive_migration_data.go | 33 +++++++---------- .../client/model_send_migration_data.go | 35 ++++++++----------- .../cloud-hypervisor/cloud-hypervisor.yaml | 19 ++++++++-- .../local-build/kata-deploy-binaries.sh | 6 +++- .../cloud-hypervisor/build-static-clh.sh | 6 ++-- versions.yaml | 2 +- 12 files changed, 88 insertions(+), 78 deletions(-) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml index b820e01cc..4a168e53e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml @@ -181,6 +181,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "404": description: The new device could not be added to the VM instance. summary: Add a new device to the VM @@ -215,6 +217,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new disk was successfully added to the VM instance. + "204": + description: The new disk was successfully (cold) added to the VM instance. "500": description: The new disk could not be added to the VM instance. summary: Add a new disk to the VM @@ -234,6 +238,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance.
"500": description: The new device could not be added to the VM instance. summary: Add a new virtio-fs device to the VM @@ -253,6 +259,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. summary: Add a new pmem device to the VM @@ -272,6 +280,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. summary: Add a new network device to the VM @@ -291,6 +301,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. 
summary: Add a new vsock device to the VM @@ -632,7 +644,7 @@ components: children: - children - children - pci_bdf: 3 + pci_bdf: pci_bdf resources: - '{}' - '{}' @@ -663,7 +675,7 @@ components: children: - children - children - pci_bdf: 3 + pci_bdf: pci_bdf resources: - '{}' - '{}' @@ -680,8 +692,7 @@ components: type: string type: array pci_bdf: - format: int32 - type: integer + type: string type: object VmCounters: additionalProperties: @@ -1757,6 +1768,8 @@ components: properties: receiver_url: type: string + required: + - receiver_url type: object SendMigrationData: example: @@ -1767,4 +1780,6 @@ components: type: string local: type: boolean + required: + - destination_url type: object diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md index 3c2c8821d..c5ca050b6 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md @@ -1071,7 +1071,7 @@ import ( ) func main() { - receiveMigrationData := *openapiclient.NewReceiveMigrationData() // ReceiveMigrationData | The URL for the reception of migration state + receiveMigrationData := *openapiclient.NewReceiveMigrationData("ReceiverUrl_example") // ReceiveMigrationData | The URL for the reception of migration state configuration := openapiclient.NewConfiguration() api_client := openapiclient.NewAPIClient(configuration) @@ -1381,7 +1381,7 @@ import ( ) func main() { - sendMigrationData := *openapiclient.NewSendMigrationData() // SendMigrationData | The URL for sending the migration state + sendMigrationData := *openapiclient.NewSendMigrationData("DestinationUrl_example") // SendMigrationData | The URL for sending the migration state configuration := openapiclient.NewConfiguration() api_client := openapiclient.NewAPIClient(configuration) diff --git 
a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md index 575547055..02863e513 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **Id** | Pointer to **string** | | [optional] **Resources** | Pointer to **[]map[string]interface{}** | | [optional] **Children** | Pointer to **[]string** | | [optional] -**PciBdf** | Pointer to **int32** | | [optional] +**PciBdf** | Pointer to **string** | | [optional] ## Methods @@ -105,20 +105,20 @@ HasChildren returns a boolean if a field has been set. ### GetPciBdf -`func (o *DeviceNode) GetPciBdf() int32` +`func (o *DeviceNode) GetPciBdf() string` GetPciBdf returns the PciBdf field if non-nil, zero value otherwise. ### GetPciBdfOk -`func (o *DeviceNode) GetPciBdfOk() (*int32, bool)` +`func (o *DeviceNode) GetPciBdfOk() (*string, bool)` GetPciBdfOk returns a tuple with the PciBdf field if it's non-nil, zero value otherwise and a boolean to check if the value has been set. ### SetPciBdf -`func (o *DeviceNode) SetPciBdf(v int32)` +`func (o *DeviceNode) SetPciBdf(v string)` SetPciBdf sets PciBdf field to given value. 
diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md index 652bf96bc..394d47ceb 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md @@ -4,13 +4,13 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**ReceiverUrl** | Pointer to **string** | | [optional] +**ReceiverUrl** | **string** | | ## Methods ### NewReceiveMigrationData -`func NewReceiveMigrationData() *ReceiveMigrationData` +`func NewReceiveMigrationData(receiverUrl string, ) *ReceiveMigrationData` NewReceiveMigrationData instantiates a new ReceiveMigrationData object This constructor will assign default values to properties that have it defined, @@ -44,11 +44,6 @@ and a boolean to check if the value has been set. SetReceiverUrl sets ReceiverUrl field to given value. -### HasReceiverUrl - -`func (o *ReceiveMigrationData) HasReceiverUrl() bool` - -HasReceiverUrl returns a boolean if a field has been set. 
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md index 03edd489e..d3b6a5c1b 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md @@ -4,14 +4,14 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**DestinationUrl** | Pointer to **string** | | [optional] +**DestinationUrl** | **string** | | **Local** | Pointer to **bool** | | [optional] ## Methods ### NewSendMigrationData -`func NewSendMigrationData() *SendMigrationData` +`func NewSendMigrationData(destinationUrl string, ) *SendMigrationData` NewSendMigrationData instantiates a new SendMigrationData object This constructor will assign default values to properties that have it defined, @@ -45,11 +45,6 @@ and a boolean to check if the value has been set. SetDestinationUrl sets DestinationUrl field to given value. -### HasDestinationUrl - -`func (o *SendMigrationData) HasDestinationUrl() bool` - -HasDestinationUrl returns a boolean if a field has been set. 
### GetLocal diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go index ef6aab589..012f97196 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go @@ -19,7 +19,7 @@ type DeviceNode struct { Id *string `json:"id,omitempty"` Resources *[]map[string]interface{} `json:"resources,omitempty"` Children *[]string `json:"children,omitempty"` - PciBdf *int32 `json:"pci_bdf,omitempty"` + PciBdf *string `json:"pci_bdf,omitempty"` } // NewDeviceNode instantiates a new DeviceNode object @@ -136,9 +136,9 @@ func (o *DeviceNode) SetChildren(v []string) { } // GetPciBdf returns the PciBdf field value if set, zero value otherwise. -func (o *DeviceNode) GetPciBdf() int32 { +func (o *DeviceNode) GetPciBdf() string { if o == nil || o.PciBdf == nil { - var ret int32 + var ret string return ret } return *o.PciBdf @@ -146,7 +146,7 @@ func (o *DeviceNode) GetPciBdf() int32 { // GetPciBdfOk returns a tuple with the PciBdf field value if set, nil otherwise // and a boolean to check if the value has been set. -func (o *DeviceNode) GetPciBdfOk() (*int32, bool) { +func (o *DeviceNode) GetPciBdfOk() (*string, bool) { if o == nil || o.PciBdf == nil { return nil, false } @@ -162,8 +162,8 @@ func (o *DeviceNode) HasPciBdf() bool { return false } -// SetPciBdf gets a reference to the given int32 and assigns it to the PciBdf field. -func (o *DeviceNode) SetPciBdf(v int32) { +// SetPciBdf gets a reference to the given string and assigns it to the PciBdf field. 
+func (o *DeviceNode) SetPciBdf(v string) { o.PciBdf = &v } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go index fd7e6eff7..7549055d5 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go @@ -16,15 +16,16 @@ import ( // ReceiveMigrationData struct for ReceiveMigrationData type ReceiveMigrationData struct { - ReceiverUrl *string `json:"receiver_url,omitempty"` + ReceiverUrl string `json:"receiver_url"` } // NewReceiveMigrationData instantiates a new ReceiveMigrationData object // This constructor will assign default values to properties that have it defined, // and makes sure properties required by API are set, but the set of arguments // will change when the set of required properties is changed -func NewReceiveMigrationData() *ReceiveMigrationData { +func NewReceiveMigrationData(receiverUrl string) *ReceiveMigrationData { this := ReceiveMigrationData{} + this.ReceiverUrl = receiverUrl return &this } @@ -36,41 +37,33 @@ func NewReceiveMigrationDataWithDefaults() *ReceiveMigrationData { return &this } -// GetReceiverUrl returns the ReceiverUrl field value if set, zero value otherwise. +// GetReceiverUrl returns the ReceiverUrl field value func (o *ReceiveMigrationData) GetReceiverUrl() string { - if o == nil || o.ReceiverUrl == nil { + if o == nil { var ret string return ret } - return *o.ReceiverUrl + + return o.ReceiverUrl } -// GetReceiverUrlOk returns a tuple with the ReceiverUrl field value if set, nil otherwise +// GetReceiverUrlOk returns a tuple with the ReceiverUrl field value // and a boolean to check if the value has been set. 
func (o *ReceiveMigrationData) GetReceiverUrlOk() (*string, bool) { - if o == nil || o.ReceiverUrl == nil { + if o == nil { return nil, false } - return o.ReceiverUrl, true + return &o.ReceiverUrl, true } -// HasReceiverUrl returns a boolean if a field has been set. -func (o *ReceiveMigrationData) HasReceiverUrl() bool { - if o != nil && o.ReceiverUrl != nil { - return true - } - - return false -} - -// SetReceiverUrl gets a reference to the given string and assigns it to the ReceiverUrl field. +// SetReceiverUrl sets field value func (o *ReceiveMigrationData) SetReceiverUrl(v string) { - o.ReceiverUrl = &v + o.ReceiverUrl = v } func (o ReceiveMigrationData) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} - if o.ReceiverUrl != nil { + if true { toSerialize["receiver_url"] = o.ReceiverUrl } return json.Marshal(toSerialize) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go index 11f913bf0..6ceeedb7e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go @@ -16,16 +16,17 @@ import ( // SendMigrationData struct for SendMigrationData type SendMigrationData struct { - DestinationUrl *string `json:"destination_url,omitempty"` - Local *bool `json:"local,omitempty"` + DestinationUrl string `json:"destination_url"` + Local *bool `json:"local,omitempty"` } // NewSendMigrationData instantiates a new SendMigrationData object // This constructor will assign default values to properties that have it defined, // and makes sure properties required by API are set, but the set of arguments // will change when the set of required properties is changed -func NewSendMigrationData() *SendMigrationData { +func NewSendMigrationData(destinationUrl string) *SendMigrationData { this := SendMigrationData{} + 
this.DestinationUrl = destinationUrl return &this } @@ -37,36 +38,28 @@ func NewSendMigrationDataWithDefaults() *SendMigrationData { return &this } -// GetDestinationUrl returns the DestinationUrl field value if set, zero value otherwise. +// GetDestinationUrl returns the DestinationUrl field value func (o *SendMigrationData) GetDestinationUrl() string { - if o == nil || o.DestinationUrl == nil { + if o == nil { var ret string return ret } - return *o.DestinationUrl + + return o.DestinationUrl } -// GetDestinationUrlOk returns a tuple with the DestinationUrl field value if set, nil otherwise +// GetDestinationUrlOk returns a tuple with the DestinationUrl field value // and a boolean to check if the value has been set. func (o *SendMigrationData) GetDestinationUrlOk() (*string, bool) { - if o == nil || o.DestinationUrl == nil { + if o == nil { return nil, false } - return o.DestinationUrl, true + return &o.DestinationUrl, true } -// HasDestinationUrl returns a boolean if a field has been set. -func (o *SendMigrationData) HasDestinationUrl() bool { - if o != nil && o.DestinationUrl != nil { - return true - } - - return false -} - -// SetDestinationUrl gets a reference to the given string and assigns it to the DestinationUrl field. +// SetDestinationUrl sets field value func (o *SendMigrationData) SetDestinationUrl(v string) { - o.DestinationUrl = &v + o.DestinationUrl = v } // GetLocal returns the Local field value if set, zero value otherwise. 
@@ -103,7 +96,7 @@ func (o *SendMigrationData) SetLocal(v bool) { func (o SendMigrationData) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} - if o.DestinationUrl != nil { + if true { toSerialize["destination_url"] = o.DestinationUrl } if o.Local != nil { diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml index c4dcae04c..c9ecd399e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml @@ -195,6 +195,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 404: description: The new device could not be added to the VM instance. @@ -231,6 +233,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new disk was successfully (cold) added to the VM instance. 500: description: The new disk could not be added to the VM instance. @@ -251,6 +255,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -271,6 +277,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -291,6 +299,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. 
@@ -311,6 +321,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -428,8 +440,7 @@ components: items: type: string pci_bdf: - type: integer - format: int32 + type: string VmCounters: type: object @@ -1055,12 +1066,16 @@ components: type: boolean ReceiveMigrationData: + required: + - receiver_url type: object properties: receiver_url: type: string SendMigrationData: + required: + - destination_url type: object properties: destination_url: diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 1864140e1..2a215726b 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -28,6 +28,8 @@ readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" +ARCH=$(uname -m) + workdir="${WORKDIR:-$PWD}" destdir="${workdir}/kata-static" @@ -125,7 +127,9 @@ install_firecracker() { # Install static cloud-hypervisor asset install_clh() { - export extra_build_args="--features tdx" + if [[ "${ARCH}" == "x86_64" ]]; then + export features="tdx" + fi info "build static cloud-hypervisor" "${clh_builder}" diff --git a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh index 61a0824eb..1f49abcaa 100755 --- a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh +++ b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh @@ -52,9 +52,9 @@ build_clh_from_source() { pushd "${repo_dir}" git fetch || true git checkout "${cloud_hypervisor_version}" - if [ -n "${extra_build_args}" ]; then - info "Build cloud-hypervisor with extra 
args: ${extra_build_args}" - ./scripts/dev_cli.sh build --release --libc musl -- ${extra_build_args} + if [ -n "${features}" ]; then + info "Build cloud-hypervisor enabling the following features: ${features}" + ./scripts/dev_cli.sh build --release --libc musl --features "${features}" else ./scripts/dev_cli.sh build --release --libc musl fi diff --git a/versions.yaml b/versions.yaml index 84463db8e..c1b10ce89 100644 --- a/versions.yaml +++ b/versions.yaml @@ -75,7 +75,7 @@ assets: url: "https://github.com/cloud-hypervisor/cloud-hypervisor" uscan-url: >- https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: "55479a64d237d4c757dba19a696abefd27ec74fd" + version: "5343e09e7b8dbd5dd8ac0d90a3ad52037490dd86" firecracker: description: "Firecracker micro-VMM" From dcdc412e252ee24daca9196902a52eeef22e93d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 10:19:12 +0100 Subject: [PATCH 31/71] clh: use common kernel params from the hypervisor code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hypervisor code already defines 3 common kernel root params for the following cases: * NVDIMM * NVDIMM without DAX support * Virtio Block As parameters used for cloud-hypervisor have an overlap with the ones provided by the NVDIMM case, let's take advantage of that. 
Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 79a976abf..0dc478e7e 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -171,12 +171,9 @@ type cloudHypervisor struct { } var clhKernelParams = []Param{ - {"root", "/dev/pmem0p1"}, {"panic", "1"}, // upon kernel panic wait 1 second before reboot {"no_timer_check", ""}, // do not Check broken timer IRQ resources {"noreplace-smp", ""}, // do not replace SMP instructions - {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, // mount the root filesystem as readonly - {"rootfstype", "ext4"}, } var clhDebugKernelParams = []Param{ @@ -267,7 +264,8 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) // First take the default parameters defined by this driver - params := clhKernelParams + params := commonNvdimmKernelRootParams + params = append(params, clhKernelParams...) // Followed by extra debug parameters if debug enabled in configuration file if clh.config.Debug { From 9621c596914c40245ce49d95bfa421f57304132a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 11:26:57 +0100 Subject: [PATCH 32/71] clh: refactor image / initrd configuration set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a small code refactor removing dead code, based on the checks already done in the generic hypervisor abstraction.
Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 0dc478e7e..e81e34fcd 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -289,11 +289,6 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } - initrdPath, err := clh.config.InitrdAssetPath() - if err != nil { - return err - } - if imagePath != "" { pmem := chclient.NewPmemConfig(imagePath) *pmem.DiscardWrites = true @@ -303,12 +298,15 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } else { clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem} } - } else if initrdPath != "" { + } else { + initrdPath, err := clh.config.InitrdAssetPath() + if err != nil { + return err + } + initrd := chclient.NewInitramfsConfig(initrdPath) clh.vmconfig.SetInitramfs(*initrd) - } else { - return errors.New("no image or initrd specified") } // Use serial port as the guest console only in debug mode, From 29ee870d20d382233001bdc97bb8e75d3f05081c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 19:44:26 +0100 Subject: [PATCH 33/71] clh: Add confidential_guest to the config file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ConfidentialGuest is an option already present and exposed for QEMU, which is used for using Kata Containers together with different sorts of Guest Protections, such as TDX and SEV for x86_64, PEF for ppc64le, and SE for s390x. Right now we error out in case confidential_guest is enabled, as we will be implementing the needed blocks for this as part of this series.
Signed-off-by: Fabiano Fidêncio --- src/runtime/config/configuration-clh.toml.in | 8 ++++++++ src/runtime/pkg/katautils/config.go | 1 + src/runtime/virtcontainers/clh.go | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 07e3f31a4..cfd63c488 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -15,6 +15,14 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one. +# Default false +# confidential_guest = true + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index ed67bad13..e62355933 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -877,6 +877,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { SGXEPCSize: defaultSGXEPCSize, EnableAnnotations: h.EnableAnnotations, DisableSeccomp: h.DisableSeccomp, + ConfidentialGuest: h.ConfidentialGuest, }, nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index e81e34fcd..3cf1ca3e1 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -215,6 +215,10 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } + if clh.config.ConfidentialGuest { + return errors.New("confidential guest is not yet supported with Cloud Hypervisor") + } + clh.id = id clh.state.state = clhNotReady From 28c4c044e6987b8eb40c1c3fd01400219c2db8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 20:39:15 +0100 Subject: [PATCH 34/71] hypervisors: Confidential Guests do not support VCPUs hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As confidential guests do not support VCPUs hotplug, let's set the "DefaultMaxVCPUs" value to "NumVCPUs". The reason to do this is to ensure that guests will be started with the correct amount of VCPUs, without giving to the guest with all the possible VCPUs the host could provide. One clear side effect of this limitation is that workloads that would require more VCPUs on their yaml definition will not run on this scenario. 
Signed-off-by: Fabiano Fidêncio --- src/runtime/config/configuration-clh.toml.in | 5 +++++ src/runtime/config/configuration-qemu.toml.in | 5 +++++ src/runtime/virtcontainers/hypervisor.go | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index cfd63c488..0971c6c4b 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -20,6 +20,11 @@ image = "@IMAGEPATH@" # from memory encryption to both memory and CPU-state encryption and integrity. # The Kata Containers runtime dynamically detects the available feature set and # aims at enabling the largest possible one. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# # Default false # confidential_guest = true diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 59753db5d..a603d7607 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -21,6 +21,11 @@ machine_type = "@MACHINETYPE@" # from memory encryption to both memory and CPU-state encryption and integrity. # The Kata Containers runtime dynamically detects the available feature set and # aims at enabling the largest possible one. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# # Default false # confidential_guest = true diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 713280be7..26f33f5d6 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -564,6 +564,11 @@ func (conf *HypervisorConfig) Valid() error { conf.DefaultMaxVCPUs = defaultMaxVCPUs } + if conf.ConfidentialGuest && conf.NumVCPUs != conf.DefaultMaxVCPUs { + hvLogger.Warnf("Confidential guests do not support hotplugging of vCPUs. 
Setting DefaultMaxVCPUs to NumVCPUs (%d)", conf.NumVCPUs) + conf.DefaultMaxVCPUs = conf.NumVCPUs + } + if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { conf.Msize9p = defaultMsize9p } From df8ffecde0b4190c6c8ce8ee21e871ceafe132f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 21:00:39 +0100 Subject: [PATCH 35/71] hypervisors: Confidential Guests do not support Device hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to VCPUs hotplug, Confidential Guests also do not support Device hotplug. Let's make it clear in the documentation and guard the code on both QEMU and Cloud Hypervisor side to ensure we don't advertise Device hotplug as being supported when running Confidential Guests. Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 1 + src/runtime/config/configuration-qemu.toml.in | 1 + src/runtime/virtcontainers/clh.go | 12 +++++++++++- src/runtime/virtcontainers/qemu_amd64.go | 5 +++-- src/runtime/virtcontainers/qemu_arch_base.go | 4 +++- src/runtime/virtcontainers/qemu_ppc64le.go | 3 ++- 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 0971c6c4b..dc7f1f9f5 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -24,6 +24,7 @@ image = "@IMAGEPATH@" # Known limitations: # * Does not work by design: # - CPU Hotplug +# - Device Hotplug # # Default false # confidential_guest = true diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index a603d7607..1282be310 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -25,6 +25,7 @@ machine_type = "@MACHINETYPE@" # Known limitations: # * Does not work by design: # - CPU Hotplug +# - Device Hotplug # # 
Default false # confidential_guest = true diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 3cf1ca3e1..f463d1744 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -589,6 +589,10 @@ func (clh *cloudHypervisor) HotplugAddDevice(ctx context.Context, devInfo interf span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugAddDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() + if clh.config.ConfidentialGuest { + return nil, errors.New("Device hotplug addition is not supported in confidential mode") + } + switch devType { case BlockDev: drive := devInfo.(*config.BlockDrive) @@ -606,6 +610,10 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugRemoveDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() + if clh.config.ConfidentialGuest { + return nil, errors.New("Device hotplug addition is not supported in confidential mode") + } + var deviceID string switch devType { @@ -860,7 +868,9 @@ func (clh *cloudHypervisor) Capabilities(ctx context.Context) types.Capabilities clh.Logger().WithField("function", "Capabilities").Info("get Capabilities") var caps types.Capabilities caps.SetFsSharingSupport() - caps.SetBlockDeviceHotplugSupport() + if !clh.config.ConfidentialGuest { + caps.SetBlockDeviceHotplugSupport() + } return caps } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index f3ae89b59..067a55503 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -153,8 +153,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { func (q *qemuAmd64) capabilities() types.Capabilities { var caps types.Capabilities - if q.qemuMachine.Type == QemuQ35 || - q.qemuMachine.Type == QemuVirt { + if (q.qemuMachine.Type == QemuQ35 || + q.qemuMachine.Type == QemuVirt) && 
+ q.protection == noneProtection { caps.SetBlockDeviceHotplugSupport() } diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index b65cb61e3..6601a74d8 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -277,7 +277,9 @@ func (q *qemuArchBase) kernelParameters(debug bool) []Param { func (q *qemuArchBase) capabilities() types.Capabilities { var caps types.Capabilities - caps.SetBlockDeviceHotplugSupport() + if q.protection == noneProtection { + caps.SetBlockDeviceHotplugSupport() + } caps.SetMultiQueueSupport() caps.SetFsSharingSupport() return caps diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index f78ed2429..d6f768128 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -96,7 +96,8 @@ func (q *qemuPPC64le) capabilities() types.Capabilities { var caps types.Capabilities // pseries machine type supports hotplugging drives - if q.qemuMachine.Type == QemuPseries { + if q.qemuMachine.Type == QemuPseries && + q.protection == noneProtection { caps.SetBlockDeviceHotplugSupport() } From f50ff9f7987ce7e0156b3ea183f6fc3678f8d07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 21:39:54 +0100 Subject: [PATCH 36/71] hypervisors: Confidential Guests do not support Memory hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similarly to VCPUs and Device hotplug, Confidential Guests also do not support Memory hotplug. Let's make it clear in the documentation and guard the code on both QEMU and Cloud Hypervisor side to ensure we don't advertise Memory hotplug as being supported when running Confidential Guests. 
Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 1 + src/runtime/config/configuration-qemu.toml.in | 1 + src/runtime/virtcontainers/clh.go | 12 +++++++----- src/runtime/virtcontainers/qemu_amd64.go | 6 +++++- src/runtime/virtcontainers/qemu_arch_base.go | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index dc7f1f9f5..c2522cba6 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -25,6 +25,7 @@ image = "@IMAGEPATH@" # * Does not work by design: # - CPU Hotplug # - Device Hotplug +# - Memory Hotplug # # Default false # confidential_guest = true diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 1282be310..0f2198432 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -26,6 +26,7 @@ machine_type = "@MACHINETYPE@" # * Does not work by design: # - CPU Hotplug # - Device Hotplug +# - Memory Hotplug # # Default false # confidential_guest = true diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index f463d1744..7833d4093 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -258,12 +258,14 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) // Enable hugepages if needed clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) - hostMemKb, err := GetHostMemorySizeKb(procMemInfo) - if err != nil { - return nil + if !clh.config.ConfidentialGuest { + hostMemKb, err := GetHostMemorySizeKb(procMemInfo) + if err != nil { + return nil + } + // OpenAPI only supports int64 values + clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i 
}(int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes())) } - // OpenAPI only supports int64 values - clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes())) // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 067a55503..c32c5025d 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -189,7 +189,11 @@ func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) g // Is Memory Hotplug supported by this architecture/machine type combination? func (q *qemuAmd64) supportGuestMemoryHotplug() bool { // true for all amd64 machine types except for microvm. - return q.qemuMachine.Type != govmmQemu.MachineTypeMicrovm + if q.qemuMachine.Type == govmmQemu.MachineTypeMicrovm { + return false + } + + return q.protection == noneProtection } func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 6601a74d8..8820fd655 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -692,7 +692,7 @@ func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { } func (q *qemuArchBase) supportGuestMemoryHotplug() bool { - return true + return q.protection == noneProtection } func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error { From a8827e0c78938afa9d5da68d05b32cb91ca85467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 21:57:38 +0100 Subject: [PATCH 37/71] hypervisors: Confidential Guests do not support NVDIMM 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVDIMM is also not supported with Confidential Guests and Virtio Block devices should be used instead. Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 1 + src/runtime/config/configuration-qemu.toml.in | 4 ++++ src/runtime/virtcontainers/clh.go | 24 +++++++++++++++---- src/runtime/virtcontainers/qemu_amd64.go | 5 ++++ src/runtime/virtcontainers/qemu_ppc64le.go | 5 ++++ src/runtime/virtcontainers/qemu_s390x.go | 5 ++++ 6 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index c2522cba6..99acada75 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -26,6 +26,7 @@ image = "@IMAGEPATH@" # - CPU Hotplug # - Device Hotplug # - Memory Hotplug +# - NVDIMM devices # # Default false # confidential_guest = true diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 0f2198432..804849a13 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -27,6 +27,7 @@ machine_type = "@MACHINETYPE@" # - CPU Hotplug # - Device Hotplug # - Memory Hotplug +# - NVDIMM devices # # Default false # confidential_guest = true @@ -286,6 +287,9 @@ pflashes = [] # If false and nvdimm is supported, use nvdimm device to plug guest image. # Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. 
+# # Default is false #disable_image_nvdimm = true diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 7833d4093..eae13eb04 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -271,6 +271,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // First take the default parameters defined by this driver params := commonNvdimmKernelRootParams + if clh.config.ConfidentialGuest { + params = commonVirtioblkKernelRootParams + } params = append(params, clhKernelParams...) // Followed by extra debug parameters if debug enabled in configuration file @@ -296,13 +299,24 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net } if imagePath != "" { - pmem := chclient.NewPmemConfig(imagePath) - *pmem.DiscardWrites = true + if clh.config.ConfidentialGuest { + disk := chclient.NewDiskConfig(imagePath) + disk.SetReadonly(true) - if clh.vmconfig.Pmem != nil { - *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) + if clh.vmconfig.Disks != nil { + *clh.vmconfig.Disks = append(*clh.vmconfig.Disks, *disk) + } else { + clh.vmconfig.Disks = &[]chclient.DiskConfig{*disk} + } } else { - clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem} + pmem := chclient.NewPmemConfig(imagePath) + *pmem.DiscardWrites = true + + if clh.vmconfig.Pmem != nil { + *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) + } else { + clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem} + } } } else { initrdPath, err := clh.config.InitrdAssetPath() diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index c32c5025d..ef1e16adb 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -132,6 +132,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", 
"qemuAmd64").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } if config.SGXEPCSize != 0 { diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index d6f768128..93c11416c 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -83,6 +83,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuPPC64le").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } q.handleImagePath(config) diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index aeddb982a..77b6f440b 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -77,6 +77,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuS390x").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } if config.ImagePath != "" { From a13b4d5ad8dc03e3ba25a5cd93c5d37919c13787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 19:55:54 +0100 Subject: [PATCH 38/71] clh: Add firmware to the config file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "firmware" option was already present for a while, but it's never been exposed to the configuration file before. Let's do it now as it can be used, in combination with the newly added confidential_guest option, to boot a guest VM using the so called `td-shim`[0] with Cloud Hypervisor. 
[0]: https://github.com/confidential-containers/td-shim Signed-off-by: Fabiano Fidêncio --- src/runtime/config/configuration-clh.toml.in | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 99acada75..1296d20b9 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -31,6 +31,17 @@ image = "@IMAGEPATH@" # Default false # confidential_guest = true +# Path to the firmware. +# If you want Cloud Hypervisor to use a specific firmware, set its path below. +# This option is only used when confidential_guest is enabled. +# +# For more information about firmware that can be used with specific TEEs, +# please, refer to: +# * TDX: +# - td-shim: https://github.com/confidential-containers/td-shim +# +# firmware = "@FIRMWAREPATH@" + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" From 72434333aa08daaa86c5110a1ebb6839d8a8b0e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Feb 2022 22:30:24 +0100 Subject: [PATCH 39/71] clh: Add TDX support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's enable TDX support for Cloud Hypervisor, using td-shim as its desired firmware.
Fixes: #3632 Signed-off-by: Fabiano Fidêncio --- src/runtime/virtcontainers/clh.go | 38 +++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index eae13eb04..11c1f7e35 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -202,6 +202,34 @@ func (clh *cloudHypervisor) nydusdAPISocketPath(id string) (string, error) { return utils.BuildSocketPath(clh.config.VMStorePath, id, nydusdAPISock) } +func (clh *cloudHypervisor) enableProtection() error { + protection, err := availableGuestProtection() + if err != nil { + return err + } + + switch protection { + case tdxProtection: + firmwarePath, err := clh.config.FirmwareAssetPath() + if err != nil { + return err + } + + if firmwarePath == "" { + return errors.New("Firmware path is not specified") + } + + clh.vmconfig.Tdx = chclient.NewTdxConfig(firmwarePath) + return nil + + case sevProtection: + return errors.New("SEV protection is not supported by Cloud Hypervisor") + + default: + return errors.New("This system doesn't support Confidentian Computing (Guest Protection)") + } +} + // For cloudHypervisor this call only sets the internal structure up. // The VM will be created and started through StartVM(). 
func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { @@ -215,10 +243,6 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } - if clh.config.ConfidentialGuest { - return errors.New("confidential guest is not yet supported with Cloud Hypervisor") - } - clh.id = id clh.state.state = clhNotReady @@ -252,6 +276,12 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Create the VM config via the constructor to ensure default values are properly assigned clh.vmconfig = *chclient.NewVmConfig(*chclient.NewKernelConfig(kernelPath)) + if clh.config.ConfidentialGuest { + if err := clh.enableProtection(); err != nil { + return err + } + } + // Create the VM memory config via the constructor to ensure default values are properly assigned clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) // shared memory should be enabled if using vhost-user(kata uses virtiofsd) From 03fc1cbd7ea6730b1bae54c1b1dcff7ab5c6cc03 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 4 Feb 2022 09:38:04 +0000 Subject: [PATCH 40/71] virtcontainers: Add a filesystem sharing interface Filesystem sharing here means the ability to share some parts of the host filesystem with the guest. It's mostly about sharing files and container bundle root filesystems. In order to allow for different file and rootfs sharing implementations, we define a FilesystemSharer interface. This interface provides a preparation step, where concrete implementations will be able to e.g. prepare the host filesystem. 
Then it provides 2 methods, one for sharing any file (regular file or a directory) and another one for sharing a container root filesystem Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/fs_share.go | 77 ++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 src/runtime/virtcontainers/fs_share.go diff --git a/src/runtime/virtcontainers/fs_share.go b/src/runtime/virtcontainers/fs_share.go new file mode 100644 index 000000000..fa8374c3b --- /dev/null +++ b/src/runtime/virtcontainers/fs_share.go @@ -0,0 +1,77 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" +) + +// fsShareTracingTags defines tags for the trace span +var fsShareTracingTags = map[string]string{ + "source": "runtime", + "package": "virtcontainers", + "subsystem": "fs_share", +} + +// SharedFile represents the outcome of a host filesystem sharing +// operation. +type SharedFile struct { + storage *grpc.Storage + guestPath string +} + +type FilesystemSharer interface { + // Prepare will set the host filesystem up, making it ready + // to share container files and directories with the guest. + // It will be called before any container is running. + // + // For example, the Linux implementation would create and + // prepare the host shared folders, and also make all + // sandbox mounts ready to be shared. + // + // Implementation of this method must be idempotent and be + // ready to potentially be called several times in a row, + // without symmetric calls to Cleanup in between. + Prepare(context.Context) error + + // Cleanup cleans the host filesystem up from the initial + // setup created by Prepare. + // It will be called after all containers are terminated. 
+ // + // Implementation of this method must be idempotent and be + // ready to potentially be called several times in a row, + // without symmetric calls to Prepare in between. + Cleanup(context.Context) error + + // ShareFile shares a file (a regular file or a directory) + // from the host filesystem with a container running in the + // guest. The host file to be shared is described by the + // Mount argument. + // This method should be called for each container file to + // be shared with the guest. + // + // The returned SharedFile pointer describes how the file + // should be shared between the host and the guest. If it + // is nil, then the shared filed described by the Mount + // argument will be ignored by the guest, i.e. it will NOT + // be shared with the guest. + ShareFile(context.Context, *Container, *Mount) (*SharedFile, error) + + // UnshareFile stops sharing a container file, described by + // the Mount argument. + UnshareFile(context.Context, *Container, *Mount) error + + // ShareRootFilesystem shares a container bundle rootfs with + // the Kata guest, allowing the kata agent to eventually start + // the container from that shared rootfs. + ShareRootFilesystem(context.Context, *Container) (*SharedFile, error) + + // UnshareRootFilesystem stops sharing a container bundle + // rootfs. + UnshareRootFilesystem(context.Context, *Container) error +} From 61590bbddc9edb29389fe358cc0cfbeeeece48ac Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 4 Feb 2022 09:42:46 +0000 Subject: [PATCH 41/71] virtcontainers: Add a Linux implementation for the FilesystemSharer This gathers the current kata agent and container filesystem sharing code into a FilesystemSharer implementation. 
Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/fs_share_linux.go | 476 +++++++++++++++++++ 1 file changed, 476 insertions(+) create mode 100644 src/runtime/virtcontainers/fs_share_linux.go diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go new file mode 100644 index 000000000..dc16a73b7 --- /dev/null +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -0,0 +1,476 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2014,2015,2016,2017 Docker, Inc. +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "sync" + "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" +) + +func unmountNoFollow(path string) error { + return syscall.Unmount(path, syscall.MNT_DETACH|UmountNoFollow) +} + +type FilesystemShare struct { + sandbox *Sandbox + sync.Mutex + prepared bool +} + +func NewFilesystemShare(s *Sandbox) (FilesystemSharer, error) { + return &FilesystemShare{ + prepared: false, + sandbox: s, + }, nil +} + +// Logger returns a logrus logger appropriate for logging Filesystem sharing messages +func (f *FilesystemShare) Logger() *logrus.Entry { + return virtLog.WithFields(logrus.Fields{ + "subsystem": "filesystem share", + "sandbox": f.sandbox.ID(), + }) +} + +func (f *FilesystemShare) prepareBindMounts(ctx context.Context) error { + span, ctx := katatrace.Trace(ctx, f.Logger(), "setupBindMounts", fsShareTracingTags) + defer span.End() + + var err error + + if len(f.sandbox.config.SandboxBindMounts) == 0 { + 
return nil + } + + // Create subdirectory in host shared path for sandbox mounts + sandboxMountDir := filepath.Join(getMountPath(f.sandbox.ID()), sandboxMountsDir) + sandboxShareDir := filepath.Join(GetSharePath(f.sandbox.ID()), sandboxMountsDir) + if err := os.MkdirAll(sandboxMountDir, DirMode); err != nil { + return fmt.Errorf("Creating sandbox shared mount directory: %v: %w", sandboxMountDir, err) + } + var mountedList []string + defer func() { + if err != nil { + for _, mnt := range mountedList { + if derr := unmountNoFollow(mnt); derr != nil { + f.Logger().WithError(derr).Errorf("Cleanup: couldn't unmount %s", mnt) + } + } + if derr := os.RemoveAll(sandboxMountDir); derr != nil { + f.Logger().WithError(derr).Errorf("Cleanup: failed to remove %s", sandboxMountDir) + } + + } + }() + + for _, m := range f.sandbox.config.SandboxBindMounts { + mountDest := filepath.Join(sandboxMountDir, filepath.Base(m)) + // bind-mount each sandbox mount that's defined into the sandbox mounts dir + if err := bindMount(ctx, m, mountDest, true, "private"); err != nil { + return fmt.Errorf("Mounting sandbox directory: %v to %v: %w", m, mountDest, err) + } + mountedList = append(mountedList, mountDest) + + mountDest = filepath.Join(sandboxShareDir, filepath.Base(m)) + if err := remountRo(ctx, mountDest); err != nil { + return fmt.Errorf("remount sandbox directory: %v to %v: %w", m, mountDest, err) + } + } + + return nil +} + +func (f *FilesystemShare) cleanupBindMounts(ctx context.Context) error { + if f.sandbox.config == nil || len(f.sandbox.config.SandboxBindMounts) == 0 { + return nil + } + + var retErr error + bindmountShareDir := filepath.Join(getMountPath(f.sandbox.ID()), sandboxMountsDir) + for _, m := range f.sandbox.config.SandboxBindMounts { + mountPath := filepath.Join(bindmountShareDir, filepath.Base(m)) + if err := unmountNoFollow(mountPath); err != nil { + if retErr == nil { + retErr = err + } + f.Logger().WithError(err).Errorf("Failed to unmount sandbox bindmount: %v", 
mountPath) + } + } + if err := os.RemoveAll(bindmountShareDir); err != nil { + if retErr == nil { + retErr = err + } + f.Logger().WithError(err).Errorf("Failed to remove sandbox bindmount directory: %s", bindmountShareDir) + } + + return retErr +} + +func (f *FilesystemShare) Prepare(ctx context.Context) error { + var err error + + span, ctx := katatrace.Trace(ctx, f.Logger(), "prepare", fsShareTracingTags) + defer span.End() + + f.Lock() + defer f.Unlock() + + // Prepare is idempotent, i.e. can be called multiple times in a row, without failing + // and without modifying the filesystem state after the first call. + if f.prepared { + f.Logger().Warn("Calling Prepare() on an already prepared filesystem") + return nil + } + + // Toggle prepared to true if everything went fine. + defer func() { + if err == nil { + f.prepared = true + } + }() + + // create shared path structure + sharePath := GetSharePath(f.sandbox.ID()) + mountPath := getMountPath(f.sandbox.ID()) + if err = os.MkdirAll(sharePath, sharedDirMode); err != nil { + return err + } + if err = os.MkdirAll(mountPath, DirMode); err != nil { + return err + } + + // slave mount so that future mountpoints under mountPath are shown in sharePath as well + if err = bindMount(ctx, mountPath, sharePath, true, "slave"); err != nil { + return err + } + defer func() { + if err != nil { + if umountErr := unmountNoFollow(sharePath); umountErr != nil { + f.Logger().WithError(umountErr).Errorf("failed to unmount vm share path %s", sharePath) + } + } + }() + + // Setup sandbox bindmounts, if specified. + if err = f.prepareBindMounts(ctx); err != nil { + return err + } + + return nil +} + +func (f *FilesystemShare) Cleanup(ctx context.Context) error { + var err error + + f.Lock() + defer f.Unlock() + + // Cleanup is idempotent, i.e. can be called multiple times in a row, without failing + // and without modifying the filesystem state after the first call. 
+ if !f.prepared { + f.Logger().Warn("Calling Cleanup() on an already cleaned up filesystem") + return nil + } + + // Toggle prepared to false if everything went fine. + defer func() { + if err == nil { + f.prepared = false + } + }() + + // Unmount all the sandbox bind mounts. + if err = f.cleanupBindMounts(ctx); err != nil { + return err + } + + // Unmount shared path + path := GetSharePath(f.sandbox.ID()) + f.Logger().WithField("path", path).Infof("Cleanup agent") + if err = unmountNoFollow(path); err != nil { + f.Logger().WithError(err).Errorf("failed to unmount vm share path %s", path) + return err + } + + // Unmount mount path + path = getMountPath(f.sandbox.ID()) + if err = bindUnmountAllRootfs(ctx, path, f.sandbox); err != nil { + f.Logger().WithError(err).Errorf("failed to unmount vm mount path %s", path) + return err + } + if err = os.RemoveAll(getSandboxPath(f.sandbox.ID())); err != nil { + f.Logger().WithError(err).Errorf("failed to Cleanup vm path %s", getSandboxPath(f.sandbox.ID())) + return err + } + + return nil +} + +func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) (*SharedFile, error) { + randBytes, err := utils.GenerateRandomBytes(8) + if err != nil { + return nil, err + } + + filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination)) + guestPath := filepath.Join(kataGuestSharedDir(), filename) + + // copy file to container's rootfs if filesystem sharing is not supported, otherwise + // bind mount it in the shared directory. + caps := f.sandbox.hypervisor.Capabilities(ctx) + if !caps.IsFsSharingSupported() { + f.Logger().Debug("filesystem sharing is not supported, files will be copied") + + fileInfo, err := os.Stat(m.Source) + if err != nil { + return nil, err + } + + // Ignore the mount if this is not a regular file (excludes + // directory, socket, device, ...) as it cannot be handled by + // a simple copy. 
But this should not be treated as an error, + // only as a limitation. + if !fileInfo.Mode().IsRegular() { + f.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported") + return nil, nil + } + + if err := f.sandbox.agent.copyFile(ctx, m.Source, guestPath); err != nil { + return nil, err + } + } else { + // These mounts are created in the shared dir + mountDest := filepath.Join(getMountPath(f.sandbox.ID()), filename) + if !m.ReadOnly { + if err := bindMount(ctx, m.Source, mountDest, false, "private"); err != nil { + return nil, err + } + } else { + // For RO mounts, bindmount remount event is not propagated to mount subtrees, + // and it doesn't present in the virtiofsd standalone mount namespace either. + // So we end up a bit tricky: + // 1. make a private ro bind mount to the mount source + // 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here. + // 3. umount the private bind mount created in step 1 + privateDest := filepath.Join(getPrivatePath(f.sandbox.ID()), filename) + + if err := bindMount(ctx, m.Source, privateDest, true, "private"); err != nil { + return nil, err + } + defer func() { + unmountNoFollow(privateDest) + }() + + if err := bindMount(ctx, privateDest, mountDest, false, "private"); err != nil { + return nil, err + } + } + + // Save HostPath mount value into the passed mount + m.HostPath = mountDest + } + + return &SharedFile{ + guestPath: guestPath, + }, nil +} + +func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Mount) error { + if err := unmountNoFollow(m.HostPath); err != nil { + return err + } + + if m.Type == "bind" { + s, err := os.Stat(m.HostPath) + if err != nil { + return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath) + } + // Remove the empty file or directory + if s.Mode().IsRegular() && s.Size() == 0 { + os.Remove(m.HostPath) + } + if s.Mode().IsDir() { + 
syscall.Rmdir(m.HostPath) + } + } + + return nil +} + +func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *Container) (*SharedFile, error) { + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) + if f.sandbox.GetHypervisorType() != string(QemuHypervisor) { + // qemu is supported first, other hypervisors will next + // https://github.com/kata-containers/kata-containers/issues/2724 + return nil, errNydusdNotSupport + } + extraOption, err := parseExtraOption(c.rootFs.Options) + if err != nil { + return nil, err + } + f.Logger().Infof("Nydus option: %v", extraOption) + mountOpt := &MountOption{ + mountpoint: rafsMountPath(c.id), + source: extraOption.Source, + config: extraOption.Config, + } + + q, _ := f.sandbox.hypervisor.(*qemu) + // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir + if err := q.virtiofsDaemon.Mount(*mountOpt); err != nil { + return nil, err + } + rootfs := &grpc.Storage{} + containerShareDir := filepath.Join(getMountPath(f.sandbox.ID()), c.id) + + // mkdir rootfs, guest at /run/kata-containers/shared/containers//rootfs + rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix) + if err := os.MkdirAll(rootfsDir, DirMode); err != nil { + return nil, err + } + + // bindmount snapshot dir which snapshotter allocated + // to guest /run/kata-containers/shared/containers//snapshotdir + snapshotShareDir := filepath.Join(containerShareDir, snapshotDir) + if err := bindMount(ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil { + return nil, err + } + + // so rootfs = overlay(upperdir, workerdir, lowerdir) + rootfs.MountPoint = rootfsGuestPath + rootfs.Source = typeOverlayFS + rootfs.Fstype = typeOverlayFS + rootfs.Driver = kataOverlayDevType + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs"))) + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, 
filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work"))) + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir))) + rootfs.Options = append(rootfs.Options, "index=off") + f.Logger().Infof("Nydus rootfs info: %#v\n", rootfs) + + return &SharedFile{ + storage: rootfs, + guestPath: rootfsGuestPath, + }, nil +} + +//func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) { +func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { + if c.rootFs.Type == NydusRootFSType { + return f.shareRootFilesystemWithNydus(ctx, c) + } + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) + + if c.state.Fstype != "" && c.state.BlockDeviceID != "" { + // The rootfs storage volume represents the container rootfs + // mount point inside the guest. + // It can be a block based device (when using block based container + // overlay on the host) mount or a 9pfs one (for all other overlay + // implementations). + rootfsStorage := &grpc.Storage{} + + // This is a block based device rootfs. 
+ device := f.sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID) + if device == nil { + f.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id") + return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID) + } + + blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) + if !ok || blockDrive == nil { + f.Logger().Error("malformed block drive") + return nil, fmt.Errorf("malformed block drive") + } + switch { + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: + rootfsStorage.Driver = kataMmioBlkDevType + rootfsStorage.Source = blockDrive.VirtPath + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: + rootfsStorage.Driver = kataBlkCCWDevType + rootfsStorage.Source = blockDrive.DevNo + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: + rootfsStorage.Driver = kataBlkDevType + rootfsStorage.Source = blockDrive.PCIPath.String() + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: + rootfsStorage.Driver = kataSCSIDevType + rootfsStorage.Source = blockDrive.SCSIAddr + default: + return nil, fmt.Errorf("Unknown block device driver: %s", f.sandbox.config.HypervisorConfig.BlockDeviceDriver) + } + + // We can't use filepath.Dir(rootfsGuestPath) (The rootfs parent) because + // with block devices the rootfsSuffix may not be set. + // So we have to build the bundle path explicitly. + rootfsStorage.MountPoint = filepath.Join(kataGuestSharedDir(), c.id) + rootfsStorage.Fstype = c.state.Fstype + + if c.state.Fstype == "xfs" { + rootfsStorage.Options = []string{"nouuid"} + } + + // Ensure container mount destination exists + // TODO: remove dependency on shared fs path. shared fs is just one kind of storage source. + // we should not always use shared fs path for all kinds of storage. Instead, all storage + // should be bind mounted to a tmpfs path for containers to use. 
+ if err := os.MkdirAll(filepath.Join(getMountPath(f.sandbox.ID()), c.id, c.rootfsSuffix), DirMode); err != nil { + return nil, err + } + + return &SharedFile{ + storage: rootfsStorage, + guestPath: rootfsGuestPath, + }, nil + } + + // This is not a block based device rootfs. We are going to bind mount it into the shared drive + // between the host and the guest. + // With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory + // (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from + // the host and it will show up in the guest. + if err := bindMountContainerRootfs(ctx, getMountPath(f.sandbox.ID()), c.id, c.rootFs.Target, false); err != nil { + return nil, err + } + + return &SharedFile{ + storage: nil, + guestPath: rootfsGuestPath, + }, nil +} + +func (f *FilesystemShare) UnshareRootFilesystem(ctx context.Context, c *Container) error { + if c.rootFs.Type == NydusRootFSType { + if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil { + f.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup") + } + } else { + if err := bindUnmountContainerRootfs(ctx, getMountPath(f.sandbox.ID()), c.id); err != nil { + return err + } + } + + // Remove the shared directory for this container. + shareDir := filepath.Join(getMountPath(f.sandbox.ID()), c.id) + if err := syscall.Rmdir(shareDir); err != nil { + f.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir") + } + + return nil + +} From 533c1c0e8627684d54b70bb4fcef46b966efed40 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 4 Feb 2022 09:45:11 +0000 Subject: [PATCH 42/71] virtcontainers: Keep all filesystem sharing prep code to sandbox.go With the Linux implementation of the FilesystemSharer interface, we can now remove all host filesystem sharing code from kata_agent and keep it where it belongs: sandbox.go. 
Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/agent.go | 2 +- .../virtcontainers/fs_share_linux_test.go | 102 +++++++++++++ src/runtime/virtcontainers/kata_agent.go | 137 +----------------- src/runtime/virtcontainers/kata_agent_test.go | 119 +-------------- src/runtime/virtcontainers/mock_agent.go | 2 +- src/runtime/virtcontainers/sandbox.go | 24 ++- 6 files changed, 133 insertions(+), 253 deletions(-) create mode 100644 src/runtime/virtcontainers/fs_share_linux_test.go diff --git a/src/runtime/virtcontainers/agent.go b/src/runtime/virtcontainers/agent.go index 089d099e2..a7888552b 100644 --- a/src/runtime/virtcontainers/agent.go +++ b/src/runtime/virtcontainers/agent.go @@ -176,7 +176,7 @@ type agent interface { markDead(ctx context.Context) // cleanup removes all on disk information generated by the agent - cleanup(ctx context.Context, s *Sandbox) + cleanup(ctx context.Context) // return data for saving save() persistapi.AgentState diff --git a/src/runtime/virtcontainers/fs_share_linux_test.go b/src/runtime/virtcontainers/fs_share_linux_test.go new file mode 100644 index 000000000..704cb4b88 --- /dev/null +++ b/src/runtime/virtcontainers/fs_share_linux_test.go @@ -0,0 +1,102 @@ +// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2022 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "os" + "path" + "path/filepath" + "syscall" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSandboxSharedFilesystem(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + + assert := assert.New(t) + // create temporary files to mount: + testMountPath, err := os.MkdirTemp("", "sandbox-test") + assert.NoError(err) + defer os.RemoveAll(testMountPath) + + // create a new shared directory for our test: + kataHostSharedDirSaved := kataHostSharedDir + testHostDir, err := os.MkdirTemp("", "kata-Cleanup") + assert.NoError(err) + kataHostSharedDir = func() string { + return testHostDir + } + defer func() { + kataHostSharedDir = kataHostSharedDirSaved + }() + + m1Path := filepath.Join(testMountPath, "foo.txt") + f1, err := os.Create(m1Path) + assert.NoError(err) + defer f1.Close() + + m2Path := filepath.Join(testMountPath, "bar.txt") + f2, err := os.Create(m2Path) + assert.NoError(err) + defer f2.Close() + + // create sandbox for mounting into + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + config: &SandboxConfig{ + SandboxBindMounts: []string{m1Path, m2Path}, + }, + } + + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + + // make the shared directory for our test: + dir := kataHostSharedDir() + err = os.MkdirAll(path.Join(dir, sandbox.id), 0777) + assert.Nil(err) + defer os.RemoveAll(dir) + + // Test the prepare function. We expect it to succeed + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + + // Test the Cleanup function. We expect it to succeed for the mount to be removed. + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) + + // After successful Cleanup, verify there are not any mounts left behind. 
+ stat := syscall.Stat_t{} + mount1CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m1Path)) + err = syscall.Stat(mount1CheckPath, &stat) + assert.Error(err) + assert.True(os.IsNotExist(err)) + + mount2CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m2Path)) + err = syscall.Stat(mount2CheckPath, &stat) + assert.Error(err) + assert.True(os.IsNotExist(err)) + + // Verify that Prepare is idempotent. + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + + // Verify that Cleanup is idempotent. + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) +} diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index b3f68a88d..8572f584a 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -63,16 +63,12 @@ const ( // path to vfio devices vfioPath = "/dev/vfio/" - sandboxMountsDir = "sandbox-mounts" - NydusRootFSType = "fuse.nydus-overlayfs" + // enable debug console kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" kernelParamDebugConsoleVPortValue = "1026" - - // Restricted permission for shared directory managed by virtiofs - sharedDirMode = os.FileMode(0700) | os.ModeDir ) var ( @@ -387,79 +383,6 @@ func (k *kataAgent) internalConfigure(ctx context.Context, h Hypervisor, id stri return nil } -func (k *kataAgent) setupSandboxBindMounts(ctx context.Context, sandbox *Sandbox) (err error) { - span, ctx := katatrace.Trace(ctx, k.Logger(), "setupSandboxBindMounts", kataAgentTracingTags) - defer span.End() - - if len(sandbox.config.SandboxBindMounts) == 0 { - return nil - } - - // Create subdirectory in host shared path for sandbox mounts - sandboxMountDir := filepath.Join(getMountPath(sandbox.id), 
sandboxMountsDir) - sandboxShareDir := filepath.Join(GetSharePath(sandbox.id), sandboxMountsDir) - if err := os.MkdirAll(sandboxMountDir, DirMode); err != nil { - return fmt.Errorf("Creating sandbox shared mount directory: %v: %w", sandboxMountDir, err) - } - var mountedList []string - defer func() { - if err != nil { - for _, mnt := range mountedList { - if derr := syscall.Unmount(mnt, syscall.MNT_DETACH|UmountNoFollow); derr != nil { - k.Logger().WithError(derr).Errorf("Cleanup: couldn't unmount %s", mnt) - } - } - if derr := os.RemoveAll(sandboxMountDir); derr != nil { - k.Logger().WithError(derr).Errorf("Cleanup: failed to remove %s", sandboxMountDir) - } - - } - }() - - for _, m := range sandbox.config.SandboxBindMounts { - mountDest := filepath.Join(sandboxMountDir, filepath.Base(m)) - // bind-mount each sandbox mount that's defined into the sandbox mounts dir - if err := bindMount(ctx, m, mountDest, true, "private"); err != nil { - return fmt.Errorf("Mounting sandbox directory: %v to %v: %w", m, mountDest, err) - } - mountedList = append(mountedList, mountDest) - - mountDest = filepath.Join(sandboxShareDir, filepath.Base(m)) - if err := remountRo(ctx, mountDest); err != nil { - return fmt.Errorf("remount sandbox directory: %v to %v: %w", m, mountDest, err) - } - - } - - return nil -} - -func (k *kataAgent) cleanupSandboxBindMounts(sandbox *Sandbox) error { - if sandbox.config == nil || len(sandbox.config.SandboxBindMounts) == 0 { - return nil - } - - var retErr error - bindmountShareDir := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir) - for _, m := range sandbox.config.SandboxBindMounts { - mountPath := filepath.Join(bindmountShareDir, filepath.Base(m)) - if err := syscall.Unmount(mountPath, syscall.MNT_DETACH|UmountNoFollow); err != nil { - if retErr == nil { - retErr = err - } - k.Logger().WithError(err).Errorf("Failed to unmount sandbox bindmount: %v", mountPath) - } - } - if err := os.RemoveAll(bindmountShareDir); err != nil { - if retErr == 
nil { - retErr = err - } - k.Logger().WithError(err).Errorf("Failed to remove sandbox bindmount directory: %s", bindmountShareDir) - } - - return retErr -} - func (k *kataAgent) configure(ctx context.Context, h Hypervisor, id, sharePath string, config KataAgentConfig) error { span, ctx := katatrace.Trace(ctx, k.Logger(), "configure", kataAgentTracingTags) defer span.End() @@ -509,47 +432,10 @@ func (k *kataAgent) configureFromGrpc(ctx context.Context, h Hypervisor, id stri return k.internalConfigure(ctx, h, id, config) } -func (k *kataAgent) setupSharedPath(ctx context.Context, sandbox *Sandbox) (err error) { - span, ctx := katatrace.Trace(ctx, k.Logger(), "setupSharedPath", kataAgentTracingTags) - defer span.End() - - // create shared path structure - sharePath := GetSharePath(sandbox.id) - mountPath := getMountPath(sandbox.id) - if err := os.MkdirAll(sharePath, sharedDirMode); err != nil { - return err - } - if err := os.MkdirAll(mountPath, DirMode); err != nil { - return err - } - - // slave mount so that future mountpoints under mountPath are shown in sharePath as well - if err := bindMount(ctx, mountPath, sharePath, true, "slave"); err != nil { - return err - } - defer func() { - if err != nil { - if umountErr := syscall.Unmount(sharePath, syscall.MNT_DETACH|UmountNoFollow); umountErr != nil { - k.Logger().WithError(umountErr).Errorf("failed to unmount vm share path %s", sharePath) - } - } - }() - - // Setup sandbox bindmounts, if specified: - if err = k.setupSandboxBindMounts(ctx, sandbox); err != nil { - return err - } - - return nil -} - func (k *kataAgent) createSandbox(ctx context.Context, sandbox *Sandbox) error { span, ctx := katatrace.Trace(ctx, k.Logger(), "createSandbox", kataAgentTracingTags) defer span.End() - if err := k.setupSharedPath(ctx, sandbox); err != nil { - return err - } return k.configure(ctx, sandbox.hypervisor, sandbox.id, GetSharePath(sandbox.id), sandbox.config.AgentConfig) } @@ -2388,26 +2274,7 @@ func (k *kataAgent) markDead(ctx 
context.Context) { k.disconnect(ctx) } -func (k *kataAgent) cleanup(ctx context.Context, s *Sandbox) { - if err := k.cleanupSandboxBindMounts(s); err != nil { - k.Logger().WithError(err).Errorf("failed to Cleanup sandbox bindmounts") - } - - // Unmount shared path - path := GetSharePath(s.id) - k.Logger().WithField("path", path).Infof("Cleanup agent") - if err := syscall.Unmount(path, syscall.MNT_DETACH|UmountNoFollow); err != nil { - k.Logger().WithError(err).Errorf("failed to unmount vm share path %s", path) - } - - // Unmount mount path - path = getMountPath(s.id) - if err := bindUnmountAllRootfs(ctx, path, s); err != nil { - k.Logger().WithError(err).Errorf("failed to unmount vm mount path %s", path) - } - if err := os.RemoveAll(getSandboxPath(s.id)); err != nil { - k.Logger().WithError(err).Errorf("failed to Cleanup vm path %s", getSandboxPath(s.id)) - } +func (k *kataAgent) cleanup(ctx context.Context) { } func (k *kataAgent) save() persistapi.AgentState { diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 6475e8b06..16dd9dcfe 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -822,6 +822,10 @@ func TestAgentCreateContainer(t *testing.T) { hypervisor: &mockHypervisor{}, } + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + store, err := persist.GetDriver() assert.NoError(err) assert.NotNil(store) @@ -983,7 +987,7 @@ func TestKataCleanupSandbox(t *testing.T) { assert.Nil(err) k := &kataAgent{ctx: context.Background()} - k.cleanup(context.Background(), &s) + k.cleanup(context.Background()) _, err = os.Stat(dir) assert.False(os.IsExist(err)) @@ -1119,119 +1123,6 @@ func TestKataAgentDirs(t *testing.T) { assert.Equal(rafsMountPath(cid), expected) } -func TestSandboxBindMount(t *testing.T) { - if os.Getuid() != 0 { - t.Skip("Test disabled as requires root user") - } - - assert := assert.New(t) - // 
create temporary files to mount: - testMountPath, err := os.MkdirTemp("", "sandbox-test") - assert.NoError(err) - defer os.RemoveAll(testMountPath) - - // create a new shared directory for our test: - kataHostSharedDirSaved := kataHostSharedDir - testHostDir, err := os.MkdirTemp("", "kata-Cleanup") - assert.NoError(err) - kataHostSharedDir = func() string { - return testHostDir - } - defer func() { - kataHostSharedDir = kataHostSharedDirSaved - }() - - m1Path := filepath.Join(testMountPath, "foo.txt") - f1, err := os.Create(m1Path) - assert.NoError(err) - defer f1.Close() - - m2Path := filepath.Join(testMountPath, "bar.txt") - f2, err := os.Create(m2Path) - assert.NoError(err) - defer f2.Close() - - // create sandbox for mounting into - sandbox := &Sandbox{ - ctx: context.Background(), - id: "foobar", - config: &SandboxConfig{ - SandboxBindMounts: []string{m1Path, m2Path}, - }, - } - k := &kataAgent{ctx: context.Background()} - - // make the shared directory for our test: - dir := kataHostSharedDir() - err = os.MkdirAll(path.Join(dir, sandbox.id), 0777) - assert.Nil(err) - defer os.RemoveAll(dir) - - sharePath := GetSharePath(sandbox.id) - mountPath := getMountPath(sandbox.id) - - err = os.MkdirAll(sharePath, DirMode) - assert.Nil(err) - err = os.MkdirAll(mountPath, DirMode) - assert.Nil(err) - - // setup the expeted slave mount: - err = bindMount(sandbox.ctx, mountPath, sharePath, true, "slave") - assert.Nil(err) - defer syscall.Unmount(sharePath, syscall.MNT_DETACH|UmountNoFollow) - - // Test the function. We expect it to succeed and for the mount to exist - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - - // Test the Cleanup function. We expect it to succeed for the mount to be removed. - err = k.cleanupSandboxBindMounts(sandbox) - assert.NoError(err) - - // After successful Cleanup, verify there are not any mounts left behind. 
- stat := syscall.Stat_t{} - mount1CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m1Path)) - err = syscall.Stat(mount1CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - - mount2CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m2Path)) - err = syscall.Stat(mount2CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - - // Now, let's setup the Cleanup to fail. Setup the sandbox bind mount twice, which will result in - // extra mounts being present that the sandbox description doesn't account for (ie, duplicate mounts). - // We expect Cleanup to fail on the first time, since it cannot remove the sandbox-bindmount directory because - // there are leftover mounts. If we run it a second time, however, it should succeed since it'll remove the - // second set of mounts: - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - // Test the Cleanup function. We expect it to succeed for the mount to be removed. 
- err = k.cleanupSandboxBindMounts(sandbox) - assert.Error(err) - err = k.cleanupSandboxBindMounts(sandbox) - assert.NoError(err) - - // - // Now, let's setup the sandbox bindmount to fail, and verify that no mounts are left behind - // - sandbox.config.SandboxBindMounts = append(sandbox.config.SandboxBindMounts, "oh-nos") - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.Error(err) - // Verify there aren't any mounts left behind - stat = syscall.Stat_t{} - err = syscall.Stat(mount1CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - err = syscall.Stat(mount2CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - -} - func TestHandleHugepages(t *testing.T) { if os.Getuid() != 0 { t.Skip("Test disabled as requires root user") diff --git a/src/runtime/virtcontainers/mock_agent.go b/src/runtime/virtcontainers/mock_agent.go index b66441cb2..393597397 100644 --- a/src/runtime/virtcontainers/mock_agent.go +++ b/src/runtime/virtcontainers/mock_agent.go @@ -224,7 +224,7 @@ func (n *mockAgent) addSwap(ctx context.Context, PCIPath vcTypes.PciPath) error func (n *mockAgent) markDead(ctx context.Context) { } -func (n *mockAgent) cleanup(ctx context.Context, s *Sandbox) { +func (n *mockAgent) cleanup(ctx context.Context) { } // save is the Noop agent state saver. It does nothing. diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index f37316844..8c72d90ce 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -71,6 +71,11 @@ const ( // /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is // defined by the orchestrator. 
resCtrlKataOverheadID = "/kata_overhead/" + + sandboxMountsDir = "sandbox-mounts" + + // Restricted permission for shared directory managed by virtiofs + sharedDirMode = os.FileMode(0700) | os.ModeDir ) var ( @@ -191,6 +196,7 @@ type Sandbox struct { hypervisor Hypervisor agent agent store persistapi.PersistDriver + fsShare FilesystemSharer swapDevices []*config.BlockDrive volumes []types.Volume @@ -490,7 +496,13 @@ func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Fac return s, nil } - // Below code path is called only during create, because of earlier Check. + // The code below only gets called when initially creating a sandbox, not when restoring or + // re-creating it. The above check for the sandbox state enforces that. + + if err := s.fsShare.Prepare(ctx); err != nil { + return nil, err + } + if err := s.agent.createSandbox(ctx, s); err != nil { return nil, err } @@ -544,6 +556,12 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor swapDevices: []*config.BlockDrive{}, } + fsShare, err := NewFilesystemShare(s) + if err != nil { + return nil, err + } + s.fsShare = fsShare + if s.store, err = persist.GetDriver(); err != nil || s.store == nil { return nil, fmt.Errorf("failed to get fs persist driver: %v", err) } @@ -791,7 +809,9 @@ func (s *Sandbox) Delete(ctx context.Context) error { s.Logger().WithError(err).Error("failed to Cleanup hypervisor") } - s.agent.cleanup(ctx, s) + if err := s.fsShare.Cleanup(ctx); err != nil { + s.Logger().WithError(err).Error("failed to cleanup share files") + } return s.store.Destroy(s.id) } From 1103f5a4d4aa6849facc4877d5997a3c6784cf9a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 4 Feb 2022 10:25:55 +0000 Subject: [PATCH 43/71] virtcontainers: Use FilesystemSharer for sharing the containers files Switching to the generic FilesystemSharer brings 2 major improvements: 1. Remove container and sandbox specific code from kata_agent.go 2.
Allow for non Linux implementations to provide ways to share container files and root filesystems with the Kata Linux guest. Fixes #3622 Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/container.go | 107 ++----------- src/runtime/virtcontainers/container_test.go | 33 +++- src/runtime/virtcontainers/fs_share_linux.go | 10 +- src/runtime/virtcontainers/kata_agent.go | 156 +------------------ src/runtime/virtcontainers/mount.go | 13 ++ 5 files changed, 62 insertions(+), 257 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index ee0a38079..b57f40e51 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -10,7 +10,6 @@ package virtcontainers import ( "context" - "encoding/hex" "fmt" "io" "os" @@ -29,7 +28,6 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -423,72 +421,6 @@ func (c *Container) setContainerState(state types.StateString) error { return nil } -func (c *Container) shareFiles(ctx context.Context, m Mount, idx int) (string, bool, error) { - randBytes, err := utils.GenerateRandomBytes(8) - if err != nil { - return "", false, err - } - - filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination)) - guestDest := filepath.Join(kataGuestSharedDir(), filename) - - // copy file to contaier's rootfs if filesystem sharing is not supported, otherwise - // bind mount it in the shared directory. 
- caps := c.sandbox.hypervisor.Capabilities(ctx) - if !caps.IsFsSharingSupported() { - c.Logger().Debug("filesystem sharing is not supported, files will be copied") - - fileInfo, err := os.Stat(m.Source) - if err != nil { - return "", false, err - } - - // Ignore the mount if this is not a regular file (excludes - // directory, socket, device, ...) as it cannot be handled by - // a simple copy. But this should not be treated as an error, - // only as a limitation. - if !fileInfo.Mode().IsRegular() { - c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported") - return "", true, nil - } - - if err := c.sandbox.agent.copyFile(ctx, m.Source, guestDest); err != nil { - return "", false, err - } - } else { - // These mounts are created in the shared dir - mountDest := filepath.Join(getMountPath(c.sandboxID), filename) - if !m.ReadOnly { - if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil { - return "", false, err - } - } else { - // For RO mounts, bindmount remount event is not propagated to mount subtrees, - // and it doesn't present in the virtiofsd standalone mount namespace either. - // So we end up a bit tricky: - // 1. make a private ro bind mount to the mount source - // 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here. - // 3. umount the private bind mount created in step 1 - privateDest := filepath.Join(getPrivatePath(c.sandboxID), filename) - - if err := bindMount(c.ctx, m.Source, privateDest, true, "private"); err != nil { - return "", false, err - } - defer func() { - syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow) - }() - - if err := bindMount(c.ctx, privateDest, mountDest, false, "private"); err != nil { - return "", false, err - } - } - // Save HostPath mount value into the mount list of the container. 
- c.mounts[idx].HostPath = mountDest - } - - return guestDest, false, nil -} - // mountSharedDirMounts handles bind-mounts by bindmounting to the host shared // directory which is mounted through virtiofs/9pfs in the VM. // It also updates the container mount list with the HostPath info, and store @@ -503,6 +435,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i } } }() + for idx, m := range c.mounts { // Skip mounting certain system paths from the source on the host side // into the container as it does not make sense to do so. @@ -541,20 +474,18 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i continue } - var ignore bool - var guestDest string - guestDest, ignore, err = c.shareFiles(ctx, m, idx) + sharedFile, err := c.sandbox.fsShare.ShareFile(ctx, c, &c.mounts[idx]) if err != nil { return storages, err } // Expand the list of mounts to ignore. - if ignore { + if sharedFile == nil { ignoredMounts[m.Source] = Mount{Source: m.Source} continue } sharedDirMount := Mount{ - Source: guestDest, + Source: sharedFile.guestPath, Destination: m.Destination, Type: m.Type, Options: m.Options, @@ -581,11 +512,11 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i return storages, fmt.Errorf("unable to create watchable path: %s: %v", watchableHostPath, err) } - watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(guestDest)) + watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(sharedFile.guestPath)) storage := &grpc.Storage{ Driver: kataWatchableBindDevType, - Source: guestDest, + Source: sharedFile.guestPath, Fstype: "bind", MountPoint: watchableGuestMount, Options: m.Options, @@ -616,7 +547,7 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { span.End() }() - if err = syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil { + if err = c.sandbox.fsShare.UnshareFile(ctx, 
c, &m); err != nil { c.Logger().WithFields(logrus.Fields{ "host-path": m.HostPath, "error": err, @@ -624,19 +555,6 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { return err } - if m.Type == "bind" { - s, err := os.Stat(m.HostPath) - if err != nil { - return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath) - } - // Remove the empty file or directory - if s.Mode().IsRegular() && s.Size() == 0 { - os.Remove(m.HostPath) - } - if s.Mode().IsDir() { - syscall.Rmdir(m.HostPath) - } - } return nil } @@ -867,8 +785,8 @@ func (c *Container) rollbackFailingContainerCreation(ctx context.Context) { c.Logger().WithError(err).Error("rollback failed nydusContainerCleanup()") } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil { - c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()") + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil { + c.Logger().WithError(err).Error("rollback failed UnshareRootFilesystem()") } } } @@ -1051,7 +969,7 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil && !force { + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil && !force { return err } } @@ -1064,11 +982,6 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } - shareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - if err := syscall.Rmdir(shareDir); err != nil { - c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir") - } - // container was killed by force, container MUST change its state // as soon as possible just in case one of below operations fail leaving // the containers in a bad state. 
diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index 05b8974a9..b41fcc108 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -146,7 +146,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { return f.Name() } - doUnmountCheck := func(src, dest, hostPath, nonEmptyHostpath, devPath string) { + doUnmountCheck := func(s *Sandbox, src, dest, hostPath, nonEmptyHostpath, devPath string) { mounts := []Mount{ { Source: src, @@ -169,8 +169,10 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } c := Container{ - mounts: mounts, - ctx: context.Background(), + mounts: mounts, + ctx: context.Background(), + id: "fooabr", + sandbox: s, } if err := bindMount(c.ctx, src, hostPath, false, "private"); err != nil { @@ -221,8 +223,21 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { hostPath := createFakeMountDir(t, testDir, "host-path") nonEmptyHostpath := createFakeMountDir(t, testDir, "non-empty-host-path") devPath := createFakeMountDir(t, testDir, "dev-hostpath") + // create sandbox for mounting into + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + config: &SandboxConfig{}, + } + + fsShare, err := NewFilesystemShare(sandbox) + if err != nil { + t.Fatal(err) + } + sandbox.fsShare = fsShare + createFakeMountDir(t, nonEmptyHostpath, "nop") - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) + doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) src = createFakeMountFile(t, testDir, "src") dest = createFakeMountFile(t, testDir, "dest") @@ -235,7 +250,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } f.WriteString("nop\n") f.Close() - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) + doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) } func testSetupFakeRootfs(t *testing.T) (testRawFile, loopDev, mntDir string, err error) { 
@@ -584,8 +599,14 @@ func TestMountSharedDirMounts(t *testing.T) { }, }, } + + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + // setup the shared mounts: - k.setupSharedPath(k.ctx, sandbox) + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) // // Create the mounts that we'll test with diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index dc16a73b7..a7007826f 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -317,10 +317,9 @@ func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Moun func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *Container) (*SharedFile, error) { rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) - if f.sandbox.GetHypervisorType() != string(QemuHypervisor) { - // qemu is supported first, other hypervisors will next - // https://github.com/kata-containers/kata-containers/issues/2724 - return nil, errNydusdNotSupport + virtiofsDaemon, err := getVirtiofsDaemonForNydus(f.sandbox) + if err != nil { + return nil, err } extraOption, err := parseExtraOption(c.rootFs.Options) if err != nil { @@ -333,9 +332,8 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C config: extraOption.Config, } - q, _ := f.sandbox.hypervisor.(*qemu) // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir - if err := q.virtiofsDaemon.Mount(*mountOpt); err != nil { + if err := virtiofsDaemon.Mount(*mountOpt); err != nil { return nil, err } rootfs := &grpc.Storage{} diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 8572f584a..1a1054305 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1158,158 +1158,18 @@ func (k *kataAgent) rollbackFailingContainerCreation(ctx 
context.Context, c *Con k.Logger().WithError(err2).Error("rollback failed unmountHostMounts()") } - if c.rootFs.Type == NydusRootFSType { - if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup") - } - } else { - if err2 := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed bindUnmountContainerRootfs()") - } + if err2 := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err2 != nil { + k.Logger().WithError(err2).Error("rollback failed UnshareRootfs()") } } } -func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { - var virtiofsDaemon VirtiofsDaemon - switch sandbox.GetHypervisorType() { - case string(QemuHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon - case string(ClhHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon - default: - return nil, errNydusdNotSupport - } - return virtiofsDaemon, nil -} - -func (k *kataAgent) buildContainerRootfsWithNydus(sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox) - if err != nil { - return nil, err - } - extraOption, err := parseExtraOption(c.rootFs.Options) - if err != nil { - return nil, err - } - mountOpt := &MountOption{ - mountpoint: rafsMountPath(c.id), - source: extraOption.Source, - config: extraOption.Config, - } - k.Logger().Infof("nydus option: %v", extraOption) - // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir - if err := virtiofsDaemon.Mount(*mountOpt); err != nil { - return nil, err - } - rootfs := &grpc.Storage{} - containerShareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - - // mkdir rootfs, guest at /run/kata-containers/shared/containers//rootfs - rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix) - if err := 
os.MkdirAll(rootfsDir, DirMode); err != nil { - return nil, err - } - // bindmount snapshot dir which snapshotter allocated - // to guest /run/kata-containers/shared/containers//snapshotdir - snapshotShareDir := filepath.Join(containerShareDir, snapshotDir) - if err := bindMount(k.ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil { - return nil, err - } - - // so rootfs = overlay(upperdir, workerdir, lowerdir) - rootfs.MountPoint = filepath.Join(rootPathParent, c.rootfsSuffix) - rootfs.Source = typeOverlayFS - rootfs.Fstype = typeOverlayFS - rootfs.Driver = kataOverlayDevType - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs"))) - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work"))) - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir))) - rootfs.Options = append(rootfs.Options, "index=off") - k.Logger().Infof("rootfs info: %#v\n", rootfs) - return rootfs, nil -} - -func (k *kataAgent) buildContainerRootfs(ctx context.Context, sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - if c.rootFs.Type == NydusRootFSType { - return k.buildContainerRootfsWithNydus(sandbox, c, rootPathParent) - } - if c.state.Fstype != "" && c.state.BlockDeviceID != "" { - // The rootfs storage volume represents the container rootfs - // mount point inside the guest. - // It can be a block based device (when using block based container - // overlay on the host) mount or a 9pfs one (for all other overlay - // implementations). - rootfs := &grpc.Storage{} - - // This is a block based device rootfs. 
- device := sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID) - if device == nil { - k.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id") - return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID) - } - - blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) - if !ok || blockDrive == nil { - k.Logger().Error("malformed block drive") - return nil, fmt.Errorf("malformed block drive") - } - switch { - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: - rootfs.Driver = kataMmioBlkDevType - rootfs.Source = blockDrive.VirtPath - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: - rootfs.Driver = kataBlkCCWDevType - rootfs.Source = blockDrive.DevNo - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: - rootfs.Driver = kataBlkDevType - rootfs.Source = blockDrive.PCIPath.String() - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: - rootfs.Driver = kataSCSIDevType - rootfs.Source = blockDrive.SCSIAddr - default: - return nil, fmt.Errorf("Unknown block device driver: %s", sandbox.config.HypervisorConfig.BlockDeviceDriver) - } - - rootfs.MountPoint = rootPathParent - rootfs.Fstype = c.state.Fstype - - if c.state.Fstype == "xfs" { - rootfs.Options = []string{"nouuid"} - } - - // Ensure container mount destination exists - // TODO: remove dependency on shared fs path. shared fs is just one kind of storage source. - // we should not always use shared fs path for all kinds of storage. Instead, all storage - // should be bind mounted to a tmpfs path for containers to use. - if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil { - return nil, err - } - return rootfs, nil - } - - // This is not a block based device rootfs. We are going to bind mount it into the shared drive - // between the host and the guest. 
- // With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory - // (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from - // the host and it will show up in the guest. - if err := bindMountContainerRootfs(ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil { - return nil, err - } - - return nil, nil -} - func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Container) (p *Process, err error) { span, ctx := katatrace.Trace(ctx, k.Logger(), "createContainer", kataAgentTracingTags) defer span.End() var ctrStorages []*grpc.Storage var ctrDevices []*grpc.Device - var rootfs *grpc.Storage - - // This is the guest absolute root path for that container. - rootPathParent := filepath.Join(kataGuestSharedDir(), c.id) - rootPath := filepath.Join(rootPathParent, c.rootfsSuffix) + var sharedRootfs *SharedFile // In case the container creation fails, the following defer statement // takes care of rolling back actions previously performed. @@ -1320,19 +1180,19 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } }() - // setup rootfs -- if its block based, we'll receive a non-nil storage object representing + // Share the container rootfs -- if its block based, we'll receive a non-nil storage object representing // the block device for the rootfs, which us utilized for mounting in the guest. This'll be handled // already for non-block based rootfs - if rootfs, err = k.buildContainerRootfs(ctx, sandbox, c, rootPathParent); err != nil { + if sharedRootfs, err = sandbox.fsShare.ShareRootFilesystem(ctx, c); err != nil { return nil, err } - if rootfs != nil { + if sharedRootfs.storage != nil { // Add rootfs to the list of container storage. 
// We only need to do this for block based rootfs, as we // want the agent to mount it into the right location // (kataGuestSharedDir/ctrID/ - ctrStorages = append(ctrStorages, rootfs) + ctrStorages = append(ctrStorages, sharedRootfs.storage) } ociSpec := c.GetPatchedOCISpec() @@ -1408,7 +1268,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } // We need to give the OCI spec our absolute rootfs path in the guest. - grpcSpec.Root.Path = rootPath + grpcSpec.Root.Path = sharedRootfs.guestPath sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox) diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index f782c5d09..76e5fe3f4 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -388,6 +388,19 @@ func bindUnmountContainerSnapshotDir(ctx context.Context, sharedDir, cID string) return bindUnmountContainerShareDir(ctx, sharedDir, cID, snapshotDir) } +func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { + var virtiofsDaemon VirtiofsDaemon + switch sandbox.GetHypervisorType() { + case string(QemuHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon + case string(ClhHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon + default: + return nil, errNydusdNotSupport + } + return virtiofsDaemon, nil +} + func nydusContainerCleanup(ctx context.Context, sharedDir string, c *Container) error { sandbox := c.sandbox virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox) From 082d538cb461852c96700ffe68ef11c8dd2fa059 Mon Sep 17 00:00:00 2001 From: Tanweer Noor Date: Thu, 24 Feb 2022 23:08:59 -0800 Subject: [PATCH 44/71] runtime: make selinux configurable removes --tags selinux handling in the makefile (part of it introduced here: d78ffd6) and makes selinux configurable via configuration.toml Fixes: #3631 Signed-off-by: Tanweer Noor --- src/runtime/Makefile | 25 +++---------------- 
src/runtime/config/configuration-acrn.toml.in | 3 +++ src/runtime/config/configuration-clh.toml.in | 3 +++ src/runtime/config/configuration-fc.toml.in | 3 +++ src/runtime/config/configuration-qemu.toml.in | 3 +++ src/runtime/pkg/katautils/config.go | 2 ++ src/runtime/virtcontainers/clh.go | 10 +++++--- src/runtime/virtcontainers/fc.go | 10 +++++--- src/runtime/virtcontainers/hypervisor.go | 3 +++ src/runtime/virtcontainers/qemu.go | 11 +++++--- 10 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index ff0f1fb38..73805741e 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -158,6 +158,8 @@ DEFDISABLEGUESTSECCOMP := true #Default experimental features enabled DEFAULTEXPFEATURES := [] +DEFDISABLESELINUX := false + #Default entropy source DEFENTROPYSOURCE := /dev/urandom DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] @@ -193,9 +195,6 @@ DEFSTATICRESOURCEMGMT ?= false DEFBINDMOUNTS := [] -# Features -FEATURE_SELINUX ?= check - SED = sed CLI_DIR = cmd @@ -433,6 +432,7 @@ USER_VARS += DEFNETWORKMODEL_CLH USER_VARS += DEFNETWORKMODEL_FC USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTSECCOMP +USER_VARS += DEFDISABLESELINUX USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN @@ -460,7 +460,6 @@ USER_VARS += DEFSTATICRESOURCEMGMT USER_VARS += DEFSTATICRESOURCEMGMT_FC USER_VARS += DEFBINDMOUNTS USER_VARS += DEFVFIOMODE -USER_VARS += FEATURE_SELINUX USER_VARS += BUILDFLAGS @@ -475,21 +474,6 @@ QUIET_TEST = $(Q:@=@echo ' TEST '$@;) BUILDTAGS := -ifneq ($(FEATURE_SELINUX),no) - SELINUXTAG := $(shell ./hack/selinux_tag.sh) - - ifneq ($(SELINUXTAG),) - override FEATURE_SELINUX = yes - BUILDTAGS += --tags "$(SELINUXTAG)" - else - ifeq ($(FEATURE_SELINUX),yes) - $(error "ERROR: SELinux support requested, but libselinux is not available") - endif - - override FEATURE_SELINUX = no - endif -endif - # go build common flags BUILDFLAGS := 
-buildmode=pie -mod=vendor ${BUILDTAGS} @@ -749,9 +733,6 @@ endif @printf "\tKnown: $(sort $(HYPERVISORS))\n" @printf "\tAvailable for this architecture: $(sort $(KNOWN_HYPERVISORS))\n" @printf "\n" - @printf "‱ Features:\n" - @printf "\tSELinux (FEATURE_SELINUX): $(FEATURE_SELINUX)\n" - @printf "\n" @printf "‱ Summary:\n" @printf "\n" @printf "\tdestination install path (DESTDIR) : %s\n" $(abspath $(DESTDIR)) diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 4ef211835..2a9736e9e 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -185,6 +185,9 @@ internetworking_model="@DEFNETWORKMODEL_ACRN@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 1296d20b9..afb537075 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -232,6 +232,9 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). 
# (default: disabled) diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index ad3e5c672..d9eb093c2 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -309,6 +309,9 @@ internetworking_model="@DEFNETWORKMODEL_FC@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 804849a13..00c5dbfff 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -503,6 +503,9 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). 
# (default: disabled) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index e62355933..1a4d3e1c6 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -136,6 +136,7 @@ type hypervisor struct { GuestSwap bool `toml:"enable_guest_swap"` Rootless bool `toml:"rootless"` DisableSeccomp bool `toml:"disable_seccomp"` + DisableSeLinux bool `toml:"disable_selinux"` } type runtime struct { @@ -878,6 +879,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { EnableAnnotations: h.EnableAnnotations, DisableSeccomp: h.DisableSeccomp, ConfidentialGuest: h.ConfidentialGuest, + DisableSeLinux: h.DisableSeLinux, }, nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 11c1f7e35..db82ab337 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1,5 +1,6 @@ //go:build linux // +build linux + // // Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH // @@ -469,10 +470,13 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { // virtiofsd are executed by kata-runtime after this call, run with // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. 
- if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil { - return err + if !clh.config.DisableSeLinux { + + if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") } - defer label.SetProcessLabel("") if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus { clh.Logger().WithField("function", "StartVM").Info("Starting virtiofsDaemon") diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 39985907f..7fd7909f4 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1,5 +1,6 @@ //go:build linux // +build linux + // // Copyright (c) 2018 Intel Corporation // @@ -795,10 +796,13 @@ func (fc *firecracker) StartVM(ctx context.Context, timeout int) error { // are executed by kata-runtime after this call, run with the SELinux // label. If these processes require privileged, we do not want to run // them under confinement. 
- if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { - return err + if !fc.config.DisableSeLinux { + + if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") } - defer label.SetProcessLabel("") err = fc.fcInit(ctx, fcTimeout) if err != nil { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 26f33f5d6..cebc51570 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -498,6 +498,9 @@ type HypervisorConfig struct { // Disable seccomp from the hypervisor process DisableSeccomp bool + + // Disable selinux from the hypervisor process + DisableSeLinux bool } // vcpu mapping from vcpu number to thread number diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index ba3bb9588..67b24b9a4 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1,5 +1,6 @@ //go:build linux // +build linux + // // Copyright (c) 2016 Intel Corporation // @@ -845,11 +846,13 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // virtiofsd are executed by kata-runtime after this call, run with // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. 
- if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { - return err - } - defer label.SetProcessLabel("") + if !q.config.DisableSeLinux { + if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") + } if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus { err = q.setupVirtiofsDaemon(ctx) if err != nil { From 827ab82a828feec7e021cc24623657d5b68639f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 25 Feb 2022 21:18:07 +0100 Subject: [PATCH 45/71] tools: clh: Fix unbound variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4c164afbac3df0611a03f7dc774833ee51184178 renamed extra_build_args to features, but did it only in one place, leading to: ``` 21:15:28 /home/jenkins/workspace/kata-containers-2.0-ubuntu-ARM-PR/go/src/github.com/kata-containers/kata-containers/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh: line 55: features: unbound variable 21:15:29 make[1]: *** [tools/packaging/kata-deploy/local-build/Makefile:30: cloud-hypervisor-tarball-build] Error 1 ``` Fixes: #3775 Signed-off-by: Fabiano FidĂȘncio --- .../static-build/cloud-hypervisor/build-static-clh.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh index 1f49abcaa..cf2aaec38 100755 --- a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh +++ b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh @@ -16,7 +16,7 @@ ARCH=$(uname -m) script_dir=$(dirname $(readlink -f "$0")) kata_version="${kata_version:-}" force_build_from_source="${force_build_from_source:-false}" -extra_build_args="${extra_build_args:-}" +features="${features:-}" source "${script_dir}/../../scripts/lib.sh" @@ -68,7 +68,7 @@ if [ "${ARCH}" == 
"aarch64" ]; then force_build_from_source="true" fi -if [ -n "${extra_build_args}" ]; then +if [ -n "${features}" ]; then info "As an extra build argument has been passed to the script, forcing to build from source" force_build_from_source="true" fi From a9ba7c132b174e3209ca1d7d93bb4aa29d5f0c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 25 Feb 2022 22:29:11 +0100 Subject: [PATCH 46/71] clh: Fix typo on HotplugRemoveDevice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A copy and paste mistake was made and the error on HotplugRemoveDevice() should be about removal and not about addition. Signed-off-by: Fabiano FidĂȘncio --- src/runtime/virtcontainers/clh.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 11c1f7e35..22a8bec02 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -657,7 +657,7 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int defer span.End() if clh.config.ConfidentialGuest { - return nil, errors.New("Device hotplug addition is not supported in confidential mode") + return nil, errors.New("Device hotplug removal is not supported in confidential mode") } var deviceID string From 8edca8bbd170102ae3a4556a810cef6244a48663 Mon Sep 17 00:00:00 2001 From: sailorvii Date: Mon, 28 Feb 2022 10:03:09 +0800 Subject: [PATCH 47/71] kata-agent: Fix mismatching error of cgroup and mountinfo. 
The content about systemd in "/proc/self/cgroup" is as: 1:name=systemd:/kubepods/pod1815643d-3789-4e4e-aaf4-00de024912e1/0e15a65bd5f7b30a0b818d90706212354d8b3f0998a1495473c3be9a24706ccf and in "/proc/self/mountinfo" is as: 30 29 0:26 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:6 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd The keys extracted from the two files are the same as "name=systemd". So no need to rename the key to "systemd". Fixes: #3385 Signed-off-by: sailorvii --- src/agent/rustjail/src/cgroups/fs/mod.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index b4cf73e31..b264e5e9d 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -905,13 +905,7 @@ pub fn get_paths() -> Result> { let keys: Vec<&str> = fl[1].split(',').collect(); for key in &keys { - // this is a workaround, cgroup file are using `name=systemd`, - // but if file system the name is `systemd` - if *key == "name=systemd" { - m.insert("systemd".to_string(), fl[2].to_string()); - } else { - m.insert(key.to_string(), fl[2].to_string()); - } + m.insert(key.to_string(), fl[2].to_string()); } } Ok(m) From 81ed269ed2f742984ab0a1cca9e4c62acf8a3cc6 Mon Sep 17 00:00:00 2001 From: bin Date: Mon, 28 Feb 2022 16:52:49 +0800 Subject: [PATCH 48/71] runtime: use Cmd.StdoutPipe instead of self-created pipe Nydusd uses a bufio.Scanner to check if nydusd process has exited, but stderr/stdout passed to Cmd is self-created pipe, this pipe will not be closed if the process start failing. Use standard Cmd.StdoutPipe can close the stdout and kata shim will detect the exit of the nydusd process, then call cmd.Wait to reap the process' resources.
Fixes: #3783 Signed-off-by: bin --- src/runtime/virtcontainers/nydusd.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/runtime/virtcontainers/nydusd.go b/src/runtime/virtcontainers/nydusd.go index c9315ee37..7931e4556 100644 --- a/src/runtime/virtcontainers/nydusd.go +++ b/src/runtime/virtcontainers/nydusd.go @@ -104,12 +104,13 @@ func (nd *nydusd) Start(ctx context.Context, onQuit onQuitFunc) (int, error) { return pid, err } cmd := exec.Command(nd.path, args...) - r, w, err := os.Pipe() + stdout, err := cmd.StdoutPipe() if err != nil { return pid, err } - cmd.Stdout = w - cmd.Stderr = w + + cmd.Stderr = cmd.Stdout + fields := logrus.Fields{ "path": nd.path, "args": strings.Join(args, " "), @@ -120,7 +121,7 @@ func (nd *nydusd) Start(ctx context.Context, onQuit onQuitFunc) (int, error) { } // Monitor nydusd's stdout/stderr and stop sandbox if nydusd quits go func() { - scanner := bufio.NewScanner(r) + scanner := bufio.NewScanner(stdout) for scanner.Scan() { nd.Logger().Info(scanner.Text()) } From 0bafa2def979ed45e78e1c8eff9dac1340b3dee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Feb 2022 10:24:33 +0100 Subject: [PATCH 49/71] config: clh: Mention supported TEEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's mention the supported TEEs to be used with confidential guests. Right now, Cloud Hypervisor supports only Intel TDX, used together with TD Shim.
Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index afb537075..bf00dff61 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -28,6 +28,9 @@ image = "@IMAGEPATH@" # - Memory Hotplug # - NVDIMM devices # +# Supported TEEs: +# * Intel TDX +# # Default false # confidential_guest = true From 641d475fa663f41874a6451a34f134c36ee0be2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Feb 2022 10:27:21 +0100 Subject: [PATCH 50/71] config: clh: Use "Intel TDX" instead of just "TDX" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use "Intel TDX" rather than just "TDX", as it can ease the understanding of the terminology. Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index bf00dff61..5bdc2cc06 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -40,7 +40,7 @@ image = "@IMAGEPATH@" # # For more information about firmwared that can be used with specific TEEs, # please, refer to: -# * TDX: +# * Intel TDX: # - td-shim: https://github.com/confidential-containers/td-shim # # firmware = "@FIRMWAREPATH@" From de57466212e06769165bb821882ce58204ba5a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Feb 2022 11:57:42 +0100 Subject: [PATCH 51/71] config: Expand confidential_guest comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's clarify that an error will be reported in case confidential_guest is enabled, but the hardware where Kata 
Containers is running doesn't provide the required feature set. Fixes: #3787 Signed-off-by: Fabiano FidĂȘncio --- src/runtime/config/configuration-clh.toml.in | 3 ++- src/runtime/config/configuration-qemu.toml.in | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 5bdc2cc06..4afafaf08 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -19,7 +19,8 @@ image = "@IMAGEPATH@" # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. # The Kata Containers runtime dynamically detects the available feature set and -# aims at enabling the largest possible one. +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. # # Known limitations: # * Does not work by design: diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 00c5dbfff..af5c58364 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -20,7 +20,8 @@ machine_type = "@MACHINETYPE@" # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. # The Kata Containers runtime dynamically detects the available feature set and -# aims at enabling the largest possible one. +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. 
# # Known limitations: # * Does not work by design: From abc681ca5f69e79daadf63d51cc4e8700fa218c8 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 25 Nov 2021 09:47:44 +0100 Subject: [PATCH 52/71] katautils: Add Darwin stub for the netNS API And move the current implementation into a Linux only file. Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/network_darwin.go | 22 +++++++++++++++++++ .../{network.go => network_linux.go} | 0 2 files changed, 22 insertions(+) create mode 100644 src/runtime/pkg/katautils/network_darwin.go rename src/runtime/pkg/katautils/{network.go => network_linux.go} (100%) diff --git a/src/runtime/pkg/katautils/network_darwin.go b/src/runtime/pkg/katautils/network_darwin.go new file mode 100644 index 000000000..d913557cb --- /dev/null +++ b/src/runtime/pkg/katautils/network_darwin.go @@ -0,0 +1,22 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package katautils + +import ( + vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" +) + +func EnterNetNS(networkID string, cb func() error) error { + return nil +} + +func SetupNetworkNamespace(config *vc.NetworkConfig) error { + return nil +} + +func cleanupNetNS(netNSPath string) error { + return nil +} diff --git a/src/runtime/pkg/katautils/network.go b/src/runtime/pkg/katautils/network_linux.go similarity index 100% rename from src/runtime/pkg/katautils/network.go rename to src/runtime/pkg/katautils/network_linux.go From 7d64ae7a417dd968cd355f4738764f9206f62eea Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 26 Feb 2022 10:49:52 +0100 Subject: [PATCH 53/71] runtime: Add a syscall wrapper package It allows to support syscall variations between host OSes. 
Signed-off-by: Samuel Ortiz --- src/runtime/pkg/syscall/syscall_darwin.go | 16 ++++++++++++++++ src/runtime/pkg/syscall/syscall_linux.go | 14 ++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/runtime/pkg/syscall/syscall_darwin.go create mode 100644 src/runtime/pkg/syscall/syscall_linux.go diff --git a/src/runtime/pkg/syscall/syscall_darwin.go b/src/runtime/pkg/syscall/syscall_darwin.go new file mode 100644 index 000000000..a5cc79d18 --- /dev/null +++ b/src/runtime/pkg/syscall/syscall_darwin.go @@ -0,0 +1,16 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package syscall + +import ( + "syscall" +) + +func Gettid() int { + // There is no equivalent to a thread ID on Darwin. + // We use the PID instead. + return syscall.Getpid() +} diff --git a/src/runtime/pkg/syscall/syscall_linux.go b/src/runtime/pkg/syscall/syscall_linux.go new file mode 100644 index 000000000..b05fc156d --- /dev/null +++ b/src/runtime/pkg/syscall/syscall_linux.go @@ -0,0 +1,14 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package syscall + +import ( + "syscall" +) + +func Gettid() int { + return syscall.Gettid() +} From 56751089c00dc388b476a217ee7ce80aceb50b27 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 25 Nov 2021 18:24:18 +0100 Subject: [PATCH 54/71] katautils: Use a syscall wrapper for the hook JSON state There is no real equivalent of a thread ID on Darwin. 
Signed-off-by: Samuel Ortiz --- src/runtime/pkg/katautils/hook.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/runtime/pkg/katautils/hook.go b/src/runtime/pkg/katautils/hook.go index 303773e60..50ac95cb8 100644 --- a/src/runtime/pkg/katautils/hook.go +++ b/src/runtime/pkg/katautils/hook.go @@ -16,6 +16,7 @@ import ( "time" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + syscallWrapper "github.com/kata-containers/kata-containers/src/runtime/pkg/syscall" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -38,7 +39,7 @@ func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundleP katatrace.AddTags(span, "path", hook.Path, "args", hook.Args) state := specs.State{ - Pid: syscall.Gettid(), + Pid: syscallWrapper.Gettid(), Bundle: bundlePath, ID: cid, Annotations: spec.Annotations, From ad0449195d1dda5d1dc588f66bc4fe2035d4aafe Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 25 Nov 2021 10:17:08 +0100 Subject: [PATCH 55/71] virtcontainers: Convert stats dev_t to uint64 We need to convert them to uint64 as their types may differ on various host OSes, but unix.Major|Minor takes a uint64 regardless. 
Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/container.go | 8 ++++---- src/runtime/virtcontainers/device/config/config.go | 4 ++-- src/runtime/virtcontainers/device/config/pmem.go | 4 ++-- src/runtime/virtcontainers/kata_agent.go | 2 +- src/runtime/virtcontainers/mount.go | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index b57f40e51..582e196ae 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -629,8 +629,8 @@ func (c *Container) createBlockDevices(ctx context.Context) error { HostPath: m.Source, ContainerPath: m.Destination, DevType: "b", - Major: int64(unix.Major(stat.Rdev)), - Minor: int64(unix.Minor(stat.Rdev)), + Major: int64(unix.Major(uint64(stat.Rdev))), + Minor: int64(unix.Minor(uint64(stat.Rdev))), ReadOnly: m.ReadOnly, } // Check whether source can be used as a pmem device @@ -1226,8 +1226,8 @@ func (c *Container) plugDevice(ctx context.Context, devicePath string) error { HostPath: devicePath, ContainerPath: filepath.Join(kataGuestSharedDir(), c.id), DevType: "b", - Major: int64(unix.Major(stat.Rdev)), - Minor: int64(unix.Minor(stat.Rdev)), + Major: int64(unix.Major(uint64(stat.Rdev))), + Minor: int64(unix.Minor(uint64(stat.Rdev))), }) if err != nil { return fmt.Errorf("device manager failed to create rootfs device for %q: %v", devicePath, err) diff --git a/src/runtime/virtcontainers/device/config/config.go b/src/runtime/virtcontainers/device/config/config.go index 835059ed6..69be4f583 100644 --- a/src/runtime/virtcontainers/device/config/config.go +++ b/src/runtime/virtcontainers/device/config/config.go @@ -425,8 +425,8 @@ func getVhostUserDevName(dirname string, majorNum, minorNum uint32) (string, err return "", err } - devMajor := unix.Major(devStat.Rdev) - devMinor := unix.Minor(devStat.Rdev) + devMajor := unix.Major(uint64(devStat.Rdev)) + devMinor := 
unix.Minor(uint64(devStat.Rdev)) if devMajor == majorNum && devMinor == minorNum { return file.Name(), nil } diff --git a/src/runtime/virtcontainers/device/config/pmem.go b/src/runtime/virtcontainers/device/config/pmem.go index 33ce4fdff..81d1da9b5 100644 --- a/src/runtime/virtcontainers/device/config/pmem.go +++ b/src/runtime/virtcontainers/device/config/pmem.go @@ -51,8 +51,8 @@ func PmemDeviceInfo(source, destination string) (*DeviceInfo, error) { device := &DeviceInfo{ ContainerPath: destination, DevType: "b", - Major: int64(unix.Major(stat.Dev)), - Minor: int64(unix.Minor(stat.Dev)), + Major: int64(unix.Major(uint64(stat.Dev))), + Minor: int64(unix.Minor(uint64(stat.Dev))), Pmem: true, DriverOptions: make(map[string]string), } diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 1a1054305..47a1c1cc3 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -2084,7 +2084,7 @@ func (k *kataAgent) copyFile(ctx context.Context, src, dst string) error { cpReq := &grpc.CopyFileRequest{ Path: dst, DirMode: uint32(DirMode), - FileMode: st.Mode, + FileMode: uint32(st.Mode), FileSize: fileSize, Uid: int32(st.Uid), Gid: int32(st.Gid), diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index 76e5fe3f4..c2879f213 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -144,8 +144,8 @@ func getDeviceForPath(path string) (device, error) { if isHostDevice(path) { // stat.Rdev describes the device that this file (inode) represents. 
- devMajor = major(stat.Rdev) - devMinor = minor(stat.Rdev) + devMajor = major(uint64(stat.Rdev)) + devMinor = minor(uint64(stat.Rdev)) return device{ major: devMajor, @@ -154,8 +154,8 @@ func getDeviceForPath(path string) (device, error) { }, nil } // stat.Dev points to the underlying device containing the file - devMajor = major(stat.Dev) - devMinor = minor(stat.Dev) + devMajor = major(uint64(stat.Dev)) + devMinor = minor(uint64(stat.Dev)) path, err = filepath.Abs(path) if err != nil { From 5be188cc29a1281aa972d8db5103d1f4083e1fce Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 11:43:57 -0800 Subject: [PATCH 56/71] utils: Add darwin stub Add a stub for utils_darwin to facilitate building this package on Darwin. We can probably drop this empty stub if we have better abstraction for the various parts of virtcontainers that call it today... Fixes: #3777 Signed-off-by: Eric Ernst --- src/runtime/virtcontainers/utils/utils_darwin.go | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 src/runtime/virtcontainers/utils/utils_darwin.go diff --git a/src/runtime/virtcontainers/utils/utils_darwin.go b/src/runtime/virtcontainers/utils/utils_darwin.go new file mode 100644 index 000000000..54b2124ce --- /dev/null +++ b/src/runtime/virtcontainers/utils/utils_darwin.go @@ -0,0 +1,10 @@ +// Copyright (c) 2022 Apple Inc.
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package utils + +func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) { + return +} From cc58cf69931eb3d29ebc95a8053f8125146dfd78 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 11:50:04 -0800 Subject: [PATCH 57/71] resourcecontrol: convert stats dev_t to uint64 types Their types may differ on various host OSes, but unix.Major|Minor always takes a uint64 Depends-on: github.com/kata-containers/tests#4516 Signed-off-by: Eric Ernst --- src/runtime/pkg/resourcecontrol/utils.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go index 9ab592be1..6fafe2d6a 100644 --- a/src/runtime/pkg/resourcecontrol/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -35,8 +35,8 @@ func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { return nil, fmt.Errorf("unsupported device type: %v", devType) } - major := int64(unix.Major(st.Rdev)) - minor := int64(unix.Minor(st.Rdev)) + major := int64(unix.Major(uint64(st.Rdev))) + minor := int64(unix.Minor(uint64(st.Rdev))) deviceRule.Major = major deviceRule.Minor = minor From 6a5c634490882e517b3ba11b6b38a0ccbd526c98 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 12:05:01 -0800 Subject: [PATCH 58/71] resourcecontrol: SystemdCgroup check is not necessarily linux specific This utility function is also used to check the spec that will run in the guest - no need for this to be linux specific.
Signed-off-by: Eric Ernst --- src/runtime/pkg/resourcecontrol/utils.go | 18 ++++++++++++++++++ src/runtime/pkg/resourcecontrol/utils_linux.go | 17 ----------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go index 6fafe2d6a..835cb54c4 100644 --- a/src/runtime/pkg/resourcecontrol/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -7,6 +7,7 @@ package resourcecontrol import ( "fmt" + "strings" "github.com/opencontainers/runc/libcontainer/devices" "github.com/opencontainers/runtime-spec/specs-go" @@ -57,3 +58,20 @@ func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) { Access: string(dev.Permissions), }, nil } + +func IsSystemdCgroup(cgroupPath string) bool { + + // If we are utilizing systemd to manage cgroups, we expect to receive a path + // in the format slice:scopeprefix:name. A typical example would be: + // + // system.slice:docker:6b4c4a4d0cc2a12c529dcb13a2b8e438dfb3b2a6af34d548d7d + // + // Based on this, let's split by the ':' delimiter and verify that the first + // section has .slice as a suffix. + parts := strings.Split(cgroupPath, ":") + if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") { + return true + } + + return false +} diff --git a/src/runtime/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go index 7a83db8f0..73d77dc23 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -43,23 +43,6 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil } -func IsSystemdCgroup(cgroupPath string) bool { - - // If we are utilizing systemd to manage cgroups, we expect to receive a path - // in the format slice:scopeprefix:name. 
A typical example would be: - // - // system.slice:docker:6b4c4a4d0cc2a12c529dcb13a2b8e438dfb3b2a6af34d548d7d - // - // Based on this, let's split by the ':' delimiter and verify that the first - // section has .slice as a suffix. - parts := strings.Split(cgroupPath, ":") - if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") { - return true - } - - return false -} - func newProperty(name string, units interface{}) systemdDbus.Property { return systemdDbus.Property{ Name: name, From b31876eefb6fa1ffe0b95bc42ec1d09674320d04 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 14:34:41 -0800 Subject: [PATCH 59/71] device-manager: move linux-only test to a linux-only file We can't Mkdev on Darwin - let's make sure the vfio test is in a linux-only file. Signed-off-by: Eric Ernst --- .../device/manager/manager_linux_test.go | 100 ++++++++++++++++++ .../device/manager/manager_test.go | 81 -------------- 2 files changed, 100 insertions(+), 81 deletions(-) create mode 100644 src/runtime/virtcontainers/device/manager/manager_linux_test.go diff --git a/src/runtime/virtcontainers/device/manager/manager_linux_test.go b/src/runtime/virtcontainers/device/manager/manager_linux_test.go new file mode 100644 index 000000000..78773fc5c --- /dev/null +++ b/src/runtime/virtcontainers/device/manager/manager_linux_test.go @@ -0,0 +1,100 @@ +// Copyright (c) 2017 Intel Corporation +// Copyright (c) 2018 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package manager + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" + "github.com/stretchr/testify/assert" + + "golang.org/x/sys/unix" +) + +func 
TestAttachVhostUserBlkDevice(t *testing.T) { + rootEnabled := true + tc := ktu.NewTestConstraint(false) + if tc.NotValid(ktu.NeedRoot()) { + rootEnabled = false + } + + tmpDir, err := os.MkdirTemp("", "") + dm := &deviceManager{ + blockDriver: VirtioBlock, + devices: make(map[string]api.Device), + vhostUserStoreEnabled: true, + vhostUserStorePath: tmpDir, + } + assert.Nil(t, err) + defer os.RemoveAll(tmpDir) + + vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") + vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") + deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") + deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") + + err = os.MkdirAll(vhostUserDevNodePath, dirMode) + assert.Nil(t, err) + err = os.MkdirAll(vhostUserSockPath, dirMode) + assert.Nil(t, err) + _, err = os.Create(deviceSockPath) + assert.Nil(t, err) + + // mknod requires root privilege, call mock function for non-root to + // get VhostUserBlk device type. + if rootEnabled == true { + err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) + assert.Nil(t, err) + } else { + savedFunc := config.GetVhostUserNodeStatFunc + + _, err = os.Create(deviceNodePath) + assert.Nil(t, err) + + config.GetVhostUserNodeStatFunc = func(devNodePath string, + devNodeStat *unix.Stat_t) error { + if deviceNodePath != devNodePath { + return fmt.Errorf("mock GetVhostUserNodeStatFunc error") + } + + devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) + return nil + } + + defer func() { + config.GetVhostUserNodeStatFunc = savedFunc + }() + } + + path := "/dev/vda" + deviceInfo := config.DeviceInfo{ + HostPath: deviceNodePath, + ContainerPath: path, + DevType: "b", + Major: config.VhostUserBlkMajor, + Minor: 0, + } + + devReceiver := &api.MockDeviceReceiver{} + device, err := dm.NewDevice(deviceInfo) + assert.Nil(t, err) + _, ok := device.(*drivers.VhostUserBlkDevice) + assert.True(t, ok) + + err = 
device.Attach(context.Background(), devReceiver) + assert.Nil(t, err) + + err = device.Detach(context.Background(), devReceiver) + assert.Nil(t, err) +} diff --git a/src/runtime/virtcontainers/device/manager/manager_test.go b/src/runtime/virtcontainers/device/manager/manager_test.go index d6fab6d26..f0d7ef974 100644 --- a/src/runtime/virtcontainers/device/manager/manager_test.go +++ b/src/runtime/virtcontainers/device/manager/manager_test.go @@ -8,19 +8,15 @@ package manager import ( "context" - "fmt" "os" "path/filepath" "strconv" "testing" - ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" "github.com/stretchr/testify/assert" - - "golang.org/x/sys/unix" ) const fileMode0640 = os.FileMode(0640) @@ -217,83 +213,6 @@ func TestAttachBlockDevice(t *testing.T) { assert.Nil(t, err) } -func TestAttachVhostUserBlkDevice(t *testing.T) { - rootEnabled := true - tc := ktu.NewTestConstraint(false) - if tc.NotValid(ktu.NeedRoot()) { - rootEnabled = false - } - - tmpDir, err := os.MkdirTemp("", "") - dm := &deviceManager{ - blockDriver: VirtioBlock, - devices: make(map[string]api.Device), - vhostUserStoreEnabled: true, - vhostUserStorePath: tmpDir, - } - assert.Nil(t, err) - defer os.RemoveAll(tmpDir) - - vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") - vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") - deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") - deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") - - err = os.MkdirAll(vhostUserDevNodePath, dirMode) - assert.Nil(t, err) - err = os.MkdirAll(vhostUserSockPath, dirMode) - assert.Nil(t, err) - _, err = os.Create(deviceSockPath) - assert.Nil(t, err) - - // mknod requires root 
privilege, call mock function for non-root to - // get VhostUserBlk device type. - if rootEnabled == true { - err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) - assert.Nil(t, err) - } else { - savedFunc := config.GetVhostUserNodeStatFunc - - _, err = os.Create(deviceNodePath) - assert.Nil(t, err) - - config.GetVhostUserNodeStatFunc = func(devNodePath string, - devNodeStat *unix.Stat_t) error { - if deviceNodePath != devNodePath { - return fmt.Errorf("mock GetVhostUserNodeStatFunc error") - } - - devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) - return nil - } - - defer func() { - config.GetVhostUserNodeStatFunc = savedFunc - }() - } - - path := "/dev/vda" - deviceInfo := config.DeviceInfo{ - HostPath: deviceNodePath, - ContainerPath: path, - DevType: "b", - Major: config.VhostUserBlkMajor, - Minor: 0, - } - - devReceiver := &api.MockDeviceReceiver{} - device, err := dm.NewDevice(deviceInfo) - assert.Nil(t, err) - _, ok := device.(*drivers.VhostUserBlkDevice) - assert.True(t, ok) - - err = device.Attach(context.Background(), devReceiver) - assert.Nil(t, err) - - err = device.Detach(context.Background(), devReceiver) - assert.Nil(t, err) -} - func TestAttachDetachDevice(t *testing.T) { dm := NewDeviceManager(VirtioSCSI, false, "", nil) From e355a71860d62bb5312a395776dded73328be433 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 11:53:23 -0800 Subject: [PATCH 60/71] container: file is not linux specific This should not be linux specific -- drop restriction. 
Signed-off-by: Eric Ernst --- src/runtime/virtcontainers/container.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 582e196ae..82aae79e3 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -1,6 +1,3 @@ -//go:build linux -// +build linux - // Copyright (c) 2016 Intel Corporation // Copyright (c) 2014,2015,2016,2017 Docker, Inc. // SPDX-License-Identifier: Apache-2.0 From 4afb278fe2c60ac59b20dc104b932c1f9be96466 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Fri, 25 Feb 2022 14:06:56 -0800 Subject: [PATCH 61/71] ci: add github action to exercise darwin build, unit tests There are a few outstanding changes required to build the runtime on Darwin. Let's add a GitHub action to exercise build and unit tests of the packages which we do expect to work. Eventually this should be dropped and we can run any Darwin specific tests, or just add MacOS to the matrix for our static check OSes. 
Fixes: #3778 Signed-off-by: Eric Ernst --- .github/workflows/darwin-tests.yaml | 25 +++++++++++++++++ ci/darwin-test.sh | 42 +++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 .github/workflows/darwin-tests.yaml create mode 100755 ci/darwin-test.sh diff --git a/.github/workflows/darwin-tests.yaml b/.github/workflows/darwin-tests.yaml new file mode 100644 index 000000000..5a83add32 --- /dev/null +++ b/.github/workflows/darwin-tests.yaml @@ -0,0 +1,25 @@ +on: + pull_request: + types: + - opened + - edited + - reopened + - synchronize + +name: Darwin tests +jobs: + test: + strategy: + matrix: + go-version: [1.16.x, 1.17.x] + os: [macos-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Install Go + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@v2 + - name: Build utils + run: ./ci/darwin-test.sh diff --git a/ci/darwin-test.sh b/ci/darwin-test.sh new file mode 100755 index 000000000..92317653b --- /dev/null +++ b/ci/darwin-test.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2022 Apple Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 + +set -e + +cidir=$(dirname "$0") +runtimedir=$cidir/../src/runtime + +build_working_packages() { + # working packages: + device_api=$runtimedir/virtcontainers/device/api + device_config=$runtimedir/virtcontainers/device/config + device_drivers=$runtimedir/virtcontainers/device/drivers + device_manager=$runtimedir/virtcontainers/device/manager + rc_pkg_dir=$runtimedir/pkg/resourcecontrol/ + utils_pkg_dir=$runtimedir/virtcontainers/utils + + # broken packages :( : + #katautils=$runtimedir/pkg/katautils + #oci=$runtimedir/pkg/oci + #vc=$runtimedir/virtcontainers + + pkgs=( + "$device_api" + "$device_config" + "$device_drivers" + "$device_manager" + "$utils_pkg_dir" + "$rc_pkg_dir") + for pkg in "${pkgs[@]}"; do + echo building "$pkg" + pushd "$pkg" &>/dev/null + go build + go test + popd &>/dev/null + done +} + +build_working_packages From eda8ea154a12c3a537a55d8a42d50727429e8d64 Mon Sep 17 00:00:00 2001 From: Jakob Naucke Date: Fri, 25 Feb 2022 10:25:33 +0100 Subject: [PATCH 62/71] runtime: Gofmt fixes - Mostly blank lines after `+build` -- see https://pkg.go.dev/go/build@go1.14.15 -- this is, to date, enforced by `gofmt`. - 1.17-style go:build directives are also added. 
- Spaces in govmm/vmm_s390x.go Fixes: #3769 Signed-off-by: Jakob Naucke --- src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go | 1 + src/runtime/pkg/govmm/vmm_s390x.go | 6 +++--- src/runtime/pkg/resourcecontrol/cgroups.go | 3 ++- src/runtime/virtcontainers/acrn.go | 2 +- src/runtime/virtcontainers/acrn_arch_base.go | 2 +- src/runtime/virtcontainers/acrn_test.go | 2 +- src/runtime/virtcontainers/clh.go | 1 - src/runtime/virtcontainers/clh_test.go | 2 +- src/runtime/virtcontainers/fc.go | 1 - src/runtime/virtcontainers/fc_metrics.go | 2 +- src/runtime/virtcontainers/fc_test.go | 2 +- src/runtime/virtcontainers/ipvlan_endpoint.go | 3 ++- src/runtime/virtcontainers/ipvlan_endpoint_test.go | 3 ++- src/runtime/virtcontainers/macvlan_endpoint.go | 3 ++- src/runtime/virtcontainers/macvlan_endpoint_test.go | 3 ++- src/runtime/virtcontainers/macvtap_endpoint.go | 3 ++- src/runtime/virtcontainers/macvtap_endpoint_test.go | 3 ++- src/runtime/virtcontainers/physical_endpoint.go | 3 ++- src/runtime/virtcontainers/physical_endpoint_test.go | 3 ++- src/runtime/virtcontainers/qemu.go | 1 - src/runtime/virtcontainers/qemu_amd64.go | 2 +- src/runtime/virtcontainers/qemu_amd64_test.go | 2 +- src/runtime/virtcontainers/qemu_arch_base.go | 2 +- src/runtime/virtcontainers/qemu_arch_base_test.go | 2 +- src/runtime/virtcontainers/qemu_arm64.go | 2 +- src/runtime/virtcontainers/qemu_arm64_test.go | 2 +- src/runtime/virtcontainers/qemu_ppc64le.go | 2 +- src/runtime/virtcontainers/qemu_ppc64le_test.go | 2 +- src/runtime/virtcontainers/qemu_s390x.go | 2 +- src/runtime/virtcontainers/qemu_s390x_test.go | 2 +- src/runtime/virtcontainers/qemu_test.go | 2 +- src/runtime/virtcontainers/tap_endpoint.go | 3 ++- src/runtime/virtcontainers/tuntap_endpoint.go | 3 ++- src/runtime/virtcontainers/veth_endpoint.go | 3 ++- src/runtime/virtcontainers/veth_endpoint_test.go | 3 ++- src/runtime/virtcontainers/vhostuser_endpoint.go | 3 ++- src/runtime/virtcontainers/vhostuser_endpoint_test.go | 3 ++- 37 files 
changed, 51 insertions(+), 38 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go index 4d343eedc..26b2ac547 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go @@ -1,3 +1,4 @@ +//go:build !s390x // +build !s390x // Copyright contributors to the Virtual Machine Manager for Go project diff --git a/src/runtime/pkg/govmm/vmm_s390x.go b/src/runtime/pkg/govmm/vmm_s390x.go index 50f1735dc..d6ef960a8 100644 --- a/src/runtime/pkg/govmm/vmm_s390x.go +++ b/src/runtime/pkg/govmm/vmm_s390x.go @@ -9,7 +9,7 @@ package govmm // MaxVCPUs returns the maximum number of vCPUs supported func MaxVCPUs() uint32 { // Max number of virtual Cpu defined in qemu. See - // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 - // #define S390_MAX_CPUS 248 - return uint32(248) + // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 + // #define S390_MAX_CPUS 248 + return uint32(248) } diff --git a/src/runtime/pkg/resourcecontrol/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go index 49c6562af..4210392d2 100644 --- a/src/runtime/pkg/resourcecontrol/cgroups.go +++ b/src/runtime/pkg/resourcecontrol/cgroups.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2021-2022 Apple Inc. 
// // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 5e709da42..6b2679201 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 52f6ea339..ab547b6bd 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 974b1fe1b..b92f35a2f 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 57cee5e20..805fda716 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1,7 +1,6 @@ //go:build linux // +build linux -// // Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index ec73303d0..9bfd2e62e 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2019 Ericsson Eurolab Deutschland G.m.b.H. 
// // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 7fd7909f4..6137e32cf 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1,7 +1,6 @@ //go:build linux // +build linux -// // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc_metrics.go b/src/runtime/virtcontainers/fc_metrics.go index 814d81936..0462542fd 100644 --- a/src/runtime/virtcontainers/fc_metrics.go +++ b/src/runtime/virtcontainers/fc_metrics.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index 02d9d7144..f2b099f1d 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/ipvlan_endpoint.go b/src/runtime/virtcontainers/ipvlan_endpoint.go index f9196cda3..a257e6ecb 100644 --- a/src/runtime/virtcontainers/ipvlan_endpoint.go +++ b/src/runtime/virtcontainers/ipvlan_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/ipvlan_endpoint_test.go b/src/runtime/virtcontainers/ipvlan_endpoint_test.go index 0f26119b6..7fe33d166 100644 --- a/src/runtime/virtcontainers/ipvlan_endpoint_test.go +++ b/src/runtime/virtcontainers/ipvlan_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvlan_endpoint.go b/src/runtime/virtcontainers/macvlan_endpoint.go index 228ec3351..061a36f15 
100644 --- a/src/runtime/virtcontainers/macvlan_endpoint.go +++ b/src/runtime/virtcontainers/macvlan_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvlan_endpoint_test.go b/src/runtime/virtcontainers/macvlan_endpoint_test.go index 2f421b6b7..896be917e 100644 --- a/src/runtime/virtcontainers/macvlan_endpoint_test.go +++ b/src/runtime/virtcontainers/macvlan_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvtap_endpoint.go b/src/runtime/virtcontainers/macvtap_endpoint.go index e64076341..1a2452cf1 100644 --- a/src/runtime/virtcontainers/macvtap_endpoint.go +++ b/src/runtime/virtcontainers/macvtap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvtap_endpoint_test.go b/src/runtime/virtcontainers/macvtap_endpoint_test.go index 0f9283f79..6b7e00943 100644 --- a/src/runtime/virtcontainers/macvtap_endpoint_test.go +++ b/src/runtime/virtcontainers/macvtap_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/physical_endpoint.go b/src/runtime/virtcontainers/physical_endpoint.go index 48ff18219..71f67da8f 100644 --- a/src/runtime/virtcontainers/physical_endpoint.go +++ b/src/runtime/virtcontainers/physical_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/physical_endpoint_test.go b/src/runtime/virtcontainers/physical_endpoint_test.go index 7e823f6f7..cac41358f 100644 
--- a/src/runtime/virtcontainers/physical_endpoint_test.go +++ b/src/runtime/virtcontainers/physical_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 67b24b9a4..0985380d8 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1,7 +1,6 @@ //go:build linux // +build linux -// // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index ef1e16adb..8e76bf55c 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 1071a7051..e8cf9fcd5 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 8820fd655..c5c5e8057 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index f8d07bc58..ff20ba447 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -1,6 +1,6 @@ 
//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index fb0e7a9d7..f5af19e21 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index 893c46e66..d78f4474b 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 93c11416c..e78fcb701 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 6c046b620..15e75a91b 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 77b6f440b..32716e776 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go 
b/src/runtime/virtcontainers/qemu_s390x_test.go index 07eaa8517..3d0c393a6 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 985c15f1c..9069bb212 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -1,6 +1,6 @@ //go:build linux // +build linux -// + // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/tap_endpoint.go b/src/runtime/virtcontainers/tap_endpoint.go index 75c6e3273..62b855382 100644 --- a/src/runtime/virtcontainers/tap_endpoint.go +++ b/src/runtime/virtcontainers/tap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Huawei Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/tuntap_endpoint.go b/src/runtime/virtcontainers/tuntap_endpoint.go index 9d51f2f72..888e666ef 100644 --- a/src/runtime/virtcontainers/tuntap_endpoint.go +++ b/src/runtime/virtcontainers/tuntap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Huawei Corporation // Copyright (c) 2019 Intel Corporation // diff --git a/src/runtime/virtcontainers/veth_endpoint.go b/src/runtime/virtcontainers/veth_endpoint.go index 2dd7a4b08..52a7ae479 100644 --- a/src/runtime/virtcontainers/veth_endpoint.go +++ b/src/runtime/virtcontainers/veth_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/veth_endpoint_test.go b/src/runtime/virtcontainers/veth_endpoint_test.go index c9295af97..c6ae2ce67 100644 --- 
a/src/runtime/virtcontainers/veth_endpoint_test.go +++ b/src/runtime/virtcontainers/veth_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/vhostuser_endpoint.go b/src/runtime/virtcontainers/vhostuser_endpoint.go index 8d317adf3..ecdfa8a5e 100644 --- a/src/runtime/virtcontainers/vhostuser_endpoint.go +++ b/src/runtime/virtcontainers/vhostuser_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/vhostuser_endpoint_test.go b/src/runtime/virtcontainers/vhostuser_endpoint_test.go index 783c646bc..c75930ff6 100644 --- a/src/runtime/virtcontainers/vhostuser_endpoint_test.go +++ b/src/runtime/virtcontainers/vhostuser_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 From edf20766d15d58972793620dcc16ee6139c668a6 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Mon, 28 Feb 2022 16:07:12 +0000 Subject: [PATCH 63/71] docs: Update Readme document This PR updates the README document by using the proper link for the contributing guide as well as a misspelling. Fixes #3791 Signed-off-by: Gabriela Cervantes --- docs/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index 5ccddda8b..973a00b83 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,9 +21,9 @@ See the [tracing documentation](tracing.md). * [Limitations](Limitations.md): differences and limitations compared with the default [Docker](https://www.docker.com/) runtime, [`runc`](https://github.com/opencontainers/runc). -### Howto guides +### How-to guides -See the [howto documentation](how-to). +See the [how-to documentation](how-to). 
## Kata Use-Cases @@ -48,7 +48,7 @@ Documents that help to understand and contribute to Kata Containers. ### How to Contribute * [Developer Guide](Developer-Guide.md): Setup the Kata Containers developing environments -* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md) +* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md) * [Code of Conduct](../CODE_OF_CONDUCT.md) ## Help Writing a Code PR From 54d0a672c55e3e28eca20790aa4037472e1fef6e Mon Sep 17 00:00:00 2001 From: goodluckbot Date: Tue, 1 Mar 2022 01:18:35 +0800 Subject: [PATCH 64/71] subsystem: build With the ACPI PCI hotplug changes introduced in 2.3, QEMU >= 6.1 is required. Remove unnecessary qemu version check in build script. Fixes #3547 Signed-off-by: goodluckbot --- .../packaging/scripts/configure-hypervisor.sh | 31 ++++--------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/tools/packaging/scripts/configure-hypervisor.sh b/tools/packaging/scripts/configure-hypervisor.sh index bfcc8ca44..4db23ae66 100755 --- a/tools/packaging/scripts/configure-hypervisor.sh +++ b/tools/packaging/scripts/configure-hypervisor.sh @@ -222,11 +222,6 @@ generate_qemu_options() { # Disabled options - # Disable sheepdog block driver support (deprecated in 5.2.0) - if ! 
gt_eq ${qemu_version} 5.2.0 ; then - qemu_options+=(size:--disable-sheepdog) - fi - # Disable block migration in the main migration stream qemu_options+=(size:--disable-live-block-migration) @@ -312,9 +307,7 @@ generate_qemu_options() { # Disable libudev since it is only needed for qemu-pr-helper and USB, # none of which are used with Kata - if gt_eq "${qemu_version}" "5.2.0" ; then - qemu_options+=(size:--disable-libudev) - fi + qemu_options+=(size:--disable-libudev) # Disallow network downloads qemu_options+=(security:--disable-curl) @@ -331,10 +324,8 @@ generate_qemu_options() { # But since QEMU 5.2 the daemon is built as part of the tools set # (disabled with --disable-tools) thus it needs to be explicitely # enabled. - if gt_eq "${qemu_version}" "5.2.0" ; then - qemu_options+=(functionality:--enable-virtiofsd) - qemu_options+=(functionality:--enable-virtfs) - fi + qemu_options+=(functionality:--enable-virtiofsd) + qemu_options+=(functionality:--enable-virtfs) # Don't build linux-user bsd-user qemu_options+=(size:--disable-bsd-user) @@ -385,12 +376,6 @@ generate_qemu_options() { qemu_options+=(size:--disable-dmg) qemu_options+=(size:--disable-parallels) - # vxhs was deprecated on QEMU 5.1 so it doesn't need to be - # explicitly disabled. - if ! gt_eq "${qemu_version}" "5.1.0" ; then - qemu_options+=(size:--disable-vxhs) - fi - #--------------------------------------------------------------------- # Enabled options @@ -449,11 +434,7 @@ generate_qemu_options() { # compile with high level of optimisation # On version 5.2.0 onward the Meson build system warns to not use -O3 - if ! gt_eq "${qemu_version}" "5.2.0" ; then - _qemu_cflags+=" -O3" - else - _qemu_cflags+=" -O2" - fi + _qemu_cflags+=" -O2" # Improve code quality by assuming identical semantics for interposed # synmbols. @@ -544,8 +525,8 @@ main() { [ -n "${qemu_version}" ] || die "cannot determine qemu version from file $qemu_version_file" - if ! 
gt_eq "${qemu_version}" "5.0.0" ; then - die "Kata requires QEMU >= 5.0.0" + if ! gt_eq "${qemu_version}" "6.1.0" ; then + die "Kata requires QEMU >= 6.1.0" fi local gcc_version_major=$(gcc -dumpversion | cut -f1 -d.) From f31125fe92f056fda48f5226aebf9d295ffc640c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Mon, 28 Feb 2022 17:35:35 +0100 Subject: [PATCH 65/71] version: Bump cloud-hypervisor to b0324f85571c441f MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bump brings a fix on the build script, for ARM, so we can use the very same build script everywhere. The commit of our interest is b0324f85571c441f840e9bdeb25410514a00bb74: ``` scripts: Fix musl build on aarch64 Adding the missing TARGET_CC environment variable to get the build to complete correctly. Fixes #3776 Signed-off-by: Sebastien Boeuf Signed-off-by: Fabiano Fidêncio --- versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index c1b10ce89..50d07d23c 100644 --- a/versions.yaml +++ b/versions.yaml @@ -75,7 +75,7 @@ assets: url: "https://github.com/cloud-hypervisor/cloud-hypervisor" uscan-url: >- https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: "5343e09e7b8dbd5dd8ac0d90a3ad52037490dd86" + version: "b0324f85571c441f840e9bdeb25410514a00bb74" firecracker: description: "Firecracker micro-VMM" From a67b93bb035f25f99cbfdf2b2ef99a0c882c2af0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sun, 20 Feb 2022 20:16:30 +0100 Subject: [PATCH 66/71] snap: clh: Re-use kata-deploy script here MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current snap build for clh is broken as it's not aware of how to build the binary from sources. Instead of fixing it here, let's take advantage of the kata-deploy script, which is capable of building from sources, and re-use it here. 
Fixes: #3693 Signed-off-by: Fabiano Fidêncio --- snap/snapcraft.yaml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 4f90329c1..173c0f7b7 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -320,17 +320,20 @@ parts: plugin: nil after: [godeps] override-build: | - export GOPATH=${SNAPCRAFT_STAGE}/gopath - yq=${SNAPCRAFT_STAGE}/yq - kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME} - versions_file="${kata_dir}/versions.yaml" - version="$(${yq} r ${versions_file} assets.hypervisor.cloud_hypervisor.version)" - url="https://github.com/cloud-hypervisor/cloud-hypervisor/releases/download/${version}" - curl -L ${url}/cloud-hypervisor-static -o cloud-hypervisor - curl -LO ${url}/clh-remote + sudo apt-get -y update + sudo apt-get -y install ca-certificates curl gnupg lsb-release + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get -y update + sudo apt-get -y install docker-ce docker-ce-cli containerd.io + sudo systemctl start docker.socket - install -D cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor - install -D clh-remote ${SNAPCRAFT_PART_INSTALL}/usr/bin/clh-remote + export GOPATH=${SNAPCRAFT_STAGE}/gopath + kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME} + cd ${kata_dir} + sudo -E NO_TTY=true make cloud-hypervisor-tarball + tar xvJpf build/kata-static-cloud-hypervisor.tar.xz -C /tmp/ + install -D /tmp/opt/kata/bin/cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor apps: runtime: From e6350d3d45096030f43b4d38d768291a56d2214c Mon Sep 
17 00:00:00 2001 From: "James O. D. Hunt" Date: Mon, 28 Feb 2022 16:52:30 +0000 Subject: [PATCH 67/71] monitor: Fix build options Removed redundant and duplicated build options to build `kata-monitor` the same way as the other components: - `CGO_ENABLED=0` is not necessary. - `-buildmode=exe` is not necessary since `BUILDFLAGS` already sets the build mode. Signed-off-by: James O. D. Hunt --- src/runtime/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 73805741e..9446c1dc1 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -552,8 +552,8 @@ $(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) $(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build -ldflags "$(KATA_LDFLAGS)" $(BUILDFLAGS) -o $@ .) $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit - $(QUIET_BUILD)(cd $(MONITOR_DIR)/ && CGO_ENABLED=0 go build \ - --ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -buildmode=exe -o $@ .) + $(QUIET_BUILD)(cd $(MONITOR_DIR)/ && go build \ + --ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -o $@ .) .PHONY: \ check \ From e64c54a2ad766d034a853b4a23e0b8f39d5a8d4c Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Mon, 28 Feb 2022 17:15:31 +0000 Subject: [PATCH 68/71] monitor: Listen to localhost only by default Change `kata-monitor` to listen to port `8090` on the local interface only by default. > **Note:** > > This is a breaking change as previously it listened on all interfaces. Fixes: #3795. Signed-off-by: James O. D. 
Hunt --- src/runtime/cmd/kata-monitor/main.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/cmd/kata-monitor/main.go b/src/runtime/cmd/kata-monitor/main.go index c0001cdc2..a3fc86cfd 100644 --- a/src/runtime/cmd/kata-monitor/main.go +++ b/src/runtime/cmd/kata-monitor/main.go @@ -18,7 +18,9 @@ import ( "github.com/sirupsen/logrus" ) -var monitorListenAddr = flag.String("listen-address", ":8090", "The address to listen on for HTTP requests.") +const defaultListenAddress = "127.0.0.1:8090" + +var monitorListenAddr = flag.String("listen-address", defaultListenAddress, "The address to listen on for HTTP requests.") var runtimeEndpoint = flag.String("runtime-endpoint", "/run/containerd/containerd.sock", `Endpoint of CRI container runtime service. (default: "/run/containerd/containerd.sock")`) var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug/info/warn/error/fatal/panic).") From b27c7f4068bccd6593e7d9197e6dc1c9b2ffc837 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Mon, 28 Feb 2022 08:46:39 +0000 Subject: [PATCH 69/71] docs: Add unit testing presentation Add the Kata Containers unit testing presentation I gave to the Kata outreach students as this may be of some use to others. Fixes: #3781 Signed-off-by: James O. D. Hunt --- docs/README.md | 5 + docs/presentations/README.md | 3 + docs/presentations/unit-testing/README.md | 14 + .../kata-containers-unit-testing.md | 335 ++++++++++++++++++ 4 files changed, 357 insertions(+) create mode 100644 docs/presentations/README.md create mode 100644 docs/presentations/unit-testing/README.md create mode 100644 docs/presentations/unit-testing/kata-containers-unit-testing.md diff --git a/docs/README.md b/docs/README.md index 973a00b83..529d772d9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -58,6 +58,7 @@ Documents that help to understand and contribute to Kata Containers. 
## Help Writing Unit Tests * [Unit Test Advice](Unit-Test-Advice.md) +* [Unit testing presentation](presentations/unit-testing/kata-containers-unit-testing.md) ## Help Improving the Documents @@ -72,6 +73,10 @@ Documents that help to understand and contribute to Kata Containers. * [Release strategy](Stable-Branch-Strategy.md) * [Release Process](Release-Process.md) +## Presentations + +* [Presentations](presentations) + ## Website Changes If you have a suggestion for how we can improve the diff --git a/docs/presentations/README.md b/docs/presentations/README.md new file mode 100644 index 000000000..6219c2b5f --- /dev/null +++ b/docs/presentations/README.md @@ -0,0 +1,3 @@ +# Kata Containers presentations + +* [Unit testing](unit-testing) diff --git a/docs/presentations/unit-testing/README.md b/docs/presentations/unit-testing/README.md new file mode 100644 index 000000000..658d89dc9 --- /dev/null +++ b/docs/presentations/unit-testing/README.md @@ -0,0 +1,14 @@ +# Kata Containers unit testing presentation + +## Markdown version + +See [the Kata Containers unit testing presentation](kata-containers-unit-testing.md). + +### To view as an HTML presentation + +```bash +$ infile="kata-containers-unit-testing.md" +$ outfile="/tmp/kata-containers-unit-testing.html" +$ pandoc -s --metadata title="Kata Containers unit testing" -f markdown -t revealjs --highlight-style="zenburn" -i -o "$outfile" "$infile" +$ xdg-open "file://$outfile" +``` diff --git a/docs/presentations/unit-testing/kata-containers-unit-testing.md b/docs/presentations/unit-testing/kata-containers-unit-testing.md new file mode 100644 index 000000000..52ea699a1 --- /dev/null +++ b/docs/presentations/unit-testing/kata-containers-unit-testing.md @@ -0,0 +1,335 @@ +## Why write unit tests? 
+ +- Catch regressions + +- Improve the code being tested + + Structure, quality, security, performance, "shakes out" implicit + assumptions, _etc_ + +- Extremely instructive + + Once you've fully tested a single function, you'll understand that + code very well indeed. + +## Why write unit tests? (continued) + +- Fun! + + Yes, really! Don't believe me? Try it! ;) + +## Run all Kata Containers agent unit tests + +As an example, to run all agent unit tests: + +```bash +$ cd $GOPATH/src/github.com/kata-containers/kata-containers +$ cd src/agent +$ make test +``` + +## List all unit tests + +- Identify the full name of all the tests _in the current package_: + + ```bash + $ cargo test -- --list + ``` + +- Identify the full name of all tests in the `foo` "local crate" + (sub-directory containing another `Cargo.toml` file): + + ```bash + $ cargo test -p "foo" -- --list + ``` + +## Run a single unit test + +- Run a test in the current package in verbose mode: + + ```bash + # Example + $ test="config::tests::test_get_log_level" + + $ cargo test "$test" -vv -- --exact --nocapture + ``` + +## Test coverage setup + +```bash +$ cargo install cargo-tarpaulin +``` + +## Show test coverage + +```bash +$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent +$ cargo -v tarpaulin --all-features --run-types AllTargets --count --force-clean -o Html +$ xdg-open "file://$PWD/tarpaulin-report.html" +``` + +## Testability (part 1) + +- To be testable, a function should: + - Not be "too long" (say >100 lines). + - Not be "too complex" (say >3 levels of indentation). + - Should return a `Result` or an `Option` so error paths + can be tested. + +- If functions don't conform, they need to be reworked (refactored) + before writing tests. + +## Testability (part 2) + +- Some functions can't be fully tested. +- However, you _can_ test the initial code that checks + the parameter values (test error paths only). 
+ +## Writing new tests: General advice (part 1) + +- KISS: Keep It Simple Stupid + + You don't get extra points for cryptic code. + +- DRY: Don't Repeat Yourself + + Make use of existing facilities (don't "re-invent the wheel"). + +- Read the [unit test advice document](https://github.com/kata-containers/kata-containers/blob/main/docs/Unit-Test-Advice.md) + +## Writing new tests: General advice (part 2) + +- Attack the function in all possible ways + +- Use the _table driven_ approach: + - Simple + - Compact + - Easy to debug + - Makes boundary analysis easy + - Encourages functions to be testable + +## Writing new tests: Specific advice (part 1) + +- Create a new "`tests`" module if necessary. +- Give each test function a "`test_`" prefix. +- Add the "`#[test]`" annotation on each test function. + +## Writing new tests: Specific advice (part 2) + +- If you need to `use` (import) packages for the tests, + _only do it in the `tests` module_: + ```rust + use some_test_pkg::{foo, bar}; // <-- Not here + + #[cfg(test)] + mod tests { + use super::*; + use some_test_pkg::{foo, bar}; // <-- Put it here + } + ``` + +## Writing new tests: Specific advice (part 3) + +- You can add test-specific dependencies in `Cargo.toml`: + ```toml + [dev-dependencies] + serial_test = "0.5.1" + ``` + +## Writing new tests: Specific advice (part 4) + +- Don't add in lots of error handling code: let the test panic! + ```rust + // This will panic if the unwrap fails. + // - NOT acceptable generally for production code. + // - PERFECTLY acceptable for test code since: + // - Keeps the test code simple. + // - Rust will detect the panic and fail the test. + let result = func().unwrap(); + ``` + +## Debugging tests (part 1) + +- Comment out all tests in your `TestData` array apart from the failing test. + +- Add temporary `println!("FIXME: ...")` statements in the code. + +- Set `RUST_BACKTRACE=full` before running `cargo test`. 
+ +## Debugging tests (part 2) + +- Use a debugger (not normally necessary though): + ```bash + # Disable optimisation + $ RUSTFLAGS="-C opt-level=0" cargo test --no-run + + # Find the test binary + $ test_binary=$(find target/debug/deps | grep "kata_agent-[a-z0-9][a-z0-9]*$" | tail -1) + + $ rust-gdb "$test_binary" + ``` + +## Useful tips + +- Always start a test with a "clean environment": + + Create a new set of objects / files / directories / _etc_ + for each test. + +- Mounts + - Linux allows mounts on top of existing mounts. + - Bind mounts and read-only mounts can be useful. + +## Gotchas (part 1) + +If a test runs successfully _most of the time_: + +- Review the test logic. + +- Add a `#[serial]` annotation on the test function. + Requires the `serial_test` package in the `[dev-dependencies]` + section of `Cargo.toml`. + + If this makes it work, the test is probably sharing resources with + another task (thread). + +## Gotchas (part 2) + +If a test works locally but fails in the CI, consider the following +attributes of each environment (local and CI): + +- The version of Rust being used. +- The hardware architecture. +- Number (and spec) of the CPUs. + +## Gotchas (part 3) + +If in doubt, look at the +["test artifacts" attached to the failing CI test](http://jenkins.katacontainers.io). + +## Before raising a PR + +- Remember to check that the test runs locally: + - As a non-privileged user. + - As the `root` user (carefully!). + +- Run the [static checker](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh) + on your changes. + + Checks formatting and many other things. + +## If in doubt + +- Ask for help! ;) + +## Quiz 1 + +What's wrong with this function? 
+ +```rust +fn foo(config: &Config, path_prefix: String, container_id: String, pid: String) -> Result<()> { + let mut full_path = format!("{}/{}", path_prefix, container_id); + + let _ = remove_recursively(&mut full_path); + + write_number_to_file(pid, full_path); + + Ok(()) +} +``` + +## Quiz 1: Answers (part 1) + +- No check that `path_prefix`, `container_id` and `pid` are not `""`. +- No check that `path_prefix` is absolute. +- No check that `container_id` does not contain slashes / contains only valid characters. +- Result of `remove_recursively()` discarded. +- `remove_recursively()` _may_ modify `full_path` without `foo()` knowing! + +## Quiz 1: Answers (part 2) + +- Why is `pid` not a numeric? +- No check to ensure the PID is positive. +- No check to recreate any directories in the original `path_prefix`. +- `write_number_to_file()` could fail so why doesn't it return a value? +- The `config` parameter is unused. + +## Quiz 1: What if... + +Imagine if the caller managed to do this: + +```rust +foo(config, "", "sbin/init", r#"#!/bin/sh\n/sbin/reboot"#); +``` + +## Quiz 2 + +What makes this function difficult to test? + +```rust +fn get_user_id(username: String) -> i32 { + let line = grep_file(username, "/etc/passwd").unwrap(); + let fields = line.split(':'); + + let uid = fields.nth(2).ok_or("failed").unwrap(); + + uid.parse::<i32>() +} +``` + +## Quiz 2: Answers (part 1) + +- Unhelpful error message ("failed"). + +- Panics on error! Return a `Result` instead! + +- UIDs cannot be negative so function should return an unsigned + value. + +## Quiz 2: Answers (part 2) + +- Hard-coded filename. + + This would be better: + + ```rust + const PASSWD_DB: &str = "/etc/passwd"; + + // Test code can now pass valid and invalid files! + fn get_user_id(filename: String, username: String) -> i32 { + // ... + } + + let id = get_user_id(PASSWD_DB, username); + ``` + +## Quiz 3 + +What's wrong with this test code? 
+ +```rust +let mut obj = Object::new(); + +// Sanity check +assert_eq!(obj.num, 0); +assert_eq!(obj.wibble, false); + +// Test 1 +obj.foo_method(7); +assert_eq!(obj.num, 7); + +// Test 2 +obj.bar_method(true); +assert_eq!(obj.wibble, true); +``` + +## Quiz 3: Answers + +- The test code is "fragile": + - The 2nd test re-uses the object created in the first test. + +## Finally + +- [We need a GH action to run the unit tests](https://github.com/kata-containers/kata-containers/issues/2934) + + Needs to fail PRs that decrease test coverage
by "x%". From fe52465bdb91e7c4930332e05830a128e7e324a7 Mon Sep 17 00:00:00 2001 From: stevenhorsman Date: Wed, 2 Mar 2022 17:05:00 +0000 Subject: [PATCH 70/71] doc: Fix typo Fix typo in script Signed-off-by: stevenhorsman --- docs/how-to/ccv0.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/how-to/ccv0.sh b/docs/how-to/ccv0.sh index a39468884..f18739dbf 100755 --- a/docs/how-to/ccv0.sh +++ b/docs/how-to/ccv0.sh @@ -445,8 +445,8 @@ EOF crictl_delete_cc fi - $(sudo crictl runp -r kata ${HOME}/pod-config.yaml) - sudo crictl pods + local pod_id=$(sudo crictl runp -r kata ${HOME}/pod-config.yaml) + sudo crictl pods ${pod_id} } crictl_create_cc_container() { From 39d6b826c15ffe8abe0e9974340257bcf657d5a1 Mon Sep 17 00:00:00 2001 From: stevenhorsman Date: Wed, 2 Mar 2022 17:04:33 +0000 Subject: [PATCH 71/71] runtime: Add cc compatibility to refactor The create rootfs code has been refactored, try and add back in the CC behaviour Signed-off-by: stevenhorsman --- src/runtime/virtcontainers/fs_share_linux.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 6dea74a45..cec3d154e 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -371,15 +371,20 @@ func (f *FilesystemShare) shareRootFilesystemWithNydus(ctx context.Context, c *C //func (c *Container) shareRootfs(ctx context.Context) (*grpc.Storage, string, error) { func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) + // In the confidential computing, there is no Image information on the host, // so there is no Rootfs.Target. 
if f.sandbox.config.ServiceOffload && c.rootFs.Target == "" { - return nil, nil + return &SharedFile{ + storage: nil, + guestPath: rootfsGuestPath, + }, nil } + if c.rootFs.Type == NydusRootFSType { return f.shareRootFilesystemWithNydus(ctx, c) } - rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) if c.state.Fstype != "" && c.state.BlockDeviceID != "" { // The rootfs storage volume represents the container rootfs