diff --git a/.github/workflows/darwin-tests.yaml b/.github/workflows/darwin-tests.yaml new file mode 100644 index 000000000..5a83add32 --- /dev/null +++ b/.github/workflows/darwin-tests.yaml @@ -0,0 +1,25 @@ +on: + pull_request: + types: + - opened + - edited + - reopened + - synchronize + +name: Darwin tests +jobs: + test: + strategy: + matrix: + go-version: [1.16.x, 1.17.x] + os: [macos-latest] + runs-on: ${{ matrix.os }} + steps: + - name: Install Go + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@v2 + - name: Build utils + run: ./ci/darwin-test.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8a3af744a..c14210fdb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,4 +2,4 @@ ## This repo is part of [Kata Containers](https://katacontainers.io) -For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md). \ No newline at end of file +For details on how to contribute to the Kata Containers project, please see the main [contributing document](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md). diff --git a/ci/darwin-test.sh b/ci/darwin-test.sh new file mode 100755 index 000000000..92317653b --- /dev/null +++ b/ci/darwin-test.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2022 Apple Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 + +set -e + +cidir=$(dirname "$0") +runtimedir=$cidir/../src/runtime + +build_working_packages() { + # working packages: + device_api=$runtimedir/virtcontainers/device/api + device_config=$runtimedir/virtcontainers/device/config + device_drivers=$runtimedir/virtcontainers/device/drivers + device_manager=$runtimedir/virtcontainers/device/manager + rc_pkg_dir=$runtimedir/pkg/resourcecontrol/ + utils_pkg_dir=$runtimedir/virtcontainers/utils + + # broken packages :( : + #katautils=$runtimedir/pkg/katautils + #oci=$runtimedir/pkg/oci + #vc=$runtimedir/virtcontainers + + pkgs=( + "$device_api" + "$device_config" + "$device_drivers" + "$device_manager" + "$utils_pkg_dir" + "$rc_pkg_dir") + for pkg in "${pkgs[@]}"; do + echo building "$pkg" + pushd "$pkg" &>/dev/null + go build + go test + popd &>/dev/null + done +} + +build_working_packages diff --git a/docs/Developer-Guide.md b/docs/Developer-Guide.md index f4ec4ff7b..8435c23da 100644 --- a/docs/Developer-Guide.md +++ b/docs/Developer-Guide.md @@ -212,11 +212,13 @@ $ sudo systemctl restart systemd-journald > > - You should only do this step if you are testing with the latest version of the agent. -The rust-agent is built with a static linked `musl.` To configure this: +The agent is built with a statically linked `musl.` The default `libc` used is `musl`, but on `ppc64le` and `s390x`, `gnu` should be used. To configure this: ``` -rustup target add x86_64-unknown-linux-musl -sudo ln -s /usr/bin/g++ /bin/musl-g++ +$ export ARCH=$(uname -m) +$ if [ "$ARCH" = "ppc64le" -o "$ARCH" = "s390x" ]; then export LIBC=gnu; else export LIBC=musl; fi +$ [ ${ARCH} == "ppc64le" ] && export ARCH=powerpc64le +$ rustup target add ${ARCH}-unknown-linux-${LIBC} ``` To build the agent: diff --git a/docs/README.md b/docs/README.md index 5ccddda8b..529d772d9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,9 +21,9 @@ See the [tracing documentation](tracing.md). 
* [Limitations](Limitations.md): differences and limitations compared with the default [Docker](https://www.docker.com/) runtime, [`runc`](https://github.com/opencontainers/runc). -### Howto guides +### How-to guides -See the [howto documentation](how-to). +See the [how-to documentation](how-to). ## Kata Use-Cases @@ -48,7 +48,7 @@ Documents that help to understand and contribute to Kata Containers. ### How to Contribute * [Developer Guide](Developer-Guide.md): Setup the Kata Containers developing environments -* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/master/CONTRIBUTING.md) +* [How to contribute to Kata Containers](https://github.com/kata-containers/community/blob/main/CONTRIBUTING.md) * [Code of Conduct](../CODE_OF_CONDUCT.md) ## Help Writing a Code PR @@ -58,6 +58,7 @@ Documents that help to understand and contribute to Kata Containers. ## Help Writing Unit Tests * [Unit Test Advice](Unit-Test-Advice.md) +* [Unit testing presentation](presentations/unit-testing/kata-containers-unit-testing.md) ## Help Improving the Documents @@ -72,6 +73,10 @@ Documents that help to understand and contribute to Kata Containers. 
* [Release strategy](Stable-Branch-Strategy.md) * [Release Process](Release-Process.md) +## Presentations + +* [Presentations](presentations) + ## Website Changes If you have a suggestion for how we can improve the diff --git a/docs/how-to/ccv0.sh b/docs/how-to/ccv0.sh index a39468884..f18739dbf 100755 --- a/docs/how-to/ccv0.sh +++ b/docs/how-to/ccv0.sh @@ -445,8 +445,8 @@ EOF crictl_delete_cc fi - $(sudo crictl runp -r kata ${HOME}/pod-config.yaml) - sudo crictl pods + local pod_id=$(sudo crictl runp -r kata ${HOME}/pod-config.yaml) + sudo crictl pods ${pod_id} } crictl_create_cc_container() { diff --git a/docs/presentations/README.md b/docs/presentations/README.md new file mode 100644 index 000000000..6219c2b5f --- /dev/null +++ b/docs/presentations/README.md @@ -0,0 +1,3 @@ +# Kata Containers presentations + +* [Unit testing](unit-testing) diff --git a/docs/presentations/unit-testing/README.md b/docs/presentations/unit-testing/README.md new file mode 100644 index 000000000..658d89dc9 --- /dev/null +++ b/docs/presentations/unit-testing/README.md @@ -0,0 +1,14 @@ +# Kata Containers unit testing presentation + +## Markdown version + +See [the Kata Containers unit testing presentation](kata-containers-unit-testing.md). + +### To view as an HTML presentation + +```bash +$ infile="kata-containers-unit-testing.md" +$ outfile="/tmp/kata-containers-unit-testing.html" +$ pandoc -s --metadata title="Kata Containers unit testing" -f markdown -t revealjs --highlight-style="zenburn" -i -o "$outfile" "$infile" +$ xdg-open "file://$outfile" +``` diff --git a/docs/presentations/unit-testing/kata-containers-unit-testing.md b/docs/presentations/unit-testing/kata-containers-unit-testing.md new file mode 100644 index 000000000..52ea699a1 --- /dev/null +++ b/docs/presentations/unit-testing/kata-containers-unit-testing.md @@ -0,0 +1,335 @@ +## Why write unit tests? 
+ +- Catch regressions + +- Improve the code being tested + + Structure, quality, security, performance, "shakes out" implicit + assumptions, _etc_ + +- Extremely instructive + + Once you've fully tested a single function, you'll understand that + code very well indeed. + +## Why write unit tests? (continued) + +- Fun! + + Yes, really! Don't believe me? Try it! ;) + +## Run all Kata Containers agent unit tests + +As an example, to run all agent unit tests: + +```bash +$ cd $GOPATH/src/github.com/kata-containers/kata-containers +$ cd src/agent +$ make test +``` + +## List all unit tests + +- Identify the full name of all the tests _in the current package_: + + ```bash + $ cargo test -- --list + ``` + +- Identify the full name of all tests in the `foo` "local crate" + (sub-directory containing another `Cargo.toml` file): + + ```bash + $ cargo test -p "foo" -- --list + ``` + +## Run a single unit test + +- Run a test in the current package in verbose mode: + + ```bash + # Example + $ test="config::tests::test_get_log_level" + + $ cargo test "$test" -vv -- --exact --nocapture + ``` + +## Test coverage setup + +```bash +$ cargo install cargo-tarpaulin +``` + +## Show test coverage + +```bash +$ cd $GOPATH/src/github.com/kata-containers/kata-containers/src/agent +$ cargo -v tarpaulin --all-features --run-types AllTargets --count --force-clean -o Html +$ xdg-open "file://$PWD/tarpaulin-report.html" +``` + +## Testability (part 1) + +- To be testable, a function should: + - Not be "too long" (say >100 lines). + - Not be "too complex" (say >3 levels of indentation). + - Should return a `Result` or an `Option` so error paths + can be tested. + +- If functions don't conform, they need to be reworked (refactored) + before writing tests. + +## Testability (part 2) + +- Some functions can't be fully tested. +- However, you _can_ test the initial code that checks + the parameter values (test error paths only). 
+ +## Writing new tests: General advice (part 1) + +- KISS: Keep It Simple Stupid + + You don't get extra points for cryptic code. + +- DRY: Don't Repeat Yourself + + Make use of existing facilities (don't "re-invent the wheel"). + +- Read the [unit test advice document](https://github.com/kata-containers/kata-containers/blob/main/docs/Unit-Test-Advice.md) + +## Writing new tests: General advice (part 2) + +- Attack the function in all possible ways + +- Use the _table driven_ approach: + - Simple + - Compact + - Easy to debug + - Makes boundary analysis easy + - Encourages functions to be testable + +## Writing new tests: Specific advice (part 1) + +- Create a new "`tests`" module if necessary. +- Give each test function a "`test_`" prefix. +- Add the "`#[test]`" annotation on each test function. + +## Writing new tests: Specific advice (part 2) + +- If you need to `use` (import) packages for the tests, + _only do it in the `tests` module_: + ```rust + use some_test_pkg::{foo, bar}; // <-- Not here + + #[cfg(test)] + mod tests { + use super::*; + use some_test_pkg::{foo, bar}; // <-- Put it here + } + ``` + +## Writing new tests: Specific advice (part 3) + +- You can add test-specific dependencies in `Cargo.toml`: + ```toml + [dev-dependencies] + serial_test = "0.5.1" + ``` + +## Writing new tests: Specific advice (part 4) + +- Don't add in lots of error handling code: let the test panic! + ```rust + // This will panic if the unwrap fails. + // - NOT acceptable generally for production code. + // - PERFECTLY acceptable for test code since: + // - Keeps the test code simple. + // - Rust will detect the panic and fail the test. + let result = func().unwrap(); + ``` + +## Debugging tests (part 1) + +- Comment out all tests in your `TestData` array apart from the failing test. + +- Add temporary `println!("FIXME: ...")` statements in the code. + +- Set `RUST_BACKTRACE=full` before running `cargo test`. 
+ +## Debugging tests (part 2) + +- Use a debugger (not normally necessary though): + ```bash + # Disable optimisation + $ RUSTFLAGS="-C opt-level=0" cargo test --no-run + + # Find the test binary + $ test_binary=$(find target/debug/deps | grep "kata_agent-[a-z0-9][a-z0-9]*$" | tail -1) + + $ rust-gdb "$test_binary" + ``` + +## Useful tips + +- Always start a test with a "clean environment": + + Create new set of objects / files / directories / _etc_ + for each test. + +- Mounts + - Linux allows mounts on top of existing mounts. + - Bind mounts and read-only mounts can be useful. + +## Gotchas (part 1) + +If a test runs successfully _most of the time_: + +- Review the test logic. + +- Add a `#[serial]` annotation on the test function + Requires the `serial_test` package in the `[dev-dependencies]` + section of `Cargo.toml`. + + If this makes it work the test is probably sharing resources with + another task (thread). + +## Gotchas (part 2) + +If a test works locally but fails in the CI, consider the following +attributes of each environment (local and CI): + +- The version of rust being used. +- The hardware architecture. +- Number (and spec) of the CPUs. + +## Gotchas (part 3) + +If in doubt, look at the +["test artifacts" attached to the failing CI test](http://jenkins.katacontainers.io). + +## Before raising a PR + +- Remember to check that the test runs locally: + - As a non-privileged user. + - As the `root` user (carefully!) + +- Run the [static checker](https://github.com/kata-containers/tests/blob/main/.ci/static-checks.sh) + on your changes. + + Checks formatting and many other things. + +## If in doubt + +- Ask for help! ;) + +## Quiz 1 + +What's wrong with this function? 
+ +```rust +fn foo(config: &Config, path_prefix: String, container_id: String, pid: String) -> Result<()> { + let mut full_path = format!("{}/{}", path_prefix, container_id); + + let _ = remove_recursively(&mut full_path); + + write_number_to_file(pid, full_path); + + Ok(()) +} +``` + +## Quiz 1: Answers (part 1) + +- No check that `path_prefix`, `container_id` and `pid` are not `""`. +- No check that `path_prefix` is absolute. +- No check that `container_id` does not contain slashes / contains only valid characters. +- Result of `remove_recursively()` discarded. +- `remove_recursively()` _may_ modify `full_path` without `foo()` knowing! + +## Quiz 1: Answers (part 2) + +- Why is `pid` not numeric? +- No check to ensure the PID is positive. +- No check to recreate any directories in the original `path_prefix`. +- `write_number_to_file()` could fail so why doesn't it return a value? +- The `config` parameter is unused. + +## Quiz 1: What if... + +Imagine if the caller managed to do this: + +```rust +foo(config, "", "sbin/init", r#"#!/bin/sh\n/sbin/reboot"#); +``` + +## Quiz 2 + +What makes this function difficult to test? + +```rust +fn get_user_id(username: String) -> i32 { + let line = grep_file(username, "/etc/passwd").unwrap(); + let fields = line.split(':'); + + let uid = fields.nth(2).ok_or("failed").unwrap(); + + uid.parse::<i32>() +} +``` + +## Quiz 2: Answers (part 1) + +- Unhelpful error message ("failed"). + +- Panics on error! Return a `Result` instead! + +- UIDs cannot be negative so function should return an unsigned + value. + +## Quiz 2: Answers (part 2) + +- Hard-coded filename. + + This would be better: + + ```rust + const PASSWD_DB: &str = "/etc/passwd"; + + // Test code can now pass valid and invalid files! + fn get_user_id(filename: String, username: String) -> i32 { + // ... + } + + let id = get_user_id(PASSWD_DB, username); + ``` + +## Quiz 3 + +What's wrong with this test code? 
+ +```rust +let mut obj = Object::new(); + +// Sanity check +assert_eq!(obj.num, 0); +assert_eq!(obj.wibble, false); + +// Test 1 +obj.foo_method(7); +assert_eq!(obj.num, 7); + +// Test 2 +obj.bar_method(true); +assert_eq!(obj.wibble, true); +``` + +## Quiz 3: Answers + +- The test code is "fragile": + - The 2nd test re-uses the object created in the first test. + +## Finally + +- [We need a GH action to run the unit tests](https://github.com/kata-containers/kata-containers/issues/2934) + + Needs to fail PRs that decrease test coverage
by "x%". diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 4f90329c1..173c0f7b7 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -320,17 +320,20 @@ parts: plugin: nil after: [godeps] override-build: | - export GOPATH=${SNAPCRAFT_STAGE}/gopath - yq=${SNAPCRAFT_STAGE}/yq - kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME} - versions_file="${kata_dir}/versions.yaml" - version="$(${yq} r ${versions_file} assets.hypervisor.cloud_hypervisor.version)" - url="https://github.com/cloud-hypervisor/cloud-hypervisor/releases/download/${version}" - curl -L ${url}/cloud-hypervisor-static -o cloud-hypervisor - curl -LO ${url}/clh-remote + sudo apt-get -y update + sudo apt-get -y install ca-certificates curl gnupg lsb-release + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + sudo apt-get -y update + sudo apt-get -y install docker-ce docker-ce-cli containerd.io + sudo systemctl start docker.socket - install -D cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor - install -D clh-remote ${SNAPCRAFT_PART_INSTALL}/usr/bin/clh-remote + export GOPATH=${SNAPCRAFT_STAGE}/gopath + kata_dir=${GOPATH}/src/github.com/${SNAPCRAFT_PROJECT_NAME}/${SNAPCRAFT_PROJECT_NAME} + cd ${kata_dir} + sudo -E NO_TTY=true make cloud-hypervisor-tarball + tar xvJpf build/kata-static-cloud-hypervisor.tar.xz -C /tmp/ + install -D /tmp/opt/kata/bin/cloud-hypervisor ${SNAPCRAFT_PART_INSTALL}/usr/bin/cloud-hypervisor apps: runtime: diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index b4cf73e31..b264e5e9d 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ 
b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -905,13 +905,7 @@ pub fn get_paths() -> Result> { let keys: Vec<&str> = fl[1].split(',').collect(); for key in &keys { - // this is a workaround, cgroup file are using `name=systemd`, - // but if file system the name is `systemd` - if *key == "name=systemd" { - m.insert("systemd".to_string(), fl[2].to_string()); - } else { - m.insert(key.to_string(), fl[2].to_string()); - } + m.insert(key.to_string(), fl[2].to_string()); } } Ok(m) diff --git a/src/runtime/Makefile b/src/runtime/Makefile index ff0f1fb38..ea0f4eb35 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -8,20 +8,19 @@ SKIP_GO_VERSION_CHECK= include golang.mk #Get ARCH. -ifneq (,$(golang_version_raw)) - GOARCH=$(shell go env GOARCH) - ifeq ($(ARCH),) - ARCH = $(GOARCH) - endif -else - ARCH = $(shell uname -m) - ifeq ($(ARCH),x86_64) - ARCH = amd64 - endif - ifeq ($(ARCH),aarch64) - ARCH = arm64 +ifeq ($(ARCH),) + ifneq (,$(golang_version_raw)) + override ARCH = $(shell go env GOARCH) + else + override ARCH = $(shell uname -m) endif endif +ifeq ($(ARCH),x86_64) + override ARCH = amd64 +endif +ifeq ($(ARCH),aarch64) + override ARCH = arm64 +endif ARCH_DIR = arch ARCH_FILE_SUFFIX = -options.mk @@ -29,8 +28,12 @@ ARCH_FILE = $(ARCH_DIR)/$(ARCH)$(ARCH_FILE_SUFFIX) ARCH_FILES = $(wildcard arch/*$(ARCH_FILE_SUFFIX)) ALL_ARCHES = $(patsubst $(ARCH_DIR)/%$(ARCH_FILE_SUFFIX),%,$(ARCH_FILES)) -# Load architecture-dependent settings -include $(ARCH_FILE) +ifeq (,$(realpath $(ARCH_FILE))) + $(error "ERROR: invalid architecture: '$(ARCH)'") +else + # Load architecture-dependent settings + include $(ARCH_FILE) +endif PROJECT_TYPE = kata PROJECT_NAME = Kata Containers @@ -158,6 +161,8 @@ DEFDISABLEGUESTSECCOMP := true #Default experimental features enabled DEFAULTEXPFEATURES := [] +DEFDISABLESELINUX := false + #Default entropy source DEFENTROPYSOURCE := /dev/urandom DEFVALIDENTROPYSOURCES := [\"/dev/urandom\",\"/dev/random\",\"\"] @@ -193,9 +198,6 @@ 
DEFSTATICRESOURCEMGMT ?= false DEFBINDMOUNTS := [] -# Features -FEATURE_SELINUX ?= check - SED = sed CLI_DIR = cmd @@ -433,6 +435,7 @@ USER_VARS += DEFNETWORKMODEL_CLH USER_VARS += DEFNETWORKMODEL_FC USER_VARS += DEFNETWORKMODEL_QEMU USER_VARS += DEFDISABLEGUESTSECCOMP +USER_VARS += DEFDISABLESELINUX USER_VARS += DEFAULTEXPFEATURES USER_VARS += DEFDISABLEBLOCK USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN @@ -460,7 +463,6 @@ USER_VARS += DEFSTATICRESOURCEMGMT USER_VARS += DEFSTATICRESOURCEMGMT_FC USER_VARS += DEFBINDMOUNTS USER_VARS += DEFVFIOMODE -USER_VARS += FEATURE_SELINUX USER_VARS += BUILDFLAGS @@ -475,21 +477,6 @@ QUIET_TEST = $(Q:@=@echo ' TEST '$@;) BUILDTAGS := -ifneq ($(FEATURE_SELINUX),no) - SELINUXTAG := $(shell ./hack/selinux_tag.sh) - - ifneq ($(SELINUXTAG),) - override FEATURE_SELINUX = yes - BUILDTAGS += --tags "$(SELINUXTAG)" - else - ifeq ($(FEATURE_SELINUX),yes) - $(error "ERROR: SELinux support requested, but libselinux is not available") - endif - - override FEATURE_SELINUX = no - endif -endif - # go build common flags BUILDFLAGS := -buildmode=pie -mod=vendor ${BUILDTAGS} @@ -568,8 +555,8 @@ $(SHIMV2_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) $(QUIET_BUILD)(cd $(SHIMV2_DIR)/ && go build -ldflags "$(KATA_LDFLAGS)" $(BUILDFLAGS) -o $@ .) $(MONITOR_OUTPUT): $(SOURCES) $(GENERATED_FILES) $(MAKEFILE_LIST) .git-commit - $(QUIET_BUILD)(cd $(MONITOR_DIR)/ && CGO_ENABLED=0 go build \ - --ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -buildmode=exe -o $@ .) + $(QUIET_BUILD)(cd $(MONITOR_DIR)/ && go build \ + --ldflags "-X main.GitCommit=$(shell cat .git-commit)" $(BUILDFLAGS) -o $@ .) 
.PHONY: \ check \ @@ -749,9 +736,6 @@ endif @printf "\tKnown: $(sort $(HYPERVISORS))\n" @printf "\tAvailable for this architecture: $(sort $(KNOWN_HYPERVISORS))\n" @printf "\n" - @printf "• Features:\n" - @printf "\tSELinux (FEATURE_SELINUX): $(FEATURE_SELINUX)\n" - @printf "\n" @printf "• Summary:\n" @printf "\n" @printf "\tdestination install path (DESTDIR) : %s\n" $(abspath $(DESTDIR)) diff --git a/src/runtime/cmd/kata-monitor/main.go b/src/runtime/cmd/kata-monitor/main.go index 4a52671b6..a3fc86cfd 100644 --- a/src/runtime/cmd/kata-monitor/main.go +++ b/src/runtime/cmd/kata-monitor/main.go @@ -18,7 +18,9 @@ import ( "github.com/sirupsen/logrus" ) -var monitorListenAddr = flag.String("listen-address", ":8090", "The address to listen on for HTTP requests.") +const defaultListenAddress = "127.0.0.1:8090" + +var monitorListenAddr = flag.String("listen-address", defaultListenAddress, "The address to listen on for HTTP requests.") var runtimeEndpoint = flag.String("runtime-endpoint", "/run/containerd/containerd.sock", `Endpoint of CRI container runtime service. (default: "/run/containerd/containerd.sock")`) var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug/info/warn/error/fatal/panic).") @@ -26,7 +28,7 @@ var logLevel = flag.String("log-level", "info", "Log level of logrus(trace/debug var ( appName = "kata-monitor" // version is the kata monitor version. 
+ version = "0.3.0" GitCommit = "unknown-commit" ) diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 4ef211835..2a9736e9e 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -185,6 +185,9 @@ internetworking_model="@DEFNETWORKMODEL_ACRN@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 07e3f31a4..4afafaf08 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -15,6 +15,37 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# Enable confidential guest support. +# Toggling that setting may trigger different hardware features, ranging +# from memory encryption to both memory and CPU-state encryption and integrity. +# The Kata Containers runtime dynamically detects the available feature set and +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Device Hotplug +# - Memory Hotplug +# - NVDIMM devices +# +# Supported TEEs: +# * Intel TDX +# +# Default false +# confidential_guest = true + +# Path to the firmware. +# If you want Cloud Hypervisor to use a specific firmware, set its path below. +# This option is only used when confidential_guest is enabled. 
+# +# For more information about firmware that can be used with specific TEEs, +# please refer to: +# * Intel TDX: +# - td-shim: https://github.com/confidential-containers/td-shim +# +# firmware = "@FIRMWAREPATH@" + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" @@ -205,6 +236,9 @@ internetworking_model="@DEFNETWORKMODEL_CLH@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index ad3e5c672..d9eb093c2 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -309,6 +309,9 @@ internetworking_model="@DEFNETWORKMODEL_FC@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). # (default: disabled) diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 59753db5d..af5c58364 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -20,7 +20,16 @@ machine_type = "@MACHINETYPE@" # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. 
# The Kata Containers runtime dynamically detects the available feature set and -# aims at enabling the largest possible one. +# aims at enabling the largest possible one, returning an error if none is +# available, or none is supported by the hypervisor. +# +# Known limitations: +# * Does not work by design: +# - CPU Hotplug +# - Device Hotplug +# - Memory Hotplug +# - NVDIMM devices +# # Default false # confidential_guest = true @@ -279,6 +288,9 @@ pflashes = [] # If false and nvdimm is supported, use nvdimm device to plug guest image. # Otherwise virtio-block device is used. +# +# nvdimm is not supported when `confidential_guest = true`. +# # Default is false #disable_image_nvdimm = true @@ -492,6 +504,9 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@" # (default: true) disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@ +# disable applying SELinux on the VMM process (default false) +disable_selinux=@DEFDISABLESELINUX@ + # If enabled, the runtime will create opentracing.io traces and spans. # (See https://www.jaegertracing.io/docs/getting-started). 
# (default: disabled) diff --git a/src/runtime/go.mod b/src/runtime/go.mod index 1176012f5..3d2af6892 100644 --- a/src/runtime/go.mod +++ b/src/runtime/go.mod @@ -3,6 +3,7 @@ module github.com/kata-containers/kata-containers/src/runtime go 1.14 require ( + code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 github.com/BurntSushi/toml v0.3.1 github.com/blang/semver v3.5.1+incompatible github.com/blang/semver/v4 v4.0.0 diff --git a/src/runtime/go.sum b/src/runtime/go.sum index dcd31a4b0..27b4670a1 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -38,6 +38,8 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 h1:tM5+dn2C9xZw1RzgI6WTQW1rGqdUimKB3RFbyu4h6Hc= +code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5/go.mod h1:v4VVB6oBMz/c9fRY6vZrwr5xKRWOH5NPDjQZlPk0Gbs= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/AdaLogics/go-fuzz-headers v0.0.0-20210715213245-6c3934b029d8/go.mod h1:CzsSbkDixRphAF5hS6wbMKq0eI6ccJRb7/A0M6JBnwg= github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= @@ -550,8 +552,9 @@ github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.15.0 h1:WjP/FQ/sk43MRmnEcT+MlDw2TFvkrXlprrPST/IudjU= 
github.com/onsi/gomega v1.15.0/go.mod h1:cIuvLEne0aoVhAgh/O6ac0Op8WWw9H6eYCriF+tEHG0= +github.com/onsi/gomega v1.16.0 h1:6gjqkI8iiRHMvdccRJM8rVKjCWk6ZIm6FTm3ddIe4/c= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= diff --git a/src/runtime/pkg/containerd-shim-v2/event_forwarder.go b/src/runtime/pkg/containerd-shim-v2/event_forwarder.go new file mode 100644 index 000000000..a77645a1f --- /dev/null +++ b/src/runtime/pkg/containerd-shim-v2/event_forwarder.go @@ -0,0 +1,88 @@ +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +package containerdshim + +import ( + "context" + "os" + "time" + + "github.com/containerd/containerd/events" +) + +type forwarderType string + +const ( + forwarderTypeLog forwarderType = "log" + forwarderTypeContainerd forwarderType = "containerd" + + // Maximum time to wait for a containerd event to be published; + // if publishing takes longer than timeOut, it is canceled. + timeOut = 5 * time.Second + + // ttrpc address passed from container runtime. 
+ // For now containerd will pass the address, and CRI-O will not + ttrpcAddressEnv = "TTRPC_ADDRESS" +) + +type eventsForwarder interface { + forward() + forwarderType() forwarderType +} + +type logForwarder struct { + s *service +} + +func (lf *logForwarder) forward() { + for e := range lf.s.events { + shimLog.WithField("topic", getTopic(e)).Infof("post event: %+v", e) + } +} + +func (lf *logForwarder) forwarderType() forwarderType { + return forwarderTypeLog +} + +type containerdForwarder struct { + s *service + ctx context.Context + publisher events.Publisher +} + +func (cf *containerdForwarder) forward() { + for e := range cf.s.events { + ctx, cancel := context.WithTimeout(cf.ctx, timeOut) + err := cf.publisher.Publish(ctx, getTopic(e), e) + cancel() + if err != nil { + shimLog.WithError(err).Error("post event") + } + } +} + +func (cf *containerdForwarder) forwarderType() forwarderType { + return forwarderTypeContainerd +} + +func (s *service) newEventsForwarder(ctx context.Context, publisher events.Publisher) eventsForwarder { + var forwarder eventsForwarder + ttrpcAddress := os.Getenv(ttrpcAddressEnv) + if ttrpcAddress == "" { + // non containerd will use log forwarder to write events to log + forwarder = &logForwarder{ + s: s, + } + } else { + forwarder = &containerdForwarder{ + s: s, + ctx: ctx, + publisher: publisher, + } + } + + return forwarder +} diff --git a/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go b/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go new file mode 100644 index 000000000..979ce5f1e --- /dev/null +++ b/src/runtime/pkg/containerd-shim-v2/event_forwarder_test.go @@ -0,0 +1,45 @@ +// Copyright (c) 2022 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +package containerdshim + +import ( + "context" + "os" + "testing" + + "github.com/containerd/containerd/events" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/vcmock" + + "github.com/stretchr/testify/assert" +) + +func 
TestNewEventsForwarder(t *testing.T) { + assert := assert.New(t) + + sandbox := &vcmock.Sandbox{ + MockID: testSandboxID, + } + + s := &service{ + id: testSandboxID, + sandbox: sandbox, + containers: make(map[string]*container), + } + + // newEventsForwarder will not call publisher to publish events + // so here we can use a nil pointer to test newEventsForwarder + var publisher events.Publisher + + // check log forwarder + forwarder := s.newEventsForwarder(context.Background(), publisher) + assert.Equal(forwarderTypeLog, forwarder.forwarderType()) + + // check containerd forwarder + os.Setenv(ttrpcAddressEnv, "/foo/bar.sock") + defer os.Setenv(ttrpcAddressEnv, "") + forwarder = s.newEventsForwarder(context.Background(), publisher) + assert.Equal(forwarderTypeContainerd, forwarder.forwarderType()) +} diff --git a/src/runtime/pkg/containerd-shim-v2/service.go b/src/runtime/pkg/containerd-shim-v2/service.go index 0cca3ebf8..8e20ae82f 100644 --- a/src/runtime/pkg/containerd-shim-v2/service.go +++ b/src/runtime/pkg/containerd-shim-v2/service.go @@ -17,7 +17,6 @@ import ( eventstypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/api/types/task" "github.com/containerd/containerd/errdefs" - "github.com/containerd/containerd/events" "github.com/containerd/containerd/namespaces" cdruntime "github.com/containerd/containerd/runtime" cdshim "github.com/containerd/containerd/runtime/v2/shim" @@ -51,10 +50,6 @@ const ( chSize = 128 exitCode255 = 255 - - // A time span used to wait for publish a containerd event, - // once it costs a longer time than timeOut, it will be canceld. 
- timeOut = 5 * time.Second ) var ( @@ -100,7 +95,8 @@ func New(ctx context.Context, id string, publisher cdshim.Publisher, shutdown fu go s.processExits() - go s.forward(ctx, publisher) + forwarder := s.newEventsForwarder(ctx, publisher) + go forwarder.forward() return s, nil } @@ -256,17 +252,6 @@ func (s *service) StartShim(ctx context.Context, opts cdshim.StartOpts) (_ strin return address, nil } -func (s *service) forward(ctx context.Context, publisher events.Publisher) { - for e := range s.events { - ctx, cancel := context.WithTimeout(ctx, timeOut) - err := publisher.Publish(ctx, getTopic(e), e) - cancel() - if err != nil { - shimLog.WithError(err).Error("post event") - } - } -} - func (s *service) send(evt interface{}) { // for unit test, it will not initialize s.events if s.events != nil { diff --git a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go index 4d343eedc..26b2ac547 100644 --- a/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go +++ b/src/runtime/pkg/govmm/qemu/qemu_arch_base_test.go @@ -1,3 +1,4 @@ +//go:build !s390x // +build !s390x // Copyright contributors to the Virtual Machine Manager for Go project diff --git a/src/runtime/pkg/govmm/vmm_amd64.go b/src/runtime/pkg/govmm/vmm_amd64.go new file mode 100644 index 000000000..37c7a7c52 --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_amd64.go @@ -0,0 +1,12 @@ +// +// Copyright (c) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return uint32(240) +} diff --git a/src/runtime/pkg/govmm/vmm_arm64.go b/src/runtime/pkg/govmm/vmm_arm64.go new file mode 100644 index 000000000..a01cd683f --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_arm64.go @@ -0,0 +1,25 @@ +// +// Copyright (c) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +//In qemu, maximum number of vCPUs depends on the GIC 
version, or on how +//many redistributors we can fit into the memory map. +//related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) +//for now, qemu only supports v2 and v3, we treat v4 as v3 based on +//backward compatibility. +var gicList = map[uint32]uint32{ + uint32(2): uint32(8), + uint32(3): uint32(123), + uint32(4): uint32(123), +} + +var defaultGICVersion = uint32(3) + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return gicList[defaultGICVersion] +} diff --git a/src/runtime/pkg/govmm/vmm_ppc64le.go b/src/runtime/pkg/govmm/vmm_ppc64le.go new file mode 100644 index 000000000..600b083b7 --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_ppc64le.go @@ -0,0 +1,12 @@ +// +// Copyright (c) 2018 IBM +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + return uint32(128) +} diff --git a/src/runtime/pkg/govmm/vmm_s390x.go b/src/runtime/pkg/govmm/vmm_s390x.go new file mode 100644 index 000000000..d6ef960a8 --- /dev/null +++ b/src/runtime/pkg/govmm/vmm_s390x.go @@ -0,0 +1,15 @@ +// +// Copyright (c) 2018 IBM +// +// SPDX-License-Identifier: Apache-2.0 +// + +package govmm + +// MaxVCPUs returns the maximum number of vCPUs supported +func MaxVCPUs() uint32 { + // Max number of virtual Cpu defined in qemu. 
See + // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 + // #define S390_MAX_CPUS 248 + return uint32(248) +} diff --git a/src/runtime/pkg/kata-monitor/cri.go b/src/runtime/pkg/kata-monitor/cri.go index 206726521..e500e5bff 100644 --- a/src/runtime/pkg/kata-monitor/cri.go +++ b/src/runtime/pkg/kata-monitor/cri.go @@ -142,7 +142,7 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) { for _, pod := range r.Items { for _, sandbox := range sandboxList { if pod.Id == sandbox { - km.sandboxCache.setMetadata(sandbox, sandboxKubeData{ + km.sandboxCache.setCRIMetadata(sandbox, sandboxCRIMetadata{ uid: pod.Metadata.Uid, name: pod.Metadata.Name, namespace: pod.Metadata.Namespace, @@ -151,9 +151,9 @@ func (km *KataMonitor) syncSandboxes(sandboxList []string) ([]string, error) { sandboxList = removeFromSandboxList(sandboxList, sandbox) monitorLog.WithFields(logrus.Fields{ - "Pod Name": pod.Metadata.Name, - "Pod Namespace": pod.Metadata.Namespace, - "Pod UID": pod.Metadata.Uid, + "cri-name": pod.Metadata.Name, + "cri-namespace": pod.Metadata.Namespace, + "cri-uid": pod.Metadata.Uid, }).Debugf("Synced KATA POD %s", pod.Id) break diff --git a/src/runtime/pkg/kata-monitor/metrics.go b/src/runtime/pkg/kata-monitor/metrics.go index 0216969cb..84098c88f 100644 --- a/src/runtime/pkg/kata-monitor/metrics.go +++ b/src/runtime/pkg/kata-monitor/metrics.go @@ -160,12 +160,12 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { // get metrics from sandbox's shim for _, sandboxID := range sandboxes { - sandboxMetadata, ok := km.sandboxCache.getMetadata(sandboxID) + sandboxMetadata, ok := km.sandboxCache.getCRIMetadata(sandboxID) if !ok { // likely the sandbox has been just removed continue } wg.Add(1) - go func(sandboxID string, sandboxMetadata sandboxKubeData, results chan<- []*dto.MetricFamily) { + go func(sandboxID string, sandboxMetadata sandboxCRIMetadata, results chan<- 
[]*dto.MetricFamily) { sandboxMetrics, err := getParsedMetrics(sandboxID, sandboxMetadata) if err != nil { monitorLog.WithError(err).WithField("sandbox_id", sandboxID).Errorf("failed to get metrics for sandbox") @@ -223,7 +223,7 @@ func (km *KataMonitor) aggregateSandboxMetrics(encoder expfmt.Encoder) error { } -func getParsedMetrics(sandboxID string, sandboxMetadata sandboxKubeData) ([]*dto.MetricFamily, error) { +func getParsedMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata) ([]*dto.MetricFamily, error) { body, err := doGet(sandboxID, defaultTimeout, "metrics") if err != nil { return nil, err @@ -244,7 +244,7 @@ func GetSandboxMetrics(sandboxID string) (string, error) { // parsePrometheusMetrics will decode metrics from Prometheus text format // and return array of *dto.MetricFamily with an ASC order -func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, body []byte) ([]*dto.MetricFamily, error) { +func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxCRIMetadata, body []byte) ([]*dto.MetricFamily, error) { reader := bytes.NewReader(body) decoder := expfmt.NewDecoder(reader, expfmt.FmtText) @@ -268,15 +268,15 @@ func parsePrometheusMetrics(sandboxID string, sandboxMetadata sandboxKubeData, b Value: mutils.String2Pointer(sandboxID), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_uid"), + Name: mutils.String2Pointer("cri_uid"), Value: mutils.String2Pointer(sandboxMetadata.uid), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_name"), + Name: mutils.String2Pointer("cri_name"), Value: mutils.String2Pointer(sandboxMetadata.name), }, &dto.LabelPair{ - Name: mutils.String2Pointer("kube_namespace"), + Name: mutils.String2Pointer("cri_namespace"), Value: mutils.String2Pointer(sandboxMetadata.namespace), }, ) diff --git a/src/runtime/pkg/kata-monitor/metrics_test.go b/src/runtime/pkg/kata-monitor/metrics_test.go index 1055a6d36..4402911d9 100644 --- a/src/runtime/pkg/kata-monitor/metrics_test.go +++ 
b/src/runtime/pkg/kata-monitor/metrics_test.go @@ -40,7 +40,7 @@ ttt 999 func TestParsePrometheusMetrics(t *testing.T) { assert := assert.New(t) sandboxID := "sandboxID-abc" - sandboxMetadata := sandboxKubeData{"123", "pod-name", "pod-namespace"} + sandboxMetadata := sandboxCRIMetadata{"123", "pod-name", "pod-namespace"} // parse metrics list, err := parsePrometheusMetrics(sandboxID, sandboxMetadata, []byte(shimMetricBody)) @@ -60,12 +60,12 @@ func TestParsePrometheusMetrics(t *testing.T) { assert.Equal(4, len(m.Label), "should have 4 labels") assert.Equal("sandbox_id", *m.Label[0].Name, "label name should be sandbox_id") assert.Equal(sandboxID, *m.Label[0].Value, "label value should be", sandboxID) - assert.Equal("kube_uid", *m.Label[1].Name, "label name should be kube_uid") + assert.Equal("cri_uid", *m.Label[1].Name, "label name should be cri_uid") assert.Equal(sandboxMetadata.uid, *m.Label[1].Value, "label value should be", sandboxMetadata.uid) - assert.Equal("kube_name", *m.Label[2].Name, "label name should be kube_name") + assert.Equal("cri_name", *m.Label[2].Name, "label name should be cri_name") assert.Equal(sandboxMetadata.name, *m.Label[2].Value, "label value should be", sandboxMetadata.name) - assert.Equal("kube_namespace", *m.Label[3].Name, "label name should be kube_namespace") + assert.Equal("cri_namespace", *m.Label[3].Name, "label name should be cri_namespace") assert.Equal(sandboxMetadata.namespace, *m.Label[3].Value, "label value should be", sandboxMetadata.namespace) summary := m.Summary diff --git a/src/runtime/pkg/kata-monitor/monitor.go b/src/runtime/pkg/kata-monitor/monitor.go index caacfb222..9004cf103 100644 --- a/src/runtime/pkg/kata-monitor/monitor.go +++ b/src/runtime/pkg/kata-monitor/monitor.go @@ -53,7 +53,7 @@ func NewKataMonitor(runtimeEndpoint string) (*KataMonitor, error) { runtimeEndpoint: runtimeEndpoint, sandboxCache: &sandboxCache{ Mutex: &sync.Mutex{}, - sandboxes: make(map[string]sandboxKubeData), + sandboxes: 
make(map[string]sandboxCRIMetadata), }, } @@ -104,10 +104,14 @@ func (km *KataMonitor) startPodCacheUpdater() { monitorLog.WithError(err).Fatal("cannot read sandboxes fs") os.Exit(1) } + for _, sandbox := range sandboxList { + km.sandboxCache.putIfNotExists(sandbox, sandboxCRIMetadata{}) + } + monitorLog.Debug("initial sync of sbs directory completed") monitorLog.Tracef("pod list from sbs: %v", sandboxList) - // We should get kubernetes metadata from the container manager for each new kata sandbox we detect. + // We try to get CRI (kubernetes) metadata from the container manager for each new kata sandbox we detect. // It may take a while for data to be available, so we always wait podCacheRefreshDelaySeconds before checking. cacheUpdateTimer := time.NewTimer(podCacheRefreshDelaySeconds * time.Second) cacheUpdateTimerIsSet := true @@ -123,7 +127,7 @@ func (km *KataMonitor) startPodCacheUpdater() { case fsnotify.Create: splitPath := strings.Split(event.Name, string(os.PathSeparator)) id := splitPath[len(splitPath)-1] - if !km.sandboxCache.putIfNotExists(id, sandboxKubeData{}) { + if !km.sandboxCache.putIfNotExists(id, sandboxCRIMetadata{}) { monitorLog.WithField("pod", id).Warn( "CREATE event but pod already present in the sandbox cache") } diff --git a/src/runtime/pkg/kata-monitor/sandbox_cache.go b/src/runtime/pkg/kata-monitor/sandbox_cache.go index ba98a121f..5557aa26e 100644 --- a/src/runtime/pkg/kata-monitor/sandbox_cache.go +++ b/src/runtime/pkg/kata-monitor/sandbox_cache.go @@ -9,15 +9,15 @@ import ( "sync" ) -type sandboxKubeData struct { +type sandboxCRIMetadata struct { uid string name string namespace string } type sandboxCache struct { *sync.Mutex - // the sandboxKubeData links the sandbox id from the container manager to the pod metadata of kubernetes - sandboxes map[string]sandboxKubeData + // the sandboxCRIMetadata links the sandbox id from the container manager to the pod metadata of kubernetes + sandboxes map[string]sandboxCRIMetadata } func (sc 
*sandboxCache) getSandboxList() []string { @@ -43,7 +43,7 @@ func (sc *sandboxCache) deleteIfExists(id string) bool { return false } -func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool { +func (sc *sandboxCache) putIfNotExists(id string, value sandboxCRIMetadata) bool { sc.Lock() defer sc.Unlock() @@ -56,14 +56,14 @@ func (sc *sandboxCache) putIfNotExists(id string, value sandboxKubeData) bool { return false } -func (sc *sandboxCache) setMetadata(id string, value sandboxKubeData) { +func (sc *sandboxCache) setCRIMetadata(id string, value sandboxCRIMetadata) { sc.Lock() defer sc.Unlock() sc.sandboxes[id] = value } -func (sc *sandboxCache) getMetadata(id string) (sandboxKubeData, bool) { +func (sc *sandboxCache) getCRIMetadata(id string) (sandboxCRIMetadata, bool) { sc.Lock() defer sc.Unlock() diff --git a/src/runtime/pkg/kata-monitor/sandbox_cache_test.go b/src/runtime/pkg/kata-monitor/sandbox_cache_test.go index 4eedf778a..d63925573 100644 --- a/src/runtime/pkg/kata-monitor/sandbox_cache_test.go +++ b/src/runtime/pkg/kata-monitor/sandbox_cache_test.go @@ -16,19 +16,19 @@ func TestSandboxCache(t *testing.T) { assert := assert.New(t) sc := &sandboxCache{ Mutex: &sync.Mutex{}, - sandboxes: map[string]sandboxKubeData{"111": {"1-2-3", "test-name", "test-namespace"}}, + sandboxes: map[string]sandboxCRIMetadata{"111": {"1-2-3", "test-name", "test-namespace"}}, } assert.Equal(1, len(sc.getSandboxList())) // put new item id := "new-id" - b := sc.putIfNotExists(id, sandboxKubeData{}) + b := sc.putIfNotExists(id, sandboxCRIMetadata{}) assert.Equal(true, b) assert.Equal(2, len(sc.getSandboxList())) // put key that alreay exists - b = sc.putIfNotExists(id, sandboxKubeData{}) + b = sc.putIfNotExists(id, sandboxCRIMetadata{}) assert.Equal(false, b) b = sc.deleteIfExists(id) diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 5874c0344..f97b64459 100644 --- a/src/runtime/pkg/katautils/config.go +++ 
b/src/runtime/pkg/katautils/config.go @@ -15,6 +15,7 @@ import ( "strings" "github.com/BurntSushi/toml" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" @@ -135,6 +136,7 @@ type hypervisor struct { GuestSwap bool `toml:"enable_guest_swap"` Rootless bool `toml:"rootless"` DisableSeccomp bool `toml:"disable_seccomp"` + DisableSeLinux bool `toml:"disable_selinux"` } type runtime struct { @@ -343,7 +345,7 @@ func (h hypervisor) defaultVCPUs() uint32 { func (h hypervisor) defaultMaxVCPUs() uint32 { numcpus := uint32(goruntime.NumCPU()) - maxvcpus := vc.MaxQemuVCPUs() + maxvcpus := govmm.MaxVCPUs() reqVCPUs := h.DefaultMaxVCPUs //don't exceed the number of physical CPUs. If a default is not provided, use the @@ -876,6 +878,8 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { SGXEPCSize: defaultSGXEPCSize, EnableAnnotations: h.EnableAnnotations, DisableSeccomp: h.DisableSeccomp, + ConfidentialGuest: h.ConfidentialGuest, + DisableSeLinux: h.DisableSeLinux, }, nil } diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 268e50276..c6882f074 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -17,6 +17,7 @@ import ( "syscall" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/pkg/oci" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" @@ -919,7 +920,7 @@ func TestHypervisorDefaults(t *testing.T) { h.DefaultMaxVCPUs = numCPUs + 1 assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") - maxvcpus := 
vc.MaxQemuVCPUs() + maxvcpus := govmm.MaxVCPUs() h.DefaultMaxVCPUs = maxvcpus + 1 assert.Equal(h.defaultMaxVCPUs(), numCPUs, "default max vCPU number is wrong") diff --git a/src/runtime/pkg/katautils/create.go b/src/runtime/pkg/katautils/create.go index bf38acb05..d429558f4 100644 --- a/src/runtime/pkg/katautils/create.go +++ b/src/runtime/pkg/katautils/create.go @@ -155,11 +155,19 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo } }() - // Run pre-start OCI hooks. - err = EnterNetNS(sandboxConfig.NetworkConfig.NetworkID, func() error { - return PreStartHooks(ctx, ociSpec, containerID, bundlePath) - }) - if err != nil { + if ociSpec.Annotations == nil { + ociSpec.Annotations = make(map[string]string) + } + ociSpec.Annotations["nerdctl/network-namespace"] = sandboxConfig.NetworkConfig.NetworkID + sandboxConfig.Annotations["nerdctl/network-namespace"] = ociSpec.Annotations["nerdctl/network-namespace"] + + // Run pre-start OCI hooks, in the runtime namespace. + if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil { + return nil, vc.Process{}, err + } + + // Run create runtime OCI hooks, in the runtime namespace. 
+ if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil { return nil, vc.Process{}, err } diff --git a/src/runtime/pkg/katautils/create_test.go b/src/runtime/pkg/katautils/create_test.go index 804b4318e..dab665dce 100644 --- a/src/runtime/pkg/katautils/create_test.go +++ b/src/runtime/pkg/katautils/create_test.go @@ -264,6 +264,46 @@ func TestCreateSandboxFail(t *testing.T) { assert.True(vcmock.IsMockError(err)) } +func TestCreateSandboxAnnotations(t *testing.T) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(ktu.TestDisabledNeedRoot) + } + + assert := assert.New(t) + + tmpdir, bundlePath, _ := ktu.SetupOCIConfigFile(t) + defer os.RemoveAll(tmpdir) + + runtimeConfig, err := newTestRuntimeConfig(tmpdir, testConsole, true) + assert.NoError(err) + + spec, err := compatoci.ParseConfigJSON(bundlePath) + assert.NoError(err) + + rootFs := vc.RootFs{Mounted: true} + + testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) { + return &vcmock.Sandbox{ + MockID: testSandboxID, + MockContainers: []*vcmock.Container{ + {MockID: testContainerID}, + }, + MockAnnotations: sandboxConfig.Annotations, + }, nil + } + + defer func() { + testingImpl.CreateSandboxFunc = nil + }() + + sandbox, _, err := CreateSandbox(context.Background(), testingImpl, spec, runtimeConfig, rootFs, testContainerID, bundlePath, testConsole, true, true) + assert.NoError(err) + + netNsPath, err := sandbox.Annotations("nerdctl/network-namespace") + assert.NoError(err) + assert.Equal(path.Dir(netNsPath), "/var/run/netns") +} + func TestCheckForFips(t *testing.T) { assert := assert.New(t) diff --git a/src/runtime/pkg/katautils/hook.go b/src/runtime/pkg/katautils/hook.go index 62eb81169..50ac95cb8 100644 --- a/src/runtime/pkg/katautils/hook.go +++ b/src/runtime/pkg/katautils/hook.go @@ -16,6 +16,7 @@ import ( "time" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + syscallWrapper 
"github.com/kata-containers/kata-containers/src/runtime/pkg/syscall" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -32,15 +33,16 @@ func hookLogger() *logrus.Entry { return kataUtilsLogger.WithField("subsystem", "hook") } -func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error { +func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundlePath string) error { span, _ := katatrace.Trace(ctx, hookLogger(), "runHook", hookTracingTags) defer span.End() katatrace.AddTags(span, "path", hook.Path, "args", hook.Args) state := specs.State{ - Pid: syscall.Gettid(), - Bundle: bundlePath, - ID: cid, + Pid: syscallWrapper.Gettid(), + Bundle: bundlePath, + ID: cid, + Annotations: spec.Annotations, } stateJSON, err := json.Marshal(state) @@ -90,13 +92,13 @@ func runHook(ctx context.Context, hook specs.Hook, cid, bundlePath string) error return nil } -func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType string) error { +func runHooks(ctx context.Context, spec specs.Spec, hooks []specs.Hook, cid, bundlePath, hookType string) error { span, ctx := katatrace.Trace(ctx, hookLogger(), "runHooks", hookTracingTags) katatrace.AddTags(span, "type", hookType) defer span.End() for _, hook := range hooks { - if err := runHook(ctx, hook, cid, bundlePath); err != nil { + if err := runHook(ctx, spec, hook, cid, bundlePath); err != nil { hookLogger().WithFields(logrus.Fields{ "hook-type": hookType, "error": err, @@ -109,6 +111,15 @@ func runHooks(ctx context.Context, hooks []specs.Hook, cid, bundlePath, hookType return nil } +func CreateRuntimeHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error { + // If no hook available, nothing needs to be done. 
+ if spec.Hooks == nil { + return nil + } + + return runHooks(ctx, spec, spec.Hooks.CreateRuntime, cid, bundlePath, "createRuntime") +} + // PreStartHooks run the hooks before start container func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) error { // If no hook available, nothing needs to be done. @@ -116,7 +127,7 @@ func PreStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) return nil } - return runHooks(ctx, spec.Hooks.Prestart, cid, bundlePath, "pre-start") + return runHooks(ctx, spec, spec.Hooks.Prestart, cid, bundlePath, "pre-start") } // PostStartHooks run the hooks just after start container @@ -126,7 +137,7 @@ func PostStartHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string return nil } - return runHooks(ctx, spec.Hooks.Poststart, cid, bundlePath, "post-start") + return runHooks(ctx, spec, spec.Hooks.Poststart, cid, bundlePath, "post-start") } // PostStopHooks run the hooks after stop container @@ -136,5 +147,5 @@ func PostStopHooks(ctx context.Context, spec specs.Spec, cid, bundlePath string) return nil } - return runHooks(ctx, spec.Hooks.Poststop, cid, bundlePath, "post-stop") + return runHooks(ctx, spec, spec.Hooks.Poststop, cid, bundlePath, "post-stop") } diff --git a/src/runtime/pkg/katautils/hook_test.go b/src/runtime/pkg/katautils/hook_test.go index 54acfecc0..6109b5549 100644 --- a/src/runtime/pkg/katautils/hook_test.go +++ b/src/runtime/pkg/katautils/hook_test.go @@ -57,26 +57,27 @@ func TestRunHook(t *testing.T) { assert := assert.New(t) ctx := context.Background() + spec := specs.Spec{} // Run with timeout 0 hook := createHook(0) - err := runHook(ctx, hook, testSandboxID, testBundlePath) + err := runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.NoError(err) // Run with timeout 1 hook = createHook(1) - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.NoError(err) // Run timeout failure 
hook = createHook(1) hook.Args = append(hook.Args, "2") - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.Error(err) // Failure due to wrong hook hook = createWrongHook() - err = runHook(ctx, hook, testSandboxID, testBundlePath) + err = runHook(ctx, spec, hook, testSandboxID, testBundlePath) assert.Error(err) } diff --git a/src/runtime/pkg/katautils/network_darwin.go b/src/runtime/pkg/katautils/network_darwin.go new file mode 100644 index 000000000..d913557cb --- /dev/null +++ b/src/runtime/pkg/katautils/network_darwin.go @@ -0,0 +1,22 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package katautils + +import ( + vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" +) + +func EnterNetNS(networkID string, cb func() error) error { + return nil +} + +func SetupNetworkNamespace(config *vc.NetworkConfig) error { + return nil +} + +func cleanupNetNS(netNSPath string) error { + return nil +} diff --git a/src/runtime/pkg/katautils/network.go b/src/runtime/pkg/katautils/network_linux.go similarity index 100% rename from src/runtime/pkg/katautils/network.go rename to src/runtime/pkg/katautils/network_linux.go diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 995339e0e..d136ed5da 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -23,6 +23,7 @@ import ( "github.com/sirupsen/logrus" "k8s.io/apimachinery/pkg/api/resource" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" @@ -640,8 +641,8 @@ func addHypervisorCPUOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) e return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than the number of CPUs %d on the system", max, numCPUs) } 
- if sbConfig.HypervisorType == vc.QemuHypervisor && max > vc.MaxQemuVCPUs() { - return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, vc.MaxQemuVCPUs()) + if sbConfig.HypervisorType == vc.QemuHypervisor && max > govmm.MaxVCPUs() { + return fmt.Errorf("Number of cpus %d in annotation default_maxvcpus is greater than max no of CPUs %d supported for qemu", max, govmm.MaxVCPUs()) } sbConfig.HypervisorConfig.DefaultMaxVCPUs = max return nil diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/pkg/resourcecontrol/cgroups.go similarity index 73% rename from src/runtime/virtcontainers/pkg/cgroups/cgroups.go rename to src/runtime/pkg/resourcecontrol/cgroups.go index ea912db3d..4210392d2 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go +++ b/src/runtime/pkg/resourcecontrol/cgroups.go @@ -1,11 +1,15 @@ -// Copyright (c) 2021 Apple Inc. +//go:build linux +// +build linux + +// Copyright (c) 2021-2022 Apple Inc. // // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( + "fmt" "os" "path/filepath" "sync" @@ -16,7 +20,24 @@ import ( "github.com/sirupsen/logrus" ) -type Cgroup struct { +// prepend a kata specific string to oci cgroup path to +// form a different cgroup path, thus cAdvisor couldn't +// find kata containers cgroup path on host to prevent it +// from grabbing the stats data. 
+const CgroupKataPrefix = "kata" + +func RenameCgroupPath(path string) (string, error) { + if path == "" { + path = DefaultResourceControllerID + } + + cgroupPathDir := filepath.Dir(path) + cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path)) + return filepath.Join(cgroupPathDir, cgroupPathName), nil + +} + +type LinuxCgroup struct { cgroup cgroups.Cgroup path string cpusets *specs.LinuxCPU @@ -25,17 +46,6 @@ type Cgroup struct { sync.Mutex } -var ( - cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") -) - -// SetLogger sets up a logger for this pkg -func SetLogger(logger *logrus.Entry) { - fields := cgroupsLogger.Data - - cgroupsLogger = logger.WithFields(fields) -} - func sandboxDevices() []specs.LinuxDeviceCgroup { devices := []specs.LinuxDeviceCgroup{} @@ -65,7 +75,7 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { for _, device := range defaultDevices { ldevice, err := DeviceToLinuxDevice(device) if err != nil { - cgroupsLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) + controllerLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) continue } devices = append(devices, ldevice) @@ -116,7 +126,7 @@ func sandboxDevices() []specs.LinuxDeviceCgroup { return devices } -func NewCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { +func NewResourceController(path string, resources *specs.LinuxResources) (ResourceController, error) { var err error cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) @@ -129,7 +139,7 @@ func NewCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { return nil, err } - return &Cgroup{ + return &LinuxCgroup{ path: cgroupPath, devices: resources.Devices, cpusets: resources.CPU, @@ -137,7 +147,7 @@ func NewCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { }, nil } -func NewSandboxCgroup(path string, resources *specs.LinuxResources, 
sandboxCgroupOnly bool) (*Cgroup, error) { +func NewSandboxResourceController(path string, resources *specs.LinuxResources, sandboxCgroupOnly bool) (ResourceController, error) { var cgroup cgroups.Cgroup sandboxResources := *resources sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) @@ -145,7 +155,7 @@ func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgrou // Currently we know to handle systemd cgroup path only when it's the only cgroup (no overhead group), hence, // if sandboxCgroupOnly is not true we treat it as cgroupfs path as it used to be, although it may be incorrect if !IsSystemdCgroup(path) || !sandboxCgroupOnly { - return NewCgroup(path, &sandboxResources) + return NewResourceController(path, &sandboxResources) } slice, unit, err := getSliceAndUnit(path) @@ -174,7 +184,7 @@ func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgrou return nil, err } - return &Cgroup{ + return &LinuxCgroup{ path: path, devices: sandboxResources.Devices, cpusets: sandboxResources.CPU, @@ -182,7 +192,7 @@ func NewSandboxCgroup(path string, resources *specs.LinuxResources, sandboxCgrou }, nil } -func Load(path string) (*Cgroup, error) { +func LoadResourceController(path string) (ResourceController, error) { cgHierarchy, cgPath, err := cgroupHierarchy(path) if err != nil { return nil, err @@ -193,37 +203,37 @@ func Load(path string) (*Cgroup, error) { return nil, err } - return &Cgroup{ + return &LinuxCgroup{ path: path, cgroup: cgroup, }, nil } -func (c *Cgroup) Logger() *logrus.Entry { - return cgroupsLogger.WithField("source", "cgroups") +func (c *LinuxCgroup) Logger() *logrus.Entry { + return controllerLogger.WithField("source", "cgroups") } -func (c *Cgroup) Delete() error { +func (c *LinuxCgroup) Delete() error { return c.cgroup.Delete() } -func (c *Cgroup) Stat() (*v1.Metrics, error) { +func (c *LinuxCgroup) Stat() (*v1.Metrics, error) { return 
c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) } -func (c *Cgroup) AddProcess(pid int, subsystems ...string) error { +func (c *LinuxCgroup) AddProcess(pid int, subsystems ...string) error { return c.cgroup.Add(cgroups.Process{Pid: pid}) } -func (c *Cgroup) AddTask(pid int, subsystems ...string) error { +func (c *LinuxCgroup) AddThread(pid int, subsystems ...string) error { return c.cgroup.AddTask(cgroups.Process{Pid: pid}) } -func (c *Cgroup) Update(resources *specs.LinuxResources) error { +func (c *LinuxCgroup) Update(resources *specs.LinuxResources) error { return c.cgroup.Update(resources) } -func (c *Cgroup) MoveTo(path string) error { +func (c *LinuxCgroup) MoveTo(path string) error { cgHierarchy, cgPath, err := cgroupHierarchy(path) if err != nil { return err @@ -237,13 +247,7 @@ func (c *Cgroup) MoveTo(path string) error { return c.cgroup.MoveTo(newCgroup) } -func (c *Cgroup) MoveToParent() error { - parentPath := filepath.Dir(c.path) - - return c.MoveTo(parentPath) -} - -func (c *Cgroup) AddDevice(deviceHostPath string) error { +func (c *LinuxCgroup) AddDevice(deviceHostPath string) error { deviceResource, err := DeviceToLinuxDevice(deviceHostPath) if err != nil { return err @@ -263,7 +267,7 @@ func (c *Cgroup) AddDevice(deviceHostPath string) error { return nil } -func (c *Cgroup) RemoveDevice(deviceHostPath string) error { +func (c *LinuxCgroup) RemoveDevice(deviceHostPath string) error { deviceResource, err := DeviceToLinuxDevice(deviceHostPath) if err != nil { return err @@ -289,7 +293,7 @@ func (c *Cgroup) RemoveDevice(deviceHostPath string) error { return nil } -func (c *Cgroup) UpdateCpuSet(cpuset, memset string) error { +func (c *LinuxCgroup) UpdateCpuSet(cpuset, memset string) error { c.Lock() defer c.Unlock() @@ -316,6 +320,14 @@ func (c *Cgroup) UpdateCpuSet(cpuset, memset string) error { }) } -func (c *Cgroup) Path() string { +func (c *LinuxCgroup) Type() ResourceControllerType { + return LinuxCgroups +} + +func (c *LinuxCgroup) 
ID() string { return c.path } + +func (c *LinuxCgroup) Parent() string { + return filepath.Dir(c.path) +} diff --git a/src/runtime/pkg/resourcecontrol/controller.go b/src/runtime/pkg/resourcecontrol/controller.go new file mode 100644 index 000000000..1ffba476b --- /dev/null +++ b/src/runtime/pkg/resourcecontrol/controller.go @@ -0,0 +1,82 @@ +// Copyright (c) 2021 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package resourcecontrol + +import ( + v1 "github.com/containerd/cgroups/stats/v1" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +var ( + controllerLogger = logrus.WithField("source", "virtcontainers/pkg/resourcecontrol") +) + +// SetLogger sets up a logger for this pkg +func SetLogger(logger *logrus.Entry) { + fields := controllerLogger.Data + + controllerLogger = logger.WithFields(fields) +} + +// ResourceControllerType describes a resource controller type. +type ResourceControllerType string + +const ( + LinuxCgroups ResourceControllerType = "cgroups" +) + +// String converts a resource controller type to a string. +func (rType *ResourceControllerType) String() string { + switch *rType { + case LinuxCgroups: + return string(LinuxCgroups) + default: + return "Unknown controller type" + } +} + +// ResourceController represents a system resources controller. +// On Linux this interface is implemented through the cgroups API. +type ResourceController interface { + // Type returns the resource controller implementation type. + Type() ResourceControllerType + + // The controller identifier, e.g. a Linux cgroups path. + ID() string + + // Parent returns the parent controller, on hierarchically + // defined resource (e.g. Linux cgroups). + Parent() string + + // Delete the controller. + Delete() error + + // Stat returns the statistics for the controller. + Stat() (*v1.Metrics, error) + + // AddProcess adds a process to a set of controllers. 
+ AddProcess(int, ...string) error + + // AddThread adds a process thread to a set of controllers. + AddThread(int, ...string) error + + // Update updates the set of resources controlled, based on + // an OCI resources description. + Update(*specs.LinuxResources) error + + // MoveTo moves a controller to another one. + MoveTo(string) error + + // AddDevice adds a device resource to the controller. + AddDevice(string) error + + // RemoveDevice removes a device resource from the controller. + RemoveDevice(string) error + + // UpdateCpuSet updates the set of controlled CPUs and memory nodes. + UpdateCpuSet(string, string) error +} diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go new file mode 100644 index 000000000..835cb54c4 --- /dev/null +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -0,0 +1,77 @@ +// Copyright (c) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package resourcecontrol + +import ( + "fmt" + "strings" + + "github.com/opencontainers/runc/libcontainer/devices" + "github.com/opencontainers/runtime-spec/specs-go" + "golang.org/x/sys/unix" +) + +func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { + var st unix.Stat_t + deviceRule := devices.Rule{ + Allow: true, + Permissions: "rwm", + } + + if err := unix.Stat(device, &st); err != nil { + return nil, err + } + + devType := st.Mode & unix.S_IFMT + + switch devType { + case unix.S_IFCHR: + deviceRule.Type = 'c' + case unix.S_IFBLK: + deviceRule.Type = 'b' + default: + return nil, fmt.Errorf("unsupported device type: %v", devType) + } + + major := int64(unix.Major(uint64(st.Rdev))) + minor := int64(unix.Minor(uint64(st.Rdev))) + deviceRule.Major = major + deviceRule.Minor = minor + + return &deviceRule, nil +} + +func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) { + dev, err := DeviceToCgroupDeviceRule(device) + if err != nil { + return specs.LinuxDeviceCgroup{}, err + } + + return
specs.LinuxDeviceCgroup{ + Allow: dev.Allow, + Type: string(dev.Type), + Major: &dev.Major, + Minor: &dev.Minor, + Access: string(dev.Permissions), + }, nil +} + +func IsSystemdCgroup(cgroupPath string) bool { + + // If we are utilizing systemd to manage cgroups, we expect to receive a path + // in the format slice:scopeprefix:name. A typical example would be: + // + // system.slice:docker:6b4c4a4d0cc2a12c529dcb13a2b8e438dfb3b2a6af34d548d7d + // + // Based on this, let's split by the ':' delimiter and verify that the first + // section has .slice as a suffix. + parts := strings.Split(cgroupPath, ":") + if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") { + return true + } + + return false +} diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils.go b/src/runtime/pkg/resourcecontrol/utils_linux.go similarity index 56% rename from src/runtime/virtcontainers/pkg/cgroups/utils.go rename to src/runtime/pkg/resourcecontrol/utils_linux.go index fd70b880e..73d77dc23 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "context" @@ -15,30 +15,10 @@ import ( systemdDbus "github.com/coreos/go-systemd/v22/dbus" "github.com/godbus/dbus/v5" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runtime-spec/specs-go" - "golang.org/x/sys/unix" ) -// prepend a kata specific string to oci cgroup path to -// form a different cgroup path, thus cAdvisor couldn't -// find kata containers cgroup path on host to prevent it -// from grabbing the stats data. -const CgroupKataPrefix = "kata" - -// DefaultCgroupPath runtime-determined location in the cgroups hierarchy. 
-const DefaultCgroupPath = "/vc" - -func RenameCgroupPath(path string) (string, error) { - if path == "" { - path = DefaultCgroupPath - } - - cgroupPathDir := filepath.Dir(path) - cgroupPathName := fmt.Sprintf("%s_%s", CgroupKataPrefix, filepath.Base(path)) - return filepath.Join(cgroupPathDir, cgroupPathName), nil - -} +// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. +const DefaultResourceControllerID = "/vc" // validCgroupPath returns a valid cgroup path. // see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path @@ -59,25 +39,8 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { // In the case of a relative path (not starting with /), the runtime MAY // interpret the path relative to a runtime-determined location in the cgroups hierarchy. - // clean up path and return a new path relative to DefaultCgroupPath - return filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path)), nil -} - -func IsSystemdCgroup(cgroupPath string) bool { - - // If we are utilizing systemd to manage cgroups, we expect to receive a path - // in the format slice:scopeprefix:name. A typical example would be: - // - // system.slice:docker:6b4c4a4d0cc2a12c529dcb13a2b8e438dfb3b2a6af34d548d7d - // - // Based on this, let's split by the ':' delimiter and verify that the first - // section has .slice as a suffix. 
- parts := strings.Split(cgroupPath, ":") - if len(parts) == 3 && strings.HasSuffix(parts[0], ".slice") { - return true - } - - return false + // clean up path and return a new path relative to DefaultResourceControllerID + return filepath.Join(DefaultResourceControllerID, filepath.Clean("/"+path)), nil } func newProperty(name string, units interface{}) systemdDbus.Property { @@ -149,48 +112,3 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } - -func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { - var st unix.Stat_t - deviceRule := devices.Rule{ - Allow: true, - Permissions: "rwm", - } - - if err := unix.Stat(device, &st); err != nil { - return nil, err - } - - devType := st.Mode & unix.S_IFMT - - switch devType { - case unix.S_IFCHR: - deviceRule.Type = 'c' - case unix.S_IFBLK: - deviceRule.Type = 'b' - default: - return nil, fmt.Errorf("unsupported device type: %v", devType) - } - - major := int64(unix.Major(st.Rdev)) - minor := int64(unix.Minor(st.Rdev)) - deviceRule.Major = major - deviceRule.Minor = minor - - return &deviceRule, nil -} - -func DeviceToLinuxDevice(device string) (specs.LinuxDeviceCgroup, error) { - dev, err := DeviceToCgroupDeviceRule(device) - if err != nil { - return specs.LinuxDeviceCgroup{}, err - } - - return specs.LinuxDeviceCgroup{ - Allow: dev.Allow, - Type: string(dev.Type), - Major: &dev.Major, - Minor: &dev.Minor, - Access: string(dev.Permissions), - }, nil -} diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_test.go b/src/runtime/pkg/resourcecontrol/utils_linux_test.go similarity index 95% rename from src/runtime/virtcontainers/pkg/cgroups/utils_test.go rename to src/runtime/pkg/resourcecontrol/utils_linux_test.go index 381af8165..2e40c4d83 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils_test.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux_test.go @@ -3,7 +3,7 @@ // 
SPDX-License-Identifier: Apache-2.0 // -package cgroups +package resourcecontrol import ( "os" @@ -99,8 +99,8 @@ func TestValidCgroupPath(t *testing.T) { assert.Equal(t.path, path) } else { assert.True( - strings.HasPrefix(path, DefaultCgroupPath), - "%v should have prefix /%v", path, DefaultCgroupPath) + strings.HasPrefix(path, DefaultResourceControllerID), + "%v should have prefix /%v", path, DefaultResourceControllerID) } } diff --git a/src/runtime/pkg/syscall/syscall_darwin.go b/src/runtime/pkg/syscall/syscall_darwin.go new file mode 100644 index 000000000..a5cc79d18 --- /dev/null +++ b/src/runtime/pkg/syscall/syscall_darwin.go @@ -0,0 +1,16 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package syscall + +import ( + "syscall" +) + +func Gettid() int { + // There is no equivalent to a thread ID on Darwin. + // We use the PID instead. + return syscall.Getpid() +} diff --git a/src/runtime/pkg/syscall/syscall_linux.go b/src/runtime/pkg/syscall/syscall_linux.go new file mode 100644 index 000000000..b05fc156d --- /dev/null +++ b/src/runtime/pkg/syscall/syscall_linux.go @@ -0,0 +1,14 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package syscall + +import ( + "syscall" +) + +func Gettid() int { + return syscall.Gettid() +} diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE new file mode 100644 index 000000000..f49a4e16e --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE new file mode 100644 index 000000000..8625a7f41 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/NOTICE @@ -0,0 +1,20 @@ +Copyright (c) 2015-Present CloudFoundry.org Foundation, Inc. All Rights Reserved. + +This project contains software that is Copyright (c) 2013-2015 Pivotal Software, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This project may include a number of subcomponents with separate +copyright notices and license terms. Your use of these subcomponents +is subject to the terms and conditions of each subcomponent's license, +as noted in the LICENSE file. diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md new file mode 100644 index 000000000..778fd386d --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/README.md @@ -0,0 +1,19 @@ +bytefmt +======= + +**Note**: This repository should be imported as `code.cloudfoundry.org/bytefmt`. + +Human-readable byte formatter. + +Example: + +```go +bytefmt.ByteSize(100.5*bytefmt.MEGABYTE) // returns "100.5M" +bytefmt.ByteSize(uint64(1024)) // returns "1K" +``` + +For documentation, please see http://godoc.org/code.cloudfoundry.org/bytefmt + +## Reporting issues and requesting features + +Please report all issues and feature requests in [cloudfoundry/diego-release](https://github.com/cloudfoundry/diego-release/issues). diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go new file mode 100644 index 000000000..739560477 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/bytes.go @@ -0,0 +1,121 @@ +// Package bytefmt contains helper methods and constants for converting to and from a human-readable byte format. 
+// +// bytefmt.ByteSize(100.5*bytefmt.MEGABYTE) // "100.5M" +// bytefmt.ByteSize(uint64(1024)) // "1K" +// +package bytefmt + +import ( + "errors" + "strconv" + "strings" + "unicode" +) + +const ( + BYTE = 1 << (10 * iota) + KILOBYTE + MEGABYTE + GIGABYTE + TERABYTE + PETABYTE + EXABYTE +) + +var invalidByteQuantityError = errors.New("byte quantity must be a positive integer with a unit of measurement like M, MB, MiB, G, GiB, or GB") + +// ByteSize returns a human-readable byte string of the form 10M, 12.5K, and so forth. The following units are available: +// E: Exabyte +// P: Petabyte +// T: Terabyte +// G: Gigabyte +// M: Megabyte +// K: Kilobyte +// B: Byte +// The unit that results in the smallest number greater than or equal to 1 is always chosen. +func ByteSize(bytes uint64) string { + unit := "" + value := float64(bytes) + + switch { + case bytes >= EXABYTE: + unit = "E" + value = value / EXABYTE + case bytes >= PETABYTE: + unit = "P" + value = value / PETABYTE + case bytes >= TERABYTE: + unit = "T" + value = value / TERABYTE + case bytes >= GIGABYTE: + unit = "G" + value = value / GIGABYTE + case bytes >= MEGABYTE: + unit = "M" + value = value / MEGABYTE + case bytes >= KILOBYTE: + unit = "K" + value = value / KILOBYTE + case bytes >= BYTE: + unit = "B" + case bytes == 0: + return "0B" + } + + result := strconv.FormatFloat(value, 'f', 1, 64) + result = strings.TrimSuffix(result, ".0") + return result + unit +} + +// ToMegabytes parses a string formatted by ByteSize as megabytes. +func ToMegabytes(s string) (uint64, error) { + bytes, err := ToBytes(s) + if err != nil { + return 0, err + } + + return bytes / MEGABYTE, nil +} + +// ToBytes parses a string formatted by ByteSize as bytes. 
Note binary-prefixed and SI prefixed units both mean a base-2 units +// KB = K = KiB = 1024 +// MB = M = MiB = 1024 * K +// GB = G = GiB = 1024 * M +// TB = T = TiB = 1024 * G +// PB = P = PiB = 1024 * T +// EB = E = EiB = 1024 * P +func ToBytes(s string) (uint64, error) { + s = strings.TrimSpace(s) + s = strings.ToUpper(s) + + i := strings.IndexFunc(s, unicode.IsLetter) + + if i == -1 { + return 0, invalidByteQuantityError + } + + bytesString, multiple := s[:i], s[i:] + bytes, err := strconv.ParseFloat(bytesString, 64) + if err != nil || bytes < 0 { + return 0, invalidByteQuantityError + } + + switch multiple { + case "E", "EB", "EIB": + return uint64(bytes * EXABYTE), nil + case "P", "PB", "PIB": + return uint64(bytes * PETABYTE), nil + case "T", "TB", "TIB": + return uint64(bytes * TERABYTE), nil + case "G", "GB", "GIB": + return uint64(bytes * GIGABYTE), nil + case "M", "MB", "MIB": + return uint64(bytes * MEGABYTE), nil + case "K", "KB", "KIB": + return uint64(bytes * KILOBYTE), nil + case "B": + return uint64(bytes), nil + default: + return 0, invalidByteQuantityError + } +} diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod new file mode 100644 index 000000000..dfc6cce21 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.mod @@ -0,0 +1,8 @@ +module code.cloudfoundry.org/bytefmt + +go 1.16 + +require ( + github.com/onsi/ginkgo v1.16.4 + github.com/onsi/gomega v1.16.0 +) diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum new file mode 100644 index 000000000..a881b8539 --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/go.sum @@ -0,0 +1,93 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.4.7/go.mod 
h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod 
h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.16.0 h1:6gjqkI8iiRHMvdccRJM8rVKjCWk6ZIm6FTm3ddIe4/c= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781 h1:DzZ89McO9/gWPsQXS/FVKAlG02ZjaQ6AlZRBimEYOd0= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk= +google.golang.org/protobuf v1.26.0/go.mod 
h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go new file mode 100644 index 000000000..03429300b --- /dev/null +++ b/src/runtime/vendor/code.cloudfoundry.org/bytefmt/package.go @@ -0,0 +1 @@ +package bytefmt // import "code.cloudfoundry.org/bytefmt" diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index e62ed4774..8fedaa19e 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -1,3 +1,6 @@ +# code.cloudfoundry.org/bytefmt v0.0.0-20211005130812-5bb3c17173e5 +## explicit +code.cloudfoundry.org/bytefmt # github.com/BurntSushi/toml v0.3.1 ## explicit github.com/BurntSushi/toml diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 244b78d66..6b2679201 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: 
Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 5d258c42c..ab547b6bd 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 45d85fb4e..b92f35a2f 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/agent.go b/src/runtime/virtcontainers/agent.go index afd30aa59..f597d4e47 100644 --- a/src/runtime/virtcontainers/agent.go +++ b/src/runtime/virtcontainers/agent.go @@ -177,7 +177,7 @@ type agent interface { markDead(ctx context.Context) // cleanup removes all on disk information generated by the agent - cleanup(ctx context.Context, s *Sandbox) + cleanup(ctx context.Context) // return data for saving save() persistapi.AgentState diff --git a/src/runtime/virtcontainers/api.go b/src/runtime/virtcontainers/api.go index 14cfbb109..4b85851cf 100644 --- a/src/runtime/virtcontainers/api.go +++ b/src/runtime/virtcontainers/api.go @@ -10,9 +10,9 @@ import ( "runtime" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" deviceApi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" deviceConfig "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" vcTypes 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/sirupsen/logrus" @@ -39,7 +39,7 @@ func SetLogger(ctx context.Context, logger *logrus.Entry) { deviceApi.SetLogger(virtLog) compatoci.SetLogger(virtLog) deviceConfig.SetLogger(virtLog) - cgroups.SetLogger(virtLog) + resCtrl.SetLogger(virtLog) } // CreateSandbox is the virtcontainers sandbox creation entry point. @@ -83,7 +83,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f }() // Set the sandbox host cgroups. - if err := s.setupCgroups(); err != nil { + if err := s.setupResourceController(); err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index 3366041c6..0268ab125 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -15,9 +15,9 @@ import ( "testing" ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/fs" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/mock" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -49,7 +49,7 @@ func newEmptySpec() *specs.Spec { return &specs.Spec{ Linux: &specs.Linux{ Resources: &specs.LinuxResources{}, - CgroupsPath: vccgroups.DefaultCgroupPath, + CgroupsPath: resCtrl.DefaultResourceControllerID, }, Process: &specs.Process{ Capabilities: &specs.LinuxCapabilities{}, @@ -170,10 +170,10 @@ func TestCreateSandboxKataAgentSuccessful(t *testing.T) { url, err := 
mock.GenerateKataMockHybridVSock() assert.NoError(err) - MockHybridVSockPath = url + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 4ab1400da..805fda716 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH // // SPDX-License-Identifier: Apache-2.0 @@ -168,12 +171,9 @@ type cloudHypervisor struct { } var clhKernelParams = []Param{ - {"root", "/dev/pmem0p1"}, {"panic", "1"}, // upon kernel panic wait 1 second before reboot {"no_timer_check", ""}, // do not Check broken timer IRQ resources {"noreplace-smp", ""}, // do not replace SMP instructions - {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, // mount the root filesystem as readonly - {"rootfstype", "ext4"}, } var clhDebugKernelParams = []Param{ @@ -202,6 +202,34 @@ func (clh *cloudHypervisor) nydusdAPISocketPath(id string) (string, error) { return utils.BuildSocketPath(clh.config.VMStorePath, id, nydusdAPISock) } +func (clh *cloudHypervisor) enableProtection() error { + protection, err := availableGuestProtection() + if err != nil { + return err + } + + switch protection { + case tdxProtection: + firmwarePath, err := clh.config.FirmwareAssetPath() + if err != nil { + return err + } + + if firmwarePath == "" { + return errors.New("Firmware path is not specified") + } + + clh.vmconfig.Tdx = chclient.NewTdxConfig(firmwarePath) + return nil + + case sevProtection: + return errors.New("SEV protection is not supported by Cloud Hypervisor") + + default: + return errors.New("This system doesn't support Confidentian Computing (Guest Protection)") + } +} + // For cloudHypervisor this 
call only sets the internal structure up. // The VM will be created and started through StartVM(). func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { @@ -248,23 +276,35 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Create the VM config via the constructor to ensure default values are properly assigned clh.vmconfig = *chclient.NewVmConfig(*chclient.NewKernelConfig(kernelPath)) + if clh.config.ConfidentialGuest { + if err := clh.enableProtection(); err != nil { + return err + } + } + // Create the VM memory config via the constructor to ensure default values are properly assigned clh.vmconfig.Memory = chclient.NewMemoryConfig(int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes())) // shared memory should be enabled if using vhost-user(kata uses virtiofsd) clh.vmconfig.Memory.Shared = func(b bool) *bool { return &b }(true) // Enable hugepages if needed clh.vmconfig.Memory.Hugepages = func(b bool) *bool { return &b }(clh.config.HugePages) - hostMemKb, err := GetHostMemorySizeKb(procMemInfo) - if err != nil { - return nil + if !clh.config.ConfidentialGuest { + hostMemKb, err := GetHostMemorySizeKb(procMemInfo) + if err != nil { + return nil + } + // OpenAPI only supports int64 values + clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes())) } - // OpenAPI only supports int64 values - clh.vmconfig.Memory.HotplugSize = func(i int64) *int64 { return &i }(int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes())) // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) // First take the default parameters defined by this driver - params := clhKernelParams + params := commonNvdimmKernelRootParams + if clh.config.ConfidentialGuest { + params = commonVirtioblkKernelRootParams + 
} + params = append(params, clhKernelParams...) // Followed by extra debug parameters if debug enabled in configuration file if clh.config.Debug { @@ -288,26 +328,35 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net return err } - initrdPath, err := clh.config.InitrdAssetPath() - if err != nil { - return err - } - if imagePath != "" { - pmem := chclient.NewPmemConfig(imagePath) - *pmem.DiscardWrites = true + if clh.config.ConfidentialGuest { + disk := chclient.NewDiskConfig(imagePath) + disk.SetReadonly(true) - if clh.vmconfig.Pmem != nil { - *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) + if clh.vmconfig.Disks != nil { + *clh.vmconfig.Disks = append(*clh.vmconfig.Disks, *disk) + } else { + clh.vmconfig.Disks = &[]chclient.DiskConfig{*disk} + } } else { - clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem} + pmem := chclient.NewPmemConfig(imagePath) + *pmem.DiscardWrites = true + + if clh.vmconfig.Pmem != nil { + *clh.vmconfig.Pmem = append(*clh.vmconfig.Pmem, *pmem) + } else { + clh.vmconfig.Pmem = &[]chclient.PmemConfig{*pmem} + } } - } else if initrdPath != "" { + } else { + initrdPath, err := clh.config.InitrdAssetPath() + if err != nil { + return err + } + initrd := chclient.NewInitramfsConfig(initrdPath) clh.vmconfig.SetInitramfs(*initrd) - } else { - return errors.New("no image or initrd specified") } // Use serial port as the guest console only in debug mode, @@ -420,10 +469,13 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { // virtiofsd are executed by kata-runtime after this call, run with // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. 
- if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil { - return err + if !clh.config.DisableSeLinux { + + if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") } - defer label.SetProcessLabel("") if clh.config.SharedFS == config.VirtioFS || clh.config.SharedFS == config.VirtioFSNydus { clh.Logger().WithField("function", "StartVM").Info("Starting virtiofsDaemon") @@ -586,6 +638,10 @@ func (clh *cloudHypervisor) HotplugAddDevice(ctx context.Context, devInfo interf span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugAddDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() + if clh.config.ConfidentialGuest { + return nil, errors.New("Device hotplug addition is not supported in confidential mode") + } + switch devType { case BlockDev: drive := devInfo.(*config.BlockDrive) @@ -603,6 +659,10 @@ func (clh *cloudHypervisor) HotplugRemoveDevice(ctx context.Context, devInfo int span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugRemoveDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() + if clh.config.ConfidentialGuest { + return nil, errors.New("Device hotplug removal is not supported in confidential mode") + } + var deviceID string switch devType { @@ -857,7 +917,9 @@ func (clh *cloudHypervisor) Capabilities(ctx context.Context) types.Capabilities clh.Logger().WithField("function", "Capabilities").Info("get Capabilities") var caps types.Capabilities caps.SetFsSharingSupport() - caps.SetBlockDeviceHotplugSupport() + if !clh.config.ConfidentialGuest { + caps.SetBlockDeviceHotplugSupport() + } return caps } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 7fbdd17fa..9bfd2e62e 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Ericsson Eurolab 
Deutschland G.m.b.H. // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 0ee4b8c2c..69cdbe1a9 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -1,6 +1,3 @@ -//go:build linux -// +build linux - // Copyright (c) 2016 Intel Corporation // Copyright (c) 2014,2015,2016,2017 Docker, Inc. // SPDX-License-Identifier: Apache-2.0 @@ -10,7 +7,6 @@ package virtcontainers import ( "context" - "encoding/hex" "fmt" "io" "os" @@ -29,7 +25,6 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -356,6 +351,7 @@ func (c *Container) Logger() *logrus.Entry { return virtLog.WithFields(logrus.Fields{ "subsystem": "container", "sandbox": c.sandboxID, + "container": c.id, }) } @@ -422,72 +418,6 @@ func (c *Container) setContainerState(state types.StateString) error { return nil } -func (c *Container) shareFiles(ctx context.Context, m Mount, idx int) (string, bool, error) { - randBytes, err := utils.GenerateRandomBytes(8) - if err != nil { - return "", false, err - } - - filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination)) - guestDest := filepath.Join(kataGuestSharedDir(), filename) - - // copy file to contaier's rootfs if filesystem sharing is not supported, otherwise - // bind mount it in the shared directory. - caps := c.sandbox.hypervisor.Capabilities(ctx) - if !caps.IsFsSharingSupported() { - c.Logger().Debug("filesystem sharing is not supported, files will be copied") - - fileInfo, err := os.Stat(m.Source) - if err != nil { - return "", false, err - } - - // Ignore the mount if this is not a regular file (excludes - // directory, socket, device, ...) as it cannot be handled by - // a simple copy. 
But this should not be treated as an error, - // only as a limitation. - if !fileInfo.Mode().IsRegular() { - c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported") - return "", true, nil - } - - if err := c.sandbox.agent.copyFile(ctx, m.Source, guestDest); err != nil { - return "", false, err - } - } else { - // These mounts are created in the shared dir - mountDest := filepath.Join(getMountPath(c.sandboxID), filename) - if !m.ReadOnly { - if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil { - return "", false, err - } - } else { - // For RO mounts, bindmount remount event is not propagated to mount subtrees, - // and it doesn't present in the virtiofsd standalone mount namespace either. - // So we end up a bit tricky: - // 1. make a private ro bind mount to the mount source - // 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here. - // 3. umount the private bind mount created in step 1 - privateDest := filepath.Join(getPrivatePath(c.sandboxID), filename) - - if err := bindMount(c.ctx, m.Source, privateDest, true, "private"); err != nil { - return "", false, err - } - defer func() { - syscall.Unmount(privateDest, syscall.MNT_DETACH|UmountNoFollow) - }() - - if err := bindMount(c.ctx, privateDest, mountDest, false, "private"); err != nil { - return "", false, err - } - } - // Save HostPath mount value into the mount list of the container. - c.mounts[idx].HostPath = mountDest - } - - return guestDest, false, nil -} - // mountSharedDirMounts handles bind-mounts by bindmounting to the host shared // directory which is mounted through virtiofs/9pfs in the VM. 
// It also updates the container mount list with the HostPath info, and store @@ -502,6 +432,7 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i } } }() + for idx, m := range c.mounts { // Skip mounting certain system paths from the source on the host side // into the container as it does not make sense to do so. @@ -540,20 +471,18 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i continue } - var ignore bool - var guestDest string - guestDest, ignore, err = c.shareFiles(ctx, m, idx) + sharedFile, err := c.sandbox.fsShare.ShareFile(ctx, c, &c.mounts[idx]) if err != nil { return storages, err } // Expand the list of mounts to ignore. - if ignore { + if sharedFile == nil { ignoredMounts[m.Source] = Mount{Source: m.Source} continue } sharedDirMount := Mount{ - Source: guestDest, + Source: sharedFile.guestPath, Destination: m.Destination, Type: m.Type, Options: m.Options, @@ -580,11 +509,11 @@ func (c *Container) mountSharedDirMounts(ctx context.Context, sharedDirMounts, i return storages, fmt.Errorf("unable to create watchable path: %s: %v", watchableHostPath, err) } - watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(guestDest)) + watchableGuestMount := filepath.Join(kataGuestSharedDir(), "watchable", filepath.Base(sharedFile.guestPath)) storage := &grpc.Storage{ Driver: kataWatchableBindDevType, - Source: guestDest, + Source: sharedFile.guestPath, Fstype: "bind", MountPoint: watchableGuestMount, Options: m.Options, @@ -615,7 +544,7 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { span.End() }() - if err = syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil { + if err = c.sandbox.fsShare.UnshareFile(ctx, c, &m); err != nil { c.Logger().WithFields(logrus.Fields{ "host-path": m.HostPath, "error": err, @@ -623,19 +552,6 @@ func (c *Container) unmountHostMounts(ctx context.Context) error { return err } - if m.Type == "bind" { 
- s, err := os.Stat(m.HostPath) - if err != nil { - return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath) - } - // Remove the empty file or directory - if s.Mode().IsRegular() && s.Size() == 0 { - os.Remove(m.HostPath) - } - if s.Mode().IsDir() { - syscall.Rmdir(m.HostPath) - } - } return nil } @@ -710,8 +626,8 @@ func (c *Container) createBlockDevices(ctx context.Context) error { HostPath: m.Source, ContainerPath: m.Destination, DevType: "b", - Major: int64(unix.Major(stat.Rdev)), - Minor: int64(unix.Minor(stat.Rdev)), + Major: int64(unix.Major(uint64(stat.Rdev))), + Minor: int64(unix.Minor(uint64(stat.Rdev))), ReadOnly: m.ReadOnly, } // Check whether source can be used as a pmem device @@ -866,8 +782,8 @@ func (c *Container) rollbackFailingContainerCreation(ctx context.Context) { c.Logger().WithError(err).Error("rollback failed nydusContainerCleanup()") } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil { - c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()") + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil { + c.Logger().WithError(err).Error("rollback failed UnshareRootFilesystem()") } } } @@ -1050,7 +966,7 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } } else { - if err := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err != nil && !force { + if err := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err != nil && !force { return err } } @@ -1063,11 +979,6 @@ func (c *Container) stop(ctx context.Context, force bool) error { return err } - shareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - if err := syscall.Rmdir(shareDir); err != nil { - c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir") - } - // container was killed by force, container MUST change its state // as soon as possible just in case one of below operations fail leaving 
// the containers in a bad state. @@ -1317,8 +1228,8 @@ func (c *Container) plugDevice(ctx context.Context, devicePath string) error { HostPath: devicePath, ContainerPath: filepath.Join(kataGuestSharedDir(), c.id), DevType: "b", - Major: int64(unix.Major(stat.Rdev)), - Minor: int64(unix.Minor(stat.Rdev)), + Major: int64(unix.Major(uint64(stat.Rdev))), + Minor: int64(unix.Minor(uint64(stat.Rdev))), }) if err != nil { return fmt.Errorf("device manager failed to create rootfs device for %q: %v", devicePath, err) diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index 05b8974a9..b41fcc108 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -146,7 +146,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { return f.Name() } - doUnmountCheck := func(src, dest, hostPath, nonEmptyHostpath, devPath string) { + doUnmountCheck := func(s *Sandbox, src, dest, hostPath, nonEmptyHostpath, devPath string) { mounts := []Mount{ { Source: src, @@ -169,8 +169,10 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } c := Container{ - mounts: mounts, - ctx: context.Background(), + mounts: mounts, + ctx: context.Background(), + id: "fooabr", + sandbox: s, } if err := bindMount(c.ctx, src, hostPath, false, "private"); err != nil { @@ -221,8 +223,21 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { hostPath := createFakeMountDir(t, testDir, "host-path") nonEmptyHostpath := createFakeMountDir(t, testDir, "non-empty-host-path") devPath := createFakeMountDir(t, testDir, "dev-hostpath") + // create sandbox for mounting into + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + config: &SandboxConfig{}, + } + + fsShare, err := NewFilesystemShare(sandbox) + if err != nil { + t.Fatal(err) + } + sandbox.fsShare = fsShare + createFakeMountDir(t, nonEmptyHostpath, "nop") - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) 
+ doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) src = createFakeMountFile(t, testDir, "src") dest = createFakeMountFile(t, testDir, "dest") @@ -235,7 +250,7 @@ func TestUnmountHostMountsRemoveBindHostPath(t *testing.T) { } f.WriteString("nop\n") f.Close() - doUnmountCheck(src, dest, hostPath, nonEmptyHostpath, devPath) + doUnmountCheck(sandbox, src, dest, hostPath, nonEmptyHostpath, devPath) } func testSetupFakeRootfs(t *testing.T) (testRawFile, loopDev, mntDir string, err error) { @@ -584,8 +599,14 @@ func TestMountSharedDirMounts(t *testing.T) { }, }, } + + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + // setup the shared mounts: - k.setupSharedPath(k.ctx, sandbox) + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) // // Create the mounts that we'll test with diff --git a/src/runtime/virtcontainers/device/config/config.go b/src/runtime/virtcontainers/device/config/config.go index 835059ed6..69be4f583 100644 --- a/src/runtime/virtcontainers/device/config/config.go +++ b/src/runtime/virtcontainers/device/config/config.go @@ -425,8 +425,8 @@ func getVhostUserDevName(dirname string, majorNum, minorNum uint32) (string, err return "", err } - devMajor := unix.Major(devStat.Rdev) - devMinor := unix.Minor(devStat.Rdev) + devMajor := unix.Major(uint64(devStat.Rdev)) + devMinor := unix.Minor(uint64(devStat.Rdev)) if devMajor == majorNum && devMinor == minorNum { return file.Name(), nil } diff --git a/src/runtime/virtcontainers/device/config/pmem.go b/src/runtime/virtcontainers/device/config/pmem.go index 33ce4fdff..81d1da9b5 100644 --- a/src/runtime/virtcontainers/device/config/pmem.go +++ b/src/runtime/virtcontainers/device/config/pmem.go @@ -51,8 +51,8 @@ func PmemDeviceInfo(source, destination string) (*DeviceInfo, error) { device := &DeviceInfo{ ContainerPath: destination, DevType: "b", - Major: int64(unix.Major(stat.Dev)), - Minor: int64(unix.Minor(stat.Dev)), + Major: 
int64(unix.Major(uint64(stat.Dev))), + Minor: int64(unix.Minor(uint64(stat.Dev))), Pmem: true, DriverOptions: make(map[string]string), } diff --git a/src/runtime/virtcontainers/device/manager/manager_linux_test.go b/src/runtime/virtcontainers/device/manager/manager_linux_test.go new file mode 100644 index 000000000..78773fc5c --- /dev/null +++ b/src/runtime/virtcontainers/device/manager/manager_linux_test.go @@ -0,0 +1,100 @@ +// Copyright (c) 2017 Intel Corporation +// Copyright (c) 2018 Huawei Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +package manager + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" + "github.com/stretchr/testify/assert" + + "golang.org/x/sys/unix" +) + +func TestAttachVhostUserBlkDevice(t *testing.T) { + rootEnabled := true + tc := ktu.NewTestConstraint(false) + if tc.NotValid(ktu.NeedRoot()) { + rootEnabled = false + } + + tmpDir, err := os.MkdirTemp("", "") + dm := &deviceManager{ + blockDriver: VirtioBlock, + devices: make(map[string]api.Device), + vhostUserStoreEnabled: true, + vhostUserStorePath: tmpDir, + } + assert.Nil(t, err) + defer os.RemoveAll(tmpDir) + + vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") + vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") + deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") + deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") + + err = os.MkdirAll(vhostUserDevNodePath, dirMode) + assert.Nil(t, err) + err = os.MkdirAll(vhostUserSockPath, dirMode) + assert.Nil(t, err) + _, err = os.Create(deviceSockPath) + assert.Nil(t, err) + + // mknod requires root privilege, call 
mock function for non-root to + // get VhostUserBlk device type. + if rootEnabled == true { + err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) + assert.Nil(t, err) + } else { + savedFunc := config.GetVhostUserNodeStatFunc + + _, err = os.Create(deviceNodePath) + assert.Nil(t, err) + + config.GetVhostUserNodeStatFunc = func(devNodePath string, + devNodeStat *unix.Stat_t) error { + if deviceNodePath != devNodePath { + return fmt.Errorf("mock GetVhostUserNodeStatFunc error") + } + + devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) + return nil + } + + defer func() { + config.GetVhostUserNodeStatFunc = savedFunc + }() + } + + path := "/dev/vda" + deviceInfo := config.DeviceInfo{ + HostPath: deviceNodePath, + ContainerPath: path, + DevType: "b", + Major: config.VhostUserBlkMajor, + Minor: 0, + } + + devReceiver := &api.MockDeviceReceiver{} + device, err := dm.NewDevice(deviceInfo) + assert.Nil(t, err) + _, ok := device.(*drivers.VhostUserBlkDevice) + assert.True(t, ok) + + err = device.Attach(context.Background(), devReceiver) + assert.Nil(t, err) + + err = device.Detach(context.Background(), devReceiver) + assert.Nil(t, err) +} diff --git a/src/runtime/virtcontainers/device/manager/manager_test.go b/src/runtime/virtcontainers/device/manager/manager_test.go index d6fab6d26..f0d7ef974 100644 --- a/src/runtime/virtcontainers/device/manager/manager_test.go +++ b/src/runtime/virtcontainers/device/manager/manager_test.go @@ -8,19 +8,15 @@ package manager import ( "context" - "fmt" "os" "path/filepath" "strconv" "testing" - ktu "github.com/kata-containers/kata-containers/src/runtime/pkg/katatestutils" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" "github.com/stretchr/testify/assert" - - "golang.org/x/sys/unix" ) 
const fileMode0640 = os.FileMode(0640) @@ -217,83 +213,6 @@ func TestAttachBlockDevice(t *testing.T) { assert.Nil(t, err) } -func TestAttachVhostUserBlkDevice(t *testing.T) { - rootEnabled := true - tc := ktu.NewTestConstraint(false) - if tc.NotValid(ktu.NeedRoot()) { - rootEnabled = false - } - - tmpDir, err := os.MkdirTemp("", "") - dm := &deviceManager{ - blockDriver: VirtioBlock, - devices: make(map[string]api.Device), - vhostUserStoreEnabled: true, - vhostUserStorePath: tmpDir, - } - assert.Nil(t, err) - defer os.RemoveAll(tmpDir) - - vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") - vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") - deviceNodePath := filepath.Join(vhostUserDevNodePath, "vhostblk0") - deviceSockPath := filepath.Join(vhostUserSockPath, "vhostblk0") - - err = os.MkdirAll(vhostUserDevNodePath, dirMode) - assert.Nil(t, err) - err = os.MkdirAll(vhostUserSockPath, dirMode) - assert.Nil(t, err) - _, err = os.Create(deviceSockPath) - assert.Nil(t, err) - - // mknod requires root privilege, call mock function for non-root to - // get VhostUserBlk device type. 
- if rootEnabled == true { - err = unix.Mknod(deviceNodePath, unix.S_IFBLK, int(unix.Mkdev(config.VhostUserBlkMajor, 0))) - assert.Nil(t, err) - } else { - savedFunc := config.GetVhostUserNodeStatFunc - - _, err = os.Create(deviceNodePath) - assert.Nil(t, err) - - config.GetVhostUserNodeStatFunc = func(devNodePath string, - devNodeStat *unix.Stat_t) error { - if deviceNodePath != devNodePath { - return fmt.Errorf("mock GetVhostUserNodeStatFunc error") - } - - devNodeStat.Rdev = unix.Mkdev(config.VhostUserBlkMajor, 0) - return nil - } - - defer func() { - config.GetVhostUserNodeStatFunc = savedFunc - }() - } - - path := "/dev/vda" - deviceInfo := config.DeviceInfo{ - HostPath: deviceNodePath, - ContainerPath: path, - DevType: "b", - Major: config.VhostUserBlkMajor, - Minor: 0, - } - - devReceiver := &api.MockDeviceReceiver{} - device, err := dm.NewDevice(deviceInfo) - assert.Nil(t, err) - _, ok := device.(*drivers.VhostUserBlkDevice) - assert.True(t, ok) - - err = device.Attach(context.Background(), devReceiver) - assert.Nil(t, err) - - err = device.Detach(context.Background(), devReceiver) - assert.Nil(t, err) -} - func TestAttachDetachDevice(t *testing.T) { dm := NewDeviceManager(VirtioSCSI, false, "", nil) diff --git a/src/runtime/virtcontainers/factory/factory_test.go b/src/runtime/virtcontainers/factory/factory_test.go index 9cd5568b7..71af52d71 100644 --- a/src/runtime/virtcontainers/factory/factory_test.go +++ b/src/runtime/virtcontainers/factory/factory_test.go @@ -7,7 +7,6 @@ package factory import ( "context" - "fmt" "os" "testing" @@ -61,10 +60,11 @@ func TestNewFactory(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() @@ -179,10 +179,11 @@ 
func TestFactoryGetVM(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/factory/template/template_test.go b/src/runtime/virtcontainers/factory/template/template_test.go index 9c8d5805a..55c7bc596 100644 --- a/src/runtime/virtcontainers/factory/template/template_test.go +++ b/src/runtime/virtcontainers/factory/template/template_test.go @@ -51,10 +51,11 @@ func TestTemplateFactory(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) vc.MockHybridVSockPath = url hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} - err = hybridVSockTTRPCMock.Start(fmt.Sprintf("mock://%s", url)) + err = hybridVSockTTRPCMock.Start(url) assert.NoError(err) defer hybridVSockTTRPCMock.Stop() diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 34a44692d..6137e32cf 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -792,10 +795,13 @@ func (fc *firecracker) StartVM(ctx context.Context, timeout int) error { // are executed by kata-runtime after this call, run with the SELinux // label. If these processes require privileged, we do not want to run // them under confinement. 
- if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { - return err + if !fc.config.DisableSeLinux { + + if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") } - defer label.SetProcessLabel("") err = fc.fcInit(ctx, fcTimeout) if err != nil { diff --git a/src/runtime/virtcontainers/fc_metrics.go b/src/runtime/virtcontainers/fc_metrics.go index f97501fc7..0462542fd 100644 --- a/src/runtime/virtcontainers/fc_metrics.go +++ b/src/runtime/virtcontainers/fc_metrics.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2020 Ant Group // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index 215dfdb74..f2b099f1d 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/fs_share.go b/src/runtime/virtcontainers/fs_share.go new file mode 100644 index 000000000..fa8374c3b --- /dev/null +++ b/src/runtime/virtcontainers/fs_share.go @@ -0,0 +1,77 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" +) + +// fsShareTracingTags defines tags for the trace span +var fsShareTracingTags = map[string]string{ + "source": "runtime", + "package": "virtcontainers", + "subsystem": "fs_share", +} + +// SharedFile represents the outcome of a host filesystem sharing +// operation. +type SharedFile struct { + storage *grpc.Storage + guestPath string +} + +type FilesystemSharer interface { + // Prepare will set the host filesystem up, making it ready + // to share container files and directories with the guest. 
+ // It will be called before any container is running. + // + // For example, the Linux implementation would create and + // prepare the host shared folders, and also make all + // sandbox mounts ready to be shared. + // + // Implementations of this method must be idempotent and be + // ready to potentially be called several times in a row, + // without symmetric calls to Cleanup in between. + Prepare(context.Context) error + + // Cleanup cleans the host filesystem up from the initial + // setup created by Prepare. + // It will be called after all containers are terminated. + // + // Implementations of this method must be idempotent and be + // ready to potentially be called several times in a row, + // without symmetric calls to Prepare in between. + Cleanup(context.Context) error + + // ShareFile shares a file (a regular file or a directory) + // from the host filesystem with a container running in the + // guest. The host file to be shared is described by the + // Mount argument. + // This method should be called for each container file to + // be shared with the guest. + // + // The returned SharedFile pointer describes how the file + // should be shared between the host and the guest. If it + // is nil, then the shared file described by the Mount + // argument will be ignored by the guest, i.e. it will NOT + // be shared with the guest. + ShareFile(context.Context, *Container, *Mount) (*SharedFile, error) + + // UnshareFile stops sharing a container file, described by + // the Mount argument. + UnshareFile(context.Context, *Container, *Mount) error + + // ShareRootFilesystem shares a container bundle rootfs with + // the Kata guest, allowing the kata agent to eventually start + // the container from that shared rootfs. + ShareRootFilesystem(context.Context, *Container) (*SharedFile, error) + + // UnshareRootFilesystem stops sharing a container bundle + // rootfs. 
+ UnshareRootFilesystem(context.Context, *Container) error +} diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go new file mode 100644 index 000000000..cec3d154e --- /dev/null +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -0,0 +1,484 @@ +// Copyright (c) 2016 Intel Corporation +// Copyright (c) 2014,2015,2016,2017 Docker, Inc. +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "encoding/hex" + "fmt" + "os" + "path/filepath" + "sync" + "syscall" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" +) + +func unmountNoFollow(path string) error { + return syscall.Unmount(path, syscall.MNT_DETACH|UmountNoFollow) +} + +type FilesystemShare struct { + sandbox *Sandbox + sync.Mutex + prepared bool +} + +func NewFilesystemShare(s *Sandbox) (FilesystemSharer, error) { + return &FilesystemShare{ + prepared: false, + sandbox: s, + }, nil +} + +// Logger returns a logrus logger appropriate for logging Filesystem sharing messages +func (f *FilesystemShare) Logger() *logrus.Entry { + return virtLog.WithFields(logrus.Fields{ + "subsystem": "filesystem share", + "sandbox": f.sandbox.ID(), + }) +} + +func (f *FilesystemShare) prepareBindMounts(ctx context.Context) error { + span, ctx := katatrace.Trace(ctx, f.Logger(), "setupBindMounts", fsShareTracingTags) + defer span.End() + + var err error + + if len(f.sandbox.config.SandboxBindMounts) == 0 { + return nil + } + + // Create subdirectory in host shared path for sandbox mounts + sandboxMountDir := 
filepath.Join(getMountPath(f.sandbox.ID()), sandboxMountsDir) + sandboxShareDir := filepath.Join(GetSharePath(f.sandbox.ID()), sandboxMountsDir) + if err = os.MkdirAll(sandboxMountDir, DirMode); err != nil { + return fmt.Errorf("Creating sandbox shared mount directory: %v: %w", sandboxMountDir, err) + } + var mountedList []string + defer func() { + if err != nil { // function-scoped err: the checks in this function assign with '=' (never ':=') so a failure is visible here + for _, mnt := range mountedList { + if derr := unmountNoFollow(mnt); derr != nil { + f.Logger().WithError(derr).Errorf("Cleanup: couldn't unmount %s", mnt) + } + } + if derr := os.RemoveAll(sandboxMountDir); derr != nil { + f.Logger().WithError(derr).Errorf("Cleanup: failed to remove %s", sandboxMountDir) + } + + } + }() + + for _, m := range f.sandbox.config.SandboxBindMounts { + mountDest := filepath.Join(sandboxMountDir, filepath.Base(m)) + // bind-mount each sandbox mount that's defined into the sandbox mounts dir + if err = bindMount(ctx, m, mountDest, true, "private"); err != nil { + return fmt.Errorf("Mounting sandbox directory: %v to %v: %w", m, mountDest, err) + } + mountedList = append(mountedList, mountDest) + + mountDest = filepath.Join(sandboxShareDir, filepath.Base(m)) + if err = remountRo(ctx, mountDest); err != nil { + return fmt.Errorf("remount sandbox directory: %v to %v: %w", m, mountDest, err) + } + } + + return nil +} + +func (f *FilesystemShare) cleanupBindMounts(ctx context.Context) error { + if f.sandbox.config == nil || len(f.sandbox.config.SandboxBindMounts) == 0 { + return nil + } + + var retErr error + bindmountShareDir := filepath.Join(getMountPath(f.sandbox.ID()), sandboxMountsDir) + for _, m := range f.sandbox.config.SandboxBindMounts { + mountPath := filepath.Join(bindmountShareDir, filepath.Base(m)) + if err := unmountNoFollow(mountPath); err != nil { + if retErr == nil { + retErr = err + } + f.Logger().WithError(err).Errorf("Failed to unmount sandbox bindmount: %v", mountPath) + } + } + if err := os.RemoveAll(bindmountShareDir); err != nil { + if retErr == nil { + 
retErr = err + } + f.Logger().WithError(err).Errorf("Failed to remove sandbox bindmount directory: %s", bindmountShareDir) + } + + return retErr +} + +func (f *FilesystemShare) Prepare(ctx context.Context) error { + var err error + + span, ctx := katatrace.Trace(ctx, f.Logger(), "prepare", fsShareTracingTags) + defer span.End() + + f.Lock() + defer f.Unlock() + + // Prepare is idempotent, i.e. can be called multiple times in a row, without failing + // and without modifying the filesystem state after the first call. + if f.prepared { + f.Logger().Warn("Calling Prepare() on an already prepared filesystem") + return nil + } + + // Toggle prepared to true if everything went fine. + defer func() { + if err == nil { + f.prepared = true + } + }() + + // create shared path structure + sharePath := GetSharePath(f.sandbox.ID()) + mountPath := getMountPath(f.sandbox.ID()) + if err = os.MkdirAll(sharePath, sharedDirMode); err != nil { + return err + } + if err = os.MkdirAll(mountPath, DirMode); err != nil { + return err + } + + // slave mount so that future mountpoints under mountPath are shown in sharePath as well + if err = bindMount(ctx, mountPath, sharePath, true, "slave"); err != nil { + return err + } + defer func() { + if err != nil { + if umountErr := unmountNoFollow(sharePath); umountErr != nil { + f.Logger().WithError(umountErr).Errorf("failed to unmount vm share path %s", sharePath) + } + } + }() + + // Setup sandbox bindmounts, if specified. + if err = f.prepareBindMounts(ctx); err != nil { + return err + } + + return nil +} + +func (f *FilesystemShare) Cleanup(ctx context.Context) error { + var err error + + f.Lock() + defer f.Unlock() + + // Cleanup is idempotent, i.e. can be called multiple times in a row, without failing + // and without modifying the filesystem state after the first call. + if !f.prepared { + f.Logger().Warn("Calling Cleanup() on an already cleaned up filesystem") + return nil + } + + // Toggle prepared to false if everything went fine. 
+ defer func() { + if err == nil { + f.prepared = false + } + }() + + // Unmount all the sandbox bind mounts. + if err = f.cleanupBindMounts(ctx); err != nil { + return err + } + + // Unmount shared path + path := GetSharePath(f.sandbox.ID()) + f.Logger().WithField("path", path).Infof("Cleanup agent") + if err = unmountNoFollow(path); err != nil { + f.Logger().WithError(err).Errorf("failed to unmount vm share path %s", path) + return err + } + + // Unmount mount path + path = getMountPath(f.sandbox.ID()) + if err = bindUnmountAllRootfs(ctx, path, f.sandbox); err != nil { + f.Logger().WithError(err).Errorf("failed to unmount vm mount path %s", path) + return err + } + if err = os.RemoveAll(getSandboxPath(f.sandbox.ID())); err != nil { + f.Logger().WithError(err).Errorf("failed to Cleanup vm path %s", getSandboxPath(f.sandbox.ID())) + return err + } + + return nil +} + +func (f *FilesystemShare) ShareFile(ctx context.Context, c *Container, m *Mount) (*SharedFile, error) { + randBytes, err := utils.GenerateRandomBytes(8) + if err != nil { + return nil, err + } + + filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination)) + guestPath := filepath.Join(kataGuestSharedDir(), filename) + + // copy file to container's rootfs if filesystem sharing is not supported, otherwise + // bind mount it in the shared directory. + caps := f.sandbox.hypervisor.Capabilities(ctx) + if !caps.IsFsSharingSupported() { + f.Logger().Debug("filesystem sharing is not supported, files will be copied") + + fileInfo, err := os.Stat(m.Source) + if err != nil { + return nil, err + } + + // Ignore the mount if this is not a regular file (excludes + // directory, socket, device, ...) as it cannot be handled by + // a simple copy. But this should not be treated as an error, + // only as a limitation. 
+ if !fileInfo.Mode().IsRegular() { + f.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported") + return nil, nil + } + + if err := f.sandbox.agent.copyFile(ctx, m.Source, guestPath); err != nil { + return nil, err + } + } else { + // These mounts are created in the shared dir + mountDest := filepath.Join(getMountPath(f.sandbox.ID()), filename) + if !m.ReadOnly { + if err := bindMount(ctx, m.Source, mountDest, false, "private"); err != nil { + return nil, err + } + } else { + // For RO mounts, bindmount remount event is not propagated to mount subtrees, + // and it doesn't present in the virtiofsd standalone mount namespace either. + // So we end up a bit tricky: + // 1. make a private ro bind mount to the mount source + // 2. duplicate the ro mount we create in step 1 to mountDest, by making a bind mount. No need to remount with MS_RDONLY here. + // 3. umount the private bind mount created in step 1 + privateDest := filepath.Join(getPrivatePath(f.sandbox.ID()), filename) + + if err := bindMount(ctx, m.Source, privateDest, true, "private"); err != nil { + return nil, err + } + defer func() { + unmountNoFollow(privateDest) + }() + + if err := bindMount(ctx, privateDest, mountDest, false, "private"); err != nil { + return nil, err + } + } + + // Save HostPath mount value into the passed mount + m.HostPath = mountDest + } + + return &SharedFile{ + guestPath: guestPath, + }, nil +} + +func (f *FilesystemShare) UnshareFile(ctx context.Context, c *Container, m *Mount) error { + if err := unmountNoFollow(m.HostPath); err != nil { + return err + } + + if m.Type == "bind" { + s, err := os.Stat(m.HostPath) + if err != nil { + return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath) + } + // Remove the empty file or directory + if s.Mode().IsRegular() && s.Size() == 0 { + os.Remove(m.HostPath) + } + if s.Mode().IsDir() { + syscall.Rmdir(m.HostPath) + } + } + + return nil +} + +func (f *FilesystemShare) 
shareRootFilesystemWithNydus(ctx context.Context, c *Container) (*SharedFile, error) { + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) + virtiofsDaemon, err := getVirtiofsDaemonForNydus(f.sandbox) + if err != nil { + return nil, err + } + extraOption, err := parseExtraOption(c.rootFs.Options) + if err != nil { + return nil, err + } + f.Logger().Infof("Nydus option: %v", extraOption) + mountOpt := &MountOption{ + mountpoint: rafsMountPath(c.id), + source: extraOption.Source, + config: extraOption.Config, + } + + // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir + if err := virtiofsDaemon.Mount(*mountOpt); err != nil { + return nil, err + } + rootfs := &grpc.Storage{} + containerShareDir := filepath.Join(getMountPath(f.sandbox.ID()), c.id) + + // mkdir rootfs, guest at /run/kata-containers/shared/containers//rootfs + rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix) + if err := os.MkdirAll(rootfsDir, DirMode); err != nil { + return nil, err + } + + // bindmount snapshot dir which snapshotter allocated + // to guest /run/kata-containers/shared/containers//snapshotdir + snapshotShareDir := filepath.Join(containerShareDir, snapshotDir) + if err := bindMount(ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil { + return nil, err + } + + // so rootfs = overlay(upperdir, workerdir, lowerdir) + rootfs.MountPoint = rootfsGuestPath + rootfs.Source = typeOverlayFS + rootfs.Fstype = typeOverlayFS + rootfs.Driver = kataOverlayDevType + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs"))) + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work"))) + rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir))) + rootfs.Options = append(rootfs.Options, "index=off") + 
f.Logger().Infof("Nydus rootfs info: %#v\n", rootfs) + + return &SharedFile{ + storage: rootfs, + guestPath: rootfsGuestPath, + }, nil +} + +// ShareRootFilesystem shares the rootfs of container c with the guest and returns the SharedFile describing it. +func (f *FilesystemShare) ShareRootFilesystem(ctx context.Context, c *Container) (*SharedFile, error) { + rootfsGuestPath := filepath.Join(kataGuestSharedDir(), c.id, c.rootfsSuffix) + + // In confidential computing, there is no Image information on the host, + // so there is no Rootfs.Target. + if f.sandbox.config.ServiceOffload && c.rootFs.Target == "" { + return &SharedFile{ + storage: nil, + guestPath: rootfsGuestPath, + }, nil + } + + if c.rootFs.Type == NydusRootFSType { + return f.shareRootFilesystemWithNydus(ctx, c) + } + + if c.state.Fstype != "" && c.state.BlockDeviceID != "" { + // The rootfs storage volume represents the container rootfs + // mount point inside the guest. + // It can be a block based device (when using block based container + // overlay on the host) mount or a 9pfs one (for all other overlay + // implementations). + rootfsStorage := &grpc.Storage{} + + // This is a block based device rootfs. 
+ device := f.sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID) + if device == nil { + f.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id") + return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID) + } + + blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) + if !ok || blockDrive == nil { + f.Logger().Error("malformed block drive") + return nil, fmt.Errorf("malformed block drive") + } + switch { + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: + rootfsStorage.Driver = kataMmioBlkDevType + rootfsStorage.Source = blockDrive.VirtPath + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: + rootfsStorage.Driver = kataBlkCCWDevType + rootfsStorage.Source = blockDrive.DevNo + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: + rootfsStorage.Driver = kataBlkDevType + rootfsStorage.Source = blockDrive.PCIPath.String() + case f.sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: + rootfsStorage.Driver = kataSCSIDevType + rootfsStorage.Source = blockDrive.SCSIAddr + default: + return nil, fmt.Errorf("Unknown block device driver: %s", f.sandbox.config.HypervisorConfig.BlockDeviceDriver) + } + + // We can't use filepath.Dir(rootfsGuestPath) (The rootfs parent) because + // with block devices the rootfsSuffix may not be set. + // So we have to build the bundle path explicitly. + rootfsStorage.MountPoint = filepath.Join(kataGuestSharedDir(), c.id) + rootfsStorage.Fstype = c.state.Fstype + + if c.state.Fstype == "xfs" { + rootfsStorage.Options = []string{"nouuid"} + } + + // Ensure container mount destination exists + // TODO: remove dependency on shared fs path. shared fs is just one kind of storage source. + // we should not always use shared fs path for all kinds of storage. Instead, all storage + // should be bind mounted to a tmpfs path for containers to use. 
+ if err := os.MkdirAll(filepath.Join(getMountPath(f.sandbox.ID()), c.id, c.rootfsSuffix), DirMode); err != nil { + return nil, err + } + + return &SharedFile{ + storage: rootfsStorage, + guestPath: rootfsGuestPath, + }, nil + } + + // This is not a block based device rootfs. We are going to bind mount it into the shared drive + // between the host and the guest. + // With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory + // (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from + // the host and it will show up in the guest. + if err := bindMountContainerRootfs(ctx, getMountPath(f.sandbox.ID()), c.id, c.rootFs.Target, false); err != nil { + return nil, err + } + + return &SharedFile{ + storage: nil, + guestPath: rootfsGuestPath, + }, nil +} + +func (f *FilesystemShare) UnshareRootFilesystem(ctx context.Context, c *Container) error { + if c.rootFs.Type == NydusRootFSType { + if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil { + f.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup") + } + } else { + if err := bindUnmountContainerRootfs(ctx, getMountPath(f.sandbox.ID()), c.id); err != nil { + return err + } + } + + // Remove the shared directory for this container. + shareDir := filepath.Join(getMountPath(f.sandbox.ID()), c.id) + if err := syscall.Rmdir(shareDir); err != nil { + f.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir") + } + + return nil + +} diff --git a/src/runtime/virtcontainers/fs_share_linux_test.go b/src/runtime/virtcontainers/fs_share_linux_test.go new file mode 100644 index 000000000..704cb4b88 --- /dev/null +++ b/src/runtime/virtcontainers/fs_share_linux_test.go @@ -0,0 +1,102 @@ +// Copyright (c) 2018 Intel Corporation +// Copyright (c) 2022 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "context" + "os" + "path" + "path/filepath" + "syscall" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSandboxSharedFilesystem(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + + assert := assert.New(t) + // create temporary files to mount: + testMountPath, err := os.MkdirTemp("", "sandbox-test") + assert.NoError(err) + defer os.RemoveAll(testMountPath) + + // create a new shared directory for our test: + kataHostSharedDirSaved := kataHostSharedDir + testHostDir, err := os.MkdirTemp("", "kata-Cleanup") + assert.NoError(err) + kataHostSharedDir = func() string { + return testHostDir + } + defer func() { + kataHostSharedDir = kataHostSharedDirSaved + }() + + m1Path := filepath.Join(testMountPath, "foo.txt") + f1, err := os.Create(m1Path) + assert.NoError(err) + defer f1.Close() + + m2Path := filepath.Join(testMountPath, "bar.txt") + f2, err := os.Create(m2Path) + assert.NoError(err) + defer f2.Close() + + // create sandbox for mounting into + sandbox := &Sandbox{ + ctx: context.Background(), + id: "foobar", + config: &SandboxConfig{ + SandboxBindMounts: []string{m1Path, m2Path}, + }, + } + + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + + // make the shared directory for our test: + dir := kataHostSharedDir() + err = os.MkdirAll(path.Join(dir, sandbox.id), 0777) + assert.Nil(err) + defer os.RemoveAll(dir) + + // Test the prepare function. We expect it to succeed + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + + // Test the Cleanup function. We expect it to succeed for the mount to be removed. + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) + + // After successful Cleanup, verify there are not any mounts left behind. 
+ stat := syscall.Stat_t{} + mount1CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m1Path)) + err = syscall.Stat(mount1CheckPath, &stat) + assert.Error(err) + assert.True(os.IsNotExist(err)) + + mount2CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m2Path)) + err = syscall.Stat(mount2CheckPath, &stat) + assert.Error(err) + assert.True(os.IsNotExist(err)) + + // Verify that Prepare is idempotent. + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + err = sandbox.fsShare.Prepare(sandbox.ctx) + assert.NoError(err) + + // Verify that Cleanup is idempotent. + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) + err = sandbox.fsShare.Cleanup(sandbox.ctx) + assert.NoError(err) +} diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 175342016..cebc51570 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -14,10 +14,12 @@ import ( "strconv" "strings" + "github.com/pkg/errors" + + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" hv "github.com/kata-containers/kata-containers/src/runtime/pkg/hypervisors" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/sirupsen/logrus" ) @@ -69,14 +71,17 @@ const ( // MinHypervisorMemory is the minimum memory required for a VM. MinHypervisorMemory = 256 + + defaultMsize9p = 8192 ) var ( - hvLogger = logrus.WithField("source", "virtcontainers/hypervisor") + hvLogger = logrus.WithField("source", "virtcontainers/hypervisor") + noGuestMemHotplugErr error = errors.New("guest memory hotplug not supported") ) // In some architectures the maximum number of vCPUs depends on the number of physical cores. 
-var defaultMaxQemuVCPUs = MaxQemuVCPUs() +var defaultMaxVCPUs = govmm.MaxVCPUs() // agnostic list of kernel root parameters for NVDIMM var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck @@ -192,25 +197,6 @@ func (hType *HypervisorType) String() string { } } -// NewHypervisor returns an hypervisor from a hypervisor type. -func NewHypervisor(hType HypervisorType) (Hypervisor, error) { - - switch hType { - case QemuHypervisor: - return &qemu{}, nil - case FirecrackerHypervisor: - return &firecracker{}, nil - case AcrnHypervisor: - return &Acrn{}, nil - case ClhHypervisor: - return &cloudHypervisor{}, nil - case MockHypervisor: - return &mockHypervisor{}, nil - default: - return nil, fmt.Errorf("Unknown hypervisor type %s", hType) - } -} - // GetHypervisorSocketTemplate returns the full "template" path to the // hypervisor socket. If the specified hypervisor doesn't use a socket, // an empty string is returned. @@ -512,6 +498,9 @@ type HypervisorConfig struct { // Disable seccomp from the hypervisor process DisableSeccomp bool + + // Disable selinux from the hypervisor process + DisableSeLinux bool } // vcpu mapping from vcpu number to thread number @@ -574,8 +563,13 @@ func (conf *HypervisorConfig) Valid() error { conf.BlockDeviceDriver = config.VirtioBlockCCW } - if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxQemuVCPUs { - conf.DefaultMaxVCPUs = defaultMaxQemuVCPUs + if conf.DefaultMaxVCPUs == 0 || conf.DefaultMaxVCPUs > defaultMaxVCPUs { + conf.DefaultMaxVCPUs = defaultMaxVCPUs + } + + if conf.ConfidentialGuest && conf.NumVCPUs != conf.DefaultMaxVCPUs { + hvLogger.Warnf("Confidential guests do not support hotplugging of vCPUs. 
Setting DefaultMaxVCPUs to NumVCPUs (%d)", conf.NumVCPUs) + conf.DefaultMaxVCPUs = conf.NumVCPUs } if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS { @@ -894,17 +888,55 @@ func GetHypervisorPid(h Hypervisor) int { return pids[0] } -func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { - vhostFd, contextID, err := utils.FindContextID() - if err != nil { - return nil, err - } +// Kind of guest protection +type guestProtection uint8 - return types.VSock{ - VhostFd: vhostFd, - ContextID: contextID, - Port: uint32(vSockPort), - }, nil +const ( + noneProtection guestProtection = iota + + //Intel Trust Domain Extensions + //https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html + // Exclude from lint checking for it won't be used on arm64 code + tdxProtection + + // AMD Secure Encrypted Virtualization + // https://developer.amd.com/sev/ + // Exclude from lint checking for it won't be used on arm64 code + sevProtection + + // IBM POWER 9 Protected Execution Facility + // https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html + // Exclude from lint checking for it won't be used on arm64 code + pefProtection + + // IBM Secure Execution (IBM Z & LinuxONE) + // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html + // Exclude from lint checking for it won't be used on arm64 code + seProtection +) + +var guestProtectionStr = [...]string{ + noneProtection: "none", + pefProtection: "pef", + seProtection: "se", + sevProtection: "sev", + tdxProtection: "tdx", +} + +func (gp guestProtection) String() string { + return guestProtectionStr[gp] +} + +func genericAvailableGuestProtections() (protections []string) { + return +} + +func AvailableGuestProtections() (protections []string) { + gp, err := availableGuestProtection() + if err != nil || gp == noneProtection { + return genericAvailableGuestProtections() + } + return []string{gp.String()} } // hypervisor is the virtcontainers hypervisor 
interface. diff --git a/src/runtime/virtcontainers/hypervisor_linux.go b/src/runtime/virtcontainers/hypervisor_linux.go new file mode 100644 index 000000000..1fd0375b8 --- /dev/null +++ b/src/runtime/virtcontainers/hypervisor_linux.go @@ -0,0 +1,45 @@ +// Copyright (c) 2021 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package virtcontainers + +import ( + "fmt" + + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" +) + +func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { + vhostFd, contextID, err := utils.FindContextID() + if err != nil { + return nil, err + } + + return types.VSock{ + VhostFd: vhostFd, + ContextID: contextID, + Port: uint32(vSockPort), + }, nil +} + +// NewHypervisor returns an hypervisor from a hypervisor type. +func NewHypervisor(hType HypervisorType) (Hypervisor, error) { + + switch hType { + case QemuHypervisor: + return &qemu{}, nil + case FirecrackerHypervisor: + return &firecracker{}, nil + case AcrnHypervisor: + return &Acrn{}, nil + case ClhHypervisor: + return &cloudHypervisor{}, nil + case MockHypervisor: + return &mockHypervisor{}, nil + default: + return nil, fmt.Errorf("Unknown hypervisor type %s", hType) + } +} diff --git a/src/runtime/virtcontainers/hypervisor_amd64.go b/src/runtime/virtcontainers/hypervisor_linux_amd64.go similarity index 86% rename from src/runtime/virtcontainers/hypervisor_amd64.go rename to src/runtime/virtcontainers/hypervisor_linux_amd64.go index 97a30ea01..38313d05b 100644 --- a/src/runtime/virtcontainers/hypervisor_amd64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_amd64.go @@ -7,6 +7,14 @@ package virtcontainers import "os" +const ( + tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" + + tdxCPUFlag = "tdx" + + sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" +) + // Implementation of this function is architecture specific func 
availableGuestProtection() (guestProtection, error) { flags, err := CPUFlags(procCPUInfo) diff --git a/src/runtime/virtcontainers/hypervisor_arm64.go b/src/runtime/virtcontainers/hypervisor_linux_arm64.go similarity index 76% rename from src/runtime/virtcontainers/hypervisor_arm64.go rename to src/runtime/virtcontainers/hypervisor_linux_arm64.go index cdf38165a..9c6a13ea9 100644 --- a/src/runtime/virtcontainers/hypervisor_arm64.go +++ b/src/runtime/virtcontainers/hypervisor_linux_arm64.go @@ -4,7 +4,7 @@ package virtcontainers -//Returns pefProtection if the firmware directory exists +// Guest protection is not supported on ARM64. func availableGuestProtection() (guestProtection, error) { return noneProtection, nil } diff --git a/src/runtime/virtcontainers/hypervisor_arm64_test.go b/src/runtime/virtcontainers/hypervisor_linux_arm64_test.go similarity index 100% rename from src/runtime/virtcontainers/hypervisor_arm64_test.go rename to src/runtime/virtcontainers/hypervisor_linux_arm64_test.go diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index f8bbbd330..86aefde69 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -185,7 +185,7 @@ func TestHypervisorConfigDefaults(t *testing.T) { MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxQemuVCPUs, + DefaultMaxVCPUs: defaultMaxVCPUs, Msize9p: defaultMsize9p, } diff --git a/src/runtime/virtcontainers/ipvlan_endpoint.go b/src/runtime/virtcontainers/ipvlan_endpoint.go index f9196cda3..a257e6ecb 100644 --- a/src/runtime/virtcontainers/ipvlan_endpoint.go +++ b/src/runtime/virtcontainers/ipvlan_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/ipvlan_endpoint_test.go 
b/src/runtime/virtcontainers/ipvlan_endpoint_test.go index 0f26119b6..7fe33d166 100644 --- a/src/runtime/virtcontainers/ipvlan_endpoint_test.go +++ b/src/runtime/virtcontainers/ipvlan_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 208b192c0..8e089ac13 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -19,6 +19,7 @@ import ( "github.com/docker/go-units" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/pkg/uuid" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" @@ -28,7 +29,6 @@ import ( kataclient "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/client" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -64,16 +64,12 @@ const ( // path to vfio devices vfioPath = "/dev/vfio/" - sandboxMountsDir = "sandbox-mounts" - NydusRootFSType = "fuse.nydus-overlayfs" + // enable debug console kernelParamDebugConsole = "agent.debug_console" kernelParamDebugConsoleVPort = "agent.debug_console_vport" 
kernelParamDebugConsoleVPortValue = "1026" - - // Restricted permission for shared directory managed by virtiofs - sharedDirMode = os.FileMode(0700) | os.ModeDir ) var ( @@ -389,79 +385,6 @@ func (k *kataAgent) internalConfigure(ctx context.Context, h Hypervisor, id stri return nil } -func (k *kataAgent) setupSandboxBindMounts(ctx context.Context, sandbox *Sandbox) (err error) { - span, ctx := katatrace.Trace(ctx, k.Logger(), "setupSandboxBindMounts", kataAgentTracingTags) - defer span.End() - - if len(sandbox.config.SandboxBindMounts) == 0 { - return nil - } - - // Create subdirectory in host shared path for sandbox mounts - sandboxMountDir := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir) - sandboxShareDir := filepath.Join(GetSharePath(sandbox.id), sandboxMountsDir) - if err := os.MkdirAll(sandboxMountDir, DirMode); err != nil { - return fmt.Errorf("Creating sandbox shared mount directory: %v: %w", sandboxMountDir, err) - } - var mountedList []string - defer func() { - if err != nil { - for _, mnt := range mountedList { - if derr := syscall.Unmount(mnt, syscall.MNT_DETACH|UmountNoFollow); derr != nil { - k.Logger().WithError(derr).Errorf("Cleanup: couldn't unmount %s", mnt) - } - } - if derr := os.RemoveAll(sandboxMountDir); derr != nil { - k.Logger().WithError(derr).Errorf("Cleanup: failed to remove %s", sandboxMountDir) - } - - } - }() - - for _, m := range sandbox.config.SandboxBindMounts { - mountDest := filepath.Join(sandboxMountDir, filepath.Base(m)) - // bind-mount each sandbox mount that's defined into the sandbox mounts dir - if err := bindMount(ctx, m, mountDest, true, "private"); err != nil { - return fmt.Errorf("Mounting sandbox directory: %v to %v: %w", m, mountDest, err) - } - mountedList = append(mountedList, mountDest) - - mountDest = filepath.Join(sandboxShareDir, filepath.Base(m)) - if err := remountRo(ctx, mountDest); err != nil { - return fmt.Errorf("remount sandbox directory: %v to %v: %w", m, mountDest, err) - } - - } - - return 
nil -} - -func (k *kataAgent) cleanupSandboxBindMounts(sandbox *Sandbox) error { - if sandbox.config == nil || len(sandbox.config.SandboxBindMounts) == 0 { - return nil - } - - var retErr error - bindmountShareDir := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir) - for _, m := range sandbox.config.SandboxBindMounts { - mountPath := filepath.Join(bindmountShareDir, filepath.Base(m)) - if err := syscall.Unmount(mountPath, syscall.MNT_DETACH|UmountNoFollow); err != nil { - if retErr == nil { - retErr = err - } - k.Logger().WithError(err).Errorf("Failed to unmount sandbox bindmount: %v", mountPath) - } - } - if err := os.RemoveAll(bindmountShareDir); err != nil { - if retErr == nil { - retErr = err - } - k.Logger().WithError(err).Errorf("Failed to remove sandbox bindmount directory: %s", bindmountShareDir) - } - - return retErr -} - func (k *kataAgent) configure(ctx context.Context, h Hypervisor, id, sharePath string, config KataAgentConfig) error { span, ctx := katatrace.Trace(ctx, k.Logger(), "configure", kataAgentTracingTags) defer span.End() @@ -511,47 +434,10 @@ func (k *kataAgent) configureFromGrpc(ctx context.Context, h Hypervisor, id stri return k.internalConfigure(ctx, h, id, config) } -func (k *kataAgent) setupSharedPath(ctx context.Context, sandbox *Sandbox) (err error) { - span, ctx := katatrace.Trace(ctx, k.Logger(), "setupSharedPath", kataAgentTracingTags) - defer span.End() - - // create shared path structure - sharePath := GetSharePath(sandbox.id) - mountPath := getMountPath(sandbox.id) - if err := os.MkdirAll(sharePath, sharedDirMode); err != nil { - return err - } - if err := os.MkdirAll(mountPath, DirMode); err != nil { - return err - } - - // slave mount so that future mountpoints under mountPath are shown in sharePath as well - if err := bindMount(ctx, mountPath, sharePath, true, "slave"); err != nil { - return err - } - defer func() { - if err != nil { - if umountErr := syscall.Unmount(sharePath, syscall.MNT_DETACH|UmountNoFollow); 
umountErr != nil { - k.Logger().WithError(umountErr).Errorf("failed to unmount vm share path %s", sharePath) - } - } - }() - - // Setup sandbox bindmounts, if specified: - if err = k.setupSandboxBindMounts(ctx, sandbox); err != nil { - return err - } - - return nil -} - func (k *kataAgent) createSandbox(ctx context.Context, sandbox *Sandbox) error { span, ctx := katatrace.Trace(ctx, k.Logger(), "createSandbox", kataAgentTracingTags) defer span.End() - if err := k.setupSharedPath(ctx, sandbox); err != nil { - return err - } return k.configure(ctx, sandbox.hypervisor, sandbox.id, GetSharePath(sandbox.id), sandbox.config.AgentConfig) } @@ -1063,7 +949,7 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str // - Initrd image doesn't have systemd. // - Nobody will be able to modify the resources of a specific container by using systemctl set-property. // - docker is not running in the VM. - if vccgroups.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) { + if resCtrl.IsSystemdCgroup(grpcSpec.Linux.CgroupsPath) { // Convert systemd cgroup to cgroupfs slice := strings.Split(grpcSpec.Linux.CgroupsPath, ":") // 0 - slice: system.slice @@ -1274,163 +1160,18 @@ func (k *kataAgent) rollbackFailingContainerCreation(ctx context.Context, c *Con k.Logger().WithError(err2).Error("rollback failed unmountHostMounts()") } - if c.rootFs.Type == NydusRootFSType { - if err2 := nydusContainerCleanup(ctx, getMountPath(c.sandbox.id), c); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed nydusContainerCleanup") - } - } else { - if err2 := bindUnmountContainerRootfs(ctx, getMountPath(c.sandbox.id), c.id); err2 != nil { - k.Logger().WithError(err2).Error("rollback failed bindUnmountContainerRootfs()") - } + if err2 := c.sandbox.fsShare.UnshareRootFilesystem(ctx, c); err2 != nil { + k.Logger().WithError(err2).Error("rollback failed UnshareRootfs()") } } } -func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { - var virtiofsDaemon 
VirtiofsDaemon - switch sandbox.GetHypervisorType() { - case string(QemuHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon - case string(ClhHypervisor): - virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon - default: - return nil, errNydusdNotSupport - } - return virtiofsDaemon, nil -} - -func (k *kataAgent) buildContainerRootfsWithNydus(sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox) - if err != nil { - return nil, err - } - extraOption, err := parseExtraOption(c.rootFs.Options) - if err != nil { - return nil, err - } - mountOpt := &MountOption{ - mountpoint: rafsMountPath(c.id), - source: extraOption.Source, - config: extraOption.Config, - } - k.Logger().Infof("nydus option: %v", extraOption) - // mount lowerdir to guest /run/kata-containers/shared/images//lowerdir - if err := virtiofsDaemon.Mount(*mountOpt); err != nil { - return nil, err - } - rootfs := &grpc.Storage{} - containerShareDir := filepath.Join(getMountPath(c.sandbox.id), c.id) - - // mkdir rootfs, guest at /run/kata-containers/shared/containers//rootfs - rootfsDir := filepath.Join(containerShareDir, c.rootfsSuffix) - if err := os.MkdirAll(rootfsDir, DirMode); err != nil { - return nil, err - } - // bindmount snapshot dir which snapshotter allocated - // to guest /run/kata-containers/shared/containers//snapshotdir - snapshotShareDir := filepath.Join(containerShareDir, snapshotDir) - if err := bindMount(k.ctx, extraOption.Snapshotdir, snapshotShareDir, true, "slave"); err != nil { - return nil, err - } - - // so rootfs = overlay(upperdir, workerdir, lowerdir) - rootfs.MountPoint = filepath.Join(rootPathParent, c.rootfsSuffix) - rootfs.Source = typeOverlayFS - rootfs.Fstype = typeOverlayFS - rootfs.Driver = kataOverlayDevType - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", upperDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "fs"))) - 
rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", workDir, filepath.Join(kataGuestSharedDir(), c.id, snapshotDir, "work"))) - rootfs.Options = append(rootfs.Options, fmt.Sprintf("%s=%s", lowerDir, filepath.Join(kataGuestNydusImageDir(), c.id, lowerDir))) - rootfs.Options = append(rootfs.Options, "index=off") - k.Logger().Infof("rootfs info: %#v\n", rootfs) - return rootfs, nil -} - -func (k *kataAgent) buildContainerRootfs(ctx context.Context, sandbox *Sandbox, c *Container, rootPathParent string) (*grpc.Storage, error) { - // In the confidential computing, there is no Image information on the host, - // so there is no Rootfs.Target. - if sandbox.config.ServiceOffload && c.rootFs.Target == "" { - return nil, nil - } - if c.rootFs.Type == NydusRootFSType { - return k.buildContainerRootfsWithNydus(sandbox, c, rootPathParent) - } - if c.state.Fstype != "" && c.state.BlockDeviceID != "" { - // The rootfs storage volume represents the container rootfs - // mount point inside the guest. - // It can be a block based device (when using block based container - // overlay on the host) mount or a 9pfs one (for all other overlay - // implementations). - rootfs := &grpc.Storage{} - - // This is a block based device rootfs. 
- device := sandbox.devManager.GetDeviceByID(c.state.BlockDeviceID) - if device == nil { - k.Logger().WithField("device", c.state.BlockDeviceID).Error("failed to find device by id") - return nil, fmt.Errorf("failed to find device by id %q", c.state.BlockDeviceID) - } - - blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive) - if !ok || blockDrive == nil { - k.Logger().Error("malformed block drive") - return nil, fmt.Errorf("malformed block drive") - } - switch { - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioMmio: - rootfs.Driver = kataMmioBlkDevType - rootfs.Source = blockDrive.VirtPath - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlockCCW: - rootfs.Driver = kataBlkCCWDevType - rootfs.Source = blockDrive.DevNo - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioBlock: - rootfs.Driver = kataBlkDevType - rootfs.Source = blockDrive.PCIPath.String() - case sandbox.config.HypervisorConfig.BlockDeviceDriver == config.VirtioSCSI: - rootfs.Driver = kataSCSIDevType - rootfs.Source = blockDrive.SCSIAddr - default: - return nil, fmt.Errorf("Unknown block device driver: %s", sandbox.config.HypervisorConfig.BlockDeviceDriver) - } - - rootfs.MountPoint = rootPathParent - rootfs.Fstype = c.state.Fstype - - if c.state.Fstype == "xfs" { - rootfs.Options = []string{"nouuid"} - } - - // Ensure container mount destination exists - // TODO: remove dependency on shared fs path. shared fs is just one kind of storage source. - // we should not always use shared fs path for all kinds of storage. Instead, all storage - // should be bind mounted to a tmpfs path for containers to use. - if err := os.MkdirAll(filepath.Join(getMountPath(c.sandbox.id), c.id, c.rootfsSuffix), DirMode); err != nil { - return nil, err - } - return rootfs, nil - } - - // This is not a block based device rootfs. We are going to bind mount it into the shared drive - // between the host and the guest. 
- // With virtiofs/9pfs we don't need to ask the agent to mount the rootfs as the shared directory - // (kataGuestSharedDir) is already mounted in the guest. We only need to mount the rootfs from - // the host and it will show up in the guest. - if err := bindMountContainerRootfs(ctx, getMountPath(sandbox.id), c.id, c.rootFs.Target, false); err != nil { - return nil, err - } - - return nil, nil -} - func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Container) (p *Process, err error) { span, ctx := katatrace.Trace(ctx, k.Logger(), "createContainer", kataAgentTracingTags) defer span.End() var ctrStorages []*grpc.Storage var ctrDevices []*grpc.Device - var rootfs *grpc.Storage - - // This is the guest absolute root path for that container. - rootPathParent := filepath.Join(kataGuestSharedDir(), c.id) - rootPath := filepath.Join(rootPathParent, c.rootfsSuffix) + var sharedRootfs *SharedFile // In case the container creation fails, the following defer statement // takes care of rolling back actions previously performed. @@ -1441,19 +1182,19 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } }() - // setup rootfs -- if its block based, we'll receive a non-nil storage object representing + // Share the container rootfs -- if its block based, we'll receive a non-nil storage object representing // the block device for the rootfs, which us utilized for mounting in the guest. This'll be handled // already for non-block based rootfs - if rootfs, err = k.buildContainerRootfs(ctx, sandbox, c, rootPathParent); err != nil { + if sharedRootfs, err = sandbox.fsShare.ShareRootFilesystem(ctx, c); err != nil { return nil, err } - if rootfs != nil { + if sharedRootfs.storage != nil { // Add rootfs to the list of container storage. 
// We only need to do this for block based rootfs, as we // want the agent to mount it into the right location // (kataGuestSharedDir/ctrID/ - ctrStorages = append(ctrStorages, rootfs) + ctrStorages = append(ctrStorages, sharedRootfs.storage) } ociSpec := c.GetPatchedOCISpec() @@ -1529,7 +1270,7 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co } // We need to give the OCI spec our absolute rootfs path in the guest. - grpcSpec.Root.Path = rootPath + grpcSpec.Root.Path = sharedRootfs.guestPath sharedPidNs := k.handlePidNamespace(grpcSpec, sandbox) @@ -2348,7 +2089,7 @@ func (k *kataAgent) copyFile(ctx context.Context, src, dst string) error { cpReq := &grpc.CopyFileRequest{ Path: dst, DirMode: uint32(DirMode), - FileMode: st.Mode, + FileMode: uint32(st.Mode), FileSize: fileSize, Uid: int32(st.Uid), Gid: int32(st.Gid), @@ -2398,26 +2139,7 @@ func (k *kataAgent) markDead(ctx context.Context) { k.disconnect(ctx) } -func (k *kataAgent) cleanup(ctx context.Context, s *Sandbox) { - if err := k.cleanupSandboxBindMounts(s); err != nil { - k.Logger().WithError(err).Errorf("failed to Cleanup sandbox bindmounts") - } - - // Unmount shared path - path := GetSharePath(s.id) - k.Logger().WithField("path", path).Infof("Cleanup agent") - if err := syscall.Unmount(path, syscall.MNT_DETACH|UmountNoFollow); err != nil { - k.Logger().WithError(err).Errorf("failed to unmount vm share path %s", path) - } - - // Unmount mount path - path = getMountPath(s.id) - if err := bindUnmountAllRootfs(ctx, path, s); err != nil { - k.Logger().WithError(err).Errorf("failed to unmount vm mount path %s", path) - } - if err := os.RemoveAll(getSandboxPath(s.id)); err != nil { - k.Logger().WithError(err).Errorf("failed to Cleanup vm path %s", getSandboxPath(s.id)) - } +func (k *kataAgent) cleanup(ctx context.Context) { } func (k *kataAgent) save() persistapi.AgentState { diff --git a/src/runtime/virtcontainers/kata_agent_test.go 
b/src/runtime/virtcontainers/kata_agent_test.go index 6475e8b06..bca567c3f 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -14,6 +14,7 @@ import ( "path" "path/filepath" "reflect" + "runtime" "strings" "syscall" "testing" @@ -21,6 +22,7 @@ import ( specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" + "code.cloudfoundry.org/bytefmt" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" @@ -35,6 +37,8 @@ import ( vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" ) +const sysHugepagesDir = "/sys/kernel/mm/hugepages" + var ( testBlkDriveFormat = "testBlkDriveFormat" testBlockDeviceCtrPath = "testBlockDeviceCtrPath" @@ -50,6 +54,7 @@ func TestKataAgentConnect(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -73,6 +78,7 @@ func TestKataAgentDisconnect(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -110,6 +116,7 @@ func TestKataAgentSendReq(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -822,6 +829,10 @@ func TestAgentCreateContainer(t *testing.T) { hypervisor: &mockHypervisor{}, } + fsShare, err := NewFilesystemShare(sandbox) + assert.Nil(err) + sandbox.fsShare = fsShare + store, err := persist.GetDriver() 
assert.NoError(err) assert.NotNil(store) @@ -843,6 +854,7 @@ func TestAgentCreateContainer(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -873,6 +885,7 @@ func TestAgentNetworkOperation(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -921,6 +934,7 @@ func TestKataCopyFile(t *testing.T) { url, err := mock.GenerateKataMockHybridVSock() assert.NoError(err) + defer mock.RemoveKataMockHybridVSock(url) hybridVSockTTRPCMock := mock.HybridVSockTTRPCMock{} err = hybridVSockTTRPCMock.Start(url) @@ -983,7 +997,7 @@ func TestKataCleanupSandbox(t *testing.T) { assert.Nil(err) k := &kataAgent{ctx: context.Background()} - k.cleanup(context.Background(), &s) + k.cleanup(context.Background()) _, err = os.Stat(dir) assert.False(os.IsExist(err)) @@ -1119,119 +1133,6 @@ func TestKataAgentDirs(t *testing.T) { assert.Equal(rafsMountPath(cid), expected) } -func TestSandboxBindMount(t *testing.T) { - if os.Getuid() != 0 { - t.Skip("Test disabled as requires root user") - } - - assert := assert.New(t) - // create temporary files to mount: - testMountPath, err := os.MkdirTemp("", "sandbox-test") - assert.NoError(err) - defer os.RemoveAll(testMountPath) - - // create a new shared directory for our test: - kataHostSharedDirSaved := kataHostSharedDir - testHostDir, err := os.MkdirTemp("", "kata-Cleanup") - assert.NoError(err) - kataHostSharedDir = func() string { - return testHostDir - } - defer func() { - kataHostSharedDir = kataHostSharedDirSaved - }() - - m1Path := filepath.Join(testMountPath, "foo.txt") - f1, err := os.Create(m1Path) - assert.NoError(err) - defer f1.Close() - - m2Path := filepath.Join(testMountPath, "bar.txt") - f2, err := 
os.Create(m2Path) - assert.NoError(err) - defer f2.Close() - - // create sandbox for mounting into - sandbox := &Sandbox{ - ctx: context.Background(), - id: "foobar", - config: &SandboxConfig{ - SandboxBindMounts: []string{m1Path, m2Path}, - }, - } - k := &kataAgent{ctx: context.Background()} - - // make the shared directory for our test: - dir := kataHostSharedDir() - err = os.MkdirAll(path.Join(dir, sandbox.id), 0777) - assert.Nil(err) - defer os.RemoveAll(dir) - - sharePath := GetSharePath(sandbox.id) - mountPath := getMountPath(sandbox.id) - - err = os.MkdirAll(sharePath, DirMode) - assert.Nil(err) - err = os.MkdirAll(mountPath, DirMode) - assert.Nil(err) - - // setup the expeted slave mount: - err = bindMount(sandbox.ctx, mountPath, sharePath, true, "slave") - assert.Nil(err) - defer syscall.Unmount(sharePath, syscall.MNT_DETACH|UmountNoFollow) - - // Test the function. We expect it to succeed and for the mount to exist - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - - // Test the Cleanup function. We expect it to succeed for the mount to be removed. - err = k.cleanupSandboxBindMounts(sandbox) - assert.NoError(err) - - // After successful Cleanup, verify there are not any mounts left behind. - stat := syscall.Stat_t{} - mount1CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m1Path)) - err = syscall.Stat(mount1CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - - mount2CheckPath := filepath.Join(getMountPath(sandbox.id), sandboxMountsDir, filepath.Base(m2Path)) - err = syscall.Stat(mount2CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - - // Now, let's setup the Cleanup to fail. Setup the sandbox bind mount twice, which will result in - // extra mounts being present that the sandbox description doesn't account for (ie, duplicate mounts). 
- // We expect Cleanup to fail on the first time, since it cannot remove the sandbox-bindmount directory because - // there are leftover mounts. If we run it a second time, however, it should succeed since it'll remove the - // second set of mounts: - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.NoError(err) - // Test the Cleanup function. We expect it to succeed for the mount to be removed. - err = k.cleanupSandboxBindMounts(sandbox) - assert.Error(err) - err = k.cleanupSandboxBindMounts(sandbox) - assert.NoError(err) - - // - // Now, let's setup the sandbox bindmount to fail, and verify that no mounts are left behind - // - sandbox.config.SandboxBindMounts = append(sandbox.config.SandboxBindMounts, "oh-nos") - err = k.setupSandboxBindMounts(context.Background(), sandbox) - assert.Error(err) - // Verify there aren't any mounts left behind - stat = syscall.Stat_t{} - err = syscall.Stat(mount1CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - err = syscall.Stat(mount2CheckPath, &stat) - assert.Error(err) - assert.True(os.IsNotExist(err)) - -} - func TestHandleHugepages(t *testing.T) { if os.Getuid() != 0 { t.Skip("Test disabled as requires root user") @@ -1244,18 +1145,36 @@ func TestHandleHugepages(t *testing.T) { defer os.RemoveAll(dir) k := kataAgent{} + var formattedSizes []string var mounts []specs.Mount var hugepageLimits []specs.LinuxHugepageLimit - hugepageDirs := [2]string{"hugepages-1Gi", "hugepages-2Mi"} - options := [2]string{"pagesize=1024M", "pagesize=2M"} + // On s390x, hugepage sizes must be set at boot and cannot be created ad hoc. Use any that + // are present (default is 1M, can only be changed on LPAR). See + // https://www.ibm.com/docs/en/linuxonibm/pdf/lku5dd05.pdf, p. 345 for more information. 
+ if runtime.GOARCH == "s390x" { + dirs, err := ioutil.ReadDir(sysHugepagesDir) + assert.Nil(err) + for _, dir := range dirs { + formattedSizes = append(formattedSizes, strings.TrimPrefix(dir.Name(), "hugepages-")) + } + } else { + formattedSizes = []string{"1G", "2M"} + } - for i := 0; i < 2; i++ { - target := path.Join(dir, hugepageDirs[i]) - err := os.MkdirAll(target, 0777) + for _, formattedSize := range formattedSizes { + bytes, err := bytefmt.ToBytes(formattedSize) + assert.Nil(err) + hugepageLimits = append(hugepageLimits, specs.LinuxHugepageLimit{ + Pagesize: formattedSize, + Limit: 1_000_000 * bytes, + }) + + target := path.Join(dir, fmt.Sprintf("hugepages-%s", formattedSize)) + err = os.MkdirAll(target, 0777) assert.NoError(err, "Unable to create dir %s", target) - err = syscall.Mount("nodev", target, "hugetlbfs", uintptr(0), options[i]) + err = syscall.Mount("nodev", target, "hugetlbfs", uintptr(0), fmt.Sprintf("pagesize=%s", formattedSize)) assert.NoError(err, "Unable to mount %s", target) defer syscall.Unmount(target, 0) @@ -1267,21 +1186,10 @@ func TestHandleHugepages(t *testing.T) { mounts = append(mounts, mount) } - hugepageLimits = []specs.LinuxHugepageLimit{ - { - Pagesize: "1GB", - Limit: 1073741824, - }, - { - Pagesize: "2MB", - Limit: 134217728, - }, - } - hugepages, err := k.handleHugepages(mounts, hugepageLimits) assert.NoError(err, "Unable to handle hugepages %v", hugepageLimits) assert.NotNil(hugepages) - assert.Equal(len(hugepages), 2) + assert.Equal(len(hugepages), len(formattedSizes)) } diff --git a/src/runtime/virtcontainers/macvlan_endpoint.go b/src/runtime/virtcontainers/macvlan_endpoint.go index 228ec3351..061a36f15 100644 --- a/src/runtime/virtcontainers/macvlan_endpoint.go +++ b/src/runtime/virtcontainers/macvlan_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvlan_endpoint_test.go 
b/src/runtime/virtcontainers/macvlan_endpoint_test.go index 2f421b6b7..896be917e 100644 --- a/src/runtime/virtcontainers/macvlan_endpoint_test.go +++ b/src/runtime/virtcontainers/macvlan_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvtap_endpoint.go b/src/runtime/virtcontainers/macvtap_endpoint.go index e64076341..1a2452cf1 100644 --- a/src/runtime/virtcontainers/macvtap_endpoint.go +++ b/src/runtime/virtcontainers/macvtap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/macvtap_endpoint_test.go b/src/runtime/virtcontainers/macvtap_endpoint_test.go index 0f9283f79..6b7e00943 100644 --- a/src/runtime/virtcontainers/macvtap_endpoint_test.go +++ b/src/runtime/virtcontainers/macvtap_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/mock_agent.go b/src/runtime/virtcontainers/mock_agent.go index 019d9f981..0025bb268 100644 --- a/src/runtime/virtcontainers/mock_agent.go +++ b/src/runtime/virtcontainers/mock_agent.go @@ -225,7 +225,7 @@ func (n *mockAgent) addSwap(ctx context.Context, PCIPath vcTypes.PciPath) error func (n *mockAgent) markDead(ctx context.Context) { } -func (n *mockAgent) cleanup(ctx context.Context, s *Sandbox) { +func (n *mockAgent) cleanup(ctx context.Context) { } // save is the Noop agent state saver. It does nothing. 
diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index f782c5d09..c2879f213 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -144,8 +144,8 @@ func getDeviceForPath(path string) (device, error) { if isHostDevice(path) { // stat.Rdev describes the device that this file (inode) represents. - devMajor = major(stat.Rdev) - devMinor = minor(stat.Rdev) + devMajor = major(uint64(stat.Rdev)) + devMinor = minor(uint64(stat.Rdev)) return device{ major: devMajor, @@ -154,8 +154,8 @@ func getDeviceForPath(path string) (device, error) { }, nil } // stat.Dev points to the underlying device containing the file - devMajor = major(stat.Dev) - devMinor = minor(stat.Dev) + devMajor = major(uint64(stat.Dev)) + devMinor = minor(uint64(stat.Dev)) path, err = filepath.Abs(path) if err != nil { @@ -388,6 +388,19 @@ func bindUnmountContainerSnapshotDir(ctx context.Context, sharedDir, cID string) return bindUnmountContainerShareDir(ctx, sharedDir, cID, snapshotDir) } +func getVirtiofsDaemonForNydus(sandbox *Sandbox) (VirtiofsDaemon, error) { + var virtiofsDaemon VirtiofsDaemon + switch sandbox.GetHypervisorType() { + case string(QemuHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*qemu).virtiofsDaemon + case string(ClhHypervisor): + virtiofsDaemon = sandbox.hypervisor.(*cloudHypervisor).virtiofsDaemon + default: + return nil, errNydusdNotSupport + } + return virtiofsDaemon, nil +} + func nydusContainerCleanup(ctx context.Context, sharedDir string, c *Container) error { sandbox := c.sandbox virtiofsDaemon, err := getVirtiofsDaemonForNydus(sandbox) diff --git a/src/runtime/virtcontainers/nydusd.go b/src/runtime/virtcontainers/nydusd.go index c9315ee37..7931e4556 100644 --- a/src/runtime/virtcontainers/nydusd.go +++ b/src/runtime/virtcontainers/nydusd.go @@ -104,12 +104,13 @@ func (nd *nydusd) Start(ctx context.Context, onQuit onQuitFunc) (int, error) { return pid, err } cmd := exec.Command(nd.path, args...) 
- r, w, err := os.Pipe() + stdout, err := cmd.StdoutPipe() if err != nil { return pid, err } - cmd.Stdout = w - cmd.Stderr = w + + cmd.Stderr = cmd.Stdout + fields := logrus.Fields{ "path": nd.path, "args": strings.Join(args, " "), @@ -120,7 +121,7 @@ func (nd *nydusd) Start(ctx context.Context, onQuit onQuitFunc) (int, error) { } // Monitor nydusd's stdout/stderr and stop sandbox if nydusd quits go func() { - scanner := bufio.NewScanner(r) + scanner := bufio.NewScanner(stdout) for scanner.Scan() { nd.Logger().Info(scanner.Text()) } diff --git a/src/runtime/virtcontainers/physical_endpoint.go b/src/runtime/virtcontainers/physical_endpoint.go index 1ab193006..71f67da8f 100644 --- a/src/runtime/virtcontainers/physical_endpoint.go +++ b/src/runtime/virtcontainers/physical_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -14,10 +15,10 @@ import ( "path/filepath" "strings" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" persistapi "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist/api" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/safchain/ethtool" ) @@ -89,7 +90,7 @@ func (endpoint *PhysicalEndpoint) Attach(ctx context.Context, s *Sandbox) error return err } - c, err := cgroups.DeviceToCgroupDeviceRule(vfioPath) + c, err := resCtrl.DeviceToCgroupDeviceRule(vfioPath) if err != nil { return err } diff --git a/src/runtime/virtcontainers/physical_endpoint_test.go b/src/runtime/virtcontainers/physical_endpoint_test.go index 7e823f6f7..cac41358f 100644 --- 
a/src/runtime/virtcontainers/physical_endpoint_test.go +++ b/src/runtime/virtcontainers/physical_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml index b820e01cc..4a168e53e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/api/openapi.yaml @@ -181,6 +181,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "404": description: The new device could not be added to the VM instance. summary: Add a new device to the VM @@ -215,6 +217,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new disk was successfully added to the VM instance. + "204": + description: The new disk was successfully (cold) added to the VM instance. "500": description: The new disk could not be added to the VM instance. summary: Add a new disk to the VM @@ -234,6 +238,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. summary: Add a new virtio-fs device to the VM @@ -253,6 +259,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. 
summary: Add a new pmem device to the VM @@ -272,6 +280,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. summary: Add a new network device to the VM @@ -291,6 +301,8 @@ paths: schema: $ref: '#/components/schemas/PciDeviceInfo' description: The new device was successfully added to the VM instance. + "204": + description: The new device was successfully (cold) added to the VM instance. "500": description: The new device could not be added to the VM instance. summary: Add a new vsock device to the VM @@ -632,7 +644,7 @@ components: children: - children - children - pci_bdf: 3 + pci_bdf: pci_bdf resources: - '{}' - '{}' @@ -663,7 +675,7 @@ components: children: - children - children - pci_bdf: 3 + pci_bdf: pci_bdf resources: - '{}' - '{}' @@ -680,8 +692,7 @@ components: type: string type: array pci_bdf: - format: int32 - type: integer + type: string type: object VmCounters: additionalProperties: @@ -1757,6 +1768,8 @@ components: properties: receiver_url: type: string + required: + - receiver_url type: object SendMigrationData: example: @@ -1767,4 +1780,6 @@ components: type: string local: type: boolean + required: + - destination_url type: object diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md index 3c2c8821d..c5ca050b6 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DefaultApi.md @@ -1071,7 +1071,7 @@ import ( ) func main() { - receiveMigrationData := *openapiclient.NewReceiveMigrationData() // ReceiveMigrationData | The URL for the reception of migration state + receiveMigrationData := 
*openapiclient.NewReceiveMigrationData("ReceiverUrl_example") // ReceiveMigrationData | The URL for the reception of migration state configuration := openapiclient.NewConfiguration() api_client := openapiclient.NewAPIClient(configuration) @@ -1381,7 +1381,7 @@ import ( ) func main() { - sendMigrationData := *openapiclient.NewSendMigrationData() // SendMigrationData | The URL for sending the migration state + sendMigrationData := *openapiclient.NewSendMigrationData("DestinationUrl_example") // SendMigrationData | The URL for sending the migration state configuration := openapiclient.NewConfiguration() api_client := openapiclient.NewAPIClient(configuration) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md index 575547055..02863e513 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/DeviceNode.md @@ -7,7 +7,7 @@ Name | Type | Description | Notes **Id** | Pointer to **string** | | [optional] **Resources** | Pointer to **[]map[string]interface{}** | | [optional] **Children** | Pointer to **[]string** | | [optional] -**PciBdf** | Pointer to **int32** | | [optional] +**PciBdf** | Pointer to **string** | | [optional] ## Methods @@ -105,20 +105,20 @@ HasChildren returns a boolean if a field has been set. ### GetPciBdf -`func (o *DeviceNode) GetPciBdf() int32` +`func (o *DeviceNode) GetPciBdf() string` GetPciBdf returns the PciBdf field if non-nil, zero value otherwise. ### GetPciBdfOk -`func (o *DeviceNode) GetPciBdfOk() (*int32, bool)` +`func (o *DeviceNode) GetPciBdfOk() (*string, bool)` GetPciBdfOk returns a tuple with the PciBdf field if it's non-nil, zero value otherwise and a boolean to check if the value has been set. 
### SetPciBdf -`func (o *DeviceNode) SetPciBdf(v int32)` +`func (o *DeviceNode) SetPciBdf(v string)` SetPciBdf sets PciBdf field to given value. diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md index 652bf96bc..394d47ceb 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/ReceiveMigrationData.md @@ -4,13 +4,13 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**ReceiverUrl** | Pointer to **string** | | [optional] +**ReceiverUrl** | **string** | | ## Methods ### NewReceiveMigrationData -`func NewReceiveMigrationData() *ReceiveMigrationData` +`func NewReceiveMigrationData(receiverUrl string, ) *ReceiveMigrationData` NewReceiveMigrationData instantiates a new ReceiveMigrationData object This constructor will assign default values to properties that have it defined, @@ -44,11 +44,6 @@ and a boolean to check if the value has been set. SetReceiverUrl sets ReceiverUrl field to given value. -### HasReceiverUrl - -`func (o *ReceiveMigrationData) HasReceiverUrl() bool` - -HasReceiverUrl returns a boolean if a field has been set. 
[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md index 03edd489e..d3b6a5c1b 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/docs/SendMigrationData.md @@ -4,14 +4,14 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- -**DestinationUrl** | Pointer to **string** | | [optional] +**DestinationUrl** | **string** | | **Local** | Pointer to **bool** | | [optional] ## Methods ### NewSendMigrationData -`func NewSendMigrationData() *SendMigrationData` +`func NewSendMigrationData(destinationUrl string, ) *SendMigrationData` NewSendMigrationData instantiates a new SendMigrationData object This constructor will assign default values to properties that have it defined, @@ -45,11 +45,6 @@ and a boolean to check if the value has been set. SetDestinationUrl sets DestinationUrl field to given value. -### HasDestinationUrl - -`func (o *SendMigrationData) HasDestinationUrl() bool` - -HasDestinationUrl returns a boolean if a field has been set. 
### GetLocal diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go index ef6aab589..012f97196 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_device_node.go @@ -19,7 +19,7 @@ type DeviceNode struct { Id *string `json:"id,omitempty"` Resources *[]map[string]interface{} `json:"resources,omitempty"` Children *[]string `json:"children,omitempty"` - PciBdf *int32 `json:"pci_bdf,omitempty"` + PciBdf *string `json:"pci_bdf,omitempty"` } // NewDeviceNode instantiates a new DeviceNode object @@ -136,9 +136,9 @@ func (o *DeviceNode) SetChildren(v []string) { } // GetPciBdf returns the PciBdf field value if set, zero value otherwise. -func (o *DeviceNode) GetPciBdf() int32 { +func (o *DeviceNode) GetPciBdf() string { if o == nil || o.PciBdf == nil { - var ret int32 + var ret string return ret } return *o.PciBdf @@ -146,7 +146,7 @@ func (o *DeviceNode) GetPciBdf() int32 { // GetPciBdfOk returns a tuple with the PciBdf field value if set, nil otherwise // and a boolean to check if the value has been set. -func (o *DeviceNode) GetPciBdfOk() (*int32, bool) { +func (o *DeviceNode) GetPciBdfOk() (*string, bool) { if o == nil || o.PciBdf == nil { return nil, false } @@ -162,8 +162,8 @@ func (o *DeviceNode) HasPciBdf() bool { return false } -// SetPciBdf gets a reference to the given int32 and assigns it to the PciBdf field. -func (o *DeviceNode) SetPciBdf(v int32) { +// SetPciBdf gets a reference to the given string and assigns it to the PciBdf field. 
+func (o *DeviceNode) SetPciBdf(v string) { o.PciBdf = &v } diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go index fd7e6eff7..7549055d5 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_receive_migration_data.go @@ -16,15 +16,16 @@ import ( // ReceiveMigrationData struct for ReceiveMigrationData type ReceiveMigrationData struct { - ReceiverUrl *string `json:"receiver_url,omitempty"` + ReceiverUrl string `json:"receiver_url"` } // NewReceiveMigrationData instantiates a new ReceiveMigrationData object // This constructor will assign default values to properties that have it defined, // and makes sure properties required by API are set, but the set of arguments // will change when the set of required properties is changed -func NewReceiveMigrationData() *ReceiveMigrationData { +func NewReceiveMigrationData(receiverUrl string) *ReceiveMigrationData { this := ReceiveMigrationData{} + this.ReceiverUrl = receiverUrl return &this } @@ -36,41 +37,33 @@ func NewReceiveMigrationDataWithDefaults() *ReceiveMigrationData { return &this } -// GetReceiverUrl returns the ReceiverUrl field value if set, zero value otherwise. +// GetReceiverUrl returns the ReceiverUrl field value func (o *ReceiveMigrationData) GetReceiverUrl() string { - if o == nil || o.ReceiverUrl == nil { + if o == nil { var ret string return ret } - return *o.ReceiverUrl + + return o.ReceiverUrl } -// GetReceiverUrlOk returns a tuple with the ReceiverUrl field value if set, nil otherwise +// GetReceiverUrlOk returns a tuple with the ReceiverUrl field value // and a boolean to check if the value has been set. 
func (o *ReceiveMigrationData) GetReceiverUrlOk() (*string, bool) { - if o == nil || o.ReceiverUrl == nil { + if o == nil { return nil, false } - return o.ReceiverUrl, true + return &o.ReceiverUrl, true } -// HasReceiverUrl returns a boolean if a field has been set. -func (o *ReceiveMigrationData) HasReceiverUrl() bool { - if o != nil && o.ReceiverUrl != nil { - return true - } - - return false -} - -// SetReceiverUrl gets a reference to the given string and assigns it to the ReceiverUrl field. +// SetReceiverUrl sets field value func (o *ReceiveMigrationData) SetReceiverUrl(v string) { - o.ReceiverUrl = &v + o.ReceiverUrl = v } func (o ReceiveMigrationData) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} - if o.ReceiverUrl != nil { + if true { toSerialize["receiver_url"] = o.ReceiverUrl } return json.Marshal(toSerialize) diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go index 11f913bf0..6ceeedb7e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/client/model_send_migration_data.go @@ -16,16 +16,17 @@ import ( // SendMigrationData struct for SendMigrationData type SendMigrationData struct { - DestinationUrl *string `json:"destination_url,omitempty"` - Local *bool `json:"local,omitempty"` + DestinationUrl string `json:"destination_url"` + Local *bool `json:"local,omitempty"` } // NewSendMigrationData instantiates a new SendMigrationData object // This constructor will assign default values to properties that have it defined, // and makes sure properties required by API are set, but the set of arguments // will change when the set of required properties is changed -func NewSendMigrationData() *SendMigrationData { +func NewSendMigrationData(destinationUrl string) *SendMigrationData { this := SendMigrationData{} + 
this.DestinationUrl = destinationUrl return &this } @@ -37,36 +38,28 @@ func NewSendMigrationDataWithDefaults() *SendMigrationData { return &this } -// GetDestinationUrl returns the DestinationUrl field value if set, zero value otherwise. +// GetDestinationUrl returns the DestinationUrl field value func (o *SendMigrationData) GetDestinationUrl() string { - if o == nil || o.DestinationUrl == nil { + if o == nil { var ret string return ret } - return *o.DestinationUrl + + return o.DestinationUrl } -// GetDestinationUrlOk returns a tuple with the DestinationUrl field value if set, nil otherwise +// GetDestinationUrlOk returns a tuple with the DestinationUrl field value // and a boolean to check if the value has been set. func (o *SendMigrationData) GetDestinationUrlOk() (*string, bool) { - if o == nil || o.DestinationUrl == nil { + if o == nil { return nil, false } - return o.DestinationUrl, true + return &o.DestinationUrl, true } -// HasDestinationUrl returns a boolean if a field has been set. -func (o *SendMigrationData) HasDestinationUrl() bool { - if o != nil && o.DestinationUrl != nil { - return true - } - - return false -} - -// SetDestinationUrl gets a reference to the given string and assigns it to the DestinationUrl field. +// SetDestinationUrl sets field value func (o *SendMigrationData) SetDestinationUrl(v string) { - o.DestinationUrl = &v + o.DestinationUrl = v } // GetLocal returns the Local field value if set, zero value otherwise. 
@@ -103,7 +96,7 @@ func (o *SendMigrationData) SetLocal(v bool) { func (o SendMigrationData) MarshalJSON() ([]byte, error) { toSerialize := map[string]interface{}{} - if o.DestinationUrl != nil { + if true { toSerialize["destination_url"] = o.DestinationUrl } if o.Local != nil { diff --git a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml index c4dcae04c..c9ecd399e 100644 --- a/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml +++ b/src/runtime/virtcontainers/pkg/cloud-hypervisor/cloud-hypervisor.yaml @@ -195,6 +195,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 404: description: The new device could not be added to the VM instance. @@ -231,6 +233,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new disk was successfully (cold) added to the VM instance. 500: description: The new disk could not be added to the VM instance. @@ -251,6 +255,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -271,6 +277,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -291,6 +299,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. 
@@ -311,6 +321,8 @@ paths: application/json: schema: $ref: '#/components/schemas/PciDeviceInfo' + 204: + description: The new device was successfully (cold) added to the VM instance. 500: description: The new device could not be added to the VM instance. @@ -428,8 +440,7 @@ components: items: type: string pci_bdf: - type: integer - format: int32 + type: string VmCounters: type: object @@ -1055,12 +1066,16 @@ components: type: boolean ReceiveMigrationData: + required: + - receiver_url type: object properties: receiver_url: type: string SendMigrationData: + required: + - destination_url type: object properties: destination_url: diff --git a/src/runtime/virtcontainers/pkg/mock/mock.go b/src/runtime/virtcontainers/pkg/mock/mock.go index 537aef9f5..264c74d7b 100644 --- a/src/runtime/virtcontainers/pkg/mock/mock.go +++ b/src/runtime/virtcontainers/pkg/mock/mock.go @@ -16,9 +16,13 @@ import ( gpb "github.com/gogo/protobuf/types" aTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols" pb "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" + "path" + "strings" ) -var testKataMockHybridVSockURLTempl = "mock://%s/kata-mock-hybrid-vsock.sock" +const VSockPrefix = "mock://" + +var testKataMockHybridVSockURLTempl = VSockPrefix + "%s/kata-mock-hybrid-vsock.sock" func GenerateKataMockHybridVSock() (string, error) { dir, err := os.MkdirTemp("", "kata-mock-hybrid-vsock-test") @@ -29,6 +33,15 @@ func GenerateKataMockHybridVSock() (string, error) { return fmt.Sprintf(testKataMockHybridVSockURLTempl, dir), nil } +func RemoveKataMockHybridVSock(sockAddress string) error { + if !strings.HasPrefix(sockAddress, VSockPrefix) { + return fmt.Errorf("Invalid socket address: %s", sockAddress) + } + + sockPath := strings.TrimPrefix(sockAddress, VSockPrefix) + return os.RemoveAll(path.Dir(sockPath)) +} + // HybridVSockTTRPCMock is the ttrpc-based mock hybrid-vsock backend implementation type 
HybridVSockTTRPCMock struct { // HybridVSockTTRPCMockImp is the structure implementing diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 055281cb7..0985380d8 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -128,8 +131,6 @@ const ( qemuStopSandboxTimeoutSecs = 15 ) -var noGuestMemHotplugErr error = errors.New("guest memory hotplug not supported") - // agnostic list of kernel parameters var defaultKernelParameters = []Param{ {"panic", "1"}, @@ -661,13 +662,16 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi qemuConfig.IOThreads = []govmmQemu.IOThread{*ioThread} } // Add RNG device to hypervisor - rngDev := config.RNGDev{ - ID: rngID, - Filename: q.config.EntropySource, - } - qemuConfig.Devices, err = q.arch.appendRNGDevice(ctx, qemuConfig.Devices, rngDev) - if err != nil { - return err + // Skip for s390x as CPACF is used + if machine.Type != QemuCCWVirtio { + rngDev := config.RNGDev{ + ID: rngID, + Filename: q.config.EntropySource, + } + qemuConfig.Devices, err = q.arch.appendRNGDevice(ctx, qemuConfig.Devices, rngDev) + if err != nil { + return err + } } // Add PCIe Root Port devices to hypervisor @@ -841,11 +845,13 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error { // virtiofsd are executed by kata-runtime after this call, run with // the SELinux label. If these processes require privileged, we do // notwant to run them under confinement. 
- if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { - return err - } - defer label.SetProcessLabel("") + if !q.config.DisableSeLinux { + if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil { + return err + } + defer label.SetProcessLabel("") + } if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus { err = q.setupVirtiofsDaemon(ctx) if err != nil { diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 757065961..8e76bf55c 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -36,12 +39,6 @@ const ( defaultQemuMachineOptions = "accel=kvm,kernel_irqchip=on" qmpMigrationWaitTimeout = 5 * time.Second - - tdxSysFirmwareDir = "/sys/firmware/tdx_seam/" - - tdxCPUFlag = "tdx" - - sevKvmParameterPath = "/sys/module/kvm_amd/parameters/sev" ) var qemuPaths = map[string]string{ @@ -81,11 +78,6 @@ var supportedQemuMachines = []govmmQemu.Machine{ }, } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return uint32(240) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { @@ -140,6 +132,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuAmd64").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } if config.SGXEPCSize != 0 { @@ -161,8 +158,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { func (q *qemuAmd64) capabilities() types.Capabilities { var caps types.Capabilities - if q.qemuMachine.Type == QemuQ35 || - 
q.qemuMachine.Type == QemuVirt { + if (q.qemuMachine.Type == QemuQ35 || + q.qemuMachine.Type == QemuVirt) && + q.protection == noneProtection { caps.SetBlockDeviceHotplugSupport() } @@ -196,7 +194,11 @@ func (q *qemuAmd64) memoryTopology(memoryMb, hostMemoryMb uint64, slots uint8) g // Is Memory Hotplug supported by this architecture/machine type combination? func (q *qemuAmd64) supportGuestMemoryHotplug() bool { // true for all amd64 machine types except for microvm. - return q.qemuMachine.Type != govmmQemu.MachineTypeMicrovm + if q.qemuMachine.Type == govmmQemu.MachineTypeMicrovm { + return false + } + + return q.protection == noneProtection } func (q *qemuAmd64) appendImage(ctx context.Context, devices []govmmQemu.Device, path string) ([]govmmQemu.Device, error) { diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index a992b1497..e8cf9fcd5 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index e800199eb..c5c5e8057 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -150,57 +153,6 @@ type qemuArch interface { appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) } -// Kind of guest protection -type guestProtection uint8 - -const ( - noneProtection guestProtection = iota - - //Intel Trust Domain Extensions - //https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html - // Exclude from lint checking for it won't be 
used on arm64 code - tdxProtection - - // AMD Secure Encrypted Virtualization - // https://developer.amd.com/sev/ - // Exclude from lint checking for it won't be used on arm64 code - sevProtection - - // IBM POWER 9 Protected Execution Facility - // https://www.kernel.org/doc/html/latest/powerpc/ultravisor.html - // Exclude from lint checking for it won't be used on arm64 code - pefProtection - - // IBM Secure Execution (IBM Z & LinuxONE) - // https://www.kernel.org/doc/html/latest/virt/kvm/s390-pv.html - // Exclude from lint checking for it won't be used on arm64 code - seProtection -) - -var guestProtectionStr = [...]string{ - noneProtection: "none", - pefProtection: "pef", - seProtection: "se", - sevProtection: "sev", - tdxProtection: "tdx", -} - -func (gp guestProtection) String() string { - return guestProtectionStr[gp] -} - -func genericAvailableGuestProtections() (protections []string) { - return -} - -func AvailableGuestProtections() (protections []string) { - gp, err := availableGuestProtection() - if err != nil || gp == noneProtection { - return genericAvailableGuestProtections() - } - return []string{gp.String()} -} - type qemuArchBase struct { qemuExePath string qemuMachine govmmQemu.Machine @@ -226,7 +178,6 @@ const ( defaultBridgeBus = "pcie.0" defaultPCBridgeBus = "pci.0" maxDevIDSize = 31 - defaultMsize9p = 8192 pcieRootPortPrefix = "rp" ) @@ -326,7 +277,9 @@ func (q *qemuArchBase) kernelParameters(debug bool) []Param { func (q *qemuArchBase) capabilities() types.Capabilities { var caps types.Capabilities - caps.SetBlockDeviceHotplugSupport() + if q.protection == noneProtection { + caps.SetBlockDeviceHotplugSupport() + } caps.SetMultiQueueSupport() caps.SetFsSharingSupport() return caps @@ -739,7 +692,7 @@ func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { } func (q *qemuArchBase) supportGuestMemoryHotplug() bool { - return true + return q.protection == noneProtection } func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx 
context.Context, qmp *govmmQemu.QMP) error { diff --git a/src/runtime/virtcontainers/qemu_arch_base_test.go b/src/runtime/virtcontainers/qemu_arch_base_test.go index b4b808685..ff20ba447 100644 --- a/src/runtime/virtcontainers/qemu_arch_base_test.go +++ b/src/runtime/virtcontainers/qemu_arch_base_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -173,13 +176,13 @@ func TestQemuArchBaseCPUTopology(t *testing.T) { expectedSMP := govmmQemu.SMP{ CPUs: vcpus, - Sockets: defaultMaxQemuVCPUs, + Sockets: defaultMaxVCPUs, Cores: defaultCores, Threads: defaultThreads, - MaxCPUs: defaultMaxQemuVCPUs, + MaxCPUs: defaultMaxVCPUs, } - smp := qemuArchBase.cpuTopology(vcpus, defaultMaxQemuVCPUs) + smp := qemuArchBase.cpuTopology(vcpus, defaultMaxVCPUs) assert.Equal(expectedSMP, smp) } diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index ee07b3241..f5af19e21 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -29,8 +32,6 @@ const qmpMigrationWaitTimeout = 10 * time.Second const defaultQemuMachineOptions = "usb=off,accel=kvm,gic-version=host" -var defaultGICVersion = uint32(3) - var kernelParams = []Param{ {"console", "hvc0"}, {"console", "hvc1"}, @@ -42,22 +43,6 @@ var supportedQemuMachine = govmmQemu.Machine{ Options: defaultQemuMachineOptions, } -//In qemu, maximum number of vCPUs depends on the GIC version, or on how -//many redistributors we can fit into the memory map. -//related codes are under github.com/qemu/qemu/hw/arm/virt.c(Line 135 and 1306 in stable-2.11) -//for now, qemu only supports v2 and v3, we treat v4 as v3 based on -//backward compatibility. 
-var gicList = map[uint32]uint32{ - uint32(2): uint32(8), - uint32(3): uint32(123), - uint32(4): uint32(123), -} - -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return gicList[defaultGICVersion] -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index 2766ae44a..d78f4474b 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 @@ -11,6 +14,7 @@ import ( "os" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/stretchr/testify/assert" ) @@ -54,10 +58,10 @@ func TestQemuArm64MemoryTopology(t *testing.T) { assert.Equal(expectedMemory, m) } -func TestMaxQemuVCPUs(t *testing.T) { +func TestMaxVCPUs(t *testing.T) { assert := assert.New(t) - vCPUs := MaxQemuVCPUs() + vCPUs := govmm.MaxVCPUs() assert.Equal(uint32(123), vCPUs) } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index eef94bde9..e78fcb701 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 @@ -54,11 +57,6 @@ func (q *qemuPPC64le) Logger() *logrus.Entry { return hvLogger.WithField("subsystem", "qemuPPC64le") } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - return uint32(128) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { @@ -85,6 
+83,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuPPC64le").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } q.handleImagePath(config) @@ -98,7 +101,8 @@ func (q *qemuPPC64le) capabilities() types.Capabilities { var caps types.Capabilities // pseries machine type supports hotplugging drives - if q.qemuMachine.Type == QemuPseries { + if q.qemuMachine.Type == QemuPseries && + q.protection == noneProtection { caps.SetBlockDeviceHotplugSupport() } diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 978e1b0ec..15e75a91b 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index fa15c759f..32716e776 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 @@ -47,14 +50,6 @@ var supportedQemuMachine = govmmQemu.Machine{ Options: defaultQemuMachineOptions, } -// MaxQemuVCPUs returns the maximum number of vCPUs supported -func MaxQemuVCPUs() uint32 { - // Max number of virtual Cpu defined in qemu. 
See - // https://github.com/qemu/qemu/blob/80422b00196a7af4c6efb628fae0ad8b644e98af/target/s390x/cpu.h#L55 - // #define S390_MAX_CPUS 248 - return uint32(248) -} - func newQemuArch(config HypervisorConfig) (qemuArch, error) { machineType := config.HypervisorMachineType if machineType == "" { @@ -82,6 +77,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { if err := q.enableProtection(); err != nil { return nil, err } + + if !q.qemuArchBase.disableNvdimm { + hvLogger.WithField("subsystem", "qemuS390x").Warn("Nvdimm is not supported with confidential guest, disabling it.") + q.qemuArchBase.disableNvdimm = true + } } if config.ImagePath != "" { diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go b/src/runtime/virtcontainers/qemu_s390x_test.go index f207a0c91..3d0c393a6 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2018 IBM // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 5feecaabe..9069bb212 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -1,3 +1,6 @@ +//go:build linux +// +build linux + // Copyright (c) 2016 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 @@ -12,6 +15,7 @@ import ( "path/filepath" "testing" + "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm" govmmQemu "github.com/kata-containers/kata-containers/src/runtime/pkg/govmm/qemu" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist" @@ -30,7 +34,7 @@ func newQemuConfig() HypervisorConfig { MemorySize: defaultMemSzMiB, DefaultBridges: defaultBridges, BlockDeviceDriver: defaultBlockDriver, - DefaultMaxVCPUs: defaultMaxQemuVCPUs, + DefaultMaxVCPUs: defaultMaxVCPUs, 
Msize9p: defaultMsize9p, } } @@ -54,7 +58,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin } func TestQemuKernelParameters(t *testing.T) { - expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", MaxQemuVCPUs()) + expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs()) params := []Param{ { Key: "foo", diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 0aa9b1d9a..6852a1d42 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -26,6 +26,7 @@ import ( "github.com/vishvananda/netlink" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/api" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/drivers" @@ -37,7 +38,6 @@ import ( pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cpuset" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" @@ -65,13 +65,18 @@ const ( rwm = "rwm" // When the Kata overhead threads (I/O, VMM, etc) are not - // placed in the sandbox cgroup, they are moved to a specific, - // unconstrained cgroup hierarchy. 
- // Assuming the cgroup mount point is at /sys/fs/cgroup/, on a - // cgroup v1 system, the Kata overhead memory cgroup will be at + // placed in the sandbox resource controller (A cgroup on Linux), + // they are moved to a specific, unconstrained resource controller. + // On Linux, assuming the cgroup mount point is at /sys/fs/cgroup/, + // on a cgroup v1 system, the Kata overhead memory cgroup will be at // /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is // defined by the orchestrator. - cgroupKataOverheadPath = "/kata_overhead/" + resCtrlKataOverheadID = "/kata_overhead/" + + sandboxMountsDir = "sandbox-mounts" + + // Restricted permission for shared directory managed by virtiofs + sharedDirMode = os.FileMode(0700) | os.ModeDir ) var ( @@ -195,6 +200,7 @@ type Sandbox struct { hypervisor Hypervisor agent agent store persistapi.PersistDriver + fsShare FilesystemSharer swapDevices []*config.BlockDrive volumes []types.Volume @@ -203,10 +209,11 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - sandboxCgroup *cgroups.Cgroup - overheadCgroup *cgroups.Cgroup cw *consoleWatcher + sandboxController resCtrl.ResourceController + overheadController resCtrl.ResourceController + containers map[string]*Container id string @@ -493,7 +500,13 @@ func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Fac return s, nil } - // Below code path is called only during create, because of earlier Check. + // The code below only gets called when initially creating a sandbox, not when restoring or + // re-creating it. The above check for the sandbox state enforces that. 
+ + if err := s.fsShare.Prepare(ctx); err != nil { + return nil, err + } + if err := s.agent.createSandbox(ctx, s); err != nil { return nil, err } @@ -547,6 +560,12 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor swapDevices: []*config.BlockDrive{}, } + fsShare, err := NewFilesystemShare(s) + if err != nil { + return nil, err + } + s.fsShare = fsShare + if s.store, err = persist.GetDriver(); err != nil || s.store == nil { return nil, fmt.Errorf("failed to get fs persist driver: %v", err) } @@ -569,8 +588,8 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor sandboxConfig.HypervisorConfig.EnableVhostUserStore, sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) - // Create the sandbox cgroups - if err := s.createCgroups(); err != nil { + // Create the sandbox resource controllers. + if err := s.createResourceController(); err != nil { return nil, err } @@ -591,7 +610,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return s, nil } -func (s *Sandbox) createCgroups() error { +func (s *Sandbox) createResourceController() error { var err error cgroupPath := "" @@ -600,20 +619,19 @@ func (s *Sandbox) createCgroups() error { resources := specs.LinuxResources{} if s.config == nil { - return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration") + return fmt.Errorf("Could not create %s resource controller manager: empty sandbox configuration", s.sandboxController) } spec := s.GetPatchedOCISpec() if spec != nil && spec.Linux != nil { cgroupPath = spec.Linux.CgroupsPath - // Kata relies on the cgroup parent created and configured by the container - // engine by default. The exception is for devices whitelist as well as sandbox-level - // CPUSet. 
- // For the sandbox cgroups we create and manage, rename the base of the cgroup path to + // Kata relies on the resource controller (cgroups on Linux) parent created and configured by the + // container engine by default. The exception is for devices whitelist as well as sandbox-level CPUSet. + // For the sandbox controllers we create and manage, rename the base of the controller ID to // include "kata_" - if !cgroups.IsSystemdCgroup(cgroupPath) { // don't add prefix when cgroups are managed by systemd - cgroupPath, err = cgroups.RenameCgroupPath(cgroupPath) + if !resCtrl.IsSystemdCgroup(cgroupPath) { // don't add prefix when cgroups are managed by systemd + cgroupPath, err = resCtrl.RenameCgroupPath(cgroupPath) if err != nil { return err } @@ -663,7 +681,7 @@ func (s *Sandbox) createCgroups() error { if s.devManager != nil { for _, d := range s.devManager.GetAllDevices() { - dev, err := cgroups.DeviceToLinuxDevice(d.GetHostPath()) + dev, err := resCtrl.DeviceToLinuxDevice(d.GetHostPath()) if err != nil { s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources") continue @@ -672,34 +690,34 @@ func (s *Sandbox) createCgroups() error { } } - // Create the sandbox cgroup. + // Create the sandbox resource controller (cgroups on Linux). // Depending on the SandboxCgroupOnly value, this cgroup // will either hold all the pod threads (SandboxCgroupOnly is true) // or only the virtual CPU ones (SandboxCgroupOnly is false). - s.sandboxCgroup, err = cgroups.NewSandboxCgroup(cgroupPath, &resources, s.config.SandboxCgroupOnly) + s.sandboxController, err = resCtrl.NewSandboxResourceController(cgroupPath, &resources, s.config.SandboxCgroupOnly) if err != nil { - return fmt.Errorf("Could not create the sandbox cgroup %v", err) + return fmt.Errorf("Could not create the sandbox resource controller %v", err) } - // Now that the sandbox cgroup is created, we can set the state cgroup root paths. 
- s.state.SandboxCgroupPath = s.sandboxCgroup.Path() + // Now that the sandbox resource controller is created, we can set the state controller paths. + s.state.SandboxCgroupPath = s.sandboxController.ID() s.state.OverheadCgroupPath = "" if s.config.SandboxCgroupOnly { - s.overheadCgroup = nil + s.overheadController = nil } else { // The shim configuration is requesting that we do not put all threads - // into the sandbox cgroup. - // We're creating an overhead cgroup, with no constraints. Everything but + // into the sandbox resource controller. + // We're creating an overhead controller, with no constraints. Everything but // the vCPU threads will eventually make it there. - overheadCgroup, err := cgroups.NewCgroup(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{}) + overheadController, err := resCtrl.NewResourceController(fmt.Sprintf("/%s/%s", resCtrlKataOverheadID, s.id), &specs.LinuxResources{}) // TODO: support systemd cgroups overhead cgroup // https://github.com/kata-containers/kata-containers/issues/2963 if err != nil { return err } - s.overheadCgroup = overheadCgroup - s.state.OverheadCgroupPath = s.overheadCgroup.Path() + s.overheadController = overheadController + s.state.OverheadCgroupPath = s.overheadController.ID() } return nil @@ -782,8 +800,8 @@ func (s *Sandbox) Delete(ctx context.Context) error { } if !rootless.IsRootless() { - if err := s.cgroupsDelete(); err != nil { - s.Logger().WithError(err).Error("failed to Cleanup cgroups") + if err := s.resourceControllerDelete(); err != nil { + s.Logger().WithError(err).Errorf("failed to cleanup the %s resource controllers", s.sandboxController) } } @@ -795,7 +813,9 @@ func (s *Sandbox) Delete(ctx context.Context) error { s.Logger().WithError(err).Error("failed to Cleanup hypervisor") } - s.agent.cleanup(ctx, s) + if err := s.fsShare.Cleanup(ctx); err != nil { + s.Logger().WithError(err).Error("failed to cleanup share files") + } return s.store.Destroy(s.id) } @@ -1322,7
+1342,7 @@ func (s *Sandbox) CreateContainer(ctx context.Context, contConfig ContainerConfi return nil, err } - if err = s.cgroupsUpdate(ctx); err != nil { + if err = s.resourceControllerUpdate(ctx); err != nil { return nil, err } @@ -1425,8 +1445,8 @@ func (s *Sandbox) DeleteContainer(ctx context.Context, containerID string) (VCCo } } - // update the sandbox cgroup - if err = s.cgroupsUpdate(ctx); err != nil { + // update the sandbox resource controller + if err = s.resourceControllerUpdate(ctx); err != nil { return nil, err } @@ -1491,7 +1511,7 @@ func (s *Sandbox) UpdateContainer(ctx context.Context, containerID string, resou return err } - if err := s.cgroupsUpdate(ctx); err != nil { + if err := s.resourceControllerUpdate(ctx); err != nil { return err } @@ -1519,7 +1539,7 @@ func (s *Sandbox) StatsContainer(ctx context.Context, containerID string) (Conta // Stats returns the stats of a running sandbox func (s *Sandbox) Stats(ctx context.Context) (SandboxStats, error) { - metrics, err := s.sandboxCgroup.Stat() + metrics, err := s.sandboxController.Stat() if err != nil { return SandboxStats{}, err } @@ -1603,7 +1623,7 @@ func (s *Sandbox) createContainers(ctx context.Context) error { return err } - if err := s.cgroupsUpdate(ctx); err != nil { + if err := s.resourceControllerUpdate(ctx); err != nil { return err } if err := s.storeSandbox(ctx); err != nil { @@ -1756,9 +1776,11 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy span, ctx := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() - if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil { - s.Logger().WithError(err).WithField("device", device). - Warn("Could not add device to cgroup") + if s.sandboxController != nil { + if err := s.sandboxController.AddDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). 
+ Warnf("Could not add device to the %s controller", s.sandboxController) + } } switch devType { @@ -1806,9 +1828,11 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // Sandbox implement DeviceReceiver interface from device/api/interface.go func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, devType config.DeviceType) error { defer func() { - if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil { - s.Logger().WithError(err).WithField("device", device). - Warn("Could not add device to cgroup") + if s.sandboxController != nil { + if err := s.sandboxController.RemoveDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). + Warnf("Could not add device to the %s controller", s.sandboxController) + } } }() @@ -2095,25 +2119,26 @@ func (s *Sandbox) GetHypervisorType() string { return string(s.config.HypervisorType) } -// cgroupsUpdate updates the sandbox cpuset cgroup subsystem. -// Also, if the sandbox has an overhead cgroup, it updates the hypervisor +// resourceControllerUpdate updates the sandbox cpuset resource controller +// (Linux cgroup) subsystem. +// Also, if the sandbox has an overhead controller, it updates the hypervisor // constraints by moving the potentially new vCPU threads back to the sandbox -// cgroup. -func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { +// controller. +func (s *Sandbox) resourceControllerUpdate(ctx context.Context) error { cpuset, memset, err := s.getSandboxCPUSet() if err != nil { return err } - // We update the sandbox cgroup with potentially new virtual CPUs. - if err := s.sandboxCgroup.UpdateCpuSet(cpuset, memset); err != nil { + // We update the sandbox controller with potentially new virtual CPUs. 
+ if err := s.sandboxController.UpdateCpuSet(cpuset, memset); err != nil { return err } - if s.overheadCgroup != nil { - // If we have an overhead cgroup, new vCPU threads would start there, + if s.overheadController != nil { + // If we have an overhead controller, new vCPU threads would start there, // as being children of the VMM PID. - // We need to constrain them by moving them into the sandbox cgroup. + // We need to constrain them by moving them into the sandbox controller. if err := s.constrainHypervisor(ctx); err != nil { return err } @@ -2122,39 +2147,41 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { return nil } -// cgroupsDelete will move the running processes in the sandbox cgroup -// to the parent and then delete the sandbox cgroup -func (s *Sandbox) cgroupsDelete() error { - s.Logger().Debug("Deleting sandbox cgroup") +// resourceControllerDelete will move the running processes in the sandbox resource +// controller to the parent and then delete the sandbox controller.
+func (s *Sandbox) resourceControllerDelete() error { + s.Logger().Debugf("Deleting sandbox %s resource controler", s.sandboxController) if s.state.SandboxCgroupPath == "" { - s.Logger().Warnf("sandbox cgroup path is empty") + s.Logger().Warnf("sandbox %s resource controler path is empty", s.sandboxController) return nil } - sandboxCgroup, err := cgroups.Load(s.state.SandboxCgroupPath) + sandboxController, err := resCtrl.LoadResourceController(s.state.SandboxCgroupPath) if err != nil { return err } - if err := sandboxCgroup.MoveToParent(); err != nil { + resCtrlParent := sandboxController.Parent() + if err := sandboxController.MoveTo(resCtrlParent); err != nil { return err } - if err := sandboxCgroup.Delete(); err != nil { + if err := sandboxController.Delete(); err != nil { return err } if s.state.OverheadCgroupPath != "" { - overheadCgroup, err := cgroups.Load(s.state.OverheadCgroupPath) + overheadController, err := resCtrl.LoadResourceController(s.state.OverheadCgroupPath) if err != nil { return err } - if err := s.overheadCgroup.MoveToParent(); err != nil { + resCtrlParent := overheadController.Parent() + if err := s.overheadController.MoveTo(resCtrlParent); err != nil { return err } - if err := overheadCgroup.Delete(); err != nil { + if err := overheadController.Delete(); err != nil { return err } } @@ -2162,16 +2189,16 @@ func (s *Sandbox) cgroupsDelete() error { return nil } -// constrainHypervisor will place the VMM and vCPU threads into cgroups. +// constrainHypervisor will place the VMM and vCPU threads into resource controllers (cgroups on Linux). func (s *Sandbox) constrainHypervisor(ctx context.Context) error { tids, err := s.hypervisor.GetThreadIDs(ctx) if err != nil { return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) } - // All vCPU threads move to the sandbox cgroup. + // All vCPU threads move to the sandbox controller. 
for _, i := range tids.vcpus { - if err := s.sandboxCgroup.AddTask(i); err != nil { + if err := s.sandboxController.AddThread(i); err != nil { return err } } @@ -2179,22 +2206,22 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error { return nil } -// setupCgroups adds the runtime process to either the sandbox cgroup or the overhead one, -// depending on the sandbox_cgroup_only configuration setting. -func (s *Sandbox) setupCgroups() error { - vmmCgroup := s.sandboxCgroup - if s.overheadCgroup != nil { - vmmCgroup = s.overheadCgroup +// setupResourceController adds the runtime process to either the sandbox resource controller or the +// overhead one, depending on the sandbox_cgroup_only configuration setting. +func (s *Sandbox) setupResourceController() error { + vmmController := s.sandboxController + if s.overheadController != nil { + vmmController = s.overheadController } - // By adding the runtime process to either the sandbox or overhead cgroup, we are making + // By adding the runtime process to either the sandbox or overhead controller, we are making // sure that any child process of the runtime (i.e. *all* processes serving a Kata pod) - // will initially live in this cgroup. Depending on the sandbox_cgroup settings, we will - // then move the vCPU threads between cgroups. + // will initially live in this controller. Depending on the sandbox_cgroup settings, we will + // then move the vCPU threads between resource controllers. 
runtimePid := os.Getpid() - // Add the runtime to the VMM sandbox cgroup - if err := vmmCgroup.AddProcess(runtimePid); err != nil { - return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) + // Add the runtime to the VMM sandbox resource controller + if err := vmmController.AddProcess(runtimePid); err != nil { + return fmt.Errorf("Could not add runtime PID %d to the sandbox %s resource controller: %v", runtimePid, s.sandboxController, err) } return nil @@ -2215,8 +2242,8 @@ func (s *Sandbox) GetPatchedOCISpec() *specs.Spec { } // get the container associated with the PodSandbox annotation. In Kubernetes, this - // represents the pause container. In Docker, this is the container. We derive the - // cgroup path from this container. + // represents the pause container. In Docker, this is the container. + // On Linux, we derive the cgroup path from this container. for _, cConfig := range s.config.Containers { if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) { return cConfig.CustomSpec diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 9c64c8b22..344551e3b 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -1502,14 +1502,14 @@ func TestSandbox_Cgroups(t *testing.T) { } t.Run(tt.name, func(t *testing.T) { - err := tt.s.createCgroups() + err := tt.s.createResourceController() t.Logf("create groups error %v", err) if (err != nil) != tt.wantErr { t.Errorf("Sandbox.CreateCgroups() error = %v, wantErr %v", err, tt.wantErr) } if err == nil { - if err := tt.s.setupCgroups(); (err != nil) != tt.wantErr { + if err := tt.s.setupResourceController(); (err != nil) != tt.wantErr { t.Errorf("Sandbox.SetupCgroups() error = %v, wantErr %v", err, tt.wantErr) } } diff --git a/src/runtime/virtcontainers/tap_endpoint.go b/src/runtime/virtcontainers/tap_endpoint.go index 75c6e3273..62b855382 100644 ---
a/src/runtime/virtcontainers/tap_endpoint.go +++ b/src/runtime/virtcontainers/tap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Huawei Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/tuntap_endpoint.go b/src/runtime/virtcontainers/tuntap_endpoint.go index 9d51f2f72..888e666ef 100644 --- a/src/runtime/virtcontainers/tuntap_endpoint.go +++ b/src/runtime/virtcontainers/tuntap_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Huawei Corporation // Copyright (c) 2019 Intel Corporation // diff --git a/src/runtime/virtcontainers/utils/utils_darwin.go b/src/runtime/virtcontainers/utils/utils_darwin.go new file mode 100644 index 000000000..54b2124ce --- /dev/null +++ b/src/runtime/virtcontainers/utils/utils_darwin.go @@ -0,0 +1,10 @@ +// Copyright (c) 2022 Apple Inc. +// +// SPDX-License-Identifier: Apache-2.0 +// + +package utils + +func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) { + return +} diff --git a/src/runtime/virtcontainers/veth_endpoint.go b/src/runtime/virtcontainers/veth_endpoint.go index 2dd7a4b08..52a7ae479 100644 --- a/src/runtime/virtcontainers/veth_endpoint.go +++ b/src/runtime/virtcontainers/veth_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/veth_endpoint_test.go b/src/runtime/virtcontainers/veth_endpoint_test.go index c9295af97..c6ae2ce67 100644 --- a/src/runtime/virtcontainers/veth_endpoint_test.go +++ b/src/runtime/virtcontainers/veth_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/vhostuser_endpoint.go b/src/runtime/virtcontainers/vhostuser_endpoint.go index 
8d317adf3..ecdfa8a5e 100644 --- a/src/runtime/virtcontainers/vhostuser_endpoint.go +++ b/src/runtime/virtcontainers/vhostuser_endpoint.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/src/runtime/virtcontainers/vhostuser_endpoint_test.go b/src/runtime/virtcontainers/vhostuser_endpoint_test.go index 783c646bc..c75930ff6 100644 --- a/src/runtime/virtcontainers/vhostuser_endpoint_test.go +++ b/src/runtime/virtcontainers/vhostuser_endpoint_test.go @@ -1,5 +1,6 @@ +//go:build linux // +build linux -// + // Copyright (c) 2018 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 diff --git a/tools/osbuilder/.gitignore b/tools/osbuilder/.gitignore index b70d4aa8c..5936792d5 100644 --- a/tools/osbuilder/.gitignore +++ b/tools/osbuilder/.gitignore @@ -3,7 +3,7 @@ dracut/Dockerfile dracut/dracut.conf.d/15-extra-libs.conf /.*.done /*_rootfs -/kata-Centos-dnf.conf +/kata-centos-dnf.conf /kata-containers-initrd.img /kata-containers.img rootfs-builder/centos/RPM-GPG-KEY-* diff --git a/tools/osbuilder/README.md b/tools/osbuilder/README.md index b97839361..343d2bf60 100644 --- a/tools/osbuilder/README.md +++ b/tools/osbuilder/README.md @@ -209,9 +209,9 @@ of the the osbuilder distributions. > Note: this table is not relevant for the dracut build method, since it supports any Linux distribution and architecture where dracut is available. 
-| |Alpine |Clear Linux |Debian/Ubuntu | -|-- |-- |-- |-- | -|**ARM64** |:heavy_check_mark:| | | -|**PPC64le**|:heavy_check_mark:| |:heavy_check_mark:| -|**s390x** | | |:heavy_check_mark:| -|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| +| |Alpine |CentOS Stream |Clear Linux |Debian/Ubuntu | +|-- |-- |-- |-- |-- | +|**ARM64** |:heavy_check_mark:|:heavy_check_mark:| | | +|**PPC64le**| |:heavy_check_mark:| |:heavy_check_mark:| +|**s390x** | |:heavy_check_mark:| |:heavy_check_mark:| +|**x86_64** |:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| diff --git a/tools/osbuilder/rootfs-builder/centos/Dockerfile.in b/tools/osbuilder/rootfs-builder/centos/Dockerfile.in new file mode 100644 index 000000000..fce805994 --- /dev/null +++ b/tools/osbuilder/rootfs-builder/centos/Dockerfile.in @@ -0,0 +1,18 @@ +# Copyright (c) 2018 Intel Corporation, 2021 IBM Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +FROM quay.io/centos/centos:@OS_VERSION@ +@SET_PROXY@ + +RUN dnf -y update && \ + dnf -y install dnf-plugins-core && \ + dnf config-manager --set-enabled crb && \ + dnf -y install \ + diffutils \ + file \ + g++ \ + git \ + protobuf-compiler + +@INSTALL_RUST@ diff --git a/tools/osbuilder/rootfs-builder/centos/config.sh b/tools/osbuilder/rootfs-builder/centos/config.sh new file mode 100644 index 000000000..d9d51e5d7 --- /dev/null +++ b/tools/osbuilder/rootfs-builder/centos/config.sh @@ -0,0 +1,17 @@ +#!/bin/sh +# Copyright (c) 2018 Intel Corporation, 2021 IBM Corp. +# +# SPDX-License-Identifier: Apache-2.0 + +OS_NAME=centos +OS_VERSION=${OS_VERSION:-stream9} +PACKAGES=chrony +[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd" +[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp" + +# Container registry tag is different from metalink repo, e.g. 
"stream9" => "9-stream" +os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")" + +METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch" +GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official +GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE" diff --git a/tools/osbuilder/rootfs-builder/rootfs.sh b/tools/osbuilder/rootfs-builder/rootfs.sh index f54849fe2..e218f3f7b 100755 --- a/tools/osbuilder/rootfs-builder/rootfs.sh +++ b/tools/osbuilder/rootfs-builder/rootfs.sh @@ -45,9 +45,6 @@ ARCH=$(uname -m) # distro-specific config file typeset -r CONFIG_SH="config.sh" -# optional arch-specific config file -typeset -r CONFIG_ARCH_SH="config_${ARCH}.sh" - # Name of an optional distro-specific file which, if it exists, must implement the # build_rootfs() function. typeset -r LIB_SH="rootfs_lib.sh" @@ -328,12 +325,6 @@ build_rootfs_distro() rootfs_config="${distro_config_dir}/${CONFIG_SH}" source "${rootfs_config}" - # Source arch-specific config file - rootfs_arch_config="${distro_config_dir}/${CONFIG_ARCH_SH}" - if [ -f "${rootfs_arch_config}" ]; then - source "${rootfs_arch_config}" - fi - if [ -z "$ROOTFS_DIR" ]; then ROOTFS_DIR="${script_dir}/rootfs-${OS_NAME}" fi @@ -579,6 +570,7 @@ EOT if [ -z "${AGENT_SOURCE_BIN}" ] ; then [ "$LIBC" == "musl" ] && bash ${script_dir}/../../../ci/install_musl.sh + test -r "${HOME}/.cargo/env" && source "${HOME}/.cargo/env" # rust agent needs ${arch}-unknown-linux-${LIBC} if ! 
(rustup show | grep -v linux-${LIBC} > /dev/null); then if [ "$RUST_VERSION" == "null" ]; then diff --git a/tools/osbuilder/scripts/lib.sh b/tools/osbuilder/scripts/lib.sh index c7f1b77d4..7343c1f17 100644 --- a/tools/osbuilder/scripts/lib.sh +++ b/tools/osbuilder/scripts/lib.sh @@ -57,36 +57,18 @@ check_root() generate_dnf_config() { - REPO_NAME=${REPO_NAME:-"base"} - CACHE_DIR=${CACHE_DIR:-"/var/cache/dnf"} cat > "${DNF_CONF}" << EOF [main] -cachedir=${CACHE_DIR} -logfile=${LOG_FILE} -keepcache=0 -debuglevel=2 -exactarch=1 -obsoletes=1 -plugins=0 -installonly_limit=3 reposdir=/root/mash -retries=5 + +[base] +name=${OS_NAME}-${OS_VERSION} base +releasever=${OS_VERSION} EOF if [ "$BASE_URL" != "" ]; then - cat >> "${DNF_CONF}" << EOF -[base] -name=${OS_NAME}-${OS_VERSION} ${REPO_NAME} -failovermethod=priority -baseurl=${BASE_URL} -enabled=1 -EOF - elif [ "$MIRROR_LIST" != "" ]; then - cat >> "${DNF_CONF}" << EOF -[base] -name=${OS_NAME}-${OS_VERSION} ${REPO_NAME} -mirrorlist=${MIRROR_LIST} -enabled=1 -EOF + echo "baseurl=$BASE_URL" >> "$DNF_CONF" + elif [ "$METALINK" != "" ]; then + echo "metalink=$METALINK" >> "$DNF_CONF" fi if [ -n "$GPG_KEY_URL" ]; then @@ -99,15 +81,6 @@ gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE} EOF fi - if [ -n "$GPG_KEY_ARCH_URL" ]; then - if [ ! -f "${CONFIG_DIR}/${GPG_KEY_ARCH_FILE}" ]; then - curl -L "${GPG_KEY_ARCH_URL}" -o "${CONFIG_DIR}/${GPG_KEY_ARCH_FILE}" - fi - cat >> "${DNF_CONF}" << EOF - file://${CONFIG_DIR}/${GPG_KEY_ARCH_FILE} -EOF - fi - } build_rootfs() @@ -151,6 +124,8 @@ build_rootfs() info "install packages for rootfs" $DNF install ${EXTRA_PKGS} ${PACKAGES} + + rm -rf ${ROOTFS_DIR}/usr/share/{bash-completion,cracklib,doc,info,locale,man,misc,pixmaps,terminfo,zoneinfo,zsh} } # Create a YAML metadata file inside the rootfs. 
diff --git a/tools/packaging/kata-deploy/Dockerfile b/tools/packaging/kata-deploy/Dockerfile index e89d24292..94533a906 100644 --- a/tools/packaging/kata-deploy/Dockerfile +++ b/tools/packaging/kata-deploy/Dockerfile @@ -1,27 +1,10 @@ -# Copyright Intel Corporation. +# Copyright Intel Corporation, 2022 IBM Corp. # # SPDX-License-Identifier: Apache-2.0 -FROM registry.centos.org/centos:7 AS base - -ENV container docker - -RUN (cd /lib/systemd/system/sysinit.target.wants/ && for i in *; do [ "$i" = systemd-tmpfiles-setup.service ] || rm -f "$i"; done); \ -rm -f /lib/systemd/system/multi-user.target.wants/*; \ -rm -f /etc/systemd/system/*.wants/*; \ -rm -f /lib/systemd/system/local-fs.target.wants/*; \ -rm -f /lib/systemd/system/sockets.target.wants/*udev*; \ -rm -f /lib/systemd/system/sockets.target.wants/*initctl*; \ -rm -f /lib/systemd/system/basic.target.wants/*; \ -rm -f /lib/systemd/system/anaconda.target.wants/*; - -VOLUME [ "/sys/fs/cgroup" ] - -CMD ["/usr/sbin/init"] - -FROM base - -ARG KUBE_ARCH=amd64 +# Specify alternative base image, e.g. clefos for s390x +ARG IMAGE +FROM ${IMAGE:-registry.centos.org/centos}:7 ARG KATA_ARTIFACTS=./kata-static.tar.xz ARG DESTINATION=/opt/kata-artifacts @@ -29,17 +12,18 @@ COPY ${KATA_ARTIFACTS} ${WORKDIR} RUN \ yum -y update && \ -yum install -y epel-release && \ -yum install -y bzip2 jq && \ +yum -y install xz && \ yum clean all && \ mkdir -p ${DESTINATION} && \ -tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION}/ && \ -chown -R root:root ${DESTINATION}/ +tar xvf ${KATA_ARTIFACTS} -C ${DESTINATION} +# hadolint will deny echo -e, heredocs don't work in Dockerfiles, shell substitution doesn't work with $'...' 
RUN \ -curl -Lso /bin/kubectl "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/${KUBE_ARCH}/kubectl" && \ -chmod +x /bin/kubectl +echo "[kubernetes]" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "name=Kubernetes" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$(uname -m)" >> /etc/yum.repos.d/kubernetes.repo && \ +echo "gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg" >> /etc/yum.repos.d/kubernetes.repo && \ +yum -y install kubectl && \ +yum clean all COPY scripts ${DESTINATION}/scripts -RUN \ -ln -s ${DESTINATION}/scripts/kata-deploy.sh /usr/bin/kata-deploy diff --git a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh index 1864140e1..2a215726b 100755 --- a/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh +++ b/tools/packaging/kata-deploy/local-build/kata-deploy-binaries.sh @@ -28,6 +28,8 @@ readonly shimv2_builder="${static_build_dir}/shim-v2/build.sh" readonly rootfs_builder="${repo_root_dir}/tools/packaging/guest-image/build_image.sh" +ARCH=$(uname -m) + workdir="${WORKDIR:-$PWD}" destdir="${workdir}/kata-static" @@ -125,7 +127,9 @@ install_firecracker() { # Install static cloud-hypervisor asset install_clh() { - export extra_build_args="--features tdx" + if [[ "${ARCH}" == "x86_64" ]]; then + export features="tdx" + fi info "build static cloud-hypervisor" "${clh_builder}" diff --git a/tools/packaging/scripts/configure-hypervisor.sh b/tools/packaging/scripts/configure-hypervisor.sh index bfcc8ca44..4db23ae66 100755 --- a/tools/packaging/scripts/configure-hypervisor.sh +++ b/tools/packaging/scripts/configure-hypervisor.sh @@ -222,11 +222,6 @@ generate_qemu_options() { # Disabled options - # Disable sheepdog block driver support 
(deprecated in 5.2.0) - if ! gt_eq ${qemu_version} 5.2.0 ; then - qemu_options+=(size:--disable-sheepdog) - fi - # Disable block migration in the main migration stream qemu_options+=(size:--disable-live-block-migration) @@ -312,9 +307,7 @@ generate_qemu_options() { # Disable libudev since it is only needed for qemu-pr-helper and USB, # none of which are used with Kata - if gt_eq "${qemu_version}" "5.2.0" ; then - qemu_options+=(size:--disable-libudev) - fi + qemu_options+=(size:--disable-libudev) # Disallow network downloads qemu_options+=(security:--disable-curl) @@ -331,10 +324,8 @@ generate_qemu_options() { # But since QEMU 5.2 the daemon is built as part of the tools set # (disabled with --disable-tools) thus it needs to be explicitely # enabled. - if gt_eq "${qemu_version}" "5.2.0" ; then - qemu_options+=(functionality:--enable-virtiofsd) - qemu_options+=(functionality:--enable-virtfs) - fi + qemu_options+=(functionality:--enable-virtiofsd) + qemu_options+=(functionality:--enable-virtfs) # Don't build linux-user bsd-user qemu_options+=(size:--disable-bsd-user) @@ -385,12 +376,6 @@ generate_qemu_options() { qemu_options+=(size:--disable-dmg) qemu_options+=(size:--disable-parallels) - # vxhs was deprecated on QEMU 5.1 so it doesn't need to be - # explicitly disabled. - if ! gt_eq "${qemu_version}" "5.1.0" ; then - qemu_options+=(size:--disable-vxhs) - fi - #--------------------------------------------------------------------- # Enabled options @@ -449,11 +434,7 @@ generate_qemu_options() { # compile with high level of optimisation # On version 5.2.0 onward the Meson build system warns to not use -O3 - if ! gt_eq "${qemu_version}" "5.2.0" ; then - _qemu_cflags+=" -O3" - else - _qemu_cflags+=" -O2" - fi + _qemu_cflags+=" -O2" # Improve code quality by assuming identical semantics for interposed # synmbols. @@ -544,8 +525,8 @@ main() { [ -n "${qemu_version}" ] || die "cannot determine qemu version from file $qemu_version_file" - if ! 
gt_eq "${qemu_version}" "5.0.0" ; then - die "Kata requires QEMU >= 5.0.0" + if ! gt_eq "${qemu_version}" "6.1.0" ; then + die "Kata requires QEMU >= 6.1.0" fi local gcc_version_major=$(gcc -dumpversion | cut -f1 -d.) diff --git a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh index 61a0824eb..cf2aaec38 100755 --- a/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh +++ b/tools/packaging/static-build/cloud-hypervisor/build-static-clh.sh @@ -16,7 +16,7 @@ ARCH=$(uname -m) script_dir=$(dirname $(readlink -f "$0")) kata_version="${kata_version:-}" force_build_from_source="${force_build_from_source:-false}" -extra_build_args="${extra_build_args:-}" +features="${features:-}" source "${script_dir}/../../scripts/lib.sh" @@ -52,9 +52,9 @@ build_clh_from_source() { pushd "${repo_dir}" git fetch || true git checkout "${cloud_hypervisor_version}" - if [ -n "${extra_build_args}" ]; then - info "Build cloud-hypervisor with extra args: ${extra_build_args}" - ./scripts/dev_cli.sh build --release --libc musl -- ${extra_build_args} + if [ -n "${features}" ]; then + info "Build cloud-hypervisor enabling the following features: ${features}" + ./scripts/dev_cli.sh build --release --libc musl --features "${features}" else ./scripts/dev_cli.sh build --release --libc musl fi @@ -68,7 +68,7 @@ if [ "${ARCH}" == "aarch64" ]; then force_build_from_source="true" fi -if [ -n "${extra_build_args}" ]; then +if [ -n "${features}" ]; then info "As an extra build argument has been passed to the script, forcing to build from source" force_build_from_source="true" fi diff --git a/versions.yaml b/versions.yaml index 40a529bdb..36bb0df18 100644 --- a/versions.yaml +++ b/versions.yaml @@ -75,7 +75,7 @@ assets: url: "https://github.com/cloud-hypervisor/cloud-hypervisor" uscan-url: >- https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz - version: 
"55479a64d237d4c757dba19a696abefd27ec74fd" + version: "b0324f85571c441f840e9bdeb25410514a00bb74" firecracker: description: "Firecracker micro-VMM"