diff --git a/.github/workflows/add-backport-label.yaml b/.github/workflows/add-backport-label.yaml index 5774bb805..3df518b54 100644 --- a/.github/workflows/add-backport-label.yaml +++ b/.github/workflows/add-backport-label.yaml @@ -62,15 +62,15 @@ jobs: has_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'needs-backport') }} has_no_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'no-backport-needed') }} - echo "::set-output name=add_backport_label::false" + echo "add_backport_label=false" >> $GITHUB_OUTPUT if [ $has_backport_needed_label = true ] || [ $has_bug = true ]; then if [[ $has_no_backport_needed_label = false ]]; then - echo "::set-output name=add_backport_label::true" + echo "add_backport_label=true" >> $GITHUB_OUTPUT fi fi # Do not spam comment, only if auto-backport label is going to be newly added. - echo "::set-output name=auto_backport_added::$CONTAINS_AUTO_BACKPORT" + echo "auto_backport_added=$CONTAINS_AUTO_BACKPORT" >> $GITHUB_OUTPUT - name: Add comment if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' && steps.add_label.outputs.auto_backport_added == 'false' }} @@ -97,4 +97,4 @@ jobs: uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90 with: add-labels: "auto-backport" - repo-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/kata-deploy-test.yaml b/.github/workflows/kata-deploy-test.yaml index 5a924c739..c19c7bb4a 100644 --- a/.github/workflows/kata-deploy-test.yaml +++ b/.github/workflows/kata-deploy-test.yaml @@ -67,7 +67,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} 
@@ -107,7 +107,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} @@ -138,7 +138,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} @@ -156,7 +156,7 @@ jobs: docker push quay.io/kata-containers/kata-deploy-ci:$PR_SHA mkdir -p packaging/kata-deploy ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${PR_SHA}" + echo "PKG_SHA=${PR_SHA}" >> $GITHUB_OUTPUT - name: test-kata-deploy-ci-in-aks uses: ./packaging/kata-deploy/action with: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index bbb95c53d..dd48494f1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -89,7 +89,7 @@ jobs: docker push quay.io/kata-containers/kata-deploy-ci:$pkg_sha mkdir -p packaging/kata-deploy ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${pkg_sha}" + echo "PKG_SHA=${pkg_sha}" >> $GITHUB_OUTPUT - name: test-kata-deploy-ci-in-aks uses: ./packaging/kata-deploy/action with: diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 69dfbae59..0cb9d72d3 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -27,7 +27,6 @@ jobs: target_branch: ${{ github.base_ref }} steps: - name: Checkout code - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 with: fetch-depth: 0 @@ -38,6 +37,22 @@ 
jobs: go-version: 1.19.3 env: GOPATH: ${{ runner.workspace }}/kata-containers + - name: Check kernel config version + run: | + cd "${{ github.workspace }}/src/github.com/${{ github.repository }}" + kernel_dir="tools/packaging/kernel/" + kernel_version_file="${kernel_dir}kata_config_version" + modified_files=$(git diff --name-only origin/main..HEAD) + result=$(git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null") + if git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null"; then + echo "Kernel directory has changed, checking if $kernel_version_file has been updated" + if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then + echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) + else + echo "Readme file changed, no need for kernel config version update." + fi + echo "Check passed" + fi - name: Setup GOPATH if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | diff --git a/Makefile b/Makefile index d4a3dad05..8e856138c 100644 --- a/Makefile +++ b/Makefile @@ -45,10 +45,8 @@ docs-url-alive-check: .PHONY: \ all \ - binary-tarball \ + kata-tarball \ + install-tarball \ default \ - install-binary-tarball \ static-checks \ docs-url-alive-check - - diff --git a/VERSION b/VERSION index d0e50ca07..a36373c3b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0-alpha1 +3.1.0-rc0 diff --git a/docs/Release-Process.md b/docs/Release-Process.md index 7dcfb84a3..962463c43 100644 --- a/docs/Release-Process.md +++ b/docs/Release-Process.md @@ -28,23 +28,6 @@ $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH" ``` -### Point tests repository to stable branch - - If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then - you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch. 
- The objective is that changes in the CI on the main branch will not impact the stable branch. - - In the test directory, change references the main branch in: - * `README.md` - * `versions.yaml` - * `cmd/github-labels/labels.yaml.in` - * `cmd/pmemctl/pmemctl.sh` - * `.ci/lib.sh` - * `.ci/static-checks.sh` - - See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. - - ### Merge all bump version Pull requests - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request. @@ -63,6 +46,24 @@ $ ./tag_repos.sh -p -b "$BRANCH" tag ``` +### Point tests repository to stable branch + + If your release changes a major or minor version number(not a patch release), then the above + `./tag_repos.sh` script will create a new stable branch in all the repositories in addition to tagging them. + This happens when you are making the first `rc` release for a new major or minor version in Kata. + In this case, you should modify the `tests` repository to point to the newly created stable branch and not the `main` branch. + The objective is that changes in the CI on the main branch will not impact the stable branch. + + In the test directory, change references of the `main` branch to the new stable branch in: + * `README.md` + * `versions.yaml` + * `cmd/github-labels/labels.yaml.in` + * `cmd/pmemctl/pmemctl.sh` + * `.ci/lib.sh` + * `.ci/static-checks.sh` + + See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. + ### Check Git-hub Actions We make use of [GitHub actions](https://github.com/features/actions) in this [file](../.github/workflows/release.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. 
This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository. diff --git a/docs/how-to/README.md b/docs/how-to/README.md index 07ca974a8..024f29337 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -44,7 +44,7 @@ - [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md) - [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) - [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md) - +- [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md) ## Confidential Containers - [How to use build and test the Confidential Containers `CCv0` proof of concept](how-to-build-and-test-ccv0.md) - [How to generate a Kata Containers payload for the Confidential Containers Operator](how-to-generate-a-kata-containers-payload-for-the-confidential-containers-operator.md) diff --git a/docs/how-to/how-to-hotplug-memory-arm64.md b/docs/how-to/how-to-hotplug-memory-arm64.md index 799367ad0..bc138b458 100644 --- a/docs/how-to/how-to-hotplug-memory-arm64.md +++ b/docs/how-to/how-to-hotplug-memory-arm64.md @@ -15,6 +15,18 @@ $ sudo .ci/aarch64/install_rom_aarch64.sh $ popd ``` +## Config KATA QEMU + +After executing the above script, two files will be generated under the directory `/usr/share/kata-containers/` by default, namely `kata-flash0.img` and `kata-flash1.img`. Next we need to change the configuration file of `kata qemu`, which is in `/opt/kata/share/defaults/kata-containers/configuration-qemu.toml` by default, specify in the configuration file to use the UEFI ROM installed above. The above is an example of `kata deploy` installation. For package management installation, please use `kata-runtime env` to find the location of the configuration file. Please refer to the following configuration. + +``` +[hypervisor.qemu] + +# -pflash can add image file to VM. 
The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = ["/usr/share/kata-containers/kata-flash0.img", "/usr/share/kata-containers/kata-flash1.img"] +``` + ## Run for test Let's test if the memory hotplug is ready for Kata after install the UEFI ROM. Make sure containerd is ready to run Kata before test. diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 27edbf76d..b8ac511cd 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -57,6 +57,7 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for virtio-`scsi` driver | | `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | the memory space used for `nvdimm` device by the hypervisor | | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) | +| `io.katacontainers.config.hypervisor.vhost_user_reconnect_timeout_sec` | `string`| the timeout for reconnecting vhost user socket (QEMU) | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) | | `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) | | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory | diff --git a/docs/how-to/how-to-use-erofs-build-rootfs.md b/docs/how-to/how-to-use-erofs-build-rootfs.md new file mode 100644 index 000000000..72bf6315f --- /dev/null +++ b/docs/how-to/how-to-use-erofs-build-rootfs.md @@ -0,0 +1,90 @@ +# Configure Kata Containers to use EROFS build rootfs + +## Introduction +For kata containers, rootfs is used in the read-only 
way. EROFS can noticeably decrease metadata overhead. + +`mkfs.erofs` can generate compressed and uncompressed EROFS images. + +For uncompressed images, no files are compressed. However, it is optional to inline the data blocks at the end of the file with the metadata. + +For compressed images, each file will be compressed using the lz4 or lz4hc algorithm, and it will be confirmed whether it can save space. Use No compression of the file if compression does not save space. + +## Performance comparison +| | EROFS | EXT4 | XFS | +|-----------------|-------| --- | --- | +| Image Size [MB] | 106(uncompressed) | 256 | 126 | + + +## Guidance +### Install the `erofs-utils` +#### `apt/dnf` install +On newer `Ubuntu/Debian` systems, it can be installed directly using the `apt` command, and on `Fedora` it can be installed directly using the `dnf` command. + +```shell +# Debian/Ubuntu +$ apt install erofs-utils +# Fedora +$ dnf install erofs-utils +``` + +#### Source install +[https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git](https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git) + +##### Compile dependencies +If you need to enable the `Lz4` compression feature, `Lz4 1.8.0+` is required, and `Lz4 1.9.3+` is strongly recommended. + +##### Compilation process +For some old lz4 versions (lz4-1.8.0~1.8.3), if lz4-static is not installed, the lz4hc algorithm will not be supported. lz4-static can be installed with apt install lz4-static.x86_64. However, these versions have some bugs in compression, and it is not recommended to use these versions directly. +If you use `lz4 1.9.0+`, you can directly use the following command to compile. + +```shell +$ ./autogen.sh +$ ./configure +$ make +``` + +The compiled `mkfs.erofs` program will be saved in the `mkfs` directory. Afterwards, the generated tools can be installed to a system directory using make install (requires root privileges). 
+ +### Create a local rootfs +```shell +$ export distro="ubuntu" +$ export FS_TYPE="erofs" +$ export ROOTFS_DIR="realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs" +$ sudo rm -rf "${ROOTFS_DIR}" +$ pushd kata-containers/tools/osbuilder/rootfs-builder +$ script -fec 'sudo -E SECCOMP=no ./rootfs.sh "${distro}"' +$ popd +``` + +### Add a custom agent to the image - OPTIONAL +> Note: +> - You should only do this step if you are testing with the latest version of the agent. +```shell +$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/" +``` + +### Build a root image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ script -fec 'sudo -E ./image_builder.sh "${ROOTFS_DIR}"' +$ popd +``` + +### Install the rootfs image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" +$ rootfs="erofs" +$ image="kata-containers-${rootfs}-${date}-${commit}" +$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}" +$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img) +$ popd +``` + +### Use `EROFS` in the runtime +```shell +$ sudo sed -i -e 's/^# *\(rootfs_type\).*=.*$/\1 = erofs/g' /etc/kata-containers/configuration.toml +``` diff --git a/docs/how-to/how-to-use-kata-containers-with-firecracker.md b/docs/how-to/how-to-use-kata-containers-with-firecracker.md index d2bd362c1..377bd7d11 100644 --- a/docs/how-to/how-to-use-kata-containers-with-firecracker.md +++ b/docs/how-to/how-to-use-kata-containers-with-firecracker.md @@ -104,7 
+104,7 @@ sudo dmsetup create "${POOL_NAME}" \ cat << EOF # -# Add this to your config.toml configuration file and restart `containerd` daemon +# Add this to your config.toml configuration file and restart containerd daemon # [plugins] [plugins.devmapper] diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index bad0639a1..15c8b6f4d 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -724,9 +724,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 31fc5d32e..476181565 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -52,7 +52,7 @@ log = "0.4.11" prometheus = { version = "0.13.0", features = ["process"] } procfs = "0.12.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } # Tracing tracing = "0.1.26" diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 4d60c11dc..d4ba4e6f3 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -25,7 +25,7 @@ scan_fmt = "0.2.6" regex = "1.5.6" path-absolutize = "1.2.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } rlimit = "0.5.3" cfg-if = "0.1.0" diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 9896ec981..fc32c33b4 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -694,17 +694,6 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { }); } - if cg.v2() { 
- return SingularPtrField::some(CpuUsage { - total_usage: 0, - percpu_usage: vec![], - usage_in_kernelmode: 0, - usage_in_usermode: 0, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }); - } - // try to get from cpu controller let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg); let stat = cpu_controller.cpu().stat; @@ -735,7 +724,7 @@ fn get_memory_stats(cg: &cgroups::Cgroup) -> SingularPtrField { let value = memory.use_hierarchy; let use_hierarchy = value == 1; - // gte memory datas + // get memory data let usage = SingularPtrField::some(MemoryData { usage: memory.usage_in_bytes, max_usage: memory.max_usage_in_bytes, diff --git a/src/agent/src/network.rs b/src/agent/src/network.rs index 451b5064d..2e617dc3b 100644 --- a/src/agent/src/network.rs +++ b/src/agent/src/network.rs @@ -7,6 +7,7 @@ use anyhow::{anyhow, Result}; use nix::mount::{self, MsFlags}; use slog::Logger; use std::fs; +use std::path; const KATA_GUEST_SANDBOX_DNS_FILE: &str = "/run/kata-containers/sandbox/resolv.conf"; const GUEST_DNS_FILE: &str = "/etc/resolv.conf"; @@ -64,6 +65,12 @@ fn do_setup_guest_dns(logger: Logger, dns_list: Vec, src: &str, dst: &st .map(|x| x.trim()) .collect::>() .join("\n"); + + // make sure the src file's parent path exist. 
+ let file_path = path::Path::new(src); + if let Some(p) = file_path.parent() { + fs::create_dir_all(p)?; + } fs::write(src, content)?; // bind mount to /etc/resolv.conf diff --git a/src/agent/src/watcher.rs b/src/agent/src/watcher.rs index 468684a43..a6cf4113b 100644 --- a/src/agent/src/watcher.rs +++ b/src/agent/src/watcher.rs @@ -1291,6 +1291,7 @@ mod tests { #[tokio::test] #[serial] + #[cfg(not(target_arch = "aarch64"))] async fn create_tmpfs() { skip_if_not_root!(); diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index a2dd496fd..886e9ec68 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -35,6 +35,9 @@ pub use crate::device_manager::virtio_net_dev_mgr::{ #[cfg(feature = "virtio-vsock")] pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; +#[cfg(feature = "hotplug")] +pub use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; + use super::*; /// Wrapper for all errors associated with VMM actions. @@ -44,9 +47,13 @@ pub enum VmmActionError { #[error("the virtual machine instance ID is invalid")] InvalidVMID, + /// VM doesn't exist and can't get VM information. + #[error("VM doesn't exist and can't get VM information")] + VmNotExist, + /// Failed to hotplug, due to Upcall not ready. #[error("Upcall not ready, can't hotplug device.")] - UpcallNotReady, + UpcallServerNotReady, /// The action `ConfigureBootSource` failed either because of bad user input or an internal /// error. @@ -85,6 +92,11 @@ pub enum VmmActionError { /// The action `InsertFsDevice` failed either because of bad user input or an internal error. #[error("virtio-fs device error: {0}")] FsDevice(#[source] FsDeviceError), + + #[cfg(feature = "hotplug")] + /// The action `ResizeVcpu` Failed + #[error("vcpu resize error : {0}")] + ResizeVcpu(#[source] VcpuResizeError), } /// This enum represents the public interface of the VMM. 
Each action contains various @@ -156,6 +168,10 @@ pub enum VmmAction { #[cfg(feature = "virtio-fs")] /// Update fs rate limiter, after microVM start. UpdateFsDevice(FsDeviceConfigUpdateInfo), + + #[cfg(feature = "hotplug")] + /// Resize Vcpu number in the guest. + ResizeVcpu(VcpuResizeInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -256,6 +272,8 @@ impl VmmService { VmmAction::UpdateFsDevice(fs_update_cfg) => { self.update_fs_rate_limiters(vmm, fs_update_cfg) } + #[cfg(feature = "hotplug")] + VmmAction::ResizeVcpu(vcpu_resize_cfg) => self.resize_vcpu(vmm, vcpu_resize_cfg), }; debug!("send vmm response: {:?}", response); @@ -462,8 +480,8 @@ impl VmmService { let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { - if let StartMicroVmError::UpcallNotReady = e { - return VmmActionError::UpcallNotReady; + if let StartMicroVmError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; } VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot) })?; @@ -518,8 +536,8 @@ impl VmmService { .map_err(|e| { if let StartMicroVmError::MicroVMAlreadyRunning = e { VmmActionError::VirtioNet(VirtioNetDeviceError::UpdateNotAllowedPostBoot) - } else if let StartMicroVmError::UpcallNotReady = e { - VmmActionError::UpcallNotReady + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady } else { VmmActionError::StartMicroVm(e) } @@ -595,6 +613,37 @@ impl VmmService { .map(|_| VmmData::Empty) .map_err(VmmActionError::FsDevice) } + + #[cfg(feature = "hotplug")] + fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { + if !cfg!(target_arch = "x86_64") { + // TODO: Arm need to support vcpu hotplug. 
issue: #6010 + warn!("This arch do not support vm resize!"); + return Ok(VmmData::Empty); + } + + if !cfg!(feature = "dbs-upcall") { + warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature."); + return Ok(VmmData::Empty); + } + + let vm = vmm.get_vm_mut().ok_or(VmmActionError::VmNotExist)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::ResizeVcpu( + VcpuResizeError::UpdateNotAllowedPreBoot, + )); + } + + vm.resize_vcpu(config, None).map_err(|e| { + if let VcpuResizeError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; + } + VmmActionError::ResizeVcpu(e) + })?; + + Ok(VmmData::Empty) + } } fn handle_cpu_topology( diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 7677ebd19..5328ef85a 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -104,7 +104,7 @@ pub enum StartMicroVmError { /// Upcall is not ready #[error("the upcall client is not ready")] - UpcallNotReady, + UpcallServerNotReady, /// Configuration passed in is invalidate. 
#[error("invalid virtual machine configuration: {0} ")] diff --git a/src/dragonball/src/vcpu/mod.rs b/src/dragonball/src/vcpu/mod.rs index 5b8ed397f..b04baf29f 100644 --- a/src/dragonball/src/vcpu/mod.rs +++ b/src/dragonball/src/vcpu/mod.rs @@ -10,7 +10,10 @@ mod vcpu_manager; #[cfg(target_arch = "x86_64")] use dbs_arch::cpuid::VpmuFeatureLevel; -pub use vcpu_manager::{VcpuManager, VcpuManagerError}; +pub use vcpu_manager::{VcpuManager, VcpuManagerError, VcpuResizeInfo}; + +#[cfg(feature = "hotplug")] +pub use vcpu_manager::VcpuResizeError; /// vcpu config collection pub struct VcpuConfig { diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs index f6c1c2d4c..1c21ea38b 100644 --- a/src/dragonball/src/vcpu/vcpu_impl.rs +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -214,10 +214,20 @@ pub enum VcpuResponse { CacheRevalidated, } +#[derive(Debug, PartialEq)] +/// Vcpu Hotplug Result returned from the guest +pub enum VcpuResizeResult { + /// All vCPU hotplug / hot-unplug operations are successful + Success = 0, + /// vCPU hotplug / hot-unplug failed + Failed = 1, +} + /// List of events that the vcpu_state_sender can send. pub enum VcpuStateEvent { - /// (result, response) for hotplug, result 0 means failure, 1 means success. - Hotplug((i32, u32)), + /// (result, response) for hotplug / hot-unplugged. + /// response records how many cpu has successfully being hotplugged / hot-unplugged. + Hotplug((VcpuResizeResult, u32)), } /// Wrapper over vCPU that hides the underlying interactions with the vCPU thread. 
diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 2b076cd5b..383f1f0a7 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -29,7 +29,7 @@ use crate::address_space_manager::GuestAddressSpaceImpl; use crate::api::v1::InstanceInfo; use crate::kvm_context::KvmContext; use crate::vcpu::vcpu_impl::{ - Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResponse, VcpuStateEvent, + Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResizeResult, VcpuResponse, VcpuStateEvent, }; use crate::vcpu::VcpuConfig; use crate::vm::VmConfigInfo; @@ -113,11 +113,6 @@ pub enum VcpuManagerError { #[error("vcpu internal error: {0}")] Vcpu(#[source] VcpuError), - #[cfg(feature = "hotplug")] - /// vCPU resize error - #[error("resize vcpu error: {0}")] - VcpuResize(#[source] VcpuResizeError), - /// Kvm Ioctl Error #[error("failure in issuing KVM ioctl command: {0}")] Kvm(#[source] kvm_ioctls::Error), @@ -132,6 +127,10 @@ pub enum VcpuResizeError { VcpuIsHotplugging, /// Cannot update the configuration of the microvm pre boot. + #[error("resize vcpu operation is not allowed pre boot")] + UpdateNotAllowedPreBoot, + + /// Cannot update the configuration of the microvm post boot. #[error("resize vcpu operation is not allowed after boot")] UpdateNotAllowedPostBoot, @@ -147,10 +146,24 @@ pub enum VcpuResizeError { #[error("Removable vcpu not enough, removable vcpu num: {0}, number to remove: {1}, present vcpu count {2}")] LackRemovableVcpus(u16, u16, u16), - #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + #[cfg(feature = "dbs-upcall")] /// Cannot update the configuration by upcall channel. 
#[error("cannot update the configuration by upcall channel: {0}")] Upcall(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "dbs-upcall")] + /// Cannot find upcall client + #[error("Cannot find upcall client")] + UpcallClientMissing, + + #[cfg(feature = "dbs-upcall")] + /// Upcall server is not ready + #[error("Upcall server is not ready")] + UpcallServerNotReady, + + /// Vcpu manager error + #[error("Vcpu manager error : {0}")] + Vcpu(#[source] VcpuManagerError), } /// Result for vCPU manager operations @@ -163,6 +176,13 @@ enum VcpuAction { Hotunplug, } +/// VcpuResizeInfo describes the information for vcpu hotplug / hot-unplug +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct VcpuResizeInfo { + /// The desired vcpu count to resize. + pub vcpu_count: Option, +} + /// Infos related to per vcpu #[derive(Default)] pub(crate) struct VcpuInfo { @@ -810,11 +830,9 @@ mod hotplug { &mut self, vcpu_count: u8, sync_tx: Option>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { if self.get_vcpus_action() != VcpuAction::None { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::VcpuIsHotplugging, - )); + return Err(VcpuResizeError::VcpuIsHotplugging); } self.action_sycn_tx = sync_tx; @@ -831,9 +849,7 @@ mod hotplug { Ordering::Less => self.do_del_vcpu(vcpu_count, upcall), } } else { - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::UpdateNotAllowedPostBoot, - )) + Err(VcpuResizeError::UpdateNotAllowedPostBoot) } } @@ -841,28 +857,31 @@ mod hotplug { &mut self, vcpu_count: u8, upcall_client: Arc>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { info!("resize vcpu: add"); if vcpu_count > self.vcpu_config.max_vcpu_count { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::ExpectedVcpuExceedMax, - )); + return Err(VcpuResizeError::ExpectedVcpuExceedMax); } - let created_vcpus = self.create_vcpus(vcpu_count, None, None)?; - let 
cpu_ids = self.activate_vcpus(vcpu_count, true).map_err(|e| { - // we need to rollback when activate vcpu error - error!("activate vcpu error, rollback! {:?}", e); - let activated_vcpus: Vec = created_vcpus - .iter() - .filter(|&cpu_id| self.vcpu_infos[*cpu_id as usize].handle.is_some()) - .copied() - .collect(); - if let Err(e) = self.exit_vcpus(&activated_vcpus) { - error!("try to rollback error, stop_vcpu: {:?}", e); - } - e - })?; + let created_vcpus = self + .create_vcpus(vcpu_count, None, None) + .map_err(VcpuResizeError::Vcpu)?; + let cpu_ids = self + .activate_vcpus(vcpu_count, true) + .map_err(|e| { + // we need to rollback when activate vcpu error + error!("activate vcpu error, rollback! {:?}", e); + let activated_vcpus: Vec = created_vcpus + .iter() + .filter(|&cpu_id| self.vcpu_infos[*cpu_id as usize].handle.is_some()) + .copied() + .collect(); + if let Err(e) = self.exit_vcpus(&activated_vcpus) { + error!("try to rollback error, stop_vcpu: {:?}", e); + } + e + }) + .map_err(VcpuResizeError::Vcpu)?; let mut cpu_ids_array = [0u8; (u8::MAX as usize) + 1]; cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]); @@ -882,23 +901,19 @@ mod hotplug { &mut self, vcpu_count: u8, upcall_client: Arc>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { info!("resize vcpu: delete"); if vcpu_count == 0 { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::Vcpu0CanNotBeRemoved, - )); + return Err(VcpuResizeError::Vcpu0CanNotBeRemoved); } let mut cpu_ids = self.calculate_removable_vcpus(); let cpu_num_to_be_del = (self.present_vcpus_count() - vcpu_count) as usize; if cpu_num_to_be_del >= cpu_ids.len() { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::LackRemovableVcpus( - cpu_ids.len() as u16, - cpu_num_to_be_del as u16, - self.present_vcpus_count() as u16, - ), + return Err(VcpuResizeError::LackRemovableVcpus( + cpu_ids.len() as u16, + cpu_num_to_be_del as u16, + 
self.present_vcpus_count() as u16, )); } @@ -924,7 +939,7 @@ mod hotplug { &self, _upcall_client: Arc>, _request: DevMgrRequest, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { Ok(()) } @@ -933,7 +948,7 @@ mod hotplug { &self, upcall_client: Arc>, request: DevMgrRequest, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { // This is used to fix clippy warnings. use dbs_upcall::{DevMgrResponse, UpcallClientRequest, UpcallClientResponse}; @@ -946,9 +961,14 @@ mod hotplug { Box::new(move |result| match result { UpcallClientResponse::DevMgr(response) => { if let DevMgrResponse::CpuDev(resp) = response { + let result: VcpuResizeResult = if resp.result == 0 { + VcpuResizeResult::Success + } else { + VcpuResizeResult::Failed + }; vcpu_state_sender .send(VcpuStateEvent::Hotplug(( - resp.result, + result, resp.info.apic_id_index, ))) .unwrap(); @@ -957,7 +977,7 @@ mod hotplug { } UpcallClientResponse::UpcallReset => { vcpu_state_sender - .send(VcpuStateEvent::Hotplug((0, 0))) + .send(VcpuStateEvent::Hotplug((VcpuResizeResult::Success, 0))) .unwrap(); vcpu_state_event.write(1).unwrap(); } @@ -968,7 +988,6 @@ mod hotplug { }), ) .map_err(VcpuResizeError::Upcall) - .map_err(VcpuManagerError::VcpuResize) } /// Get removable vcpus. 
@@ -993,16 +1012,19 @@ impl VcpuEpollHandler { while let Ok(event) = self.rx.try_recv() { match event { VcpuStateEvent::Hotplug((success, cpu_count)) => { - info!("get vcpu event, cpu_index {}", cpu_count); - self.process_cpu_action(success != 0, cpu_count); + info!( + "get vcpu event, cpu_index {} success {:?}", + cpu_count, success + ); + self.process_cpu_action(success, cpu_count); } } } } - fn process_cpu_action(&self, success: bool, _cpu_index: u32) { + fn process_cpu_action(&self, result: VcpuResizeResult, _cpu_index: u32) { let mut vcpu_manager = self.vcpu_manager.lock().unwrap(); - if success { + if result == VcpuResizeResult::Success { match vcpu_manager.get_vcpus_action() { VcpuAction::Hotplug => { // Notify hotplug success @@ -1362,12 +1384,7 @@ mod tests { // vcpu is already in hotplug process let res = vcpu_manager.resize_vcpu(1, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::VcpuIsHotplugging - )) - )); + assert!(matches!(res, Err(VcpuResizeError::VcpuIsHotplugging))); // clear vcpus action let cpu_ids = vec![0]; @@ -1377,9 +1394,7 @@ mod tests { let res = vcpu_manager.resize_vcpu(1, None); assert!(matches!( res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::UpdateNotAllowedPostBoot - )) + Err(VcpuResizeError::UpdateNotAllowedPostBoot) )); // init upcall channel @@ -1397,20 +1412,10 @@ mod tests { // exceeed max vcpu count let res = vcpu_manager.resize_vcpu(4, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::ExpectedVcpuExceedMax - )) - )); + assert!(matches!(res, Err(VcpuResizeError::ExpectedVcpuExceedMax))); // remove vcpu 0 let res = vcpu_manager.resize_vcpu(0, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::Vcpu0CanNotBeRemoved - )) - )); + assert!(matches!(res, Err(VcpuResizeError::Vcpu0CanNotBeRemoved))); } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index c8a6a39e1..59a4ae205 100644 
--- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -4,6 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom}; use std::ops::Deref; use std::os::unix::io::RawFd; + use std::sync::{Arc, Mutex, RwLock}; use dbs_address_space::AddressSpace; @@ -22,6 +23,8 @@ use vmm_sys_util::eventfd::EventFd; #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] use dbs_upcall::{DevMgrService, UpcallClient}; +#[cfg(feature = "hotplug")] +use std::sync::mpsc::Sender; use crate::address_space_manager::{ AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, @@ -35,6 +38,8 @@ use crate::event_manager::EventManager; use crate::kvm_context::KvmContext; use crate::resource_manager::ResourceManager; use crate::vcpu::{VcpuManager, VcpuManagerError}; +#[cfg(feature = "hotplug")] +use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; #[cfg(target_arch = "aarch64")] use dbs_arch::gic::Error as GICError; @@ -823,7 +828,30 @@ impl Vm { } else if self.is_upcall_client_ready() { Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) } else { - Err(StartMicroVmError::UpcallNotReady) + Err(StartMicroVmError::UpcallServerNotReady) + } + } + + /// Resize MicroVM vCPU number + #[cfg(feature = "hotplug")] + pub fn resize_vcpu( + &mut self, + config: VcpuResizeInfo, + sync_tx: Option>, + ) -> std::result::Result<(), VcpuResizeError> { + if self.upcall_client().is_none() { + Err(VcpuResizeError::UpcallClientMissing) + } else if self.is_upcall_client_ready() { + if let Some(vcpu_count) = config.vcpu_count { + self.vcpu_manager() + .map_err(VcpuResizeError::Vcpu)? 
+ .resize_vcpu(vcpu_count, sync_tx)?; + + self.vm_config.vcpu_count = vcpu_count; + } + Ok(()) + } else { + Err(VcpuResizeError::UpcallServerNotReady) } } diff --git a/src/libs/Cargo.lock b/src/libs/Cargo.lock index c1f73d21b..e0c1296a2 100644 --- a/src/libs/Cargo.lock +++ b/src/libs/Cargo.lock @@ -110,14 +110,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.8" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b827f9d9f6c2fff719d25f5d44cbc8d2ef6df1ef00d055c5c14d5dc25529579" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", - "nix 0.23.1", + "nix 0.25.1", "regex", + "thiserror", ] [[package]] @@ -536,9 +537,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.124" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "lock_api" @@ -640,6 +641,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ntapi" version = "0.3.7" diff --git a/src/libs/kata-sys-util/Cargo.toml b/src/libs/kata-sys-util/Cargo.toml index a4b67e2c0..efcde317e 100644 --- a/src/libs/kata-sys-util/Cargo.toml +++ b/src/libs/kata-sys-util/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] byteorder = "1.4.3" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } chrono = "0.4.0" common-path = "=1.0.0" fail = 
"0.5.0" diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index bf6b36a40..d2d922715 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -42,6 +42,7 @@ pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024; pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024; pub const DEFAULT_GUEST_HOOK_PATH: &str = "/opt/kata/hooks"; +pub const DEFAULT_GUEST_DNS_FILE: &str = "/etc/resolv.conf"; pub const DEFAULT_GUEST_VCPUS: u32 = 1; @@ -67,3 +68,17 @@ pub const DEFAULT_QEMU_PCI_BRIDGES: u32 = 2; pub const MAX_QEMU_PCI_BRIDGES: u32 = 5; pub const MAX_QEMU_VCPUS: u32 = 256; pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64; + +// Default configuration for Cloud Hypervisor (CH) +pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor"; +pub const DEFAULT_CH_CONTROL_PATH: &str = ""; +pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom"; +pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; +pub const DEFAULT_CH_GUEST_KERNEL_PARAMS: &str = ""; +pub const DEFAULT_CH_FIRMWARE_PATH: &str = ""; +pub const DEFAULT_CH_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_CH_MEMORY_SLOTS: u32 = 128; +pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2; +pub const MAX_CH_PCI_BRIDGES: u32 = 5; +pub const MAX_CH_VCPUS: u32 = 256; +pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64; diff --git a/src/libs/kata-types/src/config/hypervisor/ch.rs b/src/libs/kata-types/src/config/hypervisor/ch.rs new file mode 100644 index 000000000..cc752e385 --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/ch.rs @@ -0,0 +1,146 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::Result; +use std::path::Path; +use std::sync::Arc; + +use super::{default, register_hypervisor_plugin}; + +use crate::config::default::MAX_CH_VCPUS; +use crate::config::default::MIN_CH_MEMORY_SIZE_MB; + +use 
crate::config::hypervisor::VIRTIO_BLK_MMIO; +use crate::config::{ConfigPlugin, TomlConfig}; +use crate::{eother, resolve_path, validate_path}; + +/// Hypervisor name for CH, used to index `TomlConfig::hypervisor`. +pub const HYPERVISOR_NAME_CH: &str = "cloud-hypervisor"; + +/// Configuration information for CH. +#[derive(Default, Debug)] +pub struct CloudHypervisorConfig {} + +impl CloudHypervisorConfig { + /// Create a new instance of `CloudHypervisorConfig`. + pub fn new() -> Self { + CloudHypervisorConfig {} + } + + /// Register the CH plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, plugin); + } +} + +impl ConfigPlugin for CloudHypervisorConfig { + fn get_max_cpus(&self) -> u32 { + MAX_CH_VCPUS + } + + fn get_min_memory(&self) -> u32 { + MIN_CH_MEMORY_SIZE_MB + } + + fn name(&self) -> &str { + HYPERVISOR_NAME_CH + } + + /// Adjust the configuration information after loading from configuration file. + fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get_mut(HYPERVISOR_NAME_CH) { + if ch.path.is_empty() { + ch.path = default::DEFAULT_CH_BINARY_PATH.to_string(); + } + resolve_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + if ch.ctlpath.is_empty() { + ch.ctlpath = default::DEFAULT_CH_CONTROL_PATH.to_string(); + } + resolve_path!(ch.ctlpath, "CH ctlpath `{}` is invalid: {}")?; + + if ch.boot_info.kernel.is_empty() { + ch.boot_info.kernel = default::DEFAULT_CH_GUEST_KERNEL_IMAGE.to_string(); + } + if ch.boot_info.kernel_params.is_empty() { + ch.boot_info.kernel_params = default::DEFAULT_CH_GUEST_KERNEL_PARAMS.to_string(); + } + if ch.boot_info.firmware.is_empty() { + ch.boot_info.firmware = default::DEFAULT_CH_FIRMWARE_PATH.to_string(); + } + + if ch.device_info.default_bridges == 0 { + ch.device_info.default_bridges = default::DEFAULT_CH_PCI_BRIDGES; + } + + if ch.machine_info.entropy_source.is_empty() { + ch.machine_info.entropy_source = 
default::DEFAULT_CH_ENTROPY_SOURCE.to_string(); + } + + if ch.memory_info.default_memory == 0 { + ch.memory_info.default_memory = default::DEFAULT_CH_MEMORY_SIZE_MB; + } + if ch.memory_info.memory_slots == 0 { + ch.memory_info.memory_slots = default::DEFAULT_CH_MEMORY_SLOTS; + } + } + + Ok(()) + } + + /// Validate the configuration information. + fn validate(&self, conf: &TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get(HYPERVISOR_NAME_CH) { + validate_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + validate_path!(ch.ctlpath, "CH control path `{}` is invalid: {}")?; + if !ch.jailer_path.is_empty() { + return Err(eother!("Path for CH jailer should be empty")); + } + if !ch.valid_jailer_paths.is_empty() { + return Err(eother!("Valid CH jailer path list should be empty")); + } + + if !ch.blockdev_info.disable_block_device_use + && ch.blockdev_info.block_device_driver == VIRTIO_BLK_MMIO + { + return Err(eother!("CH doesn't support virtio-blk-mmio")); + } + + if ch.boot_info.kernel.is_empty() { + return Err(eother!("Guest kernel image for CH is empty")); + } + if ch.boot_info.image.is_empty() && ch.boot_info.initrd.is_empty() { + return Err(eother!("Both guest boot image and initrd for CH are empty")); + } + + if (ch.cpu_info.default_vcpus > 0 + && ch.cpu_info.default_vcpus as u32 > default::MAX_CH_VCPUS) + || ch.cpu_info.default_maxvcpus > default::MAX_CH_VCPUS + { + return Err(eother!( + "CH hypervisor cannot support {} vCPUs", + ch.cpu_info.default_maxvcpus + )); + } + + if ch.device_info.default_bridges > default::MAX_CH_PCI_BRIDGES { + return Err(eother!( + "CH hypervisor cannot support {} PCI bridges", + ch.device_info.default_bridges + )); + } + + if ch.memory_info.default_memory < MIN_CH_MEMORY_SIZE_MB { + return Err(eother!( + "CH hypervisor has minimal memory limitation {}", + MIN_CH_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs 
b/src/libs/kata-types/src/config/hypervisor/mod.rs index afd3a42ea..98ae2cc79 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -40,6 +40,9 @@ pub use self::dragonball::{DragonballConfig, HYPERVISOR_NAME_DRAGONBALL}; mod qemu; pub use self::qemu::{QemuConfig, HYPERVISOR_NAME_QEMU}; +mod ch; +pub use self::ch::{CloudHypervisorConfig, HYPERVISOR_NAME_CH}; + const VIRTIO_BLK: &str = "virtio-blk"; const VIRTIO_BLK_MMIO: &str = "virtio-mmio"; const VIRTIO_BLK_CCW: &str = "virtio-blk-ccw"; @@ -210,6 +213,9 @@ pub struct BootInfo { /// Path to root device on host #[serde(default)] pub image: String, + /// Rootfs filesystem type. + #[serde(default)] + pub rootfs_type: String, /// Path to the firmware. /// /// If you want that qemu uses the default firmware leave this option empty. diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs index 863dd7590..173026921 100644 --- a/src/libs/kata-types/src/config/mod.rs +++ b/src/libs/kata-types/src/config/mod.rs @@ -25,8 +25,8 @@ pub mod hypervisor; pub use self::agent::Agent; use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT; pub use self::hypervisor::{ - BootInfo, DragonballConfig, Hypervisor, QemuConfig, HYPERVISOR_NAME_DRAGONBALL, - HYPERVISOR_NAME_QEMU, + BootInfo, CloudHypervisorConfig, DragonballConfig, Hypervisor, QemuConfig, + HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_QEMU, }; mod runtime; diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index bb4325861..598d1940a 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -81,9 +81,17 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.57" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" + +[[package]] +name = "api_client" +version 
= "0.1.0" +source = "git+https://github.com/cloud-hypervisor/cloud-hypervisor?tag=v27.0#2ba6a9bfcfd79629aecf77504fa554ab821d138e" +dependencies = [ + "vmm-sys-util 0.10.0", +] [[package]] name = "arc-swap" @@ -401,9 +409,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", @@ -412,6 +420,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "ch-config" +version = "0.1.0" +dependencies = [ + "anyhow", + "api_client", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "chrono" version = "0.4.22" @@ -934,9 +953,9 @@ checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" [[package]] name = "futures" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" dependencies = [ "futures-channel", "futures-core", @@ -949,9 +968,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" dependencies = [ "futures-core", "futures-sink", @@ -959,15 +978,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = 
"ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" [[package]] name = "futures-executor" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" dependencies = [ "futures-core", "futures-task", @@ -976,9 +995,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" [[package]] name = "futures-lite" @@ -997,9 +1016,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" dependencies = [ "proc-macro2", "quote", @@ -1008,15 +1027,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" [[package]] name = "futures-task" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" [[package]] name = "futures-timer" @@ -1026,9 +1045,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.21" +version = 
"0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" dependencies = [ "futures-channel", "futures-core", @@ -1114,7 +1133,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7df0ee4b237afb71e99f7e2fbd840ffec2d6c4bb569f69b2af18aa1f63077d38" dependencies = [ "dashmap", - "futures 0.3.21", + "futures 0.3.26", "futures-timer", "no-std-compat", "nonzero_ext", @@ -1236,8 +1255,10 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "ch-config", "dbs-utils", "dragonball", + "futures 0.3.26", "go-flag", "kata-sys-util", "kata-types", @@ -1246,6 +1267,7 @@ dependencies = [ "nix 0.24.2", "persist", "rand 0.8.5", + "safe-path 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "seccompiler", "serde", "serde_json", @@ -1551,14 +1573,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1612,7 +1634,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65b4b14489ab424703c092062176d52ba55485a89c076b4f9db05092b7223aa6" dependencies = [ "bytes 1.1.0", - "futures 0.3.21", + "futures 0.3.26", "log", "netlink-packet-core", "netlink-sys", @@ -1627,7 +1649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92b654097027250401127914afb37cb1f311df6610a9891ff07a757e94199027" dependencies = [ "bytes 1.1.0", - "futures 0.3.21", + "futures 0.3.26", "libc", "log", "tokio", @@ -1783,7 +1805,7 @@ dependencies = [ "bitflags", 
"blake3", "fuse-backend-rs", - "futures 0.3.21", + "futures 0.3.26", "lazy_static", "libc", "log", @@ -1811,7 +1833,7 @@ dependencies = [ "bitflags", "dbs-uhttp", "fuse-backend-rs", - "futures 0.3.21", + "futures 0.3.26", "governor", "lazy_static", "libc", @@ -1931,7 +1953,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1955,7 +1977,7 @@ dependencies = [ "kata-sys-util", "kata-types", "libc", - "safe-path", + "safe-path 0.1.0", "serde", "serde_json", "shim-interface", @@ -2025,9 +2047,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "proc-macro2" -version = "1.0.39" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" dependencies = [ "unicode-ident", ] @@ -2321,7 +2343,7 @@ dependencies = [ "bitflags", "byte-unit 4.0.17", "cgroups-rs", - "futures 0.3.21", + "futures 0.3.26", "hypervisor", "kata-sys-util", "kata-types", @@ -2361,7 +2383,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46f1cfa18f8cebe685373a2697915d7e0db3b4554918bba118385e0f71f258a7" dependencies = [ - "futures 0.3.21", + "futures 0.3.26", "log", "netlink-packet-route", "netlink-proto", @@ -2432,6 +2454,15 @@ dependencies = [ "libc", ] +[[package]] +name = "safe-path" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980abdd3220aa19b67ca3ea07b173ca36383f18ae48cde696d90c8af39447ffb" +dependencies = [ + "libc", +] + [[package]] name = "scoped-tls" version = "1.0.0" @@ -2455,18 +2486,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.143" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"53e8e5d5b70924f74ff5c6d64d9a5acd91422117c60f48c4e07855238a254553" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.143" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d8e8de557aee63c26b85b947f5e59b690d0454c753f3adeb5cd7835ab88391" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", @@ -2475,9 +2506,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.83" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7" +checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "itoa", "ryu", @@ -2729,9 +2760,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.96" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -2857,22 +2888,22 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.19.1" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95eec79ea28c00a365f539f1961e9278fbcaf81c0ff6aaf0e93c181352446948" +checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" dependencies = [ + "autocfg", "bytes 1.1.0", "libc", "memchr", "mio", "num_cpus", - "once_cell", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -2907,7 +2938,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9e0723fc001950a3b018947b05eeb45014fd2b7c6e8f292502193ab74486bdb6" dependencies = [ "bytes 0.4.12", - "futures 0.3.21", + "futures 0.3.26", "libc", "tokio", "vsock", @@ -2971,7 +3002,7 @@ checksum = "2ecfff459a859c6ba6668ff72b34c2f1d94d9d58f7088414c2674ad0f31cc7d8" dependencies = [ "async-trait", "byteorder", - "futures 0.3.21", + "futures 0.3.26", "libc", "log", "nix 0.23.1", @@ -3105,7 +3136,7 @@ dependencies = [ "awaitgroup", "common", "containerd-shim-protos", - "futures 0.3.21", + "futures 0.3.26", "hypervisor", "kata-sys-util", "kata-types", @@ -3366,43 +3397,100 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" 
+version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index b50ed3991..4c17fe36c 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -93,6 +93,12 @@ DBVALIDHYPERVISORPATHS := [] PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR) KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR) FIRMWAREPATH := FIRMWAREVOLUMEPATH := @@ -224,6 +230,7 @@ USER_VARS += DBVALIDCTLPATHS USER_VARS += SYSCONFIG USER_VARS += IMAGENAME USER_VARS += IMAGEPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime-rs/arch/powerpc64le-options.mk b/src/runtime-rs/arch/powerpc64le-options.mk new file mode 100644 index 000000000..0a974680e --- /dev/null +++ b/src/runtime-rs/arch/powerpc64le-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := pseries +KERNELPARAMS := +MACHINEACCELERATORS := "cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,cap-large-decr=off,cap-ccf-assist=off" +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-ppc64 + +# dragonball binary name +DBCMD := dragonball diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index fa39d23c8..8131d0c68 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -17,6 +17,12 @@ ctlpath = "@DBCTLPATH@" kernel = "@KERNELPATH_DB@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the 
hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 9c70f3914..7bd49dd8b 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -21,7 +21,7 @@ serde_json = ">=1.0.9" slog = "2.5.2" slog-scope = "4.4.0" thiserror = "1.0" -tokio = { version = "1.8.0", features = ["sync"] } +tokio = { version = "1.8.0", features = ["sync", "fs"] } vmm-sys-util = "0.11.0" rand = "0.8.4" @@ -32,4 +32,14 @@ shim-interface = { path = "../../../libs/shim-interface" } dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs","dbs-upcall"] } +ch-config = { path = "ch-config", optional = true } + +futures = "0.3.25" +safe-path = "0.1.0" + [features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. +cloud-hypervisor = ["ch-config"] diff --git a/src/runtime-rs/crates/hypervisor/README.md b/src/runtime-rs/crates/hypervisor/README.md index 8d43baa70..dcaa4e57e 100644 --- a/src/runtime-rs/crates/hypervisor/README.md +++ b/src/runtime-rs/crates/hypervisor/README.md @@ -1,4 +1,26 @@ # Multi-vmm support for runtime-rs + +## 0. Status + +External hypervisor support is currently being developed. + +See [the main tracking issue](https://github.com/kata-containers/kata-containers/issues/4634) +for further details. + +### Cloud Hypervisor + +A basic implementation currently exists for Cloud Hypervisor. However, +since it is not yet fully functional, the feature is disabled by +default. When the implementation matures, the feature will be enabled +by default. 
+ +> **Note:** +> +> To enable the feature, follow the instructions on https://github.com/kata-containers/kata-containers/pull/6201. + +See the [Cloud Hypervisor tracking issue](https://github.com/kata-containers/kata-containers/issues/6263) +for further details. + Some key points for supporting multi-vmm in rust runtime. ## 1. Hypervisor Config diff --git a/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml new file mode 100644 index 000000000..2fd58f9f3 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml @@ -0,0 +1,22 @@ +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "ch-config" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.68" +serde = { version = "1.0.145", features = ["rc", "derive"] } +serde_json = "1.0.91" +tokio = { version = "1.25.0", features = ["sync", "rt"] } + +# Cloud Hypervisor public HTTP API functions +# Note that the version specified is not necessarily the version of CH +# being used. This version is used to pin the CH config structure +# which is relatively static. 
+api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" } diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs new file mode 100644 index 000000000..fe812c7ca --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs @@ -0,0 +1,274 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use crate::net_util::MAC_ADDR_LEN; +use crate::{ + ConsoleConfig, ConsoleOutputMode, CpuTopology, CpusConfig, DeviceConfig, FsConfig, MacAddr, + MemoryConfig, NetConfig, PayloadConfig, PmemConfig, RngConfig, VmConfig, VsockConfig, +}; +use anyhow::{anyhow, Context, Result}; +use api_client::simple_api_full_command_and_response; + +use std::fmt::Display; +use std::net::Ipv4Addr; +use std::os::unix::net::UnixStream; +use std::path::PathBuf; +use tokio::task; + +pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response(&mut socket, "GET", "vmm.ping", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = + simple_api_full_command_and_response(&mut socket, "PUT", "vmm.shutdown", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? 
+} + +pub async fn cloud_hypervisor_vm_create( + sandbox_path: String, + vsock_socket_path: String, + mut socket: UnixStream, + shared_fs_devices: Option>, + pmem_devices: Option>, +) -> Result> { + let cfg = cloud_hypervisor_vm_create_cfg( + sandbox_path, + vsock_socket_path, + shared_fs_devices, + pmem_devices, + ) + .await?; + + let serialised = serde_json::to_string_pretty(&cfg)?; + + task::spawn_blocking(move || -> Result> { + let data = Some(serialised.as_str()); + + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.create", data) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_start(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.boot", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_stop(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = + simple_api_full_command_and_response(&mut socket, "PUT", "vm.shutdown", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_device_add(mut socket: UnixStream) -> Result> { + let device_config = DeviceConfig::default(); + + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-device", + Some(&serde_json::to_string(&device_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? 
+} + +pub async fn cloud_hypervisor_vm_fs_add( + mut socket: UnixStream, + fs_config: FsConfig, +) -> Result> { + let result = task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-fs", + Some(&serde_json::to_string(&fs_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await?; + + result +} + +pub async fn cloud_hypervisor_vm_create_cfg( + // FIXME: + _sandbox_path: String, + vsock_socket_path: String, + shared_fs_devices: Option>, + pmem_devices: Option>, +) -> Result { + let topology = CpuTopology { + threads_per_core: 1, + cores_per_die: 12, + dies_per_package: 1, + packages: 1, + }; + + let cpus = CpusConfig { + boot_vcpus: 1, + max_vcpus: 12, + max_phys_bits: 46, + topology: Some(topology), + ..Default::default() + }; + + let rng = RngConfig { + src: PathBuf::from("/dev/urandom"), + ..Default::default() + }; + + let kernel_args = vec![ + "root=/dev/pmem0p1", + "rootflags=dax,data=ordered,errors=remount-ro", + "ro", + "rootfstype=ext4", + "panic=1", + "no_timer_check", + "noreplace-smp", + "console=ttyS0,115200n8", + "systemd.log_target=console", + "systemd.unit=kata-containers", + "systemd.mask=systemd-networkd.service", + "systemd.mask=systemd-networkd.socket", + "agent.log=debug", + ]; + + let cmdline = kernel_args.join(" "); + + let kernel = PathBuf::from("/opt/kata/share/kata-containers/vmlinux.container"); + + // Note that PmemConfig replaces the PayloadConfig.initrd. 
+ let payload = PayloadConfig { + kernel: Some(kernel), + cmdline: Some(cmdline), + ..Default::default() + }; + + let serial = ConsoleConfig { + mode: ConsoleOutputMode::Tty, + ..Default::default() + }; + + let ip = Ipv4Addr::new(192, 168, 10, 10); + let mask = Ipv4Addr::new(255, 255, 255, 0); + + let mac_str = "12:34:56:78:90:01"; + + let mac = parse_mac(mac_str)?; + + let network = NetConfig { + ip, + mask, + mac, + ..Default::default() + }; + + let memory = MemoryConfig { + size: (1024 * 1024 * 2048), + + // Required + shared: true, + + prefault: false, + hugepages: false, + mergeable: false, + + // FIXME: + hotplug_size: Some(16475226112), + + ..Default::default() + }; + + let fs = shared_fs_devices; + let pmem = pmem_devices; + + let vsock = VsockConfig { + cid: 3, + socket: PathBuf::from(vsock_socket_path), + ..Default::default() + }; + + let cfg = VmConfig { + cpus, + memory, + fs, + serial, + pmem, + payload: Some(payload), + vsock: Some(vsock), + rng, + net: Some(vec![network]), + ..Default::default() + }; + + Ok(cfg) +} + +fn parse_mac(s: &S) -> Result +where + S: AsRef + ?Sized + Display, +{ + let v: Vec<&str> = s.as_ref().split(':').collect(); + let mut bytes = [0u8; MAC_ADDR_LEN]; + + if v.len() != MAC_ADDR_LEN { + return Err(anyhow!( + "invalid MAC {} (length {}, expected {})", + s, + v.len(), + MAC_ADDR_LEN + )); + } + + for i in 0..MAC_ADDR_LEN { + if v[i].len() != 2 { + return Err(anyhow!( + "invalid MAC {} (segment {} length {}, expected {})", + s, + i, + v.len(), + 2 + )); + } + + bytes[i] = + u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?; + } + + Ok(MacAddr { bytes }) +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs new file mode 100644 index 000000000..3e3fb3412 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs @@ -0,0 +1,481 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: 
Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::net::Ipv4Addr; +use std::path::PathBuf; + +pub mod ch_api; +pub mod net_util; +mod virtio_devices; + +use crate::virtio_devices::RateLimiterConfig; +pub use net_util::MacAddr; + +pub const MAX_NUM_PCI_SEGMENTS: u16 = 16; + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct BalloonConfig { + pub size: u64, + /// Option to deflate the balloon in case the guest is out of memory. + #[serde(default)] + pub deflate_on_oom: bool, + /// Option to enable free page reporting from the guest. + #[serde(default)] + pub free_page_reporting: bool, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CmdlineConfig { + pub args: String, +} + +impl CmdlineConfig { + fn is_empty(&self) -> bool { + self.args.is_empty() + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct ConsoleConfig { + //#[serde(default = "default_consoleconfig_file")] + pub file: Option, + pub mode: ConsoleOutputMode, + #[serde(default)] + pub iommu: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum ConsoleOutputMode { + #[default] + Off, + Pty, + Tty, + File, + Null, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuAffinity { + pub vcpu: u8, + pub host_cpus: Vec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpusConfig { + pub boot_vcpus: u8, + pub max_vcpus: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub topology: Option, + #[serde(default)] + pub kvm_hyperv: bool, + #[serde(skip_serializing_if = "u8_is_zero")] + pub max_phys_bits: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub affinity: Option>, + #[serde(default)] + pub features: CpuFeatures, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CpuFeatures 
{ + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub amx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuTopology { + pub threads_per_core: u8, + pub cores_per_die: u8, + pub dies_per_package: u8, + pub packages: u8, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DeviceConfig { + pub path: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DiskConfig { + pub path: Option, + #[serde(default)] + pub readonly: bool, + #[serde(default)] + pub direct: bool, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_diskconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_diskconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + pub vhost_socket: Option, + #[serde(default)] + pub rate_limiter_config: Option, + #[serde(default)] + pub id: Option, + // For testing use only. Not exposed in API. 
+ #[serde(default)] + pub disable_io_uring: bool, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct FsConfig { + pub tag: String, + pub socket: PathBuf, + //#[serde(default = "default_fsconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_fsconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum HotplugMethod { + #[default] + Acpi, + VirtioMem, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct InitramfsConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct KernelConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryConfig { + pub size: u64, + #[serde(default)] + pub mergeable: bool, + #[serde(default)] + pub hotplug_method: HotplugMethod, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplug_size: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplugged_size: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hugepage_size: Option, + #[serde(default)] + pub prefault: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub zones: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryZoneConfig { + pub id: String, + pub size: u64, + #[serde(default)] + pub file: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + pub hugepage_size: Option, + #[serde(default)] + pub host_numa_node: Option, + 
#[serde(default)] + pub hotplug_size: Option, + #[serde(default)] + pub hotplugged_size: Option, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NetConfig { + //#[serde(default = "default_netconfig_tap")] + #[serde(skip_serializing_if = "Option::is_none")] + pub tap: Option, + //#[serde(default = "default_netconfig_ip")] + pub ip: Ipv4Addr, + //#[serde(default = "default_netconfig_mask")] + pub mask: Ipv4Addr, + //#[serde(default = "default_netconfig_mac")] + pub mac: MacAddr, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub host_mac: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub mtu: Option, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_netconfig_num_queues")] + #[serde(skip_serializing_if = "usize_is_zero")] + pub num_queues: usize, + //#[serde(default = "default_netconfig_queue_size")] + #[serde(skip_serializing_if = "u16_is_zero")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub vhost_socket: Option, + #[serde(default)] + pub vhost_mode: VhostMode, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub fds: Option>, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub rate_limiter_config: Option, + #[serde(default)] + #[serde(skip_serializing_if = "u16_is_zero")] + pub pci_segment: u16, +} + +impl Default for NetConfig { + fn default() -> Self { + NetConfig { + tap: None, + ip: Ipv4Addr::new(0, 0, 0, 0), + mask: Ipv4Addr::new(0, 0, 0, 0), + mac: MacAddr::default(), + host_mac: None, + mtu: None, + iommu: false, + num_queues: 0, + queue_size: 0, + vhost_user: false, + vhost_socket: None, + vhost_mode: VhostMode::default(), + id: None, + fds: None, + rate_limiter_config: None, + 
pci_segment: 0, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaConfig { + #[serde(default)] + pub guest_numa_id: u32, + #[serde(default)] + pub cpus: Option>, + #[serde(default)] + pub distances: Option>, + #[serde(default)] + pub memory_zones: Option>, + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub sgx_epc_sections: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaDistance { + #[serde(default)] + pub destination: u32, + #[serde(default)] + pub distance: u8, +} + +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct PayloadConfig { + #[serde(default)] + pub firmware: Option, + #[serde(default)] + pub kernel: Option, + #[serde(default)] + pub cmdline: Option, + #[serde(default)] + pub initramfs: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PlatformConfig { + //#[serde(default = "default_platformconfig_num_pci_segments")] + pub num_pci_segments: u16, + #[serde(default)] + pub iommu_segments: Option>, + #[serde(default)] + pub serial_number: Option, + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub oem_strings: Option>, + #[cfg(feature = "tdx")] + #[serde(default)] + pub tdx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PmemConfig { + pub file: PathBuf, + #[serde(default)] + pub size: Option, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub discard_writes: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct RngConfig { + pub src: PathBuf, + #[serde(default)] + pub iommu: bool, +} + +#[cfg(target_arch = "x86_64")] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct SgxEpcConfig { + pub id: String, + #[serde(default)] + pub size: 
u64, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct UserDeviceConfig { + pub socket: PathBuf, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VdpaConfig { + pub path: PathBuf, + //#[serde(default = "default_vdpaconfig_num_queues")] + pub num_queues: usize, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum VhostMode { + #[default] + Client, + Server, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VmConfig { + #[serde(default)] + pub cpus: CpusConfig, + #[serde(default)] + pub memory: MemoryConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub kernel: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub initramfs: Option, + #[serde(default)] + #[serde(skip_serializing_if = "CmdlineConfig::is_empty")] + pub cmdline: CmdlineConfig, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub payload: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub disks: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub net: Option>, + #[serde(default)] + pub rng: RngConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub balloon: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub fs: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub pmem: Option>, + //#[serde(default = "ConsoleConfig::default_serial")] + pub serial: ConsoleConfig, + //#[serde(default = "ConsoleConfig::default_console")] + pub console: ConsoleConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + 
pub user_devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vdpa: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vsock: Option, + #[serde(default)] + pub iommu: bool, + #[cfg(target_arch = "x86_64")] + #[serde(skip_serializing_if = "Option::is_none")] + pub sgx_epc: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub numa: Option>, + #[serde(default)] + pub watchdog: bool, + #[cfg(feature = "guest_debug")] + pub gdb: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub platform: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VsockConfig { + pub cid: u64, + pub socket: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +//-------------------------------------------------------------------- +// For serde serialization + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u8_is_zero(v: &u8) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn usize_is_zero(v: &usize) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u16_is_zero(v: &u16) -> bool { + *v == 0 +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs new file mode 100644 index 000000000..00a079462 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs @@ -0,0 +1,32 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize, Serializer}; +use std::fmt; + +pub const MAC_ADDR_LEN: usize = 6; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Default)] +pub struct MacAddr { + pub bytes: [u8; MAC_ADDR_LEN], +} + +// Note: Implements ToString automatically. 
+impl fmt::Display for MacAddr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let b = &self.bytes; + write!( + f, + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + ) + } +} + +// Requried to remove the `bytes` member from the serialized JSON! +impl Serialize for MacAddr { + fn serialize(&self, serializer: S) -> Result { + self.to_string().serialize(serializer) + } +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs new file mode 100644 index 000000000..02bf04bf9 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs @@ -0,0 +1,19 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenBucketConfig { + pub size: u64, + pub one_time_burst: Option, + pub refill_time: u64, +} + +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct RateLimiterConfig { + pub bandwidth: Option, + pub ops: Option, +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs new file mode 100644 index 000000000..7f65ac11b --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs @@ -0,0 +1,148 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::HypervisorState; +use crate::device::Device; +use crate::VmmState; +use anyhow::Result; +use async_trait::async_trait; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use kata_types::config::hypervisor::HYPERVISOR_NAME_CH; +use persist::sandbox_persist::Persist; +use std::os::unix::net::UnixStream; 
+use tokio::process::Child; +use tokio::sync::watch::{channel, Receiver, Sender}; +use tokio::task::JoinHandle; + +#[derive(Debug)] +pub struct CloudHypervisorInner { + pub(crate) state: VmmState, + pub(crate) id: String, + + pub(crate) api_socket: Option, + pub(crate) extra_args: Option>, + + pub(crate) config: Option, + + pub(crate) process: Option, + pub(crate) pid: Option, + + pub(crate) timeout_secs: i32, + + pub(crate) netns: Option, + + // Sandbox-specific directory + pub(crate) vm_path: String, + + // Hypervisor runtime directory + pub(crate) run_dir: String, + + // Subdirectory of vm_path. + pub(crate) jailer_root: String, + + /// List of devices that will be added to the VM once it boots + pub(crate) pending_devices: Option>, + + pub(crate) _capabilities: Capabilities, + + pub(crate) shutdown_tx: Option>, + pub(crate) shutdown_rx: Option>, + pub(crate) tasks: Option>>>, +} + +unsafe impl Send for CloudHypervisorInner {} +unsafe impl Sync for CloudHypervisorInner {} + +const CH_DEFAULT_TIMEOUT_SECS: u32 = 10; + +impl CloudHypervisorInner { + pub fn new() -> Self { + let mut capabilities = Capabilities::new(); + capabilities.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::FsSharingSupport, + ); + + let (tx, rx) = channel(true); + + Self { + api_socket: None, + extra_args: None, + + process: None, + pid: None, + + config: None, + state: VmmState::NotReady, + timeout_secs: CH_DEFAULT_TIMEOUT_SECS as i32, + id: String::default(), + jailer_root: String::default(), + vm_path: String::default(), + run_dir: String::default(), + netns: None, + pending_devices: None, + _capabilities: capabilities, + shutdown_tx: Some(tx), + shutdown_rx: Some(rx), + tasks: None, + } + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = Some(config); + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + self.config.clone().unwrap_or_default() + } +} + +impl Default for 
CloudHypervisorInner { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Persist for CloudHypervisorInner { + type State = HypervisorState; + type ConstructorArgs = (); + + // Return a state object that will be saved by the caller. + async fn save(&self) -> Result { + Ok(HypervisorState { + hypervisor_type: HYPERVISOR_NAME_CH.to_string(), + id: self.id.clone(), + vm_path: self.vm_path.clone(), + jailed: false, + jailer_root: String::default(), + netns: None, + config: self.hypervisor_config(), + run_dir: self.run_dir.clone(), + cached_block_devices: Default::default(), + ..Default::default() + }) + } + + // Set the hypervisor state to the specified state + async fn restore( + _hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let ch = Self { + config: Some(hypervisor_state.config), + state: VmmState::NotReady, + id: hypervisor_state.id, + vm_path: hypervisor_state.vm_path, + run_dir: hypervisor_state.run_dir, + + ..Default::default() + }; + + Ok(ch) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs new file mode 100644 index 000000000..03cf95daf --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs @@ -0,0 +1,235 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::device::{Device, ShareFsDeviceConfig}; +use crate::HybridVsockConfig; +use crate::VmmState; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::cloud_hypervisor_vm_fs_add; +use ch_config::{FsConfig, PmemConfig}; +use safe_path::scoped_join; +use std::convert::TryFrom; +use std::path::PathBuf; + +const VIRTIO_FS: &str = "virtio-fs"; + +impl CloudHypervisorInner { + pub(crate) async fn add_device(&mut self, device: Device) -> Result<()> { + if self.state != 
VmmState::VmRunning { + let mut devices: Vec = if let Some(devices) = self.pending_devices.take() { + devices + } else { + vec![] + }; + + devices.insert(0, device); + + self.pending_devices = Some(devices); + + return Ok(()); + } + + self.handle_add_device(device).await?; + + Ok(()) + } + + async fn handle_add_device(&mut self, device: Device) -> Result<()> { + match device { + Device::ShareFsDevice(cfg) => self.handle_share_fs_device(cfg).await, + Device::HybridVsock(cfg) => self.handle_hvsock_device(&cfg).await, + _ => return Err(anyhow!("unhandled device: {:?}", device)), + } + } + + /// Add the device that were requested to be added before the VMM was + /// started. + #[allow(dead_code)] + pub(crate) async fn handle_pending_devices_after_boot(&mut self) -> Result<()> { + if self.state != VmmState::VmRunning { + return Err(anyhow!( + "cannot handle pending devices with VMM state {:?}", + self.state + )); + } + + if let Some(mut devices) = self.pending_devices.take() { + while let Some(dev) = devices.pop() { + self.add_device(dev).await.context("add_device")?; + } + } + + Ok(()) + } + + pub(crate) async fn remove_device(&mut self, _device: Device) -> Result<()> { + Ok(()) + } + + async fn handle_share_fs_device(&mut self, cfg: ShareFsDeviceConfig) -> Result<()> { + if cfg.fs_type != VIRTIO_FS { + return Err(anyhow!("cannot handle share fs type: {:?}", cfg.fs_type)); + } + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + 1 + }; + + let queue_size: u16 = if cfg.queue_num > 0 { + u16::try_from(cfg.queue_size)? + } else { + 1024 + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + scoped_join(&self.vm_path, cfg.sock_path)? 
+ }; + + let fs_config = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + let response = cloud_hypervisor_vm_fs_add( + socket.try_clone().context("failed to clone socket")?, + fs_config, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "fs add response: {:?}", detail); + } + + Ok(()) + } + + async fn handle_hvsock_device(&mut self, _cfg: &HybridVsockConfig) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_shared_fs_devices(&mut self) -> Result>> { + let pending_root_devices = self.pending_devices.take(); + + let mut root_devices = Vec::::new(); + + if let Some(devices) = pending_root_devices { + for dev in devices { + match dev { + Device::ShareFsDevice(dev) => { + let settings = ShareFsSettings::new(dev, self.vm_path.clone()); + + let fs_cfg = FsConfig::try_from(settings)?; + + root_devices.push(fs_cfg); + } + _ => continue, + }; + } + + Ok(Some(root_devices)) + } else { + Ok(None) + } + } + + pub(crate) async fn get_boot_file(&mut self) -> Result { + if let Some(ref config) = self.config { + let boot_info = &config.boot_info; + + let file = if !boot_info.initrd.is_empty() { + boot_info.initrd.clone() + } else if !boot_info.image.is_empty() { + boot_info.image.clone() + } else { + return Err(anyhow!("missing boot file (no image or initrd)")); + }; + + Ok(PathBuf::from(file)) + } else { + Err(anyhow!("no hypervisor config")) + } + } + + pub(crate) async fn get_pmem_devices(&mut self) -> Result>> { + let file = self.get_boot_file().await?; + + let pmem_cfg = PmemConfig { + file, + size: None, + iommu: false, + discard_writes: true, + id: None, + pci_segment: 0, + }; + + let pmem_devices = vec![pmem_cfg]; + + Ok(Some(pmem_devices)) + } +} + +#[derive(Debug)] +pub struct ShareFsSettings { + cfg: ShareFsDeviceConfig, + vm_path: String, +} + +impl ShareFsSettings { + pub fn new(cfg: ShareFsDeviceConfig, vm_path: String) -> Self { + ShareFsSettings { cfg, vm_path } + } +} + 
+impl TryFrom for FsConfig { + type Error = anyhow::Error; + + fn try_from(settings: ShareFsSettings) -> Result { + let cfg = settings.cfg; + let vm_path = settings.vm_path; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + 1 + }; + + let queue_size: u16 = if cfg.queue_num > 0 { + u16::try_from(cfg.queue_size)? + } else { + 1024 + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + PathBuf::from(vm_path).join(cfg.sock_path) + }; + + let fs_cfg = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + Ok(fs_cfg) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs new file mode 100644 index 000000000..b3271ee79 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -0,0 +1,486 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::ch::utils::get_api_socket_path; +use crate::ch::utils::{get_jailer_root, get_sandbox_path, get_vsock_path}; +use crate::Device; +use crate::VsockConfig; +use crate::{VcpuThreadIds, VmmState}; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::{ + cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping, + cloud_hypervisor_vmm_shutdown, +}; +use core::future::poll_fn; +use futures::executor::block_on; +use futures::future::join_all; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use std::fs::create_dir_all; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::process::Stdio; +use tokio::io::AsyncBufReadExt; +use tokio::io::BufReader; +use tokio::process::{Child, Command}; +use tokio::sync::watch::Receiver; +use tokio::task; +use tokio::task::JoinHandle; +use tokio::time::Duration; 
+ +const CH_NAME: &str = "cloud-hypervisor"; + +/// Number of milliseconds to wait before retrying a CH operation. +const CH_POLL_TIME_MS: u64 = 50; + +impl CloudHypervisorInner { + async fn start_hypervisor(&mut self, timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_launch(timeout_secs) + .await + .context("launch failed")?; + + self.cloud_hypervisor_setup_comms() + .await + .context("comms setup failed")?; + + self.cloud_hypervisor_check_running() + .await + .context("hypervisor running check failed")?; + + self.state = VmmState::VmmServerReady; + + Ok(()) + } + + async fn boot_vm(&mut self) -> Result<()> { + let shared_fs_devices = self.get_shared_fs_devices().await?; + + let pmem_devices = self.get_pmem_devices().await?; + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let sandbox_path = get_sandbox_path(&self.id)?; + + std::fs::create_dir_all(sandbox_path.clone()).context("failed to create sandbox path")?; + + let vsock_socket_path = get_vsock_path(&self.id)?; + + let response = cloud_hypervisor_vm_create( + sandbox_path, + vsock_socket_path, + socket.try_clone().context("failed to clone socket")?, + shared_fs_devices, + pmem_devices, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm boot response: {:?}", detail); + } + + let response = + cloud_hypervisor_vm_start(socket.try_clone().context("failed to clone socket")?) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm start response: {:?}", detail); + } + + self.state = VmmState::VmRunning; + + Ok(()) + } + + async fn cloud_hypervisor_setup_comms(&mut self) -> Result<()> { + let api_socket_path = get_api_socket_path(&self.id)?; + + // The hypervisor has just been spawned, but may not yet have created + // the API socket, so repeatedly try to connect for up to + // timeout_secs. 
+ let join_handle: JoinHandle> = + task::spawn_blocking(move || -> Result { + let api_socket: UnixStream; + + loop { + let result = UnixStream::connect(api_socket_path.clone()); + + if let Ok(result) = result { + api_socket = result; + break; + } + + std::thread::sleep(Duration::from_millis(CH_POLL_TIME_MS)); + } + + Ok(api_socket) + }); + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + self.timeout_secs + ); + + let result = + tokio::time::timeout(Duration::from_secs(self.timeout_secs as u64), join_handle) + .await + .context(timeout_msg)?; + + let result = result?; + + let api_socket = result?; + + self.api_socket = Some(api_socket); + + Ok(()) + } + + async fn cloud_hypervisor_check_running(&mut self) -> Result<()> { + let timeout_secs = self.timeout_secs; + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + timeout_secs + ); + + let join_handle = self.cloud_hypervisor_ping_until_ready(CH_POLL_TIME_MS); + + let result = tokio::time::timeout(Duration::new(timeout_secs as u64, 0), join_handle) + .await + .context(timeout_msg)?; + + result + } + + async fn cloud_hypervisor_ensure_not_launched(&self) -> Result<()> { + if let Some(child) = &self.process { + return Err(anyhow!( + "{} already running with PID {}", + CH_NAME, + child.id().unwrap_or(0) + )); + } + + Ok(()) + } + + async fn cloud_hypervisor_launch(&mut self, _timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_ensure_not_launched().await?; + + let debug = false; + + let disable_seccomp = true; + + let api_socket_path = get_api_socket_path(&self.id)?; + + let _ = std::fs::remove_file(api_socket_path.clone()); + + let binary_path = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))? 
+ .path + .to_string(); + + let path = Path::new(&binary_path).canonicalize()?; + + let mut cmd = Command::new(path); + + cmd.current_dir("/"); + + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + cmd.env("RUST_BACKTRACE", "full"); + + cmd.args(["--api-socket", &api_socket_path]); + + if let Some(extra_args) = &self.extra_args { + cmd.args(extra_args); + } + + if debug { + cmd.arg("-v"); + } + + if disable_seccomp { + cmd.args(["--seccomp", "false"]); + } + + let child = cmd.spawn().context(format!("{} spawn failed", CH_NAME))?; + + // Save process PID + self.pid = child.id(); + + let shutdown = self + .shutdown_rx + .as_ref() + .ok_or("no receiver channel") + .map_err(|e| anyhow!(e))? + .clone(); + + let ch_outputlogger_task = tokio::spawn(cloud_hypervisor_log_output(child, shutdown)); + + let tasks = vec![ch_outputlogger_task]; + + self.tasks = Some(tasks); + + Ok(()) + } + + async fn cloud_hypervisor_shutdown(&mut self) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let response = + cloud_hypervisor_vmm_shutdown(socket.try_clone().context("shutdown failed")?).await?; + + if let Some(detail) = response { + debug!(sl!(), "shutdown response: {:?}", detail); + } + + // Trigger a controlled shutdown + self.shutdown_tx + .as_mut() + .ok_or("no shutdown channel") + .map_err(|e| anyhow!(e))? 
+ .send(true) + .map_err(|e| anyhow!(e).context("failed to request shutdown"))?; + + let tasks = self + .tasks + .take() + .ok_or("no tasks") + .map_err(|e| anyhow!(e))?; + + let results = join_all(tasks).await; + + let mut wait_errors: Vec = vec![]; + + for result in results { + if let Err(e) = result { + eprintln!("wait task error: {:#?}", e); + + wait_errors.push(e); + } + } + + if wait_errors.is_empty() { + Ok(()) + } else { + Err(anyhow!("wait all tasks failed: {:#?}", wait_errors)) + } + } + + #[allow(dead_code)] + async fn cloud_hypervisor_wait(&mut self) -> Result<()> { + let mut child = self + .process + .take() + .ok_or(format!("{} not running", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + let _pid = child + .id() + .ok_or(format!("{} missing PID", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) + } + + async fn cloud_hypervisor_ping_until_ready(&mut self, _poll_time_ms: u64) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + loop { + let response = + cloud_hypervisor_vmm_ping(socket.try_clone().context("failed to clone socket")?) 
+ .await + .context("ping failed"); + + if let Ok(response) = response { + if let Some(detail) = response { + debug!(sl!(), "ping response: {:?}", detail); + } + break; + } + + tokio::time::sleep(Duration::from_millis(CH_POLL_TIME_MS)).await; + } + + Ok(()) + } + + pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option) -> Result<()> { + self.id = id.to_string(); + self.state = VmmState::NotReady; + + self.setup_environment().await?; + + self.netns = netns; + + let vsock_cfg = VsockConfig::new(self.id.clone()).await?; + + let dev = Device::Vsock(vsock_cfg); + self.add_device(dev).await.context("add vsock device")?; + + self.start_hypervisor(self.timeout_secs).await?; + + Ok(()) + } + + async fn setup_environment(&mut self) -> Result<()> { + // run_dir and vm_path are the same (shared) + self.run_dir = get_sandbox_path(&self.id)?; + self.vm_path = self.run_dir.to_string(); + + create_dir_all(&self.run_dir) + .with_context(|| anyhow!("failed to create sandbox directory {}", self.run_dir))?; + + if !self.jailer_root.is_empty() { + create_dir_all(self.jailer_root.as_str()) + .map_err(|e| anyhow!("Failed to create dir {} err : {:?}", self.jailer_root, e))?; + } + + Ok(()) + } + + pub(crate) async fn start_vm(&mut self, timeout_secs: i32) -> Result<()> { + self.setup_environment().await?; + + self.timeout_secs = timeout_secs; + + self.boot_vm().await?; + + Ok(()) + } + + pub(crate) fn stop_vm(&mut self) -> Result<()> { + block_on(self.cloud_hypervisor_shutdown())?; + + Ok(()) + } + + pub(crate) fn pause_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) fn resume_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_agent_socket(&self) -> Result { + const HYBRID_VSOCK_SCHEME: &str = "hvsock"; + + let vsock_path = get_vsock_path(&self.id)?; + + let uri = format!("{}://{}", HYBRID_VSOCK_SCHEME, vsock_path); + + Ok(uri) + } + + pub(crate) async fn disconnect(&mut self) { + 
self.state = VmmState::NotReady; + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + Ok(VcpuThreadIds::default()) + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_pids(&self) -> Result> { + Ok(Vec::::new()) + } + + pub(crate) async fn check(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + let root_path = get_jailer_root(&self.id)?; + + std::fs::create_dir_all(&root_path)?; + + Ok(root_path) + } + + pub(crate) async fn capabilities(&self) -> Result { + let mut caps = Capabilities::default(); + caps.set(CapabilityBits::FsSharingSupport); + Ok(caps) + } +} + +// Log all output from the CH process until a shutdown signal is received. +// When that happens, stop logging and wait for the child process to finish +// before returning. +async fn cloud_hypervisor_log_output(mut child: Child, mut shutdown: Receiver) -> Result<()> { + let stdout = child + .stdout + .as_mut() + .ok_or("failed to get child stdout") + .map_err(|e| anyhow!(e))?; + + let stdout_reader = BufReader::new(stdout); + let mut stdout_lines = stdout_reader.lines(); + + let stderr = child + .stderr + .as_mut() + .ok_or("failed to get child stderr") + .map_err(|e| anyhow!(e))?; + + let stderr_reader = BufReader::new(stderr); + let mut stderr_lines = stderr_reader.lines(); + + loop { + tokio::select! 
{ + _ = shutdown.changed() => { + info!(sl!(), "got shutdown request"); + break; + }, + stderr_line = poll_fn(|cx| Pin::new(&mut stderr_lines).poll_next_line(cx)) => { + if let Ok(line) = stderr_line { + let line = line.ok_or("missing stderr line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stderr"); + } + }, + stdout_line = poll_fn(|cx| Pin::new(&mut stdout_lines).poll_next_line(cx)) => { + if let Ok(line) = stdout_line { + let line = line.ok_or("missing stdout line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stdout"); + } + }, + }; + } + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs new file mode 100644 index 000000000..d589c18df --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs @@ -0,0 +1,163 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::HypervisorState; +use crate::{device::Device, Hypervisor, VcpuThreadIds}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use kata_types::capabilities::Capabilities; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use persist::sandbox_persist::Persist; +use std::sync::Arc; +use tokio::sync::RwLock; + +// Convenience macro to obtain the scope logger +#[macro_export] +macro_rules! 
sl { + () => { + slog_scope::logger().new(o!("subsystem" => "cloud-hypervisor")) + }; + } + +mod inner; +mod inner_device; +mod inner_hypervisor; +mod utils; + +use inner::CloudHypervisorInner; + +#[derive(Debug, Default, Clone)] +pub struct CloudHypervisor { + inner: Arc>, +} + +unsafe impl Send for CloudHypervisor {} +unsafe impl Sync for CloudHypervisor {} + +impl CloudHypervisor { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(CloudHypervisorInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for CloudHypervisor { + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm() + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.pause_vm() + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.resume_vm() + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.save_vm().await + } + + async fn add_device(&self, device: Device) -> Result<()> { + let mut inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: Device) -> Result<()> { + let mut inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.write().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn 
hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.write().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + self.save().await + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } +} + +#[async_trait] +impl Persist for CloudHypervisor { + type State = HypervisorState; + type ConstructorArgs = (); + + async fn save(&self) -> Result { + let inner = self.inner.read().await; + inner.save().await.context("save CH hypervisor state") + } + + async fn restore( + hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let inner = CloudHypervisorInner::restore(hypervisor_args, hypervisor_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/utils.rs b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs new file mode 100644 index 000000000..dcea26aeb --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs @@ -0,0 +1,53 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::Result; +use shim_interface::KATA_PATH; + +// The socket used to connect to CH. This is used for CH API communications. 
+const CH_API_SOCKET_NAME: &str = "ch-api.sock"; + +// The socket that allows runtime-rs to connect direct through to the Kata +// Containers agent running inside the CH hosted VM. +const CH_VM_SOCKET_NAME: &str = "ch-vm.sock"; + +const CH_JAILER_DIR: &str = "root"; + +// Return the path for a _hypothetical_ sandbox: the path does *not* exist +// yet, and for this reason safe-path cannot be used. +pub fn get_sandbox_path(id: &str) -> Result { + let path = [KATA_PATH, id].join("/"); + + Ok(path) +} + +// Return the path for a _hypothetical_ API socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_api_socket_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_API_SOCKET_NAME].join("/"); + + Ok(path) +} + +// Return the path for a _hypothetical_ sandbox specific VSOCK socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_vsock_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_VM_SOCKET_NAME].join("/"); + + Ok(path) +} + +pub fn get_jailer_root(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_JAILER_DIR].join("/"); + + Ok(path) +} diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index a74fe28f8..849ec4a2b 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -27,7 +27,6 @@ use std::{collections::HashSet, fs::create_dir_all, path::PathBuf}; const DRAGONBALL_KERNEL: &str = "vmlinux"; const DRAGONBALL_ROOT_FS: &str = "rootfs"; -unsafe impl Send for DragonballInner {} unsafe impl Sync for DragonballInner {} pub struct DragonballInner { /// sandbox id @@ -101,7 +100,10 @@ impl DragonballInner { // get kernel params let mut kernel_params = 
KernelParams::new(self.config.debug_info.enable_debug); - kernel_params.append(&mut KernelParams::new_rootfs_kernel_params(&rootfs_driver)); + kernel_params.append(&mut KernelParams::new_rootfs_kernel_params( + &rootfs_driver, + &self.config.boot_info.rootfs_type, + )?); kernel_params.append(&mut KernelParams::from_string( &self.config.boot_info.kernel_params, )); diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs index 37ffd69b9..2886043c8 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs @@ -22,8 +22,6 @@ use tokio::sync::RwLock; use crate::{device::Device, Hypervisor, VcpuThreadIds}; -unsafe impl Send for Dragonball {} -unsafe impl Sync for Dragonball {} pub struct Dragonball { inner: Arc>, } diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs index 00829ad4c..9837ea667 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs @@ -314,7 +314,7 @@ impl VmmInstance { return Ok(vmm_data); } Err(vmm_action_error) => { - if let VmmActionError::UpcallNotReady = vmm_action_error { + if let VmmActionError::UpcallServerNotReady = vmm_action_error { std::thread::sleep(std::time::Duration::from_millis(10)); continue; } else { diff --git a/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs index a04fd24b0..ea870f342 100644 --- a/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs +++ b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs @@ -8,7 +8,7 @@ use crate::HypervisorConfig; use serde::{Deserialize, Serialize}; use std::collections::HashSet; -#[derive(Serialize, Deserialize, Default)] +#[derive(Serialize, Deserialize, Default, Clone, Debug)] pub struct 
HypervisorState { // Type of hypervisor, E.g. dragonball/qemu/firecracker/acrn. pub hypervisor_type: String, diff --git a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs index 39bef9a64..7ad17cb8f 100644 --- a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs +++ b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs @@ -6,7 +6,10 @@ use anyhow::{anyhow, Result}; -use crate::{VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM}; +use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, VM_ROOTFS_ROOT_PMEM, +}; use kata_types::config::LOG_VPORT_OPTION; // Port where the agent will send the logs. Logs are sent through the vsock in cases @@ -67,43 +70,49 @@ impl KernelParams { Self { params } } - pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str) -> Self { - let params = match rootfs_driver { - VM_ROOTFS_DRIVER_BLK => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/vda1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] - } + pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str, rootfs_type: &str) -> Result { + let mut params = vec![]; + + match rootfs_driver { VM_ROOTFS_DRIVER_PMEM => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/pmem0p1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro,dax ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] + params.push(Param::new("root", VM_ROOTFS_ROOT_PMEM)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new( + "rootflags", + "dax,data=ordered,errors=remount-ro ro", + )); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + 
params.push(Param::new("rootflags", "dax ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } } - _ => vec![], - }; - Self { params } + VM_ROOTFS_DRIVER_BLK => { + params.push(Param::new("root", VM_ROOTFS_ROOT_BLK)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new("rootflags", "data=ordered,errors=remount-ro ro")); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } + } + _ => { + return Err(anyhow!("Unsupported rootfs driver")); + } + } + + params.push(Param::new("rootfstype", rootfs_type)); + + Ok(Self { params }) } pub(crate) fn append(&mut self, params: &mut KernelParams) { @@ -155,6 +164,12 @@ mod tests { use super::*; + use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, + VM_ROOTFS_ROOT_PMEM, + }; + #[test] fn test_params() { let param1 = Param::new("", ""); @@ -190,4 +205,142 @@ mod tests { Ok(()) } + + #[derive(Debug)] + struct TestData<'a> { + rootfs_driver: &'a str, + rootfs_type: &'a str, + expect_params: KernelParams, + result: Result<()>, + } + + #[test] + fn test_rootfs_kernel_params() { + let tests = &[ + // EXT4 + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + 
.to_vec(), + }, + result: Ok(()), + }, + // XFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // EROFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // Unsupported rootfs driver + TestData { + rootfs_driver: "foo", + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs driver")), + }, + // Unsupported rootfs type + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: "foo", + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", 
"data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs type")), + }, + ]; + + for (i, t) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, t); + let result = KernelParams::new_rootfs_kernel_params(t.rootfs_driver, t.rootfs_type); + let msg = format!("{}, result: {:?}", msg, result); + + if t.result.is_ok() { + assert!(result.is_ok(), "{}", msg); + assert_eq!(t.expect_params, result.unwrap()); + } else { + let expected_error = format!("{}", t.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + } } diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index 2c3e8016f..3c417f195 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -19,14 +19,30 @@ pub use kernel_param::Param; mod utils; use std::collections::HashMap; +#[cfg(feature = "cloud-hypervisor")] +pub mod ch; + use anyhow::Result; use async_trait::async_trait; use hypervisor_persist::HypervisorState; use kata_types::capabilities::Capabilities; use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; + +pub use kata_types::config::hypervisor::HYPERVISOR_NAME_CH; + // Config which driver to use as vm root dev const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk"; const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem"; + +//Configure the root corresponding to the driver +const VM_ROOTFS_ROOT_BLK: &str = "/dev/vda1"; +const VM_ROOTFS_ROOT_PMEM: &str = "/dev/pmem0p1"; + +// Config which filesystem to use as rootfs type +const VM_ROOTFS_FILESYSTEM_EXT4: &str = "ext4"; +const VM_ROOTFS_FILESYSTEM_XFS: &str = "xfs"; +const VM_ROOTFS_FILESYSTEM_EROFS: &str = "erofs"; + // before using hugepages for VM, we need to mount hugetlbfs // /dev/hugepages will be the mount point // mkdir -p /dev/hugepages 
@@ -38,7 +54,7 @@ const SHMEM: &str = "shmem"; pub const HYPERVISOR_DRAGONBALL: &str = "dragonball"; pub const HYPERVISOR_QEMU: &str = "qemu"; -#[derive(PartialEq)] +#[derive(PartialEq, Debug, Clone)] pub(crate) enum VmmState { NotReady, VmmServerReady, @@ -46,7 +62,7 @@ pub(crate) enum VmmState { } // vcpu mapping from vcpu number to thread number -#[derive(Debug)] +#[derive(Debug, Default)] pub struct VcpuThreadIds { pub vcpus: HashMap, } diff --git a/src/runtime-rs/crates/resource/Cargo.toml b/src/runtime-rs/crates/resource/Cargo.toml index 98714a7c2..7ae1f799f 100644 --- a/src/runtime-rs/crates/resource/Cargo.toml +++ b/src/runtime-rs/crates/resource/Cargo.toml @@ -14,7 +14,7 @@ anyhow = "^1.0" async-trait = "0.1.48" bitflags = "1.2.1" byte-unit = "4.0.14" -cgroups-rs = "0.3.0" +cgroups-rs = "0.3.1" futures = "0.3.11" lazy_static = "1.4.0" libc = ">=0.2.39" diff --git a/src/runtime-rs/crates/resource/src/cgroups/mod.rs b/src/runtime-rs/crates/resource/src/cgroups/mod.rs index 8dbef7f64..7787d2ad0 100644 --- a/src/runtime-rs/crates/resource/src/cgroups/mod.rs +++ b/src/runtime-rs/crates/resource/src/cgroups/mod.rs @@ -9,6 +9,8 @@ mod utils; use std::{ collections::{HashMap, HashSet}, + error::Error, + io, iter::FromIterator, sync::Arc, }; @@ -24,6 +26,8 @@ use oci::LinuxResources; use persist::sandbox_persist::Persist; use tokio::sync::RwLock; +const OS_ERROR_NO_SUCH_PROCESS: i32 = 3; + pub struct CgroupArgs { pub sid: String, pub config: TomlConfig, @@ -109,7 +113,22 @@ impl CgroupsResource { /// overhead_cgroup_manager to the parent and then delete the cgroups. pub async fn delete(&self) -> Result<()> { for cg_pid in self.cgroup_manager.tasks() { - self.cgroup_manager.remove_task(cg_pid)?; + // For now, we can't guarantee that the thread in cgroup_manager does still + // exist. Once it exit, we should ignor that error returned by remove_task + // to let it go. 
+ if let Err(error) = self.cgroup_manager.remove_task(cg_pid) { + match error.source() { + Some(err) => match err.downcast_ref::() { + Some(e) => { + if e.raw_os_error() != Some(OS_ERROR_NO_SUCH_PROCESS) { + return Err(error.into()); + } + } + None => return Err(error.into()), + }, + None => return Err(error.into()), + } + } } self.cgroup_manager diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index 7a603c601..2868ddee3 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -20,6 +20,7 @@ use crate::share_fs::ShareFs; use self::hugepage::{get_huge_page_limits_map, get_huge_page_option}; const BIND: &str = "bind"; + #[async_trait] pub trait Volume: Send + Sync { fn get_volume_mount(&self) -> Result>; diff --git a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs index 95bc2edfb..f3f70424d 100644 --- a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs @@ -17,6 +17,8 @@ use super::Volume; use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig}; use kata_types::mount; +const SYS_MOUNT_PREFIX: [&str; 2] = ["/proc", "/sys"]; + // copy file to container's rootfs if filesystem sharing is not supported, otherwise // bind mount it in the shared directory. // Ignore /dev, directories and all other device files. 
We handle @@ -229,6 +231,7 @@ impl Volume for ShareFsVolume { pub(crate) fn is_share_fs_volume(m: &oci::Mount) -> bool { (m.r#type == "bind" || m.r#type == mount::KATA_EPHEMERAL_VOLUME_TYPE) && !is_host_device(&m.destination) + && !is_system_mount(&m.source) } fn is_host_device(dest: &str) -> bool { @@ -252,6 +255,20 @@ fn is_host_device(dest: &str) -> bool { false } +// Skip mounting certain system paths("/sys/*", "/proc/*") +// from source on the host side into the container as it does not +// make sense to do so. +// Agent will support this kind of bind mount. +fn is_system_mount(src: &str) -> bool { + for p in SYS_MOUNT_PREFIX { + let sub_dir_p = format!("{}/", p); + if src == p || src.contains(sub_dir_p.as_str()) { + return true; + } + } + false +} + // Note, don't generate random name, attaching rafs depends on the predictable name. pub fn generate_mount_path(id: &str, file_name: &str) -> String { let mut nid = String::from(id); @@ -265,3 +282,23 @@ pub fn generate_mount_path(id: &str, file_name: &str) -> String { format!("{}-{}-{}", nid, uid, file_name) } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_is_system_mount() { + let sys_dir = "/sys"; + let proc_dir = "/proc"; + let sys_sub_dir = "/sys/fs/cgroup"; + let proc_sub_dir = "/proc/cgroups"; + let not_sys_dir = "/root"; + + assert!(is_system_mount(sys_dir)); + assert!(is_system_mount(proc_dir)); + assert!(is_system_mount(sys_sub_dir)); + assert!(is_system_mount(proc_sub_dir)); + assert!(!is_system_mount(not_sys_dir)); + } +} diff --git a/src/runtime-rs/crates/runtimes/common/src/sandbox.rs b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs index 5ac028ddb..a08d56d07 100644 --- a/src/runtime-rs/crates/runtimes/common/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs @@ -9,7 +9,7 @@ use async_trait::async_trait; #[async_trait] pub trait Sandbox: Send + Sync { - async fn start(&self, netns: Option) -> Result<()>; + async fn start(&self, netns: Option, dns: Vec) 
-> Result<()>; async fn stop(&self) -> Result<()>; async fn cleanup(&self, container_id: &str) -> Result<()>; async fn shutdown(&self) -> Result<()>; diff --git a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs index 582b4e961..406ccb0b6 100644 --- a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs @@ -11,8 +11,6 @@ use common::{message::Message, RuntimeHandler, RuntimeInstance}; use kata_types::config::TomlConfig; use tokio::sync::mpsc::Sender; -unsafe impl Send for LinuxContainer {} -unsafe impl Sync for LinuxContainer {} pub struct LinuxContainer {} #[async_trait] diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs index f3163a3dd..1fba4e522 100644 --- a/src/runtime-rs/crates/runtimes/src/manager.rs +++ b/src/runtime-rs/crates/runtimes/src/manager.rs @@ -15,11 +15,14 @@ use common::{ RuntimeHandler, RuntimeInstance, Sandbox, }; use hypervisor::Param; -use kata_types::{annotations::Annotation, config::TomlConfig}; +use kata_types::{ + annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig, +}; #[cfg(feature = "linux")] use linux_container::LinuxContainer; use persist::sandbox_persist::Persist; use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER; +use tokio::fs; use tokio::sync::{mpsc::Sender, RwLock}; #[cfg(feature = "virt")] use virt_container::{ @@ -48,6 +51,7 @@ impl RuntimeHandlerManagerInner { async fn init_runtime_handler( &mut self, netns: Option, + dns: Vec, config: Arc, ) -> Result<()> { info!(sl!(), "new runtime handler {}", &config.runtime.name); @@ -70,7 +74,7 @@ impl RuntimeHandlerManagerInner { // start sandbox runtime_instance .sandbox - .start(netns) + .start(netns, dns) .await .context("start sandbox")?; self.runtime_instance = Some(Arc::new(runtime_instance)); @@ -83,6 +87,8 @@ impl RuntimeHandlerManagerInner { return 
Ok(()); } + let mut dns: Vec = vec![]; + #[cfg(feature = "linux")] LinuxContainer::init().context("init linux container")?; #[cfg(feature = "wasm")] @@ -107,8 +113,15 @@ impl RuntimeHandlerManagerInner { None }; + for m in &spec.mounts { + if m.destination == DEFAULT_GUEST_DNS_FILE { + let contents = fs::read_to_string(&m.source).await?; + dns = contents.split('\n').map(|e| e.to_string()).collect(); + } + } + let config = load_config(spec, options).context("load config")?; - self.init_runtime_handler(netns, Arc::new(config)) + self.init_runtime_handler(netns, dns, Arc::new(config)) .await .context("init runtime handler")?; @@ -132,8 +145,6 @@ impl RuntimeHandlerManagerInner { } } -unsafe impl Send for RuntimeHandlerManager {} -unsafe impl Sync for RuntimeHandlerManager {} pub struct RuntimeHandlerManager { inner: Arc>, } diff --git a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml index 0e3fbdc60..6dea5e762 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml +++ b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml @@ -35,3 +35,9 @@ oci = { path = "../../../../libs/oci" } persist = { path = "../../persist"} resource = { path = "../../resource" } +[features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. 
+cloud-hypervisor = [] diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs index 1358db5b2..ba73c17d5 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs @@ -25,8 +25,6 @@ use tokio::sync::RwLock; use super::{logger_with_process, Container}; -unsafe impl Send for VirtContainerManager {} -unsafe impl Sync for VirtContainerManager {} pub struct VirtContainerManager { sid: String, pid: u32, diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs index 99489d7b3..b73caa849 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs @@ -25,12 +25,16 @@ use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU}; use kata_types::config::{ hypervisor::register_hypervisor_plugin, DragonballConfig, QemuConfig, TomlConfig, }; + +#[cfg(feature = "cloud-hypervisor")] +use hypervisor::ch::CloudHypervisor; +#[cfg(feature = "cloud-hypervisor")] +use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig}; + use resource::ResourceManager; use sandbox::VIRTCONTAINER; use tokio::sync::mpsc::Sender; -unsafe impl Send for VirtContainer {} -unsafe impl Sync for VirtContainer {} pub struct VirtContainer {} #[async_trait] @@ -39,8 +43,16 @@ impl RuntimeHandler for VirtContainer { // register let dragonball_config = Arc::new(DragonballConfig::new()); register_hypervisor_plugin("dragonball", dragonball_config); + let qemu_config = Arc::new(QemuConfig::new()); register_hypervisor_plugin("qemu", qemu_config); + + #[cfg(feature = "cloud-hypervisor")] + { + let ch_config = Arc::new(CloudHypervisorConfig::new()); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, ch_config); + } + Ok(()) } @@ -118,6 
+130,17 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result> .await; Ok(Arc::new(hypervisor)) } + + #[cfg(feature = "cloud-hypervisor")] + HYPERVISOR_NAME_CH => { + let mut hypervisor = CloudHypervisor::new(); + + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + + Ok(Arc::new(hypervisor)) + } _ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)), } } diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs index 469d5ea42..2f9a9c5f9 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -57,8 +57,6 @@ impl SandboxInner { } } -unsafe impl Send for VirtSandbox {} -unsafe impl Sync for VirtSandbox {} #[derive(Clone)] pub struct VirtSandbox { sid: String, @@ -123,7 +121,7 @@ impl VirtSandbox { #[async_trait] impl Sandbox for VirtSandbox { - async fn start(&self, netns: Option) -> Result<()> { + async fn start(&self, netns: Option, dns: Vec) -> Result<()> { let id = &self.sid; // if sandbox running, return @@ -170,7 +168,7 @@ impl Sandbox for VirtSandbox { let kernel_modules = KernelModule::set_kernel_modules(agent_config.kernel_modules)?; let req = agent::CreateSandboxRequest { hostname: "".to_string(), - dns: vec![], + dns, storages: self .resource_manager .get_storage_for_sandbox() diff --git a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs index c68727467..77282ac9c 100644 --- a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs @@ -10,8 +10,6 @@ use async_trait::async_trait; use common::{message::Message, RuntimeHandler, RuntimeInstance}; use kata_types::config::TomlConfig; use tokio::sync::mpsc::Sender; -unsafe impl Send for WasmContainer {} -unsafe impl Sync for WasmContainer {} pub struct WasmContainer {} 
#[async_trait] diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 7abcd4b83..edb17be6a 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -119,6 +119,12 @@ IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) INITRDPATH := $(PKGDATADIR)/$(INITRDNAME) INITRDSEVPATH := $(PKGDATADIR)/$(INITRDSEVNAME) IMAGETDXPATH := $(PKGDATADIR)/$(IMAGETDXNAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + FIRMWAREPATH := FIRMWAREVOLUMEPATH := TDVFFIRMWAREPATH := $(PREFIXDEPS)/share/tdvf/OVMF_CODE.fd @@ -504,6 +510,7 @@ USER_VARS += INITRDNAME USER_VARS += INITRDPATH USER_VARS += INITRDSEVNAME USER_VARS += INITRDSEVPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 96847dc13..321bc507b 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -78,6 +78,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC hypervisorPath := filepath.Join(prefixDir, "hypervisor") kernelPath := filepath.Join(prefixDir, "kernel") imagePath := filepath.Join(prefixDir, "image") + rootfsType := "ext4" kernelParams := "foo=bar xyz" machineType := "machineType" disableBlock := true @@ -119,6 +120,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 5f1368ce8..2d2b7065e 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -17,6 +17,12 @@ ctlpath = "@ACRNCTLPATH@" kernel = "@KERNELPATH_ACRN@" 
image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index 373f255dc..9f3381da6 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -16,6 +16,12 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index b7f349c0d..10dc17700 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -16,6 +16,12 @@ path = "@FCPATH@" kernel = "@KERNELPATH_FC@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 48f6a2cfd..f94cf89b5 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -18,6 +18,12 @@ image = "@IMAGEPATH@" # initrd = "@INITRDPATH@" machine_type = "@MACHINETYPE@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. @@ -326,6 +332,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" # Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. +# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. 
diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index 75638b518..ccad5ceea 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -320,7 +320,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") - shimPath := path.Join(dir, "shim") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -338,9 +338,9 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, - ShimPath: shimPath, LogPath: logPath, DisableBlock: disableBlockDevice, BlockDeviceDriver: blockDeviceDriver, @@ -360,7 +360,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err return "", err } - files := []string{hypervisorPath, kernelPath, imagePath, shimPath} + files := []string{hypervisorPath, kernelPath, imagePath} for _, file := range files { // create the resource (which must be >0 bytes) diff --git a/src/runtime/pkg/containerd-shim-v2/metrics.go b/src/runtime/pkg/containerd-shim-v2/metrics.go index 6fc10cae9..69ce552eb 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics.go @@ -9,9 +9,11 @@ import ( "context" cgroupsv1 "github.com/containerd/cgroups/stats/v1" + cgroupsv2 "github.com/containerd/cgroups/v2/stats" "github.com/containerd/typeurl" google_protobuf "github.com/gogo/protobuf/types" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" ) @@ -21,7 +23,18 @@ func 
marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return nil, err } - metrics := statsToMetrics(&stats) + isCgroupV1, err := resCtrl.IsCgroupV1() + if err != nil { + return nil, err + } + + var metrics interface{} + + if isCgroupV1 { + metrics = statsToMetricsV1(&stats) + } else { + metrics = statsToMetricsV2(&stats) + } data, err := typeurl.MarshalAny(metrics) if err != nil { @@ -31,25 +44,40 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return data, nil } -func statsToMetrics(stats *vc.ContainerStats) *cgroupsv1.Metrics { +func statsToMetricsV1(stats *vc.ContainerStats) *cgroupsv1.Metrics { metrics := &cgroupsv1.Metrics{} if stats.CgroupStats != nil { metrics = &cgroupsv1.Metrics{ - Hugetlb: setHugetlbStats(stats.CgroupStats.HugetlbStats), - Pids: setPidsStats(stats.CgroupStats.PidsStats), - CPU: setCPUStats(stats.CgroupStats.CPUStats), - Memory: setMemoryStats(stats.CgroupStats.MemoryStats), - Blkio: setBlkioStats(stats.CgroupStats.BlkioStats), + Hugetlb: setHugetlbStatsV1(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV1(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV1(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV1(stats.CgroupStats.MemoryStats), + Blkio: setBlkioStatsV1(stats.CgroupStats.BlkioStats), } } - metrics.Network = setNetworkStats(stats.NetworkStats) return metrics } -func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { +func statsToMetricsV2(stats *vc.ContainerStats) *cgroupsv2.Metrics { + metrics := &cgroupsv2.Metrics{} + + if stats.CgroupStats != nil { + metrics = &cgroupsv2.Metrics{ + Hugetlb: setHugetlbStatsV2(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV2(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV2(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV2(stats.CgroupStats.MemoryStats), + Io: setBlkioStatsV2(stats.CgroupStats.BlkioStats), + } + } + + return metrics +} + +func setHugetlbStatsV1(vcHugetlb 
map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { var hugetlbStats []*cgroupsv1.HugetlbStat for k, v := range vcHugetlb { hugetlbStats = append( @@ -65,7 +93,22 @@ func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbS return hugetlbStats } -func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { +func setHugetlbStatsV2(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv2.HugeTlbStat { + var hugetlbStats []*cgroupsv2.HugeTlbStat + for k, v := range vcHugetlb { + hugetlbStats = append( + hugetlbStats, + &cgroupsv2.HugeTlbStat{ + Current: v.Usage, + Max: v.MaxUsage, + Pagesize: k, + }) + } + + return hugetlbStats +} + +func setPidsStatsV1(vcPids vc.PidsStats) *cgroupsv1.PidsStat { pidsStats := &cgroupsv1.PidsStat{ Current: vcPids.Current, Limit: vcPids.Limit, @@ -74,8 +117,16 @@ func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { return pidsStats } -func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { +func setPidsStatsV2(vcPids vc.PidsStats) *cgroupsv2.PidsStat { + pidsStats := &cgroupsv2.PidsStat{ + Current: vcPids.Current, + Limit: vcPids.Limit, + } + return pidsStats +} + +func setCPUStatsV1(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { var perCPU []uint64 perCPU = append(perCPU, vcCPU.CPUUsage.PercpuUsage...) 
@@ -96,7 +147,20 @@ func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { return cpuStats } -func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { +func setCPUStatsV2(vcCPU vc.CPUStats) *cgroupsv2.CPUStat { + cpuStats := &cgroupsv2.CPUStat{ + UsageUsec: vcCPU.CPUUsage.TotalUsage / 1000, + UserUsec: vcCPU.CPUUsage.UsageInKernelmode / 1000, + SystemUsec: vcCPU.CPUUsage.UsageInUsermode / 1000, + NrPeriods: vcCPU.ThrottlingData.Periods, + NrThrottled: vcCPU.ThrottlingData.ThrottledPeriods, + ThrottledUsec: vcCPU.ThrottlingData.ThrottledTime / 1000, + } + + return cpuStats +} + +func setMemoryStatsV1(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { memoryStats := &cgroupsv1.MemoryStat{ Usage: &cgroupsv1.MemoryEntry{ Limit: vcMemory.Usage.Limit, @@ -146,22 +210,41 @@ func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { return memoryStats } -func setBlkioStats(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { +func setMemoryStatsV2(vcMemory vc.MemoryStats) *cgroupsv2.MemoryStat { + memoryStats := &cgroupsv2.MemoryStat{ + Usage: vcMemory.Usage.Usage, + UsageLimit: vcMemory.Usage.Limit, + SwapUsage: vcMemory.SwapUsage.Usage, + SwapLimit: vcMemory.SwapUsage.Limit, + } + + return memoryStats +} + +func setBlkioStatsV1(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { blkioStats := &cgroupsv1.BlkIOStat{ - IoServiceBytesRecursive: copyBlkio(vcBlkio.IoServiceBytesRecursive), - IoServicedRecursive: copyBlkio(vcBlkio.IoServicedRecursive), - IoQueuedRecursive: copyBlkio(vcBlkio.IoQueuedRecursive), - SectorsRecursive: copyBlkio(vcBlkio.SectorsRecursive), - IoServiceTimeRecursive: copyBlkio(vcBlkio.IoServiceTimeRecursive), - IoWaitTimeRecursive: copyBlkio(vcBlkio.IoWaitTimeRecursive), - IoMergedRecursive: copyBlkio(vcBlkio.IoMergedRecursive), - IoTimeRecursive: copyBlkio(vcBlkio.IoTimeRecursive), + IoServiceBytesRecursive: copyBlkioV1(vcBlkio.IoServiceBytesRecursive), + IoServicedRecursive: copyBlkioV1(vcBlkio.IoServicedRecursive), + IoQueuedRecursive: 
copyBlkioV1(vcBlkio.IoQueuedRecursive), + SectorsRecursive: copyBlkioV1(vcBlkio.SectorsRecursive), + IoServiceTimeRecursive: copyBlkioV1(vcBlkio.IoServiceTimeRecursive), + IoWaitTimeRecursive: copyBlkioV1(vcBlkio.IoWaitTimeRecursive), + IoMergedRecursive: copyBlkioV1(vcBlkio.IoMergedRecursive), + IoTimeRecursive: copyBlkioV1(vcBlkio.IoTimeRecursive), } return blkioStats } -func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { +func setBlkioStatsV2(vcBlkio vc.BlkioStats) *cgroupsv2.IOStat { + ioStats := &cgroupsv2.IOStat{ + Usage: copyBlkioV2(vcBlkio.IoServiceBytesRecursive), + } + + return ioStats +} + +func copyBlkioV1(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { ret := make([]*cgroupsv1.BlkIOEntry, len(s)) for i, v := range s { ret[i] = &cgroupsv1.BlkIOEntry{ @@ -175,6 +258,28 @@ func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { return ret } +func copyBlkioV2(s []vc.BlkioStatEntry) []*cgroupsv2.IOEntry { + var ret []*cgroupsv2.IOEntry + item := cgroupsv2.IOEntry{} + for _, v := range s { + switch v.Op { + case "read": + item.Rbytes = v.Value + case "write": + item.Wbytes = v.Value + case "rios": + item.Rios = v.Value + case "wios": + item.Wios = v.Value + } + item.Major = v.Major + item.Minor = v.Minor + } + ret = append(ret, &item) + + return ret +} + func setNetworkStats(vcNetwork []*vc.NetworkStats) []*cgroupsv1.NetworkStat { networkStats := make([]*cgroupsv1.NetworkStat, len(vcNetwork)) for i, v := range vcNetwork { diff --git a/src/runtime/pkg/containerd-shim-v2/metrics_test.go b/src/runtime/pkg/containerd-shim-v2/metrics_test.go index e2a9177a2..57842d93e 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics_test.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics_test.go @@ -17,8 +17,7 @@ import ( ) func TestStatNetworkMetric(t *testing.T) { - - assert := assert.New(t) + assertions := assert.New(t) var err error mockNetwork := []*vc.NetworkStats{ @@ -52,8 +51,8 @@ func TestStatNetworkMetric(t *testing.T) { }() resp, err := 
sandbox.StatsContainer(context.Background(), testContainerID) - assert.NoError(err) + assertions.NoError(err) - metrics := statsToMetrics(&resp) - assert.Equal(expectedNetwork, metrics.Network) + metrics := statsToMetricsV1(&resp) + assertions.Equal(expectedNetwork, metrics.Network) } diff --git a/src/runtime/pkg/containerd-shim-v2/shim_management.go b/src/runtime/pkg/containerd-shim-v2/shim_management.go index 7b59caef3..74c750918 100644 --- a/src/runtime/pkg/containerd-shim-v2/shim_management.go +++ b/src/runtime/pkg/containerd-shim-v2/shim_management.go @@ -315,11 +315,11 @@ func GetSandboxesStoragePathRust() string { // SocketAddress returns the address of the unix domain socket for communicating with the // shim management endpoint func SocketAddress(id string) string { - socketAddress := fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock")) - _, err := os.Stat(socketAddress) - // if the path not exist, check the rust runtime path - if err != nil { + // get the go runtime uds path + socketPath := filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock") + // if the path not exist, use the rust runtime uds path instead + if _, err := os.Stat(socketPath); err != nil { return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePathRust(), id, "shim-monitor.sock")) } - return socketAddress + return fmt.Sprintf("unix://%s", socketPath) } diff --git a/src/runtime/pkg/containerd-shim-v2/stream_test.go b/src/runtime/pkg/containerd-shim-v2/stream_test.go index ea4f026ca..c85633c88 100644 --- a/src/runtime/pkg/containerd-shim-v2/stream_test.go +++ b/src/runtime/pkg/containerd-shim-v2/stream_test.go @@ -10,6 +10,7 @@ import ( "io" "os" "path/filepath" + "runtime" "syscall" "testing" "time" @@ -96,6 +97,10 @@ func TestNewTtyIOFifoReopen(t *testing.T) { } func TestIoCopy(t *testing.T) { + // This test fails on aarch64 regularly, 
temporarily skip it + if runtime.GOARCH == "arm64" { + t.Skip("Skip TestIoCopy for aarch64") + } assert := assert.New(t) ctx := context.TODO() diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index f1078671a..35af6afec 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -87,6 +87,8 @@ const ( // Define the string key for DriverOptions in DeviceInfo struct FsTypeOpt = "fstype" BlockDriverOpt = "block-driver" + + VhostUserReconnectTimeOutOpt = "vhost-user-reconnect-timeout" ) const ( @@ -97,6 +99,15 @@ const ( VhostUserSCSIMajor = 242 ) +const ( + + // The timeout for reconnecting on non-server sockets when the remote end + // goes away. + // qemu will delay this many seconds and then attempt to reconnect. Zero + // disables reconnecting, and is the default. + DefaultVhostUserReconnectTimeOut = 0 +) + // Defining these as a variable instead of a const, to allow // overriding this in the tests. @@ -320,6 +331,9 @@ type VhostUserDeviceAttrs struct { CacheSize uint32 QueueSize uint32 + + // Reconnect timeout for socket of vhost user block device + ReconnectTime uint32 } // GetHostPathFunc is function pointer used to mock GetHostPath in tests. 
diff --git a/src/runtime/pkg/device/drivers/vhost_user_blk.go b/src/runtime/pkg/device/drivers/vhost_user_blk.go index 49c66e711..fbf195c30 100644 --- a/src/runtime/pkg/device/drivers/vhost_user_blk.go +++ b/src/runtime/pkg/device/drivers/vhost_user_blk.go @@ -8,6 +8,7 @@ package drivers import ( "context" + "strconv" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" @@ -72,17 +73,19 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De } vAttrs := &config.VhostUserDeviceAttrs{ - DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), - SocketPath: device.DeviceInfo.HostPath, - Type: config.VhostUserBlk, - Index: index, + DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), + SocketPath: device.DeviceInfo.HostPath, + Type: config.VhostUserBlk, + Index: index, + ReconnectTime: vhostUserReconnect(device.DeviceInfo.DriverOptions), } deviceLogger().WithFields(logrus.Fields{ - "device": device.DeviceInfo.HostPath, - "SocketPath": vAttrs.SocketPath, - "Type": config.VhostUserBlk, - "Index": index, + "device": device.DeviceInfo.HostPath, + "SocketPath": vAttrs.SocketPath, + "Type": config.VhostUserBlk, + "Index": index, + "ReconnectTime": vAttrs.ReconnectTime, }).Info("Attaching device") device.VhostUserDeviceAttrs = vAttrs @@ -93,6 +96,24 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De return nil } +func vhostUserReconnect(customOptions map[string]string) uint32 { + var vhostUserReconnectTimeout uint32 + + if customOptions == nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + } else { + reconnectTimeoutStr := customOptions[config.VhostUserReconnectTimeOutOpt] + if reconnectTimeout, err := strconv.Atoi(reconnectTimeoutStr); err != nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + deviceLogger().WithField("reconnect", 
reconnectTimeoutStr).WithError(err).Warn("Failed to get reconnect timeout for vhost-user-blk device") + } else { + vhostUserReconnectTimeout = uint32(reconnectTimeout) + } + } + + return vhostUserReconnectTimeout +} + func isVirtioBlkBlockDriver(customOptions map[string]string) bool { var blockDriverOption string diff --git a/src/runtime/pkg/device/manager/manager.go b/src/runtime/pkg/device/manager/manager.go index eed9e39f1..34a51d300 100644 --- a/src/runtime/pkg/device/manager/manager.go +++ b/src/runtime/pkg/device/manager/manager.go @@ -10,6 +10,7 @@ import ( "context" "encoding/hex" "errors" + "fmt" "sync" "github.com/sirupsen/logrus" @@ -42,6 +43,8 @@ type deviceManager struct { sync.RWMutex vhostUserStoreEnabled bool + + vhostUserReconnectTimeout uint32 } func deviceLogger() *logrus.Entry { @@ -49,11 +52,12 @@ func deviceLogger() *logrus.Entry { } // NewDeviceManager creates a deviceManager object behaved as api.DeviceManager -func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, devices []api.Device) api.DeviceManager { +func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, vhostUserReconnect uint32, devices []api.Device) api.DeviceManager { dm := &deviceManager{ - vhostUserStoreEnabled: vhostUserStoreEnabled, - vhostUserStorePath: vhostUserStorePath, - devices: make(map[string]api.Device), + vhostUserStoreEnabled: vhostUserStoreEnabled, + vhostUserStorePath: vhostUserStorePath, + vhostUserReconnectTimeout: vhostUserReconnect, + devices: make(map[string]api.Device), } if blockDriver == config.VirtioMmio { dm.blockDriver = config.VirtioMmio @@ -119,6 +123,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device devInfo.DriverOptions = make(map[string]string) } devInfo.DriverOptions[config.BlockDriverOpt] = dm.blockDriver + devInfo.DriverOptions[config.VhostUserReconnectTimeOutOpt] = fmt.Sprintf("%d", dm.vhostUserReconnectTimeout) return 
drivers.NewVhostUserBlkDevice(&devInfo), nil } else if isBlock(devInfo) { if devInfo.DriverOptions == nil { diff --git a/src/runtime/pkg/device/manager/manager_test.go b/src/runtime/pkg/device/manager/manager_test.go index 1070e6b8e..49e339f60 100644 --- a/src/runtime/pkg/device/manager/manager_test.go +++ b/src/runtime/pkg/device/manager/manager_test.go @@ -208,7 +208,7 @@ func TestAttachBlockDevice(t *testing.T) { } func TestAttachDetachDevice(t *testing.T) { - dm := NewDeviceManager(config.VirtioSCSI, false, "", nil) + dm := NewDeviceManager(config.VirtioSCSI, false, "", 0, nil) path := "/dev/hda" deviceInfo := config.DeviceInfo{ diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 3f57855c7..5ff258aed 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -2668,8 +2668,9 @@ type Config struct { qemuParams []string } -// appendFDs append a list of file descriptors to the qemu configuration and -// returns a slice of offset file descriptors that will be seen by the qemu process. +// appendFDs appends a list of arbitrary file descriptors to the qemu configuration and +// returns a slice of consecutive file descriptors that will be seen by the qemu process. +// Please see the comment below for details. func (config *Config) appendFDs(fds []*os.File) []int { var fdInts []int @@ -2681,6 +2682,10 @@ func (config *Config) appendFDs(fds []*os.File) []int { // ExtraFiles specifies additional open files to be inherited by the // new process. It does not include standard input, standard output, or // standard error. If non-nil, entry i becomes file descriptor 3+i. + // This means that arbitrary file descriptors fd0, fd1... fdN passed in + // the array will be presented to the guest as consecutive descriptors + // 3, 4... N+3. The golang library internally relies on dup2() to do + // the renumbering. 
for i := range fds { fdInts = append(fdInts, oldLen+3+i) } diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index a5fd2f6f6..3d89fbe66 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -1540,7 +1540,7 @@ func (q *QMP) ExecuteGetFD(ctx context.Context, fdname string, fd *os.File) erro // ExecuteCharDevUnixSocketAdd adds a character device using as backend a unix socket, // id is an identifier for the device, path specifies the local path of the unix socket, // wait is to block waiting for a client to connect, server specifies that the socket is a listening socket. -func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool) error { +func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool, reconnect uint32) error { data := map[string]interface{}{ "server": server, "addr": map[string]interface{}{ @@ -1556,6 +1556,10 @@ func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, data["wait"] = wait } + if reconnect > 0 { + data["reconnect"] = reconnect + } + args := map[string]interface{}{ "id": id, "backend": map[string]interface{}{ diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index c6bee11e7..17492f6fd 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -1445,7 +1445,7 @@ func TestExecuteCharDevUnixSocketAdd(t *testing.T) { cfg := QMPConfig{Logger: qmpTestLogger{}} q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) checkVersion(t, connectedCh) - err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true) + err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true, 1) if err != nil { t.Fatalf("Unexpected error %v", err) } diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 
2aa0754ca..4e3a784a2 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -211,9 +211,9 @@ type RuntimeConfigOptions struct { DefaultGuestHookPath string KernelPath string ImagePath string + RootfsType string KernelParams string MachineType string - ShimPath string LogPath string BlockDeviceDriver string BlockDeviceAIO string @@ -236,7 +236,6 @@ type RuntimeConfigOptions struct { HypervisorDebug bool RuntimeDebug bool RuntimeTrace bool - ShimDebug bool AgentDebug bool AgentTrace bool EnablePprof bool @@ -309,6 +308,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { block_device_aio = "` + config.BlockDeviceAIO + `" kernel_params = "` + config.KernelParams + `" image = "` + config.ImagePath + `" + rootfs_type = "` + config.RootfsType + `" machine_type = "` + config.MachineType + `" default_vcpus = ` + strconv.FormatUint(uint64(config.DefaultVCPUCount), 10) + ` default_maxvcpus = ` + strconv.FormatUint(uint64(config.DefaultMaxVCPUCount), 10) + ` @@ -323,10 +323,6 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { shared_fs = "` + config.SharedFS + `" virtio_fs_daemon = "` + config.VirtioFSDaemon + `" - [shim.kata] - path = "` + config.ShimPath + `" - enable_debug = ` + strconv.FormatBool(config.ShimDebug) + ` - [agent.kata] enable_debug = ` + strconv.FormatBool(config.AgentDebug) + ` enable_tracing = ` + strconv.FormatBool(config.AgentTrace) + ` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 47c371ac5..9dd138778 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -44,6 +44,7 @@ var defaultJailerPath = "/usr/bin/jailer" var defaultImagePath = "/usr/share/kata-containers/kata-containers.img" var defaultKernelPath = "/usr/share/kata-containers/vmlinuz.container" var defaultInitrdPath = "/usr/share/kata-containers/kata-containers-initrd.img" +var 
defaultRootfsType = "ext4" var defaultFirmwarePath = "" var defaultFirmwareVolumePath = "" var defaultMachineAccelerators = "" @@ -80,9 +81,10 @@ const defaultHotplugVFIOOnRootBus bool = false const defaultPCIeRootPort = 0 const defaultEntropySource = "/dev/urandom" const defaultGuestHookPath string = "" -const defaultVirtioFSCacheMode = "none" +const defaultVirtioFSCacheMode = "never" const defaultDisableImageNvdimm = false const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" +const defaultVhostUserDeviceReconnect = 0 const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) const defaultConfidentialGuest = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 139311ff6..51bf23a29 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -84,6 +84,7 @@ type hypervisor struct { CtlPath string `toml:"ctlpath"` Initrd string `toml:"initrd"` Image string `toml:"image"` + RootfsType string `toml:"rootfs_type"` Firmware string `toml:"firmware"` FirmwareVolume string `toml:"firmware_volume"` MachineAccelerators string `toml:"machine_accelerators"` @@ -146,6 +147,7 @@ type hypervisor struct { BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` EnableVhostUserStore bool `toml:"enable_vhost_user_store"` + VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` MemPrealloc bool `toml:"enable_mem_prealloc"` HugePages bool `toml:"enable_hugepages"` @@ -272,6 +274,16 @@ func (h hypervisor) image() (string, error) { return ResolvePath(p) } +func (h hypervisor) rootfsType() (string, error) { + p := h.RootfsType + + if p == "" { + p = "ext4" + } + + return p, nil +} + func (h hypervisor) firmware() (string, error) { p := h.Firmware @@ -666,6 +678,11 @@ func newFirecrackerHypervisorConfig(h 
hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -689,6 +706,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), @@ -736,6 +754,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + pflashes, err := h.PFlash() if err != nil { return vc.HypervisorConfig{}, err @@ -866,6 +889,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { SNPGuestPolicy: h.getSnpGuestPolicy(), SEVCertChainPath: h.SEVCertChainPath, DisableGuestSeLinux: h.DisableGuestSeLinux, + RootfsType: rootfsType, }, nil } @@ -895,6 +919,11 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -912,6 +941,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HypervisorPathList: h.HypervisorPathList, KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, HypervisorCtlPath: hypervisorctl, HypervisorCtlPathList: h.CtlPathList, FirmwarePath: firmware, @@ -962,6 +992,11 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image or initrd must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, 
err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -996,6 +1031,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, MachineAccelerators: machineAccelerators, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), @@ -1055,15 +1091,23 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { if err != nil { return vc.HypervisorConfig{}, err } + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } + + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + kernelParams := h.kernelParams() return vc.HypervisorConfig{ KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), @@ -1287,6 +1331,8 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { SEVGuestPolicy: defaultSEVGuestPolicy, SNPGuestPolicy: defaultSNPGuestPolicy, SEVCertChainPath: defaultSEVCertChainPath, + VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect, + RootfsType: defaultRootfsType, } } diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 6610c6589..683b76ddc 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -74,6 +74,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -94,6 +95,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, 
ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, @@ -153,6 +155,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: defaultVCPUCount, @@ -543,6 +546,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, HypervisorMachineType: defaultMachineType, NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: defaultMaxVCPUCount, diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 9e73108b2..437995f39 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -481,6 +481,12 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.VhostUserDeviceReconnect).setUint(func(reconnect uint64) { + config.HypervisorConfig.VhostUserDeviceReconnect = uint32(reconnect) + }); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.GuestHookPath]; ok { if value != "" { config.HypervisorConfig.GuestHookPath = value diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go index 4e1f029e0..449a89e9a 100644 --- a/src/runtime/pkg/resourcecontrol/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -19,9 +19,6 @@ var ( ErrCgroupMode = errors.New("cgroup controller type error") ) -// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. 
-const DefaultResourceControllerID = "/vc" - func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { var st unix.Stat_t deviceRule := devices.Rule{ diff --git a/src/runtime/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go index e39e6c046..59ca788f8 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -18,6 +18,9 @@ import ( "golang.org/x/sys/unix" ) +// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. +const DefaultResourceControllerID = "/vc" + // ValidCgroupPathV1 returns a valid cgroup path for cgroup v1. // see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path func ValidCgroupPathV1(path string, systemdCgroup bool) (string, error) { @@ -140,6 +143,16 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } +func IsCgroupV1() (bool, error) { + if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid { + return true, nil + } else if cgroups.Mode() == cgroups.Unified { + return false, nil + } else { + return false, ErrCgroupMode + } +} + func SetThreadAffinity(threadID int, cpuSetSlice []int) error { unixCPUSet := unix.CPUSet{} diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 6e281a169..ad54a0477 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -255,7 +255,11 @@ func (a *Acrn) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso } a.id = id - a.arch = newAcrnArch(a.config) + var err error + a.arch, err = newAcrnArch(a.config) + if err != nil { + return err + } return nil } diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 8d8c42242..77fb8e9e5 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ 
b/src/runtime/virtcontainers/acrn_arch_base.go @@ -64,7 +64,7 @@ type acrnArch interface { appendBlockDevice(devices []Device, drive config.BlockDrive) []Device // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error } type acrnArchBase struct { @@ -314,7 +314,7 @@ func MaxAcrnVCPUs() uint32 { return uint32(8) } -func newAcrnArch(config HypervisorConfig) acrnArch { +func newAcrnArch(config HypervisorConfig) (acrnArch, error) { a := &acrnArchBase{ path: acrnPath, ctlpath: acrnctlPath, @@ -323,8 +323,11 @@ func newAcrnArch(config HypervisorConfig) acrnArch { kernelParams: acrnKernelParams, } - a.handleImagePath(config) - return a + if err := a.handleImagePath(config); err != nil { + return nil, err + } + + return a, nil } func (a *acrnArchBase) acrnPath() (string, error) { @@ -788,10 +791,11 @@ func (a *acrnArchBase) appendBlockDevice(devices []Device, drive config.BlockDri return devices } -func (a *acrnArchBase) handleImagePath(config HypervisorConfig) { +func (a *acrnArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { a.kernelParams = append(a.kernelParams, acrnKernelRootParams...) a.kernelParamsNonDebug = append(a.kernelParamsNonDebug, acrnKernelParamsSystemdNonDebug...) a.kernelParamsDebug = append(a.kernelParamsDebug, acrnKernelParamsSystemdDebug...) 
} + return nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 966920334..b0860fce0 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -80,7 +80,6 @@ const ( clhAPISocket = "clh-api.sock" virtioFsSocket = "virtiofsd.sock" defaultClhPath = "/usr/local/bin/cloud-hypervisor" - virtioFsCacheAlways = "always" ) // Interface that hides the implementation of openAPI client @@ -504,10 +503,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) - // First take the default parameters defined by this driver - params := commonNvdimmKernelRootParams - if clh.config.ConfidentialGuest { - params = commonVirtioblkKernelRootParams + params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, false) + if err != nil { + return err } params = append(params, clhKernelParams...) @@ -649,12 +647,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // StartVM will start the VMM and boot the virtual machine for the given sandbox. 
func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { - span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) + span, ctx := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() - ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) - defer cancel() - clh.Logger().WithField("function", "StartVM").Info("starting Sandbox") vmPath := filepath.Join(clh.config.VMStorePath, clh.id) @@ -693,6 +688,9 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { } clh.state.PID = pid + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + if err := clh.bootVM(ctx); err != nil { return err } diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 6865f4b1f..20e6935d8 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -53,6 +53,7 @@ func newClhConfig() (HypervisorConfig, error) { return HypervisorConfig{ KernelPath: testClhKernelPath, ImagePath: testClhImagePath, + RootfsType: string(EXT4), HypervisorPath: testClhPath, NumVCPUs: defaultVCPUs, BlockDeviceDriver: config.VirtioBlock, @@ -60,7 +61,7 @@ func newClhConfig() (HypervisorConfig, error) { DefaultBridges: defaultBridges, DefaultMaxVCPUs: uint32(64), SharedFS: config.VirtioFS, - VirtioFSCache: virtioFsCacheAlways, + VirtioFSCache: typeVirtioFSCacheModeAlways, VirtioFSDaemon: testVirtiofsdPath, NetRateLimiterBwMaxRate: int64(0), NetRateLimiterBwOneTimeBurst: int64(0), diff --git a/src/runtime/virtcontainers/container_linux_test.go b/src/runtime/virtcontainers/container_linux_test.go index 4e3564814..581461e61 100644 --- a/src/runtime/virtcontainers/container_linux_test.go +++ b/src/runtime/virtcontainers/container_linux_test.go @@ -216,7 +216,7 @@ func TestContainerAddDriveDir(t 
*testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: testSandboxID, - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), hypervisor: &mockHypervisor{}, agent: &mockAgent{}, config: &SandboxConfig{ diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index 0115fb011..ac2ce540c 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -82,7 +82,7 @@ func TestContainerRemoveDrive(t *testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: "sandbox", - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), config: &SandboxConfig{}, } diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 92bad7f3a..1646f0623 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -89,7 +89,7 @@ const ( // Specify the minimum version of firecracker supported var fcMinSupportedVersion = semver.MustParse("0.21.1") -var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ +var fcKernelParams = []Param{ // The boot source is the first partition of the first block device added {"pci", "off"}, {"reboot", "k"}, @@ -101,7 +101,7 @@ var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ // Firecracker doesn't support ACPI // Fix kernel error "ACPI BIOS Error (bug)" {"acpi", "off"}, -}...) +} func (s vmmState) String() string { switch s { @@ -700,6 +700,11 @@ func (fc *firecracker) fcInitConfiguration(ctx context.Context) error { return err } + params, err := GetKernelRootParams(fc.config.RootfsType, true, false) + if err != nil { + return err + } + fcKernelParams = append(params, fcKernelParams...) 
if fc.config.Debug { fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"}) } else { diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index 322a26a51..5600e8e9c 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -43,16 +43,16 @@ func NewFilesystemShare(s *Sandbox) (FilesystemSharer, error) { }, nil } -// Logger returns a logrus logger appropriate for logging Filesystem sharing messages +// Logger returns a logrus logger appropriate for logging filesystem sharing messages func (f *FilesystemShare) Logger() *logrus.Entry { return virtLog.WithFields(logrus.Fields{ - "subsystem": "filesystem share", + "subsystem": "fs_share", "sandbox": f.sandbox.ID(), }) } func (f *FilesystemShare) prepareBindMounts(ctx context.Context) error { - span, ctx := katatrace.Trace(ctx, f.Logger(), "setupBindMounts", fsShareTracingTags) + span, ctx := katatrace.Trace(ctx, f.Logger(), "prepareBindMounts", fsShareTracingTags) defer span.End() var err error diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index d44ba11c6..d0ecb1259 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -94,25 +94,74 @@ var ( // cores. var defaultMaxVCPUs = govmm.MaxVCPUs() -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +// RootfsDriver describes a rootfs driver. 
+type RootfsDriver string -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +const ( + // VirtioBlk is the Virtio-Blk rootfs driver. + VirtioBlk RootfsDriver = "/dev/vda1" -// agnostic list of kernel root parameters for virtio-blk -var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/vda1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, + // Nvdimm is the Nvdimm rootfs driver. + Nvdimm RootfsType = "/dev/pmem0p1" +) + +// RootfsType describes a rootfs type. +type RootfsType string + +const ( + // EXT4 is the ext4 filesystem. + EXT4 RootfsType = "ext4" + + // XFS is the xfs filesystem. + XFS RootfsType = "xfs" + + // EROFS is the erofs filesystem. + EROFS RootfsType = "erofs" +) + +func GetKernelRootParams(rootfstype string, disableNvdimm bool, dax bool) ([]Param, error) { + var kernelRootParams []Param + + // EXT4 filesystem is used by default. + if rootfstype == "" { + rootfstype = string(EXT4) + } + + if disableNvdimm && dax { + return []Param{}, fmt.Errorf("Virtio-Blk does not support DAX") + } + + if disableNvdimm { + // Virtio-Blk + kernelRootParams = append(kernelRootParams, Param{"root", string(VirtioBlk)}) + } else { + // Nvdimm + kernelRootParams = append(kernelRootParams, Param{"root", string(Nvdimm)}) + } + + switch RootfsType(rootfstype) { + case EROFS: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "ro"}) + } + case XFS: + fallthrough + // EXT4 filesystem is used by default. 
+ case EXT4: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax,data=ordered,errors=remount-ro ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "data=ordered,errors=remount-ro ro"}) + } + default: + return []Param{}, fmt.Errorf("unsupported rootfs type") + } + + kernelRootParams = append(kernelRootParams, Param{"rootfstype", rootfstype}) + + return kernelRootParams, nil } // DeviceType describes a virtualized device type. @@ -261,6 +310,7 @@ type Param struct { } // HypervisorConfig is the hypervisor configuration. +// nolint: govet type HypervisorConfig struct { customAssets map[types.AssetType]*types.Asset SeccompSandbox string @@ -316,6 +366,8 @@ type HypervisorConfig struct { HypervisorParams []Param DiskRateLimiterBwOneTimeBurst int64 DiskRateLimiterOpsMaxRate int64 + RootfsType string + VhostUserDeviceReconnect uint32 DiskRateLimiterOpsOneTimeBurst int64 SGXEPCSize int64 DefaultMaxMemorySize uint64 diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index e540aea48..d67a38698 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -7,13 +7,167 @@ package virtcontainers import ( "fmt" - "os" - "testing" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/stretchr/testify/assert" + "os" + "testing" ) +func TestGetKernelRootParams(t *testing.T) { + assert := assert.New(t) + tests := []struct { + rootfstype string + expected []Param + disableNvdimm bool + dax bool + error bool + }{ + // EXT4 + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro 
ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // XFS + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // EROFS + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // Unsupported rootfs type + { + rootfstype: "foo", + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: true, + }, + + // Nvdimm does not support DAX + { + rootfstype: string(EXT4), + expected: 
[]Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: true, + error: true, + }, + } + + for _, t := range tests { + kernelRootParams, err := GetKernelRootParams(t.rootfstype, t.disableNvdimm, t.dax) + if t.error { + assert.Error(err) + continue + } else { + assert.NoError(err) + } + assert.Equal(t.expected, kernelRootParams, + "Invalid parameters rootfstype: %v, disableNvdimm: %v, dax: %v, "+ + "unable to get kernel root params", t.rootfstype, t.disableNvdimm, t.dax) + } +} + func testSetHypervisorType(t *testing.T, value string, expected HypervisorType) { var hypervisorType HypervisorType assert := assert.New(t) diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 4913aeec9..3925ceaa2 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -93,7 +93,6 @@ var ( type9pFs = "9p" typeVirtioFS = "virtiofs" typeOverlayFS = "overlay" - typeVirtioFSNoCache = "never" kata9pDevType = "9p" kataMmioBlkDevType = "mmioblk" kataBlkDevType = "blk" @@ -816,7 +815,7 @@ func setupStorages(ctx context.Context, sandbox *Sandbox) []*grpc.Storage { // directly map contents from the host. When set to 'never', the mount // options should not contain 'dax' lest the virtio-fs daemon crashing // with an invalid address reference. - if sandbox.config.HypervisorConfig.VirtioFSCache != typeVirtioFSNoCache { + if sandbox.config.HypervisorConfig.VirtioFSCache != typeVirtioFSCacheModeNever { // If virtio_fs_cache_size = 0, dax should not be used. 
if sandbox.config.HypervisorConfig.VirtioFSCacheSize != 0 { sharedDirVirtioFSOptions = append(sharedDirVirtioFSOptions, sharedDirVirtioFSDaxOptions) diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 3b95b0779..b2564e5a8 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -408,7 +408,7 @@ func TestHandleBlockVolume(t *testing.T) { mounts = append(mounts, vMount, bMount, dMount) tmpDir := "/vhost/user/dir" - dm := manager.NewDeviceManager(config.VirtioBlock, true, tmpDir, devices) + dm := manager.NewDeviceManager(config.VirtioBlock, true, tmpDir, 0, devices) sConfig := SandboxConfig{} sConfig.HypervisorConfig.BlockDeviceDriver = config.VirtioBlock @@ -466,7 +466,7 @@ func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-scsi", false, "", nil), + devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil), }, devices: ctrDevices, } @@ -499,7 +499,7 @@ func TestAppendDevices(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-blk", false, "", ctrDevices), + devManager: manager.NewDeviceManager("virtio-blk", false, "", 0, ctrDevices), config: sandboxConfig, }, } @@ -547,7 +547,7 @@ func TestAppendVhostUserBlkDevices(t *testing.T) { testVhostUserStorePath := "/test/vhost/user/store/path" c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-blk", true, testVhostUserStorePath, ctrDevices), + devManager: manager.NewDeviceManager("virtio-blk", true, testVhostUserStorePath, 0, ctrDevices), config: sandboxConfig, }, } diff --git a/src/runtime/virtcontainers/persist_test.go b/src/runtime/virtcontainers/persist_test.go index 96b947e5b..2e851d69d 100644 --- a/src/runtime/virtcontainers/persist_test.go +++ b/src/runtime/virtcontainers/persist_test.go @@ -32,7 +32,7 @@ func 
TestSandboxRestore(t *testing.T) { sandbox := Sandbox{ id: "test-exp", containers: container, - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), hypervisor: &mockHypervisor{}, network: network, ctx: context.Background(), diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index a94878f93..3584ccd70 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -87,7 +87,7 @@ const ( AssetHashType = kataAnnotationsPrefix + "asset_hash_type" // - // Generic annotations + // Generic annotations // // KernelParams is a sandbox annotation for passing additional guest kernel parameters. @@ -112,6 +112,10 @@ const ( // related folders, sockets and device nodes should be. VhostUserStorePath = kataAnnotHypervisorPrefix + "vhost_user_store_path" + // VhostUserDeviceReconnect is a sandbox annotation to specify the timeout for reconnecting on + // non-server sockets when the remote end goes away. + VhostUserDeviceReconnect = kataAnnotHypervisorPrefix + "vhost_user_reconnect_timeout_sec" + // GuestHookPath is a sandbox annotation to specify the path within the VM that will be used for 'drop-in' hooks. GuestHookPath = kataAnnotHypervisorPrefix + "guest_hook_path" @@ -134,7 +138,7 @@ const ( UseLegacySerial = kataAnnotHypervisorPrefix + "use_legacy_serial" // - // CPU Annotations + // CPU Annotations // // DefaultVCPUs is a sandbox annotation for passing the default vcpus assigned for a VM by the hypervisor. @@ -144,7 +148,7 @@ const ( DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus" // - // Memory related annotations + // Memory related annotations // // DefaultMemory is a sandbox annotation for the memory assigned for a VM by the hypervisor. 
@@ -175,7 +179,7 @@ const ( FileBackedMemRootDir = kataAnnotHypervisorPrefix + "file_mem_backend" // - // Shared File System related annotations + // Shared File System related annotations // // Msize9p is a sandbox annotation to specify as the msize for 9p shares @@ -197,7 +201,7 @@ const ( VirtioFSExtraArgs = kataAnnotHypervisorPrefix + "virtio_fs_extra_args" // - // Block Device related annotations + // Block Device related annotations // // BlockDeviceDriver specifies the driver to be used for block device either VirtioSCSI or VirtioBlock diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 0ce2b8815..9b4260e86 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1581,7 +1581,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri } func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.VhostUserDeviceAttrs, op Operation, devID string) (err error) { - err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false) + err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false, vAttr.ReconnectTime) if err != nil { return err } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index d5918deb6..780c3f711 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -174,7 +174,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { q.qemuMachine.Options += "sgx-epc.0.memdev=epc0,sgx-epc.0.node=0" } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } @@ -308,6 +310,16 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, File: firmware, FirmwareVolume: firmwareVolume, }), "", nil + case sevProtection: + return append(devices, + 
govmmQemu.Object{ + Type: govmmQemu.SEVGuest, + ID: "sev", + Debug: false, + File: firmware, + CBitPos: cpuid.AMDMemEncrypt.CBitPosition, + ReducedPhysBits: 1, + }), "", nil case snpProtection: return append(devices, govmmQemu.Object{ diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 850118f69..113fad627 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -272,6 +272,26 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { assert.Error(err) assert.Empty(bios) + // sev protection + amd64.(*qemuAmd64).protection = sevProtection + + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + assert.NoError(err) + assert.Empty(bios) + + expectedOut := []govmmQemu.Device{ + govmmQemu.Object{ + Type: govmmQemu.SEVGuest, + ID: "sev", + Debug: false, + File: firmware, + CBitPos: cpuid.AMDMemEncrypt.CBitPosition, + ReducedPhysBits: 1, + }, + } + + assert.Equal(expectedOut, devices) + // snp protection amd64.(*qemuAmd64).protection = snpProtection diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index 8f22fb893..1cc152bd5 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -139,7 +139,7 @@ type qemuArch interface { setPFlash([]string) // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error // supportGuestMemoryHotplug returns if the guest supports memory hotplug supportGuestMemoryHotplug() bool @@ -735,23 +735,27 @@ func (q *qemuArchBase) appendRNGDevice(_ context.Context, devices []govmmQemu.De return devices, nil } -func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { +func (q *qemuArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { - kernelRootParams := 
commonVirtioblkKernelRootParams + kernelRootParams, err := GetKernelRootParams(config.RootfsType, q.disableNvdimm, false) + if err != nil { + return err + } if !q.disableNvdimm { q.qemuMachine.Options = strings.Join([]string{ q.qemuMachine.Options, qemuNvdimmOption, }, ",") - if q.dax { - kernelRootParams = commonNvdimmKernelRootParams - } else { - kernelRootParams = commonNvdimmNoDAXKernelRootParams + kernelRootParams, err = GetKernelRootParams(config.RootfsType, q.disableNvdimm, q.dax) + if err != nil { + return err } } q.kernelParams = append(q.kernelParams, kernelRootParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) } + + return nil } func (q *qemuArchBase) supportGuestMemoryHotplug() bool { diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index ccf164139..9e05c5452 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -65,7 +65,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { }, } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 7f6de2fcd..2d4010fbc 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -88,7 +88,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } q.memoryOffset = config.MemOffset diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 422db2b87..b0c1ede54 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -83,7 +83,11 @@ func newQemuArch(config HypervisorConfig) 
(qemuArch, error) { } if config.ImagePath != "" { - q.kernelParams = append(q.kernelParams, commonVirtioblkKernelRootParams...) + kernelParams, err := GetKernelRootParams(config.RootfsType, true, false) + if err != nil { + return nil, err + } + q.kernelParams = append(q.kernelParams, kernelParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 70434a725..176714282 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -589,7 +589,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, sandboxConfig.HypervisorConfig.EnableVhostUserStore, - sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) + sandboxConfig.HypervisorConfig.VhostUserStorePath, sandboxConfig.HypervisorConfig.VhostUserDeviceReconnect, nil) // Create the sandbox resource controllers. 
if err := s.createResourceController(); err != nil { diff --git a/src/runtime/virtcontainers/sandbox_linux_test.go b/src/runtime/virtcontainers/sandbox_linux_test.go index f08ac6d4e..1d29e1505 100644 --- a/src/runtime/virtcontainers/sandbox_linux_test.go +++ b/src/runtime/virtcontainers/sandbox_linux_test.go @@ -30,7 +30,7 @@ func TestSandboxAttachDevicesVhostUserBlk(t *testing.T) { tmpDir := t.TempDir() os.RemoveAll(tmpDir) - dm := manager.NewDeviceManager(config.VirtioSCSI, true, tmpDir, nil) + dm := manager.NewDeviceManager(config.VirtioSCSI, true, tmpDir, 0, nil) vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 37c489427..8b86fa995 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -541,7 +541,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { config.SysIOMMUPath = savedIOMMUPath }() - dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil) path := filepath.Join(vfioPath, testFDIOGroup) deviceInfo := config.DeviceInfo{ HostPath: path, @@ -1080,7 +1080,7 @@ func TestAttachBlockDevice(t *testing.T) { DevType: "b", } - dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", 0, nil) device, err := dm.NewDevice(deviceInfo) assert.Nil(t, err) _, ok := device.(*drivers.BlockDevice) @@ -1136,7 +1136,7 @@ func TestPreAddDevice(t *testing.T) { HypervisorConfig: hConfig, } - dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", 0, nil) // create a sandbox first sandbox := &Sandbox{ id: testSandboxID, diff --git a/src/runtime/virtcontainers/virtiofsd.go b/src/runtime/virtcontainers/virtiofsd.go index 
08f7dfd6f..6df62adf9 100644 --- a/src/runtime/virtcontainers/virtiofsd.go +++ b/src/runtime/virtcontainers/virtiofsd.go @@ -37,6 +37,13 @@ var ( errUnimplemented = errors.New("unimplemented") ) +const ( + typeVirtioFSCacheModeNever = "never" + typeVirtioFSCacheModeNone = "none" + typeVirtioFSCacheModeAlways = "always" + typeVirtioFSCacheModeAuto = "auto" +) + type VirtiofsDaemon interface { // Start virtiofs daemon, return pid of virtiofs daemon process Start(context.Context, onQuitFunc) (pid int, err error) @@ -216,11 +223,11 @@ func (v *virtiofsd) valid() error { } if v.cache == "" { - v.cache = "auto" - } else if v.cache == "none" { + v.cache = typeVirtioFSCacheModeAuto + } else if v.cache == typeVirtioFSCacheModeNone { v.Logger().Warn("virtio-fs cache mode `none` is deprecated since Kata Containers 2.5.0 and will be removed in the future release, please use `never` instead. For more details please refer to https://github.com/kata-containers/kata-containers/issues/4234.") - v.cache = "never" - } else if v.cache != "auto" && v.cache != "always" && v.cache != "never" { + v.cache = typeVirtioFSCacheModeNever + } else if v.cache != typeVirtioFSCacheModeAuto && v.cache != typeVirtioFSCacheModeAlways && v.cache != typeVirtioFSCacheModeNever { return errVirtiofsdInvalidVirtiofsCacheMode(v.cache) } diff --git a/src/tools/agent-ctl/Cargo.lock b/src/tools/agent-ctl/Cargo.lock index 634d192b6..deb3385f6 100644 --- a/src/tools/agent-ctl/Cargo.lock +++ b/src/tools/agent-ctl/Cargo.lock @@ -250,14 +250,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.10" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf5525f2cf84d5113ab26bfb6474180eb63224b4b1e4be31ee87be4098f11399" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", - "nix 0.24.2", + "nix 0.25.1", "regex", + "thiserror", ] [[package]] @@ 
-719,7 +720,6 @@ version = "0.0.1" dependencies = [ "anyhow", "byteorder", - "cgroups-rs", "clap", "hex", "humantime", @@ -893,6 +893,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if 1.0.0", + "libc", +] + [[package]] name = "ntapi" version = "0.3.6" diff --git a/src/tools/kata-ctl/Cargo.lock b/src/tools/kata-ctl/Cargo.lock index b10691db0..85d4eb517 100644 --- a/src/tools/kata-ctl/Cargo.lock +++ b/src/tools/kata-ctl/Cargo.lock @@ -618,6 +618,7 @@ dependencies = [ "semver", "serde", "serde_json", + "serial_test", "shim-interface", "strum", "strum_macros", @@ -660,6 +661,16 @@ version = "0.2.135" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.17" @@ -853,6 +864,31 @@ version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + 
"redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "percent-encoding" version = "2.2.0" @@ -1202,6 +1238,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "sct" version = "0.7.0" @@ -1284,6 +1326,28 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "shim-interface" version = "0.1.0" @@ -1344,6 +1408,12 @@ dependencies = [ "slog", ] +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + [[package]] name = "socket2" version = "0.4.7" diff --git a/src/tools/kata-ctl/Cargo.toml b/src/tools/kata-ctl/Cargo.toml index cc32054d2..d1f3c5637 100644 --- a/src/tools/kata-ctl/Cargo.toml +++ b/src/tools/kata-ctl/Cargo.toml @@ -30,6 +30,7 @@ shim-interface = { path = "../../libs/shim-interface"} kata-types = { path = "../../libs/kata-types" } safe-path = { path = "../../libs/safe-path" } agent = { path = "../../runtime-rs/crates/agent"} +serial_test = "0.5.1" [target.'cfg(target_arch = "s390x")'.dependencies] reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] } diff --git a/src/tools/kata-ctl/src/check.rs 
b/src/tools/kata-ctl/src/check.rs index 81b9b83a7..8218a0f31 100644 --- a/src/tools/kata-ctl/src/check.rs +++ b/src/tools/kata-ctl/src/check.rs @@ -23,6 +23,9 @@ const JSON_TYPE: &str = "application/json"; const USER_AGT: &str = "kata"; +#[allow(dead_code)] +const ERR_NO_CPUINFO: &str = "cpu_info string is empty"; + #[allow(dead_code)] pub const GENERIC_CPU_VENDOR_FIELD: &str = "vendor_id"; #[allow(dead_code)] @@ -44,7 +47,7 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result = contents.split(substring).collect(); @@ -62,7 +65,11 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result Result { if cpu_info.is_empty() { - return Err(anyhow!("cpu_info string is empty")); + return Err(anyhow!(ERR_NO_CPUINFO)); + } + + if cpu_flags_tag.is_empty() { + return Err(anyhow!("cpu flags delimiter string is empty"))?; } let subcontents: Vec<&str> = cpu_info.split('\n').collect(); @@ -195,58 +202,168 @@ pub fn check_official_releases() -> Result<()> { mod tests { use super::*; use semver::Version; + use std::fs; + use std::io::Write; + use tempfile::tempdir; #[test] - fn test_get_cpu_info_empty_input() { - let expected = "No such file or directory (os error 2)"; - let actual = get_cpu_info("").err().unwrap().to_string(); - assert_eq!(expected, actual); + fn test_get_single_cpu_info() { + // Valid cpuinfo example + let dir = tempdir().unwrap(); + let file_path_full = dir.path().join("cpuinfo_full"); + let path_full = file_path_full.clone(); + let mut file_full = fs::File::create(file_path_full).unwrap(); + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n".to_string(); + writeln!(file_full, "{}", contents).unwrap(); - let actual = get_single_cpu_info("", "\nprocessor") - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); + // Empty cpuinfo example + let file_path_empty = dir.path().join("cpuinfo_empty"); + let path_empty = file_path_empty.clone(); + let mut 
_file_empty = fs::File::create(file_path_empty).unwrap(); + + #[derive(Debug)] + struct TestData<'a> { + cpuinfo_path: &'a str, + processor_delimiter_str: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpuinfo_path: "", + processor_delimiter_str: "", + result: Err(anyhow!("No such file or directory (os error 2)")), + }, + TestData { + cpuinfo_path: &path_empty.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + // Success scenarios + TestData { + cpuinfo_path: &path_full.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Ok( + "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4" + .to_string(), + ), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_single_cpu_info(d.cpuinfo_path, d.processor_delimiter_str); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } } #[test] - fn test_get_cpu_flags_empty_input() { - let expected = "cpu_info string is empty"; - let actual = get_cpu_flags("", "").err().unwrap().to_string(); - assert_eq!(expected, actual); + fn test_get_cpu_flags() { + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n"; + + #[derive(Debug)] + struct TestData<'a> { + cpu_info_str: &'a str, + cpu_flags_tag: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpu_info_str: "", + cpu_flags_tag: "", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: "", + cpu_flags_tag: "flags", + result: 
Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "", + result: Err(anyhow!("cpu flags delimiter string is empty")), + }, + // Success scenarios + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags", + result: Ok(" flag_1 flag_2 flag_3 flag_4".to_string()), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags_err", + result: Ok("".to_string()), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_cpu_flags(d.cpu_info_str, d.cpu_flags_tag); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } } #[test] - fn check_version_by_empty_url() { - const TEST_URL: &str = "http:"; - let expected = "builder error: empty host"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); - } + fn test_get_kata_all_releases_by_url() { + #[derive(Debug)] + struct TestData<'a> { + test_url: &'a str, + expected: &'a str, + } + let tests = &[ + // Failure scenarios + TestData { + test_url: "http:", + expected: "builder error: empty host", + }, + TestData { + test_url: "_localhost_", + expected: "builder error: relative URL without a base", + }, + TestData { + test_url: "http://localhost :80", + expected: "builder error: invalid domain character", + }, + ]; - #[test] - fn check_version_by_garbage_url() { - const TEST_URL: &str = "_localhost_"; - let expected = "builder error: relative URL without a base"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); - } - - #[test] - fn check_version_by_invalid_url() { 
- const TEST_URL: &str = "http://localhost :80"; - let expected = "builder error: invalid domain character"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let actual = get_kata_all_releases_by_url(d.test_url) + .err() + .unwrap() + .to_string(); + let msg = format!("{}, result: {:?}", msg, actual); + assert_eq!(d.expected, actual, "{}", msg); + } } #[test] diff --git a/src/tools/kata-ctl/src/ops/volume_ops.rs b/src/tools/kata-ctl/src/ops/volume_ops.rs index a9df9ce78..60c753f6f 100644 --- a/src/tools/kata-ctl/src/ops/volume_ops.rs +++ b/src/tools/kata-ctl/src/ops/volume_ops.rs @@ -169,11 +169,13 @@ pub fn get_sandbox_id_for_volume(volume_path: &str) -> Result { mod tests { use super::*; use kata_types::mount::DirectVolumeMountInfo; + use serial_test::serial; use std::{collections::HashMap, fs}; use tempfile::tempdir; use test_utils::skip_if_not_root; #[test] + #[serial] fn test_get_sandbox_id_for_volume() { // this test has to run as root, so has to manually cleanup afterwards skip_if_not_root!(); @@ -259,6 +261,7 @@ mod tests { } #[test] + #[serial] fn test_add_remove() { skip_if_not_root!(); // example volume dir is a/b/c, note the behavior of join would take "/a" as absolute path. 
diff --git a/src/tools/kata-ctl/src/utils.rs b/src/tools/kata-ctl/src/utils.rs index b1564f4c4..8e6815d02 100644 --- a/src/tools/kata-ctl/src/utils.rs +++ b/src/tools/kata-ctl/src/utils.rs @@ -147,10 +147,12 @@ pub fn get_generic_cpu_details(cpu_info_file: &str) -> Result<(String, String)> #[cfg(test)] mod tests { use super::*; + use serial_test::serial; use std::io::Write; use tempfile::tempdir; #[test] + #[serial] fn test_drop_privs() { let res = drop_privs(); assert!(res.is_ok()); @@ -164,6 +166,7 @@ mod tests { } #[test] + #[serial] fn test_kernel_version_valid_input() { let dir = tempdir().unwrap(); let file_path = dir.path().join("proc-version"); @@ -208,6 +211,7 @@ mod tests { } #[test] + #[serial] fn test_get_distro_details_valid_file() { let dir = tempdir().unwrap(); let file_path = dir.path().join("os-version"); diff --git a/src/tools/runk/Cargo.lock b/src/tools/runk/Cargo.lock index 582a9e54e..9a8e92748 100644 --- a/src/tools/runk/Cargo.lock +++ b/src/tools/runk/Cargo.lock @@ -233,9 +233,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", diff --git a/src/tools/runk/libcontainer/Cargo.toml b/src/tools/runk/libcontainer/Cargo.toml index 83f6a3699..82e56b06e 100644 --- a/src/tools/runk/libcontainer/Cargo.toml +++ b/src/tools/runk/libcontainer/Cargo.toml @@ -20,7 +20,7 @@ chrono = { version = "0.4.19", features = ["serde"] } serde = { version = "1.0.133", features = ["derive"] } serde_json = "1.0.74" scopeguard = "1.1.0" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } procfs = "0.14.0" [dev-dependencies] diff --git 
a/tools/osbuilder/image-builder/image_builder.sh b/tools/osbuilder/image-builder/image_builder.sh index 30984911f..a22d8d12a 100755 --- a/tools/osbuilder/image-builder/image_builder.sh +++ b/tools/osbuilder/image-builder/image_builder.sh @@ -19,6 +19,7 @@ readonly lib_file="${script_dir}/../scripts/lib.sh" readonly ext4_format="ext4" readonly xfs_format="xfs" +readonly erofs_format="erofs" # ext4: percentage of the filesystem which may only be allocated by privileged processes. readonly reserved_blocks_percentage=3 @@ -84,7 +85,7 @@ Options: -h Show this help -o Path to generate image file. ENV: IMAGE -r Free space of the root partition in MB. ENV: ROOT_FREE_SPACE - -f Filesystem type to use, only xfs and ext4 are supported. ENV: FS_TYPE + -f Filesystem type to use, only ext4, xfs and erofs are supported. ENV: FS_TYPE Extra environment variables: AGENT_BIN: Use it to change the expected agent binary name @@ -101,7 +102,6 @@ Extra environment variables: and the rootfs is built with SELINUX=yes. DEFAULT value: "no" - Following diagram shows how the resulting image will look like .-----------.----------.---------------.-----------. 
@@ -353,12 +353,14 @@ format_loop() { local device="$1" local block_size="$2" local fs_type="$3" + local mount_dir="$4" case "${fs_type}" in "${ext4_format}") mkfs.ext4 -q -F -b "${block_size}" "${device}p1" info "Set filesystem reserved blocks percentage to ${reserved_blocks_percentage}%" tune2fs -m "${reserved_blocks_percentage}" "${device}p1" + return 0 ;; "${xfs_format}") @@ -368,7 +370,8 @@ format_loop() { if mkfs.xfs -m reflink=0 -q -f -b size="${block_size}" "${device}p1" 2>&1 | grep -q "unknown option"; then mkfs.xfs -q -f -b size="${block_size}" "${device}p1" fi - ;; + return 0 + ;; *) error "Unsupported fs type: ${fs_type}" @@ -409,6 +412,55 @@ create_disk() { OK "Partitions created" } +setup_selinux() { + local mount_dir="$1" + local agent_bin="$2" + + if [ "${SELINUX}" == "yes" ]; then + if [ "${AGENT_INIT}" == "yes" ]; then + die "Guest SELinux with the agent init is not supported yet" + fi + + info "Labeling rootfs for SELinux" + selinuxfs_path="${mount_dir}${SELINUXFS}" + mkdir -p "$selinuxfs_path" + if mountpoint $SELINUXFS > /dev/null && \ + chroot "${mount_dir}" command -v restorecon > /dev/null; then + mount -t selinuxfs selinuxfs "$selinuxfs_path" + chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / + # TODO: This operation will be removed after the updated container-selinux that + # includes the following commit is released. + # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f + # We use chcon as an interim solution until then. + chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" + umount "${selinuxfs_path}" + else + die "Could not label the rootfs. 
Make sure that SELinux is enabled on the host \ + and the rootfs is built with SELINUX=yes" + fi + fi +} + +setup_systemd() { + local mount_dir="$1" + + info "Removing unneeded systemd services and sockets" + for u in "${systemd_units[@]}"; do + find "${mount_dir}" -type f \( \ + -name "${u}.service" -o \ + -name "${u}.socket" \) \ + -exec rm -f {} \; + done + + info "Removing unneeded systemd files" + for u in "${systemd_files[@]}"; do + find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; + done + + info "Creating empty machine-id to allow systemd to bind-mount it" + touch "${mount_dir}/etc/machine-id" +} + create_rootfs_image() { local rootfs="$1" local image="$2" @@ -423,60 +475,26 @@ create_rootfs_image() { die "Could not setup loop device" fi - if ! format_loop "${device}" "${block_size}" "${fs_type}"; then + if ! format_loop "${device}" "${block_size}" "${fs_type}" ""; then die "Could not format loop device: ${device}" fi info "Mounting root partition" - readonly mount_dir=$(mktemp -p ${TMPDIR:-/tmp} -d osbuilder-mount-dir.XXXX) + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) mount "${device}p1" "${mount_dir}" OK "root partition mounted" info "Copying content from rootfs to root partition" cp -a "${rootfs}"/* "${mount_dir}" - if [ "${SELINUX}" == "yes" ]; then - if [ "${AGENT_INIT}" == "yes" ]; then - die "Guest SELinux with the agent init is not supported yet" - fi - - info "Labeling rootfs for SELinux" - selinuxfs_path="${mount_dir}${SELINUXFS}" - mkdir -p $selinuxfs_path - if mountpoint $SELINUXFS > /dev/null && \ - chroot "${mount_dir}" command -v restorecon > /dev/null; then - mount -t selinuxfs selinuxfs $selinuxfs_path - chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / - # TODO: This operation will be removed after the updated container-selinux that - # includes the following commit is released. 
- # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f - # We use chcon as an interim solution until then. - chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" - umount $selinuxfs_path - else - die "Could not label the rootfs. Make sure that SELinux is enabled on the host \ -and the rootfs is built with SELINUX=yes" - fi - fi + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" sync OK "rootfs copied" - info "Removing unneeded systemd services and sockets" - for u in "${systemd_units[@]}"; do - find "${mount_dir}" -type f \( \ - -name "${u}.service" -o \ - -name "${u}.socket" \) \ - -exec rm -f {} \; - done - - info "Removing unneeded systemd files" - for u in "${systemd_files[@]}"; do - find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; - done - - info "Creating empty machine-id to allow systemd to bind-mount it" - touch "${mount_dir}/etc/machine-id" + info "Setup systemd" + setup_systemd "${mount_dir}" info "Unmounting root partition" umount "${mount_dir}" @@ -493,7 +511,50 @@ and the rootfs is built with SELINUX=yes" fi losetup -d "${device}" - rmdir "${mount_dir}" + rm -rf "${mount_dir}" +} + +create_erofs_rootfs_image() { + local rootfs="$1" + local image="$2" + local block_size="$3" + local agent_bin="$4" + + if [ "$block_size" -ne 4096 ]; then + die "Invalid block size for erofs" + fi + + if ! 
device="$(setup_loop_device "${image}")"; then + die "Could not setup loop device" + fi + + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) + + info "Copying content from rootfs to root partition" + cp -a "${rootfs}"/* "${mount_dir}" + + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" + + sync + OK "rootfs copied" + + info "Setup systemd" + setup_systemd "${mount_dir}" + + readonly fsimage="$(mktemp)" + mkfs.erofs -Enoinline_data "${fsimage}" "${mount_dir}" + local img_size="$(stat -c"%s" "${fsimage}")" + local img_size_mb="$(((("${img_size}" + 1048576) / 1048576) + 1 + "${rootfs_start}"))" + + create_disk "${image}" "${img_size_mb}" "ext4" "${rootfs_start}" + + dd if="${fsimage}" of="${device}p1" + + losetup -d "${device}" + rm -rf "${mount_dir}" + + return "${img_size_mb}" } set_dax_header() { @@ -586,14 +647,23 @@ main() { die "Invalid rootfs" fi - img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" "${fs_type}" "${block_size}") + if [ "${fs_type}" == 'erofs' ]; then + # mkfs.erofs accepts an src root dir directory as an input + # rather than some device, so no need to guess the device dest size first. + create_erofs_rootfs_image "${rootfs}" "${image}" \ + "${block_size}" "${agent_bin}" + rootfs_img_size=$? 
+ img_size=$((rootfs_img_size + dax_header_sz)) + else + img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" \ + "${fs_type}" "${block_size}") - # the first 2M are for the first MBR + NVDIMM metadata and were already - # consider in calculate_img_size - rootfs_img_size=$((img_size - dax_header_sz)) - create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ + # the first 2M are for the first MBR + NVDIMM metadata and were already + # considered in calculate_img_size + rootfs_img_size=$((img_size - dax_header_sz)) + create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ "${fs_type}" "${block_size}" "${agent_bin}" - + fi # insert at the beginning of the image the MBR + DAX header set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}" } diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index 47dfd1710..1cad20d31 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -89,7 +89,7 @@ merge-builds: $(MK_DIR)/kata-deploy-merge-builds.sh build install-tarball: - tar -xvf ./kata-static.tar.xz -C / + tar -xf ./kata-static.tar.xz -C / cc-payload: cc-tarball $(MK_DIR)kata-deploy-build-and-upload-payload.sh $(CURDIR)/kata-static.tar.xz diff --git a/tools/packaging/kernel/README.md b/tools/packaging/kernel/README.md index dbf991540..ce4ea30c4 100644 --- a/tools/packaging/kernel/README.md +++ b/tools/packaging/kernel/README.md @@ -107,6 +107,10 @@ Kata Containers packaging repository holds the kernel configs and patches. The config and patches can work for many versions, but we only test the kernel version defined in the [Kata Containers versions file][kata-containers-versions-file]. +For any change to the kernel configs or patches, the version defined in the file +[`kata_config_version`][kata-containers-versions-file] needs to be incremented +so that the CI can test with these changes.
+ For further details, see [the kernel configuration documentation](configs). ## How is it tested diff --git a/tools/packaging/kernel/configs/fragments/common/fs.conf b/tools/packaging/kernel/configs/fragments/common/fs.conf index ae4e3255f..c3be9f925 100644 --- a/tools/packaging/kernel/configs/fragments/common/fs.conf +++ b/tools/packaging/kernel/configs/fragments/common/fs.conf @@ -36,6 +36,11 @@ CONFIG_AUTOFS_FS=y CONFIG_TMPFS=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MISC_FILESYSTEMS=y +CONFIG_EROFS_FS=y +CONFIG_EROFS_FS_XATTR=y +CONFIG_EROFS_FS_ZIP=y +CONFIG_EROFS_FS_SECURITY=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EPOLL=y diff --git a/tools/packaging/kernel/configs/fragments/s390/console.conf b/tools/packaging/kernel/configs/fragments/s390/console.conf index 2bc4db09b..6ff749efc 100644 --- a/tools/packaging/kernel/configs/fragments/s390/console.conf +++ b/tools/packaging/kernel/configs/fragments/s390/console.conf @@ -6,3 +6,5 @@ CONFIG_TN3270_CONSOLE=y CONFIG_CCW_CONSOLE=y CONFIG_SCLP_TTY=y CONFIG_SCLP_CONSOLE=y +CONFIG_SCLP_VT220_TTY=y +CONFIG_SCLP_VT220_CONSOLE=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 6529ff889..29d6383b5 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -98 +100 diff --git a/tools/packaging/static-build/shim-v2/build.sh b/tools/packaging/static-build/shim-v2/build.sh index 10df1a0b3..875c7698f 100755 --- a/tools/packaging/static-build/shim-v2/build.sh +++ b/tools/packaging/static-build/shim-v2/build.sh @@ -13,6 +13,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "${script_dir}/../../scripts/lib.sh" readonly kernel_builder="${repo_root_dir}/tools/packaging/kernel/build-kernel.sh" +VMM_CONFIGS="qemu fc" GO_VERSION=${GO_VERSION} RUST_VERSION=${RUST_VERSION} diff --git a/tools/packaging/static-build/shim-v2/install_go_rust.sh 
b/tools/packaging/static-build/shim-v2/install_go_rust.sh index db192f673..502d5f085 100755 --- a/tools/packaging/static-build/shim-v2/install_go_rust.sh +++ b/tools/packaging/static-build/shim-v2/install_go_rust.sh @@ -50,12 +50,37 @@ EOF trap finish EXIT +go_version=${1:-} rust_version=${2:-} + ARCH=${ARCH:-$(uname -m)} -LIBC=${LIBC:-musl} +case "${ARCH}" in + aarch64) + goarch=arm64 + LIBC=musl + ;; + ppc64le) + goarch=${ARCH} + ARCH=powerpc64le + LIBC=gnu + ;; + s390x) + goarch=${ARCH} + LIBC=gnu + ;; + x86_64) + goarch=amd64 + LIBC=musl + ;; + *) + echo "unsupported architecture $(uname -m)" + exit 1 + ;; +esac + curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSLf | sh -s -- -y --default-toolchain ${rust_version} -t ${ARCH}-unknown-linux-${LIBC} source /root/.cargo/env -rustup target add x86_64-unknown-linux-musl +rustup target add ${ARCH}-unknown-linux-${LIBC} pushd "${tmp_dir}" @@ -70,9 +95,6 @@ done shift $(( $OPTIND - 1 )) - -go_version=${1:-} - if [ -z "$go_version" ];then echo "Missing go" usage 1 @@ -90,14 +112,6 @@ if command -v go; then fi fi -case "$(uname -m)" in - aarch64) goarch="arm64";; - ppc64le) goarch="ppc64le";; - x86_64) goarch="amd64";; - s390x) goarch="s390x";; - *) echo "unsupported architecture: $(uname -m)"; exit 1;; -esac - info "Download go version ${go_version}" kernel_name=$(uname -s) curl -OL "https://storage.googleapis.com/golang/go${go_version}.${kernel_name,,}-${goarch}.tar.gz" diff --git a/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh b/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh index 4f4f05f0d..6bad74ffb 100755 --- a/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh +++ b/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh @@ -57,6 +57,7 @@ init_env() { LIBC="gnu" ARCH="powerpc64le" ARCH_LIBC=${ARCH}-linux-${LIBC} + extra_rust_flags="" ;; "s390x") LIBC="gnu" diff --git a/versions.yaml b/versions.yaml index 98b3e34bc..326132bc0 100644 --- 
a/versions.yaml +++ b/versions.yaml @@ -268,7 +268,7 @@ externals: uscan-url: >- https://github.com/opencontainers/runc/tags .*/v?(\d\S+)\.tar\.gz - version: "v1.1.0" + version: "v1.1.4" skopeo: description: "Utility for container images and image repositories"