From 49326fe4e1a2f5235ea4ccc9939383cbb6b813c2 Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Thu, 9 Feb 2023 14:32:27 +0800 Subject: [PATCH 1/3] fix(clippy): fix hypervisor clippy checks Fix hypervisor clippy checks. Signed-off-by: yaoyinnan --- src/runtime-rs/crates/hypervisor/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 9c70f3914..0edbfc8bc 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -21,7 +21,7 @@ serde_json = ">=1.0.9" slog = "2.5.2" slog-scope = "4.4.0" thiserror = "1.0" -tokio = { version = "1.8.0", features = ["sync"] } +tokio = { version = "1.8.0", features = ["sync", "fs"] } vmm-sys-util = "0.11.0" rand = "0.8.4" From ed02c8a05137d40f564582ec8f0e225a15df4928 Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Thu, 9 Feb 2023 20:07:51 +0800 Subject: [PATCH 2/3] docs: add guide for building rootfs with EROFS Add guide for building rootfs with EROFS. 
Fixes: #6063 Signed-off-by: Gao Xiang Signed-off-by: yaoyinnan --- docs/how-to/README.md | 1 + docs/how-to/how-to-use-erofs-build-rootfs.md | 90 ++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 docs/how-to/how-to-use-erofs-build-rootfs.md diff --git a/docs/how-to/README.md b/docs/how-to/README.md index 1ad486048..df6a060fc 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -44,3 +44,4 @@ - [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md) - [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) - [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md) +- [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md) diff --git a/docs/how-to/how-to-use-erofs-build-rootfs.md b/docs/how-to/how-to-use-erofs-build-rootfs.md new file mode 100644 index 000000000..72bf6315f --- /dev/null +++ b/docs/how-to/how-to-use-erofs-build-rootfs.md @@ -0,0 +1,90 @@ +# Configure Kata Containers to use EROFS build rootfs + +## Introduction +In Kata Containers, the rootfs is used read-only. EROFS can noticeably decrease metadata overhead. + +`mkfs.erofs` can generate compressed and uncompressed EROFS images. + +For uncompressed images, no files are compressed. However, it is optional to inline the data blocks at the end of the file with the metadata. + +For compressed images, each file is compressed using the lz4 or lz4hc algorithm and then checked to see whether compression actually saves space. If it does not, the file is stored uncompressed. + +## Performance comparison +| | EROFS | EXT4 | XFS | +|-----------------|-------| --- | --- | +| Image Size [MB] | 106(uncompressed) | 256 | 126 | + + +## Guidance +### Install the `erofs-utils` +#### `apt/dnf` install +On newer `Ubuntu/Debian` systems, it can be installed directly using the `apt` command, and on `Fedora` it can be installed directly using the `dnf` command. 
+ +```shell +# Debian/Ubuntu +$ apt install erofs-utils +# Fedora +$ dnf install erofs-utils +``` + +#### Source install +[https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git](https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git) + +##### Compile dependencies +If you need to enable the `Lz4` compression feature, `Lz4 1.8.0+` is required, and `Lz4 1.9.3+` is strongly recommended. + +##### Compilation process +For some old lz4 versions (lz4-1.8.0~1.8.3), if lz4-static is not installed, the lz4hc algorithm will not be supported. lz4-static can be installed with apt install lz4-static.x86_64. However, these versions have some bugs in compression, and it is not recommended to use these versions directly. +If you use `lz4 1.9.0+`, you can directly use the following commands to compile. + +```shell +$ ./autogen.sh +$ ./configure +$ make +``` + +The compiled `mkfs.erofs` program will be saved in the `mkfs` directory. Afterwards, the generated tools can be installed to a system directory using `make install` (requires root privileges). + +### Create a local rootfs +```shell +$ export distro="ubuntu" +$ export FS_TYPE="erofs" +$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)" +$ sudo rm -rf "${ROOTFS_DIR}" +$ pushd kata-containers/tools/osbuilder/rootfs-builder +$ script -fec 'sudo -E SECCOMP=no ./rootfs.sh "${distro}"' +$ popd +``` + +### Add a custom agent to the image - OPTIONAL +> Note: +> - You should only do this step if you are testing with the latest version of the agent. 
+```shell +$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/" +``` + +### Build a root image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ script -fec 'sudo -E ./image_builder.sh "${ROOTFS_DIR}"' +$ popd +``` + +### Install the rootfs image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" +$ rootfs="erofs" +$ image="kata-containers-${rootfs}-${date}-${commit}" +$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}" +$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img) +$ popd +``` + +### Use `EROFS` in the runtime +```shell +$ sudo sed -i -e 's/^#\? *\(rootfs_type\) *=.*$/\1 = "erofs"/' /etc/kata-containers/configuration.toml +``` From bdf20b5d263c352b0ce93b3956beb5de8e85b3ee Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Sat, 11 Feb 2023 00:44:13 +0800 Subject: [PATCH 3/3] rootfs: support EROFS filesystem For kata containers, rootfs is used in the read-only way. EROFS can noticeably decrease metadata overhead. On the basis of supporting the EROFS file system, it supports using the config parameter to switch the file system used by rootfs. 
Fixes: #6063 Signed-off-by: Gao Xiang Signed-off-by: yaoyinnan --- .../kata-types/src/config/hypervisor/mod.rs | 3 + src/runtime-rs/Makefile | 7 + .../config/configuration-dragonball.toml.in | 6 + .../crates/hypervisor/src/dragonball/inner.rs | 5 +- .../crates/hypervisor/src/kernel_param.rs | 225 +++++++++++++++--- src/runtime-rs/crates/hypervisor/src/lib.rs | 10 + src/runtime/Makefile | 7 + src/runtime/cmd/kata-runtime/kata-env_test.go | 2 + src/runtime/config/configuration-acrn.toml.in | 6 + src/runtime/config/configuration-clh.toml.in | 6 + src/runtime/config/configuration-fc.toml.in | 6 + src/runtime/config/configuration-qemu.toml.in | 6 + .../pkg/containerd-shim-v2/create_test.go | 2 + src/runtime/pkg/katatestutils/utils.go | 2 + .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 44 ++++ src/runtime/pkg/katautils/config_test.go | 4 + src/runtime/virtcontainers/acrn.go | 6 +- src/runtime/virtcontainers/acrn_arch_base.go | 14 +- src/runtime/virtcontainers/clh.go | 7 +- src/runtime/virtcontainers/clh_test.go | 1 + src/runtime/virtcontainers/fc.go | 9 +- src/runtime/virtcontainers/hypervisor.go | 86 +++++-- src/runtime/virtcontainers/hypervisor_test.go | 160 ++++++++++++- src/runtime/virtcontainers/qemu_amd64.go | 4 +- src/runtime/virtcontainers/qemu_arch_base.go | 18 +- src/runtime/virtcontainers/qemu_arm64.go | 4 +- src/runtime/virtcontainers/qemu_ppc64le.go | 4 +- src/runtime/virtcontainers/qemu_s390x.go | 6 +- .../osbuilder/image-builder/image_builder.sh | 170 +++++++++---- .../kernel/configs/fragments/common/fs.conf | 5 + tools/packaging/kernel/kata_config_version | 2 +- 32 files changed, 707 insertions(+), 131 deletions(-) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index afd3a42ea..2eaa3443b 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -210,6 +210,9 @@ pub struct BootInfo { /// Path 
to root device on host #[serde(default)] pub image: String, + /// Rootfs filesystem type. + #[serde(default)] + pub rootfs_type: String, /// Path to the firmware. /// /// If you want that qemu uses the default firmware leave this option empty. diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index ffe7a934b..880b5caf1 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -79,6 +79,12 @@ DBVALIDHYPERVISORPATHS := [] PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR) KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR) FIRMWAREPATH := FIRMWAREVOLUMEPATH := @@ -210,6 +216,7 @@ USER_VARS += DBVALIDCTLPATHS USER_VARS += SYSCONFIG USER_VARS += IMAGENAME USER_VARS += IMAGEPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index fa39d23c8..8131d0c68 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -17,6 +17,12 @@ ctlpath = "@DBCTLPATH@" kernel = "@KERNELPATH_DB@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index a74fe28f8..99a38b306 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -101,7 +101,10 @@ impl DragonballInner { // get kernel params let mut kernel_params = KernelParams::new(self.config.debug_info.enable_debug); - kernel_params.append(&mut KernelParams::new_rootfs_kernel_params(&rootfs_driver)); + kernel_params.append(&mut KernelParams::new_rootfs_kernel_params( + &rootfs_driver, + &self.config.boot_info.rootfs_type, + )?); kernel_params.append(&mut KernelParams::from_string( &self.config.boot_info.kernel_params, )); diff --git a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs index 39bef9a64..7ad17cb8f 100644 --- a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs +++ b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs @@ -6,7 +6,10 @@ use anyhow::{anyhow, Result}; -use crate::{VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM}; +use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, VM_ROOTFS_ROOT_PMEM, +}; use kata_types::config::LOG_VPORT_OPTION; // Port where the agent will send the logs. 
Logs are sent through the vsock in cases @@ -67,43 +70,49 @@ impl KernelParams { Self { params } } - pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str) -> Self { - let params = match rootfs_driver { - VM_ROOTFS_DRIVER_BLK => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/vda1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] - } + pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str, rootfs_type: &str) -> Result { + let mut params = vec![]; + + match rootfs_driver { VM_ROOTFS_DRIVER_PMEM => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/pmem0p1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro,dax ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] + params.push(Param::new("root", VM_ROOTFS_ROOT_PMEM)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new( + "rootflags", + "dax,data=ordered,errors=remount-ro ro", + )); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "dax ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } } - _ => vec![], - }; - Self { params } + VM_ROOTFS_DRIVER_BLK => { + params.push(Param::new("root", VM_ROOTFS_ROOT_BLK)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new("rootflags", "data=ordered,errors=remount-ro ro")); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } + } + _ => { + return Err(anyhow!("Unsupported rootfs driver")); + } + } + + params.push(Param::new("rootfstype", rootfs_type)); + + Ok(Self { params }) } pub(crate) fn append(&mut self, params: &mut KernelParams) { 
@@ -155,6 +164,12 @@ mod tests { use super::*; + use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, + VM_ROOTFS_ROOT_PMEM, + }; + #[test] fn test_params() { let param1 = Param::new("", ""); @@ -190,4 +205,142 @@ mod tests { Ok(()) } + + #[derive(Debug)] + struct TestData<'a> { + rootfs_driver: &'a str, + rootfs_type: &'a str, + expect_params: KernelParams, + result: Result<()>, + } + + #[test] + fn test_rootfs_kernel_params() { + let tests = &[ + // EXT4 + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + // XFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // EROFS + TestData { + 
rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // Unsupported rootfs driver + TestData { + rootfs_driver: "foo", + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs driver")), + }, + // Unsupported rootfs type + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: "foo", + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs type")), + }, + ]; + + for (i, t) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, t); + let result = KernelParams::new_rootfs_kernel_params(t.rootfs_driver, t.rootfs_type); + let msg = format!("{}, result: {:?}", msg, result); + + if t.result.is_ok() { + assert!(result.is_ok(), "{}", msg); + assert_eq!(t.expect_params, result.unwrap()); + } else { + let expected_error = format!("{}", t.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + } } diff --git 
a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index 2c3e8016f..f2bcc21c7 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -27,6 +27,16 @@ use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; // Config which driver to use as vm root dev const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk"; const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem"; + +//Configure the root corresponding to the driver +const VM_ROOTFS_ROOT_BLK: &str = "/dev/vda1"; +const VM_ROOTFS_ROOT_PMEM: &str = "/dev/pmem0p1"; + +// Config which filesystem to use as rootfs type +const VM_ROOTFS_FILESYSTEM_EXT4: &str = "ext4"; +const VM_ROOTFS_FILESYSTEM_XFS: &str = "xfs"; +const VM_ROOTFS_FILESYSTEM_EROFS: &str = "erofs"; + // before using hugepages for VM, we need to mount hugetlbfs // /dev/hugepages will be the mount point // mkdir -p /dev/hugepages diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 64e034069..99dde7e2b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -112,6 +112,12 @@ KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) INITRDPATH := $(PKGDATADIR)/$(INITRDNAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + FIRMWAREPATH := FIRMWAREVOLUMEPATH := @@ -412,6 +418,7 @@ USER_VARS += IMAGENAME USER_VARS += IMAGEPATH USER_VARS += INITRDNAME USER_VARS += INITRDPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 96847dc13..321bc507b 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -78,6 +78,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC hypervisorPath := filepath.Join(prefixDir, "hypervisor") 
kernelPath := filepath.Join(prefixDir, "kernel") imagePath := filepath.Join(prefixDir, "image") + rootfsType := "ext4" kernelParams := "foo=bar xyz" machineType := "machineType" disableBlock := true @@ -119,6 +120,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 5f1368ce8..2d2b7065e 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -17,6 +17,12 @@ ctlpath = "@ACRNCTLPATH@" kernel = "@KERNELPATH_ACRN@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index fbf838572..df7cc7ac5 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -16,6 +16,12 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. 
diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index b7f349c0d..10dc17700 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -16,6 +16,12 @@ path = "@FCPATH@" kernel = "@KERNELPATH_FC@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 4aced5975..b2a23047e 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -18,6 +18,12 @@ image = "@IMAGEPATH@" # initrd = "@INITRDPATH@" machine_type = "@MACHINETYPE@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. 
diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index eecc19968..ccad5ceea 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -320,6 +320,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -337,6 +338,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 33a5d1836..4e3a784a2 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -211,6 +211,7 @@ type RuntimeConfigOptions struct { DefaultGuestHookPath string KernelPath string ImagePath string + RootfsType string KernelParams string MachineType string LogPath string @@ -307,6 +308,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { block_device_aio = "` + config.BlockDeviceAIO + `" kernel_params = "` + config.KernelParams + `" image = "` + config.ImagePath + `" + rootfs_type = "` + config.RootfsType + `" machine_type = "` + config.MachineType + `" default_vcpus = ` + strconv.FormatUint(uint64(config.DefaultVCPUCount), 10) + ` default_maxvcpus = ` + strconv.FormatUint(uint64(config.DefaultMaxVCPUCount), 10) + ` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index e83500343..f56111771 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ 
b/src/runtime/pkg/katautils/config-settings.go.in @@ -44,6 +44,7 @@ var defaultJailerPath = "/usr/bin/jailer" var defaultImagePath = "/usr/share/kata-containers/kata-containers.img" var defaultKernelPath = "/usr/share/kata-containers/vmlinuz.container" var defaultInitrdPath = "/usr/share/kata-containers/kata-containers-initrd.img" +var defaultRootfsType = "ext4" var defaultFirmwarePath = "" var defaultFirmwareVolumePath = "" var defaultMachineAccelerators = "" diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 7a1f57ac4..bb92ca718 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -83,6 +83,7 @@ type hypervisor struct { CtlPath string `toml:"ctlpath"` Initrd string `toml:"initrd"` Image string `toml:"image"` + RootfsType string `toml:"rootfs_type"` Firmware string `toml:"firmware"` FirmwareVolume string `toml:"firmware_volume"` MachineAccelerators string `toml:"machine_accelerators"` @@ -260,6 +261,16 @@ func (h hypervisor) image() (string, error) { return ResolvePath(p) } +func (h hypervisor) rootfsType() (string, error) { + p := h.RootfsType + + if p == "" { + p = "ext4" + } + + return p, nil +} + func (h hypervisor) firmware() (string, error) { p := h.Firmware @@ -647,6 +658,11 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -670,6 +686,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), @@ -717,6 +734,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return 
vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + pflashes, err := h.PFlash() if err != nil { return vc.HypervisorConfig{}, err @@ -778,6 +800,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, FirmwareVolumePath: firmwareVolume, PFlash: pflashes, @@ -868,6 +891,11 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -885,6 +913,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HypervisorPathList: h.HypervisorPathList, KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, HypervisorCtlPath: hypervisorctl, HypervisorCtlPathList: h.CtlPathList, FirmwarePath: firmware, @@ -935,6 +964,11 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image or initrd must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -969,6 +1003,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, MachineAccelerators: machineAccelerators, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), @@ -1028,15 +1063,23 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { if err != nil { return vc.HypervisorConfig{}, err } + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } + + rootfsType, err 
:= h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + kernelParams := h.kernelParams() return vc.HypervisorConfig{ KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), @@ -1195,6 +1238,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, FirmwarePath: defaultFirmwarePath, FirmwareVolumePath: defaultFirmwareVolumePath, MachineAccelerators: defaultMachineAccelerators, diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 156c312cf..d958fa1a3 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -74,6 +74,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -94,6 +95,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, @@ -153,6 +155,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: defaultVCPUCount, @@ -542,6 +545,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + 
RootfsType: defaultRootfsType, HypervisorMachineType: defaultMachineType, NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: defaultMaxVCPUCount, diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 3863000a6..35c71a6d6 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -255,7 +255,11 @@ func (a *Acrn) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso } a.id = id - a.arch = newAcrnArch(a.config) + var err error + a.arch, err = newAcrnArch(a.config) + if err != nil { + return err + } return nil } diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 8d8c42242..77fb8e9e5 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -64,7 +64,7 @@ type acrnArch interface { appendBlockDevice(devices []Device, drive config.BlockDrive) []Device // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error } type acrnArchBase struct { @@ -314,7 +314,7 @@ func MaxAcrnVCPUs() uint32 { return uint32(8) } -func newAcrnArch(config HypervisorConfig) acrnArch { +func newAcrnArch(config HypervisorConfig) (acrnArch, error) { a := &acrnArchBase{ path: acrnPath, ctlpath: acrnctlPath, @@ -323,8 +323,11 @@ func newAcrnArch(config HypervisorConfig) acrnArch { kernelParams: acrnKernelParams, } - a.handleImagePath(config) - return a + if err := a.handleImagePath(config); err != nil { + return nil, err + } + + return a, nil } func (a *acrnArchBase) acrnPath() (string, error) { @@ -788,10 +791,11 @@ func (a *acrnArchBase) appendBlockDevice(devices []Device, drive config.BlockDri return devices } -func (a *acrnArchBase) handleImagePath(config HypervisorConfig) { +func (a *acrnArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { a.kernelParams = append(a.kernelParams, 
acrnKernelRootParams...) a.kernelParamsNonDebug = append(a.kernelParamsNonDebug, acrnKernelParamsSystemdNonDebug...) a.kernelParamsDebug = append(a.kernelParamsDebug, acrnKernelParamsSystemdDebug...) } + return nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index f91136b25..dffd4213a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -503,10 +503,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) - // First take the default parameters defined by this driver - params := commonNvdimmKernelRootParams - if clh.config.ConfidentialGuest { - params = commonVirtioblkKernelRootParams + params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, false) + if err != nil { + return err } params = append(params, clhKernelParams...) 
diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 27e89d7ec..20e6935d8 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -53,6 +53,7 @@ func newClhConfig() (HypervisorConfig, error) { return HypervisorConfig{ KernelPath: testClhKernelPath, ImagePath: testClhImagePath, + RootfsType: string(EXT4), HypervisorPath: testClhPath, NumVCPUs: defaultVCPUs, BlockDeviceDriver: config.VirtioBlock, diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 4fb1dddfe..520d73eaa 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -89,7 +89,7 @@ const ( // Specify the minimum version of firecracker supported var fcMinSupportedVersion = semver.MustParse("0.21.1") -var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ +var fcKernelParams = []Param{ // The boot source is the first partition of the first block device added {"pci", "off"}, {"reboot", "k"}, @@ -101,7 +101,7 @@ var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ // Firecracker doesn't support ACPI // Fix kernel error "ACPI BIOS Error (bug)" {"acpi", "off"}, -}...) +} func (s vmmState) String() string { switch s { @@ -700,6 +700,11 @@ func (fc *firecracker) fcInitConfiguration(ctx context.Context) error { return err } + params, err := GetKernelRootParams(fc.config.RootfsType, true, false) + if err != nil { + return err + } + fcKernelParams = append(params, fcKernelParams...) if fc.config.Debug { fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"}) } else { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 3ea7a83a4..b36a34dde 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -91,25 +91,74 @@ var ( // cores. 
var defaultMaxVCPUs = govmm.MaxVCPUs() -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +// RootfsDriver describes a rootfs driver. +type RootfsDriver string -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +const ( + // VirtioBlk is the guest root device used with the Virtio-Blk rootfs driver. + VirtioBlk RootfsDriver = "/dev/vda1" -// agnostic list of kernel root parameters for virtio-blk -var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/vda1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, + // Nvdimm is the guest root device used with the Nvdimm rootfs driver. + Nvdimm RootfsDriver = "/dev/pmem0p1" +) + +// RootfsType describes a rootfs type. +type RootfsType string + +const ( + // EXT4 is the ext4 filesystem. + EXT4 RootfsType = "ext4" + + // XFS is the xfs filesystem. + XFS RootfsType = "xfs" + + // EROFS is the erofs filesystem. + EROFS RootfsType = "erofs" +) + +func GetKernelRootParams(rootfstype string, disableNvdimm bool, dax bool) ([]Param, error) { + var kernelRootParams []Param + + // EXT4 filesystem is used by default.
+ if rootfstype == "" { + rootfstype = string(EXT4) + } + + if disableNvdimm && dax { + return []Param{}, fmt.Errorf("Virtio-Blk does not support DAX") + } + + if disableNvdimm { + // Virtio-Blk + kernelRootParams = append(kernelRootParams, Param{"root", string(VirtioBlk)}) + } else { + // Nvdimm + kernelRootParams = append(kernelRootParams, Param{"root", string(Nvdimm)}) + } + + switch RootfsType(rootfstype) { + case EROFS: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "ro"}) + } + case XFS: + fallthrough + // EXT4 filesystem is used by default. + case EXT4: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax,data=ordered,errors=remount-ro ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "data=ordered,errors=remount-ro ro"}) + } + default: + return []Param{}, fmt.Errorf("unsupported rootfs type") + } + + kernelRootParams = append(kernelRootParams, Param{"rootfstype", rootfstype}) + + return kernelRootParams, nil } // DeviceType describes a virtualized device type. @@ -273,6 +322,9 @@ type HypervisorConfig struct { // ImagePath and InitrdPath cannot be set at the same time. InitrdPath string + // RootfsType is the filesystem type of the rootfs.
+ RootfsType string + // FirmwarePath is the bios host path FirmwarePath string diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index e540aea48..d67a38698 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -7,13 +7,167 @@ package virtcontainers import ( "fmt" - "os" - "testing" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/stretchr/testify/assert" + "os" + "testing" ) +func TestGetKernelRootParams(t *testing.T) { + assert := assert.New(t) + tests := []struct { + rootfstype string + expected []Param + disableNvdimm bool + dax bool + error bool + }{ + // EXT4 + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // XFS + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + 
{"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // EROFS + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // Unsupported rootfs type + { + rootfstype: "foo", + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: true, + }, + + // Virtio-Blk does not support DAX + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: true, + error: true, + }, + } + + for _, t := range tests { + kernelRootParams, err := GetKernelRootParams(t.rootfstype, t.disableNvdimm, t.dax) + if t.error { + assert.Error(err) + continue + } else { + assert.NoError(err) + } + assert.Equal(t.expected, kernelRootParams, + "Invalid parameters rootfstype: %v, disableNvdimm: %v, dax: %v, "+ + "unable to get kernel root params", t.rootfstype, t.disableNvdimm, t.dax) + } +} + func testSetHypervisorType(t *testing.T, value string, expected HypervisorType) { var hypervisorType HypervisorType assert := assert.New(t) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 124fd4534..29aa45375 100644 ---
a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -148,7 +148,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { q.qemuMachine.Options += "sgx-epc.0.memdev=epc0,sgx-epc.0.node=0" } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index e16c285ab..5a03b659f 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -129,7 +129,7 @@ type qemuArch interface { setPFlash([]string) // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error // supportGuestMemoryHotplug returns if the guest supports memory hotplug supportGuestMemoryHotplug() bool @@ -702,23 +702,27 @@ func (q *qemuArchBase) appendRNGDevice(_ context.Context, devices []govmmQemu.De return devices, nil } -func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { +func (q *qemuArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { - kernelRootParams := commonVirtioblkKernelRootParams + kernelRootParams, err := GetKernelRootParams(config.RootfsType, q.disableNvdimm, false) + if err != nil { + return err + } if !q.disableNvdimm { q.qemuMachine.Options = strings.Join([]string{ q.qemuMachine.Options, qemuNvdimmOption, }, ",") - if q.dax { - kernelRootParams = commonNvdimmKernelRootParams - } else { - kernelRootParams = commonNvdimmNoDAXKernelRootParams + kernelRootParams, err = GetKernelRootParams(config.RootfsType, q.disableNvdimm, q.dax) + if err != nil { + return err } } q.kernelParams = append(q.kernelParams, kernelRootParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
} + + return nil } func (q *qemuArchBase) supportGuestMemoryHotplug() bool { diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index ccf164139..9e05c5452 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -65,7 +65,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { }, } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 7f6de2fcd..2d4010fbc 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -88,7 +88,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } q.memoryOffset = config.MemOffset diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 422db2b87..b0c1ede54 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -83,7 +83,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } if config.ImagePath != "" { - q.kernelParams = append(q.kernelParams, commonVirtioblkKernelRootParams...) + kernelParams, err := GetKernelRootParams(config.RootfsType, true, false) + if err != nil { + return nil, err + } + q.kernelParams = append(q.kernelParams, kernelParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
} diff --git a/tools/osbuilder/image-builder/image_builder.sh b/tools/osbuilder/image-builder/image_builder.sh index 75b23b176..2ae656b94 100755 --- a/tools/osbuilder/image-builder/image_builder.sh +++ b/tools/osbuilder/image-builder/image_builder.sh @@ -18,6 +18,7 @@ readonly lib_file="${script_dir}/../scripts/lib.sh" readonly ext4_format="ext4" readonly xfs_format="xfs" +readonly erofs_format="erofs" # ext4: percentage of the filesystem which may only be allocated by privileged processes. readonly reserved_blocks_percentage=3 @@ -83,7 +84,7 @@ Options: -h Show this help -o Path to generate image file. ENV: IMAGE -r Free space of the root partition in MB. ENV: ROOT_FREE_SPACE - -f Filesystem type to use, only xfs and ext4 are supported. ENV: FS_TYPE + -f Filesystem type to use, only ext4, xfs and erofs are supported. ENV: FS_TYPE Extra environment variables: AGENT_BIN: Use it to change the expected agent binary name @@ -100,7 +101,6 @@ Extra environment variables: and the rootfs is built with SELINUX=yes. DEFAULT value: "no" - Following diagram shows how the resulting image will look like .-----------.----------.---------------.-----------. 
@@ -351,12 +351,14 @@ format_loop() { local device="$1" local block_size="$2" local fs_type="$3" + local mount_dir="$4" case "${fs_type}" in "${ext4_format}") mkfs.ext4 -q -F -b "${block_size}" "${device}p1" info "Set filesystem reserved blocks percentage to ${reserved_blocks_percentage}%" tune2fs -m "${reserved_blocks_percentage}" "${device}p1" + return 0 ;; "${xfs_format}") @@ -366,7 +368,8 @@ format_loop() { if mkfs.xfs -m reflink=0 -q -f -b size="${block_size}" "${device}p1" 2>&1 | grep -q "unknown option"; then mkfs.xfs -q -f -b size="${block_size}" "${device}p1" fi - ;; + return 0 + ;; *) error "Unsupported fs type: ${fs_type}" @@ -395,6 +398,55 @@ create_disk() { OK "Partitions created" } +setup_selinux() { + local mount_dir="$1" + local agent_bin="$2" + + if [ "${SELINUX}" == "yes" ]; then + if [ "${AGENT_INIT}" == "yes" ]; then + die "Guest SELinux with the agent init is not supported yet" + fi + + info "Labeling rootfs for SELinux" + selinuxfs_path="${mount_dir}${SELINUXFS}" + mkdir -p "$selinuxfs_path" + if mountpoint $SELINUXFS > /dev/null && \ + chroot "${mount_dir}" command -v restorecon > /dev/null; then + mount -t selinuxfs selinuxfs "$selinuxfs_path" + chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / + # TODO: This operation will be removed after the updated container-selinux that + # includes the following commit is released. + # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f + # We use chcon as an interim solution until then. + chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" + umount "${selinuxfs_path}" + else + die "Could not label the rootfs. 
Make sure that SELinux is enabled on the host \ + and the rootfs is built with SELINUX=yes" + fi + fi +} + +setup_systemd() { + local mount_dir="$1" + + info "Removing unneeded systemd services and sockets" + for u in "${systemd_units[@]}"; do + find "${mount_dir}" -type f \( \ + -name "${u}.service" -o \ + -name "${u}.socket" \) \ + -exec rm -f {} \; + done + + info "Removing unneeded systemd files" + for u in "${systemd_files[@]}"; do + find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; + done + + info "Creating empty machine-id to allow systemd to bind-mount it" + touch "${mount_dir}/etc/machine-id" +} + create_rootfs_image() { local rootfs="$1" local image="$2" @@ -409,60 +461,26 @@ create_rootfs_image() { die "Could not setup loop device" fi - if ! format_loop "${device}" "${block_size}" "${fs_type}"; then + if ! format_loop "${device}" "${block_size}" "${fs_type}" ""; then die "Could not format loop device: ${device}" fi info "Mounting root partition" - readonly mount_dir=$(mktemp -p ${TMPDIR:-/tmp} -d osbuilder-mount-dir.XXXX) + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) mount "${device}p1" "${mount_dir}" OK "root partition mounted" info "Copying content from rootfs to root partition" cp -a "${rootfs}"/* "${mount_dir}" - if [ "${SELINUX}" == "yes" ]; then - if [ "${AGENT_INIT}" == "yes" ]; then - die "Guest SELinux with the agent init is not supported yet" - fi - - info "Labeling rootfs for SELinux" - selinuxfs_path="${mount_dir}${SELINUXFS}" - mkdir -p $selinuxfs_path - if mountpoint $SELINUXFS > /dev/null && \ - chroot "${mount_dir}" command -v restorecon > /dev/null; then - mount -t selinuxfs selinuxfs $selinuxfs_path - chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / - # TODO: This operation will be removed after the updated container-selinux that - # includes the following commit is released. 
- # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f - # We use chcon as an interim solution until then. - chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" - umount $selinuxfs_path - else - die "Could not label the rootfs. Make sure that SELinux is enabled on the host \ -and the rootfs is built with SELINUX=yes" - fi - fi + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" sync OK "rootfs copied" - info "Removing unneeded systemd services and sockets" - for u in "${systemd_units[@]}"; do - find "${mount_dir}" -type f \( \ - -name "${u}.service" -o \ - -name "${u}.socket" \) \ - -exec rm -f {} \; - done - - info "Removing unneeded systemd files" - for u in "${systemd_files[@]}"; do - find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; - done - - info "Creating empty machine-id to allow systemd to bind-mount it" - touch "${mount_dir}/etc/machine-id" + info "Setup systemd" + setup_systemd "${mount_dir}" info "Unmounting root partition" umount "${mount_dir}" @@ -473,7 +491,50 @@ and the rootfs is built with SELINUX=yes" fi losetup -d "${device}" - rmdir "${mount_dir}" + rm -rf "${mount_dir}" +} + +create_erofs_rootfs_image() { + local rootfs="$1" + local image="$2" + local block_size="$3" + local agent_bin="$4" + + if [ "$block_size" -ne 4096 ]; then + die "Invalid block size for erofs" + fi + + if !
device="$(setup_loop_device "${image}")"; then + die "Could not setup loop device" + fi + + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) + + info "Copying content from rootfs to root partition" + cp -a "${rootfs}"/* "${mount_dir}" + + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" + + sync + OK "rootfs copied" + + info "Setup systemd" + setup_systemd "${mount_dir}" + + readonly fsimage="$(mktemp)" + mkfs.erofs -Enoinline_data "${fsimage}" "${mount_dir}" + local img_size="$(stat -c"%s" "${fsimage}")" + local img_size_mb="$(((("${img_size}" + 1048576) / 1048576) + 1 + "${rootfs_start}"))" + + create_disk "${image}" "${img_size_mb}" "ext4" "${rootfs_start}" + + dd if="${fsimage}" of="${device}p1" + + losetup -d "${device}" + rm -rf "${mount_dir}" "${fsimage}" + + erofs_rootfs_img_size_mb="${img_size_mb}" # exit statuses are 8-bit (0-255), so the size in MB is handed to main() via this global } set_dax_header() { @@ -566,14 +627,23 @@ main() { die "Invalid rootfs" fi - img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" "${fs_type}" "${block_size}") + if [ "${fs_type}" == 'erofs' ]; then + # mkfs.erofs takes a source root directory as its input + # rather than some device, so no need to guess the device dest size first. 
+ create_erofs_rootfs_image "${rootfs}" "${image}" \ + "${block_size}" "${agent_bin}" + rootfs_img_size="${erofs_rootfs_img_size_mb}"
+ img_size=$((rootfs_img_size + dax_header_sz)) + else + img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" \ + "${fs_type}" "${block_size}") - # the first 2M are for the first MBR + NVDIMM metadata and were already - # consider in calculate_img_size - rootfs_img_size=$((img_size - dax_header_sz)) - create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ + # the first 2M are for the first MBR + NVDIMM metadata and were already + # considered in calculate_img_size + rootfs_img_size=$((img_size - dax_header_sz)) + create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ "${fs_type}" "${block_size}" "${agent_bin}" - + fi # insert at the beginning of the image the MBR + DAX header set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}" } diff --git a/tools/packaging/kernel/configs/fragments/common/fs.conf b/tools/packaging/kernel/configs/fragments/common/fs.conf index ae4e3255f..c3be9f925 100644 --- a/tools/packaging/kernel/configs/fragments/common/fs.conf +++ b/tools/packaging/kernel/configs/fragments/common/fs.conf @@ -36,6 +36,11 @@ CONFIG_AUTOFS_FS=y CONFIG_TMPFS=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MISC_FILESYSTEMS=y +CONFIG_EROFS_FS=y +CONFIG_EROFS_FS_XATTR=y +CONFIG_EROFS_FS_ZIP=y +CONFIG_EROFS_FS_SECURITY=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EPOLL=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 3ad5abd03..29d6383b5 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -99 +100