From 1e531b44dc469e77744c9146d3ebef8312c1f8cf Mon Sep 17 00:00:00 2001 From: Zhongtao Hu Date: Sun, 29 Jan 2023 11:50:04 +0800 Subject: [PATCH 01/52] runtime:fix stat uds path os.Stat("unix:///run/vc/sbs/sid/shim-monitor.sock") will fail, should be os.Stat("/run/vc/sbs/sid/shim-monitor.sock") Fixes:#6148 Signed-off-by: Zhongtao Hu --- src/runtime/pkg/containerd-shim-v2/shim_management.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/runtime/pkg/containerd-shim-v2/shim_management.go b/src/runtime/pkg/containerd-shim-v2/shim_management.go index 7b59caef3..74c750918 100644 --- a/src/runtime/pkg/containerd-shim-v2/shim_management.go +++ b/src/runtime/pkg/containerd-shim-v2/shim_management.go @@ -315,11 +315,11 @@ func GetSandboxesStoragePathRust() string { // SocketAddress returns the address of the unix domain socket for communicating with the // shim management endpoint func SocketAddress(id string) string { - socketAddress := fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock")) - _, err := os.Stat(socketAddress) - // if the path not exist, check the rust runtime path - if err != nil { + // get the go runtime uds path + socketPath := filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock") + // if the path not exist, use the rust runtime uds path instead + if _, err := os.Stat(socketPath); err != nil { return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePathRust(), id, "shim-monitor.sock")) } - return socketAddress + return fmt.Sprintf("unix://%s", socketPath) } From 4e2db96ef76d48de3bd8cff119d27b64a478f75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 27 Jan 2023 12:45:19 +0100 Subject: [PATCH 02/52] runtime-rs: Don't try to build on Power MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As done for s390x, let's just 
skip the runtime-rs build for Power. Fixes: #6142 Signed-off-by: Fabiano Fidêncio --- src/runtime-rs/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index ffe7a934b..23e963f78 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -28,6 +28,13 @@ default: test: @echo "s390x not support currently" exit 0 +else ifeq ($(ARCH), powerpc64le) +default: + @echo "PowerPC 64 LE is not currently supported" + exit 0 +test: + @echo "PowerPC 64 LE is not currently supported" + exit 0 else ##TARGET default: build code default: runtime show-header From c0713553597660be7d999446a815a05cf831c0f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 27 Jan 2023 12:49:19 +0100 Subject: [PATCH 03/52] runtime-rs: Improve s390x error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing much to add, let's just make the message more clear. Signed-off-by: Fabiano Fidêncio --- src/runtime-rs/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index 23e963f78..ce0ba74cc 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -23,10 +23,10 @@ ARCH_FILE = $(ARCH_DIR)/$(ARCH)$(ARCH_FILE_SUFFIX) ifeq ($(ARCH), s390x) default: - @echo "s390x not support currently" + @echo "s390x is not currently supported" exit 0 test: - @echo "s390x not support currently" + @echo "s390x is not currently supported" exit 0 else ifeq ($(ARCH), powerpc64le) default: From 3a63e3c1f7c9cb24554c0510d61029dabd6ab512 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Mon, 30 Jan 2023 22:33:34 -0800 Subject: [PATCH 04/52] cni: Update cni plugins version to 1.2.0 A new release was made for the cni plugins. Use the new version for the CI. 
Fixes: #6169 Signed-off-by: Archana Shinde --- versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index f6bd320a2..f79eff770 100644 --- a/versions.yaml +++ b/versions.yaml @@ -193,7 +193,7 @@ externals: cni-plugins: description: "CNI network plugins" url: "https://github.com/containernetworking/plugins" - version: "v1.1.1" + version: "v1.2.0" conmon: description: "An OCI container runtime monitor" From 56f0a27fef9df38ac60e24b2411da7833186c8e3 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 30 Jan 2023 19:23:53 +0100 Subject: [PATCH 05/52] kernel: Add console kernel config for s390 This config is to update console kernel config for s390. Fixes: #6162 Signed-off-by: Hyounggyu Choi --- tools/packaging/kernel/configs/fragments/s390/console.conf | 2 ++ tools/packaging/kernel/kata_config_version | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/packaging/kernel/configs/fragments/s390/console.conf b/tools/packaging/kernel/configs/fragments/s390/console.conf index 2bc4db09b..6ff749efc 100644 --- a/tools/packaging/kernel/configs/fragments/s390/console.conf +++ b/tools/packaging/kernel/configs/fragments/s390/console.conf @@ -6,3 +6,5 @@ CONFIG_TN3270_CONSOLE=y CONFIG_CCW_CONSOLE=y CONFIG_SCLP_TTY=y CONFIG_SCLP_CONSOLE=y +CONFIG_SCLP_VT220_TTY=y +CONFIG_SCLP_VT220_CONSOLE=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 6529ff889..3ad5abd03 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -98 +99 From e071d9251ff8e97b2e6f21aa7c0f3f8a5380a45f Mon Sep 17 00:00:00 2001 From: yahaa <1477765176@qq.com> Date: Thu, 2 Feb 2023 12:07:59 +0800 Subject: [PATCH 06/52] Typo: change tabs in comment to spaces Fixes: #6150 Signed-off-by: yahaa <1477765176@qq.com> --- .../virtcontainers/pkg/annotations/annotations.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index a94878f93..4441b5fd0 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -87,7 +87,7 @@ const ( AssetHashType = kataAnnotationsPrefix + "asset_hash_type" // - // Generic annotations + // Generic annotations // // KernelParams is a sandbox annotation for passing additional guest kernel parameters. @@ -134,7 +134,7 @@ const ( UseLegacySerial = kataAnnotHypervisorPrefix + "use_legacy_serial" // - // CPU Annotations + // CPU Annotations // // DefaultVCPUs is a sandbox annotation for passing the default vcpus assigned for a VM by the hypervisor. @@ -144,7 +144,7 @@ const ( DefaultMaxVCPUs = kataAnnotHypervisorPrefix + "default_max_vcpus" // - // Memory related annotations + // Memory related annotations // // DefaultMemory is a sandbox annotation for the memory assigned for a VM by the hypervisor. @@ -175,7 +175,7 @@ const ( FileBackedMemRootDir = kataAnnotHypervisorPrefix + "file_mem_backend" // - // Shared File System related annotations + // Shared File System related annotations // // Msize9p is a sandbox annotation to specify as the msize for 9p shares @@ -197,7 +197,7 @@ const ( VirtioFSExtraArgs = kataAnnotHypervisorPrefix + "virtio_fs_extra_args" // - // Block Device related annotations + // Block Device related annotations // // BlockDeviceDriver specifies the driver to be used for block device either VirtioSCSI or VirtioBlock From f83115a838005a0156298c69759c65f63e59a901 Mon Sep 17 00:00:00 2001 From: SinghWang Date: Wed, 1 Feb 2023 20:09:05 +0800 Subject: [PATCH 07/52] docs: Fix missing critical steps in how-to-hotplug-memory-arm64.md The key steps in how-to-hotplug-memory-arm64.md are missing, resulting in the kata qemu pod not being created successfully. 
Fixes: #6105 Signed-off-by: SinghWang --- docs/how-to/how-to-hotplug-memory-arm64.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/how-to/how-to-hotplug-memory-arm64.md b/docs/how-to/how-to-hotplug-memory-arm64.md index 799367ad0..bc138b458 100644 --- a/docs/how-to/how-to-hotplug-memory-arm64.md +++ b/docs/how-to/how-to-hotplug-memory-arm64.md @@ -15,6 +15,18 @@ $ sudo .ci/aarch64/install_rom_aarch64.sh $ popd ``` +## Config KATA QEMU + +After executing the above script, two files will be generated under the directory `/usr/share/kata-containers/` by default, namely `kata-flash0.img` and `kata-flash1.img`. Next we need to change the configuration file of `kata qemu`, which is in `/opt/kata/share/defaults/kata-containers/configuration-qemu.toml` by default, specify in the configuration file to use the UEFI ROM installed above. The above is an example of `kata deploy` installation. For package management installation, please use `kata-runtime env` to find the location of the configuration file. Please refer to the following configuration. + +``` +[hypervisor.qemu] + +# -pflash can add image file to VM. The arguments of it should be in format +# of ["/path/to/flash0.img", "/path/to/flash1.img"] +pflashes = ["/usr/share/kata-containers/kata-flash0.img", "/usr/share/kata-containers/kata-flash1.img"] +``` + ## Run for test Let's test if the memory hotplug is ready for Kata after install the UEFI ROM. Make sure containerd is ready to run Kata before test. From 856ab66871e40ebfb7f1775cfec3138a16eb61f7 Mon Sep 17 00:00:00 2001 From: Amulyam24 Date: Thu, 2 Feb 2023 13:21:28 +0530 Subject: [PATCH 08/52] virtiofsd: fix the build on ppc64le link-self-contained is not supported on ppc64le rust target. Hence, do not pass it while building virtiofsd. 
Fixes: #6195 Signed-off-by: Amulyam24 --- tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh b/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh index 4f4f05f0d..6bad74ffb 100755 --- a/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh +++ b/tools/packaging/static-build/virtiofsd/build-static-virtiofsd.sh @@ -57,6 +57,7 @@ init_env() { LIBC="gnu" ARCH="powerpc64le" ARCH_LIBC=${ARCH}-linux-${LIBC} + extra_rust_flags="" ;; "s390x") LIBC="gnu" From 3c48f2202cd3753fec1a0a729a52651b48a43aa3 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 1 Feb 2023 16:32:01 +0100 Subject: [PATCH 09/52] runtime: Improve documentation of appendFDs The cmd.ExtraFiles feature that is used to implement appendFDs takes an array of arbitrary file descriptors and internally renumbers them to be consecutive starting from 3, using dup2(). This isn't especially obvious : document it for the sake of clarity. Fixes #6199 Signed-off-by: Greg Kurz --- src/runtime/pkg/govmm/qemu/qemu.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index 6932826d7..43707c65e 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -2631,8 +2631,9 @@ type Config struct { qemuParams []string } -// appendFDs append a list of file descriptors to the qemu configuration and -// returns a slice of offset file descriptors that will be seen by the qemu process. +// appendFDs appends a list of arbitrary file descriptors to the qemu configuration and +// returns a slice of consecutive file descriptors that will be seen by the qemu process. +// Please see the comment below for details. 
func (config *Config) appendFDs(fds []*os.File) []int { var fdInts []int @@ -2644,6 +2645,10 @@ func (config *Config) appendFDs(fds []*os.File) []int { // ExtraFiles specifies additional open files to be inherited by the // new process. It does not include standard input, standard output, or // standard error. If non-nil, entry i becomes file descriptor 3+i. + // This means that arbitrary file descriptors fd0, fd1... fdN passed in + // the array will be presented to the guest as consecutive descriptors + // 3, 4... N+3. The golang library internally relies on dup2() to do + // the renumbering. for i := range fds { fdInts = append(fdInts, oldLen+3+i) } From 57c5e5629bce66da0452b249b35b753636cebb3e Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 9 Jan 2023 17:03:07 +0800 Subject: [PATCH 10/52] Dragonball: add cpu resize ability Add cpu resize ability upon upcall communication channel. Runtime could use ResizeVcpu VmmAction and pass the desired vCPU number to the Dragonball hypervisor. Dragonball will trigger the device manager service in guest kernel's upcall server to do cpu resize. 
Fixes: #6008 Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 59 ++++++- src/dragonball/src/error.rs | 2 +- src/dragonball/src/vcpu/mod.rs | 5 +- src/dragonball/src/vcpu/vcpu_impl.rs | 14 +- src/dragonball/src/vcpu/vcpu_manager.rs | 149 +++++++++--------- src/dragonball/src/vm/mod.rs | 30 +++- .../hypervisor/src/dragonball/vmm_instance.rs | 2 +- 7 files changed, 178 insertions(+), 83 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 45591e7ed..94b4ca16d 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -35,6 +35,9 @@ pub use crate::device_manager::virtio_net_dev_mgr::{ #[cfg(feature = "virtio-vsock")] pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; +#[cfg(feature = "hotplug")] +pub use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; + use super::*; /// Wrapper for all errors associated with VMM actions. @@ -44,9 +47,13 @@ pub enum VmmActionError { #[error("the virtual machine instance ID is invalid")] InvalidVMID, + /// VM doesn't exist and can't get VM information. + #[error("VM doesn't exist and can't get VM information")] + VmNotExist, + /// Failed to hotplug, due to Upcall not ready. #[error("Upcall not ready, can't hotplug device.")] - UpcallNotReady, + UpcallServerNotReady, /// The action `ConfigureBootSource` failed either because of bad user input or an internal /// error. @@ -85,6 +92,11 @@ pub enum VmmActionError { /// The action `InsertFsDevice` failed either because of bad user input or an internal error. #[error("virtio-fs device error: {0}")] FsDevice(#[source] FsDeviceError), + + #[cfg(feature = "hotplug")] + /// The action `ResizeVcpu` Failed + #[error("vcpu resize error : {0}")] + ResizeVcpu(#[source] VcpuResizeError), } /// This enum represents the public interface of the VMM. 
Each action contains various @@ -156,6 +168,10 @@ pub enum VmmAction { #[cfg(feature = "virtio-fs")] /// Update fs rate limiter, after microVM start. UpdateFsDevice(FsDeviceConfigUpdateInfo), + + #[cfg(feature = "hotplug")] + /// Resize Vcpu number in the guest. + ResizeVcpu(VcpuResizeInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -256,6 +272,8 @@ impl VmmService { VmmAction::UpdateFsDevice(fs_update_cfg) => { self.update_fs_rate_limiters(vmm, fs_update_cfg) } + #[cfg(feature = "hotplug")] + VmmAction::ResizeVcpu(vcpu_resize_cfg) => self.resize_vcpu(vmm, vcpu_resize_cfg), }; debug!("send vmm response: {:?}", response); @@ -462,8 +480,8 @@ impl VmmService { let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { - if let StartMicroVmError::UpcallNotReady = e { - return VmmActionError::UpcallNotReady; + if let StartMicroVmError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; } VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot) })?; @@ -518,8 +536,8 @@ impl VmmService { .map_err(|e| { if let StartMicroVmError::MicroVMAlreadyRunning = e { VmmActionError::VirtioNet(VirtioNetDeviceError::UpdateNotAllowedPostBoot) - } else if let StartMicroVmError::UpcallNotReady = e { - VmmActionError::UpcallNotReady + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady } else { VmmActionError::StartMicroVm(e) } @@ -595,6 +613,37 @@ impl VmmService { .map(|_| VmmData::Empty) .map_err(VmmActionError::FsDevice) } + + #[cfg(feature = "hotplug")] + fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { + if !cfg!(target_arch = "x86_64") { + // TODO: Arm need to support vcpu hotplug. 
issue: #6010 + warn!("This arch do not support vm resize!"); + return Ok(VmmData::Empty); + } + + if !cfg!(feature = "dbs-upcall") { + warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature."); + return Ok(VmmData::Empty); + } + + let vm = vmm.get_vm_mut().ok_or(VmmActionError::VmNotExist)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::ResizeVcpu( + VcpuResizeError::UpdateNotAllowedPreBoot, + )); + } + + vm.resize_vcpu(config, None).map_err(|e| { + if let VcpuResizeError::UpcallServerNotReady = e { + return VmmActionError::UpcallServerNotReady; + } + VmmActionError::ResizeVcpu(e) + })?; + + Ok(VmmData::Empty) + } } fn handle_cpu_topology( diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 9ad0b0792..35f092a50 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -100,7 +100,7 @@ pub enum StartMicroVmError { /// Upcall is not ready #[error("the upcall client is not ready")] - UpcallNotReady, + UpcallServerNotReady, /// Configuration passed in is invalidate. 
#[error("invalid virtual machine configuration: {0} ")] diff --git a/src/dragonball/src/vcpu/mod.rs b/src/dragonball/src/vcpu/mod.rs index 5b8ed397f..b04baf29f 100644 --- a/src/dragonball/src/vcpu/mod.rs +++ b/src/dragonball/src/vcpu/mod.rs @@ -10,7 +10,10 @@ mod vcpu_manager; #[cfg(target_arch = "x86_64")] use dbs_arch::cpuid::VpmuFeatureLevel; -pub use vcpu_manager::{VcpuManager, VcpuManagerError}; +pub use vcpu_manager::{VcpuManager, VcpuManagerError, VcpuResizeInfo}; + +#[cfg(feature = "hotplug")] +pub use vcpu_manager::VcpuResizeError; /// vcpu config collection pub struct VcpuConfig { diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs index f6c1c2d4c..1c21ea38b 100644 --- a/src/dragonball/src/vcpu/vcpu_impl.rs +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -214,10 +214,20 @@ pub enum VcpuResponse { CacheRevalidated, } +#[derive(Debug, PartialEq)] +/// Vcpu Hotplug Result returned from the guest +pub enum VcpuResizeResult { + /// All vCPU hotplug / hot-unplug operations are successful + Success = 0, + /// vCPU hotplug / hot-unplug failed + Failed = 1, +} + /// List of events that the vcpu_state_sender can send. pub enum VcpuStateEvent { - /// (result, response) for hotplug, result 0 means failure, 1 means success. - Hotplug((i32, u32)), + /// (result, response) for hotplug / hot-unplugged. + /// response records how many cpu has successfully being hotplugged / hot-unplugged. + Hotplug((VcpuResizeResult, u32)), } /// Wrapper over vCPU that hides the underlying interactions with the vCPU thread. 
diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 2b076cd5b..383f1f0a7 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -29,7 +29,7 @@ use crate::address_space_manager::GuestAddressSpaceImpl; use crate::api::v1::InstanceInfo; use crate::kvm_context::KvmContext; use crate::vcpu::vcpu_impl::{ - Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResponse, VcpuStateEvent, + Vcpu, VcpuError, VcpuEvent, VcpuHandle, VcpuResizeResult, VcpuResponse, VcpuStateEvent, }; use crate::vcpu::VcpuConfig; use crate::vm::VmConfigInfo; @@ -113,11 +113,6 @@ pub enum VcpuManagerError { #[error("vcpu internal error: {0}")] Vcpu(#[source] VcpuError), - #[cfg(feature = "hotplug")] - /// vCPU resize error - #[error("resize vcpu error: {0}")] - VcpuResize(#[source] VcpuResizeError), - /// Kvm Ioctl Error #[error("failure in issuing KVM ioctl command: {0}")] Kvm(#[source] kvm_ioctls::Error), @@ -132,6 +127,10 @@ pub enum VcpuResizeError { VcpuIsHotplugging, /// Cannot update the configuration of the microvm pre boot. + #[error("resize vcpu operation is not allowed pre boot")] + UpdateNotAllowedPreBoot, + + /// Cannot update the configuration of the microvm post boot. #[error("resize vcpu operation is not allowed after boot")] UpdateNotAllowedPostBoot, @@ -147,10 +146,24 @@ pub enum VcpuResizeError { #[error("Removable vcpu not enough, removable vcpu num: {0}, number to remove: {1}, present vcpu count {2}")] LackRemovableVcpus(u16, u16, u16), - #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] + #[cfg(feature = "dbs-upcall")] /// Cannot update the configuration by upcall channel. 
#[error("cannot update the configuration by upcall channel: {0}")] Upcall(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "dbs-upcall")] + /// Cannot find upcall client + #[error("Cannot find upcall client")] + UpcallClientMissing, + + #[cfg(feature = "dbs-upcall")] + /// Upcall server is not ready + #[error("Upcall server is not ready")] + UpcallServerNotReady, + + /// Vcpu manager error + #[error("Vcpu manager error : {0}")] + Vcpu(#[source] VcpuManagerError), } /// Result for vCPU manager operations @@ -163,6 +176,13 @@ enum VcpuAction { Hotunplug, } +/// VcpuResizeInfo describes the information for vcpu hotplug / hot-unplug +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct VcpuResizeInfo { + /// The desired vcpu count to resize. + pub vcpu_count: Option, +} + /// Infos related to per vcpu #[derive(Default)] pub(crate) struct VcpuInfo { @@ -810,11 +830,9 @@ mod hotplug { &mut self, vcpu_count: u8, sync_tx: Option>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { if self.get_vcpus_action() != VcpuAction::None { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::VcpuIsHotplugging, - )); + return Err(VcpuResizeError::VcpuIsHotplugging); } self.action_sycn_tx = sync_tx; @@ -831,9 +849,7 @@ mod hotplug { Ordering::Less => self.do_del_vcpu(vcpu_count, upcall), } } else { - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::UpdateNotAllowedPostBoot, - )) + Err(VcpuResizeError::UpdateNotAllowedPostBoot) } } @@ -841,28 +857,31 @@ mod hotplug { &mut self, vcpu_count: u8, upcall_client: Arc>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { info!("resize vcpu: add"); if vcpu_count > self.vcpu_config.max_vcpu_count { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::ExpectedVcpuExceedMax, - )); + return Err(VcpuResizeError::ExpectedVcpuExceedMax); } - let created_vcpus = self.create_vcpus(vcpu_count, None, None)?; - let 
cpu_ids = self.activate_vcpus(vcpu_count, true).map_err(|e| { - // we need to rollback when activate vcpu error - error!("activate vcpu error, rollback! {:?}", e); - let activated_vcpus: Vec = created_vcpus - .iter() - .filter(|&cpu_id| self.vcpu_infos[*cpu_id as usize].handle.is_some()) - .copied() - .collect(); - if let Err(e) = self.exit_vcpus(&activated_vcpus) { - error!("try to rollback error, stop_vcpu: {:?}", e); - } - e - })?; + let created_vcpus = self + .create_vcpus(vcpu_count, None, None) + .map_err(VcpuResizeError::Vcpu)?; + let cpu_ids = self + .activate_vcpus(vcpu_count, true) + .map_err(|e| { + // we need to rollback when activate vcpu error + error!("activate vcpu error, rollback! {:?}", e); + let activated_vcpus: Vec = created_vcpus + .iter() + .filter(|&cpu_id| self.vcpu_infos[*cpu_id as usize].handle.is_some()) + .copied() + .collect(); + if let Err(e) = self.exit_vcpus(&activated_vcpus) { + error!("try to rollback error, stop_vcpu: {:?}", e); + } + e + }) + .map_err(VcpuResizeError::Vcpu)?; let mut cpu_ids_array = [0u8; (u8::MAX as usize) + 1]; cpu_ids_array[..cpu_ids.len()].copy_from_slice(&cpu_ids[..cpu_ids.len()]); @@ -882,23 +901,19 @@ mod hotplug { &mut self, vcpu_count: u8, upcall_client: Arc>, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { info!("resize vcpu: delete"); if vcpu_count == 0 { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::Vcpu0CanNotBeRemoved, - )); + return Err(VcpuResizeError::Vcpu0CanNotBeRemoved); } let mut cpu_ids = self.calculate_removable_vcpus(); let cpu_num_to_be_del = (self.present_vcpus_count() - vcpu_count) as usize; if cpu_num_to_be_del >= cpu_ids.len() { - return Err(VcpuManagerError::VcpuResize( - VcpuResizeError::LackRemovableVcpus( - cpu_ids.len() as u16, - cpu_num_to_be_del as u16, - self.present_vcpus_count() as u16, - ), + return Err(VcpuResizeError::LackRemovableVcpus( + cpu_ids.len() as u16, + cpu_num_to_be_del as u16, + 
self.present_vcpus_count() as u16, )); } @@ -924,7 +939,7 @@ mod hotplug { &self, _upcall_client: Arc>, _request: DevMgrRequest, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { Ok(()) } @@ -933,7 +948,7 @@ mod hotplug { &self, upcall_client: Arc>, request: DevMgrRequest, - ) -> std::result::Result<(), VcpuManagerError> { + ) -> std::result::Result<(), VcpuResizeError> { // This is used to fix clippy warnings. use dbs_upcall::{DevMgrResponse, UpcallClientRequest, UpcallClientResponse}; @@ -946,9 +961,14 @@ mod hotplug { Box::new(move |result| match result { UpcallClientResponse::DevMgr(response) => { if let DevMgrResponse::CpuDev(resp) = response { + let result: VcpuResizeResult = if resp.result == 0 { + VcpuResizeResult::Success + } else { + VcpuResizeResult::Failed + }; vcpu_state_sender .send(VcpuStateEvent::Hotplug(( - resp.result, + result, resp.info.apic_id_index, ))) .unwrap(); @@ -957,7 +977,7 @@ mod hotplug { } UpcallClientResponse::UpcallReset => { vcpu_state_sender - .send(VcpuStateEvent::Hotplug((0, 0))) + .send(VcpuStateEvent::Hotplug((VcpuResizeResult::Success, 0))) .unwrap(); vcpu_state_event.write(1).unwrap(); } @@ -968,7 +988,6 @@ mod hotplug { }), ) .map_err(VcpuResizeError::Upcall) - .map_err(VcpuManagerError::VcpuResize) } /// Get removable vcpus. 
@@ -993,16 +1012,19 @@ impl VcpuEpollHandler { while let Ok(event) = self.rx.try_recv() { match event { VcpuStateEvent::Hotplug((success, cpu_count)) => { - info!("get vcpu event, cpu_index {}", cpu_count); - self.process_cpu_action(success != 0, cpu_count); + info!( + "get vcpu event, cpu_index {} success {:?}", + cpu_count, success + ); + self.process_cpu_action(success, cpu_count); } } } } - fn process_cpu_action(&self, success: bool, _cpu_index: u32) { + fn process_cpu_action(&self, result: VcpuResizeResult, _cpu_index: u32) { let mut vcpu_manager = self.vcpu_manager.lock().unwrap(); - if success { + if result == VcpuResizeResult::Success { match vcpu_manager.get_vcpus_action() { VcpuAction::Hotplug => { // Notify hotplug success @@ -1362,12 +1384,7 @@ mod tests { // vcpu is already in hotplug process let res = vcpu_manager.resize_vcpu(1, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::VcpuIsHotplugging - )) - )); + assert!(matches!(res, Err(VcpuResizeError::VcpuIsHotplugging))); // clear vcpus action let cpu_ids = vec![0]; @@ -1377,9 +1394,7 @@ mod tests { let res = vcpu_manager.resize_vcpu(1, None); assert!(matches!( res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::UpdateNotAllowedPostBoot - )) + Err(VcpuResizeError::UpdateNotAllowedPostBoot) )); // init upcall channel @@ -1397,20 +1412,10 @@ mod tests { // exceeed max vcpu count let res = vcpu_manager.resize_vcpu(4, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::ExpectedVcpuExceedMax - )) - )); + assert!(matches!(res, Err(VcpuResizeError::ExpectedVcpuExceedMax))); // remove vcpu 0 let res = vcpu_manager.resize_vcpu(0, None); - assert!(matches!( - res, - Err(VcpuManagerError::VcpuResize( - VcpuResizeError::Vcpu0CanNotBeRemoved - )) - )); + assert!(matches!(res, Err(VcpuResizeError::Vcpu0CanNotBeRemoved))); } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index d573080ae..9a39a3d53 100644 
--- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -4,6 +4,7 @@ use std::io::{self, Read, Seek, SeekFrom}; use std::ops::Deref; use std::os::unix::io::RawFd; + use std::sync::{Arc, Mutex, RwLock}; use dbs_address_space::AddressSpace; @@ -22,6 +23,8 @@ use vmm_sys_util::eventfd::EventFd; #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] use dbs_upcall::{DevMgrService, UpcallClient}; +#[cfg(feature = "hotplug")] +use std::sync::mpsc::Sender; use crate::address_space_manager::{ AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, @@ -35,6 +38,8 @@ use crate::event_manager::EventManager; use crate::kvm_context::KvmContext; use crate::resource_manager::ResourceManager; use crate::vcpu::{VcpuManager, VcpuManagerError}; +#[cfg(feature = "hotplug")] +use crate::vcpu::{VcpuResizeError, VcpuResizeInfo}; #[cfg(target_arch = "aarch64")] use dbs_arch::gic::Error as GICError; @@ -795,7 +800,30 @@ impl Vm { } else if self.is_upcall_client_ready() { Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) } else { - Err(StartMicroVmError::UpcallNotReady) + Err(StartMicroVmError::UpcallServerNotReady) + } + } + + /// Resize MicroVM vCPU number + #[cfg(feature = "hotplug")] + pub fn resize_vcpu( + &mut self, + config: VcpuResizeInfo, + sync_tx: Option>, + ) -> std::result::Result<(), VcpuResizeError> { + if self.upcall_client().is_none() { + Err(VcpuResizeError::UpcallClientMissing) + } else if self.is_upcall_client_ready() { + if let Some(vcpu_count) = config.vcpu_count { + self.vcpu_manager() + .map_err(VcpuResizeError::Vcpu)? 
+ .resize_vcpu(vcpu_count, sync_tx)?; + + self.vm_config.vcpu_count = vcpu_count; + } + Ok(()) + } else { + Err(VcpuResizeError::UpcallServerNotReady) } } diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs index 00829ad4c..9837ea667 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs @@ -314,7 +314,7 @@ impl VmmInstance { return Ok(vmm_data); } Err(vmm_action_error) => { - if let VmmActionError::UpcallNotReady = vmm_action_error { + if let VmmActionError::UpcallServerNotReady = vmm_action_error { std::thread::sleep(std::time::Duration::from_millis(10)); continue; } else { From b7dd97cac653f770f1eef479c0b4c7a3e3936d4f Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 11 Jan 2023 09:10:16 +0800 Subject: [PATCH 11/52] kata-ctl: fix permission deny issue in test_add_remove test_add_remove and test_get_sandbox_id_for_volume need root user, but test_drop_privs can temporarily change the user to "nobody" that can lead to the failure of these tests. Serialise these three tests can fix it. 
Fixes: #6055 Signed-off-by: Jianyong Wu --- src/tools/kata-ctl/Cargo.toml | 1 + src/tools/kata-ctl/src/ops/volume_ops.rs | 3 +++ src/tools/kata-ctl/src/utils.rs | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/src/tools/kata-ctl/Cargo.toml b/src/tools/kata-ctl/Cargo.toml index cc32054d2..d1f3c5637 100644 --- a/src/tools/kata-ctl/Cargo.toml +++ b/src/tools/kata-ctl/Cargo.toml @@ -30,6 +30,7 @@ shim-interface = { path = "../../libs/shim-interface"} kata-types = { path = "../../libs/kata-types" } safe-path = { path = "../../libs/safe-path" } agent = { path = "../../runtime-rs/crates/agent"} +serial_test = "0.5.1" [target.'cfg(target_arch = "s390x")'.dependencies] reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] } diff --git a/src/tools/kata-ctl/src/ops/volume_ops.rs b/src/tools/kata-ctl/src/ops/volume_ops.rs index a9df9ce78..60c753f6f 100644 --- a/src/tools/kata-ctl/src/ops/volume_ops.rs +++ b/src/tools/kata-ctl/src/ops/volume_ops.rs @@ -169,11 +169,13 @@ pub fn get_sandbox_id_for_volume(volume_path: &str) -> Result { mod tests { use super::*; use kata_types::mount::DirectVolumeMountInfo; + use serial_test::serial; use std::{collections::HashMap, fs}; use tempfile::tempdir; use test_utils::skip_if_not_root; #[test] + #[serial] fn test_get_sandbox_id_for_volume() { // this test has to run as root, so has to manually cleanup afterwards skip_if_not_root!(); @@ -259,6 +261,7 @@ mod tests { } #[test] + #[serial] fn test_add_remove() { skip_if_not_root!(); // example volume dir is a/b/c, note the behavior of join would take "/a" as absolute path. 
diff --git a/src/tools/kata-ctl/src/utils.rs b/src/tools/kata-ctl/src/utils.rs index b1564f4c4..8e6815d02 100644 --- a/src/tools/kata-ctl/src/utils.rs +++ b/src/tools/kata-ctl/src/utils.rs @@ -147,10 +147,12 @@ pub fn get_generic_cpu_details(cpu_info_file: &str) -> Result<(String, String)> #[cfg(test)] mod tests { use super::*; + use serial_test::serial; use std::io::Write; use tempfile::tempdir; #[test] + #[serial] fn test_drop_privs() { let res = drop_privs(); assert!(res.is_ok()); @@ -164,6 +166,7 @@ mod tests { } #[test] + #[serial] fn test_kernel_version_valid_input() { let dir = tempdir().unwrap(); let file_path = dir.path().join("proc-version"); @@ -208,6 +211,7 @@ mod tests { } #[test] + #[serial] fn test_get_distro_details_valid_file() { let dir = tempdir().unwrap(); let file_path = dir.path().join("os-version"); From 59f104c02290dbcee5c42125527e79493578e51c Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Thu, 2 Feb 2023 13:28:40 +0800 Subject: [PATCH 12/52] runtime: skip unit test that fail regularly on aarch64 There are lots of unit test cases fails regularly on aarch64, including TestIOCopy, create_tmpfs. Temporarily skip it for now and enable it after them get fixed. 
Fixes: #6194 Signed-off-by: Jianyong Wu --- src/agent/src/watcher.rs | 1 + src/runtime/pkg/containerd-shim-v2/stream_test.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/agent/src/watcher.rs b/src/agent/src/watcher.rs index 468684a43..a6cf4113b 100644 --- a/src/agent/src/watcher.rs +++ b/src/agent/src/watcher.rs @@ -1291,6 +1291,7 @@ mod tests { #[tokio::test] #[serial] + #[cfg(not(target_arch = "aarch64"))] async fn create_tmpfs() { skip_if_not_root!(); diff --git a/src/runtime/pkg/containerd-shim-v2/stream_test.go b/src/runtime/pkg/containerd-shim-v2/stream_test.go index ea4f026ca..c85633c88 100644 --- a/src/runtime/pkg/containerd-shim-v2/stream_test.go +++ b/src/runtime/pkg/containerd-shim-v2/stream_test.go @@ -10,6 +10,7 @@ import ( "io" "os" "path/filepath" + "runtime" "syscall" "testing" "time" @@ -96,6 +97,10 @@ func TestNewTtyIOFifoReopen(t *testing.T) { } func TestIoCopy(t *testing.T) { + // This test fails on aarch64 regularly, temporarily skip it + if runtime.GOARCH == "arm64" { + t.Skip("Skip TestIoCopy for aarch64") + } assert := assert.New(t) ctx := context.TODO() From d73f3a8a26b9941c469f237de257bdcf95414f82 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Thu, 2 Feb 2023 14:23:02 -0800 Subject: [PATCH 13/52] github-action: Add step to verify kernel config version id updated The version mentioned in the `kata_config_version` needs to be updated for any kernel config change or changed to the patches applied. Without this, CI would not test with the latest kernel changes. We use to enforce this earlier as part of CI when `packaging` was a standalone repo. Add back this check as part of a github action so that the check is performed early on instead of a CI job. 
Fixes: #6210 Signed-off-by: Archana Shinde --- .github/workflows/static-checks.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/static-checks.yaml b/.github/workflows/static-checks.yaml index 69dfbae59..0cb9d72d3 100644 --- a/.github/workflows/static-checks.yaml +++ b/.github/workflows/static-checks.yaml @@ -27,7 +27,6 @@ jobs: target_branch: ${{ github.base_ref }} steps: - name: Checkout code - if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} uses: actions/checkout@v3 with: fetch-depth: 0 @@ -38,6 +37,22 @@ jobs: go-version: 1.19.3 env: GOPATH: ${{ runner.workspace }}/kata-containers + - name: Check kernel config version + run: | + cd "${{ github.workspace }}/src/github.com/${{ github.repository }}" + kernel_dir="tools/packaging/kernel/" + kernel_version_file="${kernel_dir}kata_config_version" + modified_files=$(git diff --name-only origin/main..HEAD) + result=$(git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null") + if git whatchanged origin/main..HEAD "${kernel_dir}" >>"/dev/null"; then + echo "Kernel directory has changed, checking if $kernel_version_file has been updated" + if echo "$modified_files" | grep -v "README.md" | grep "${kernel_dir}" >>"/dev/null"; then + echo "$modified_files" | grep "$kernel_version_file" >>/dev/null || ( echo "Please bump version in $kernel_version_file" && exit 1) + else + echo "Readme file changed, no need for kernel config version update." + fi + echo "Check passed" + fi - name: Setup GOPATH if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') }} run: | From 3c24e23409da3cbe8b22814a1cd365fb5efacf22 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Thu, 2 Feb 2023 14:29:46 -0800 Subject: [PATCH 14/52] README: Update Readme under packaging/kernel Update Readme to instruct users to increment the kata config version for any changes made to configs or patches under packaging/kernel. 
Signed-off-by: Archana Shinde --- tools/packaging/kernel/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/packaging/kernel/README.md b/tools/packaging/kernel/README.md index dbf991540..ce4ea30c4 100644 --- a/tools/packaging/kernel/README.md +++ b/tools/packaging/kernel/README.md @@ -107,6 +107,10 @@ Kata Containers packaging repository holds the kernel configs and patches. The config and patches can work for many versions, but we only test the kernel version defined in the [Kata Containers versions file][kata-containers-versions-file]. +For any change to the kernel configs or patches, the version defined in the file +[`kata_config_version`](kata_config_version) needs to be incremented +so that the CI can test with these changes. + For further details, see [the kernel configuration documentation](configs). ## How is it tested From f49b89b632e64d0cd72bbe39ed4b5f83abbfb0bb Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Fri, 3 Feb 2023 11:11:42 +0100 Subject: [PATCH 15/52] CI: Set docker version to v20.10 in ubuntu:20.04 for s390x|ppc64le This is to make a docker version to v20.10 in docker upstream image ubuntu:20.04 for s390x and ppc64le. 
Fixes: #6211 Signed-off-by: Hyounggyu Choi --- tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile index 06a4a93ac..2ff5f5ae8 100644 --- a/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile +++ b/tools/packaging/kata-deploy/local-build/dockerbuild/Dockerfile @@ -8,6 +8,7 @@ ENV INSTALL_IN_GOPATH=false COPY install_yq.sh /usr/bin/install_yq.sh +SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Install yq and docker RUN apt-get update && \ @@ -18,6 +19,7 @@ RUN apt-get update && \ apt-get clean && rm -rf /var/lib/apt/lists/ && \ install_yq.sh && \ curl -fsSL https://get.docker.com -o get-docker.sh && \ + if uname -m | grep -Eq 's390x|ppc64le'; then export VERSION="v20.10"; fi && \ sh get-docker.sh ARG IMG_USER=kata-builder From 9794c52c6517badf2e3b6f665883d11b37a3b62e Mon Sep 17 00:00:00 2001 From: joannejchen Date: Thu, 2 Feb 2023 15:23:29 -0600 Subject: [PATCH 16/52] improvement: Fix naming conventions for span name and log subsystem Normally, the span name should be the same as the function name, and the log subsystem should not contain spaces. 
Fixes #6153 Signed-off-by: joannejchen --- src/runtime/virtcontainers/fs_share_linux.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runtime/virtcontainers/fs_share_linux.go b/src/runtime/virtcontainers/fs_share_linux.go index d4d68a42f..085f3b4f8 100644 --- a/src/runtime/virtcontainers/fs_share_linux.go +++ b/src/runtime/virtcontainers/fs_share_linux.go @@ -42,16 +42,16 @@ func NewFilesystemShare(s *Sandbox) (FilesystemSharer, error) { }, nil } -// Logger returns a logrus logger appropriate for logging Filesystem sharing messages +// Logger returns a logrus logger appropriate for logging filesystem sharing messages func (f *FilesystemShare) Logger() *logrus.Entry { return virtLog.WithFields(logrus.Fields{ - "subsystem": "filesystem share", + "subsystem": "fs_share", "sandbox": f.sandbox.ID(), }) } func (f *FilesystemShare) prepareBindMounts(ctx context.Context) error { - span, ctx := katatrace.Trace(ctx, f.Logger(), "setupBindMounts", fsShareTracingTags) + span, ctx := katatrace.Trace(ctx, f.Logger(), "prepareBindMounts", fsShareTracingTags) defer span.End() var err error From 390916b33c48fe7fcdc34a78a58a4425e42c76a0 Mon Sep 17 00:00:00 2001 From: d3c3mber Date: Tue, 7 Feb 2023 14:06:12 +0800 Subject: [PATCH 17/52] runtime: remove not used shim configurations ShimPath and ShimDebug are not needed anymore. 
Fixes: #6147 Signed-off-by: d3c3mber --- src/runtime/pkg/containerd-shim-v2/create_test.go | 4 +--- src/runtime/pkg/katatestutils/utils.go | 6 ------ 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index 75638b518..eecc19968 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -320,7 +320,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") - shimPath := path.Join(dir, "shim") logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -340,7 +339,6 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err ImagePath: imagePath, KernelParams: kernelParams, MachineType: machineType, - ShimPath: shimPath, LogPath: logPath, DisableBlock: disableBlockDevice, BlockDeviceDriver: blockDeviceDriver, @@ -360,7 +358,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err return "", err } - files := []string{hypervisorPath, kernelPath, imagePath, shimPath} + files := []string{hypervisorPath, kernelPath, imagePath} for _, file := range files { // create the resource (which must be >0 bytes) diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 2aa0754ca..33a5d1836 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -213,7 +213,6 @@ type RuntimeConfigOptions struct { ImagePath string KernelParams string MachineType string - ShimPath string LogPath string BlockDeviceDriver string BlockDeviceAIO string @@ -236,7 +235,6 @@ type RuntimeConfigOptions struct { HypervisorDebug bool RuntimeDebug bool RuntimeTrace bool - ShimDebug bool AgentDebug bool AgentTrace bool 
EnablePprof bool @@ -323,10 +321,6 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { shared_fs = "` + config.SharedFS + `" virtio_fs_daemon = "` + config.VirtioFSDaemon + `" - [shim.kata] - path = "` + config.ShimPath + `" - enable_debug = ` + strconv.FormatBool(config.ShimDebug) + ` - [agent.kata] enable_debug = ` + strconv.FormatBool(config.AgentDebug) + ` enable_tracing = ` + strconv.FormatBool(config.AgentTrace) + ` From ab59a65c9265e51674ae56643c5e1c9d309c8241 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Wed, 1 Feb 2023 20:51:53 +0800 Subject: [PATCH 18/52] runtime-rs: neglect a certain error when delete cgroup Delete cgroup for a thread which may exit can lead to panic. Just neglect that error is harmless also avoid this failure. Fixes: #6192 Signed-off-by: Jianyong Wu --- .../crates/resource/src/cgroups/mod.rs | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/resource/src/cgroups/mod.rs b/src/runtime-rs/crates/resource/src/cgroups/mod.rs index 8dbef7f64..7787d2ad0 100644 --- a/src/runtime-rs/crates/resource/src/cgroups/mod.rs +++ b/src/runtime-rs/crates/resource/src/cgroups/mod.rs @@ -9,6 +9,8 @@ mod utils; use std::{ collections::{HashMap, HashSet}, + error::Error, + io, iter::FromIterator, sync::Arc, }; @@ -24,6 +26,8 @@ use oci::LinuxResources; use persist::sandbox_persist::Persist; use tokio::sync::RwLock; +const OS_ERROR_NO_SUCH_PROCESS: i32 = 3; + pub struct CgroupArgs { pub sid: String, pub config: TomlConfig, @@ -109,7 +113,22 @@ impl CgroupsResource { /// overhead_cgroup_manager to the parent and then delete the cgroups. pub async fn delete(&self) -> Result<()> { for cg_pid in self.cgroup_manager.tasks() { - self.cgroup_manager.remove_task(cg_pid)?; + // For now, we can't guarantee that the thread in cgroup_manager does still + // exist. Once it exits, we should ignore the error returned by remove_task + // to let it go. 
+ if let Err(error) = self.cgroup_manager.remove_task(cg_pid) { + match error.source() { + Some(err) => match err.downcast_ref::<io::Error>() { + Some(e) => { + if e.raw_os_error() != Some(OS_ERROR_NO_SUCH_PROCESS) { + return Err(error.into()); + } + } + None => return Err(error.into()), + }, + None => return Err(error.into()), + } + } } self.cgroup_manager From 5d37d31ac7d980ceafb3375c5f158e4ea2661737 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Tue, 7 Feb 2023 17:53:23 +0800 Subject: [PATCH 19/52] cgroups: upgrade cgroupfs to 0.3.1 Trait method cause for std::error::Error is deprecated thus need replace it with source method for cgroups-fs::error::ErrorKind. Fixes: #6192 Signed-off-by: Jianyong Wu --- src/agent/Cargo.lock | 4 ++-- src/agent/Cargo.toml | 2 +- src/agent/rustjail/Cargo.toml | 2 +- src/libs/Cargo.lock | 23 ++++++++++++++++++----- src/libs/kata-sys-util/Cargo.toml | 2 +- src/runtime-rs/Cargo.lock | 4 ++-- src/runtime-rs/crates/resource/Cargo.toml | 2 +- src/tools/runk/Cargo.lock | 4 ++-- src/tools/runk/libcontainer/Cargo.toml | 2 +- 9 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 75c4cc0fd..0de459116 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -276,9 +276,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index 1c391492c..8851798fd 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -51,7 +51,7 @@ log = "0.4.11" prometheus = { version = "0.13.0", features = ["process"] } procfs = "0.12.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = 
"0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } # Tracing tracing = "0.1.26" diff --git a/src/agent/rustjail/Cargo.toml b/src/agent/rustjail/Cargo.toml index 4d60c11dc..d4ba4e6f3 100644 --- a/src/agent/rustjail/Cargo.toml +++ b/src/agent/rustjail/Cargo.toml @@ -25,7 +25,7 @@ scan_fmt = "0.2.6" regex = "1.5.6" path-absolutize = "1.2.0" anyhow = "1.0.32" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } rlimit = "0.5.3" cfg-if = "0.1.0" diff --git a/src/libs/Cargo.lock b/src/libs/Cargo.lock index c1f73d21b..e0c1296a2 100644 --- a/src/libs/Cargo.lock +++ b/src/libs/Cargo.lock @@ -110,14 +110,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.8" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b827f9d9f6c2fff719d25f5d44cbc8d2ef6df1ef00d055c5c14d5dc25529579" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", - "nix 0.23.1", + "nix 0.25.1", "regex", + "thiserror", ] [[package]] @@ -536,9 +537,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.124" +version = "0.2.139" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" [[package]] name = "lock_api" @@ -640,6 +641,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "ntapi" version = "0.3.7" diff --git a/src/libs/kata-sys-util/Cargo.toml 
b/src/libs/kata-sys-util/Cargo.toml index a4b67e2c0..efcde317e 100644 --- a/src/libs/kata-sys-util/Cargo.toml +++ b/src/libs/kata-sys-util/Cargo.toml @@ -12,7 +12,7 @@ edition = "2018" [dependencies] byteorder = "1.4.3" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } chrono = "0.4.0" common-path = "=1.0.0" fail = "0.5.0" diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index bb4325861..6842ffb59 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -401,9 +401,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", diff --git a/src/runtime-rs/crates/resource/Cargo.toml b/src/runtime-rs/crates/resource/Cargo.toml index 98714a7c2..7ae1f799f 100644 --- a/src/runtime-rs/crates/resource/Cargo.toml +++ b/src/runtime-rs/crates/resource/Cargo.toml @@ -14,7 +14,7 @@ anyhow = "^1.0" async-trait = "0.1.48" bitflags = "1.2.1" byte-unit = "4.0.14" -cgroups-rs = "0.3.0" +cgroups-rs = "0.3.1" futures = "0.3.11" lazy_static = "1.4.0" libc = ">=0.2.39" diff --git a/src/tools/runk/Cargo.lock b/src/tools/runk/Cargo.lock index 582a9e54e..9a8e92748 100644 --- a/src/tools/runk/Cargo.lock +++ b/src/tools/runk/Cargo.lock @@ -233,9 +233,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b97b639839204a6eb727ffbbd68e1dcfc55488c3a26cb0cda1d662b7a186e79" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", diff 
--git a/src/tools/runk/libcontainer/Cargo.toml b/src/tools/runk/libcontainer/Cargo.toml index 83f6a3699..82e56b06e 100644 --- a/src/tools/runk/libcontainer/Cargo.toml +++ b/src/tools/runk/libcontainer/Cargo.toml @@ -20,7 +20,7 @@ chrono = { version = "0.4.19", features = ["serde"] } serde = { version = "1.0.133", features = ["derive"] } serde_json = "1.0.74" scopeguard = "1.1.0" -cgroups = { package = "cgroups-rs", version = "0.3.0" } +cgroups = { package = "cgroups-rs", version = "0.3.1" } procfs = "0.14.0" [dev-dependencies] From 56071c6e7b61598f0f2343cdfca174849449d8c7 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Sun, 29 Jan 2023 17:20:54 +0800 Subject: [PATCH 20/52] virtiofsd: change cache mod to const Change cache mod from literal to const and place them in one place. Also set default cache mode from `none` to `never` in `pkg/katautils/config-settings.go.in`. Fixes: #6151 Signed-off-by: Bin Liu --- src/runtime/pkg/katautils/config-settings.go.in | 2 +- src/runtime/virtcontainers/clh.go | 1 - src/runtime/virtcontainers/clh_test.go | 2 +- src/runtime/virtcontainers/kata_agent.go | 3 +-- src/runtime/virtcontainers/virtiofsd.go | 15 +++++++++++---- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index 43dd5cc5a..e83500343 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -80,7 +80,7 @@ const defaultHotplugVFIOOnRootBus bool = false const defaultPCIeRootPort = 0 const defaultEntropySource = "/dev/urandom" const defaultGuestHookPath string = "" -const defaultVirtioFSCacheMode = "none" +const defaultVirtioFSCacheMode = "never" const defaultDisableImageNvdimm = false const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" const defaultRxRateLimiterMaxRate = uint64(0) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 
7f5bb1c9e..738edd9fc 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -80,7 +80,6 @@ const ( clhAPISocket = "clh-api.sock" virtioFsSocket = "virtiofsd.sock" defaultClhPath = "/usr/local/bin/cloud-hypervisor" - virtioFsCacheAlways = "always" ) // Interface that hides the implementation of openAPI client diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 6865f4b1f..27e89d7ec 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -60,7 +60,7 @@ func newClhConfig() (HypervisorConfig, error) { DefaultBridges: defaultBridges, DefaultMaxVCPUs: uint32(64), SharedFS: config.VirtioFS, - VirtioFSCache: virtioFsCacheAlways, + VirtioFSCache: typeVirtioFSCacheModeAlways, VirtioFSDaemon: testVirtiofsdPath, NetRateLimiterBwMaxRate: int64(0), NetRateLimiterBwOneTimeBurst: int64(0), diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index 9de1661ef..60661fcd3 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -88,7 +88,6 @@ var ( type9pFs = "9p" typeVirtioFS = "virtiofs" typeOverlayFS = "overlay" - typeVirtioFSNoCache = "never" kata9pDevType = "9p" kataMmioBlkDevType = "mmioblk" kataBlkDevType = "blk" @@ -805,7 +804,7 @@ func setupStorages(ctx context.Context, sandbox *Sandbox) []*grpc.Storage { // directly map contents from the host. When set to 'never', the mount // options should not contain 'dax' lest the virtio-fs daemon crashing // with an invalid address reference. - if sandbox.config.HypervisorConfig.VirtioFSCache != typeVirtioFSNoCache { + if sandbox.config.HypervisorConfig.VirtioFSCache != typeVirtioFSCacheModeNever { // If virtio_fs_cache_size = 0, dax should not be used. 
if sandbox.config.HypervisorConfig.VirtioFSCacheSize != 0 { sharedDirVirtioFSOptions = append(sharedDirVirtioFSOptions, sharedDirVirtioFSDaxOptions) diff --git a/src/runtime/virtcontainers/virtiofsd.go b/src/runtime/virtcontainers/virtiofsd.go index 08f7dfd6f..6df62adf9 100644 --- a/src/runtime/virtcontainers/virtiofsd.go +++ b/src/runtime/virtcontainers/virtiofsd.go @@ -37,6 +37,13 @@ var ( errUnimplemented = errors.New("unimplemented") ) +const ( + typeVirtioFSCacheModeNever = "never" + typeVirtioFSCacheModeNone = "none" + typeVirtioFSCacheModeAlways = "always" + typeVirtioFSCacheModeAuto = "auto" +) + type VirtiofsDaemon interface { // Start virtiofs daemon, return pid of virtiofs daemon process Start(context.Context, onQuitFunc) (pid int, err error) @@ -216,11 +223,11 @@ func (v *virtiofsd) valid() error { } if v.cache == "" { - v.cache = "auto" - } else if v.cache == "none" { + v.cache = typeVirtioFSCacheModeAuto + } else if v.cache == typeVirtioFSCacheModeNone { v.Logger().Warn("virtio-fs cache mode `none` is deprecated since Kata Containers 2.5.0 and will be removed in the future release, please use `never` instead. For more details please refer to https://github.com/kata-containers/kata-containers/issues/4234.") - v.cache = "never" - } else if v.cache != "auto" && v.cache != "always" && v.cache != "never" { + v.cache = typeVirtioFSCacheModeNever + } else if v.cache != typeVirtioFSCacheModeAuto && v.cache != typeVirtioFSCacheModeAlways && v.cache != typeVirtioFSCacheModeNever { return errVirtiofsdInvalidVirtiofsCacheMode(v.cache) } From ac64b021a6817296ea8240755abcff22fa9a1b80 Mon Sep 17 00:00:00 2001 From: Alexandru Matei Date: Tue, 7 Feb 2023 17:04:37 +0200 Subject: [PATCH 21/52] clh: Enforce API timeout only for vm.boot request launchClh already has a timeout of 10seconds for launching clh, e.g. 
if launchClh or setupVirtiofsDaemon takes a few seconds the context's deadline will already be expired by the time it reaches bootVM Fixes #6240 Signed-off-by: Alexandru Matei --- src/runtime/virtcontainers/clh.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 7f5bb1c9e..8fdae05f8 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -652,9 +652,6 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() - ctx, cancel := context.WithTimeout(context.Background(), clh.getClhAPITimeout()*time.Second) - defer cancel() - clh.Logger().WithField("function", "StartVM").Info("starting Sandbox") vmPath := filepath.Join(clh.config.VMStorePath, clh.id) @@ -693,6 +690,9 @@ func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { } clh.state.PID = pid + ctx, cancel := context.WithTimeout(ctx, clh.getClhAPITimeout()*time.Second) + defer cancel() + if err := clh.bootVM(ctx); err != nil { return err } From c3836010a833220ef2f59a1e1d307a43ee393c03 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Mon, 30 Jan 2023 13:59:19 -0800 Subject: [PATCH 22/52] make: clean up obsolete targets Cleanup targets that have been removed in the past when the makefile for kata-deploy was included. Instead, add targets from the makefile under local-build kata-deploy. 
Fixes: #6165 Signed-off-by: Archana Shinde --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d4a3dad05..8e856138c 100644 --- a/Makefile +++ b/Makefile @@ -45,10 +45,8 @@ docs-url-alive-check: .PHONY: \ all \ - binary-tarball \ + kata-tarball \ + install-tarball \ default \ - install-binary-tarball \ static-checks \ docs-url-alive-check - - From f1855594a20cb55fae79508d84f6e202e4492af0 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Mon, 30 Jan 2023 15:39:28 -0800 Subject: [PATCH 23/52] make: Get rid of verbose output while creating tar We already have verbose output while merging the builds from various build targets. Getting rid of verbose output to speed up. Signed-off-by: Archana Shinde --- tools/packaging/kata-deploy/local-build/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/packaging/kata-deploy/local-build/Makefile b/tools/packaging/kata-deploy/local-build/Makefile index de831e1fe..3e391f48e 100644 --- a/tools/packaging/kata-deploy/local-build/Makefile +++ b/tools/packaging/kata-deploy/local-build/Makefile @@ -76,4 +76,4 @@ merge-builds: $(MK_DIR)/kata-deploy-merge-builds.sh build install-tarball: - tar -xvf ./kata-static.tar.xz -C / + tar -xf ./kata-static.tar.xz -C / From 94b1d9814c67bbc1ef4b0a8363df5ce749c61f15 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Wed, 8 Feb 2023 13:50:54 -0800 Subject: [PATCH 24/52] cargo: Update Cargo.lock files The cargo.locks file under src/libs and agent-ctl seem to be outdated. Updating these. 
Signed-off-by: Archana Shinde --- src/tools/agent-ctl/Cargo.lock | 20 ++++++++-- src/tools/kata-ctl/Cargo.lock | 70 ++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/src/tools/agent-ctl/Cargo.lock b/src/tools/agent-ctl/Cargo.lock index 634d192b6..deb3385f6 100644 --- a/src/tools/agent-ctl/Cargo.lock +++ b/src/tools/agent-ctl/Cargo.lock @@ -250,14 +250,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cgroups-rs" -version = "0.2.10" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf5525f2cf84d5113ab26bfb6474180eb63224b4b1e4be31ee87be4098f11399" +checksum = "8d5761f3a351b92e0e02a31ca418190bb323edb0d4fce0109b6dba673dc3fdc1" dependencies = [ "libc", "log", - "nix 0.24.2", + "nix 0.25.1", "regex", + "thiserror", ] [[package]] @@ -719,7 +720,6 @@ version = "0.0.1" dependencies = [ "anyhow", "byteorder", - "cgroups-rs", "clap", "hex", "humantime", @@ -893,6 +893,18 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if 1.0.0", + "libc", +] + [[package]] name = "ntapi" version = "0.3.6" diff --git a/src/tools/kata-ctl/Cargo.lock b/src/tools/kata-ctl/Cargo.lock index b10691db0..85d4eb517 100644 --- a/src/tools/kata-ctl/Cargo.lock +++ b/src/tools/kata-ctl/Cargo.lock @@ -618,6 +618,7 @@ dependencies = [ "semver", "serde", "serde_json", + "serial_test", "shim-interface", "strum", "strum_macros", @@ -660,6 +661,16 @@ version = "0.2.135" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.17" @@ -853,6 +864,31 @@ version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "percent-encoding" version = "2.2.0" @@ -1202,6 +1238,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "sct" version = "0.7.0" @@ -1284,6 +1326,28 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "shim-interface" version = "0.1.0" @@ -1344,6 +1408,12 @@ dependencies = [ "slog", ] +[[package]] +name 
= "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + [[package]] name = "socket2" version = "0.4.7" From 49326fe4e1a2f5235ea4ccc9939383cbb6b813c2 Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Thu, 9 Feb 2023 14:32:27 +0800 Subject: [PATCH 25/52] fix(clippy): fix hypervisor clippy checks Fix hypervisor clippy checks. Signed-off-by: yaoyinnan --- src/runtime-rs/crates/hypervisor/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 9c70f3914..0edbfc8bc 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -21,7 +21,7 @@ serde_json = ">=1.0.9" slog = "2.5.2" slog-scope = "4.4.0" thiserror = "1.0" -tokio = { version = "1.8.0", features = ["sync"] } +tokio = { version = "1.8.0", features = ["sync", "fs"] } vmm-sys-util = "0.11.0" rand = "0.8.4" From 01765e17342dd62ac67a42580a57e5b09c9534fa Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Mon, 30 Jan 2023 15:36:28 +0800 Subject: [PATCH 26/52] runtime: support cgroup v2 metrics marshal guest metrics Support to use cgroup v2 metrics marshal guest metrics. 
Fixes: #5998 Signed-off-by: yaoyinnan --- src/agent/rustjail/src/cgroups/fs/mod.rs | 13 +- src/runtime/pkg/containerd-shim-v2/metrics.go | 149 +++++++++++++++--- .../pkg/containerd-shim-v2/metrics_test.go | 9 +- src/runtime/pkg/resourcecontrol/utils.go | 3 - .../pkg/resourcecontrol/utils_linux.go | 13 ++ 5 files changed, 145 insertions(+), 42 deletions(-) diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index f9a7b845d..becc56036 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -691,17 +691,6 @@ fn get_cpuacct_stats(cg: &cgroups::Cgroup) -> SingularPtrField { }); } - if cg.v2() { - return SingularPtrField::some(CpuUsage { - total_usage: 0, - percpu_usage: vec![], - usage_in_kernelmode: 0, - usage_in_usermode: 0, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), - }); - } - // try to get from cpu controller let cpu_controller: &CpuController = get_controller_or_return_singular_none!(cg); let stat = cpu_controller.cpu().stat; @@ -732,7 +721,7 @@ fn get_memory_stats(cg: &cgroups::Cgroup) -> SingularPtrField { let value = memory.use_hierarchy; let use_hierarchy = value == 1; - // gte memory datas + // get memory data let usage = SingularPtrField::some(MemoryData { usage: memory.usage_in_bytes, max_usage: memory.max_usage_in_bytes, diff --git a/src/runtime/pkg/containerd-shim-v2/metrics.go b/src/runtime/pkg/containerd-shim-v2/metrics.go index 6fc10cae9..69ce552eb 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics.go @@ -9,9 +9,11 @@ import ( "context" cgroupsv1 "github.com/containerd/cgroups/stats/v1" + cgroupsv2 "github.com/containerd/cgroups/v2/stats" "github.com/containerd/typeurl" google_protobuf "github.com/gogo/protobuf/types" + resCtrl "github.com/kata-containers/kata-containers/src/runtime/pkg/resourcecontrol" vc 
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers" ) @@ -21,7 +23,18 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return nil, err } - metrics := statsToMetrics(&stats) + isCgroupV1, err := resCtrl.IsCgroupV1() + if err != nil { + return nil, err + } + + var metrics interface{} + + if isCgroupV1 { + metrics = statsToMetricsV1(&stats) + } else { + metrics = statsToMetricsV2(&stats) + } data, err := typeurl.MarshalAny(metrics) if err != nil { @@ -31,25 +44,40 @@ func marshalMetrics(ctx context.Context, s *service, containerID string) (*googl return data, nil } -func statsToMetrics(stats *vc.ContainerStats) *cgroupsv1.Metrics { +func statsToMetricsV1(stats *vc.ContainerStats) *cgroupsv1.Metrics { metrics := &cgroupsv1.Metrics{} if stats.CgroupStats != nil { metrics = &cgroupsv1.Metrics{ - Hugetlb: setHugetlbStats(stats.CgroupStats.HugetlbStats), - Pids: setPidsStats(stats.CgroupStats.PidsStats), - CPU: setCPUStats(stats.CgroupStats.CPUStats), - Memory: setMemoryStats(stats.CgroupStats.MemoryStats), - Blkio: setBlkioStats(stats.CgroupStats.BlkioStats), + Hugetlb: setHugetlbStatsV1(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV1(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV1(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV1(stats.CgroupStats.MemoryStats), + Blkio: setBlkioStatsV1(stats.CgroupStats.BlkioStats), } } - metrics.Network = setNetworkStats(stats.NetworkStats) return metrics } -func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { +func statsToMetricsV2(stats *vc.ContainerStats) *cgroupsv2.Metrics { + metrics := &cgroupsv2.Metrics{} + + if stats.CgroupStats != nil { + metrics = &cgroupsv2.Metrics{ + Hugetlb: setHugetlbStatsV2(stats.CgroupStats.HugetlbStats), + Pids: setPidsStatsV2(stats.CgroupStats.PidsStats), + CPU: setCPUStatsV2(stats.CgroupStats.CPUStats), + Memory: setMemoryStatsV2(stats.CgroupStats.MemoryStats), + Io: 
setBlkioStatsV2(stats.CgroupStats.BlkioStats), + } + } + + return metrics +} + +func setHugetlbStatsV1(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbStat { var hugetlbStats []*cgroupsv1.HugetlbStat for k, v := range vcHugetlb { hugetlbStats = append( @@ -65,7 +93,22 @@ func setHugetlbStats(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv1.HugetlbS return hugetlbStats } -func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { +func setHugetlbStatsV2(vcHugetlb map[string]vc.HugetlbStats) []*cgroupsv2.HugeTlbStat { + var hugetlbStats []*cgroupsv2.HugeTlbStat + for k, v := range vcHugetlb { + hugetlbStats = append( + hugetlbStats, + &cgroupsv2.HugeTlbStat{ + Current: v.Usage, + Max: v.MaxUsage, + Pagesize: k, + }) + } + + return hugetlbStats +} + +func setPidsStatsV1(vcPids vc.PidsStats) *cgroupsv1.PidsStat { pidsStats := &cgroupsv1.PidsStat{ Current: vcPids.Current, Limit: vcPids.Limit, @@ -74,8 +117,16 @@ func setPidsStats(vcPids vc.PidsStats) *cgroupsv1.PidsStat { return pidsStats } -func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { +func setPidsStatsV2(vcPids vc.PidsStats) *cgroupsv2.PidsStat { + pidsStats := &cgroupsv2.PidsStat{ + Current: vcPids.Current, + Limit: vcPids.Limit, + } + return pidsStats +} + +func setCPUStatsV1(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { var perCPU []uint64 perCPU = append(perCPU, vcCPU.CPUUsage.PercpuUsage...) 
@@ -96,7 +147,20 @@ func setCPUStats(vcCPU vc.CPUStats) *cgroupsv1.CPUStat { return cpuStats } -func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { +func setCPUStatsV2(vcCPU vc.CPUStats) *cgroupsv2.CPUStat { + cpuStats := &cgroupsv2.CPUStat{ + UsageUsec: vcCPU.CPUUsage.TotalUsage / 1000, + UserUsec: vcCPU.CPUUsage.UsageInKernelmode / 1000, + SystemUsec: vcCPU.CPUUsage.UsageInUsermode / 1000, + NrPeriods: vcCPU.ThrottlingData.Periods, + NrThrottled: vcCPU.ThrottlingData.ThrottledPeriods, + ThrottledUsec: vcCPU.ThrottlingData.ThrottledTime / 1000, + } + + return cpuStats +} + +func setMemoryStatsV1(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { memoryStats := &cgroupsv1.MemoryStat{ Usage: &cgroupsv1.MemoryEntry{ Limit: vcMemory.Usage.Limit, @@ -146,22 +210,41 @@ func setMemoryStats(vcMemory vc.MemoryStats) *cgroupsv1.MemoryStat { return memoryStats } -func setBlkioStats(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { +func setMemoryStatsV2(vcMemory vc.MemoryStats) *cgroupsv2.MemoryStat { + memoryStats := &cgroupsv2.MemoryStat{ + Usage: vcMemory.Usage.Usage, + UsageLimit: vcMemory.Usage.Limit, + SwapUsage: vcMemory.SwapUsage.Usage, + SwapLimit: vcMemory.SwapUsage.Limit, + } + + return memoryStats +} + +func setBlkioStatsV1(vcBlkio vc.BlkioStats) *cgroupsv1.BlkIOStat { blkioStats := &cgroupsv1.BlkIOStat{ - IoServiceBytesRecursive: copyBlkio(vcBlkio.IoServiceBytesRecursive), - IoServicedRecursive: copyBlkio(vcBlkio.IoServicedRecursive), - IoQueuedRecursive: copyBlkio(vcBlkio.IoQueuedRecursive), - SectorsRecursive: copyBlkio(vcBlkio.SectorsRecursive), - IoServiceTimeRecursive: copyBlkio(vcBlkio.IoServiceTimeRecursive), - IoWaitTimeRecursive: copyBlkio(vcBlkio.IoWaitTimeRecursive), - IoMergedRecursive: copyBlkio(vcBlkio.IoMergedRecursive), - IoTimeRecursive: copyBlkio(vcBlkio.IoTimeRecursive), + IoServiceBytesRecursive: copyBlkioV1(vcBlkio.IoServiceBytesRecursive), + IoServicedRecursive: copyBlkioV1(vcBlkio.IoServicedRecursive), + IoQueuedRecursive: 
copyBlkioV1(vcBlkio.IoQueuedRecursive), + SectorsRecursive: copyBlkioV1(vcBlkio.SectorsRecursive), + IoServiceTimeRecursive: copyBlkioV1(vcBlkio.IoServiceTimeRecursive), + IoWaitTimeRecursive: copyBlkioV1(vcBlkio.IoWaitTimeRecursive), + IoMergedRecursive: copyBlkioV1(vcBlkio.IoMergedRecursive), + IoTimeRecursive: copyBlkioV1(vcBlkio.IoTimeRecursive), } return blkioStats } -func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { +func setBlkioStatsV2(vcBlkio vc.BlkioStats) *cgroupsv2.IOStat { + ioStats := &cgroupsv2.IOStat{ + Usage: copyBlkioV2(vcBlkio.IoServiceBytesRecursive), + } + + return ioStats +} + +func copyBlkioV1(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { ret := make([]*cgroupsv1.BlkIOEntry, len(s)) for i, v := range s { ret[i] = &cgroupsv1.BlkIOEntry{ @@ -175,6 +258,28 @@ func copyBlkio(s []vc.BlkioStatEntry) []*cgroupsv1.BlkIOEntry { return ret } +func copyBlkioV2(s []vc.BlkioStatEntry) []*cgroupsv2.IOEntry { + var ret []*cgroupsv2.IOEntry + item := cgroupsv2.IOEntry{} + for _, v := range s { + switch v.Op { + case "read": + item.Rbytes = v.Value + case "write": + item.Wbytes = v.Value + case "rios": + item.Rios = v.Value + case "wios": + item.Wios = v.Value + } + item.Major = v.Major + item.Minor = v.Minor + } + ret = append(ret, &item) + + return ret +} + func setNetworkStats(vcNetwork []*vc.NetworkStats) []*cgroupsv1.NetworkStat { networkStats := make([]*cgroupsv1.NetworkStat, len(vcNetwork)) for i, v := range vcNetwork { diff --git a/src/runtime/pkg/containerd-shim-v2/metrics_test.go b/src/runtime/pkg/containerd-shim-v2/metrics_test.go index e2a9177a2..57842d93e 100644 --- a/src/runtime/pkg/containerd-shim-v2/metrics_test.go +++ b/src/runtime/pkg/containerd-shim-v2/metrics_test.go @@ -17,8 +17,7 @@ import ( ) func TestStatNetworkMetric(t *testing.T) { - - assert := assert.New(t) + assertions := assert.New(t) var err error mockNetwork := []*vc.NetworkStats{ @@ -52,8 +51,8 @@ func TestStatNetworkMetric(t *testing.T) { }() resp, err := 
sandbox.StatsContainer(context.Background(), testContainerID) - assert.NoError(err) + assertions.NoError(err) - metrics := statsToMetrics(&resp) - assert.Equal(expectedNetwork, metrics.Network) + metrics := statsToMetricsV1(&resp) + assertions.Equal(expectedNetwork, metrics.Network) } diff --git a/src/runtime/pkg/resourcecontrol/utils.go b/src/runtime/pkg/resourcecontrol/utils.go index 4e1f029e0..449a89e9a 100644 --- a/src/runtime/pkg/resourcecontrol/utils.go +++ b/src/runtime/pkg/resourcecontrol/utils.go @@ -19,9 +19,6 @@ var ( ErrCgroupMode = errors.New("cgroup controller type error") ) -// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. -const DefaultResourceControllerID = "/vc" - func DeviceToCgroupDeviceRule(device string) (*devices.Rule, error) { var st unix.Stat_t deviceRule := devices.Rule{ diff --git a/src/runtime/pkg/resourcecontrol/utils_linux.go b/src/runtime/pkg/resourcecontrol/utils_linux.go index e39e6c046..59ca788f8 100644 --- a/src/runtime/pkg/resourcecontrol/utils_linux.go +++ b/src/runtime/pkg/resourcecontrol/utils_linux.go @@ -18,6 +18,9 @@ import ( "golang.org/x/sys/unix" ) +// DefaultResourceControllerID runtime-determined location in the cgroups hierarchy. +const DefaultResourceControllerID = "/vc" + // ValidCgroupPathV1 returns a valid cgroup path for cgroup v1. 
// see https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path func ValidCgroupPathV1(path string, systemdCgroup bool) (string, error) { @@ -140,6 +143,16 @@ func getSliceAndUnit(cgroupPath string) (string, string, error) { return "", "", fmt.Errorf("Path: %s is not valid systemd's cgroups path", cgroupPath) } +func IsCgroupV1() (bool, error) { + if cgroups.Mode() == cgroups.Legacy || cgroups.Mode() == cgroups.Hybrid { + return true, nil + } else if cgroups.Mode() == cgroups.Unified { + return false, nil + } else { + return false, ErrCgroupMode + } +} + func SetThreadAffinity(threadID int, cpuSetSlice []int) error { unixCPUSet := unix.CPUSet{} From ed02c8a05137d40f564582ec8f0e225a15df4928 Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Thu, 9 Feb 2023 20:07:51 +0800 Subject: [PATCH 27/52] docs: add guide for building rootfs with EROFS Add guide for building rootfs with EROFS. Fixes: #6063 Signed-off-by: Gao Xiang Signed-off-by: yaoyinnan --- docs/how-to/README.md | 1 + docs/how-to/how-to-use-erofs-build-rootfs.md | 90 ++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 docs/how-to/how-to-use-erofs-build-rootfs.md diff --git a/docs/how-to/README.md b/docs/how-to/README.md index 1ad486048..df6a060fc 100644 --- a/docs/how-to/README.md +++ b/docs/how-to/README.md @@ -44,3 +44,4 @@ - [How to run Docker with Kata Containers](how-to-run-docker-with-kata.md) - [How to run Kata Containers with `nydus`](how-to-use-virtio-fs-nydus-with-kata.md) - [How to run Kata Containers with AMD SEV-SNP](how-to-run-kata-containers-with-SNP-VMs.md) +- [How to use EROFS to build rootfs in Kata Containers](how-to-use-erofs-build-rootfs.md) diff --git a/docs/how-to/how-to-use-erofs-build-rootfs.md b/docs/how-to/how-to-use-erofs-build-rootfs.md new file mode 100644 index 000000000..72bf6315f --- /dev/null +++ b/docs/how-to/how-to-use-erofs-build-rootfs.md @@ -0,0 +1,90 @@ +# Configure Kata Containers to use EROFS build rootfs + +## 
Introduction +For kata containers, rootfs is used in the read-only way. EROFS can noticeably decrease metadata overhead. + +`mkfs.erofs` can generate compressed and uncompressed EROFS images. + +For uncompressed images, no files are compressed. However, it is optional to inline the data blocks at the end of the file with the metadata. + +For compressed images, each file is compressed using the lz4 or lz4hc algorithm, and the compressed form is kept only if it actually saves space; otherwise the file is stored uncompressed. + +## Performance comparison +| | EROFS | EXT4 | XFS | +|-----------------|-------| --- | --- | +| Image Size [MB] | 106(uncompressed) | 256 | 126 | + + +## Guidance +### Install the `erofs-utils` +#### `apt/dnf` install +On newer `Ubuntu/Debian` systems, it can be installed directly using the `apt` command, and on `Fedora` it can be installed directly using the `dnf` command. + +```shell +# Debian/Ubuntu +$ apt install erofs-utils +# Fedora +$ dnf install erofs-utils +``` + +#### Source install +[https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git](https://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git) + +##### Compile dependencies +If you need to enable the `Lz4` compression feature, `Lz4 1.8.0+` is required, and `Lz4 1.9.3+` is strongly recommended. + +##### Compilation process +For some old lz4 versions (lz4-1.8.0~1.8.3), if lz4-static is not installed, the lz4hc algorithm will not be supported. lz4-static can be installed with apt install lz4-static.x86_64. However, these versions have some bugs in compression, and it is not recommended to use these versions directly. +If you use `lz4 1.9.0+`, you can directly use the following command to compile. + +```shell +$ ./autogen.sh +$ ./configure +$ make +``` + +The compiled `mkfs.erofs` program will be saved in the `mkfs` directory. Afterwards, the generated tools can be installed to a system directory using make install (requires root privileges).
+ +### Create a local rootfs +```shell +$ export distro="ubuntu" +$ export FS_TYPE="erofs" +$ export ROOTFS_DIR="$(realpath kata-containers/tools/osbuilder/rootfs-builder/rootfs)" +$ sudo rm -rf "${ROOTFS_DIR}" +$ pushd kata-containers/tools/osbuilder/rootfs-builder +$ script -fec 'sudo -E SECCOMP=no ./rootfs.sh "${distro}"' +$ popd +``` + +### Add a custom agent to the image - OPTIONAL +> Note: +> - You should only do this step if you are testing with the latest version of the agent. +```shell +$ sudo install -o root -g root -m 0550 -t "${ROOTFS_DIR}/usr/bin" "${ROOTFS_DIR}/../../../../src/agent/target/x86_64-unknown-linux-musl/release/kata-agent" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-agent.service" "${ROOTFS_DIR}/usr/lib/systemd/system/" +$ sudo install -o root -g root -m 0440 "${ROOTFS_DIR}/../../../../src/agent/kata-containers.target" "${ROOTFS_DIR}/usr/lib/systemd/system/" +``` + +### Build a root image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ script -fec 'sudo -E ./image_builder.sh "${ROOTFS_DIR}"' +$ popd +``` + +### Install the rootfs image +```shell +$ pushd kata-containers/tools/osbuilder/image-builder +$ commit="$(git log --format=%h -1 HEAD)" +$ date="$(date +%Y-%m-%d-%T.%N%z)" +$ rootfs="erofs" +$ image="kata-containers-${rootfs}-${date}-${commit}" +$ sudo install -o root -g root -m 0640 -D kata-containers.img "/usr/share/kata-containers/${image}" +$ (cd /usr/share/kata-containers && sudo ln -sf "$image" kata-containers.img) +$ popd +``` + +### Use `EROFS` in the runtime +```shell +$ sudo sed -i -e 's/^# *\(rootfs_type\).*=.*$/\1 = erofs/g' /etc/kata-containers/configuration.toml +``` From fff0e50a738e052f14e45d64d5ea52d497d861c4 Mon Sep 17 00:00:00 2001 From: Gabriela Cervantes Date: Thu, 9 Feb 2023 16:48:18 +0000 Subject: [PATCH 28/52] versions: Update runc version This PR updates the runc version. This new version includes changes in: - Fix mounting via wrong proc fd.
When the user and mount namespaces are used, and the bind mount is followed by the cgroup mount in the spec, the cgroup was mounted using the bind mount's mount fd. - Switch kill() in libcontainer/nsenter to sane_kill(). - Fix "permission denied" error from runc run on noexec fs. - Fix failed exec after systemctl daemon-reload. Due to a regression in v1.1.3, the DeviceAllow=char-pts rwm rule was no longer added and was causing an error open /dev/pts/0: operation not permitted: unknown when systemd was reloaded. Fixes #6251 Signed-off-by: Gabriela Cervantes --- versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index f79eff770..5f74ee8ba 100644 --- a/versions.yaml +++ b/versions.yaml @@ -249,7 +249,7 @@ externals: uscan-url: >- https://github.com/opencontainers/runc/tags .*/v?(\d\S+)\.tar\.gz - version: "v1.1.0" + version: "v1.1.4" nydus: description: "Nydus image acceleration service" From bdf20b5d263c352b0ce93b3956beb5de8e85b3ee Mon Sep 17 00:00:00 2001 From: yaoyinnan Date: Sat, 11 Feb 2023 00:44:13 +0800 Subject: [PATCH 29/52] rootfs: support EROFS filesystem For kata containers, rootfs is used in the read-only way. EROFS can noticeably decrease metadata overhead. On the basis of supporting the EROFS file system, it supports using the config parameter to switch the file system used by rootfs.
Fixes: #6063 Signed-off-by: Gao Xiang Signed-off-by: yaoyinnan --- .../kata-types/src/config/hypervisor/mod.rs | 3 + src/runtime-rs/Makefile | 7 + .../config/configuration-dragonball.toml.in | 6 + .../crates/hypervisor/src/dragonball/inner.rs | 5 +- .../crates/hypervisor/src/kernel_param.rs | 225 +++++++++++++++--- src/runtime-rs/crates/hypervisor/src/lib.rs | 10 + src/runtime/Makefile | 7 + src/runtime/cmd/kata-runtime/kata-env_test.go | 2 + src/runtime/config/configuration-acrn.toml.in | 6 + src/runtime/config/configuration-clh.toml.in | 6 + src/runtime/config/configuration-fc.toml.in | 6 + src/runtime/config/configuration-qemu.toml.in | 6 + .../pkg/containerd-shim-v2/create_test.go | 2 + src/runtime/pkg/katatestutils/utils.go | 2 + .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 44 ++++ src/runtime/pkg/katautils/config_test.go | 4 + src/runtime/virtcontainers/acrn.go | 6 +- src/runtime/virtcontainers/acrn_arch_base.go | 14 +- src/runtime/virtcontainers/clh.go | 7 +- src/runtime/virtcontainers/clh_test.go | 1 + src/runtime/virtcontainers/fc.go | 9 +- src/runtime/virtcontainers/hypervisor.go | 86 +++++-- src/runtime/virtcontainers/hypervisor_test.go | 160 ++++++++++++- src/runtime/virtcontainers/qemu_amd64.go | 4 +- src/runtime/virtcontainers/qemu_arch_base.go | 18 +- src/runtime/virtcontainers/qemu_arm64.go | 4 +- src/runtime/virtcontainers/qemu_ppc64le.go | 4 +- src/runtime/virtcontainers/qemu_s390x.go | 6 +- .../osbuilder/image-builder/image_builder.sh | 170 +++++++++---- .../kernel/configs/fragments/common/fs.conf | 5 + tools/packaging/kernel/kata_config_version | 2 +- 32 files changed, 707 insertions(+), 131 deletions(-) diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index afd3a42ea..2eaa3443b 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -210,6 +210,9 @@ pub struct BootInfo { /// Path 
to root device on host #[serde(default)] pub image: String, + /// Rootfs filesystem type. + #[serde(default)] + pub rootfs_type: String, /// Path to the firmware. /// /// If you want that qemu uses the default firmware leave this option empty. diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index ffe7a934b..880b5caf1 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -79,6 +79,12 @@ DBVALIDHYPERVISORPATHS := [] PKGDATADIR := $(PREFIXDEPS)/share/$(PROJECT_DIR) KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + PKGLIBEXECDIR := $(LIBEXECDIR)/$(PROJECT_DIR) FIRMWAREPATH := FIRMWAREVOLUMEPATH := @@ -210,6 +216,7 @@ USER_VARS += DBVALIDCTLPATHS USER_VARS += SYSCONFIG USER_VARS += IMAGENAME USER_VARS += IMAGEPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime-rs/config/configuration-dragonball.toml.in b/src/runtime-rs/config/configuration-dragonball.toml.in index fa39d23c8..8131d0c68 100644 --- a/src/runtime-rs/config/configuration-dragonball.toml.in +++ b/src/runtime-rs/config/configuration-dragonball.toml.in @@ -17,6 +17,12 @@ ctlpath = "@DBCTLPATH@" kernel = "@KERNELPATH_DB@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. 
"path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index a74fe28f8..99a38b306 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -101,7 +101,10 @@ impl DragonballInner { // get kernel params let mut kernel_params = KernelParams::new(self.config.debug_info.enable_debug); - kernel_params.append(&mut KernelParams::new_rootfs_kernel_params(&rootfs_driver)); + kernel_params.append(&mut KernelParams::new_rootfs_kernel_params( + &rootfs_driver, + &self.config.boot_info.rootfs_type, + )?); kernel_params.append(&mut KernelParams::from_string( &self.config.boot_info.kernel_params, )); diff --git a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs index 39bef9a64..7ad17cb8f 100644 --- a/src/runtime-rs/crates/hypervisor/src/kernel_param.rs +++ b/src/runtime-rs/crates/hypervisor/src/kernel_param.rs @@ -6,7 +6,10 @@ use anyhow::{anyhow, Result}; -use crate::{VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM}; +use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, VM_ROOTFS_ROOT_PMEM, +}; use kata_types::config::LOG_VPORT_OPTION; // Port where the agent will send the logs. 
Logs are sent through the vsock in cases @@ -67,43 +70,49 @@ impl KernelParams { Self { params } } - pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str) -> Self { - let params = match rootfs_driver { - VM_ROOTFS_DRIVER_BLK => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/vda1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] - } + pub(crate) fn new_rootfs_kernel_params(rootfs_driver: &str, rootfs_type: &str) -> Result { + let mut params = vec![]; + + match rootfs_driver { VM_ROOTFS_DRIVER_PMEM => { - vec![ - Param { - key: "root".to_string(), - value: "/dev/pmem0p1".to_string(), - }, - Param { - key: "rootflags".to_string(), - value: "data=ordered,errors=remount-ro,dax ro".to_string(), - }, - Param { - key: "rootfstype".to_string(), - value: "ext4".to_string(), - }, - ] + params.push(Param::new("root", VM_ROOTFS_ROOT_PMEM)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new( + "rootflags", + "dax,data=ordered,errors=remount-ro ro", + )); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "dax ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } } - _ => vec![], - }; - Self { params } + VM_ROOTFS_DRIVER_BLK => { + params.push(Param::new("root", VM_ROOTFS_ROOT_BLK)); + match rootfs_type { + VM_ROOTFS_FILESYSTEM_EXT4 | VM_ROOTFS_FILESYSTEM_XFS => { + params.push(Param::new("rootflags", "data=ordered,errors=remount-ro ro")); + } + VM_ROOTFS_FILESYSTEM_EROFS => { + params.push(Param::new("rootflags", "ro")); + } + _ => { + return Err(anyhow!("Unsupported rootfs type")); + } + } + } + _ => { + return Err(anyhow!("Unsupported rootfs driver")); + } + } + + params.push(Param::new("rootfstype", rootfs_type)); + + Ok(Self { params }) } pub(crate) fn append(&mut self, params: &mut KernelParams) { 
@@ -155,6 +164,12 @@ mod tests { use super::*; + use crate::{ + VM_ROOTFS_DRIVER_BLK, VM_ROOTFS_DRIVER_PMEM, VM_ROOTFS_FILESYSTEM_EROFS, + VM_ROOTFS_FILESYSTEM_EXT4, VM_ROOTFS_FILESYSTEM_XFS, VM_ROOTFS_ROOT_BLK, + VM_ROOTFS_ROOT_PMEM, + }; + #[test] fn test_params() { let param1 = Param::new("", ""); @@ -190,4 +205,142 @@ mod tests { Ok(()) } + + #[derive(Debug)] + struct TestData<'a> { + rootfs_driver: &'a str, + rootfs_type: &'a str, + expect_params: KernelParams, + result: Result<()>, + } + + #[test] + fn test_rootfs_kernel_params() { + let tests = &[ + // EXT4 + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Ok(()), + }, + // XFS + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax,data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_XFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_XFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // EROFS + TestData { + 
rootfs_driver: VM_ROOTFS_DRIVER_PMEM, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_PMEM), + Param::new("rootflags", "dax ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: VM_ROOTFS_FILESYSTEM_EROFS, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EROFS), + ] + .to_vec(), + }, + result: Ok(()), + }, + // Unsupported rootfs driver + TestData { + rootfs_driver: "foo", + rootfs_type: VM_ROOTFS_FILESYSTEM_EXT4, + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs driver")), + }, + // Unsupported rootfs type + TestData { + rootfs_driver: VM_ROOTFS_DRIVER_BLK, + rootfs_type: "foo", + expect_params: KernelParams { + params: [ + Param::new("root", VM_ROOTFS_ROOT_BLK), + Param::new("rootflags", "data=ordered,errors=remount-ro ro"), + Param::new("rootfstype", VM_ROOTFS_FILESYSTEM_EXT4), + ] + .to_vec(), + }, + result: Err(anyhow!("Unsupported rootfs type")), + }, + ]; + + for (i, t) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, t); + let result = KernelParams::new_rootfs_kernel_params(t.rootfs_driver, t.rootfs_type); + let msg = format!("{}, result: {:?}", msg, result); + + if t.result.is_ok() { + assert!(result.is_ok(), "{}", msg); + assert_eq!(t.expect_params, result.unwrap()); + } else { + let expected_error = format!("{}", t.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } + } + } } diff --git 
a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index 2c3e8016f..f2bcc21c7 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -27,6 +27,16 @@ use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; // Config which driver to use as vm root dev const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk"; const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem"; + +//Configure the root corresponding to the driver +const VM_ROOTFS_ROOT_BLK: &str = "/dev/vda1"; +const VM_ROOTFS_ROOT_PMEM: &str = "/dev/pmem0p1"; + +// Config which filesystem to use as rootfs type +const VM_ROOTFS_FILESYSTEM_EXT4: &str = "ext4"; +const VM_ROOTFS_FILESYSTEM_XFS: &str = "xfs"; +const VM_ROOTFS_FILESYSTEM_EROFS: &str = "erofs"; + // before using hugepages for VM, we need to mount hugetlbfs // /dev/hugepages will be the mount point // mkdir -p /dev/hugepages diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 64e034069..99dde7e2b 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -112,6 +112,12 @@ KERNELDIR := $(PKGDATADIR) IMAGEPATH := $(PKGDATADIR)/$(IMAGENAME) INITRDPATH := $(PKGDATADIR)/$(INITRDNAME) + +ROOTFSTYPE_EXT4 := \"ext4\" +ROOTFSTYPE_XFS := \"xfs\" +ROOTFSTYPE_EROFS := \"erofs\" +DEFROOTFSTYPE := $(ROOTFSTYPE_EXT4) + FIRMWAREPATH := FIRMWAREVOLUMEPATH := @@ -412,6 +418,7 @@ USER_VARS += IMAGENAME USER_VARS += IMAGEPATH USER_VARS += INITRDNAME USER_VARS += INITRDPATH +USER_VARS += DEFROOTFSTYPE USER_VARS += MACHINETYPE USER_VARS += KERNELDIR USER_VARS += KERNELTYPE diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 96847dc13..321bc507b 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -78,6 +78,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC hypervisorPath := filepath.Join(prefixDir, "hypervisor") 
kernelPath := filepath.Join(prefixDir, "kernel") imagePath := filepath.Join(prefixDir, "image") + rootfsType := "ext4" kernelParams := "foo=bar xyz" machineType := "machineType" disableBlock := true @@ -119,6 +120,7 @@ func makeRuntimeConfig(prefixDir string) (configFile string, config oci.RuntimeC HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, diff --git a/src/runtime/config/configuration-acrn.toml.in b/src/runtime/config/configuration-acrn.toml.in index 5f1368ce8..2d2b7065e 100644 --- a/src/runtime/config/configuration-acrn.toml.in +++ b/src/runtime/config/configuration-acrn.toml.in @@ -17,6 +17,12 @@ ctlpath = "@ACRNCTLPATH@" kernel = "@KERNELPATH_ACRN@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-clh.toml.in b/src/runtime/config/configuration-clh.toml.in index fbf838572..df7cc7ac5 100644 --- a/src/runtime/config/configuration-clh.toml.in +++ b/src/runtime/config/configuration-clh.toml.in @@ -16,6 +16,12 @@ path = "@CLHPATH@" kernel = "@KERNELPATH_CLH@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. 
diff --git a/src/runtime/config/configuration-fc.toml.in b/src/runtime/config/configuration-fc.toml.in index b7f349c0d..10dc17700 100644 --- a/src/runtime/config/configuration-fc.toml.in +++ b/src/runtime/config/configuration-fc.toml.in @@ -16,6 +16,12 @@ path = "@FCPATH@" kernel = "@KERNELPATH_FC@" image = "@IMAGEPATH@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # List of valid annotation names for the hypervisor # Each member of the list is a regular expression, which is the base name # of the annotation, e.g. "path" for io.katacontainers.config.hypervisor.path" diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index 4aced5975..b2a23047e 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -18,6 +18,12 @@ image = "@IMAGEPATH@" # initrd = "@INITRDPATH@" machine_type = "@MACHINETYPE@" +# rootfs filesystem type: +# - ext4 (default) +# - xfs +# - erofs +rootfs_type=@DEFROOTFSTYPE@ + # Enable confidential guest support. # Toggling that setting may trigger different hardware features, ranging # from memory encryption to both memory and CPU-state encryption and integrity. 
diff --git a/src/runtime/pkg/containerd-shim-v2/create_test.go b/src/runtime/pkg/containerd-shim-v2/create_test.go index eecc19968..ccad5ceea 100644 --- a/src/runtime/pkg/containerd-shim-v2/create_test.go +++ b/src/runtime/pkg/containerd-shim-v2/create_test.go @@ -320,6 +320,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -337,6 +338,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config string, err err HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, diff --git a/src/runtime/pkg/katatestutils/utils.go b/src/runtime/pkg/katatestutils/utils.go index 33a5d1836..4e3a784a2 100644 --- a/src/runtime/pkg/katatestutils/utils.go +++ b/src/runtime/pkg/katatestutils/utils.go @@ -211,6 +211,7 @@ type RuntimeConfigOptions struct { DefaultGuestHookPath string KernelPath string ImagePath string + RootfsType string KernelParams string MachineType string LogPath string @@ -307,6 +308,7 @@ func MakeRuntimeConfigFileData(config RuntimeConfigOptions) string { block_device_aio = "` + config.BlockDeviceAIO + `" kernel_params = "` + config.KernelParams + `" image = "` + config.ImagePath + `" + rootfs_type = "` + config.RootfsType + `" machine_type = "` + config.MachineType + `" default_vcpus = ` + strconv.FormatUint(uint64(config.DefaultVCPUCount), 10) + ` default_maxvcpus = ` + strconv.FormatUint(uint64(config.DefaultMaxVCPUCount), 10) + ` diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index e83500343..f56111771 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ 
b/src/runtime/pkg/katautils/config-settings.go.in @@ -44,6 +44,7 @@ var defaultJailerPath = "/usr/bin/jailer" var defaultImagePath = "/usr/share/kata-containers/kata-containers.img" var defaultKernelPath = "/usr/share/kata-containers/vmlinuz.container" var defaultInitrdPath = "/usr/share/kata-containers/kata-containers-initrd.img" +var defaultRootfsType = "ext4" var defaultFirmwarePath = "" var defaultFirmwareVolumePath = "" var defaultMachineAccelerators = "" diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index 7a1f57ac4..bb92ca718 100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -83,6 +83,7 @@ type hypervisor struct { CtlPath string `toml:"ctlpath"` Initrd string `toml:"initrd"` Image string `toml:"image"` + RootfsType string `toml:"rootfs_type"` Firmware string `toml:"firmware"` FirmwareVolume string `toml:"firmware_volume"` MachineAccelerators string `toml:"machine_accelerators"` @@ -260,6 +261,16 @@ func (h hypervisor) image() (string, error) { return ResolvePath(p) } +func (h hypervisor) rootfsType() (string, error) { + p := h.RootfsType + + if p == "" { + p = "ext4" + } + + return p, nil +} + func (h hypervisor) firmware() (string, error) { p := h.Firmware @@ -647,6 +658,11 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -670,6 +686,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), @@ -717,6 +734,11 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { return 
vc.HypervisorConfig{}, err } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + pflashes, err := h.PFlash() if err != nil { return vc.HypervisorConfig{}, err @@ -778,6 +800,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, FirmwareVolumePath: firmwareVolume, PFlash: pflashes, @@ -868,6 +891,11 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -885,6 +913,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { HypervisorPathList: h.HypervisorPathList, KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, HypervisorCtlPath: hypervisorctl, HypervisorCtlPathList: h.CtlPathList, FirmwarePath: firmware, @@ -935,6 +964,11 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { errors.New("image or initrd must be defined in the configuration file") } + rootfsType, err := h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + firmware, err := h.firmware() if err != nil { return vc.HypervisorConfig{}, err @@ -969,6 +1003,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { KernelPath: kernel, InitrdPath: initrd, ImagePath: image, + RootfsType: rootfsType, FirmwarePath: firmware, MachineAccelerators: machineAccelerators, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), @@ -1028,15 +1063,23 @@ func newDragonballHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { if err != nil { return vc.HypervisorConfig{}, err } + image, err := h.image() if err != nil { return vc.HypervisorConfig{}, err } + + rootfsType, err 
:= h.rootfsType() + if err != nil { + return vc.HypervisorConfig{}, err + } + kernelParams := h.kernelParams() return vc.HypervisorConfig{ KernelPath: kernel, ImagePath: image, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), NumVCPUs: h.defaultVCPUs(), DefaultMaxVCPUs: h.defaultMaxVCPUs(), @@ -1195,6 +1238,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, FirmwarePath: defaultFirmwarePath, FirmwareVolumePath: defaultFirmwareVolumePath, MachineAccelerators: defaultMachineAccelerators, diff --git a/src/runtime/pkg/katautils/config_test.go b/src/runtime/pkg/katautils/config_test.go index 156c312cf..d958fa1a3 100644 --- a/src/runtime/pkg/katautils/config_test.go +++ b/src/runtime/pkg/katautils/config_test.go @@ -74,6 +74,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf kernelPath := path.Join(dir, "kernel") kernelParams := "foo=bar xyz" imagePath := path.Join(dir, "image") + rootfsType := "ext4" logDir := path.Join(dir, "logs") logPath := path.Join(logDir, "runtime.log") machineType := "machineType" @@ -94,6 +95,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: kernelParams, MachineType: machineType, LogPath: logPath, @@ -153,6 +155,7 @@ func createAllRuntimeConfigFiles(dir, hypervisor string) (config testRuntimeConf HypervisorPath: hypervisorPath, KernelPath: kernelPath, ImagePath: imagePath, + RootfsType: rootfsType, KernelParams: vc.DeserializeParams(strings.Fields(kernelParams)), HypervisorMachineType: machineType, NumVCPUs: defaultVCPUCount, @@ -542,6 +545,7 @@ func TestMinimalRuntimeConfig(t *testing.T) { KernelPath: defaultKernelPath, ImagePath: defaultImagePath, InitrdPath: defaultInitrdPath, + 
RootfsType: defaultRootfsType, HypervisorMachineType: defaultMachineType, NumVCPUs: defaultVCPUCount, DefaultMaxVCPUs: defaultMaxVCPUCount, diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index 3863000a6..35c71a6d6 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -255,7 +255,11 @@ func (a *Acrn) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso } a.id = id - a.arch = newAcrnArch(a.config) + var err error + a.arch, err = newAcrnArch(a.config) + if err != nil { + return err + } return nil } diff --git a/src/runtime/virtcontainers/acrn_arch_base.go b/src/runtime/virtcontainers/acrn_arch_base.go index 8d8c42242..77fb8e9e5 100644 --- a/src/runtime/virtcontainers/acrn_arch_base.go +++ b/src/runtime/virtcontainers/acrn_arch_base.go @@ -64,7 +64,7 @@ type acrnArch interface { appendBlockDevice(devices []Device, drive config.BlockDrive) []Device // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error } type acrnArchBase struct { @@ -314,7 +314,7 @@ func MaxAcrnVCPUs() uint32 { return uint32(8) } -func newAcrnArch(config HypervisorConfig) acrnArch { +func newAcrnArch(config HypervisorConfig) (acrnArch, error) { a := &acrnArchBase{ path: acrnPath, ctlpath: acrnctlPath, @@ -323,8 +323,11 @@ func newAcrnArch(config HypervisorConfig) acrnArch { kernelParams: acrnKernelParams, } - a.handleImagePath(config) - return a + if err := a.handleImagePath(config); err != nil { + return nil, err + } + + return a, nil } func (a *acrnArchBase) acrnPath() (string, error) { @@ -788,10 +791,11 @@ func (a *acrnArchBase) appendBlockDevice(devices []Device, drive config.BlockDri return devices } -func (a *acrnArchBase) handleImagePath(config HypervisorConfig) { +func (a *acrnArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { a.kernelParams = append(a.kernelParams, 
acrnKernelRootParams...) a.kernelParamsNonDebug = append(a.kernelParamsNonDebug, acrnKernelParamsSystemdNonDebug...) a.kernelParamsDebug = append(a.kernelParamsDebug, acrnKernelParamsSystemdDebug...) } + return nil } diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index f91136b25..dffd4213a 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -503,10 +503,9 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // Set initial amount of cpu's for the virtual machine clh.vmconfig.Cpus = chclient.NewCpusConfig(int32(clh.config.NumVCPUs), int32(clh.config.DefaultMaxVCPUs)) - // First take the default parameters defined by this driver - params := commonNvdimmKernelRootParams - if clh.config.ConfidentialGuest { - params = commonVirtioblkKernelRootParams + params, err := GetKernelRootParams(hypervisorConfig.RootfsType, clh.config.ConfidentialGuest, false) + if err != nil { + return err } params = append(params, clhKernelParams...) 
diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 27e89d7ec..20e6935d8 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -53,6 +53,7 @@ func newClhConfig() (HypervisorConfig, error) { return HypervisorConfig{ KernelPath: testClhKernelPath, ImagePath: testClhImagePath, + RootfsType: string(EXT4), HypervisorPath: testClhPath, NumVCPUs: defaultVCPUs, BlockDeviceDriver: config.VirtioBlock, diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 4fb1dddfe..520d73eaa 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -89,7 +89,7 @@ const ( // Specify the minimum version of firecracker supported var fcMinSupportedVersion = semver.MustParse("0.21.1") -var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ +var fcKernelParams = []Param{ // The boot source is the first partition of the first block device added {"pci", "off"}, {"reboot", "k"}, @@ -101,7 +101,7 @@ var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ // Firecracker doesn't support ACPI // Fix kernel error "ACPI BIOS Error (bug)" {"acpi", "off"}, -}...) +} func (s vmmState) String() string { switch s { @@ -700,6 +700,11 @@ func (fc *firecracker) fcInitConfiguration(ctx context.Context) error { return err } + params, err := GetKernelRootParams(fc.config.RootfsType, true, false) + if err != nil { + return err + } + fcKernelParams = append(params, fcKernelParams...) if fc.config.Debug { fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"}) } else { diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index 3ea7a83a4..b36a34dde 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -91,25 +91,74 @@ var ( // cores. 
var defaultMaxVCPUs = govmm.MaxVCPUs() -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +// RootfsDriver describes a rootfs driver. +type RootfsDriver string -// agnostic list of kernel root parameters for NVDIMM -var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/pmem0p1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, -} +const ( + // VirtioBlk is the Virtio-Blk rootfs driver. + VirtioBlk RootfsDriver = "/dev/vda1" -// agnostic list of kernel root parameters for virtio-blk -var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck - {"root", "/dev/vda1"}, - {"rootflags", "data=ordered,errors=remount-ro ro"}, - {"rootfstype", "ext4"}, + // Nvdimm is the Nvdimm rootfs driver. + Nvdimm RootfsType = "/dev/pmem0p1" +) + +// RootfsType describes a rootfs type. +type RootfsType string + +const ( + // EXT4 is the ext4 filesystem. + EXT4 RootfsType = "ext4" + + // XFS is the xfs filesystem. + XFS RootfsType = "xfs" + + // EROFS is the erofs filesystem. + EROFS RootfsType = "erofs" +) + +func GetKernelRootParams(rootfstype string, disableNvdimm bool, dax bool) ([]Param, error) { + var kernelRootParams []Param + + // EXT4 filesystem is used by default. 
+ if rootfstype == "" { + rootfstype = string(EXT4) + } + + if disableNvdimm && dax { + return []Param{}, fmt.Errorf("Virtio-Blk does not support DAX") + } + + if disableNvdimm { + // Virtio-Blk + kernelRootParams = append(kernelRootParams, Param{"root", string(VirtioBlk)}) + } else { + // Nvdimm + kernelRootParams = append(kernelRootParams, Param{"root", string(Nvdimm)}) + } + + switch RootfsType(rootfstype) { + case EROFS: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "ro"}) + } + case XFS: + fallthrough + // EXT4 filesystem is used by default. + case EXT4: + if dax { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "dax,data=ordered,errors=remount-ro ro"}) + } else { + kernelRootParams = append(kernelRootParams, Param{"rootflags", "data=ordered,errors=remount-ro ro"}) + } + default: + return []Param{}, fmt.Errorf("unsupported rootfs type") + } + + kernelRootParams = append(kernelRootParams, Param{"rootfstype", rootfstype}) + + return kernelRootParams, nil } // DeviceType describes a virtualized device type. @@ -273,6 +322,9 @@ type HypervisorConfig struct { // ImagePath and InitrdPath cannot be set at the same time. InitrdPath string + // RootfsType is filesystem type of rootfs. 
+ RootfsType string + // FirmwarePath is the bios host path FirmwarePath string diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index e540aea48..d67a38698 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -7,13 +7,167 @@ package virtcontainers import ( "fmt" - "os" - "testing" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/stretchr/testify/assert" + "os" + "testing" ) +func TestGetKernelRootParams(t *testing.T) { + assert := assert.New(t) + tests := []struct { + rootfstype string + expected []Param + disableNvdimm bool + dax bool + error bool + }{ + // EXT4 + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // XFS + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(XFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + 
{"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(XFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // EROFS + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: false, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(Nvdimm)}, + {"rootflags", "dax ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: false, + dax: true, + error: false, + }, + { + rootfstype: string(EROFS), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "ro"}, + {"rootfstype", string(EROFS)}, + }, + disableNvdimm: true, + dax: false, + error: false, + }, + + // Unsupported rootfs type + { + rootfstype: "foo", + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: false, + dax: false, + error: true, + }, + + // Virtio-Blk does not support DAX + { + rootfstype: string(EXT4), + expected: []Param{ + {"root", string(VirtioBlk)}, + {"rootflags", "dax,data=ordered,errors=remount-ro ro"}, + {"rootfstype", string(EXT4)}, + }, + disableNvdimm: true, + dax: true, + error: true, + }, + } + + for _, t := range tests { + kernelRootParams, err := GetKernelRootParams(t.rootfstype, t.disableNvdimm, t.dax) + if t.error { + assert.Error(err) + continue + } else { + assert.NoError(err) + } + assert.Equal(t.expected, kernelRootParams, + "Invalid parameters rootfstype: %v, disableNvdimm: %v, dax: %v, "+ + "unable to get kernel root params", t.rootfstype, t.disableNvdimm, t.dax) + } +} + func testSetHypervisorType(t *testing.T, value string, expected HypervisorType) { var hypervisorType HypervisorType assert := assert.New(t) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 124fd4534..29aa45375 100644 ---
a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -148,7 +148,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { q.qemuMachine.Options += "sgx-epc.0.memdev=epc0,sgx-epc.0.node=0" } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index e16c285ab..5a03b659f 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -129,7 +129,7 @@ type qemuArch interface { setPFlash([]string) // handleImagePath handles the Hypervisor Config image path - handleImagePath(config HypervisorConfig) + handleImagePath(config HypervisorConfig) error // supportGuestMemoryHotplug returns if the guest supports memory hotplug supportGuestMemoryHotplug() bool @@ -702,23 +702,27 @@ func (q *qemuArchBase) appendRNGDevice(_ context.Context, devices []govmmQemu.De return devices, nil } -func (q *qemuArchBase) handleImagePath(config HypervisorConfig) { +func (q *qemuArchBase) handleImagePath(config HypervisorConfig) error { if config.ImagePath != "" { - kernelRootParams := commonVirtioblkKernelRootParams + kernelRootParams, err := GetKernelRootParams(config.RootfsType, q.disableNvdimm, false) + if err != nil { + return err + } if !q.disableNvdimm { q.qemuMachine.Options = strings.Join([]string{ q.qemuMachine.Options, qemuNvdimmOption, }, ",") - if q.dax { - kernelRootParams = commonNvdimmKernelRootParams - } else { - kernelRootParams = commonNvdimmNoDAXKernelRootParams + kernelRootParams, err = GetKernelRootParams(config.RootfsType, q.disableNvdimm, q.dax) + if err != nil { + return err } } q.kernelParams = append(q.kernelParams, kernelRootParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
} + + return nil } func (q *qemuArchBase) supportGuestMemoryHotplug() bool { diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index ccf164139..9e05c5452 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -65,7 +65,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { }, } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } return q, nil } diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index 7f6de2fcd..2d4010fbc 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -88,7 +88,9 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } } - q.handleImagePath(config) + if err := q.handleImagePath(config); err != nil { + return nil, err + } q.memoryOffset = config.MemOffset diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index 422db2b87..b0c1ede54 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -83,7 +83,11 @@ func newQemuArch(config HypervisorConfig) (qemuArch, error) { } if config.ImagePath != "" { - q.kernelParams = append(q.kernelParams, commonVirtioblkKernelRootParams...) + kernelParams, err := GetKernelRootParams(config.RootfsType, true, false) + if err != nil { + return nil, err + } + q.kernelParams = append(q.kernelParams, kernelParams...) q.kernelParamsNonDebug = append(q.kernelParamsNonDebug, kernelParamsSystemdNonDebug...) q.kernelParamsDebug = append(q.kernelParamsDebug, kernelParamsSystemdDebug...) 
} diff --git a/tools/osbuilder/image-builder/image_builder.sh b/tools/osbuilder/image-builder/image_builder.sh index 75b23b176..2ae656b94 100755 --- a/tools/osbuilder/image-builder/image_builder.sh +++ b/tools/osbuilder/image-builder/image_builder.sh @@ -18,6 +18,7 @@ readonly lib_file="${script_dir}/../scripts/lib.sh" readonly ext4_format="ext4" readonly xfs_format="xfs" +readonly erofs_format="erofs" # ext4: percentage of the filesystem which may only be allocated by privileged processes. readonly reserved_blocks_percentage=3 @@ -83,7 +84,7 @@ Options: -h Show this help -o Path to generate image file. ENV: IMAGE -r Free space of the root partition in MB. ENV: ROOT_FREE_SPACE - -f Filesystem type to use, only xfs and ext4 are supported. ENV: FS_TYPE + -f Filesystem type to use, only ext4, xfs and erofs are supported. ENV: FS_TYPE Extra environment variables: AGENT_BIN: Use it to change the expected agent binary name @@ -100,7 +101,6 @@ Extra environment variables: and the rootfs is built with SELINUX=yes. DEFAULT value: "no" - Following diagram shows how the resulting image will look like .-----------.----------.---------------.-----------. 
@@ -351,12 +351,14 @@ format_loop() { local device="$1" local block_size="$2" local fs_type="$3" + local mount_dir="$4" case "${fs_type}" in "${ext4_format}") mkfs.ext4 -q -F -b "${block_size}" "${device}p1" info "Set filesystem reserved blocks percentage to ${reserved_blocks_percentage}%" tune2fs -m "${reserved_blocks_percentage}" "${device}p1" + return 0 ;; "${xfs_format}") @@ -366,7 +368,8 @@ format_loop() { if mkfs.xfs -m reflink=0 -q -f -b size="${block_size}" "${device}p1" 2>&1 | grep -q "unknown option"; then mkfs.xfs -q -f -b size="${block_size}" "${device}p1" fi - ;; + return 0 + ;; *) error "Unsupported fs type: ${fs_type}" @@ -395,6 +398,55 @@ create_disk() { OK "Partitions created" } +setup_selinux() { + local mount_dir="$1" + local agent_bin="$2" + + if [ "${SELINUX}" == "yes" ]; then + if [ "${AGENT_INIT}" == "yes" ]; then + die "Guest SELinux with the agent init is not supported yet" + fi + + info "Labeling rootfs for SELinux" + selinuxfs_path="${mount_dir}${SELINUXFS}" + mkdir -p "$selinuxfs_path" + if mountpoint $SELINUXFS > /dev/null && \ + chroot "${mount_dir}" command -v restorecon > /dev/null; then + mount -t selinuxfs selinuxfs "$selinuxfs_path" + chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / + # TODO: This operation will be removed after the updated container-selinux that + # includes the following commit is released. + # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f + # We use chcon as an interim solution until then. + chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" + umount "${selinuxfs_path}" + else + die "Could not label the rootfs. 
Make sure that SELinux is enabled on the host \ + and the rootfs is built with SELINUX=yes" + fi + fi +} + +setup_systemd() { + local mount_dir="$1" + + info "Removing unneeded systemd services and sockets" + for u in "${systemd_units[@]}"; do + find "${mount_dir}" -type f \( \ + -name "${u}.service" -o \ + -name "${u}.socket" \) \ + -exec rm -f {} \; + done + + info "Removing unneeded systemd files" + for u in "${systemd_files[@]}"; do + find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; + done + + info "Creating empty machine-id to allow systemd to bind-mount it" + touch "${mount_dir}/etc/machine-id" +} + create_rootfs_image() { local rootfs="$1" local image="$2" @@ -409,60 +461,26 @@ create_rootfs_image() { die "Could not setup loop device" fi - if ! format_loop "${device}" "${block_size}" "${fs_type}"; then + if ! format_loop "${device}" "${block_size}" "${fs_type}" ""; then die "Could not format loop device: ${device}" fi info "Mounting root partition" - readonly mount_dir=$(mktemp -p ${TMPDIR:-/tmp} -d osbuilder-mount-dir.XXXX) + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) mount "${device}p1" "${mount_dir}" OK "root partition mounted" info "Copying content from rootfs to root partition" cp -a "${rootfs}"/* "${mount_dir}" - if [ "${SELINUX}" == "yes" ]; then - if [ "${AGENT_INIT}" == "yes" ]; then - die "Guest SELinux with the agent init is not supported yet" - fi - - info "Labeling rootfs for SELinux" - selinuxfs_path="${mount_dir}${SELINUXFS}" - mkdir -p $selinuxfs_path - if mountpoint $SELINUXFS > /dev/null && \ - chroot "${mount_dir}" command -v restorecon > /dev/null; then - mount -t selinuxfs selinuxfs $selinuxfs_path - chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} / - # TODO: This operation will be removed after the updated container-selinux that - # includes the following commit is released. 
- # https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f - # We use chcon as an interim solution until then. - chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}" - umount $selinuxfs_path - else - die "Could not label the rootfs. Make sure that SELinux is enabled on the host \ -and the rootfs is built with SELINUX=yes" - fi - fi + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" sync OK "rootfs copied" - info "Removing unneeded systemd services and sockets" - for u in "${systemd_units[@]}"; do - find "${mount_dir}" -type f \( \ - -name "${u}.service" -o \ - -name "${u}.socket" \) \ - -exec rm -f {} \; - done - - info "Removing unneeded systemd files" - for u in "${systemd_files[@]}"; do - find "${mount_dir}" -type f -name "${u}" -exec rm -f {} \; - done - - info "Creating empty machine-id to allow systemd to bind-mount it" - touch "${mount_dir}/etc/machine-id" + info "Setup systemd" + setup_systemd "${mount_dir}" info "Unmounting root partition" umount "${mount_dir}" @@ -473,7 +491,50 @@ and the rootfs is built with SELINUX=yes" fi losetup -d "${device}" - rmdir "${mount_dir}" + rm -rf "${mount_dir}" +} + +create_erofs_rootfs_image() { + local rootfs="$1" + local image="$2" + local block_size="$3" + local agent_bin="$4" + + if [ "$block_size" -ne 4096 ]; then + die "Invalid block size for erofs" + fi + + if ! 
device="$(setup_loop_device "${image}")"; then + die "Could not setup loop device" + fi + + local mount_dir=$(mktemp -p "${TMPDIR:-/tmp}" -d osbuilder-mount-dir.XXXX) + + info "Copying content from rootfs to root partition" + cp -a "${rootfs}"/* "${mount_dir}" + + info "Setup SELinux" + setup_selinux "${mount_dir}" "${agent_bin}" + + sync + OK "rootfs copied" + + info "Setup systemd" + setup_systemd "${mount_dir}" + + readonly fsimage="$(mktemp)" + mkfs.erofs -Enoinline_data "${fsimage}" "${mount_dir}" + local img_size="$(stat -c"%s" "${fsimage}")" + local img_size_mb="$(((("${img_size}" + 1048576) / 1048576) + 1 + "${rootfs_start}"))" + + create_disk "${image}" "${img_size_mb}" "ext4" "${rootfs_start}" + + dd if="${fsimage}" of="${device}p1" + + losetup -d "${device}" + rm -rf "${mount_dir}" + + return "${img_size_mb}" } set_dax_header() { @@ -566,14 +627,23 @@ main() { die "Invalid rootfs" fi - img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" "${fs_type}" "${block_size}") + if [ "${fs_type}" == 'erofs' ]; then + # mkfs.erofs accepts an src root dir directory as an input + # rather than some device, so no need to guess the device dest size first. + create_erofs_rootfs_image "${rootfs}" "${image}" \ + "${block_size}" "${agent_bin}" + rootfs_img_size=$? 
+ img_size=$((rootfs_img_size + dax_header_sz)) + else + img_size=$(calculate_img_size "${rootfs}" "${root_free_space}" \ + "${fs_type}" "${block_size}") - # the first 2M are for the first MBR + NVDIMM metadata and were already - # consider in calculate_img_size - rootfs_img_size=$((img_size - dax_header_sz)) - create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ + # the first 2M are for the first MBR + NVDIMM metadata and were already + # consider in calculate_img_size + rootfs_img_size=$((img_size - dax_header_sz)) + create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \ "${fs_type}" "${block_size}" "${agent_bin}" - + fi # insert at the beginning of the image the MBR + DAX header set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}" } diff --git a/tools/packaging/kernel/configs/fragments/common/fs.conf b/tools/packaging/kernel/configs/fragments/common/fs.conf index ae4e3255f..c3be9f925 100644 --- a/tools/packaging/kernel/configs/fragments/common/fs.conf +++ b/tools/packaging/kernel/configs/fragments/common/fs.conf @@ -36,6 +36,11 @@ CONFIG_AUTOFS_FS=y CONFIG_TMPFS=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MISC_FILESYSTEMS=y +CONFIG_EROFS_FS=y +CONFIG_EROFS_FS_XATTR=y +CONFIG_EROFS_FS_ZIP=y +CONFIG_EROFS_FS_SECURITY=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EPOLL=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 3ad5abd03..29d6383b5 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -99 +100 From 67b8f0773fb8ee6b9aeae02e5feaaa9a3b9825a0 Mon Sep 17 00:00:00 2001 From: Larry Dewey Date: Thu, 25 Aug 2022 13:43:05 -0500 Subject: [PATCH 30/52] SEV: Update ReducedPhysBits Updating this field, as `cpuid` provides host level data, which is not what a guest would expect for Reduced Physical Bits. In almost all cases, we should be using `1` for the value here. Amend: Adding unit test change. 
Fixes: #5006 Signed-off-by: Larry Dewey --- src/runtime/virtcontainers/qemu_amd64.go | 2 +- src/runtime/virtcontainers/qemu_amd64_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index 124fd4534..b635a0f29 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -290,7 +290,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, Debug: false, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, - ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, + ReducedPhysBits: 1, }), "", nil case snpProtection: return append(devices, diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 33d506dcd..cb9a7bb38 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -286,7 +286,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { Debug: false, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, - ReducedPhysBits: cpuid.AMDMemEncrypt.PhysAddrReduction, + ReducedPhysBits: 1, }, } From 2f5bc0f408e6394efdfd913297ec65777bdb1fdd Mon Sep 17 00:00:00 2001 From: Chelsea Mafrica Date: Fri, 10 Feb 2023 03:58:45 -0800 Subject: [PATCH 31/52] kata-ctl: Expand unit tests for CPU check Change unit tests for CPU check to table-driven tests and expand test cases including temp files for cpuinfo. 
Fixes #5919 Signed-off-by: Chelsea Mafrica --- src/tools/kata-ctl/src/check.rs | 205 +++++++++++++++++++++++++------- 1 file changed, 161 insertions(+), 44 deletions(-) diff --git a/src/tools/kata-ctl/src/check.rs b/src/tools/kata-ctl/src/check.rs index 81b9b83a7..8218a0f31 100644 --- a/src/tools/kata-ctl/src/check.rs +++ b/src/tools/kata-ctl/src/check.rs @@ -23,6 +23,9 @@ const JSON_TYPE: &str = "application/json"; const USER_AGT: &str = "kata"; +#[allow(dead_code)] +const ERR_NO_CPUINFO: &str = "cpu_info string is empty"; + #[allow(dead_code)] pub const GENERIC_CPU_VENDOR_FIELD: &str = "vendor_id"; #[allow(dead_code)] @@ -44,7 +47,7 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result = contents.split(substring).collect(); @@ -62,7 +65,11 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result Result { if cpu_info.is_empty() { - return Err(anyhow!("cpu_info string is empty")); + return Err(anyhow!(ERR_NO_CPUINFO)); + } + + if cpu_flags_tag.is_empty() { + return Err(anyhow!("cpu flags delimiter string is empty"))?; } let subcontents: Vec<&str> = cpu_info.split('\n').collect(); @@ -195,58 +202,168 @@ pub fn check_official_releases() -> Result<()> { mod tests { use super::*; use semver::Version; + use std::fs; + use std::io::Write; + use tempfile::tempdir; #[test] - fn test_get_cpu_info_empty_input() { - let expected = "No such file or directory (os error 2)"; - let actual = get_cpu_info("").err().unwrap().to_string(); - assert_eq!(expected, actual); + fn test_get_single_cpu_info() { + // Valid cpuinfo example + let dir = tempdir().unwrap(); + let file_path_full = dir.path().join("cpuinfo_full"); + let path_full = file_path_full.clone(); + let mut file_full = fs::File::create(file_path_full).unwrap(); + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n".to_string(); + writeln!(file_full, "{}", contents).unwrap(); - let actual = get_single_cpu_info("", 
"\nprocessor") - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); + // Empty cpuinfo example + let file_path_empty = dir.path().join("cpuinfo_empty"); + let path_empty = file_path_empty.clone(); + let mut _file_empty = fs::File::create(file_path_empty).unwrap(); + + #[derive(Debug)] + struct TestData<'a> { + cpuinfo_path: &'a str, + processor_delimiter_str: &'a str, + result: Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpuinfo_path: "", + processor_delimiter_str: "", + result: Err(anyhow!("No such file or directory (os error 2)")), + }, + TestData { + cpuinfo_path: &path_empty.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + // Success scenarios + TestData { + cpuinfo_path: &path_full.as_path().display().to_string(), + processor_delimiter_str: "\nprocessor", + result: Ok( + "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4" + .to_string(), + ), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_single_cpu_info(d.cpuinfo_path, d.processor_delimiter_str); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } } #[test] - fn test_get_cpu_flags_empty_input() { - let expected = "cpu_info string is empty"; - let actual = get_cpu_flags("", "").err().unwrap().to_string(); - assert_eq!(expected, actual); + fn test_get_cpu_flags() { + let contents = "processor : 0\nvendor_id : VendorExample\nflags : flag_1 flag_2 flag_3 flag_4\nprocessor : 1\n"; + + #[derive(Debug)] + struct TestData<'a> { + cpu_info_str: &'a str, + cpu_flags_tag: &'a str, + result: 
Result, + } + let tests = &[ + // Failure scenarios + TestData { + cpu_info_str: "", + cpu_flags_tag: "", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: "", + cpu_flags_tag: "flags", + result: Err(anyhow!(ERR_NO_CPUINFO)), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "", + result: Err(anyhow!("cpu flags delimiter string is empty")), + }, + // Success scenarios + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags", + result: Ok(" flag_1 flag_2 flag_3 flag_4".to_string()), + }, + TestData { + cpu_info_str: contents, + cpu_flags_tag: "flags_err", + result: Ok("".to_string()), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let result = get_cpu_flags(d.cpu_info_str, d.cpu_flags_tag); + let msg = format!("{}, result: {:?}", msg, result); + + if d.result.is_ok() { + assert_eq!( + result.as_ref().unwrap(), + d.result.as_ref().unwrap(), + "{}", + msg + ); + continue; + } + + let expected_error = format!("{}", d.result.as_ref().unwrap_err()); + let actual_error = format!("{}", result.unwrap_err()); + assert!(actual_error == expected_error, "{}", msg); + } } #[test] - fn check_version_by_empty_url() { - const TEST_URL: &str = "http:"; - let expected = "builder error: empty host"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); - } + fn test_get_kata_all_releases_by_url() { + #[derive(Debug)] + struct TestData<'a> { + test_url: &'a str, + expected: &'a str, + } + let tests = &[ + // Failure scenarios + TestData { + test_url: "http:", + expected: "builder error: empty host", + }, + TestData { + test_url: "_localhost_", + expected: "builder error: relative URL without a base", + }, + TestData { + test_url: "http://localhost :80", + expected: "builder error: invalid domain character", + }, + ]; - #[test] - fn check_version_by_garbage_url() { - const TEST_URL: &str = "_localhost_"; - let expected = 
"builder error: relative URL without a base"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); - } - - #[test] - fn check_version_by_invalid_url() { - const TEST_URL: &str = "http://localhost :80"; - let expected = "builder error: invalid domain character"; - let actual = get_kata_all_releases_by_url(TEST_URL) - .err() - .unwrap() - .to_string(); - assert_eq!(expected, actual); + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + let actual = get_kata_all_releases_by_url(d.test_url) + .err() + .unwrap() + .to_string(); + let msg = format!("{}, result: {:?}", msg, actual); + assert_eq!(d.expected, actual, "{}", msg); + } } #[test] From ca02c9f5124eb13af6c63b4be9e1ff1ec750bb48 Mon Sep 17 00:00:00 2001 From: zhaojizhuang <571130360@qq.com> Date: Tue, 17 Jan 2023 11:23:36 +0800 Subject: [PATCH 32/52] runtime: add reconnect timeout for vhost user block Fixes: #6075 Signed-off-by: zhaojizhuang <571130360@qq.com> --- docs/how-to/how-to-set-sandbox-config-kata.md | 1 + src/runtime/config/configuration-qemu.toml.in | 5 + src/runtime/pkg/device/config/config.go | 14 +++ .../pkg/device/drivers/vhost_user_blk.go | 37 +++++-- src/runtime/pkg/device/manager/manager.go | 13 ++- .../pkg/device/manager/manager_test.go | 2 +- src/runtime/pkg/govmm/qemu/qmp.go | 6 +- src/runtime/pkg/govmm/qemu/qmp_test.go | 2 +- .../pkg/katautils/config-settings.go.in | 1 + src/runtime/pkg/katautils/config.go | 98 ++++++++++--------- src/runtime/pkg/oci/utils.go | 6 ++ .../virtcontainers/container_linux_test.go | 2 +- src/runtime/virtcontainers/container_test.go | 2 +- src/runtime/virtcontainers/hypervisor.go | 5 + src/runtime/virtcontainers/kata_agent_test.go | 8 +- src/runtime/virtcontainers/persist_test.go | 2 +- .../pkg/annotations/annotations.go | 4 + src/runtime/virtcontainers/qemu.go | 2 +- src/runtime/virtcontainers/sandbox.go | 2 +- .../virtcontainers/sandbox_linux_test.go | 2 +- 
src/runtime/virtcontainers/sandbox_test.go | 6 +- 21 files changed, 144 insertions(+), 76 deletions(-) diff --git a/docs/how-to/how-to-set-sandbox-config-kata.md b/docs/how-to/how-to-set-sandbox-config-kata.md index 27edbf76d..b8ac511cd 100644 --- a/docs/how-to/how-to-set-sandbox-config-kata.md +++ b/docs/how-to/how-to-set-sandbox-config-kata.md @@ -57,6 +57,7 @@ There are several kinds of Kata configurations and they are listed below. | `io.katacontainers.config.hypervisor.enable_iothreads` | `boolean`| enable IO to be processed in a separate thread. Supported currently for virtio-`scsi` driver | | `io.katacontainers.config.hypervisor.enable_mem_prealloc` | `boolean` | the memory space used for `nvdimm` device by the hypervisor | | `io.katacontainers.config.hypervisor.enable_vhost_user_store` | `boolean` | enable vhost-user storage device (QEMU) | +| `io.katacontainers.config.hypervisor.vhost_user_reconnect_timeout_sec` | `uint32`| the timeout in seconds for reconnecting the vhost-user socket; zero disables reconnecting (QEMU) | `io.katacontainers.config.hypervisor.enable_virtio_mem` | `boolean` | enable virtio-mem (QEMU) | | `io.katacontainers.config.hypervisor.entropy_source` (R) | string| the path to a host source of entropy (`/dev/random`, `/dev/urandom` or real hardware RNG device) | | `io.katacontainers.config.hypervisor.file_mem_backend` (R) | string | file based memory backend root directory | diff --git a/src/runtime/config/configuration-qemu.toml.in b/src/runtime/config/configuration-qemu.toml.in index b2a23047e..4fb5a8ba0 100644 --- a/src/runtime/config/configuration-qemu.toml.in +++ b/src/runtime/config/configuration-qemu.toml.in @@ -301,6 +301,11 @@ vhost_user_store_path = "@DEFVHOSTUSERSTOREPATH@" # Your distribution recommends: @DEFVALIDVHOSTUSERSTOREPATHS@ valid_vhost_user_store_paths = @DEFVALIDVHOSTUSERSTOREPATHS@ +# The timeout for reconnecting on non-server spdk sockets when the remote end goes away. +# qemu will delay this many seconds and then attempt to reconnect. 
+# Zero disables reconnecting, and the default is zero. +vhost_user_reconnect_timeout_sec = 0 + # Enable file based guest memory support. The default is an empty string which # will disable this feature. In the case of virtio-fs, this is enabled # automatically and '/dev/shm' is used as the backing folder. diff --git a/src/runtime/pkg/device/config/config.go b/src/runtime/pkg/device/config/config.go index f1078671a..35af6afec 100644 --- a/src/runtime/pkg/device/config/config.go +++ b/src/runtime/pkg/device/config/config.go @@ -87,6 +87,8 @@ const ( // Define the string key for DriverOptions in DeviceInfo struct FsTypeOpt = "fstype" BlockDriverOpt = "block-driver" + + VhostUserReconnectTimeOutOpt = "vhost-user-reconnect-timeout" ) const ( @@ -97,6 +99,15 @@ const ( VhostUserSCSIMajor = 242 ) +const ( + + // The timeout for reconnecting on non-server sockets when the remote end + // goes away. + // qemu will delay this many seconds and then attempt to reconnect. Zero + // disables reconnecting, and is the default. + DefaultVhostUserReconnectTimeOut = 0 +) + // Defining these as a variable instead of a const, to allow // overriding this in the tests. @@ -320,6 +331,9 @@ type VhostUserDeviceAttrs struct { CacheSize uint32 QueueSize uint32 + + // Reconnect timeout for socket of vhost user block device + ReconnectTime uint32 } // GetHostPathFunc is function pointer used to mock GetHostPath in tests. 
diff --git a/src/runtime/pkg/device/drivers/vhost_user_blk.go b/src/runtime/pkg/device/drivers/vhost_user_blk.go index 49c66e711..fbf195c30 100644 --- a/src/runtime/pkg/device/drivers/vhost_user_blk.go +++ b/src/runtime/pkg/device/drivers/vhost_user_blk.go @@ -8,6 +8,7 @@ package drivers import ( "context" + "strconv" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/api" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" @@ -72,17 +73,19 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De } vAttrs := &config.VhostUserDeviceAttrs{ - DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), - SocketPath: device.DeviceInfo.HostPath, - Type: config.VhostUserBlk, - Index: index, + DevID: utils.MakeNameID("blk", device.DeviceInfo.ID, maxDevIDSize), + SocketPath: device.DeviceInfo.HostPath, + Type: config.VhostUserBlk, + Index: index, + ReconnectTime: vhostUserReconnect(device.DeviceInfo.DriverOptions), } deviceLogger().WithFields(logrus.Fields{ - "device": device.DeviceInfo.HostPath, - "SocketPath": vAttrs.SocketPath, - "Type": config.VhostUserBlk, - "Index": index, + "device": device.DeviceInfo.HostPath, + "SocketPath": vAttrs.SocketPath, + "Type": config.VhostUserBlk, + "Index": index, + "ReconnectTime": vAttrs.ReconnectTime, }).Info("Attaching device") device.VhostUserDeviceAttrs = vAttrs @@ -93,6 +96,24 @@ func (device *VhostUserBlkDevice) Attach(ctx context.Context, devReceiver api.De return nil } +func vhostUserReconnect(customOptions map[string]string) uint32 { + var vhostUserReconnectTimeout uint32 + + if customOptions == nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + } else { + reconnectTimeoutStr := customOptions[config.VhostUserReconnectTimeOutOpt] + if reconnectTimeout, err := strconv.Atoi(reconnectTimeoutStr); err != nil { + vhostUserReconnectTimeout = config.DefaultVhostUserReconnectTimeOut + deviceLogger().WithField("reconnect", 
reconnectTimeoutStr).WithError(err).Warn("Failed to get reconnect timeout for vhost-user-blk device") + } else { + vhostUserReconnectTimeout = uint32(reconnectTimeout) + } + } + + return vhostUserReconnectTimeout +} + func isVirtioBlkBlockDriver(customOptions map[string]string) bool { var blockDriverOption string diff --git a/src/runtime/pkg/device/manager/manager.go b/src/runtime/pkg/device/manager/manager.go index eed9e39f1..34a51d300 100644 --- a/src/runtime/pkg/device/manager/manager.go +++ b/src/runtime/pkg/device/manager/manager.go @@ -10,6 +10,7 @@ import ( "context" "encoding/hex" "errors" + "fmt" "sync" "github.com/sirupsen/logrus" @@ -42,6 +43,8 @@ type deviceManager struct { sync.RWMutex vhostUserStoreEnabled bool + + vhostUserReconnectTimeout uint32 } func deviceLogger() *logrus.Entry { @@ -49,11 +52,12 @@ func deviceLogger() *logrus.Entry { } // NewDeviceManager creates a deviceManager object behaved as api.DeviceManager -func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, devices []api.Device) api.DeviceManager { +func NewDeviceManager(blockDriver string, vhostUserStoreEnabled bool, vhostUserStorePath string, vhostUserReconnect uint32, devices []api.Device) api.DeviceManager { dm := &deviceManager{ - vhostUserStoreEnabled: vhostUserStoreEnabled, - vhostUserStorePath: vhostUserStorePath, - devices: make(map[string]api.Device), + vhostUserStoreEnabled: vhostUserStoreEnabled, + vhostUserStorePath: vhostUserStorePath, + vhostUserReconnectTimeout: vhostUserReconnect, + devices: make(map[string]api.Device), } if blockDriver == config.VirtioMmio { dm.blockDriver = config.VirtioMmio @@ -119,6 +123,7 @@ func (dm *deviceManager) createDevice(devInfo config.DeviceInfo) (dev api.Device devInfo.DriverOptions = make(map[string]string) } devInfo.DriverOptions[config.BlockDriverOpt] = dm.blockDriver + devInfo.DriverOptions[config.VhostUserReconnectTimeOutOpt] = fmt.Sprintf("%d", dm.vhostUserReconnectTimeout) return 
drivers.NewVhostUserBlkDevice(&devInfo), nil } else if isBlock(devInfo) { if devInfo.DriverOptions == nil { diff --git a/src/runtime/pkg/device/manager/manager_test.go b/src/runtime/pkg/device/manager/manager_test.go index 1070e6b8e..49e339f60 100644 --- a/src/runtime/pkg/device/manager/manager_test.go +++ b/src/runtime/pkg/device/manager/manager_test.go @@ -208,7 +208,7 @@ func TestAttachBlockDevice(t *testing.T) { } func TestAttachDetachDevice(t *testing.T) { - dm := NewDeviceManager(config.VirtioSCSI, false, "", nil) + dm := NewDeviceManager(config.VirtioSCSI, false, "", 0, nil) path := "/dev/hda" deviceInfo := config.DeviceInfo{ diff --git a/src/runtime/pkg/govmm/qemu/qmp.go b/src/runtime/pkg/govmm/qemu/qmp.go index 2103b7f79..6b020103d 100644 --- a/src/runtime/pkg/govmm/qemu/qmp.go +++ b/src/runtime/pkg/govmm/qemu/qmp.go @@ -1524,7 +1524,7 @@ func (q *QMP) ExecuteGetFD(ctx context.Context, fdname string, fd *os.File) erro // ExecuteCharDevUnixSocketAdd adds a character device using as backend a unix socket, // id is an identifier for the device, path specifies the local path of the unix socket, // wait is to block waiting for a client to connect, server specifies that the socket is a listening socket. 
-func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool) error { +func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, wait, server bool, reconnect uint32) error { data := map[string]interface{}{ "server": server, "addr": map[string]interface{}{ @@ -1540,6 +1540,10 @@ func (q *QMP) ExecuteCharDevUnixSocketAdd(ctx context.Context, id, path string, data["wait"] = wait } + if reconnect > 0 { + data["reconnect"] = reconnect + } + args := map[string]interface{}{ "id": id, "backend": map[string]interface{}{ diff --git a/src/runtime/pkg/govmm/qemu/qmp_test.go b/src/runtime/pkg/govmm/qemu/qmp_test.go index c6bee11e7..17492f6fd 100644 --- a/src/runtime/pkg/govmm/qemu/qmp_test.go +++ b/src/runtime/pkg/govmm/qemu/qmp_test.go @@ -1445,7 +1445,7 @@ func TestExecuteCharDevUnixSocketAdd(t *testing.T) { cfg := QMPConfig{Logger: qmpTestLogger{}} q := startQMPLoop(buf, cfg, connectedCh, disconnectedCh) checkVersion(t, connectedCh) - err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true) + err := q.ExecuteCharDevUnixSocketAdd(context.Background(), "foo", "foo.sock", false, true, 1) if err != nil { t.Fatalf("Unexpected error %v", err) } diff --git a/src/runtime/pkg/katautils/config-settings.go.in b/src/runtime/pkg/katautils/config-settings.go.in index f56111771..7bfab6d9f 100644 --- a/src/runtime/pkg/katautils/config-settings.go.in +++ b/src/runtime/pkg/katautils/config-settings.go.in @@ -84,6 +84,7 @@ const defaultGuestHookPath string = "" const defaultVirtioFSCacheMode = "never" const defaultDisableImageNvdimm = false const defaultVhostUserStorePath string = "/var/run/kata-containers/vhost-user/" +const defaultVhostUserDeviceReconnect = 0 const defaultRxRateLimiterMaxRate = uint64(0) const defaultTxRateLimiterMaxRate = uint64(0) const defaultConfidentialGuest = false diff --git a/src/runtime/pkg/katautils/config.go b/src/runtime/pkg/katautils/config.go index bb92ca718..997d07368 
100644 --- a/src/runtime/pkg/katautils/config.go +++ b/src/runtime/pkg/katautils/config.go @@ -136,6 +136,7 @@ type hypervisor struct { BlockDeviceCacheDirect bool `toml:"block_device_cache_direct"` BlockDeviceCacheNoflush bool `toml:"block_device_cache_noflush"` EnableVhostUserStore bool `toml:"enable_vhost_user_store"` + VhostUserDeviceReconnect uint32 `toml:"vhost_user_reconnect_timeout_sec"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` MemPrealloc bool `toml:"enable_mem_prealloc"` HugePages bool `toml:"enable_hugepages"` @@ -1233,54 +1234,55 @@ func updateRuntimeConfig(configPath string, tomlConf tomlConfig, config *oci.Run func GetDefaultHypervisorConfig() vc.HypervisorConfig { return vc.HypervisorConfig{ - HypervisorPath: defaultHypervisorPath, - JailerPath: defaultJailerPath, - KernelPath: defaultKernelPath, - ImagePath: defaultImagePath, - InitrdPath: defaultInitrdPath, - RootfsType: defaultRootfsType, - FirmwarePath: defaultFirmwarePath, - FirmwareVolumePath: defaultFirmwareVolumePath, - MachineAccelerators: defaultMachineAccelerators, - CPUFeatures: defaultCPUFeatures, - HypervisorMachineType: defaultMachineType, - NumVCPUs: defaultVCPUCount, - DefaultMaxVCPUs: defaultMaxVCPUCount, - MemorySize: defaultMemSize, - MemOffset: defaultMemOffset, - VirtioMem: defaultVirtioMem, - DisableBlockDeviceUse: defaultDisableBlockDeviceUse, - DefaultBridges: defaultBridgesCount, - MemPrealloc: defaultEnableMemPrealloc, - HugePages: defaultEnableHugePages, - IOMMU: defaultEnableIOMMU, - IOMMUPlatform: defaultEnableIOMMUPlatform, - FileBackedMemRootDir: defaultFileBackedMemRootDir, - Debug: defaultEnableDebug, - DisableNestingChecks: defaultDisableNestingChecks, - BlockDeviceDriver: defaultBlockDeviceDriver, - BlockDeviceAIO: defaultBlockDeviceAIO, - BlockDeviceCacheSet: defaultBlockDeviceCacheSet, - BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, - BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, - EnableIOThreads: defaultEnableIOThreads, - 
Msize9p: defaultMsize9p, - HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, - PCIeRootPort: defaultPCIeRootPort, - GuestHookPath: defaultGuestHookPath, - VhostUserStorePath: defaultVhostUserStorePath, - VirtioFSCache: defaultVirtioFSCacheMode, - DisableImageNvdimm: defaultDisableImageNvdimm, - RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, - TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, - SGXEPCSize: defaultSGXEPCSize, - ConfidentialGuest: defaultConfidentialGuest, - SevSnpGuest: defaultSevSnpGuest, - GuestSwap: defaultGuestSwap, - Rootless: defaultRootlessHypervisor, - DisableSeccomp: defaultDisableSeccomp, - DisableGuestSeLinux: defaultDisableGuestSeLinux, - LegacySerial: defaultLegacySerial, + HypervisorPath: defaultHypervisorPath, + JailerPath: defaultJailerPath, + KernelPath: defaultKernelPath, + ImagePath: defaultImagePath, + InitrdPath: defaultInitrdPath, + RootfsType: defaultRootfsType, + FirmwarePath: defaultFirmwarePath, + FirmwareVolumePath: defaultFirmwareVolumePath, + MachineAccelerators: defaultMachineAccelerators, + CPUFeatures: defaultCPUFeatures, + HypervisorMachineType: defaultMachineType, + NumVCPUs: defaultVCPUCount, + DefaultMaxVCPUs: defaultMaxVCPUCount, + MemorySize: defaultMemSize, + MemOffset: defaultMemOffset, + VirtioMem: defaultVirtioMem, + DisableBlockDeviceUse: defaultDisableBlockDeviceUse, + DefaultBridges: defaultBridgesCount, + MemPrealloc: defaultEnableMemPrealloc, + HugePages: defaultEnableHugePages, + IOMMU: defaultEnableIOMMU, + IOMMUPlatform: defaultEnableIOMMUPlatform, + FileBackedMemRootDir: defaultFileBackedMemRootDir, + Debug: defaultEnableDebug, + DisableNestingChecks: defaultDisableNestingChecks, + BlockDeviceDriver: defaultBlockDeviceDriver, + BlockDeviceAIO: defaultBlockDeviceAIO, + BlockDeviceCacheSet: defaultBlockDeviceCacheSet, + BlockDeviceCacheDirect: defaultBlockDeviceCacheDirect, + BlockDeviceCacheNoflush: defaultBlockDeviceCacheNoflush, + EnableIOThreads: defaultEnableIOThreads, + Msize9p: 
defaultMsize9p, + HotplugVFIOOnRootBus: defaultHotplugVFIOOnRootBus, + PCIeRootPort: defaultPCIeRootPort, + GuestHookPath: defaultGuestHookPath, + VhostUserStorePath: defaultVhostUserStorePath, + VhostUserDeviceReconnect: defaultVhostUserDeviceReconnect, + VirtioFSCache: defaultVirtioFSCacheMode, + DisableImageNvdimm: defaultDisableImageNvdimm, + RxRateLimiterMaxRate: defaultRxRateLimiterMaxRate, + TxRateLimiterMaxRate: defaultTxRateLimiterMaxRate, + SGXEPCSize: defaultSGXEPCSize, + ConfidentialGuest: defaultConfidentialGuest, + SevSnpGuest: defaultSevSnpGuest, + GuestSwap: defaultGuestSwap, + Rootless: defaultRootlessHypervisor, + DisableSeccomp: defaultDisableSeccomp, + DisableGuestSeLinux: defaultDisableGuestSeLinux, + LegacySerial: defaultLegacySerial, } } diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index 0e6e0d733..d2d713ff8 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -478,6 +478,12 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } + if err := newAnnotationConfiguration(ocispec, vcAnnotations.VhostUserDeviceReconnect).setUint(func(reconnect uint64) { + config.HypervisorConfig.VhostUserDeviceReconnect = uint32(reconnect) + }); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.GuestHookPath]; ok { if value != "" { config.HypervisorConfig.GuestHookPath = value diff --git a/src/runtime/virtcontainers/container_linux_test.go b/src/runtime/virtcontainers/container_linux_test.go index 4e3564814..581461e61 100644 --- a/src/runtime/virtcontainers/container_linux_test.go +++ b/src/runtime/virtcontainers/container_linux_test.go @@ -216,7 +216,7 @@ func TestContainerAddDriveDir(t *testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: testSandboxID, - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), hypervisor: 
&mockHypervisor{}, agent: &mockAgent{}, config: &SandboxConfig{ diff --git a/src/runtime/virtcontainers/container_test.go b/src/runtime/virtcontainers/container_test.go index 0115fb011..ac2ce540c 100644 --- a/src/runtime/virtcontainers/container_test.go +++ b/src/runtime/virtcontainers/container_test.go @@ -82,7 +82,7 @@ func TestContainerRemoveDrive(t *testing.T) { sandbox := &Sandbox{ ctx: context.Background(), id: "sandbox", - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), config: &SandboxConfig{}, } diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index b36a34dde..dee5fec8f 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -302,6 +302,7 @@ type Param struct { } // HypervisorConfig is the hypervisor configuration. +// nolint: govet type HypervisorConfig struct { // customAssets is a map of assets. // Each value in that map takes precedence over the configured assets. @@ -387,6 +388,10 @@ type HypervisorConfig struct { // related folders, sockets and device nodes should be. VhostUserStorePath string + // VhostUserDeviceReconnect is the timeout for reconnecting on non-server spdk sockets + // when the remote end goes away. Zero disables reconnecting. 
+ VhostUserDeviceReconnect uint32 + // GuestCoredumpPath is the path in host for saving guest memory dump GuestMemoryDumpPath string diff --git a/src/runtime/virtcontainers/kata_agent_test.go b/src/runtime/virtcontainers/kata_agent_test.go index 3b95b0779..b2564e5a8 100644 --- a/src/runtime/virtcontainers/kata_agent_test.go +++ b/src/runtime/virtcontainers/kata_agent_test.go @@ -408,7 +408,7 @@ func TestHandleBlockVolume(t *testing.T) { mounts = append(mounts, vMount, bMount, dMount) tmpDir := "/vhost/user/dir" - dm := manager.NewDeviceManager(config.VirtioBlock, true, tmpDir, devices) + dm := manager.NewDeviceManager(config.VirtioBlock, true, tmpDir, 0, devices) sConfig := SandboxConfig{} sConfig.HypervisorConfig.BlockDeviceDriver = config.VirtioBlock @@ -466,7 +466,7 @@ func TestAppendDevicesEmptyContainerDeviceList(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-scsi", false, "", nil), + devManager: manager.NewDeviceManager("virtio-scsi", false, "", 0, nil), }, devices: ctrDevices, } @@ -499,7 +499,7 @@ func TestAppendDevices(t *testing.T) { c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-blk", false, "", ctrDevices), + devManager: manager.NewDeviceManager("virtio-blk", false, "", 0, ctrDevices), config: sandboxConfig, }, } @@ -547,7 +547,7 @@ func TestAppendVhostUserBlkDevices(t *testing.T) { testVhostUserStorePath := "/test/vhost/user/store/path" c := &Container{ sandbox: &Sandbox{ - devManager: manager.NewDeviceManager("virtio-blk", true, testVhostUserStorePath, ctrDevices), + devManager: manager.NewDeviceManager("virtio-blk", true, testVhostUserStorePath, 0, ctrDevices), config: sandboxConfig, }, } diff --git a/src/runtime/virtcontainers/persist_test.go b/src/runtime/virtcontainers/persist_test.go index 96b947e5b..2e851d69d 100644 --- a/src/runtime/virtcontainers/persist_test.go +++ b/src/runtime/virtcontainers/persist_test.go @@ -32,7 +32,7 @@ func TestSandboxRestore(t 
*testing.T) { sandbox := Sandbox{ id: "test-exp", containers: container, - devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", nil), + devManager: manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil), hypervisor: &mockHypervisor{}, network: network, ctx: context.Background(), diff --git a/src/runtime/virtcontainers/pkg/annotations/annotations.go b/src/runtime/virtcontainers/pkg/annotations/annotations.go index 4441b5fd0..3584ccd70 100644 --- a/src/runtime/virtcontainers/pkg/annotations/annotations.go +++ b/src/runtime/virtcontainers/pkg/annotations/annotations.go @@ -112,6 +112,10 @@ const ( // related folders, sockets and device nodes should be. VhostUserStorePath = kataAnnotHypervisorPrefix + "vhost_user_store_path" + // VhostUserDeviceReconnect is a sandbox annotation to specify the timeout for reconnecting on + // non-server sockets when the remote end goes away. + VhostUserDeviceReconnect = kataAnnotHypervisorPrefix + "vhost_user_reconnect_timeout_sec" + // GuestHookPath is a sandbox annotation to specify the path within the VM that will be used for 'drop-in' hooks. 
GuestHookPath = kataAnnotHypervisorPrefix + "guest_hook_path" diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 3e1fd4bfd..0d4fa3e85 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -1513,7 +1513,7 @@ func (q *qemu) hotplugAddBlockDevice(ctx context.Context, drive *config.BlockDri } func (q *qemu) hotplugAddVhostUserBlkDevice(ctx context.Context, vAttr *config.VhostUserDeviceAttrs, op Operation, devID string) (err error) { - err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false) + err = q.qmpMonitorCh.qmp.ExecuteCharDevUnixSocketAdd(q.qmpMonitorCh.ctx, vAttr.DevID, vAttr.SocketPath, false, false, vAttr.ReconnectTime) if err != nil { return err } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 19605caf0..523c07259 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -600,7 +600,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver, sandboxConfig.HypervisorConfig.EnableVhostUserStore, - sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) + sandboxConfig.HypervisorConfig.VhostUserStorePath, sandboxConfig.HypervisorConfig.VhostUserDeviceReconnect, nil) // Create the sandbox resource controllers. 
if err := s.createResourceController(); err != nil { diff --git a/src/runtime/virtcontainers/sandbox_linux_test.go b/src/runtime/virtcontainers/sandbox_linux_test.go index f08ac6d4e..1d29e1505 100644 --- a/src/runtime/virtcontainers/sandbox_linux_test.go +++ b/src/runtime/virtcontainers/sandbox_linux_test.go @@ -30,7 +30,7 @@ func TestSandboxAttachDevicesVhostUserBlk(t *testing.T) { tmpDir := t.TempDir() os.RemoveAll(tmpDir) - dm := manager.NewDeviceManager(config.VirtioSCSI, true, tmpDir, nil) + dm := manager.NewDeviceManager(config.VirtioSCSI, true, tmpDir, 0, nil) vhostUserDevNodePath := filepath.Join(tmpDir, "/block/devices/") vhostUserSockPath := filepath.Join(tmpDir, "/block/sockets/") diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index 37c489427..8b86fa995 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -541,7 +541,7 @@ func TestSandboxAttachDevicesVFIO(t *testing.T) { config.SysIOMMUPath = savedIOMMUPath }() - dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioSCSI, false, "", 0, nil) path := filepath.Join(vfioPath, testFDIOGroup) deviceInfo := config.DeviceInfo{ HostPath: path, @@ -1080,7 +1080,7 @@ func TestAttachBlockDevice(t *testing.T) { DevType: "b", } - dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", 0, nil) device, err := dm.NewDevice(deviceInfo) assert.Nil(t, err) _, ok := device.(*drivers.BlockDevice) @@ -1136,7 +1136,7 @@ func TestPreAddDevice(t *testing.T) { HypervisorConfig: hConfig, } - dm := manager.NewDeviceManager(config.VirtioBlock, false, "", nil) + dm := manager.NewDeviceManager(config.VirtioBlock, false, "", 0, nil) // create a sandbox first sandbox := &Sandbox{ id: testSandboxID, From 545151829d517f77962cfa6577274587b120b580 Mon Sep 17 00:00:00 2001 From: "James O. D. 
Hunt" Date: Wed, 1 Feb 2023 11:13:00 +0000 Subject: [PATCH 33/52] kata-types: Add Cloud Hypervisor (CH) definitions Implement `ConfigPlugin` trait for Cloud Hypervisor (CH). Signed-off-by: James O. D. Hunt --- src/libs/kata-types/src/config/default.rs | 14 ++ .../kata-types/src/config/hypervisor/ch.rs | 146 ++++++++++++++++++ .../kata-types/src/config/hypervisor/mod.rs | 3 + src/libs/kata-types/src/config/mod.rs | 4 +- 4 files changed, 165 insertions(+), 2 deletions(-) create mode 100644 src/libs/kata-types/src/config/hypervisor/ch.rs diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index bf6b36a40..aa32b59fc 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -67,3 +67,17 @@ pub const DEFAULT_QEMU_PCI_BRIDGES: u32 = 2; pub const MAX_QEMU_PCI_BRIDGES: u32 = 5; pub const MAX_QEMU_VCPUS: u32 = 256; pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64; + +// Default configuration for Cloud Hypervisor (CH) +pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor"; +pub const DEFAULT_CH_CONTROL_PATH: &str = ""; +pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom"; +pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; +pub const DEFAULT_CH_GUEST_KERNEL_PARAMS: &str = ""; +pub const DEFAULT_CH_FIRMWARE_PATH: &str = ""; +pub const DEFAULT_CH_MEMORY_SIZE_MB: u32 = 128; +pub const DEFAULT_CH_MEMORY_SLOTS: u32 = 128; +pub const DEFAULT_CH_PCI_BRIDGES: u32 = 2; +pub const MAX_CH_PCI_BRIDGES: u32 = 5; +pub const MAX_CH_VCPUS: u32 = 256; +pub const MIN_CH_MEMORY_SIZE_MB: u32 = 64; diff --git a/src/libs/kata-types/src/config/hypervisor/ch.rs b/src/libs/kata-types/src/config/hypervisor/ch.rs new file mode 100644 index 000000000..cc752e385 --- /dev/null +++ b/src/libs/kata-types/src/config/hypervisor/ch.rs @@ -0,0 +1,146 @@ +// Copyright (c) 2019-2021 Alibaba Cloud +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use 
std::io::Result; +use std::path::Path; +use std::sync::Arc; + +use super::{default, register_hypervisor_plugin}; + +use crate::config::default::MAX_CH_VCPUS; +use crate::config::default::MIN_CH_MEMORY_SIZE_MB; + +use crate::config::hypervisor::VIRTIO_BLK_MMIO; +use crate::config::{ConfigPlugin, TomlConfig}; +use crate::{eother, resolve_path, validate_path}; + +/// Hypervisor name for CH, used to index `TomlConfig::hypervisor`. +pub const HYPERVISOR_NAME_CH: &str = "cloud-hypervisor"; + +/// Configuration information for CH. +#[derive(Default, Debug)] +pub struct CloudHypervisorConfig {} + +impl CloudHypervisorConfig { + /// Create a new instance of `CloudHypervisorConfig`. + pub fn new() -> Self { + CloudHypervisorConfig {} + } + + /// Register the CH plugin. + pub fn register(self) { + let plugin = Arc::new(self); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, plugin); + } +} + +impl ConfigPlugin for CloudHypervisorConfig { + fn get_max_cpus(&self) -> u32 { + MAX_CH_VCPUS + } + + fn get_min_memory(&self) -> u32 { + MIN_CH_MEMORY_SIZE_MB + } + + fn name(&self) -> &str { + HYPERVISOR_NAME_CH + } + + /// Adjust the configuration information after loading from configuration file. 
+ fn adjust_config(&self, conf: &mut TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get_mut(HYPERVISOR_NAME_CH) { + if ch.path.is_empty() { + ch.path = default::DEFAULT_CH_BINARY_PATH.to_string(); + } + resolve_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + if ch.ctlpath.is_empty() { + ch.ctlpath = default::DEFAULT_CH_CONTROL_PATH.to_string(); + } + resolve_path!(ch.ctlpath, "CH ctlpath `{}` is invalid: {}")?; + + if ch.boot_info.kernel.is_empty() { + ch.boot_info.kernel = default::DEFAULT_CH_GUEST_KERNEL_IMAGE.to_string(); + } + if ch.boot_info.kernel_params.is_empty() { + ch.boot_info.kernel_params = default::DEFAULT_CH_GUEST_KERNEL_PARAMS.to_string(); + } + if ch.boot_info.firmware.is_empty() { + ch.boot_info.firmware = default::DEFAULT_CH_FIRMWARE_PATH.to_string(); + } + + if ch.device_info.default_bridges == 0 { + ch.device_info.default_bridges = default::DEFAULT_CH_PCI_BRIDGES; + } + + if ch.machine_info.entropy_source.is_empty() { + ch.machine_info.entropy_source = default::DEFAULT_CH_ENTROPY_SOURCE.to_string(); + } + + if ch.memory_info.default_memory == 0 { + ch.memory_info.default_memory = default::DEFAULT_CH_MEMORY_SIZE_MB; + } + if ch.memory_info.memory_slots == 0 { + ch.memory_info.memory_slots = default::DEFAULT_CH_MEMORY_SLOTS; + } + } + + Ok(()) + } + + /// Validate the configuration information. 
+ fn validate(&self, conf: &TomlConfig) -> Result<()> { + if let Some(ch) = conf.hypervisor.get(HYPERVISOR_NAME_CH) { + validate_path!(ch.path, "CH binary path `{}` is invalid: {}")?; + validate_path!(ch.ctlpath, "CH control path `{}` is invalid: {}")?; + if !ch.jailer_path.is_empty() { + return Err(eother!("Path for CH jailer should be empty")); + } + if !ch.valid_jailer_paths.is_empty() { + return Err(eother!("Valid CH jailer path list should be empty")); + } + + if !ch.blockdev_info.disable_block_device_use + && ch.blockdev_info.block_device_driver == VIRTIO_BLK_MMIO + { + return Err(eother!("CH doesn't support virtio-blk-mmio")); + } + + if ch.boot_info.kernel.is_empty() { + return Err(eother!("Guest kernel image for CH is empty")); + } + if ch.boot_info.image.is_empty() && ch.boot_info.initrd.is_empty() { + return Err(eother!("Both guest boot image and initrd for CH are empty")); + } + + if (ch.cpu_info.default_vcpus > 0 + && ch.cpu_info.default_vcpus as u32 > default::MAX_CH_VCPUS) + || ch.cpu_info.default_maxvcpus > default::MAX_CH_VCPUS + { + return Err(eother!( + "CH hypervisor cannot support {} vCPUs", + ch.cpu_info.default_maxvcpus + )); + } + + if ch.device_info.default_bridges > default::MAX_CH_PCI_BRIDGES { + return Err(eother!( + "CH hypervisor cannot support {} PCI bridges", + ch.device_info.default_bridges + )); + } + + if ch.memory_info.default_memory < MIN_CH_MEMORY_SIZE_MB { + return Err(eother!( + "CH hypervisor has minimal memory limitation {}", + MIN_CH_MEMORY_SIZE_MB + )); + } + } + + Ok(()) + } +} diff --git a/src/libs/kata-types/src/config/hypervisor/mod.rs b/src/libs/kata-types/src/config/hypervisor/mod.rs index 2eaa3443b..98ae2cc79 100644 --- a/src/libs/kata-types/src/config/hypervisor/mod.rs +++ b/src/libs/kata-types/src/config/hypervisor/mod.rs @@ -40,6 +40,9 @@ pub use self::dragonball::{DragonballConfig, HYPERVISOR_NAME_DRAGONBALL}; mod qemu; pub use self::qemu::{QemuConfig, HYPERVISOR_NAME_QEMU}; +mod ch; +pub use 
self::ch::{CloudHypervisorConfig, HYPERVISOR_NAME_CH}; + const VIRTIO_BLK: &str = "virtio-blk"; const VIRTIO_BLK_MMIO: &str = "virtio-mmio"; const VIRTIO_BLK_CCW: &str = "virtio-blk-ccw"; diff --git a/src/libs/kata-types/src/config/mod.rs b/src/libs/kata-types/src/config/mod.rs index 863dd7590..173026921 100644 --- a/src/libs/kata-types/src/config/mod.rs +++ b/src/libs/kata-types/src/config/mod.rs @@ -25,8 +25,8 @@ pub mod hypervisor; pub use self::agent::Agent; use self::default::DEFAULT_AGENT_DBG_CONSOLE_PORT; pub use self::hypervisor::{ - BootInfo, DragonballConfig, Hypervisor, QemuConfig, HYPERVISOR_NAME_DRAGONBALL, - HYPERVISOR_NAME_QEMU, + BootInfo, CloudHypervisorConfig, DragonballConfig, Hypervisor, QemuConfig, + HYPERVISOR_NAME_DRAGONBALL, HYPERVISOR_NAME_QEMU, }; mod runtime; From 4139d68d516c2272c76cc042d4b885a22621e526 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 13 Feb 2023 20:56:00 +0100 Subject: [PATCH 34/52] runtime-rs: Include target install in conditional branch A Makefile target `install` should be included in the conditional branch as default and test. 
Signed-off-by: Hyounggyu Choi --- src/runtime-rs/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/runtime-rs/Makefile b/src/runtime-rs/Makefile index ce0ba74cc..b50ed3991 100644 --- a/src/runtime-rs/Makefile +++ b/src/runtime-rs/Makefile @@ -28,6 +28,9 @@ default: test: @echo "s390x is not currently supported" exit 0 +install: + @echo "s390x is not currently supported" + exit 0 else ifeq ($(ARCH), powerpc64le) default: @echo "PowerPC 64 LE is not currently supported" @@ -35,12 +38,16 @@ default: test: @echo "PowerPC 64 LE is not currently supported" exit 0 +install: + @echo "PowerPC 64 LE is not currently supported" + exit 0 else ##TARGET default: build code default: runtime show-header ##TARGET test: run cargo tests test: @cargo test --all --target $(TRIPLE) $(EXTRA_RUSTFEATURES) -- --nocapture +install: install-runtime install-configs endif ifeq (,$(realpath $(ARCH_FILE))) @@ -491,8 +498,6 @@ codecov: check_tarpaulin codecov-html: check_tarpaulin cargo tarpaulin $(TARPAULIN_ARGS) -o Html -install: install-runtime install-configs - install-runtime: runtime install -D $(TARGET_PATH) $(DESTDIR)$(BINDIR)/$(notdir $(TARGET_PATH)) From c4539199118f581d160c8ab680db06b519b4614e Mon Sep 17 00:00:00 2001 From: Chelsea Mafrica Date: Mon, 13 Feb 2023 12:35:38 -0800 Subject: [PATCH 35/52] runtime: tracing: Fix missing ctx return Normally we return the context when creating a trace span so that the ordering of spans w.r.t. calls is maintained in tracing output. Add missing context for StartVM() for Cloud Hypervisor. 
Fixes #6271 Signed-off-by: Chelsea Mafrica --- src/runtime/virtcontainers/clh.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index dffd4213a..71bd931dc 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -647,7 +647,7 @@ func (clh *cloudHypervisor) CreateVM(ctx context.Context, id string, network Net // StartVM will start the VMM and boot the virtual machine for the given sandbox. func (clh *cloudHypervisor) StartVM(ctx context.Context, timeout int) error { - span, _ := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) + span, ctx := katatrace.Trace(ctx, clh.Logger(), "StartVM", clhTracingTags, map[string]string{"sandbox_id": clh.id}) defer span.End() clh.Logger().WithField("function", "StartVM").Info("starting Sandbox") From 8e3863cecbbe42dd79e1e8fc087624120f6c3793 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Thu, 9 Feb 2023 11:45:32 +0100 Subject: [PATCH 36/52] kata-deploy: Install protobuf-compiler explicitly in shim-v2 Dockerfile This is to install a missing binary protoc in shim-v2 Dockerfile. Fixes: #6244 Signed-off-by: Hyounggyu Choi (cherry picked from commit 10603e3deff8d4f9907ade320d727ef343970a2f) --- tools/packaging/static-build/shim-v2/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/packaging/static-build/shim-v2/Dockerfile b/tools/packaging/static-build/shim-v2/Dockerfile index a781bb233..045e4f2c9 100644 --- a/tools/packaging/static-build/shim-v2/Dockerfile +++ b/tools/packaging/static-build/shim-v2/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update && \ git \ make \ musl-tools \ + protobuf-compiler \ sudo && \ apt-get clean && rm -rf /var/lib/apt/lists/ From 37b594c0d2172eb10df9c5416e4a666a1f7f7eb0 Mon Sep 17 00:00:00 2001 From: "James O. D. 
Hunt" Date: Wed, 1 Feb 2023 13:40:27 +0000 Subject: [PATCH 37/52] runtime-rs: Add basic CH implementation Add a basic runtime-rs `Hypervisor` trait implementation for Cloud Hypervisor (CH). > **Notes:** > > - This only supports a default Kata configuration for CH currently. > > - Since this feature is still under development, `cargo` features have > been added to enable the feature optionally. The default is to not enable > currently since the code is not ready for general use. > > To enable the feature for testing and development, enable the > `cloud-hypervisor` feature in the `virt_container` crate and enable the > `cloud-hypervisor` feature for its `hypervisor` dependency. Fixes: #5242. Signed-off-by: James O. D. Hunt --- src/runtime-rs/Cargo.lock | 196 +++++-- src/runtime-rs/crates/hypervisor/Cargo.toml | 10 + .../crates/hypervisor/ch-config/Cargo.toml | 22 + .../crates/hypervisor/ch-config/src/ch_api.rs | 274 ++++++++++ .../crates/hypervisor/ch-config/src/lib.rs | 481 +++++++++++++++++ .../hypervisor/ch-config/src/net_util.rs | 32 ++ .../ch-config/src/virtio_devices.rs | 19 + .../crates/hypervisor/src/ch/inner.rs | 148 ++++++ .../crates/hypervisor/src/ch/inner_device.rs | 235 +++++++++ .../hypervisor/src/ch/inner_hypervisor.rs | 486 ++++++++++++++++++ .../crates/hypervisor/src/ch/mod.rs | 163 ++++++ .../crates/hypervisor/src/ch/utils.rs | 53 ++ .../hypervisor/src/hypervisor_persist.rs | 2 +- src/runtime-rs/crates/hypervisor/src/lib.rs | 10 +- .../crates/runtimes/virt_container/Cargo.toml | 6 + .../crates/runtimes/virt_container/src/lib.rs | 25 + 16 files changed, 2105 insertions(+), 57 deletions(-) create mode 100644 src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml create mode 100644 src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs create mode 100644 src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs create mode 100644 src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs create mode 100644 
src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/ch/inner.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/ch/mod.rs create mode 100644 src/runtime-rs/crates/hypervisor/src/ch/utils.rs diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 6842ffb59..598d1940a 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -81,9 +81,17 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.57" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" + +[[package]] +name = "api_client" +version = "0.1.0" +source = "git+https://github.com/cloud-hypervisor/cloud-hypervisor?tag=v27.0#2ba6a9bfcfd79629aecf77504fa554ab821d138e" +dependencies = [ + "vmm-sys-util 0.10.0", +] [[package]] name = "arc-swap" @@ -412,6 +420,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "ch-config" +version = "0.1.0" +dependencies = [ + "anyhow", + "api_client", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "chrono" version = "0.4.22" @@ -934,9 +953,9 @@ checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" [[package]] name = "futures" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" dependencies = [ "futures-channel", "futures-core", @@ -949,9 +968,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.21" +version = "0.3.26" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" dependencies = [ "futures-core", "futures-sink", @@ -959,15 +978,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" [[package]] name = "futures-executor" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" dependencies = [ "futures-core", "futures-task", @@ -976,9 +995,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" [[package]] name = "futures-lite" @@ -997,9 +1016,9 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" dependencies = [ "proc-macro2", "quote", @@ -1008,15 +1027,15 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" +checksum = 
"f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" [[package]] name = "futures-task" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" [[package]] name = "futures-timer" @@ -1026,9 +1045,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.21" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" dependencies = [ "futures-channel", "futures-core", @@ -1114,7 +1133,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7df0ee4b237afb71e99f7e2fbd840ffec2d6c4bb569f69b2af18aa1f63077d38" dependencies = [ "dashmap", - "futures 0.3.21", + "futures 0.3.26", "futures-timer", "no-std-compat", "nonzero_ext", @@ -1236,8 +1255,10 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "ch-config", "dbs-utils", "dragonball", + "futures 0.3.26", "go-flag", "kata-sys-util", "kata-types", @@ -1246,6 +1267,7 @@ dependencies = [ "nix 0.24.2", "persist", "rand 0.8.5", + "safe-path 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "seccompiler", "serde", "serde_json", @@ -1551,14 +1573,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -1612,7 +1634,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "65b4b14489ab424703c092062176d52ba55485a89c076b4f9db05092b7223aa6" dependencies = [ "bytes 1.1.0", - "futures 0.3.21", + "futures 0.3.26", "log", "netlink-packet-core", "netlink-sys", @@ -1627,7 +1649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92b654097027250401127914afb37cb1f311df6610a9891ff07a757e94199027" dependencies = [ "bytes 1.1.0", - "futures 0.3.21", + "futures 0.3.26", "libc", "log", "tokio", @@ -1783,7 +1805,7 @@ dependencies = [ "bitflags", "blake3", "fuse-backend-rs", - "futures 0.3.21", + "futures 0.3.26", "lazy_static", "libc", "log", @@ -1811,7 +1833,7 @@ dependencies = [ "bitflags", "dbs-uhttp", "fuse-backend-rs", - "futures 0.3.21", + "futures 0.3.26", "governor", "lazy_static", "libc", @@ -1931,7 +1953,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.36.1", ] [[package]] @@ -1955,7 +1977,7 @@ dependencies = [ "kata-sys-util", "kata-types", "libc", - "safe-path", + "safe-path 0.1.0", "serde", "serde_json", "shim-interface", @@ -2025,9 +2047,9 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "proc-macro2" -version = "1.0.39" +version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" dependencies = [ "unicode-ident", ] @@ -2321,7 +2343,7 @@ dependencies = [ "bitflags", "byte-unit 4.0.17", "cgroups-rs", - "futures 0.3.21", + "futures 0.3.26", "hypervisor", "kata-sys-util", "kata-types", @@ -2361,7 +2383,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46f1cfa18f8cebe685373a2697915d7e0db3b4554918bba118385e0f71f258a7" dependencies = [ - "futures 0.3.21", + "futures 0.3.26", "log", "netlink-packet-route", "netlink-proto", @@ 
-2432,6 +2454,15 @@ dependencies = [ "libc", ] +[[package]] +name = "safe-path" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "980abdd3220aa19b67ca3ea07b173ca36383f18ae48cde696d90c8af39447ffb" +dependencies = [ + "libc", +] + [[package]] name = "scoped-tls" version = "1.0.0" @@ -2455,18 +2486,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.143" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e8e5d5b70924f74ff5c6d64d9a5acd91422117c60f48c4e07855238a254553" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.143" +version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3d8e8de557aee63c26b85b947f5e59b690d0454c753f3adeb5cd7835ab88391" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", @@ -2475,9 +2506,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.83" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7" +checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "itoa", "ryu", @@ -2729,9 +2760,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.96" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", @@ -2857,22 +2888,22 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.19.1" +version = "1.25.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95eec79ea28c00a365f539f1961e9278fbcaf81c0ff6aaf0e93c181352446948" +checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" dependencies = [ + "autocfg", "bytes 1.1.0", "libc", "memchr", "mio", "num_cpus", - "once_cell", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2", "tokio-macros", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -2907,7 +2938,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e0723fc001950a3b018947b05eeb45014fd2b7c6e8f292502193ab74486bdb6" dependencies = [ "bytes 0.4.12", - "futures 0.3.21", + "futures 0.3.26", "libc", "tokio", "vsock", @@ -2971,7 +3002,7 @@ checksum = "2ecfff459a859c6ba6668ff72b34c2f1d94d9d58f7088414c2674ad0f31cc7d8" dependencies = [ "async-trait", "byteorder", - "futures 0.3.21", + "futures 0.3.26", "libc", "log", "nix 0.23.1", @@ -3105,7 +3136,7 @@ dependencies = [ "awaitgroup", "common", "containerd-shim-protos", - "futures 0.3.21", + "futures 0.3.26", "hypervisor", "kata-sys-util", "kata-types", @@ -3366,43 +3397,100 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + 
"windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index 0edbfc8bc..7bd49dd8b 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -32,4 +32,14 @@ shim-interface = { path = "../../../libs/shim-interface" } dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs","dbs-upcall"] } +ch-config = { path = "ch-config", optional = true } + +futures = "0.3.25" +safe-path = "0.1.0" + [features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. 
+cloud-hypervisor = ["ch-config"] diff --git a/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml new file mode 100644 index 000000000..2fd58f9f3 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml @@ -0,0 +1,22 @@ +# Copyright (c) 2022-2023 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "ch-config" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.68" +serde = { version = "1.0.145", features = ["rc", "derive"] } +serde_json = "1.0.91" +tokio = { version = "1.25.0", features = ["sync", "rt"] } + +# Cloud Hypervisor public HTTP API functions +# Note that the version specified is not necessarily the version of CH +# being used. This version is used to pin the CH config structure +# which is relatively static. +api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" } diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs new file mode 100644 index 000000000..fe812c7ca --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs @@ -0,0 +1,274 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use crate::net_util::MAC_ADDR_LEN; +use crate::{ + ConsoleConfig, ConsoleOutputMode, CpuTopology, CpusConfig, DeviceConfig, FsConfig, MacAddr, + MemoryConfig, NetConfig, PayloadConfig, PmemConfig, RngConfig, VmConfig, VsockConfig, +}; +use anyhow::{anyhow, Context, Result}; +use api_client::simple_api_full_command_and_response; + +use std::fmt::Display; +use std::net::Ipv4Addr; +use std::os::unix::net::UnixStream; +use std::path::PathBuf; +use tokio::task; + +pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result> { + 
task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response(&mut socket, "GET", "vmm.ping", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = + simple_api_full_command_and_response(&mut socket, "PUT", "vmm.shutdown", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_create( + sandbox_path: String, + vsock_socket_path: String, + mut socket: UnixStream, + shared_fs_devices: Option>, + pmem_devices: Option>, +) -> Result> { + let cfg = cloud_hypervisor_vm_create_cfg( + sandbox_path, + vsock_socket_path, + shared_fs_devices, + pmem_devices, + ) + .await?; + + let serialised = serde_json::to_string_pretty(&cfg)?; + + task::spawn_blocking(move || -> Result> { + let data = Some(serialised.as_str()); + + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.create", data) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_start(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response(&mut socket, "PUT", "vm.boot", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_stop(mut socket: UnixStream) -> Result> { + task::spawn_blocking(move || -> Result> { + let response = + simple_api_full_command_and_response(&mut socket, "PUT", "vm.shutdown", None) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? 
+} + +#[allow(dead_code)] +pub async fn cloud_hypervisor_vm_device_add(mut socket: UnixStream) -> Result> { + let device_config = DeviceConfig::default(); + + task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-device", + Some(&serde_json::to_string(&device_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await? +} + +pub async fn cloud_hypervisor_vm_fs_add( + mut socket: UnixStream, + fs_config: FsConfig, +) -> Result> { + let result = task::spawn_blocking(move || -> Result> { + let response = simple_api_full_command_and_response( + &mut socket, + "PUT", + "vm.add-fs", + Some(&serde_json::to_string(&fs_config)?), + ) + .map_err(|e| anyhow!(e))?; + + Ok(response) + }) + .await?; + + result +} + +pub async fn cloud_hypervisor_vm_create_cfg( + // FIXME: + _sandbox_path: String, + vsock_socket_path: String, + shared_fs_devices: Option>, + pmem_devices: Option>, +) -> Result { + let topology = CpuTopology { + threads_per_core: 1, + cores_per_die: 12, + dies_per_package: 1, + packages: 1, + }; + + let cpus = CpusConfig { + boot_vcpus: 1, + max_vcpus: 12, + max_phys_bits: 46, + topology: Some(topology), + ..Default::default() + }; + + let rng = RngConfig { + src: PathBuf::from("/dev/urandom"), + ..Default::default() + }; + + let kernel_args = vec![ + "root=/dev/pmem0p1", + "rootflags=dax,data=ordered,errors=remount-ro", + "ro", + "rootfstype=ext4", + "panic=1", + "no_timer_check", + "noreplace-smp", + "console=ttyS0,115200n8", + "systemd.log_target=console", + "systemd.unit=kata-containers", + "systemd.mask=systemd-networkd.service", + "systemd.mask=systemd-networkd.socket", + "agent.log=debug", + ]; + + let cmdline = kernel_args.join(" "); + + let kernel = PathBuf::from("/opt/kata/share/kata-containers/vmlinux.container"); + + // Note that PmemConfig replaces the PayloadConfig.initrd. 
+ let payload = PayloadConfig { + kernel: Some(kernel), + cmdline: Some(cmdline), + ..Default::default() + }; + + let serial = ConsoleConfig { + mode: ConsoleOutputMode::Tty, + ..Default::default() + }; + + let ip = Ipv4Addr::new(192, 168, 10, 10); + let mask = Ipv4Addr::new(255, 255, 255, 0); + + let mac_str = "12:34:56:78:90:01"; + + let mac = parse_mac(mac_str)?; + + let network = NetConfig { + ip, + mask, + mac, + ..Default::default() + }; + + let memory = MemoryConfig { + size: (1024 * 1024 * 2048), + + // Required + shared: true, + + prefault: false, + hugepages: false, + mergeable: false, + + // FIXME: + hotplug_size: Some(16475226112), + + ..Default::default() + }; + + let fs = shared_fs_devices; + let pmem = pmem_devices; + + let vsock = VsockConfig { + cid: 3, + socket: PathBuf::from(vsock_socket_path), + ..Default::default() + }; + + let cfg = VmConfig { + cpus, + memory, + fs, + serial, + pmem, + payload: Some(payload), + vsock: Some(vsock), + rng, + net: Some(vec![network]), + ..Default::default() + }; + + Ok(cfg) +} + +fn parse_mac<S>(s: &S) -> Result<MacAddr> +where + S: AsRef<str> + ?Sized + Display, +{ + let v: Vec<&str> = s.as_ref().split(':').collect(); // colon-separated hex octets + let mut bytes = [0u8; MAC_ADDR_LEN]; + + if v.len() != MAC_ADDR_LEN { + return Err(anyhow!( + "invalid MAC {} (length {}, expected {})", + s, + v.len(), + MAC_ADDR_LEN + )); + } + + for i in 0..MAC_ADDR_LEN { + if v[i].len() != 2 { + return Err(anyhow!( + "invalid MAC {} (segment {} length {}, expected {})", + s, + i, + v[i].len(), // length of the offending segment, not the segment count + 2 + )); + } + + bytes[i] = + u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?; + } + + Ok(MacAddr { bytes }) +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs new file mode 100644 index 000000000..3e3fb3412 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs @@ -0,0 +1,481 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: 
Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::net::Ipv4Addr; +use std::path::PathBuf; + +pub mod ch_api; +pub mod net_util; +mod virtio_devices; + +use crate::virtio_devices::RateLimiterConfig; +pub use net_util::MacAddr; + +pub const MAX_NUM_PCI_SEGMENTS: u16 = 16; + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct BalloonConfig { + pub size: u64, + /// Option to deflate the balloon in case the guest is out of memory. + #[serde(default)] + pub deflate_on_oom: bool, + /// Option to enable free page reporting from the guest. + #[serde(default)] + pub free_page_reporting: bool, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CmdlineConfig { + pub args: String, +} + +impl CmdlineConfig { + fn is_empty(&self) -> bool { + self.args.is_empty() + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct ConsoleConfig { + //#[serde(default = "default_consoleconfig_file")] + pub file: Option, + pub mode: ConsoleOutputMode, + #[serde(default)] + pub iommu: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum ConsoleOutputMode { + #[default] + Off, + Pty, + Tty, + File, + Null, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuAffinity { + pub vcpu: u8, + pub host_cpus: Vec, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpusConfig { + pub boot_vcpus: u8, + pub max_vcpus: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub topology: Option, + #[serde(default)] + pub kvm_hyperv: bool, + #[serde(skip_serializing_if = "u8_is_zero")] + pub max_phys_bits: u8, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub affinity: Option>, + #[serde(default)] + pub features: CpuFeatures, +} + +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +pub struct CpuFeatures 
{ + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub amx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct CpuTopology { + pub threads_per_core: u8, + pub cores_per_die: u8, + pub dies_per_package: u8, + pub packages: u8, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DeviceConfig { + pub path: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct DiskConfig { + pub path: Option, + #[serde(default)] + pub readonly: bool, + #[serde(default)] + pub direct: bool, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_diskconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_diskconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + pub vhost_socket: Option, + #[serde(default)] + pub rate_limiter_config: Option, + #[serde(default)] + pub id: Option, + // For testing use only. Not exposed in API. 
+ #[serde(default)] + pub disable_io_uring: bool, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct FsConfig { + pub tag: String, + pub socket: PathBuf, + //#[serde(default = "default_fsconfig_num_queues")] + pub num_queues: usize, + //#[serde(default = "default_fsconfig_queue_size")] + pub queue_size: u16, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum HotplugMethod { + #[default] + Acpi, + VirtioMem, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct InitramfsConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct KernelConfig { + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryConfig { + pub size: u64, + #[serde(default)] + pub mergeable: bool, + #[serde(default)] + pub hotplug_method: HotplugMethod, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplug_size: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hotplugged_size: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub hugepage_size: Option, + #[serde(default)] + pub prefault: bool, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub zones: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct MemoryZoneConfig { + pub id: String, + pub size: u64, + #[serde(default)] + pub file: Option, + #[serde(default)] + pub shared: bool, + #[serde(default)] + pub hugepages: bool, + #[serde(default)] + pub hugepage_size: Option, + #[serde(default)] + pub host_numa_node: Option, + 
#[serde(default)] + pub hotplug_size: Option, + #[serde(default)] + pub hotplugged_size: Option, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NetConfig { + //#[serde(default = "default_netconfig_tap")] + #[serde(skip_serializing_if = "Option::is_none")] + pub tap: Option, + //#[serde(default = "default_netconfig_ip")] + pub ip: Ipv4Addr, + //#[serde(default = "default_netconfig_mask")] + pub mask: Ipv4Addr, + //#[serde(default = "default_netconfig_mac")] + pub mac: MacAddr, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub host_mac: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub mtu: Option, + #[serde(default)] + pub iommu: bool, + //#[serde(default = "default_netconfig_num_queues")] + #[serde(skip_serializing_if = "usize_is_zero")] + pub num_queues: usize, + //#[serde(default = "default_netconfig_queue_size")] + #[serde(skip_serializing_if = "u16_is_zero")] + pub queue_size: u16, + #[serde(default)] + pub vhost_user: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub vhost_socket: Option, + #[serde(default)] + pub vhost_mode: VhostMode, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub fds: Option>, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub rate_limiter_config: Option, + #[serde(default)] + #[serde(skip_serializing_if = "u16_is_zero")] + pub pci_segment: u16, +} + +impl Default for NetConfig { + fn default() -> Self { + NetConfig { + tap: None, + ip: Ipv4Addr::new(0, 0, 0, 0), + mask: Ipv4Addr::new(0, 0, 0, 0), + mac: MacAddr::default(), + host_mac: None, + mtu: None, + iommu: false, + num_queues: 0, + queue_size: 0, + vhost_user: false, + vhost_socket: None, + vhost_mode: VhostMode::default(), + id: None, + fds: None, + rate_limiter_config: None, + 
pci_segment: 0, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaConfig { + #[serde(default)] + pub guest_numa_id: u32, + #[serde(default)] + pub cpus: Option>, + #[serde(default)] + pub distances: Option>, + #[serde(default)] + pub memory_zones: Option>, + #[cfg(target_arch = "x86_64")] + #[serde(default)] + pub sgx_epc_sections: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct NumaDistance { + #[serde(default)] + pub destination: u32, + #[serde(default)] + pub distance: u8, +} + +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +pub struct PayloadConfig { + #[serde(default)] + pub firmware: Option, + #[serde(default)] + pub kernel: Option, + #[serde(default)] + pub cmdline: Option, + #[serde(default)] + pub initramfs: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PlatformConfig { + //#[serde(default = "default_platformconfig_num_pci_segments")] + pub num_pci_segments: u16, + #[serde(default)] + pub iommu_segments: Option>, + #[serde(default)] + pub serial_number: Option, + #[serde(default)] + pub uuid: Option, + #[serde(default)] + pub oem_strings: Option>, + #[cfg(feature = "tdx")] + #[serde(default)] + pub tdx: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct PmemConfig { + pub file: PathBuf, + #[serde(default)] + pub size: Option, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub discard_writes: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct RngConfig { + pub src: PathBuf, + #[serde(default)] + pub iommu: bool, +} + +#[cfg(target_arch = "x86_64")] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct SgxEpcConfig { + pub id: String, + #[serde(default)] + pub size: 
u64, + #[serde(default)] + pub prefault: bool, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct UserDeviceConfig { + pub socket: PathBuf, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VdpaConfig { + pub path: PathBuf, + //#[serde(default = "default_vdpaconfig_num_queues")] + pub num_queues: usize, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub enum VhostMode { + #[default] + Client, + Server, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VmConfig { + #[serde(default)] + pub cpus: CpusConfig, + #[serde(default)] + pub memory: MemoryConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub kernel: Option, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub initramfs: Option, + #[serde(default)] + #[serde(skip_serializing_if = "CmdlineConfig::is_empty")] + pub cmdline: CmdlineConfig, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub payload: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub disks: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub net: Option>, + #[serde(default)] + pub rng: RngConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub balloon: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub fs: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub pmem: Option>, + //#[serde(default = "ConsoleConfig::default_serial")] + pub serial: ConsoleConfig, + //#[serde(default = "ConsoleConfig::default_console")] + pub console: ConsoleConfig, + #[serde(skip_serializing_if = "Option::is_none")] + pub devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + 
pub user_devices: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vdpa: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub vsock: Option, + #[serde(default)] + pub iommu: bool, + #[cfg(target_arch = "x86_64")] + #[serde(skip_serializing_if = "Option::is_none")] + pub sgx_epc: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub numa: Option>, + #[serde(default)] + pub watchdog: bool, + #[cfg(feature = "guest_debug")] + pub gdb: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub platform: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, Default)] +pub struct VsockConfig { + pub cid: u64, + pub socket: PathBuf, + #[serde(default)] + pub iommu: bool, + #[serde(default)] + pub id: Option, + #[serde(default)] + pub pci_segment: u16, +} + +//-------------------------------------------------------------------- +// For serde serialization + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u8_is_zero(v: &u8) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn usize_is_zero(v: &usize) -> bool { + *v == 0 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +fn u16_is_zero(v: &u16) -> bool { + *v == 0 +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs new file mode 100644 index 000000000..00a079462 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/net_util.rs @@ -0,0 +1,32 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize, Serializer}; +use std::fmt; + +pub const MAC_ADDR_LEN: usize = 6; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Default)] +pub struct MacAddr { + pub bytes: [u8; MAC_ADDR_LEN], +} + +// Note: Implements ToString automatically. 
+impl fmt::Display for MacAddr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let b = &self.bytes; + write!( + f, + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5] + ) + } +} + +// Required to remove the `bytes` member from the serialized JSON! +impl Serialize for MacAddr { + fn serialize(&self, serializer: S) -> Result { + self.to_string().serialize(serializer) + } +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs new file mode 100644 index 000000000..02bf04bf9 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/virtio_devices.rs @@ -0,0 +1,19 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenBucketConfig { + pub size: u64, + pub one_time_burst: Option, + pub refill_time: u64, +} + +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct RateLimiterConfig { + pub bandwidth: Option, + pub ops: Option, +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs new file mode 100644 index 000000000..7f65ac11b --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner.rs @@ -0,0 +1,148 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::HypervisorState; +use crate::device::Device; +use crate::VmmState; +use anyhow::Result; +use async_trait::async_trait; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use kata_types::config::hypervisor::HYPERVISOR_NAME_CH; +use persist::sandbox_persist::Persist; +use std::os::unix::net::UnixStream; 
+use tokio::process::Child; +use tokio::sync::watch::{channel, Receiver, Sender}; +use tokio::task::JoinHandle; + +#[derive(Debug)] +pub struct CloudHypervisorInner { + pub(crate) state: VmmState, + pub(crate) id: String, + + pub(crate) api_socket: Option, + pub(crate) extra_args: Option>, + + pub(crate) config: Option, + + pub(crate) process: Option, + pub(crate) pid: Option, + + pub(crate) timeout_secs: i32, + + pub(crate) netns: Option, + + // Sandbox-specific directory + pub(crate) vm_path: String, + + // Hypervisor runtime directory + pub(crate) run_dir: String, + + // Subdirectory of vm_path. + pub(crate) jailer_root: String, + + /// List of devices that will be added to the VM once it boots + pub(crate) pending_devices: Option>, + + pub(crate) _capabilities: Capabilities, + + pub(crate) shutdown_tx: Option>, + pub(crate) shutdown_rx: Option>, + pub(crate) tasks: Option>>>, +} + +unsafe impl Send for CloudHypervisorInner {} +unsafe impl Sync for CloudHypervisorInner {} + +const CH_DEFAULT_TIMEOUT_SECS: u32 = 10; + +impl CloudHypervisorInner { + pub fn new() -> Self { + let mut capabilities = Capabilities::new(); + capabilities.set( + CapabilityBits::BlockDeviceSupport + | CapabilityBits::BlockDeviceHotplugSupport + | CapabilityBits::FsSharingSupport, + ); + + let (tx, rx) = channel(true); + + Self { + api_socket: None, + extra_args: None, + + process: None, + pid: None, + + config: None, + state: VmmState::NotReady, + timeout_secs: CH_DEFAULT_TIMEOUT_SECS as i32, + id: String::default(), + jailer_root: String::default(), + vm_path: String::default(), + run_dir: String::default(), + netns: None, + pending_devices: None, + _capabilities: capabilities, + shutdown_tx: Some(tx), + shutdown_rx: Some(rx), + tasks: None, + } + } + + pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + self.config = Some(config); + } + + pub fn hypervisor_config(&self) -> HypervisorConfig { + self.config.clone().unwrap_or_default() + } +} + +impl Default for 
CloudHypervisorInner { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Persist for CloudHypervisorInner { + type State = HypervisorState; + type ConstructorArgs = (); + + // Return a state object that will be saved by the caller. + async fn save(&self) -> Result { + Ok(HypervisorState { + hypervisor_type: HYPERVISOR_NAME_CH.to_string(), + id: self.id.clone(), + vm_path: self.vm_path.clone(), + jailed: false, + jailer_root: String::default(), + netns: None, + config: self.hypervisor_config(), + run_dir: self.run_dir.clone(), + cached_block_devices: Default::default(), + ..Default::default() + }) + } + + // Set the hypervisor state to the specified state + async fn restore( + _hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let ch = Self { + config: Some(hypervisor_state.config), + state: VmmState::NotReady, + id: hypervisor_state.id, + vm_path: hypervisor_state.vm_path, + run_dir: hypervisor_state.run_dir, + + ..Default::default() + }; + + Ok(ch) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs new file mode 100644 index 000000000..03cf95daf --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs @@ -0,0 +1,235 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2019-2022 Ant Group +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::device::{Device, ShareFsDeviceConfig}; +use crate::HybridVsockConfig; +use crate::VmmState; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::cloud_hypervisor_vm_fs_add; +use ch_config::{FsConfig, PmemConfig}; +use safe_path::scoped_join; +use std::convert::TryFrom; +use std::path::PathBuf; + +const VIRTIO_FS: &str = "virtio-fs"; + +impl CloudHypervisorInner { + pub(crate) async fn add_device(&mut self, device: Device) -> Result<()> { + if self.state != 
VmmState::VmRunning { + let mut devices: Vec = if let Some(devices) = self.pending_devices.take() { + devices + } else { + vec![] + }; + + devices.insert(0, device); + + self.pending_devices = Some(devices); + + return Ok(()); + } + + self.handle_add_device(device).await?; + + Ok(()) + } + + async fn handle_add_device(&mut self, device: Device) -> Result<()> { + match device { + Device::ShareFsDevice(cfg) => self.handle_share_fs_device(cfg).await, + Device::HybridVsock(cfg) => self.handle_hvsock_device(&cfg).await, + _ => return Err(anyhow!("unhandled device: {:?}", device)), + } + } + + /// Add the devices that were requested to be added before the VMM was + /// started. + #[allow(dead_code)] + pub(crate) async fn handle_pending_devices_after_boot(&mut self) -> Result<()> { + if self.state != VmmState::VmRunning { + return Err(anyhow!( + "cannot handle pending devices with VMM state {:?}", + self.state + )); + } + + if let Some(mut devices) = self.pending_devices.take() { + while let Some(dev) = devices.pop() { + self.add_device(dev).await.context("add_device")?; + } + } + + Ok(()) + } + + pub(crate) async fn remove_device(&mut self, _device: Device) -> Result<()> { + Ok(()) + } + + async fn handle_share_fs_device(&mut self, cfg: ShareFsDeviceConfig) -> Result<()> { + if cfg.fs_type != VIRTIO_FS { + return Err(anyhow!("cannot handle share fs type: {:?}", cfg.fs_type)); + } + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + 1 + }; + + let queue_size: u16 = if cfg.queue_size > 0 { // test the size itself, not the queue count + u16::try_from(cfg.queue_size)? + } else { + 1024 + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + scoped_join(&self.vm_path, cfg.sock_path)? 
+ }; + + let fs_config = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + let response = cloud_hypervisor_vm_fs_add( + socket.try_clone().context("failed to clone socket")?, + fs_config, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "fs add response: {:?}", detail); + } + + Ok(()) + } + + async fn handle_hvsock_device(&mut self, _cfg: &HybridVsockConfig) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_shared_fs_devices(&mut self) -> Result>> { + let pending_root_devices = self.pending_devices.take(); + + let mut root_devices = Vec::::new(); + + if let Some(devices) = pending_root_devices { + for dev in devices { + match dev { + Device::ShareFsDevice(dev) => { + let settings = ShareFsSettings::new(dev, self.vm_path.clone()); + + let fs_cfg = FsConfig::try_from(settings)?; + + root_devices.push(fs_cfg); + } + _ => continue, + }; + } + + Ok(Some(root_devices)) + } else { + Ok(None) + } + } + + pub(crate) async fn get_boot_file(&mut self) -> Result { + if let Some(ref config) = self.config { + let boot_info = &config.boot_info; + + let file = if !boot_info.initrd.is_empty() { + boot_info.initrd.clone() + } else if !boot_info.image.is_empty() { + boot_info.image.clone() + } else { + return Err(anyhow!("missing boot file (no image or initrd)")); + }; + + Ok(PathBuf::from(file)) + } else { + Err(anyhow!("no hypervisor config")) + } + } + + pub(crate) async fn get_pmem_devices(&mut self) -> Result>> { + let file = self.get_boot_file().await?; + + let pmem_cfg = PmemConfig { + file, + size: None, + iommu: false, + discard_writes: true, + id: None, + pci_segment: 0, + }; + + let pmem_devices = vec![pmem_cfg]; + + Ok(Some(pmem_devices)) + } +} + +#[derive(Debug)] +pub struct ShareFsSettings { + cfg: ShareFsDeviceConfig, + vm_path: String, +} + +impl ShareFsSettings { + pub fn new(cfg: ShareFsDeviceConfig, vm_path: String) -> Self { + ShareFsSettings { cfg, vm_path } + } +} + 
+impl TryFrom<ShareFsSettings> for FsConfig { + type Error = anyhow::Error; + + fn try_from(settings: ShareFsSettings) -> Result<Self, Self::Error> { + let cfg = settings.cfg; + let vm_path = settings.vm_path; + + let num_queues: usize = if cfg.queue_num > 0 { + cfg.queue_num as usize + } else { + 1 + }; + + let queue_size: u16 = if cfg.queue_size > 0 { // test the size itself, not the queue count + u16::try_from(cfg.queue_size)? + } else { + 1024 + }; + + let socket_path = if cfg.sock_path.starts_with('/') { + PathBuf::from(cfg.sock_path) + } else { + PathBuf::from(vm_path).join(cfg.sock_path) + }; + + let fs_cfg = FsConfig { + tag: cfg.mount_tag, + socket: socket_path, + num_queues, + queue_size, + ..Default::default() + }; + + Ok(fs_cfg) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs new file mode 100644 index 000000000..b3271ee79 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -0,0 +1,486 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::inner::CloudHypervisorInner; +use crate::ch::utils::get_api_socket_path; +use crate::ch::utils::{get_jailer_root, get_sandbox_path, get_vsock_path}; +use crate::Device; +use crate::VsockConfig; +use crate::{VcpuThreadIds, VmmState}; +use anyhow::{anyhow, Context, Result}; +use ch_config::ch_api::{ + cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping, + cloud_hypervisor_vmm_shutdown, +}; +use core::future::poll_fn; +use futures::executor::block_on; +use futures::future::join_all; +use kata_types::capabilities::{Capabilities, CapabilityBits}; +use std::fs::create_dir_all; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::process::Stdio; +use tokio::io::AsyncBufReadExt; +use tokio::io::BufReader; +use tokio::process::{Child, Command}; +use tokio::sync::watch::Receiver; +use tokio::task; +use tokio::task::JoinHandle; +use tokio::time::Duration; 
+ +const CH_NAME: &str = "cloud-hypervisor"; + +/// Number of milliseconds to wait before retrying a CH operation. +const CH_POLL_TIME_MS: u64 = 50; + +impl CloudHypervisorInner { + async fn start_hypervisor(&mut self, timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_launch(timeout_secs) + .await + .context("launch failed")?; + + self.cloud_hypervisor_setup_comms() + .await + .context("comms setup failed")?; + + self.cloud_hypervisor_check_running() + .await + .context("hypervisor running check failed")?; + + self.state = VmmState::VmmServerReady; + + Ok(()) + } + + async fn boot_vm(&mut self) -> Result<()> { + let shared_fs_devices = self.get_shared_fs_devices().await?; + + let pmem_devices = self.get_pmem_devices().await?; + + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let sandbox_path = get_sandbox_path(&self.id)?; + + std::fs::create_dir_all(sandbox_path.clone()).context("failed to create sandbox path")?; + + let vsock_socket_path = get_vsock_path(&self.id)?; + + let response = cloud_hypervisor_vm_create( + sandbox_path, + vsock_socket_path, + socket.try_clone().context("failed to clone socket")?, + shared_fs_devices, + pmem_devices, + ) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm boot response: {:?}", detail); + } + + let response = + cloud_hypervisor_vm_start(socket.try_clone().context("failed to clone socket")?) + .await?; + + if let Some(detail) = response { + debug!(sl!(), "vm start response: {:?}", detail); + } + + self.state = VmmState::VmRunning; + + Ok(()) + } + + async fn cloud_hypervisor_setup_comms(&mut self) -> Result<()> { + let api_socket_path = get_api_socket_path(&self.id)?; + + // The hypervisor has just been spawned, but may not yet have created + // the API socket, so repeatedly try to connect for up to + // timeout_secs. 
+ let join_handle: JoinHandle> = + task::spawn_blocking(move || -> Result { + let api_socket: UnixStream; + + loop { + let result = UnixStream::connect(api_socket_path.clone()); + + if let Ok(result) = result { + api_socket = result; + break; + } + + std::thread::sleep(Duration::from_millis(CH_POLL_TIME_MS)); + } + + Ok(api_socket) + }); + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + self.timeout_secs + ); + + let result = + tokio::time::timeout(Duration::from_secs(self.timeout_secs as u64), join_handle) + .await + .context(timeout_msg)?; + + let result = result?; + + let api_socket = result?; + + self.api_socket = Some(api_socket); + + Ok(()) + } + + async fn cloud_hypervisor_check_running(&mut self) -> Result<()> { + let timeout_secs = self.timeout_secs; + + let timeout_msg = format!( + "API socket connect timed out after {} seconds", + timeout_secs + ); + + let join_handle = self.cloud_hypervisor_ping_until_ready(CH_POLL_TIME_MS); + + let result = tokio::time::timeout(Duration::new(timeout_secs as u64, 0), join_handle) + .await + .context(timeout_msg)?; + + result + } + + async fn cloud_hypervisor_ensure_not_launched(&self) -> Result<()> { + if let Some(child) = &self.process { + return Err(anyhow!( + "{} already running with PID {}", + CH_NAME, + child.id().unwrap_or(0) + )); + } + + Ok(()) + } + + async fn cloud_hypervisor_launch(&mut self, _timeout_secs: i32) -> Result<()> { + self.cloud_hypervisor_ensure_not_launched().await?; + + let debug = false; + + let disable_seccomp = true; + + let api_socket_path = get_api_socket_path(&self.id)?; + + let _ = std::fs::remove_file(api_socket_path.clone()); + + let binary_path = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))? 
+ .path + .to_string(); + + let path = Path::new(&binary_path).canonicalize()?; + + let mut cmd = Command::new(path); + + cmd.current_dir("/"); + + cmd.stdin(Stdio::null()); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + cmd.env("RUST_BACKTRACE", "full"); + + cmd.args(["--api-socket", &api_socket_path]); + + if let Some(extra_args) = &self.extra_args { + cmd.args(extra_args); + } + + if debug { + cmd.arg("-v"); + } + + if disable_seccomp { + cmd.args(["--seccomp", "false"]); + } + + let child = cmd.spawn().context(format!("{} spawn failed", CH_NAME))?; + + // Save process PID + self.pid = child.id(); + + let shutdown = self + .shutdown_rx + .as_ref() + .ok_or("no receiver channel") + .map_err(|e| anyhow!(e))? + .clone(); + + let ch_outputlogger_task = tokio::spawn(cloud_hypervisor_log_output(child, shutdown)); + + let tasks = vec![ch_outputlogger_task]; + + self.tasks = Some(tasks); + + Ok(()) + } + + async fn cloud_hypervisor_shutdown(&mut self) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + let response = + cloud_hypervisor_vmm_shutdown(socket.try_clone().context("shutdown failed")?).await?; + + if let Some(detail) = response { + debug!(sl!(), "shutdown response: {:?}", detail); + } + + // Trigger a controlled shutdown + self.shutdown_tx + .as_mut() + .ok_or("no shutdown channel") + .map_err(|e| anyhow!(e))? 
+ .send(true) + .map_err(|e| anyhow!(e).context("failed to request shutdown"))?; + + let tasks = self + .tasks + .take() + .ok_or("no tasks") + .map_err(|e| anyhow!(e))?; + + let results = join_all(tasks).await; + + let mut wait_errors: Vec = vec![]; + + for result in results { + if let Err(e) = result { + eprintln!("wait task error: {:#?}", e); + + wait_errors.push(e); + } + } + + if wait_errors.is_empty() { + Ok(()) + } else { + Err(anyhow!("wait all tasks failed: {:#?}", wait_errors)) + } + } + + #[allow(dead_code)] + async fn cloud_hypervisor_wait(&mut self) -> Result<()> { + let mut child = self + .process + .take() + .ok_or(format!("{} not running", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + let _pid = child + .id() + .ok_or(format!("{} missing PID", CH_NAME)) + .map_err(|e| anyhow!(e))?; + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) + } + + async fn cloud_hypervisor_ping_until_ready(&mut self, _poll_time_ms: u64) -> Result<()> { + let socket = self + .api_socket + .as_ref() + .ok_or("missing socket") + .map_err(|e| anyhow!(e))?; + + loop { + let response = + cloud_hypervisor_vmm_ping(socket.try_clone().context("failed to clone socket")?) 
+ .await + .context("ping failed"); + + if let Ok(response) = response { + if let Some(detail) = response { + debug!(sl!(), "ping response: {:?}", detail); + } + break; + } + + tokio::time::sleep(Duration::from_millis(CH_POLL_TIME_MS)).await; + } + + Ok(()) + } + + pub(crate) async fn prepare_vm(&mut self, id: &str, netns: Option) -> Result<()> { + self.id = id.to_string(); + self.state = VmmState::NotReady; + + self.setup_environment().await?; + + self.netns = netns; + + let vsock_cfg = VsockConfig::new(self.id.clone()).await?; + + let dev = Device::Vsock(vsock_cfg); + self.add_device(dev).await.context("add vsock device")?; + + self.start_hypervisor(self.timeout_secs).await?; + + Ok(()) + } + + async fn setup_environment(&mut self) -> Result<()> { + // run_dir and vm_path are the same (shared) + self.run_dir = get_sandbox_path(&self.id)?; + self.vm_path = self.run_dir.to_string(); + + create_dir_all(&self.run_dir) + .with_context(|| anyhow!("failed to create sandbox directory {}", self.run_dir))?; + + if !self.jailer_root.is_empty() { + create_dir_all(self.jailer_root.as_str()) + .map_err(|e| anyhow!("Failed to create dir {} err : {:?}", self.jailer_root, e))?; + } + + Ok(()) + } + + pub(crate) async fn start_vm(&mut self, timeout_secs: i32) -> Result<()> { + self.setup_environment().await?; + + self.timeout_secs = timeout_secs; + + self.boot_vm().await?; + + Ok(()) + } + + pub(crate) fn stop_vm(&mut self) -> Result<()> { + block_on(self.cloud_hypervisor_shutdown())?; + + Ok(()) + } + + pub(crate) fn pause_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) fn resume_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn save_vm(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_agent_socket(&self) -> Result { + const HYBRID_VSOCK_SCHEME: &str = "hvsock"; + + let vsock_path = get_vsock_path(&self.id)?; + + let uri = format!("{}://{}", HYBRID_VSOCK_SCHEME, vsock_path); + + Ok(uri) + } + + pub(crate) async fn disconnect(&mut self) { + 
self.state = VmmState::NotReady; + } + + pub(crate) async fn get_thread_ids(&self) -> Result { + Ok(VcpuThreadIds::default()) + } + + pub(crate) async fn cleanup(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_pids(&self) -> Result> { + Ok(Vec::::new()) + } + + pub(crate) async fn check(&self) -> Result<()> { + Ok(()) + } + + pub(crate) async fn get_jailer_root(&self) -> Result { + let root_path = get_jailer_root(&self.id)?; + + std::fs::create_dir_all(&root_path)?; + + Ok(root_path) + } + + pub(crate) async fn capabilities(&self) -> Result { + let mut caps = Capabilities::default(); + caps.set(CapabilityBits::FsSharingSupport); + Ok(caps) + } +} + +// Log all output from the CH process until a shutdown signal is received. +// When that happens, stop logging and wait for the child process to finish +// before returning. +async fn cloud_hypervisor_log_output(mut child: Child, mut shutdown: Receiver) -> Result<()> { + let stdout = child + .stdout + .as_mut() + .ok_or("failed to get child stdout") + .map_err(|e| anyhow!(e))?; + + let stdout_reader = BufReader::new(stdout); + let mut stdout_lines = stdout_reader.lines(); + + let stderr = child + .stderr + .as_mut() + .ok_or("failed to get child stderr") + .map_err(|e| anyhow!(e))?; + + let stderr_reader = BufReader::new(stderr); + let mut stderr_lines = stderr_reader.lines(); + + loop { + tokio::select! 
{ + _ = shutdown.changed() => { + info!(sl!(), "got shutdown request"); + break; + }, + stderr_line = poll_fn(|cx| Pin::new(&mut stderr_lines).poll_next_line(cx)) => { + if let Ok(line) = stderr_line { + let line = line.ok_or("missing stderr line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stderr"); + } + }, + stdout_line = poll_fn(|cx| Pin::new(&mut stdout_lines).poll_next_line(cx)) => { + if let Ok(line) = stdout_line { + let line = line.ok_or("missing stdout line").map_err(|e| anyhow!(e))?; + + info!(sl!(), "{:?}", line; "stream" => "stdout"); + } + }, + }; + } + + // Note that this kills _and_ waits for the process! + child.kill().await?; + + Ok(()) +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs new file mode 100644 index 000000000..d589c18df --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs @@ -0,0 +1,163 @@ +// Copyright (c) 2019-2022 Alibaba Cloud +// Copyright (c) 2022 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use super::HypervisorState; +use crate::{device::Device, Hypervisor, VcpuThreadIds}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use kata_types::capabilities::Capabilities; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; +use persist::sandbox_persist::Persist; +use std::sync::Arc; +use tokio::sync::RwLock; + +// Convenience macro to obtain the scope logger +#[macro_export] +macro_rules! 
sl { + () => { + slog_scope::logger().new(o!("subsystem" => "cloud-hypervisor")) + }; + } + +mod inner; +mod inner_device; +mod inner_hypervisor; +mod utils; + +use inner::CloudHypervisorInner; + +#[derive(Debug, Default, Clone)] +pub struct CloudHypervisor { + inner: Arc>, +} + +unsafe impl Send for CloudHypervisor {} +unsafe impl Sync for CloudHypervisor {} + +impl CloudHypervisor { + pub fn new() -> Self { + Self { + inner: Arc::new(RwLock::new(CloudHypervisorInner::new())), + } + } + + pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) { + let mut inner = self.inner.write().await; + inner.set_hypervisor_config(config) + } +} + +#[async_trait] +impl Hypervisor for CloudHypervisor { + async fn prepare_vm(&self, id: &str, netns: Option) -> Result<()> { + let mut inner = self.inner.write().await; + inner.prepare_vm(id, netns).await + } + + async fn start_vm(&self, timeout: i32) -> Result<()> { + let mut inner = self.inner.write().await; + inner.start_vm(timeout).await + } + + async fn stop_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.stop_vm() + } + + async fn pause_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.pause_vm() + } + + async fn resume_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.resume_vm() + } + + async fn save_vm(&self) -> Result<()> { + let inner = self.inner.write().await; + inner.save_vm().await + } + + async fn add_device(&self, device: Device) -> Result<()> { + let mut inner = self.inner.write().await; + inner.add_device(device).await + } + + async fn remove_device(&self, device: Device) -> Result<()> { + let mut inner = self.inner.write().await; + inner.remove_device(device).await + } + + async fn get_agent_socket(&self) -> Result { + let inner = self.inner.write().await; + inner.get_agent_socket().await + } + + async fn disconnect(&self) { + let mut inner = self.inner.write().await; + inner.disconnect().await + } + + async fn 
hypervisor_config(&self) -> HypervisorConfig { + let inner = self.inner.write().await; + inner.hypervisor_config() + } + + async fn get_thread_ids(&self) -> Result { + let inner = self.inner.read().await; + inner.get_thread_ids().await + } + + async fn cleanup(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.cleanup().await + } + + async fn get_pids(&self) -> Result> { + let inner = self.inner.read().await; + inner.get_pids().await + } + + async fn check(&self) -> Result<()> { + let inner = self.inner.read().await; + inner.check().await + } + + async fn get_jailer_root(&self) -> Result { + let inner = self.inner.read().await; + inner.get_jailer_root().await + } + + async fn save_state(&self) -> Result { + self.save().await + } + + async fn capabilities(&self) -> Result { + let inner = self.inner.read().await; + inner.capabilities().await + } +} + +#[async_trait] +impl Persist for CloudHypervisor { + type State = HypervisorState; + type ConstructorArgs = (); + + async fn save(&self) -> Result { + let inner = self.inner.read().await; + inner.save().await.context("save CH hypervisor state") + } + + async fn restore( + hypervisor_args: Self::ConstructorArgs, + hypervisor_state: Self::State, + ) -> Result { + let inner = CloudHypervisorInner::restore(hypervisor_args, hypervisor_state).await?; + Ok(Self { + inner: Arc::new(RwLock::new(inner)), + }) + } +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/utils.rs b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs new file mode 100644 index 000000000..dcea26aeb --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/ch/utils.rs @@ -0,0 +1,53 @@ +// Copyright (c) 2022-2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::Result; +use shim_interface::KATA_PATH; + +// The socket used to connect to CH. This is used for CH API communications. 
+const CH_API_SOCKET_NAME: &str = "ch-api.sock"; + +// The socket that allows runtime-rs to connect direct through to the Kata +// Containers agent running inside the CH hosted VM. +const CH_VM_SOCKET_NAME: &str = "ch-vm.sock"; + +const CH_JAILER_DIR: &str = "root"; + +// Return the path for a _hypothetical_ sandbox: the path does *not* exist +// yet, and for this reason safe-path cannot be used. +pub fn get_sandbox_path(id: &str) -> Result { + let path = [KATA_PATH, id].join("/"); + + Ok(path) +} + +// Return the path for a _hypothetical_ API socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_api_socket_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_API_SOCKET_NAME].join("/"); + + Ok(path) +} + +// Return the path for a _hypothetical_ sandbox specific VSOCK socket path: +// the path does *not* exist yet, and for this reason safe-path cannot be +// used. +pub fn get_vsock_path(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_VM_SOCKET_NAME].join("/"); + + Ok(path) +} + +pub fn get_jailer_root(id: &str) -> Result { + let sandbox_path = get_sandbox_path(id)?; + + let path = [&sandbox_path, CH_JAILER_DIR].join("/"); + + Ok(path) +} diff --git a/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs index a04fd24b0..ea870f342 100644 --- a/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs +++ b/src/runtime-rs/crates/hypervisor/src/hypervisor_persist.rs @@ -8,7 +8,7 @@ use crate::HypervisorConfig; use serde::{Deserialize, Serialize}; use std::collections::HashSet; -#[derive(Serialize, Deserialize, Default)] +#[derive(Serialize, Deserialize, Default, Clone, Debug)] pub struct HypervisorState { // Type of hypervisor, E.g. dragonball/qemu/firecracker/acrn. 
pub hypervisor_type: String, diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index f2bcc21c7..3c417f195 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -19,11 +19,17 @@ pub use kernel_param::Param; mod utils; use std::collections::HashMap; +#[cfg(feature = "cloud-hypervisor")] +pub mod ch; + use anyhow::Result; use async_trait::async_trait; use hypervisor_persist::HypervisorState; use kata_types::capabilities::Capabilities; use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; + +pub use kata_types::config::hypervisor::HYPERVISOR_NAME_CH; + // Config which driver to use as vm root dev const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk"; const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem"; @@ -48,7 +54,7 @@ const SHMEM: &str = "shmem"; pub const HYPERVISOR_DRAGONBALL: &str = "dragonball"; pub const HYPERVISOR_QEMU: &str = "qemu"; -#[derive(PartialEq)] +#[derive(PartialEq, Debug, Clone)] pub(crate) enum VmmState { NotReady, VmmServerReady, @@ -56,7 +62,7 @@ pub(crate) enum VmmState { } // vcpu mapping from vcpu number to thread number -#[derive(Debug)] +#[derive(Debug, Default)] pub struct VcpuThreadIds { pub vcpus: HashMap, } diff --git a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml index 0e3fbdc60..6dea5e762 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml +++ b/src/runtime-rs/crates/runtimes/virt_container/Cargo.toml @@ -35,3 +35,9 @@ oci = { path = "../../../../libs/oci" } persist = { path = "../../persist"} resource = { path = "../../resource" } +[features] +default = [] + +# Feature is not yet complete, so not enabled by default. +# See https://github.com/kata-containers/kata-containers/issues/6264. 
+cloud-hypervisor = [] diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs index 99489d7b3..e619188a9 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs @@ -25,6 +25,12 @@ use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU}; use kata_types::config::{ hypervisor::register_hypervisor_plugin, DragonballConfig, QemuConfig, TomlConfig, }; + +#[cfg(feature = "cloud-hypervisor")] +use hypervisor::ch::CloudHypervisor; +#[cfg(feature = "cloud-hypervisor")] +use kata_types::config::{hypervisor::HYPERVISOR_NAME_CH, CloudHypervisorConfig}; + use resource::ResourceManager; use sandbox::VIRTCONTAINER; use tokio::sync::mpsc::Sender; @@ -39,8 +45,16 @@ impl RuntimeHandler for VirtContainer { // register let dragonball_config = Arc::new(DragonballConfig::new()); register_hypervisor_plugin("dragonball", dragonball_config); + let qemu_config = Arc::new(QemuConfig::new()); register_hypervisor_plugin("qemu", qemu_config); + + #[cfg(feature = "cloud-hypervisor")] + { + let ch_config = Arc::new(CloudHypervisorConfig::new()); + register_hypervisor_plugin(HYPERVISOR_NAME_CH, ch_config); + } + Ok(()) } @@ -118,6 +132,17 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result> .await; Ok(Arc::new(hypervisor)) } + + #[cfg(feature = "cloud-hypervisor")] + HYPERVISOR_NAME_CH => { + let mut hypervisor = CloudHypervisor::new(); + + hypervisor + .set_hypervisor_config(hypervisor_config.clone()) + .await; + + Ok(Arc::new(hypervisor)) + } _ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)), } } From bbc733d6c8e6eef04a3cbdbac5cb69532ca3e555 Mon Sep 17 00:00:00 2001 From: "James O. D. 
Hunt" Date: Fri, 10 Feb 2023 13:56:14 +0000 Subject: [PATCH 38/52] docs: runtime-rs: Add CH status details Add a few details about the current state of the Cloud Hypervisor (CH) runtime-rs external hypervisor implementation with pointers to the appropriate issues. Signed-off-by: James O. D. Hunt --- src/runtime-rs/crates/hypervisor/README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/runtime-rs/crates/hypervisor/README.md b/src/runtime-rs/crates/hypervisor/README.md index 8d43baa70..dcaa4e57e 100644 --- a/src/runtime-rs/crates/hypervisor/README.md +++ b/src/runtime-rs/crates/hypervisor/README.md @@ -1,4 +1,26 @@ # Multi-vmm support for runtime-rs + +## 0. Status + +External hypervisor support is currently being developed. + +See [the main tracking issue](https://github.com/kata-containers/kata-containers/issues/4634) +for further details. + +### Cloud Hypervisor + +A basic implementation currently exists for Cloud Hypervisor. However, +since it is not yet fully functional, the feature is disabled by +default. When the implementation matures, the feature will be enabled +by default. + +> **Note:** +> +> To enable the feature, follow the instructions on https://github.com/kata-containers/kata-containers/pull/6201. + +See the [Cloud Hypervisor tracking issue](https://github.com/kata-containers/kata-containers/issues/6263) +for further details. + Some key points for supporting multi-vmm in rust runtime. ## 1. Hypervisor Config From d144ded12c6848b06f1dedbc8f91bbf25da76230 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Tue, 14 Feb 2023 15:47:44 -0800 Subject: [PATCH 39/52] release: Adapt kata-deploy for 3.1.0-rc0 kata-deploy files must be adapted to a new release. The cases where it happens are when the release goes from -> to: * main -> stable: * kata-deploy-stable / kata-cleanup-stable: are removed * stable -> stable: * kata-deploy / kata-cleanup: bump the release to the new one. 
There are no changes when doing an alpha release, as the files on the "main" branch always point to the "latest" and "stable" tags. Signed-off-by: Archana Shinde --- .../base/kata-cleanup-stable.yaml | 46 ------------- .../kata-deploy/base/kata-deploy-stable.yaml | 69 ------------------- 2 files changed, 115 deletions(-) delete mode 100644 tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml delete mode 100644 tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml deleted file mode 100644 index f1d9d0a2f..000000000 --- a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml +++ /dev/null @@ -1,46 +0,0 @@ ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: kubelet-kata-cleanup - namespace: kube-system -spec: - selector: - matchLabels: - name: kubelet-kata-cleanup - template: - metadata: - labels: - name: kubelet-kata-cleanup - spec: - serviceAccountName: kata-label-node - nodeSelector: - katacontainers.io/kata-runtime: cleanup - containers: - - name: kube-kata-cleanup - image: quay.io/kata-containers/kata-deploy:stable - imagePullPolicy: Always - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - securityContext: - privileged: false - volumeMounts: - - name: dbus - mountPath: /var/run/dbus - - name: systemd - mountPath: /run/systemd - volumes: - - name: dbus - hostPath: - path: /var/run/dbus - - name: systemd - hostPath: - path: /run/systemd - updateStrategy: - rollingUpdate: - maxUnavailable: 1 - type: RollingUpdate diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml deleted file mode 100644 index 346e4c0ee..000000000 --- 
a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml +++ /dev/null @@ -1,69 +0,0 @@ ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: kata-deploy - namespace: kube-system -spec: - selector: - matchLabels: - name: kata-deploy - template: - metadata: - labels: - name: kata-deploy - spec: - serviceAccountName: kata-label-node - containers: - - name: kube-kata - image: quay.io/kata-containers/kata-deploy:stable - imagePullPolicy: Always - lifecycle: - preStop: - exec: - command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] - command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install" ] - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - securityContext: - privileged: false - volumeMounts: - - name: crio-conf - mountPath: /etc/crio/ - - name: containerd-conf - mountPath: /etc/containerd/ - - name: kata-artifacts - mountPath: /opt/kata/ - - name: dbus - mountPath: /var/run/dbus - - name: systemd - mountPath: /run/systemd - - name: local-bin - mountPath: /usr/local/bin/ - volumes: - - name: crio-conf - hostPath: - path: /etc/crio/ - - name: containerd-conf - hostPath: - path: /etc/containerd/ - - name: kata-artifacts - hostPath: - path: /opt/kata/ - type: DirectoryOrCreate - - name: dbus - hostPath: - path: /var/run/dbus - - name: systemd - hostPath: - path: /run/systemd - - name: local-bin - hostPath: - path: /usr/local/bin/ - updateStrategy: - rollingUpdate: - maxUnavailable: 1 - type: RollingUpdate From 5988199adaf8ff0c026bfc0775a0b4c509635108 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Tue, 14 Feb 2023 15:47:44 -0800 Subject: [PATCH 40/52] release: Kata Containers 3.1.0-rc0 - kata-deploy: Install protobuf-compiler explicitly in shim-v2 Dockerfile - runtime: tracing: Fix missing ctx return - runtime: add reconnect timeout for vhost user block - SEV: Update ReducedPhysBits - shim-v2/build.sh: Only build runtime-rs for the supported arches - kata-ctl: Expand unit 
tests for CPU check - runtime: support cgroup v2 metrics marshal guest metrics - Typo: change tabs in comment to spaces - rootfs: support EROFS filesystem - versions: Update runc version - runtime: Improve documentation of appendFDs - Minor cleanups in make file - main | docs: Fix missing critical steps in how-to-hotplug-memory-arm64.md - Action check kernel config version - clh: Enforce API timeout only for vm.boot request - virtiofsd: change cache mod to const - runtime-rs: ignor "no such process" error when delete cgroup for a thread to let it go - kernel: Add console kernel config for s390 - runtime: remove not used shim configurations - improvement: Fix naming conventions for span name and log subsystem - Dragonball: add cpu resize ability - arm64/CI: fix unit test failure on arm64 - CI: Make docker version stick to v20.10 in ubuntu:20.04 for s390x|ppc64le - virtiofsd: fix the build on ppc64le - runtime:fix stat uds path - cni: Update cni plugins version to 1.2.0 - Built-in Sandbox: add more unit tests for dragonball. Part 5 - runtime: Drop QEMU log file support - docs: Add documentation for building agent with seccomp support. 
- Add kernel-dragonball-experimental to kata-deploy, kata-deploy-test, and the release - runtime-rs: add missing config section for share-fs - runtime: Add hmp for qemu - upcall: add document for upcall - runtime: Start QEMU undaemonized and get logs - docs: Update url link in QAT documentation - versions: update cni plugins version - versions: Upgrade to Cloud Hypervisor v29.0 - runtime: Use consts in `kata-runtime check` - versions: Bump QEMU to v7.2.0 - agent: Eliminate unnecessary metrics - runtime:all APIs are hang in the service.mu - Utility functions for kata-env - versions: Update conmon version - runtime: paas enablevhostuserstore annotation to hypervisor config - runk: Upgrade liboci-cli to v0.0.4 - runtime: use system pagesize for hugepage test - dependency: update cgroups-rs - runtime: Use git rev-parse for the kata-monitor tag - virtcontainers: split out linux-specific bits for mount, factory - Add darwin skeletons - vendor: revendor netlink to get latest - Address issues with the initial vCPU pinning functionality - virtcontainers: Fix misspelling in error message - runtime: add test generated file to .gitignore - runtime: fix up disable_netns handling - docs: add hint of probing loop module - tools: add --locked option for cargo install - runtime-rs: add Single Container support - virtcontainers: tests: Ensure Linux specific tests are just run on Linux - Change cache mode from none to never - tools: Fix indentation for setup aks script - virtcontainers: fs_share: Add Darwin skeleton - virtcontainers: Add a Virtualization.framework skeleton - kata-ctl: remove get_kata_version_by_url function - kata-ctl: fix build error on s390x - virtcontainers: Introduce hypervisor_darwin - runtime: Define Darwin handled signals list - nydus: net-ns handling needs to be only executed on Linux hosts - clh: Ensure it works with Docker / Moby - agent: refactor guest hooks - fix moby prestart hook handling - schedcore: Make buildable on !linux - Built-in Sandbox: add 
more unit tests for dragonball. Part 4 - runtime-rs: cleanup the run dir of hypervisor when shut down - Feat: implementation of kata-ctl direct-volume operations - Runtime: Clarify mutability of global var - kata-runtime: add rust runtime path for kata-runtime exec - versions: Upgrade to Cloud Hypervisor v28.1 - runtime-rs: add dbs-upcall feature - runtime/Makefile: Get some bits happy on darwin - docs: remove old and misleading instructions for minikube - packaging: fix indents in build-kernel.sh - kernel: adding kmod to do docker env - versions: Update the rust toolchain to 1.66.0 - kata-ctl: skip test if access GitHub.com fail - agent: unset `CC` for cross-build - runtime-rs: enable hugepage - runtime-rs: Clean up mount points shared to guest - kata-ctl: fix checkcpu bug in non-x86 arches d144ded12 release: Adapt kata-deploy for 3.1.0-rc0 8e3863cec kata-deploy: Install protobuf-compiler explicitly in shim-v2 Dockerfile c45391991 runtime: tracing: Fix missing ctx return 4139d68d5 runtime-rs: Include target install in conditional branch ca02c9f51 runtime: add reconnect timeout for vhost user block 2f5bc0f40 kata-ctl: Expand unit tests for CPU check 67b8f0773 SEV: Update ReducedPhysBits bdf20b5d2 rootfs: support EROFS filesystem fff0e50a7 versions: Update runc version ed02c8a05 docs: add guide for building rootfs with EROFS 01765e173 runtime: support cgroup v2 metrics marshal guest metrics 49326fe4e fix(clippy): fix hypervisor clippy checks 94b1d9814 cargo: Update Cargo.lock files f1855594a make: Get rid of verbose output while creating tar c3836010a make: clean up obsolete targets ac64b021a clh: Enforce API timeout only for vm.boot request 56071c6e7 virtiofsd: change cache mod to const 5d37d31ac cgroups: upgrade cgroupfs to 0.3.1 ab59a65c9 runtime-rs: neglect a certain error when delete cgroup 390916b33 runtime: remove not used shim configurations 9794c52c6 improvement: Fix naming conventions for span name and log subsystem f49b89b63 CI: Set docker version to 
v20.10 in ubuntu:20.04 for s390x|ppc64le 3c24e2340 README: Update Readme under packaging/kernel d73f3a8a2 github-action: Add step to verify kernel config version id updated 59f104c02 runtime: skip unit test that fail regularly on aarch64 b7dd97cac kata-ctl: fix permission deny issue in test_add_remove 57c5e5629 Dragonball: add cpu resize ability 3c48f2202 runtime: Improve documentation of appendFDs 856ab6687 virtiofsd: fix the build on ppc64le f83115a83 docs: Fix missing critical steps in how-to-hotplug-memory-arm64.md e071d9251 Typo: change tabs in comment to spaces 56f0a27fe kernel: Add console kernel config for s390 334c4b8bd runtime: Drop QEMU log file support 3a63e3c1f cni: Update cni plugins version to 1.2.0 510798155 dragonball: Improve test cases dc90c6e30 dragonball: add more unit test for vm c07135535 runtime-rs: Improve s390x error message 4e2db96ef runtime-rs: Don't try to build on Power 8e8c720d5 kata-deploy-push: Ensure we build Dragonball specific kernel 1e531b44d runtime:fix stat uds path 9092c23a2 runtime: Add hmp for qemu b7f4e96ff kata-deploy-test: Ensure we build dragonball specific kernel 063dec37c release: Add the dragonball-experimental kernel 0b3c91d2a kata-deploy: Add kernel-dragonball-experimental target 00dcd900f docs: Add documentation for building agent with seccomp support. 
2b779cba0 docs: Update url link in QAT documentation 39fe4a4b6 runtime: Collect QEMU's stderr a5319c6be runtime: Start QEMU undaemonized bf4e3a618 runtime: Launch QEMU with cmd.Start() 8a1723a5c runtime: Pre-establish the QMP connection 8a4f08cb0 govmm: Optionally pass QMP listener to QEMU 219bb8e7d govmm: Optionally start QMP with a pre-configured connection a85d0e465 versions: update cni plugins version 676d02850 versions: Bump QEMU to v7.2.0 861c38b6a versions: Upgrade to Cloud Hypervisor v29.0 ba87e0afe runtime: Use consts in `kata-runtime check` 9f490d16f upcall: add document for upcall 596037e20 versions: Update conmon version 095e8fdef runk: Use the original Kill command instead of the customed it. 0f9e23a3d runk: Upgrade liboci-cli to v0.0.4 69fc8de71 runtime:all APIs are hang in the service.mu 8d4c2cf1b kata-ctl: Allow certain constants to go unused 64c11a66f kata-ctl: Have function to get cpu details to run on specific arch 923cd3fda virtcontainers: split out Linux parts from mount cf1bae352 runtime: paas enablevhostuserstore annotation to hypervisor config 1592a385e dependency: update cgroups-rs 60ff230d8 virtcontainers: Split the factory package into Linux and Darwin bits 76437a972 runtime: Use git rev-parse for the kata-monitor tag a9626682a virtcontainers: resourcecontrol: Add skeleton for Darwin ea06fe3af virtcontainers: Add a Network API skeleton for Darwin 6ee550e9a runtime: vCPUs pinning is sandbox specific, not hypervisor 6199b6917 runtime-rs: change cache mode a33a22ccd runtime-rs: add missing config section for share-fs e3d3b72fa virtcontainers: use resource control for setting CPU affinity f137048be resource-control: add helper function for setting CPU affinity 73216a810 vendor: revendor netlink to get latest fc17d7cc4 virtcontainers: Fix misspelling in error message 12fd6ffc1 runtime: fix up disable_netns handling 64c9114a3 tools: add --locked option for cargo install 7eb43cec1 runtime: add test generated file to .gitignore 8551853cf runtime: 
use system pagesize for hugepage test 86a82cace runtime: change cache mode from none to never 82c59efd6 runtime-rs: change cache mode from none to never 7b309b578 kata-types: change cache mode from none to never fee4e7c7c docs: change cache mode from none to never 594b57d08 utils: Add utility functions to get cpu and distro details. d33e34361 check: Move PROC_CPUINFO from architecture specific files f8a93a1de tools: Fix indentation for setup aks script 03de5f41b kata-ctl: remove get_kata_version_by_url function 464d4c94d runtime-rs: process single_container 5f9c892e4 kata-types: add single_container support fa9ae9362 virtcontainers: Add a Virtualization.framework skeleton d48b22bb1 virtcontainers: fs_share: add Darwin skeleton fafc7a8b1 virtcontainers: tests: Ensure Linux specific tests are just run on Linux efa4fc0b2 clh: Add hotplug support for network devices 1074d2c1d clh: Make vmAddNetPutRequest capable of doing hotplugs 9ec8a1398 virtcontainers: introduce hypervisor_darwin 8bb68a9f2 vc/network: skip existing endpoints when scanning for new ones c21a8d5ff kata-ctl: fix build error on s390x 3b4420eb8 runtime: Define Darwin handled signals list 24b05a99b schedcore: Make buildable on !linux 3886aad19 nydus: net-ns handling needs to be only executed on Linux hosts e256903af runtime-rs: cleanup the run dir of hypervisor when shut down 937a41346 kata-ctl: add unit tests for volume ops 8451db7c0 kata-ctl: direct-volume: add Add and Remove handlers 2d4b2cf72 runtime-rs: add POST method to shim-client cae78a685 kata-ctl: add constants for direct-volume commands 652021ad9 versions: Upgrade to Cloud Hypervisor v28.1 d08538912 vc: fix up UT for CreateSandbox API change 578a9c25f vc: rescan network endpoints after running prestart hooks cb84b0fb0 katautils: run prestart hooks after starting VM 079462d2e runk: Fix needless_borrow warning 2c24fcf34 runtime-rs: Fix clippy::bool-to-int-with-if warnings 025e78341 runtime-rs: Fix needless_borrow warnings 4fb163d57 runtime-rs: 
Allow clippy:box_default warnings 20121fcda runtime-rs: Fix unnecessary_cast warnings b95364a14 dragonball: Allow question_mark warning in allocate_device_resources() 0b2f060bf dragonball: Fix unnecessary_cast warnings a545a6593 agent: Allow clippy::question_mark warning in Namespace{} 9ced34dd2 agent: Fix explicit_auto_deref warnings f77220490 agent: Fix needless_borrow warnings 7bcdc9049 rustjail: Fix unnecessary_cast warnings 41d7dbaae rustjail: Fix needless_borrow warnings 2a73e057d kata-types: Fix unnecessary_cast warnings cf9ef1833 kata-types: Fix needless_borrow warnings 126187e81 safe-path: Fix needless_borrow warnings bb78d35db kata-sys-util: Fix "match-like-matches-macro" warning 668e65240 kata-sys-util: Fix unnecessary_cast warnings c1a8d89a7 kata-sys-util: Fix needless_borrow warnings c9c38e6d0 logging: Allow clippy::type-complexity warning ffd6fbb6b logging: Fix needless_borrow warnings 60df30015 protocols: Fix unnecessary_cast warnings 56e7b5d0f runtime/Makefile: Get some bits happy on darwin 0bbeb34b4 protocols: Fix needless_borrow warnings dfea6c7d2 versions: Update the rust toolchain to 1.66.0 86ee24b33 Runtime: Clarify mutability of global var dae667062 kata-runtime: add rust runtime path for kata-runtime exec a2e3715e0 upcall: remove upcall client when stopping vm 31591d791 dragonball: fix unit test failure case about Kvm. 
2b02e0a9b dragonball: add more unit test for vcpu manager 85f9094f1 agent: refactor guest hooks 360506225 runtime-rs: add dbs-upcall feature 03a0c9d78 kata-ctl: skip test if access GitHub.com fail 1dcbda3f0 kata-ctl: update Cargo.lock b4b5d8150 docs: remove old and misleading instructions for minikube 0fe24e08b packaging: fix indents in build-kernel.sh 3480780bd kata-ctl: add check framework support for non-x86 1bd533f10 kata-ctl: let check framework arch-agnostic fd77eebd4 runtime-rs: fix the issues mentioned in the code review 0e6920790 runtime-rs: Clean up mount points shared to guest ecb28e2b1 kernel: adding kmod to do docker env 087515a46 agent: unset `CC` for cross-build bf8848f92 agent: Eliminate unnecessary metrics f8a48ab41 docs: add hint of probing loop module afaf17f42 runtime-rs: enable container hugepage fc4a67eec runtime-rs: enable vm hugepage Signed-off-by: Archana Shinde --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index d0e50ca07..a36373c3b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0-alpha1 +3.1.0-rc0 From ee76b398b32bd7c3788354c9fe2c62b904ed3a9d Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Tue, 14 Feb 2023 15:47:51 -0800 Subject: [PATCH 41/52] release: Revert kata-deploy changes after 3.1.0-rc0 release As 3.1.0-rc0 has been released, let's switch the kata-deploy / kata-cleanup tags back to "latest", and re-add the kata-deploy-stable and the kata-cleanup-stable files. 
Signed-off-by: Archana Shinde --- .../base/kata-cleanup-stable.yaml | 46 +++++++++++++ .../kata-deploy/base/kata-deploy-stable.yaml | 69 +++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml create mode 100644 tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml diff --git a/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml new file mode 100644 index 000000000..f1d9d0a2f --- /dev/null +++ b/tools/packaging/kata-deploy/kata-cleanup/base/kata-cleanup-stable.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kubelet-kata-cleanup + namespace: kube-system +spec: + selector: + matchLabels: + name: kubelet-kata-cleanup + template: + metadata: + labels: + name: kubelet-kata-cleanup + spec: + serviceAccountName: kata-label-node + nodeSelector: + katacontainers.io/kata-runtime: cleanup + containers: + - name: kube-kata-cleanup + image: quay.io/kata-containers/kata-deploy:stable + imagePullPolicy: Always + command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh reset" ] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + privileged: false + volumeMounts: + - name: dbus + mountPath: /var/run/dbus + - name: systemd + mountPath: /run/systemd + volumes: + - name: dbus + hostPath: + path: /var/run/dbus + - name: systemd + hostPath: + path: /run/systemd + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate diff --git a/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml new file mode 100644 index 000000000..346e4c0ee --- /dev/null +++ b/tools/packaging/kata-deploy/kata-deploy/base/kata-deploy-stable.yaml @@ -0,0 +1,69 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: 
kata-deploy + namespace: kube-system +spec: + selector: + matchLabels: + name: kata-deploy + template: + metadata: + labels: + name: kata-deploy + spec: + serviceAccountName: kata-label-node + containers: + - name: kube-kata + image: quay.io/kata-containers/kata-deploy:stable + imagePullPolicy: Always + lifecycle: + preStop: + exec: + command: ["bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh cleanup"] + command: [ "bash", "-c", "/opt/kata-artifacts/scripts/kata-deploy.sh install" ] + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + privileged: false + volumeMounts: + - name: crio-conf + mountPath: /etc/crio/ + - name: containerd-conf + mountPath: /etc/containerd/ + - name: kata-artifacts + mountPath: /opt/kata/ + - name: dbus + mountPath: /var/run/dbus + - name: systemd + mountPath: /run/systemd + - name: local-bin + mountPath: /usr/local/bin/ + volumes: + - name: crio-conf + hostPath: + path: /etc/crio/ + - name: containerd-conf + hostPath: + path: /etc/containerd/ + - name: kata-artifacts + hostPath: + path: /opt/kata/ + type: DirectoryOrCreate + - name: dbus + hostPath: + path: /var/run/dbus + - name: systemd + hostPath: + path: /run/systemd + - name: local-bin + hostPath: + path: /usr/local/bin/ + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate From 9304889330268020e65af123a7ebf45118a304d0 Mon Sep 17 00:00:00 2001 From: Willem Dendauw Date: Tue, 14 Feb 2023 15:47:23 -0800 Subject: [PATCH 42/52] docs: Update how-to-use-kata-containers-with-firecracker.md Removed the `` around containerd, because when you execute this as a script it runs the containerd command within the script, which it should not do. 
Fixes #4217 Signed-off-by: Willem Dendauw --- docs/how-to/how-to-use-kata-containers-with-firecracker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to/how-to-use-kata-containers-with-firecracker.md b/docs/how-to/how-to-use-kata-containers-with-firecracker.md index d2bd362c1..377bd7d11 100644 --- a/docs/how-to/how-to-use-kata-containers-with-firecracker.md +++ b/docs/how-to/how-to-use-kata-containers-with-firecracker.md @@ -104,7 +104,7 @@ sudo dmsetup create "${POOL_NAME}" \ cat << EOF # -# Add this to your config.toml configuration file and restart `containerd` daemon +# Add this to your config.toml configuration file and restart containerd daemon # [plugins] [plugins.devmapper] From a68c5004f859cc4f83d3523a90ea89680cc5b3e8 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Mon, 21 Nov 2022 15:48:28 +0100 Subject: [PATCH 43/52] packaging/shim-v2: Only change the config if the file exists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's not try to sed a file that doesn't exist, which may be the case depending on the architecture we're building the shim-v2 for. This is a partial-forward port of f24c47ea47e39bdc28bb740d5281eba21fa5727d. Fixes: #6293 Signed-off-by: Hyounggyu Choi Signed-off-by: Fabiano Fidêncio --- tools/packaging/static-build/shim-v2/build.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/packaging/static-build/shim-v2/build.sh b/tools/packaging/static-build/shim-v2/build.sh index 4d9d01da1..6cbb0a41b 100755 --- a/tools/packaging/static-build/shim-v2/build.sh +++ b/tools/packaging/static-build/shim-v2/build.sh @@ -12,6 +12,7 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" readonly repo_root_dir="$(cd "${script_dir}/../../../.." 
&& pwd)" readonly kernel_builder="${repo_root_dir}/tools/packaging/kernel/build-kernel.sh" +VMM_CONFIGS="qemu fc" GO_VERSION=${GO_VERSION} RUST_VERSION=${RUST_VERSION} @@ -47,8 +48,12 @@ sudo docker run --rm -i -v "${repo_root_dir}:${repo_root_dir}" \ "${container_image}" \ bash -c "git config --global --add safe.directory ${repo_root_dir} && make PREFIX="${PREFIX}" DESTDIR="${DESTDIR}" install" -sudo sed -i -e '/^initrd =/d' "${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-qemu.toml" -sudo sed -i -e '/^initrd =/d' "${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-fc.toml" +for vmm in ${VMM_CONFIGS}; do + config_file="${DESTDIR}/${PREFIX}/share/defaults/kata-containers/configuration-${vmm}.toml" + if [ -f ${config_file} ]; then + sudo sed -i -e '/^initrd =/d' ${config_file} + fi +done pushd "${DESTDIR}/${PREFIX}/share/defaults/kata-containers" sudo ln -sf "configuration-qemu.toml" configuration.toml From 44aaec9020f91a2eac11b28b45262cff6bd7a100 Mon Sep 17 00:00:00 2001 From: jongwooo Date: Sat, 11 Feb 2023 14:44:31 +0900 Subject: [PATCH 44/52] github-action: Replace deprecated command with environment file In workflow, `set-output` command is deprecated and will be disabled soon. This commit replaces the deprecated `set-output` command with putting a value in the environment file `$GITHUB_OUTPUT`. 
Fixes #6266 Signed-off-by: jongwooo --- .github/workflows/add-backport-label.yaml | 8 ++++---- .github/workflows/kata-deploy-test.yaml | 8 ++++---- .github/workflows/release.yaml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/add-backport-label.yaml b/.github/workflows/add-backport-label.yaml index 5774bb805..3df518b54 100644 --- a/.github/workflows/add-backport-label.yaml +++ b/.github/workflows/add-backport-label.yaml @@ -62,15 +62,15 @@ jobs: has_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'needs-backport') }} has_no_backport_needed_label=${{ contains(github.event.pull_request.labels.*.name, 'no-backport-needed') }} - echo "::set-output name=add_backport_label::false" + echo "add_backport_label=false" >> $GITHUB_OUTPUT if [ $has_backport_needed_label = true ] || [ $has_bug = true ]; then if [[ $has_no_backport_needed_label = false ]]; then - echo "::set-output name=add_backport_label::true" + echo "add_backport_label=true" >> $GITHUB_OUTPUT fi fi # Do not spam comment, only if auto-backport label is going to be newly added. 
- echo "::set-output name=auto_backport_added::$CONTAINS_AUTO_BACKPORT" + echo "auto_backport_added=$CONTAINS_AUTO_BACKPORT" >> $GITHUB_OUTPUT - name: Add comment if: ${{ !contains(github.event.pull_request.labels.*.name, 'force-skip-ci') && steps.add_label.outputs.add_backport_label == 'true' && steps.add_label.outputs.auto_backport_added == 'false' }} @@ -97,4 +97,4 @@ jobs: uses: andymckay/labeler@e6c4322d0397f3240f0e7e30a33b5c5df2d39e90 with: add-labels: "auto-backport" - repo-token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/kata-deploy-test.yaml b/.github/workflows/kata-deploy-test.yaml index 5a924c739..c19c7bb4a 100644 --- a/.github/workflows/kata-deploy-test.yaml +++ b/.github/workflows/kata-deploy-test.yaml @@ -67,7 +67,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} @@ -107,7 +107,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} @@ -138,7 +138,7 @@ jobs: ref="refs/pull/${{ github.event.inputs.pr }}/merge" fi echo "reference for PR: " ${ref} "event:" ${{ github.event_name }} - echo "##[set-output name=pr-ref;]${ref}" + echo "pr-ref=${ref}" >> $GITHUB_OUTPUT - uses: actions/checkout@v2 with: ref: ${{ steps.get-PR-ref.outputs.pr-ref }} @@ -156,7 +156,7 @@ jobs: docker push quay.io/kata-containers/kata-deploy-ci:$PR_SHA mkdir -p packaging/kata-deploy ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${PR_SHA}" + echo 
"PKG_SHA=${PR_SHA}" >> $GITHUB_OUTPUT - name: test-kata-deploy-ci-in-aks uses: ./packaging/kata-deploy/action with: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index bbb95c53d..dd48494f1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -89,7 +89,7 @@ jobs: docker push quay.io/kata-containers/kata-deploy-ci:$pkg_sha mkdir -p packaging/kata-deploy ln -s $GITHUB_WORKSPACE/tools/packaging/kata-deploy/action packaging/kata-deploy/action - echo "::set-output name=PKG_SHA::${pkg_sha}" + echo "PKG_SHA=${pkg_sha}" >> $GITHUB_OUTPUT - name: test-kata-deploy-ci-in-aks uses: ./packaging/kata-deploy/action with: From 32ebe1895bc2daffacf1e0654d33ce418c7ec068 Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Thu, 16 Feb 2023 11:23:11 +0800 Subject: [PATCH 45/52] agent: fix the issue of creating the dns file We should make sure the dns's source file's parent directory exist, otherwise, it would failed to create the file directly. Signed-off-by: Fupan Li --- src/agent/src/network.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/agent/src/network.rs b/src/agent/src/network.rs index 451b5064d..2e617dc3b 100644 --- a/src/agent/src/network.rs +++ b/src/agent/src/network.rs @@ -7,6 +7,7 @@ use anyhow::{anyhow, Result}; use nix::mount::{self, MsFlags}; use slog::Logger; use std::fs; +use std::path; const KATA_GUEST_SANDBOX_DNS_FILE: &str = "/run/kata-containers/sandbox/resolv.conf"; const GUEST_DNS_FILE: &str = "/etc/resolv.conf"; @@ -64,6 +65,12 @@ fn do_setup_guest_dns(logger: Logger, dns_list: Vec, src: &str, dst: &st .map(|x| x.trim()) .collect::>() .join("\n"); + + // make sure the src file's parent path exist. 
+ let file_path = path::Path::new(src); + if let Some(p) = file_path.parent() { + fs::create_dir_all(p)?; + } fs::write(src, content)?; // bind mount to /etc/resolv.conf From 04e930073c706fb001e3700fbc460dcd5d32ae0d Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Wed, 15 Feb 2023 11:19:41 +0800 Subject: [PATCH 46/52] sandbox: set the dns for the sandbox The rust agent had supported to set the guest dns server in start sandbox request, thus add the dns in the runtime side. Fixes:#6286 Signed-off-by: Fupan Li --- src/libs/kata-types/src/config/default.rs | 1 + .../crates/runtimes/common/src/sandbox.rs | 2 +- src/runtime-rs/crates/runtimes/src/manager.rs | 19 ++++++++++++++++--- .../runtimes/virt_container/src/sandbox.rs | 4 ++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index aa32b59fc..d2d922715 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -42,6 +42,7 @@ pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024; pub const MAX_SHARED_9PFS_SIZE_MB: u32 = 8 * 1024 * 1024; pub const DEFAULT_GUEST_HOOK_PATH: &str = "/opt/kata/hooks"; +pub const DEFAULT_GUEST_DNS_FILE: &str = "/etc/resolv.conf"; pub const DEFAULT_GUEST_VCPUS: u32 = 1; diff --git a/src/runtime-rs/crates/runtimes/common/src/sandbox.rs b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs index 5ac028ddb..a08d56d07 100644 --- a/src/runtime-rs/crates/runtimes/common/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/common/src/sandbox.rs @@ -9,7 +9,7 @@ use async_trait::async_trait; #[async_trait] pub trait Sandbox: Send + Sync { - async fn start(&self, netns: Option) -> Result<()>; + async fn start(&self, netns: Option, dns: Vec) -> Result<()>; async fn stop(&self) -> Result<()>; async fn cleanup(&self, container_id: &str) -> Result<()>; async fn shutdown(&self) -> Result<()>; diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs 
b/src/runtime-rs/crates/runtimes/src/manager.rs index f3163a3dd..0165184ad 100644 --- a/src/runtime-rs/crates/runtimes/src/manager.rs +++ b/src/runtime-rs/crates/runtimes/src/manager.rs @@ -15,11 +15,14 @@ use common::{ RuntimeHandler, RuntimeInstance, Sandbox, }; use hypervisor::Param; -use kata_types::{annotations::Annotation, config::TomlConfig}; +use kata_types::{ + annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig, +}; #[cfg(feature = "linux")] use linux_container::LinuxContainer; use persist::sandbox_persist::Persist; use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER; +use tokio::fs; use tokio::sync::{mpsc::Sender, RwLock}; #[cfg(feature = "virt")] use virt_container::{ @@ -48,6 +51,7 @@ impl RuntimeHandlerManagerInner { async fn init_runtime_handler( &mut self, netns: Option, + dns: Vec, config: Arc, ) -> Result<()> { info!(sl!(), "new runtime handler {}", &config.runtime.name); @@ -70,7 +74,7 @@ impl RuntimeHandlerManagerInner { // start sandbox runtime_instance .sandbox - .start(netns) + .start(netns, dns) .await .context("start sandbox")?; self.runtime_instance = Some(Arc::new(runtime_instance)); @@ -83,6 +87,8 @@ impl RuntimeHandlerManagerInner { return Ok(()); } + let mut dns: Vec = vec![]; + #[cfg(feature = "linux")] LinuxContainer::init().context("init linux container")?; #[cfg(feature = "wasm")] @@ -107,8 +113,15 @@ impl RuntimeHandlerManagerInner { None }; + for m in &spec.mounts { + if m.destination == DEFAULT_GUEST_DNS_FILE { + let contents = fs::read_to_string(&m.source).await?; + dns = contents.split('\n').map(|e| e.to_string()).collect(); + } + } + let config = load_config(spec, options).context("load config")?; - self.init_runtime_handler(netns, Arc::new(config)) + self.init_runtime_handler(netns, dns, Arc::new(config)) .await .context("init runtime handler")?; diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs index 
469d5ea42..5ba122efe 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -123,7 +123,7 @@ impl VirtSandbox { #[async_trait] impl Sandbox for VirtSandbox { - async fn start(&self, netns: Option) -> Result<()> { + async fn start(&self, netns: Option, dns: Vec) -> Result<()> { let id = &self.sid; // if sandbox running, return @@ -170,7 +170,7 @@ impl Sandbox for VirtSandbox { let kernel_modules = KernelModule::set_kernel_modules(agent_config.kernel_modules)?; let req = agent::CreateSandboxRequest { hostname: "".to_string(), - dns: vec![], + dns, storages: self .resource_manager .get_storage_for_sandbox() From 07802a19dc5475fad17fc29cd87b1ee587d83a8f Mon Sep 17 00:00:00 2001 From: Yushuo Date: Thu, 9 Feb 2023 17:05:06 +0800 Subject: [PATCH 47/52] runtime-rs: handle sys_dir bind volume In some cases, users will mount system directories as bind volumes. We should not bind mount these kinds of directories on the host as it does not make sense.
Fixes: #6299 Signed-off-by: Yushuo --- .../crates/resource/src/volume/mod.rs | 1 + .../resource/src/volume/share_fs_volume.rs | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/runtime-rs/crates/resource/src/volume/mod.rs b/src/runtime-rs/crates/resource/src/volume/mod.rs index 7a603c601..2868ddee3 100644 --- a/src/runtime-rs/crates/resource/src/volume/mod.rs +++ b/src/runtime-rs/crates/resource/src/volume/mod.rs @@ -20,6 +20,7 @@ use crate::share_fs::ShareFs; use self::hugepage::{get_huge_page_limits_map, get_huge_page_option}; const BIND: &str = "bind"; + #[async_trait] pub trait Volume: Send + Sync { fn get_volume_mount(&self) -> Result>; diff --git a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs index 95bc2edfb..f3f70424d 100644 --- a/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs +++ b/src/runtime-rs/crates/resource/src/volume/share_fs_volume.rs @@ -17,6 +17,8 @@ use super::Volume; use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig}; use kata_types::mount; +const SYS_MOUNT_PREFIX: [&str; 2] = ["/proc", "/sys"]; + // copy file to container's rootfs if filesystem sharing is not supported, otherwise // bind mount it in the shared directory. // Ignore /dev, directories and all other device files. We handle @@ -229,6 +231,7 @@ impl Volume for ShareFsVolume { pub(crate) fn is_share_fs_volume(m: &oci::Mount) -> bool { (m.r#type == "bind" || m.r#type == mount::KATA_EPHEMERAL_VOLUME_TYPE) && !is_host_device(&m.destination) + && !is_system_mount(&m.source) } fn is_host_device(dest: &str) -> bool { @@ -252,6 +255,20 @@ fn is_host_device(dest: &str) -> bool { false } +// Skip mounting certain system paths("/sys/*", "/proc/*") +// from source on the host side into the container as it does not +// make sense to do so. +// Agent will support this kind of bind mount. 
+fn is_system_mount(src: &str) -> bool { + for p in SYS_MOUNT_PREFIX { + let sub_dir_p = format!("{}/", p); + if src == p || src.contains(sub_dir_p.as_str()) { + return true; + } + } + false +} + // Note, don't generate random name, attaching rafs depends on the predictable name. pub fn generate_mount_path(id: &str, file_name: &str) -> String { let mut nid = String::from(id); @@ -265,3 +282,23 @@ pub fn generate_mount_path(id: &str, file_name: &str) -> String { format!("{}-{}-{}", nid, uid, file_name) } + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_is_system_mount() { + let sys_dir = "/sys"; + let proc_dir = "/proc"; + let sys_sub_dir = "/sys/fs/cgroup"; + let proc_sub_dir = "/proc/cgroups"; + let not_sys_dir = "/root"; + + assert!(is_system_mount(sys_dir)); + assert!(is_system_mount(proc_dir)); + assert!(is_system_mount(sys_sub_dir)); + assert!(is_system_mount(proc_sub_dir)); + assert!(!is_system_mount(not_sys_dir)); + } +} From 47c058599a3980e3d4dd86b1282561f86646dc55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 16 Feb 2023 09:12:05 +0100 Subject: [PATCH 48/52] packaging/shim-v2: Install the target depending on the arch/libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the `install_go_rust.sh` file we're adding a x86_64-unknown-linux-musl target unconditionally. That should be, instead, based in the ARCH of the host and the appropriate LIBC to be used with that host. 
Signed-off-by: Fabiano Fidêncio --- .../static-build/shim-v2/install_go_rust.sh | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tools/packaging/static-build/shim-v2/install_go_rust.sh b/tools/packaging/static-build/shim-v2/install_go_rust.sh index db192f673..502d5f085 100755 --- a/tools/packaging/static-build/shim-v2/install_go_rust.sh +++ b/tools/packaging/static-build/shim-v2/install_go_rust.sh @@ -50,12 +50,37 @@ EOF trap finish EXIT +go_version=${1:-} rust_version=${2:-} + ARCH=${ARCH:-$(uname -m)} -LIBC=${LIBC:-musl} +case "${ARCH}" in + aarch64) + goarch=arm64 + LIBC=musl + ;; + ppc64le) + goarch=${ARCH} + ARCH=powerpc64le + LIBC=gnu + ;; + s390x) + goarch=${ARCH} + LIBC=gnu + ;; + x86_64) + goarch=amd64 + LIBC=musl + ;; + *) + echo "unsupported architecture $(uname -m)" + exit 1 + ;; +esac + curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSLf | sh -s -- -y --default-toolchain ${rust_version} -t ${ARCH}-unknown-linux-${LIBC} source /root/.cargo/env -rustup target add x86_64-unknown-linux-musl +rustup target add ${ARCH}-unknown-linux-${LIBC} pushd "${tmp_dir}" @@ -70,9 +95,6 @@ done shift $(( $OPTIND - 1 )) - -go_version=${1:-} - if [ -z "$go_version" ];then echo "Missing go" usage 1 @@ -90,14 +112,6 @@ if command -v go; then fi fi -case "$(uname -m)" in - aarch64) goarch="arm64";; - ppc64le) goarch="ppc64le";; - x86_64) goarch="amd64";; - s390x) goarch="s390x";; - *) echo "unsupported architecture: $(uname -m)"; exit 1;; -esac - info "Download go version ${go_version}" kernel_name=$(uname -s) curl -OL "https://storage.googleapis.com/golang/go${go_version}.${kernel_name,,}-${goarch}.tar.gz" From be40683bc592f2373e3c282225e6605db8e202ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 16 Feb 2023 15:47:25 +0100 Subject: [PATCH 49/52] runtime-rs: Add a generic powerpc64le-options.mk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a check 
in the runtime-rs Makefile that basically checks whether the `arch/$arch-options.mk` exists or not and, if it doesn't, the build is just aborted. With this in mind, let's create a generic powerpc64le-options.mk file and not bail when building for this architecture. Fixes: #6142 Signed-off-by: Fabiano Fidêncio --- src/runtime-rs/arch/powerpc64le-options.mk | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 src/runtime-rs/arch/powerpc64le-options.mk diff --git a/src/runtime-rs/arch/powerpc64le-options.mk b/src/runtime-rs/arch/powerpc64le-options.mk new file mode 100644 index 000000000..0a974680e --- /dev/null +++ b/src/runtime-rs/arch/powerpc64le-options.mk @@ -0,0 +1,15 @@ +# Copyright (c) 2019-2022 Alibaba Cloud +# Copyright (c) 2019-2022 Ant Group +# +# SPDX-License-Identifier: Apache-2.0 +# + +MACHINETYPE := pseries +KERNELPARAMS := +MACHINEACCELERATORS := "cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,cap-large-decr=off,cap-ccf-assist=off" +CPUFEATURES := pmu=off + +QEMUCMD := qemu-system-ppc64 + +# dragonball binary name +DBCMD := dragonball From a161d119208ef07c877ad6894bfd218fb2e8a49d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 16 Feb 2023 20:30:40 +0100 Subject: [PATCH 50/52] versions: Use ubuntu as the default distro for the rootfs-image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ubuntu is already the default distro for all the architectures but x86_64, which uses clearlinux. However, our CI does *not* test the clearlinux image we ship. Taking a look at our CI code [0], we've been using ubuntu as the base for the tests for a few years already, if not forever. The minimum we can do is to switch to distributing ubuntu, as the tested rootfs-image, and then decide later on whether we should switch back to clearlinux (once we switch our CI to using that, and make sure all tests will be green), or if we move to a slimmer distro, such as alpine.
[0]: https://github.com/kata-containers/tests/blob/0a39dd1a01dc135e76f874b9475a33c5f54afcca/.ci/install_kata_image.sh#L44 Fixes: #6303 Signed-off-by: Fabiano Fidêncio --- versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions.yaml b/versions.yaml index 5f74ee8ba..f4188ad76 100644 --- a/versions.yaml +++ b/versions.yaml @@ -129,7 +129,7 @@ assets: name: "ubuntu" version: "latest" x86_64: - name: &default-image-name "clearlinux" + name: &default-image-name "ubuntu" version: "latest" meta: image-type: *default-image-name From 619ef544525d57536c98129bb12c6456255d58ea Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Wed, 15 Feb 2023 16:55:41 -0800 Subject: [PATCH 51/52] docs: Change the order of release step When a new stable branch is created, it is necessary to change the references in the tests repo from main to the new stable branch. However this step needs to be performed after the repos have been tagged as the `tags_repos.sh` script is the one that creates the new branch. Clarify this in the documentation and move the step to change branch references in test repo after repos have been tagged. Fixes: #1824 Signed-off-by: Archana Shinde --- docs/Release-Process.md | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/docs/Release-Process.md b/docs/Release-Process.md index 7dcfb84a3..962463c43 100644 --- a/docs/Release-Process.md +++ b/docs/Release-Process.md @@ -28,23 +28,6 @@ $ ./update-repository-version.sh -p "$NEW_VERSION" "$BRANCH" ``` -### Point tests repository to stable branch - - If you create a new stable branch, i.e. if your release changes a major or minor version number (not a patch release), then - you should modify the `tests` repository to point to that newly created stable branch and not the `main` branch. - The objective is that changes in the CI on the main branch will not impact the stable branch. 
- - In the test directory, change references the main branch in: - * `README.md` - * `versions.yaml` - * `cmd/github-labels/labels.yaml.in` - * `cmd/pmemctl/pmemctl.sh` - * `.ci/lib.sh` - * `.ci/static-checks.sh` - - See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. - - ### Merge all bump version Pull requests - The above step will create a GitHub pull request in the Kata projects. Trigger the CI using `/test` command on each bump Pull request. @@ -63,6 +46,24 @@ $ ./tag_repos.sh -p -b "$BRANCH" tag ``` +### Point tests repository to stable branch + + If your release changes a major or minor version number(not a patch release), then the above + `./tag_repos.sh` script will create a new stable branch in all the repositories in addition to tagging them. + This happens when you are making the first `rc` release for a new major or minor version in Kata. + In this case, you should modify the `tests` repository to point to the newly created stable branch and not the `main` branch. + The objective is that changes in the CI on the main branch will not impact the stable branch. + + In the test directory, change references of the `main` branch to the new stable branch in: + * `README.md` + * `versions.yaml` + * `cmd/github-labels/labels.yaml.in` + * `cmd/pmemctl/pmemctl.sh` + * `.ci/lib.sh` + * `.ci/static-checks.sh` + + See the commits in [the corresponding PR for stable-2.1](https://github.com/kata-containers/tests/pull/3504) for an example of the changes. + ### Check Git-hub Actions We make use of [GitHub actions](https://github.com/features/actions) in this [file](../.github/workflows/release.yaml) in the `kata-containers/kata-containers` repository to build and upload release artifacts. This action is auto triggered with the above step when a new tag is pushed to the `kata-containers/kata-containers` repository. 
From 95e3364493bdadee05ce5a3175dc453f4131fa42 Mon Sep 17 00:00:00 2001 From: Tim Zhang Date: Fri, 17 Feb 2023 11:45:56 +0800 Subject: [PATCH 52/52] runtime-rs: remove unnecessary Send/Sync trait implement Send and Sync are automatically derived traits, if a type is composed entirely of Send or Sync types, then it is Send or Sync. Almost all primitives are Send and Sync, so we don't need to implement them manually most of the time. Fixes: #6307 Signed-off-by: Tim Zhang --- src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs | 1 - src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs | 2 -- src/runtime-rs/crates/runtimes/linux_container/src/lib.rs | 2 -- src/runtime-rs/crates/runtimes/src/manager.rs | 2 -- .../runtimes/virt_container/src/container_manager/manager.rs | 2 -- src/runtime-rs/crates/runtimes/virt_container/src/lib.rs | 2 -- src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs | 2 -- src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs | 2 -- 8 files changed, 15 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index 99a38b306..849ec4a2b 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -27,7 +27,6 @@ use std::{collections::HashSet, fs::create_dir_all, path::PathBuf}; const DRAGONBALL_KERNEL: &str = "vmlinux"; const DRAGONBALL_ROOT_FS: &str = "rootfs"; -unsafe impl Send for DragonballInner {} unsafe impl Sync for DragonballInner {} pub struct DragonballInner { /// sandbox id diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs index 37ffd69b9..2886043c8 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs @@ -22,8 +22,6 @@ use tokio::sync::RwLock; use crate::{device::Device, Hypervisor, VcpuThreadIds}; -unsafe impl 
Send for Dragonball {} -unsafe impl Sync for Dragonball {} pub struct Dragonball { inner: Arc<RwLock<DragonballInner>>, } diff --git a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs index 582b4e961..406ccb0b6 100644 --- a/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/linux_container/src/lib.rs @@ -11,8 +11,6 @@ use common::{message::Message, RuntimeHandler, RuntimeInstance}; use kata_types::config::TomlConfig; use tokio::sync::mpsc::Sender; -unsafe impl Send for LinuxContainer {} -unsafe impl Sync for LinuxContainer {} pub struct LinuxContainer {} #[async_trait] diff --git a/src/runtime-rs/crates/runtimes/src/manager.rs b/src/runtime-rs/crates/runtimes/src/manager.rs index 0165184ad..1fba4e522 100644 --- a/src/runtime-rs/crates/runtimes/src/manager.rs +++ b/src/runtime-rs/crates/runtimes/src/manager.rs @@ -145,8 +145,6 @@ impl RuntimeHandlerManagerInner { } } -unsafe impl Send for RuntimeHandlerManager {} -unsafe impl Sync for RuntimeHandlerManager {} pub struct RuntimeHandlerManager { inner: Arc<RwLock<RuntimeHandlerManagerInner>>, } diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs index 1358db5b2..ba73c17d5 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/container_manager/manager.rs @@ -25,8 +25,6 @@ use tokio::sync::RwLock; use super::{logger_with_process, Container}; -unsafe impl Send for VirtContainerManager {} -unsafe impl Sync for VirtContainerManager {} pub struct VirtContainerManager { sid: String, pid: u32, diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs index e619188a9..b73caa849 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs +++ 
b/src/runtime-rs/crates/runtimes/virt_container/src/lib.rs @@ -35,8 +35,6 @@ use resource::ResourceManager; use sandbox::VIRTCONTAINER; use tokio::sync::mpsc::Sender; -unsafe impl Send for VirtContainer {} -unsafe impl Sync for VirtContainer {} pub struct VirtContainer {} #[async_trait] diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs index 5ba122efe..2f9a9c5f9 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -57,8 +57,6 @@ impl SandboxInner { } } -unsafe impl Send for VirtSandbox {} -unsafe impl Sync for VirtSandbox {} #[derive(Clone)] pub struct VirtSandbox { sid: String, diff --git a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs index c68727467..77282ac9c 100644 --- a/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs +++ b/src/runtime-rs/crates/runtimes/wasm_container/src/lib.rs @@ -10,8 +10,6 @@ use async_trait::async_trait; use common::{message::Message, RuntimeHandler, RuntimeInstance}; use kata_types::config::TomlConfig; use tokio::sync::mpsc::Sender; -unsafe impl Send for WasmContainer {} -unsafe impl Sync for WasmContainer {} pub struct WasmContainer {} #[async_trait]