From 527b73a8e50493acadfca22f0d6fa241d36770c3 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Sun, 15 May 2022 21:38:49 +0800 Subject: [PATCH 01/24] dragonball: remove unused feature in AddressSpaceMgr log_dirty_pages is useless now and will be redesigned to support live migration in the future. Signed-off-by: wllenyj --- src/dragonball/src/address_space_manager.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs index dc7650e97..2e4276626 100644 --- a/src/dragonball/src/address_space_manager.rs +++ b/src/dragonball/src/address_space_manager.rs @@ -27,7 +27,7 @@ use dbs_address_space::{ AddressSpaceRegionType, NumaNode, NumaNodeInfo, MPOL_MF_MOVE, MPOL_PREFERRED, }; use dbs_allocator::Constraint; -use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; +use kvm_bindings::kvm_userspace_memory_region; use kvm_ioctls::VmFd; use log::{debug, error, info, warn}; use nix::sys::mman; @@ -245,6 +245,11 @@ impl AddressSpaceMgr { self.address_space.is_some() } + /// Gets address space. + pub fn address_space(&self) -> Option<&AddressSpace> { + self.address_space.as_ref() + } + /// Create the address space for a virtual machine. /// /// This method is designed to be called when starting up a virtual machine instead of at @@ -393,11 +398,8 @@ impl AddressSpaceMgr { let host_addr = mmap_reg .get_host_address(MemoryRegionAddress(0)) .map_err(|_e| AddressManagerError::InvalidOperation)?; - let flags = if param.dirty_page_logging { - KVM_MEM_LOG_DIRTY_PAGES - } else { - 0 - }; + let flags = 0u32; + let mem_region = kvm_userspace_memory_region { slot: slot as u32, guest_phys_addr: reg.start_addr().raw_value(), From cfd5dae47ccc578c075b727248b54a6183226dcf Mon Sep 17 00:00:00 2001 From: wllenyj Date: Sun, 15 May 2022 21:42:00 +0800 Subject: [PATCH 02/24] dragonball: add vm struct The vm struct to manage resources and control states of an virtual machine instance. Signed-off-by: wllenyj Signed-off-by: jingshan Signed-off-by: Liu Jiang Signed-off-by: Chao Wu --- .../src/device_manager/console_manager.rs | 10 + src/dragonball/src/device_manager/mod.rs | 76 ++- src/dragonball/src/error.rs | 117 +++- src/dragonball/src/vcpu/mod.rs | 1 + src/dragonball/src/vcpu/vcpu_impl.rs | 2 +- src/dragonball/src/vm/aarch64.rs | 148 +++++ src/dragonball/src/vm/kernel_config.rs | 2 +- src/dragonball/src/vm/mod.rs | 610 ++++++++++++++++++ src/dragonball/src/vm/x86_64.rs | 276 ++++++++ 9 files changed, 1236 insertions(+), 6 deletions(-) create mode 100644 src/dragonball/src/vm/aarch64.rs create mode 100644 src/dragonball/src/vm/x86_64.rs diff --git a/src/dragonball/src/device_manager/console_manager.rs b/src/dragonball/src/device_manager/console_manager.rs index 617b98b16..388eea78b 100644 --- a/src/dragonball/src/device_manager/console_manager.rs +++ b/src/dragonball/src/device_manager/console_manager.rs @@ -351,6 +351,16 @@ pub struct DmesgWriter { logger: slog::Logger, } +impl DmesgWriter { + /// Creates a new instance. + pub fn new(logger: slog::Logger) -> Self { + Self { + buf: BytesMut::with_capacity(1024), + logger: logger.new(slog::o!("subsystem" => "dmesg")), + } + } +} + impl io::Write for DmesgWriter { /// 0000000 [ 0 . 0 3 4 9 1 6 ] R /// 5b 20 20 20 20 30 2e 30 33 34 39 31 36 5d 20 52 diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 2ad26e0d0..011a1607d 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -8,6 +8,8 @@ use std::sync::{Arc, Mutex, MutexGuard}; use arc_swap::ArcSwap; use dbs_address_space::AddressSpace; +#[cfg(target_arch = "aarch64")] +use dbs_arch::{DeviceType, MMIODeviceInfo}; use dbs_device::device_manager::{Error as IoManagerError, IoManager, IoManagerContext}; use dbs_device::resources::Resource; use dbs_device::DeviceIo; @@ -20,6 +22,8 @@ use kvm_ioctls::VmFd; use dbs_device::resources::ResourceConstraint; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices as virtio; +#[cfg(feature = "virtio-vsock")] +use dbs_virtio_devices::vsock::backend::VsockInnerConnector; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::{ mmio::{ @@ -38,7 +42,8 @@ use dbs_upcall::{ use crate::address_space_manager::GuestAddressSpaceImpl; use crate::error::StartMicrovmError; use crate::resource_manager::ResourceManager; -use crate::vm::KernelConfigInfo; +use crate::vm::{KernelConfigInfo, Vm}; +use crate::IoManagerCached; /// Virtual machine console device manager. pub mod console_manager; @@ -240,6 +245,10 @@ impl DeviceOpContext { } } + pub(crate) fn create_boot_ctx(vm: &Vm, epoll_mgr: Option) -> Self { + Self::new(epoll_mgr, vm.device_manager(), None, None, false) + } + pub(crate) fn get_vm_as(&self) -> Result { match self.vm_as.as_ref() { Some(v) => Ok(v.clone()), @@ -303,6 +312,23 @@ impl DeviceOpContext { #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] impl DeviceOpContext { + pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option) -> Self { + let vm_as = vm.vm_as().expect("VM should have memory ready").clone(); + + let vm_config = vm.vm_config().clone(); + + let mut ctx = Self::new( + epoll_mgr, + vm.device_manager(), + Some(vm_as), + vm.vm_address_space().cloned(), + true, + ); + ctx.upcall_client = vm.upcall_client().clone(); + + ctx + } + fn call_hotplug_device( &self, req: DevMgrRequest, @@ -380,6 +406,8 @@ pub struct DeviceManager { pub(crate) legacy_manager: Option, #[cfg(feature = "virtio-vsock")] pub(crate) vsock_manager: VsockDeviceMgr, + #[cfg(target_arch = "aarch64")] + mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>, } impl DeviceManager { @@ -401,9 +429,16 @@ impl DeviceManager { legacy_manager: None, #[cfg(feature = "virtio-vsock")] vsock_manager: VsockDeviceMgr::default(), + #[cfg(target_arch = "aarch64")] + mmio_device_info: HashMap::new(), } } + /// Get the underlying IoManager to dispatch IO read/write requests. + pub fn io_manager(&self) -> IoManagerCached { + IoManagerCached::new(self.io_manager.clone()) + } + /// Create the underline interrupt manager for the device manager. pub fn create_interrupt_manager(&mut self) -> Result<()> { self.irq_manager @@ -494,6 +529,12 @@ impl DeviceManager { self.con_manager.reset_console() } + #[cfg(target_arch = "aarch64")] + /// Return mmio device info for FDT build. + pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> { + Some(&self.mmio_device_info) + } + /// Create all registered devices when booting the associated virtual machine. pub fn create_devices( &mut self, @@ -524,6 +565,21 @@ impl DeviceManager { Ok(()) } + /// Start all registered devices when booting the associated virtual machine. + pub fn start_devices(&mut self) -> std::result::Result<(), StartMicrovmError> { + Ok(()) + } + + /// Remove all devices when shutdown the associated virtual machine + pub fn remove_devices( + &mut self, + _vm_as: GuestAddressSpaceImpl, + _epoll_mgr: EpollManager, + _address_space: Option<&AddressSpace>, + ) -> Result<()> { + Ok(()) + } + #[cfg(target_arch = "x86_64")] /// Get the underlying eventfd for vm exit notification. pub fn get_reset_eventfd(&self) -> Result { @@ -689,3 +745,21 @@ impl DeviceManager { } } } + +#[cfg(feature = "hotplug")] +impl DeviceManager { + /// Get Unix Domain Socket path for the vsock device. + pub fn get_vsock_inner_connector(&mut self) -> Option { + #[cfg(feature = "virtio-vsock")] + { + self.vsock_manager + .get_default_connector() + .map(|d| Some(d)) + .unwrap_or(None) + } + #[cfg(not(feature = "virtio-vsock"))] + { + return None; + } + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 2858103a4..b201c1d77 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -12,7 +12,10 @@ #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::Error as VirtIoError; +use crate::address_space_manager; use crate::device_manager; +use crate::vcpu; +use crate::vm; /// Shorthand result type for internal VMM commands. pub type Result = std::result::Result; @@ -23,8 +26,20 @@ pub type Result = std::result::Result; /// of the host (for example if Dragonball doesn't have permissions to open the KVM fd). #[derive(Debug, thiserror::Error)] pub enum Error { + /// Empty AddressSpace from parameters. + #[error("Empty AddressSpace from parameters")] + AddressSpace, + + /// The zero page extends past the end of guest_mem. + #[error("the guest zero page extends past the end of guest memory")] + ZeroPagePastRamEnd, + + /// Error writing the zero page of guest memory. + #[error("failed to write to guest zero page")] + ZeroPageSetup, + /// Failure occurs in issuing KVM ioctls and errors will be returned from kvm_ioctls lib. - #[error("failure in issuing KVM ioctl command")] + #[error("failure in issuing KVM ioctl command: {0}")] Kvm(#[source] kvm_ioctls::Error), /// The host kernel reports an unsupported KVM API version. @@ -32,17 +47,30 @@ pub enum Error { KvmApiVersion(i32), /// Cannot initialize the KVM context due to missing capabilities. - #[error("missing KVM capability")] + #[error("missing KVM capability: {0:?}")] KvmCap(kvm_ioctls::Cap), #[cfg(target_arch = "x86_64")] - #[error("failed to configure MSRs")] + #[error("failed to configure MSRs: {0:?}")] /// Cannot configure MSRs GuestMSRs(dbs_arch::msr::Error), /// MSR inner error #[error("MSR inner error")] Msr(vmm_sys_util::fam::Error), + + /// Error writing MP table to memory. + #[cfg(target_arch = "x86_64")] + #[error("failed to write MP table to guest memory: {0}")] + MpTableSetup(#[source] dbs_boot::mptable::Error), + + /// Fail to boot system + #[error("failed to boot system: {0}")] + BootSystem(#[source] dbs_boot::Error), + + /// Cannot open the VM file descriptor. + #[error(transparent)] + Vm(vm::VmError), } /// Errors associated with starting the instance. @@ -52,6 +80,48 @@ pub enum StartMicrovmError { #[error("failure while reading from EventFd file descriptor")] EventFd, + /// The start command was issued more than once. + #[error("the virtual machine is already running")] + MicroVMAlreadyRunning, + + /// Cannot start the VM because the kernel was not configured. + #[error("cannot start the virtual machine without kernel configuration")] + MissingKernelConfig, + + #[cfg(feature = "hotplug")] + /// Upcall initialize miss vsock device. + #[error("the upcall client needs a virtio-vsock device for communication")] + UpcallMissVsock, + + /// Upcall is not ready + #[error("the upcall client is not ready")] + UpcallNotReady, + + /// Configuration passed in is invalidate. + #[error("invalid virtual machine configuration: {0} ")] + ConfigureInvalid(String), + + /// This error is thrown by the minimal boot loader implementation. + /// It is related to a faulty memory configuration. + #[error("failure while configuring boot information for the virtual machine: {0}")] + ConfigureSystem(#[source] Error), + + /// Cannot configure the VM. + #[error("failure while configuring the virtual machine: {0}")] + ConfigureVm(#[source] vm::VmError), + + /// Cannot load initrd. + #[error("cannot load Initrd into guest memory: {0}")] + InitrdLoader(#[from] LoadInitrdError), + + /// Cannot load kernel due to invalid memory configuration or invalid kernel image. + #[error("cannot load guest kernel into guest memory: {0}")] + KernelLoader(#[source] linux_loader::loader::Error), + + /// Cannot load command line string. + #[error("failure while configuring guest kernel commandline: {0}")] + LoadCommandline(#[source] linux_loader::loader::Error), + /// The device manager was not configured. #[error("the device manager failed to manage devices: {0}")] DeviceManager(#[source] device_manager::DeviceMgrError), @@ -69,4 +139,45 @@ pub enum StartMicrovmError { /// Cannot initialize a MMIO Vsock Device or add a device to the MMIO Bus. #[error("failure while registering virtio-vsock device: {0}")] RegisterVsockDevice(#[source] device_manager::DeviceMgrError), + + /// Address space manager related error, e.g.cannot access guest address space manager. + #[error("address space manager related error: {0}")] + AddressManagerError(#[source] address_space_manager::AddressManagerError), + + /// Cannot create a new vCPU file descriptor. + #[error("vCPU related error: {0}")] + Vcpu(#[source] vcpu::VcpuManagerError), + + #[cfg(feature = "hotplug")] + /// Upcall initialize Error. + #[error("failure while initializing the upcall client: {0}")] + UpcallInitError(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "hotplug")] + /// Upcall connect Error. + #[error("failure while connecting the upcall client: {0}")] + UpcallConnectError(#[source] dbs_upcall::UpcallClientError), +} + +/// Errors associated with starting the instance. +#[derive(Debug, thiserror::Error)] +pub enum StopMicrovmError { + /// Guest memory has not been initialized. + #[error("Guest memory has not been initialized")] + GuestMemoryNotInitialized, + + /// Cannnot remove devices + #[error("Failed to remove devices in device_manager {0}")] + DeviceManager(#[source] device_manager::DeviceMgrError), +} + +/// Errors associated with loading initrd +#[derive(Debug, thiserror::Error)] +pub enum LoadInitrdError { + /// Cannot load initrd due to an invalid memory configuration. + #[error("failed to load the initrd image to guest memory")] + LoadInitrd, + /// Cannot load initrd due to an invalid image. + #[error("failed to read the initrd image: {0}")] + ReadInitrd(#[source] std::io::Error), } diff --git a/src/dragonball/src/vcpu/mod.rs b/src/dragonball/src/vcpu/mod.rs index d1075e734..fe6688324 100644 --- a/src/dragonball/src/vcpu/mod.rs +++ b/src/dragonball/src/vcpu/mod.rs @@ -6,6 +6,7 @@ mod sm; pub mod vcpu_impl; pub mod vcpu_manager; +pub use vcpu_manager::{VcpuManager, VcpuManagerError}; #[cfg(target_arch = "x86_64")] use dbs_arch::cpuid::VpmuFeatureLevel; diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs index d2711cace..2dbccf66d 100644 --- a/src/dragonball/src/vcpu/vcpu_impl.rs +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -964,7 +964,7 @@ pub mod tests { #[cfg(target_arch = "x86_64")] #[test] fn test_vcpu_check_io_port_info() { - let (vcpu, receiver) = create_vcpu(); + let (vcpu, _receiver) = create_vcpu(); // boot complete signal let res = vcpu diff --git a/src/dragonball/src/vm/aarch64.rs b/src/dragonball/src/vm/aarch64.rs new file mode 100644 index 000000000..02af4a691 --- /dev/null +++ b/src/dragonball/src/vm/aarch64.rs @@ -0,0 +1,148 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::collections::HashMap; +use std::ops::Deref; + +use dbs_arch::gic::GICDevice; +use dbs_arch::{DeviceInfoForFDT, DeviceType}; +use dbs_boot::InitrdConfig; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use std::fmt::Debug; +use vm_memory::{GuestAddressSpace, GuestMemory}; +use vmm_sys_util::eventfd::EventFd; + +use super::{Vm, VmError}; +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::error::Error; +use crate::StartMicrovmError; +use linux_loader::loader::Cmdline; + +/// Configures the system and should be called once per vm before starting vcpu threads. +/// For aarch64, we only setup the FDT. +/// +/// # Arguments +/// +/// * `guest_mem` - The memory to be used by the guest. +/// * `cmdline` - The kernel commandline. +/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu. +/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes. +/// * `gic_device` - The GIC device. +/// * `initrd` - Information about an optional initrd. +pub fn configure_system( + guest_mem: &M, + cmdline: &str, + vcpu_mpidr: Vec, + device_info: Option<&HashMap<(DeviceType, String), T>>, + gic_device: &Box, + initrd: &Option, +) -> super::Result<()> { + dbs_boot::fdt::create_fdt( + guest_mem, + vcpu_mpidr, + cmdline, + device_info, + gic_device, + initrd, + ) + .map_err(Error::BootSystem)?; + Ok(()) +} + +#[cfg(target_arch = "aarch64")] +impl Vm { + /// Gets a reference to the irqchip of the VM + pub fn get_irqchip(&self) -> &Box { + &self.irqchip_handle.as_ref().unwrap() + } + + /// Initialize the virtual machine instance. + /// + /// It initialize the virtual machine instance by: + /// 1) initialize virtual machine global state and configuration. + /// 2) create system devices, such as interrupt controller. + /// 3) create and start IO devices, such as serial, console, block, net, vsock etc. + /// 4) create and initialize vCPUs. + /// 5) configure CPU power management features. + /// 6) load guest kernel image. + pub fn init_microvm( + &mut self, + epoll_mgr: EpollManager, + vm_as: GuestAddressSpaceImpl, + request_ts: TimestampUs, + ) -> std::result::Result<(), StartMicrovmError> { + let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?; + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before + // setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + + let reset_eventfd = + EventFd::new(libc::EFD_NONBLOCK).map_err(|_| StartMicrovmError::EventFd)?; + self.reset_eventfd = Some( + reset_eventfd + .try_clone() + .map_err(|_| StartMicrovmError::EventFd)?, + ); + + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .set_reset_event_fd(reset_eventfd); + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) + .map_err(StartMicrovmError::Vcpu)?; + + self.setup_interrupt_controller()?; + self.init_devices(epoll_mgr)?; + + Ok(()) + } + + /// Creates the irq chip in-kernel device model. + pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicrovmError> { + let vcpu_count = self.vm_config.vcpu_count; + + self.irqchip_handle = Some( + dbs_arch::gic::create_gic(&self.fd, vcpu_count.into()) + .map_err(|e| StartMicrovmError::ConfigureVm(VmError::SetupGIC(e)))?, + ); + + Ok(()) + } + + /// Execute system architecture specific configurations. + /// + /// 1) set guest kernel boot parameters + /// 2) setup FDT data structs. + pub fn configure_system_arch( + &self, + vm_memory: &GuestMemoryImpl, + cmdline: &Cmdline, + initrd: Option, + ) -> std::result::Result<(), StartMicrovmError> { + let vcpu_manager = self.vcpu_manager().map_err(StartMicrovmError::Vcpu)?; + let vcpu_mpidr = vcpu_manager + .vcpus() + .into_iter() + .map(|cpu| cpu.get_mpidr()) + .collect(); + + let guest_memory = vm_memory.memory(); + configure_system( + guest_memory, + cmdline.as_str(), + vcpu_mpidr, + self.device_manager.get_mmio_device_info(), + self.get_irqchip(), + &initrd, + ) + .map_err(StartMicrovmError::ConfigureSystem) + } +} diff --git a/src/dragonball/src/vm/kernel_config.rs b/src/dragonball/src/vm/kernel_config.rs index f266b48c4..466fd8849 100644 --- a/src/dragonball/src/vm/kernel_config.rs +++ b/src/dragonball/src/vm/kernel_config.rs @@ -8,7 +8,7 @@ pub struct KernelConfigInfo { /// The descriptor to the kernel file. kernel_file: File, /// The descriptor to the initrd file, if there is one - initrd_file: Option, + pub initrd_file: Option, /// The commandline for guest kernel. cmdline: linux_loader::cmdline::Cmdline, } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index e09ec316b..db10e593e 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -1,11 +1,69 @@ // Copyright (C) 2021 Alibaba Cloud. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use std::io::{self, Read, Seek, SeekFrom}; +use std::ops::Deref; +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex, RwLock}; + +use dbs_address_space::AddressSpace; +#[cfg(target_arch = "aarch64")] +use dbs_arch::gic::GICDevice; +use dbs_boot::InitrdConfig; +#[cfg(feature = "hotplug")] +use dbs_upcall::{DevMgrService, UpcallClient}; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use kvm_ioctls::VmFd; +use linux_loader::loader::{KernelLoader, KernelLoaderResult}; +use seccompiler::BpfProgram; use serde_derive::{Deserialize, Serialize}; +use slog::{error, info}; +use vm_memory::{Bytes, GuestAddress, GuestAddressSpace}; +use vmm_sys_util::eventfd::EventFd; + +use crate::address_space_manager::{ + AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, + GuestMemoryImpl, +}; +use crate::api::v1::{InstanceInfo, InstanceState}; +use crate::device_manager::console_manager::DmesgWriter; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::error::{LoadInitrdError, Result, StartMicrovmError, StopMicrovmError}; +use crate::kvm_context::KvmContext; +use crate::resource_manager::ResourceManager; +use crate::vcpu::{VcpuManager, VcpuManagerError}; +#[cfg(target_arch = "aarch64")] +use dbs_arch::gic::Error as GICError; mod kernel_config; pub use self::kernel_config::KernelConfigInfo; +#[cfg(target_arch = "aarch64")] +#[path = "aarch64.rs"] +mod aarch64; + +#[cfg(target_arch = "x86_64")] +#[path = "x86_64.rs"] +mod x86_64; + +/// Errors associated with virtual machine instance related operations. +#[derive(Debug, thiserror::Error)] +pub enum VmError { + /// Cannot configure the IRQ. + #[error("failed to configure IRQ fot the virtual machine: {0}")] + Irq(#[source] kvm_ioctls::Error), + + /// Cannot configure the microvm. + #[error("failed to initialize the virtual machine: {0}")] + VmSetup(#[source] kvm_ioctls::Error), + + /// Cannot setup GIC + #[cfg(target_arch = "aarch64")] + #[error("failed to configure GIC")] + SetupGIC(GICError), +} + /// Configuration information for user defined NUMA nodes. #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] pub struct NumaRegionInfo { @@ -94,3 +152,555 @@ impl Default for VmConfigInfo { } } } + +/// Struct to manage resources and control states of an virtual machine instance. +/// +/// An `Vm` instance holds a resources assigned to a virtual machine instance, such as CPU, memory, +/// devices etc. When an `Vm` instance gets deconstructed, all resources assigned should be +/// released. +/// +/// We have explicit build the object model as: +/// |---Vmm API Server--<-1:1-> HTTP API Server +/// | |----------<-1:1-> Shimv2/CRI API Server +/// | +/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory +/// ^ ^---1:1-> Device Manager <-1:N-> Device +/// | ^---1:1-> Resource Manager +/// | ^---1:N-> Vcpu +/// |---<-1:N-> Event Manager +pub struct Vm { + fd: Arc, + kvm: KvmContext, + + address_space: AddressSpaceMgr, + device_manager: DeviceManager, + epoll_manager: EpollManager, + resource_manager: Arc, + vcpu_manager: Option>>, + logger: slog::Logger, + /// Config of virtual machine + vm_config: VmConfigInfo, + kernel_config: Option, + shared_info: Arc>, + reset_eventfd: Option, + dmesg_fifo: Option>, + start_instance_request_ts: u64, + start_instance_request_cpu_ts: u64, + start_instance_downtime: u64, + + // Arm specific fields. + // On aarch64 we need to keep around the fd obtained by creating the VGIC device. + #[cfg(target_arch = "aarch64")] + irqchip_handle: Option>, + + #[cfg(feature = "hotplug")] + upcall_client: Option>>, +} + +impl Vm { + /// Constructs a new `Vm` instance using the given `Kvm` instance. + pub fn new( + kvm_fd: Option, + api_shared_info: Arc>, + epoll_manager: EpollManager, + ) -> Result { + let id = api_shared_info.read().unwrap().id.clone(); + let logger = slog_scope::logger().new(slog::o!("id" => id)); + + let kvm = KvmContext::new(kvm_fd)?; + let fd = Arc::new(kvm.create_vm()?); + + let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots()))); + + let device_manager = DeviceManager::new( + fd.clone(), + resource_manager.clone(), + epoll_manager.clone(), + &logger, + ); + + Ok(Vm { + fd, + kvm, + address_space: AddressSpaceMgr::default(), + device_manager, + epoll_manager, + resource_manager, + vcpu_manager: None, + logger, + vm_config: Default::default(), + kernel_config: None, + shared_info: api_shared_info, + reset_eventfd: None, + dmesg_fifo: None, + start_instance_request_ts: 0, + start_instance_request_cpu_ts: 0, + start_instance_downtime: 0, + #[cfg(target_arch = "aarch64")] + irqchip_handle: None, + #[cfg(feature = "hotplug")] + upcall_client: None, + }) + } + + /// Gets a reference to the kvm file descriptor owned by this VM. + pub fn vm_fd(&self) -> &VmFd { + &self.fd + } + + /// Gets a reference to the address_space.address_space for guest memory owned by this VM. + pub fn vm_address_space(&self) -> Option<&AddressSpace> { + self.address_space.get_address_space() + } + + /// Gets a reference to the device manager by this VM. + pub fn device_manager(&self) -> &DeviceManager { + &self.device_manager + } + + /// Gets a reference to the address space for guest memory owned by this VM. + /// + /// Note that `GuestMemory` does not include any device memory that may have been added after + /// this VM was constructed. + pub fn vm_as(&self) -> Option<&GuestAddressSpaceImpl> { + self.address_space.get_vm_as() + } + + /// Get a immutable reference to the virtual machine configuration information. + pub fn vm_config(&self) -> &VmConfigInfo { + &self.vm_config + } + + /// Set the virtual machine configuration information. + pub fn set_vm_config(&mut self, config: VmConfigInfo) { + self.vm_config = config; + } + + /// Set guest kernel boot configurations. + pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) { + self.kernel_config = Some(kernel_config); + } + + /// Get virtual machine shared instance information. + pub fn shared_info(&self) -> &Arc> { + &self.shared_info + } + + /// Get a reference to EpollManager. + pub fn epoll_manager(&self) -> &EpollManager { + &self.epoll_manager + } + + /// Get eventfd for exit notification. + pub fn get_reset_eventfd(&self) -> Option<&EventFd> { + self.reset_eventfd.as_ref() + } + + /// Check whether the VM has been initialized. + pub fn is_vm_initialized(&self) -> bool { + let instance_state = { + // Use expect() to crash if the other thread poisoned this lock. + let shared_info = self.shared_info.read() + .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock"); + shared_info.state + }; + instance_state != InstanceState::Uninitialized + } + + /// Check whether the VM instance is running. + pub fn is_vm_running(&self) -> bool { + let instance_state = { + // Use expect() to crash if the other thread poisoned this lock. + let shared_info = self.shared_info.read() + .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock"); + shared_info.state + }; + instance_state == InstanceState::Running + } + + /// returns true if system upcall service is ready + pub fn is_upcall_client_ready(&self) -> bool { + #[cfg(feature = "hotplug")] + { + if let Some(upcall_client) = self.upcall_client() { + return upcall_client.is_ready(); + } + } + + false + } + + /// Create device operation context. + /// vm is not running, return false + /// vm is running, but hotplug feature is not enable, return error + /// vm is running, but upcall initialize failed, return error + /// vm is running, upcall initialize OK, return true + pub fn create_device_op_context( + &mut self, + epoll_mgr: Option, + ) -> std::result::Result { + if !self.is_vm_initialized() { + Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr)) + } else { + #[cfg(feature = "hotplug")] + { + if self.upcall_client().is_none() { + Err(StartMicrovmError::UpcallMissVsock) + } else if self.is_upcall_client_ready() { + Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) + } else { + Err(StartMicrovmError::UpcallNotReady) + } + } + #[cfg(not(feature = "hotplug"))] + { + Err(StartMicrovmError::MicroVMAlreadyRunning) + } + } + } + + /// Save VM instance exit state + pub fn vm_exit(&self, exit_code: i32) { + if let Ok(mut info) = self.shared_info.write() { + info.state = InstanceState::Exited(exit_code); + } else { + error!( + self.logger, + "Failed to save exit state, couldn't be written due to poisoned lock" + ); + } + } + + /// Reset the console into canonical mode. + pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> { + self.device_manager.reset_console() + } + + fn get_dragonball_info(&self) -> (String, String) { + let guard = self.shared_info.read().unwrap(); + let instance_id = guard.id.clone(); + let dragonball_version = guard.vmm_version.clone(); + + (dragonball_version, instance_id) + } + + fn init_dmesg_logger(&mut self) { + let writer = self.dmesg_logger(); + self.dmesg_fifo = Some(writer); + } + + /// dmesg write to logger + pub fn dmesg_logger(&self) -> Box { + Box::new(DmesgWriter::new(self.logger.clone())) + } + + pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicrovmError> { + if self.kernel_config.is_none() { + return Err(StartMicrovmError::MissingKernelConfig); + } + Ok(()) + } + + pub(crate) fn init_vcpu_manager( + &mut self, + vm_as: GuestAddressSpaceImpl, + vcpu_seccomp_filter: BpfProgram, + ) -> std::result::Result<(), VcpuManagerError> { + let vcpu_manager = VcpuManager::new( + self.fd.clone(), + &self.kvm, + &self.vm_config, + vm_as, + vcpu_seccomp_filter, + self.shared_info.clone(), + self.device_manager.io_manager(), + self.epoll_manager.clone(), + )?; + self.vcpu_manager = Some(vcpu_manager); + + Ok(()) + } + + /// get the cpu manager's reference + pub fn vcpu_manager( + &self, + ) -> std::result::Result, VcpuManagerError> { + self.vcpu_manager + .as_ref() + .ok_or(VcpuManagerError::VcpuManagerNotInitialized) + .map(|mgr| mgr.lock().unwrap()) + } + + /// Pause all vcpus and record the instance downtime + pub fn pause_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> { + let ts = TimestampUs::default(); + self.start_instance_downtime = ts.time_us; + + self.vcpu_manager()?.pause_all_vcpus()?; + + Ok(()) + } + + /// Resume all vcpus and calc the intance downtime + pub fn resume_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> { + self.vcpu_manager()?.resume_all_vcpus()?; + + Ok(()) + } + + pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: initializing guest memory..."); + + // We are not allowing reinitialization of vm guest memory. + if self.address_space.is_initialized() { + return Ok(()); + } + // vcpu boot up require local memory. reserve 100 MiB memory + let mem_size = (self.vm_config.mem_size_mib as u64) << 20; + let reserve_memory_bytes = self.vm_config.reserve_memory_bytes; + if reserve_memory_bytes > (mem_size >> 1) as u64 { + return Err(StartMicrovmError::ConfigureInvalid(String::from( + "invalid reserve_memory_bytes", + ))); + } + + let mem_type = self.vm_config.mem_type.clone(); + let mut mem_file_path = String::from(""); + if mem_type == "hugetlbfs" { + let shared_info = self.shared_info.read() + .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock"); + mem_file_path.push_str("/dragonball/"); + mem_file_path.push_str(shared_info.id.as_str()); + } + + // init default regions. + let mut numa_regions = Vec::with_capacity(1); + let mut vcpu_ids: Vec = Vec::new(); + + for i in 0..self.vm_config().max_vcpu_count { + vcpu_ids.push(i as u32); + } + let numa_node = NumaRegionInfo { + size: self.vm_config.mem_size_mib as u64, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids, + }; + numa_regions.push(numa_node); + + info!( + self.logger, + "VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \ + numa_regions:{:?}", + mem_type, + mem_file_path, + mem_size, + reserve_memory_bytes, + numa_regions, + ); + + let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) + .map_err(StartMicrovmError::AddressManagerError)?; + address_space_param.set_kvm_vm_fd(self.fd.clone()); + self.address_space + .create_address_space(&self.resource_manager, &numa_regions, address_space_param) + .map_err(StartMicrovmError::AddressManagerError)?; + + info!(self.logger, "VM: initializing guest memory done"); + Ok(()) + } + + fn init_devices( + &mut self, + epoll_manager: EpollManager, + ) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: initializing devices ..."); + + let com1_sock_path = self.vm_config.serial_path.clone(); + let kernel_config = self + .kernel_config + .as_mut() + .ok_or(StartMicrovmError::MissingKernelConfig)?; + + info!(self.logger, "VM: create interrupt manager"); + self.device_manager + .create_interrupt_manager() + .map_err(StartMicrovmError::DeviceManager)?; + + info!(self.logger, "VM: create devices"); + let vm_as = + self.address_space + .get_vm_as() + .ok_or(StartMicrovmError::AddressManagerError( + AddressManagerError::GuestMemoryNotInitialized, + ))?; + self.device_manager.create_devices( + vm_as.clone(), + epoll_manager, + kernel_config, + com1_sock_path, + self.dmesg_fifo.take(), + self.address_space.address_space(), + )?; + + info!(self.logger, "VM: start devices"); + self.device_manager.start_devices()?; + + info!(self.logger, "VM: initializing devices done"); + Ok(()) + } + + /// Remove devices when shutdown vm + pub fn remove_devices(&mut self) -> std::result::Result<(), StopMicrovmError> { + info!(self.logger, "VM: remove devices"); + let vm_as = self + .address_space + .get_vm_as() + .ok_or(StopMicrovmError::GuestMemoryNotInitialized)?; + + self.device_manager + .remove_devices( + vm_as.clone(), + self.epoll_manager.clone(), + self.address_space.address_space(), + ) + .map_err(StopMicrovmError::DeviceManager) + } + + fn load_kernel( + &mut self, + vm_memory: &GuestMemoryImpl, + ) -> std::result::Result { + // This is the easy way out of consuming the value of the kernel_cmdline. + + let kernel_config = self + .kernel_config + .as_mut() + .ok_or(StartMicrovmError::MissingKernelConfig)?; + + let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start()); + + #[cfg(target_arch = "x86_64")] + return linux_loader::loader::elf::Elf::load( + vm_memory, + None, + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicrovmError::KernelLoader); + + #[cfg(target_arch = "aarch64")] + return linux_loader::loader::pe::PE::load( + vm_memory, + Some(GuestAddress(dbs_boot::get_kernel_start())), + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicrovmError::KernelLoader); + } + + /// Loads the initrd from a file into the given memory slice. + /// + /// * `vm_memory` - The guest memory the initrd is written to. + /// * `image` - The initrd image. + /// + /// Returns the result of initrd loading + fn load_initrd( + &self, + vm_memory: &GuestMemoryImpl, + image: &mut F, + ) -> std::result::Result + where + F: Read + Seek, + { + use crate::error::LoadInitrdError::*; + + let size: usize; + // Get the image size + match image.seek(SeekFrom::End(0)) { + Err(e) => return Err(ReadInitrd(e)), + Ok(0) => { + return Err(ReadInitrd(io::Error::new( + io::ErrorKind::InvalidData, + "Initrd image seek returned a size of zero", + ))) + } + Ok(s) => size = s as usize, + }; + // Go back to the image start + image.seek(SeekFrom::Start(0)).map_err(ReadInitrd)?; + + // Get the target address + let address = dbs_boot::initrd_load_addr(vm_memory, size as u64).map_err(|_| LoadInitrd)?; + + // Load the image into memory + vm_memory + .read_from(GuestAddress(address), image, size) + .map_err(|_| LoadInitrd)?; + + Ok(InitrdConfig { + address: GuestAddress(address), + size, + }) + } + + fn init_configure_system( + &mut self, + vm_as: &GuestAddressSpaceImpl, + ) -> std::result::Result<(), StartMicrovmError> { + let vm_memory = vm_as.memory(); + let kernel_config = self + .kernel_config + .as_ref() + .ok_or(StartMicrovmError::MissingKernelConfig)?; + //let cmdline = kernel_config.cmdline.clone(); + let initrd: Option = match &kernel_config.initrd_file { + Some(f) => { + let initrd_file = f.try_clone(); + if initrd_file.is_err() { + return Err(StartMicrovmError::InitrdLoader( + LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)), + )); + } + let res = self.load_initrd(vm_memory.deref(), &mut initrd_file.unwrap())?; + Some(res) + } + None => None, + }; + + self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd) + } +} + +#[cfg(feature = "hotplug")] +impl Vm { + /// Get upcall client. + pub fn upcall_client(&self) -> &Option>> { + &self.upcall_client + } + + /// initialize upcall client for guest os + fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + // get vsock inner connector for upcall + let inner_connector = self + .device_manager + .get_vsock_inner_connector() + .ok_or(StartMicrovmError::UpcallMissVsock)?; + + let mut upcall_client = UpcallClient::new( + inner_connector, + self.epoll_manager.clone(), + DevMgrService::default(), + ) + .map_err(StartMicrovmError::UpcallInitError)?; + + upcall_client + .connect() + .map_err(StartMicrovmError::UpcallConnectError)?; + + self.upcall_client = Some(Arc::new(upcall_client)); + + info!(self.logger, "upcall client init success"); + Ok(()) + } +} diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs new file mode 100644 index 000000000..a55996282 --- /dev/null +++ b/src/dragonball/src/vm/x86_64.rs @@ -0,0 +1,276 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::convert::TryInto; +use std::mem; +use std::ops::Deref; + +use dbs_address_space::AddressSpace; +use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, InitrdConfig}; +use dbs_utils::epoll_manager::EpollManager; +use dbs_utils::time::TimestampUs; +use kvm_bindings::{kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_PIT_SPEAKER_DUMMY}; +use slog::info; +use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory}; + +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::error::{Error, Result, StartMicrovmError}; +use crate::vm::{Vm, VmError}; + +use linux_loader::cmdline::Cmdline; + +/// Configures the system and should be called once per vm before starting vcpu +/// threads. +/// +/// # Arguments +/// +/// * `guest_mem` - The memory to be used by the guest. +/// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was +/// loaded. +/// * `cmdline_size` - Size of the kernel command line in bytes including the +/// null terminator. +/// * `initrd` - Information about where the ramdisk image was loaded in the +/// `guest_mem`. +/// * `boot_cpus` - Number of virtual CPUs the guest will have at boot time. +/// * `max_cpus` - Max number of virtual CPUs the guest will have. +/// * `rsv_mem_bytes` - Reserve memory from microVM.. +#[allow(clippy::too_many_arguments)] +pub fn configure_system( + guest_mem: &M, + address_space: Option<&AddressSpace>, + cmdline_addr: GuestAddress, + cmdline_size: usize, + initrd: &Option, + boot_cpus: u8, + max_cpus: u8, + rsv_mem_bytes: u64, +) -> super::Result<()> { + const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; + const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; + const KERNEL_LOADER_OTHER: u8 = 0xff; + const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero. + + let mmio_start = GuestAddress(layout::MMIO_LOW_START); + let mmio_end = GuestAddress(layout::MMIO_LOW_END); + let himem_start = GuestAddress(layout::HIMEM_START); + + // Note that this puts the mptable at the last 1k of Linux's 640k base RAM + mptable::setup_mptable(guest_mem, boot_cpus, max_cpus).map_err(Error::MpTableSetup)?; + + let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); + + params.0.hdr.type_of_loader = KERNEL_LOADER_OTHER; + params.0.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC; + params.0.hdr.header = KERNEL_HDR_MAGIC; + params.0.hdr.cmd_line_ptr = cmdline_addr.raw_value() as u32; + params.0.hdr.cmdline_size = cmdline_size as u32; + params.0.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES; + if let Some(initrd_config) = initrd { + params.0.hdr.ramdisk_image = initrd_config.address.raw_value() as u32; + params.0.hdr.ramdisk_size = initrd_config.size as u32; + } + + add_e820_entry(&mut params.0, 0, layout::EBDA_START, bootparam::E820_RAM) + .map_err(Error::BootSystem)?; + + let mem_end = address_space.ok_or(Error::AddressSpace)?.last_addr(); + if mem_end < mmio_start { + add_e820_entry( + &mut params.0, + himem_start.raw_value() as u64, + // it's safe to use unchecked_offset_from because + // mem_end > himem_start + mem_end.unchecked_offset_from(himem_start) as u64 + 1, + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + } else { + add_e820_entry( + &mut params.0, + himem_start.raw_value(), + // it's safe to use unchecked_offset_from because + // end_32bit_gap_start > himem_start + mmio_start.unchecked_offset_from(himem_start), + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + if mem_end > mmio_end { + add_e820_entry( + &mut params.0, + mmio_end.raw_value() + 1, + // it's safe to use unchecked_offset_from because mem_end > mmio_end + mem_end.unchecked_offset_from(mmio_end) as u64, + bootparam::E820_RAM, + ) + .map_err(Error::BootSystem)?; + } + } + + // reserve memory from microVM. + if rsv_mem_bytes > 0 { + add_e820_entry( + &mut params.0, + mem_end.raw_value().max(mmio_end.raw_value()) + 1, + rsv_mem_bytes, + bootparam::E820_RESERVED, + ) + .map_err(Error::BootSystem)?; + } + + let zero_page_addr = GuestAddress(layout::ZERO_PAGE_START); + guest_mem + .checked_offset(zero_page_addr, mem::size_of::()) + .ok_or(Error::ZeroPagePastRamEnd)?; + guest_mem + .write_obj(params, zero_page_addr) + .map_err(|_| Error::ZeroPageSetup)?; + + Ok(()) +} + +impl Vm { + /// Get the status of in-kernel PIT. + pub fn get_pit_state(&self) -> Result { + self.fd.get_pit2().map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Set the status of in-kernel PIT. + pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> { + self.fd + .set_pit2(pit_state) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Get the status of in-kernel ioapic. + pub fn get_irqchip_state(&self, chip_id: u32) -> Result { + let mut irqchip: kvm_irqchip = kvm_irqchip { + chip_id, + ..kvm_irqchip::default() + }; + self.fd + .get_irqchip(&mut irqchip) + .map(|_| irqchip) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } + + /// Set the status of in-kernel ioapic. + pub fn set_irqchip_state(&self, irqchip: &kvm_irqchip) -> Result<()> { + self.fd + .set_irqchip(irqchip) + .map_err(|e| Error::Vm(VmError::Irq(e))) + } +} + +impl Vm { + /// Initialize the virtual machine instance. + /// + /// It initialize the virtual machine instance by: + /// 1) initialize virtual machine global state and configuration. + /// 2) create system devices, such as interrupt controller, PIT etc. + /// 3) create and start IO devices, such as serial, console, block, net, vsock etc. + /// 4) create and initialize vCPUs. + /// 5) configure CPU power management features. + /// 6) load guest kernel image. + pub fn init_microvm( + &mut self, + epoll_mgr: EpollManager, + vm_as: GuestAddressSpaceImpl, + request_ts: TimestampUs, + ) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: start initializing microvm ..."); + + self.init_tss()?; + // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` + // while on aarch64 we need to do it the other way around. + self.setup_interrupt_controller()?; + self.create_pit()?; + self.init_devices(epoll_mgr)?; + + let reset_event_fd = self.device_manager.get_reset_eventfd().unwrap(); + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .set_reset_event_fd(reset_event_fd) + .map_err(StartMicrovmError::Vcpu)?; + + if self.vm_config.cpu_pm == "on" { + // TODO: add cpu_pm support. issue #4590. + info!(self.logger, "VM: enable CPU disable_idle_exits capability"); + } + + let vm_memory = vm_as.memory(); + let kernel_loader_result = self.load_kernel(vm_memory.deref())?; + + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) + .map_err(StartMicrovmError::Vcpu)?; + + info!(self.logger, "VM: initializing microvm done"); + Ok(()) + } + + /// Execute system architecture specific configurations. + /// + /// 1) set guest kernel boot parameters + /// 2) setup BIOS configuration data structs, mainly implement the MPSpec. + pub fn configure_system_arch( + &self, + vm_memory: &GuestMemoryImpl, + cmdline: &Cmdline, + initrd: Option, + ) -> std::result::Result<(), StartMicrovmError> { + let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START); + linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline) + .map_err(StartMicrovmError::LoadCommandline)?; + + configure_system( + vm_memory, + self.address_space.address_space(), + cmdline_addr, + cmdline.as_str().len() + 1, + &initrd, + self.vm_config.vcpu_count, + self.vm_config.max_vcpu_count, + self.vm_config.reserve_memory_bytes, + ) + .map_err(StartMicrovmError::ConfigureSystem) + } + + /// Initializes the guest memory. + pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicrovmError> { + self.fd + .set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap()) + .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + } + + /// Creates the irq chip and an in-kernel device model for the PIT. + pub(crate) fn setup_interrupt_controller( + &mut self, + ) -> std::result::Result<(), StartMicrovmError> { + self.fd + .create_irq_chip() + .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + } + + /// Creates an in-kernel device model for the PIT. + pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: create pit"); + // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 + // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..kvm_pit_config::default() + }; + + // Safe because we know that our file is a VM fd, we know the kernel will only read the + // correct amount of memory from our pointer, and we verify the return result. + self.fd + .create_pit2(pit_config) + .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + } +} From 4d234f5742e437cfa9bf4e1a0aaa6250df9f9904 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Sun, 22 May 2022 14:42:32 +0800 Subject: [PATCH 03/24] dragonball: refactor code layout Refactored some code layout. Signed-off-by: Jiang Liu --- .../src/device_manager/console_manager.rs | 2 +- src/dragonball/src/device_manager/mod.rs | 43 +- src/dragonball/src/error.rs | 5 +- src/dragonball/src/vcpu/mod.rs | 7 +- src/dragonball/src/vm/aarch64.rs | 51 +-- src/dragonball/src/vm/kernel_config.rs | 7 +- src/dragonball/src/vm/mod.rs | 417 +++++++++--------- src/dragonball/src/vm/x86_64.rs | 22 +- 8 files changed, 281 insertions(+), 273 deletions(-) diff --git a/src/dragonball/src/device_manager/console_manager.rs b/src/dragonball/src/device_manager/console_manager.rs index 388eea78b..4ebad5816 100644 --- a/src/dragonball/src/device_manager/console_manager.rs +++ b/src/dragonball/src/device_manager/console_manager.rs @@ -353,7 +353,7 @@ pub struct DmesgWriter { impl DmesgWriter { /// Creates a new instance. - pub fn new(logger: slog::Logger) -> Self { + pub fn new(logger: &slog::Logger) -> Self { Self { buf: BytesMut::with_capacity(1024), logger: logger.new(slog::o!("subsystem" => "dmesg")), diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 011a1607d..07698d4b8 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -22,8 +22,6 @@ use kvm_ioctls::VmFd; use dbs_device::resources::ResourceConstraint; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices as virtio; -#[cfg(feature = "virtio-vsock")] -use dbs_virtio_devices::vsock::backend::VsockInnerConnector; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::{ mmio::{ @@ -38,6 +36,8 @@ use dbs_upcall::{ DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError, UpcallClientRequest, UpcallClientResponse, }; +#[cfg(feature = "hotplug")] +use dbs_virtio_devices::vsock::backend::VsockInnerConnector; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::error::StartMicrovmError; @@ -99,6 +99,10 @@ pub enum DeviceMgrError { /// Failed to hotplug the device. #[error("failed to hotplug virtual device")] HotplugDevice(#[source] UpcallClientError), + + /// Failed to free device resource. + #[error("failed to free device resources: {0}")] + ResourceError(#[source] crate::resource_manager::ResourceError), } /// Specialized version of `std::result::Result` for device manager operations. @@ -315,8 +319,6 @@ impl DeviceOpContext { pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option) -> Self { let vm_as = vm.vm_as().expect("VM should have memory ready").clone(); - let vm_config = vm.vm_config().clone(); - let mut ctx = Self::new( epoll_mgr, vm.device_manager(), @@ -325,7 +327,6 @@ impl DeviceOpContext { true, ); ctx.upcall_client = vm.upcall_client().clone(); - ctx } @@ -404,10 +405,10 @@ pub struct DeviceManager { pub(crate) con_manager: ConsoleManager, pub(crate) legacy_manager: Option, + #[cfg(target_arch = "aarch64")] + pub(crate) mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>, #[cfg(feature = "virtio-vsock")] pub(crate) vsock_manager: VsockDeviceMgr, - #[cfg(target_arch = "aarch64")] - mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>, } impl DeviceManager { @@ -425,12 +426,13 @@ impl DeviceManager { res_manager, vm_fd, logger: logger.new(slog::o!()), + con_manager: ConsoleManager::new(epoll_manager, logger), legacy_manager: None, - #[cfg(feature = "virtio-vsock")] - vsock_manager: VsockDeviceMgr::default(), #[cfg(target_arch = "aarch64")] mmio_device_info: HashMap::new(), + #[cfg(feature = "virtio-vsock")] + vsock_manager: VsockDeviceMgr::default(), } } @@ -452,6 +454,7 @@ impl DeviceManager { } /// Create legacy devices associted virtual machine + #[allow(unused_variables)] pub fn create_legacy_devices( &mut self, ctx: &mut DeviceOpContext, @@ -529,12 +532,6 @@ impl DeviceManager { self.con_manager.reset_console() } - #[cfg(target_arch = "aarch64")] - /// Return mmio device info for FDT build. - pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> { - Some(&self.mmio_device_info) - } - /// Create all registered devices when booting the associated virtual machine. pub fn create_devices( &mut self, @@ -579,8 +576,10 @@ impl DeviceManager { ) -> Result<()> { Ok(()) } +} - #[cfg(target_arch = "x86_64")] +#[cfg(target_arch = "x86_64")] +impl DeviceManager { /// Get the underlying eventfd for vm exit notification. pub fn get_reset_eventfd(&self) -> Result { if let Some(legacy) = self.legacy_manager.as_ref() { @@ -595,6 +594,14 @@ impl DeviceManager { } } +#[cfg(target_arch = "aarch64")] +impl DeviceManager { + /// Return mmio device info for FDT build. + pub fn get_mmio_device_info(&self) -> Option<&HashMap<(DeviceType, String), MMIODeviceInfo>> { + Some(&self.mmio_device_info) + } +} + #[cfg(feature = "dbs-virtio-devices")] impl DeviceManager { fn get_virtio_device_info(device: &Arc) -> Result<(u64, u64, u32)> { @@ -694,7 +701,9 @@ impl DeviceManager { // unregister Resource manager let resources = device.get_assigned_resources(); - ctx.res_manager.free_device_resources(&resources); + ctx.res_manager + .free_device_resources(&resources) + .map_err(DeviceMgrError::ResourceError)?; Ok(()) } diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index b201c1d77..c50e50b25 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -12,10 +12,7 @@ #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::Error as VirtIoError; -use crate::address_space_manager; -use crate::device_manager; -use crate::vcpu; -use crate::vm; +use crate::{address_space_manager, device_manager, vcpu, vm}; /// Shorthand result type for internal VMM commands. pub type Result = std::result::Result; diff --git a/src/dragonball/src/vcpu/mod.rs b/src/dragonball/src/vcpu/mod.rs index fe6688324..5b8ed397f 100644 --- a/src/dragonball/src/vcpu/mod.rs +++ b/src/dragonball/src/vcpu/mod.rs @@ -4,13 +4,14 @@ // SPDX-License-Identifier: Apache-2.0 mod sm; -pub mod vcpu_impl; -pub mod vcpu_manager; -pub use vcpu_manager::{VcpuManager, VcpuManagerError}; +mod vcpu_impl; +mod vcpu_manager; #[cfg(target_arch = "x86_64")] use dbs_arch::cpuid::VpmuFeatureLevel; +pub use vcpu_manager::{VcpuManager, VcpuManagerError}; + /// vcpu config collection pub struct VcpuConfig { /// initial vcpu count diff --git a/src/dragonball/src/vm/aarch64.rs b/src/dragonball/src/vm/aarch64.rs index 02af4a691..739b1515c 100644 --- a/src/dragonball/src/vm/aarch64.rs +++ b/src/dragonball/src/vm/aarch64.rs @@ -7,6 +7,7 @@ // found in the THIRD-PARTY file. use std::collections::HashMap; +use std::fmt::Debug; use std::ops::Deref; use dbs_arch::gic::GICDevice; @@ -14,15 +15,13 @@ use dbs_arch::{DeviceInfoForFDT, DeviceType}; use dbs_boot::InitrdConfig; use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; -use std::fmt::Debug; +use linux_loader::loader::Cmdline; use vm_memory::{GuestAddressSpace, GuestMemory}; use vmm_sys_util::eventfd::EventFd; use super::{Vm, VmError}; use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; -use crate::error::Error; -use crate::StartMicrovmError; -use linux_loader::loader::Cmdline; +use crate::error::{Error, StartMicrovmError}; /// Configures the system and should be called once per vm before starting vcpu threads. /// For aarch64, we only setup the FDT. @@ -35,7 +34,7 @@ use linux_loader::loader::Cmdline; /// * `device_info` - A hashmap containing the attached devices for building FDT device nodes. /// * `gic_device` - The GIC device. /// * `initrd` - Information about an optional initrd. -pub fn configure_system( +fn configure_system( guest_mem: &M, cmdline: &str, vcpu_mpidr: Vec, @@ -62,6 +61,18 @@ impl Vm { &self.irqchip_handle.as_ref().unwrap() } + /// Creates the irq chip in-kernel device model. + pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicrovmError> { + let vcpu_count = self.vm_config.vcpu_count; + + self.irqchip_handle = Some( + dbs_arch::gic::create_gic(&self.vm_fd, vcpu_count.into()) + .map_err(|e| StartMicrovmError::ConfigureVm(VmError::SetupGIC(e)))?, + ); + + Ok(()) + } + /// Initialize the virtual machine instance. /// /// It initialize the virtual machine instance by: @@ -76,13 +87,7 @@ impl Vm { epoll_mgr: EpollManager, vm_as: GuestAddressSpaceImpl, request_ts: TimestampUs, - ) -> std::result::Result<(), StartMicrovmError> { - let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?; - // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before - // setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP - // was already initialized. - // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. - + ) -> Result<(), StartMicrovmError> { let reset_eventfd = EventFd::new(libc::EFD_NONBLOCK).map_err(|_| StartMicrovmError::EventFd)?; self.reset_eventfd = Some( @@ -90,33 +95,25 @@ impl Vm { .try_clone() .map_err(|_| StartMicrovmError::EventFd)?, ); - self.vcpu_manager() .map_err(StartMicrovmError::Vcpu)? .set_reset_event_fd(reset_eventfd); + + // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before + // setting up the IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP + // was already initialized. + // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. + let kernel_loader_result = self.load_kernel(vm_as.memory().deref())?; self.vcpu_manager() .map_err(StartMicrovmError::Vcpu)? .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) .map_err(StartMicrovmError::Vcpu)?; - self.setup_interrupt_controller()?; self.init_devices(epoll_mgr)?; Ok(()) } - /// Creates the irq chip in-kernel device model. - pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicrovmError> { - let vcpu_count = self.vm_config.vcpu_count; - - self.irqchip_handle = Some( - dbs_arch::gic::create_gic(&self.fd, vcpu_count.into()) - .map_err(|e| StartMicrovmError::ConfigureVm(VmError::SetupGIC(e)))?, - ); - - Ok(()) - } - /// Execute system architecture specific configurations. /// /// 1) set guest kernel boot parameters @@ -133,8 +130,8 @@ impl Vm { .into_iter() .map(|cpu| cpu.get_mpidr()) .collect(); - let guest_memory = vm_memory.memory(); + configure_system( guest_memory, cmdline.as_str(), diff --git a/src/dragonball/src/vm/kernel_config.rs b/src/dragonball/src/vm/kernel_config.rs index 466fd8849..4798d8da3 100644 --- a/src/dragonball/src/vm/kernel_config.rs +++ b/src/dragonball/src/vm/kernel_config.rs @@ -8,7 +8,7 @@ pub struct KernelConfigInfo { /// The descriptor to the kernel file. kernel_file: File, /// The descriptor to the initrd file, if there is one - pub initrd_file: Option, + initrd_file: Option, /// The commandline for guest kernel. cmdline: linux_loader::cmdline::Cmdline, } @@ -32,6 +32,11 @@ impl KernelConfigInfo { &mut self.kernel_file } + /// Get an immutable reference to the initrd file. + pub fn initrd_file(&self) -> Option<&File> { + self.initrd_file.as_ref() + } + /// Get a mutable reference to the initrd file. pub fn initrd_file_mut(&mut self) -> Option<&mut File> { self.initrd_file.as_mut() diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index db10e593e..fff044e3a 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -10,8 +10,6 @@ use dbs_address_space::AddressSpace; #[cfg(target_arch = "aarch64")] use dbs_arch::gic::GICDevice; use dbs_boot::InitrdConfig; -#[cfg(feature = "hotplug")] -use dbs_upcall::{DevMgrService, UpcallClient}; use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; use kvm_ioctls::VmFd; @@ -22,6 +20,9 @@ use slog::{error, info}; use vm_memory::{Bytes, GuestAddress, GuestAddressSpace}; use vmm_sys_util::eventfd::EventFd; +#[cfg(feature = "hotplug")] +use dbs_upcall::{DevMgrService, UpcallClient}; + use crate::address_space_manager::{ AddressManagerError, AddressSpaceMgr, AddressSpaceMgrBuilder, GuestAddressSpaceImpl, GuestMemoryImpl, @@ -61,7 +62,7 @@ pub enum VmError { /// Cannot setup GIC #[cfg(target_arch = "aarch64")] #[error("failed to configure GIC")] - SetupGIC(GICError), + SetupGIC(dbs_arch::gic::Error), } /// Configuration information for user defined NUMA nodes. @@ -169,21 +170,21 @@ impl Default for VmConfigInfo { /// | ^---1:N-> Vcpu /// |---<-1:N-> Event Manager pub struct Vm { - fd: Arc, + epoll_manager: EpollManager, kvm: KvmContext, + shared_info: Arc>, address_space: AddressSpaceMgr, device_manager: DeviceManager, - epoll_manager: EpollManager, + dmesg_fifo: Option>, + kernel_config: Option, + logger: slog::Logger, + reset_eventfd: Option, resource_manager: Arc, vcpu_manager: Option>>, - logger: slog::Logger, - /// Config of virtual machine vm_config: VmConfigInfo, - kernel_config: Option, - shared_info: Arc>, - reset_eventfd: Option, - dmesg_fifo: Option>, + vm_fd: Arc, + start_instance_request_ts: u64, start_instance_request_cpu_ts: u64, start_instance_downtime: u64, @@ -191,7 +192,7 @@ pub struct Vm { // Arm specific fields. // On aarch64 we need to keep around the fd obtained by creating the VGIC device. #[cfg(target_arch = "aarch64")] - irqchip_handle: Option>, + irqchip_handle: Option>, #[cfg(feature = "hotplug")] upcall_client: Option>>, @@ -206,36 +207,36 @@ impl Vm { ) -> Result { let id = api_shared_info.read().unwrap().id.clone(); let logger = slog_scope::logger().new(slog::o!("id" => id)); - let kvm = KvmContext::new(kvm_fd)?; - let fd = Arc::new(kvm.create_vm()?); - + let vm_fd = Arc::new(kvm.create_vm()?); let resource_manager = Arc::new(ResourceManager::new(Some(kvm.max_memslots()))); - let device_manager = DeviceManager::new( - fd.clone(), + vm_fd.clone(), resource_manager.clone(), epoll_manager.clone(), &logger, ); Ok(Vm { - fd, + epoll_manager, kvm, + shared_info: api_shared_info, + address_space: AddressSpaceMgr::default(), device_manager, - epoll_manager, + dmesg_fifo: None, + kernel_config: None, + logger, + reset_eventfd: None, resource_manager, vcpu_manager: None, - logger, vm_config: Default::default(), - kernel_config: None, - shared_info: api_shared_info, - reset_eventfd: None, - dmesg_fifo: None, + vm_fd, + start_instance_request_ts: 0, start_instance_request_cpu_ts: 0, start_instance_downtime: 0, + #[cfg(target_arch = "aarch64")] irqchip_handle: None, #[cfg(feature = "hotplug")] @@ -243,9 +244,29 @@ impl Vm { }) } - /// Gets a reference to the kvm file descriptor owned by this VM. - pub fn vm_fd(&self) -> &VmFd { - &self.fd + /// Gets a reference to the device manager by this VM. + pub fn device_manager(&self) -> &DeviceManager { + &self.device_manager + } + + /// Get a reference to EpollManager. + pub fn epoll_manager(&self) -> &EpollManager { + &self.epoll_manager + } + + /// Get eventfd for exit notification. + pub fn get_reset_eventfd(&self) -> Option<&EventFd> { + self.reset_eventfd.as_ref() + } + + /// Set guest kernel boot configurations. + pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) { + self.kernel_config = Some(kernel_config); + } + + /// Get virtual machine shared instance information. + pub fn shared_info(&self) -> &Arc> { + &self.shared_info } /// Gets a reference to the address_space.address_space for guest memory owned by this VM. @@ -253,11 +274,6 @@ impl Vm { self.address_space.get_address_space() } - /// Gets a reference to the device manager by this VM. - pub fn device_manager(&self) -> &DeviceManager { - &self.device_manager - } - /// Gets a reference to the address space for guest memory owned by this VM. /// /// Note that `GuestMemory` does not include any device memory that may have been added after @@ -276,24 +292,21 @@ impl Vm { self.vm_config = config; } - /// Set guest kernel boot configurations. - pub fn set_kernel_config(&mut self, kernel_config: KernelConfigInfo) { - self.kernel_config = Some(kernel_config); + /// Gets a reference to the kvm file descriptor owned by this VM. + pub fn vm_fd(&self) -> &VmFd { + &self.vm_fd } - /// Get virtual machine shared instance information. - pub fn shared_info(&self) -> &Arc> { - &self.shared_info - } + /// returns true if system upcall service is ready + pub fn is_upcall_client_ready(&self) -> bool { + #[cfg(feature = "hotplug")] + { + if let Some(upcall_client) = self.upcall_client() { + return upcall_client.is_ready(); + } + } - /// Get a reference to EpollManager. - pub fn epoll_manager(&self) -> &EpollManager { - &self.epoll_manager - } - - /// Get eventfd for exit notification. - pub fn get_reset_eventfd(&self) -> Option<&EventFd> { - self.reset_eventfd.as_ref() + false } /// Check whether the VM has been initialized. @@ -318,16 +331,16 @@ impl Vm { instance_state == InstanceState::Running } - /// returns true if system upcall service is ready - pub fn is_upcall_client_ready(&self) -> bool { - #[cfg(feature = "hotplug")] - { - if let Some(upcall_client) = self.upcall_client() { - return upcall_client.is_ready(); - } + /// Save VM instance exit state + pub fn vm_exit(&self, exit_code: i32) { + if let Ok(mut info) = self.shared_info.write() { + info.state = InstanceState::Exited(exit_code); + } else { + error!( + self.logger, + "Failed to save exit state, couldn't be written due to poisoned lock" + ); } - - false } /// Create device operation context. @@ -359,41 +372,6 @@ impl Vm { } } - /// Save VM instance exit state - pub fn vm_exit(&self, exit_code: i32) { - if let Ok(mut info) = self.shared_info.write() { - info.state = InstanceState::Exited(exit_code); - } else { - error!( - self.logger, - "Failed to save exit state, couldn't be written due to poisoned lock" - ); - } - } - - /// Reset the console into canonical mode. - pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> { - self.device_manager.reset_console() - } - - fn get_dragonball_info(&self) -> (String, String) { - let guard = self.shared_info.read().unwrap(); - let instance_id = guard.id.clone(); - let dragonball_version = guard.vmm_version.clone(); - - (dragonball_version, instance_id) - } - - fn init_dmesg_logger(&mut self) { - let writer = self.dmesg_logger(); - self.dmesg_fifo = Some(writer); - } - - /// dmesg write to logger - pub fn dmesg_logger(&self) -> Box { - Box::new(DmesgWriter::new(self.logger.clone())) - } - pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicrovmError> { if self.kernel_config.is_none() { return Err(StartMicrovmError::MissingKernelConfig); @@ -401,13 +379,23 @@ impl Vm { Ok(()) } + pub(crate) fn get_dragonball_info(&self) -> (String, String) { + let guard = self.shared_info.read().unwrap(); + let instance_id = guard.id.clone(); + let dragonball_version = guard.vmm_version.clone(); + + (dragonball_version, instance_id) + } +} + +impl Vm { pub(crate) fn init_vcpu_manager( &mut self, vm_as: GuestAddressSpaceImpl, vcpu_seccomp_filter: BpfProgram, ) -> std::result::Result<(), VcpuManagerError> { let vcpu_manager = VcpuManager::new( - self.fd.clone(), + self.vm_fd.clone(), &self.kvm, &self.vm_config, vm_as, @@ -422,7 +410,7 @@ impl Vm { } /// get the cpu manager's reference - pub fn vcpu_manager( + pub(crate) fn vcpu_manager( &self, ) -> std::result::Result, VcpuManagerError> { self.vcpu_manager @@ -448,69 +436,7 @@ impl Vm { Ok(()) } - pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> { - info!(self.logger, "VM: initializing guest memory..."); - - // We are not allowing reinitialization of vm guest memory. - if self.address_space.is_initialized() { - return Ok(()); - } - // vcpu boot up require local memory. reserve 100 MiB memory - let mem_size = (self.vm_config.mem_size_mib as u64) << 20; - let reserve_memory_bytes = self.vm_config.reserve_memory_bytes; - if reserve_memory_bytes > (mem_size >> 1) as u64 { - return Err(StartMicrovmError::ConfigureInvalid(String::from( - "invalid reserve_memory_bytes", - ))); - } - - let mem_type = self.vm_config.mem_type.clone(); - let mut mem_file_path = String::from(""); - if mem_type == "hugetlbfs" { - let shared_info = self.shared_info.read() - .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock"); - mem_file_path.push_str("/dragonball/"); - mem_file_path.push_str(shared_info.id.as_str()); - } - - // init default regions. - let mut numa_regions = Vec::with_capacity(1); - let mut vcpu_ids: Vec = Vec::new(); - - for i in 0..self.vm_config().max_vcpu_count { - vcpu_ids.push(i as u32); - } - let numa_node = NumaRegionInfo { - size: self.vm_config.mem_size_mib as u64, - host_numa_node_id: None, - guest_numa_node_id: Some(0), - vcpu_ids, - }; - numa_regions.push(numa_node); - - info!( - self.logger, - "VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \ - numa_regions:{:?}", - mem_type, - mem_file_path, - mem_size, - reserve_memory_bytes, - numa_regions, - ); - - let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) - .map_err(StartMicrovmError::AddressManagerError)?; - address_space_param.set_kvm_vm_fd(self.fd.clone()); - self.address_space - .create_address_space(&self.resource_manager, &numa_regions, address_space_param) - .map_err(StartMicrovmError::AddressManagerError)?; - - info!(self.logger, "VM: initializing guest memory done"); - Ok(()) - } - - fn init_devices( + pub(crate) fn init_devices( &mut self, epoll_manager: EpollManager, ) -> std::result::Result<(), StartMicrovmError> { @@ -567,36 +493,108 @@ impl Vm { .map_err(StopMicrovmError::DeviceManager) } - fn load_kernel( - &mut self, - vm_memory: &GuestMemoryImpl, - ) -> std::result::Result { - // This is the easy way out of consuming the value of the kernel_cmdline. + /// Reset the console into canonical mode. + pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> { + self.device_manager.reset_console() + } + pub(crate) fn init_dmesg_logger(&mut self) { + let writer = self.dmesg_logger(); + self.dmesg_fifo = Some(writer); + } + + /// dmesg write to logger + fn dmesg_logger(&self) -> Box { + Box::new(DmesgWriter::new(&self.logger)) + } + + pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: initializing guest memory..."); + // We are not allowing reinitialization of vm guest memory. + if self.address_space.is_initialized() { + return Ok(()); + } + + // vcpu boot up require local memory. reserve 100 MiB memory + let mem_size = (self.vm_config.mem_size_mib as u64) << 20; + let reserve_memory_bytes = self.vm_config.reserve_memory_bytes; + if reserve_memory_bytes > (mem_size >> 1) as u64 { + return Err(StartMicrovmError::ConfigureInvalid(String::from( + "invalid reserve_memory_bytes", + ))); + } + + let mem_type = self.vm_config.mem_type.clone(); + let mut mem_file_path = String::from(""); + if mem_type == "hugetlbfs" { + let shared_info = self.shared_info.read() + .expect("Failed to determine if instance is initialized because shared info couldn't be read due to poisoned lock"); + mem_file_path.push_str("/dragonball/"); + mem_file_path.push_str(shared_info.id.as_str()); + } + + let mut vcpu_ids: Vec = Vec::new(); + for i in 0..self.vm_config().max_vcpu_count { + vcpu_ids.push(i as u32); + } + + // init default regions. + let mut numa_regions = Vec::with_capacity(1); + let numa_node = NumaRegionInfo { + size: self.vm_config.mem_size_mib as u64, + host_numa_node_id: None, + guest_numa_node_id: Some(0), + vcpu_ids, + }; + numa_regions.push(numa_node); + + info!( + self.logger, + "VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \ + numa_regions:{:?}", + mem_type, + mem_file_path, + mem_size, + reserve_memory_bytes, + numa_regions, + ); + + let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) + .map_err(StartMicrovmError::AddressManagerError)?; + address_space_param.set_kvm_vm_fd(self.vm_fd.clone()); + self.address_space + .create_address_space(&self.resource_manager, &numa_regions, address_space_param) + .map_err(StartMicrovmError::AddressManagerError)?; + + info!(self.logger, "VM: initializing guest memory done"); + Ok(()) + } + + fn init_configure_system( + &mut self, + vm_as: &GuestAddressSpaceImpl, + ) -> std::result::Result<(), StartMicrovmError> { + let vm_memory = vm_as.memory(); let kernel_config = self .kernel_config - .as_mut() + .as_ref() .ok_or(StartMicrovmError::MissingKernelConfig)?; + //let cmdline = kernel_config.cmdline.clone(); + let initrd: Option = match kernel_config.initrd_file() { + Some(f) => { + let initrd_file = f.try_clone(); + if initrd_file.is_err() { + return Err(StartMicrovmError::InitrdLoader( + LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)), + )); + } + let res = self.load_initrd(vm_memory.deref(), &mut initrd_file.unwrap())?; + Some(res) + } + None => None, + }; - let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start()); - - #[cfg(target_arch = "x86_64")] - return linux_loader::loader::elf::Elf::load( - vm_memory, - None, - kernel_config.kernel_file_mut(), - Some(high_mem_addr), - ) - .map_err(StartMicrovmError::KernelLoader); - - #[cfg(target_arch = "aarch64")] - return linux_loader::loader::pe::PE::load( - vm_memory, - Some(GuestAddress(dbs_boot::get_kernel_start())), - kernel_config.kernel_file_mut(), - Some(high_mem_addr), - ) - .map_err(StartMicrovmError::KernelLoader); + self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd) } /// Loads the initrd from a file into the given memory slice. @@ -644,49 +642,46 @@ impl Vm { }) } - fn init_configure_system( + fn load_kernel( &mut self, - vm_as: &GuestAddressSpaceImpl, - ) -> std::result::Result<(), StartMicrovmError> { - let vm_memory = vm_as.memory(); + vm_memory: &GuestMemoryImpl, + ) -> std::result::Result { + // This is the easy way out of consuming the value of the kernel_cmdline. let kernel_config = self .kernel_config - .as_ref() + .as_mut() .ok_or(StartMicrovmError::MissingKernelConfig)?; - //let cmdline = kernel_config.cmdline.clone(); - let initrd: Option = match &kernel_config.initrd_file { - Some(f) => { - let initrd_file = f.try_clone(); - if initrd_file.is_err() { - return Err(StartMicrovmError::InitrdLoader( - LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)), - )); - } - let res = self.load_initrd(vm_memory.deref(), &mut initrd_file.unwrap())?; - Some(res) - } - None => None, - }; + let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start()); - self.configure_system_arch(vm_memory.deref(), kernel_config.kernel_cmdline(), initrd) + #[cfg(target_arch = "x86_64")] + return linux_loader::loader::elf::Elf::load( + vm_memory, + None, + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicrovmError::KernelLoader); + + #[cfg(target_arch = "aarch64")] + return linux_loader::loader::pe::PE::load( + vm_memory, + Some(GuestAddress(dbs_boot::get_kernel_start())), + kernel_config.kernel_file_mut(), + Some(high_mem_addr), + ) + .map_err(StartMicrovmError::KernelLoader); } } #[cfg(feature = "hotplug")] impl Vm { - /// Get upcall client. - pub fn upcall_client(&self) -> &Option>> { - &self.upcall_client - } - /// initialize upcall client for guest os - fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + pub(crate) fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { // get vsock inner connector for upcall let inner_connector = self .device_manager .get_vsock_inner_connector() .ok_or(StartMicrovmError::UpcallMissVsock)?; - let mut upcall_client = UpcallClient::new( inner_connector, self.epoll_manager.clone(), @@ -697,10 +692,14 @@ impl Vm { upcall_client .connect() .map_err(StartMicrovmError::UpcallConnectError)?; - self.upcall_client = Some(Arc::new(upcall_client)); info!(self.logger, "upcall client init success"); Ok(()) } + + /// Get upcall client. + pub fn upcall_client(&self) -> &Option>> { + &self.upcall_client + } } diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index a55996282..74336b6da 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -15,6 +15,7 @@ use dbs_boot::{add_e820_entry, bootparam, layout, mptable, BootParamsWrapper, In use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; use kvm_bindings::{kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_PIT_SPEAKER_DUMMY}; +use linux_loader::cmdline::Cmdline; use slog::info; use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory}; @@ -22,8 +23,6 @@ use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; use crate::error::{Error, Result, StartMicrovmError}; use crate::vm::{Vm, VmError}; -use linux_loader::cmdline::Cmdline; - /// Configures the system and should be called once per vm before starting vcpu /// threads. /// @@ -40,7 +39,7 @@ use linux_loader::cmdline::Cmdline; /// * `max_cpus` - Max number of virtual CPUs the guest will have. /// * `rsv_mem_bytes` - Reserve memory from microVM.. #[allow(clippy::too_many_arguments)] -pub fn configure_system( +fn configure_system( guest_mem: &M, address_space: Option<&AddressSpace>, cmdline_addr: GuestAddress, @@ -136,12 +135,14 @@ pub fn configure_system( impl Vm { /// Get the status of in-kernel PIT. pub fn get_pit_state(&self) -> Result { - self.fd.get_pit2().map_err(|e| Error::Vm(VmError::Irq(e))) + self.vm_fd + .get_pit2() + .map_err(|e| Error::Vm(VmError::Irq(e))) } /// Set the status of in-kernel PIT. pub fn set_pit_state(&self, pit_state: &kvm_pit_state2) -> Result<()> { - self.fd + self.vm_fd .set_pit2(pit_state) .map_err(|e| Error::Vm(VmError::Irq(e))) } @@ -152,7 +153,7 @@ impl Vm { chip_id, ..kvm_irqchip::default() }; - self.fd + self.vm_fd .get_irqchip(&mut irqchip) .map(|_| irqchip) .map_err(|e| Error::Vm(VmError::Irq(e))) @@ -160,7 +161,7 @@ impl Vm { /// Set the status of in-kernel ioapic. pub fn set_irqchip_state(&self, irqchip: &kvm_irqchip) -> Result<()> { - self.fd + self.vm_fd .set_irqchip(irqchip) .map_err(|e| Error::Vm(VmError::Irq(e))) } @@ -204,7 +205,6 @@ impl Vm { let vm_memory = vm_as.memory(); let kernel_loader_result = self.load_kernel(vm_memory.deref())?; - self.vcpu_manager() .map_err(StartMicrovmError::Vcpu)? .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) @@ -243,7 +243,7 @@ impl Vm { /// Initializes the guest memory. pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicrovmError> { - self.fd + self.vm_fd .set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap()) .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) } @@ -252,7 +252,7 @@ impl Vm { pub(crate) fn setup_interrupt_controller( &mut self, ) -> std::result::Result<(), StartMicrovmError> { - self.fd + self.vm_fd .create_irq_chip() .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) } @@ -269,7 +269,7 @@ impl Vm { // Safe because we know that our file is a VM fd, we know the kernel will only read the // correct amount of memory from our pointer, and we verify the return result. - self.fd + self.vm_fd .create_pit2(pit_config) .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) } From 5c1ccc376b5c46414c6fcda7d6183cfb274d16d6 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Sun, 15 May 2022 23:45:56 +0800 Subject: [PATCH 04/24] dragonball: add Vmm struct The Vmm struct is global coordinator to manage API servers, virtual machines etc. Signed-off-by: wllenyj --- src/dragonball/src/api/v1/boot_source.rs | 59 +++++++ src/dragonball/src/api/v1/mod.rs | 10 ++ src/dragonball/src/api/v1/vmm_action.rs | 148 ++++++++++++++++ src/dragonball/src/error.rs | 21 +++ src/dragonball/src/event_manager.rs | 169 ++++++++++++++++++ src/dragonball/src/lib.rs | 5 + src/dragonball/src/vmm.rs | 215 +++++++++++++++++++++++ 7 files changed, 627 insertions(+) create mode 100644 src/dragonball/src/api/v1/boot_source.rs create mode 100644 src/dragonball/src/api/v1/vmm_action.rs create mode 100644 src/dragonball/src/event_manager.rs create mode 100644 src/dragonball/src/vmm.rs diff --git a/src/dragonball/src/api/v1/boot_source.rs b/src/dragonball/src/api/v1/boot_source.rs new file mode 100644 index 000000000..e7de03043 --- /dev/null +++ b/src/dragonball/src/api/v1/boot_source.rs @@ -0,0 +1,59 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use serde_derive::{Deserialize, Serialize}; + +/// Default guest kernel command line: +/// - `reboot=k` shut down the guest on reboot, instead of well... rebooting; +/// - `panic=1` on panic, reboot after 1 second; +/// - `pci=off` do not scan for PCI devices (ser boot time); +/// - `nomodules` disable loadable kernel module support; +/// - `8250.nr_uarts=0` disable 8250 serial interface; +/// - `i8042.noaux` do not probe the i8042 controller for an attached mouse (ser boot time); +/// - `i8042.nomux` do not probe i8042 for a multiplexing controller (ser boot time); +/// - `i8042.nopnp` do not use ACPIPnP to discover KBD/AUX controllers (ser boot time); +/// - `i8042.dumbkbd` do not attempt to control kbd state via the i8042 (ser boot time). +pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 \ + i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd"; + +/// Strongly typed data structure used to configure the boot source of the microvm. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)] +#[serde(deny_unknown_fields)] +pub struct BootSourceConfig { + /// Path of the kernel image. + /// We only support uncompressed kernel for Dragonball. + pub kernel_path: String, + /// Path of the initrd, if there is one. + /// ps. rootfs is set in BlockDeviceConfigInfo + pub initrd_path: Option, + /// The boot arguments to pass to the kernel. + #[serde(skip_serializing_if = "Option::is_none")] + pub boot_args: Option, +} + +/// Errors associated with actions on `BootSourceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum BootSourceConfigError { + /// The virutal machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The kernel file cannot be opened. + #[error( + "the kernel file cannot be opened due to invalid kernel path or invalid permissions: {0}" + )] + InvalidKernelPath(#[source] std::io::Error), + + /// The initrd file cannot be opened. + #[error("the initrd file cannot be opened due to invalid path or invalid permissions: {0}")] + InvalidInitrdPath(#[source] std::io::Error), + + /// The kernel command line is invalid. + #[error("the kernel command line is invalid: {0}")] + InvalidKernelCommandLine(#[source] linux_loader::cmdline::Error), + + /// The boot source cannot be update post boot. + #[error("the update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, +} diff --git a/src/dragonball/src/api/v1/mod.rs b/src/dragonball/src/api/v1/mod.rs index f25fb8436..c1ab3f5d3 100644 --- a/src/dragonball/src/api/v1/mod.rs +++ b/src/dragonball/src/api/v1/mod.rs @@ -3,5 +3,15 @@ //! API Version 1 related data structures to configure the vmm. +mod vmm_action; +pub use self::vmm_action::{ + VmmAction, VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService, +}; + +/// Wrapper for configuring the microVM boot source. +mod boot_source; +pub use self::boot_source::{BootSourceConfig, BootSourceConfigError, DEFAULT_KERNEL_CMDLINE}; + +/// Wrapper over the microVM general information. mod instance_info; pub use self::instance_info::{InstanceInfo, InstanceState}; diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs new file mode 100644 index 000000000..1d7ac7e63 --- /dev/null +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -0,0 +1,148 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::fs::File; +use std::sync::mpsc::{Receiver, Sender, TryRecvError}; + +use log::{debug, error, warn}; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::Result; +use crate::event_manager::EventManager; +use crate::vm::{KernelConfigInfo, VmConfigInfo}; +use crate::vmm::Vmm; + +use super::*; + +/// Wrapper for all errors associated with VMM actions. +#[derive(Debug, thiserror::Error)] +pub enum VmmActionError { + /// The action `ConfigureBootSource` failed either because of bad user input or an internal + /// error. + #[error("failed to configure boot source for VM: {0}")] + BootSource(#[source] BootSourceConfigError), +} + +/// This enum represents the public interface of the VMM. Each action contains various +/// bits of information (ids, paths, etc.). +#[derive(Clone, Debug, PartialEq)] +pub enum VmmAction { + /// Configure the boot source of the microVM using as input the `ConfigureBootSource`. This + /// action can only be called before the microVM has booted. + ConfigureBootSource(BootSourceConfig), +} + +/// The enum represents the response sent by the VMM in case of success. The response is either +/// empty, when no data needs to be sent, or an internal VMM structure. +#[derive(Debug)] +pub enum VmmData { + /// No data is sent on the channel. + Empty, +} + +/// Request data type used to communicate between the API and the VMM. +pub type VmmRequest = Box; + +/// Data type used to communicate between the API and the VMM. +pub type VmmRequestResult = std::result::Result; + +/// Response data type used to communicate between the API and the VMM. +pub type VmmResponse = Box; + +/// VMM Service to handle requests from the API server. +/// +/// There are two levels of API servers as below: +/// API client <--> VMM API Server <--> VMM Core +pub struct VmmService { + from_api: Receiver, + to_api: Sender, + machine_config: VmConfigInfo, +} + +impl VmmService { + /// Create a new VMM API server instance. + pub fn new(from_api: Receiver, to_api: Sender) -> Self { + VmmService { + from_api, + to_api, + machine_config: VmConfigInfo::default(), + } + } + + /// Handle requests from the HTTP API Server and send back replies. + pub fn run_vmm_action(&mut self, vmm: &mut Vmm, _event_mgr: &mut EventManager) -> Result<()> { + let request = match self.from_api.try_recv() { + Ok(t) => *t, + Err(TryRecvError::Empty) => { + warn!("Got a spurious notification from api thread"); + return Ok(()); + } + Err(TryRecvError::Disconnected) => { + panic!("The channel's sending half was disconnected. Cannot receive data."); + } + }; + debug!("receive vmm action: {:?}", request); + + let response = match request { + VmmAction::ConfigureBootSource(boot_source_body) => { + self.configure_boot_source(vmm, boot_source_body) + } + }; + + debug!("send vmm response: {:?}", response); + self.send_response(response) + } + + fn send_response(&self, result: VmmRequestResult) -> Result<()> { + self.to_api + .send(Box::new(result)) + .map_err(|_| ()) + .expect("vmm: one-shot API result channel has been closed"); + + Ok(()) + } + + fn configure_boot_source( + &self, + vmm: &mut Vmm, + boot_source_config: BootSourceConfig, + ) -> VmmRequestResult { + use super::BootSourceConfigError::{ + InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, InvalidVMID, + UpdateNotAllowedPostBoot, + }; + use super::VmmActionError::BootSource; + + let vm = vmm.get_vm_by_id_mut("").ok_or(BootSource(InvalidVMID))?; + if vm.is_vm_initialized() { + return Err(BootSource(UpdateNotAllowedPostBoot)); + } + + let kernel_file = File::open(&boot_source_config.kernel_path) + .map_err(|e| BootSource(InvalidKernelPath(e)))?; + + let initrd_file = match boot_source_config.initrd_path { + None => None, + Some(ref path) => Some(File::open(path).map_err(|e| BootSource(InvalidInitrdPath(e)))?), + }; + + let mut cmdline = linux_loader::cmdline::Cmdline::new(dbs_boot::layout::CMDLINE_MAX_SIZE); + let boot_args = boot_source_config + .boot_args + .clone() + .unwrap_or_else(|| String::from(DEFAULT_KERNEL_CMDLINE)); + cmdline + .insert_str(boot_args) + .map_err(|e| BootSource(InvalidKernelCommandLine(e)))?; + + let kernel_config = KernelConfigInfo::new(kernel_file, initrd_file, cmdline); + vm.set_kernel_config(kernel_config); + + Ok(VmmData::Empty) + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index c50e50b25..9cb27fdd7 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -178,3 +178,24 @@ pub enum LoadInitrdError { #[error("failed to read the initrd image: {0}")] ReadInitrd(#[source] std::io::Error), } + +/// A dedicated error type to glue with the vmm_epoll crate. +#[derive(Debug, thiserror::Error)] +pub enum EpollError { + /// Generic internal error. + #[error("unclassfied internal error")] + InternalError, + + /// Errors from the epoll subsystem. + #[error("failed to issue epoll syscall: {0}")] + EpollMgr(#[from] dbs_utils::epoll_manager::Error), + + /// Generic IO errors. + #[error(transparent)] + IOError(std::io::Error), + + #[cfg(feature = "dbs-virtio-devices")] + /// Errors from virtio devices. + #[error("failed to manager Virtio device: {0}")] + VirtIoDevice(#[source] VirtIoError), +} diff --git a/src/dragonball/src/event_manager.rs b/src/dragonball/src/event_manager.rs new file mode 100644 index 000000000..cd0da10c8 --- /dev/null +++ b/src/dragonball/src/event_manager.rs @@ -0,0 +1,169 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Event manager to manage and handle IO events and requests from API server . + +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +use dbs_utils::epoll_manager::{ + EpollManager, EventOps, EventSet, Events, MutEventSubscriber, SubscriberId, +}; +use log::{error, warn}; +use vmm_sys_util::eventfd::EventFd; + +use crate::error::{EpollError, Result}; +use crate::vmm::Vmm; + +// Statically assigned epoll slot for VMM events. +pub(crate) const EPOLL_EVENT_EXIT: u32 = 0; +pub(crate) const EPOLL_EVENT_API_REQUEST: u32 = 1; + +/// Shared information between vmm::vmm_thread_event_loop() and VmmEpollHandler. +pub(crate) struct EventContext { + pub api_event_fd: EventFd, + pub api_event_flag: bool, + pub exit_evt_flag: bool, +} + +impl EventContext { + /// Create a new instance of [`EventContext`]. + pub fn new(api_event_fd: EventFd) -> Result { + Ok(EventContext { + api_event_fd, + api_event_flag: false, + exit_evt_flag: false, + }) + } +} + +/// Event manager for VMM to handle API requests and IO events. +pub struct EventManager { + epoll_mgr: EpollManager, + subscriber_id: SubscriberId, + vmm_event_count: Arc, +} + +impl Drop for EventManager { + fn drop(&mut self) { + // Vmm -> Vm -> EpollManager -> VmmEpollHandler -> Vmm + // We need to remove VmmEpollHandler to break the circular reference + // so that Vmm can drop. + self.epoll_mgr + .remove_subscriber(self.subscriber_id) + .map_err(|e| { + error!("event_manager: remove_subscriber err. {:?}", e); + e + }) + .ok(); + } +} + +impl EventManager { + /// Create a new event manager associated with the VMM object. + pub fn new(vmm: &Arc>, epoll_mgr: EpollManager) -> Result { + let vmm_event_count = Arc::new(AtomicUsize::new(0)); + let handler: Box = Box::new(VmmEpollHandler { + vmm: vmm.clone(), + vmm_event_count: vmm_event_count.clone(), + }); + let subscriber_id = epoll_mgr.add_subscriber(handler); + + Ok(EventManager { + epoll_mgr, + subscriber_id, + vmm_event_count, + }) + } + + /// Get the underlying epoll event manager. + pub fn epoll_manager(&self) -> EpollManager { + self.epoll_mgr.clone() + } + + /// Registry the eventfd for exit notification. + pub fn register_exit_eventfd( + &mut self, + exit_evt: &EventFd, + ) -> std::result::Result<(), EpollError> { + let events = Events::with_data(exit_evt, EPOLL_EVENT_EXIT, EventSet::IN); + + self.epoll_mgr + .add_event(self.subscriber_id, events) + .map_err(EpollError::EpollMgr) + } + + /// Poll pending events and invoke registered event handler. + /// + /// # Arguments: + /// * max_events: maximum number of pending events to handle + /// * timeout: maximum time in milliseconds to wait + pub fn handle_events(&self, timeout: i32) -> std::result::Result { + self.epoll_mgr + .handle_events(timeout) + .map_err(EpollError::EpollMgr) + } + + /// Fetch the VMM event count and reset it to zero. + pub fn fetch_vmm_event_count(&self) -> usize { + self.vmm_event_count.swap(0, Ordering::AcqRel) + } +} + +struct VmmEpollHandler { + vmm: Arc>, + vmm_event_count: Arc, +} + +impl MutEventSubscriber for VmmEpollHandler { + fn process(&mut self, events: Events, _ops: &mut EventOps) { + // Do not try to recover when the lock has already been poisoned. + // And be careful to avoid deadlock between process() and vmm::vmm_thread_event_loop(). + let mut vmm = self.vmm.lock().unwrap(); + + match events.data() { + EPOLL_EVENT_API_REQUEST => { + if let Err(e) = vmm.event_ctx.api_event_fd.read() { + error!("event_manager: failed to read API eventfd, {:?}", e); + } + vmm.event_ctx.api_event_flag = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + EPOLL_EVENT_EXIT => { + let vm = vmm.get_vm_by_id("").unwrap(); + match vm.get_reset_eventfd() { + Some(ev) => { + if let Err(e) = ev.read() { + error!("event_manager: failed to read exit eventfd, {:?}", e); + } + } + None => warn!("event_manager: leftover exit event in epoll context!"), + } + vmm.event_ctx.exit_evt_flag = true; + self.vmm_event_count.fetch_add(1, Ordering::AcqRel); + } + _ => error!("event_manager: unknown epoll slot number {}", events.data()), + } + } + + fn init(&mut self, ops: &mut EventOps) { + // Do not expect poisoned lock. + let vmm = self.vmm.lock().unwrap(); + let events = Events::with_data( + &vmm.event_ctx.api_event_fd, + EPOLL_EVENT_API_REQUEST, + EventSet::IN, + ); + if let Err(e) = ops.add(events) { + error!( + "event_manager: failed to register epoll event for API server, {:?}", + e + ); + } + } +} diff --git a/src/dragonball/src/lib.rs b/src/dragonball/src/lib.rs index 6bf0b9298..bd58159ac 100644 --- a/src/dragonball/src/lib.rs +++ b/src/dragonball/src/lib.rs @@ -32,8 +32,13 @@ pub mod vcpu; /// Virtual machine manager for virtual machines. pub mod vm; +mod event_manager; mod io_manager; +mod vmm; + +pub use self::error::StartMicrovmError; pub use self::io_manager::IoManagerCached; +pub use self::vmm::Vmm; /// Success exit code. pub const EXIT_CODE_OK: u8 = 0; diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs new file mode 100644 index 000000000..a2c75ff71 --- /dev/null +++ b/src/dragonball/src/vmm.rs @@ -0,0 +1,215 @@ +// Copyright (C) 2020-2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::os::unix::io::RawFd; +use std::sync::{Arc, Mutex, RwLock}; + +use dbs_utils::epoll_manager::EpollManager; +use log::{error, info, warn}; +use seccompiler::BpfProgram; +use vmm_sys_util::eventfd::EventFd; + +use crate::api::v1::{InstanceInfo, VmmService}; +use crate::error::{EpollError, Result}; +use crate::event_manager::{EventContext, EventManager}; +use crate::vm::Vm; +use crate::{EXIT_CODE_GENERIC_ERROR, EXIT_CODE_OK}; + +/// Global coordinator to manage API servers, virtual machines, upgrade etc. +/// +/// Originally firecracker assumes an VMM only manages an VM, and doesn't distinguish VMM and VM. +/// Thus caused a mixed and confusion design. Now we have explicit build the object model as: +/// |---Vmm API Server--<-1:1-> HTTP API Server +/// | |----------<-1:1-> Shimv2/CRI API Server +/// | +/// Vmm <-1:N-> Vm <-1:1-> Address Space Manager <-1:N-> GuestMemory +/// ^ ^---1:1-> Device Manager <-1:N-> Device +/// | ^---1:1-> Resource Manager +/// | ^---1:N-> Vcpu +/// |---<-1:N-> Event Manager +pub struct Vmm { + pub(crate) event_ctx: EventContext, + epoll_manager: EpollManager, + + // Will change to a HashMap when enabling 1 VMM with multiple VMs. + vm: Vm, + + vcpu_seccomp_filter: BpfProgram, + vmm_seccomp_filter: BpfProgram, +} + +impl Vmm { + /// Create a Virtual Machine Monitor instance. + pub fn new( + api_shared_info: Arc>, + api_event_fd: EventFd, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let epoll_manager = EpollManager::default(); + Self::new_with_epoll_manager( + api_shared_info, + api_event_fd, + epoll_manager, + vmm_seccomp_filter, + vcpu_seccomp_filter, + kvm_fd, + ) + } + + /// Create a Virtual Machine Monitor instance with a epoll_manager. + pub fn new_with_epoll_manager( + api_shared_info: Arc>, + api_event_fd: EventFd, + epoll_manager: EpollManager, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + kvm_fd: Option, + ) -> Result { + let vm = Vm::new(kvm_fd, api_shared_info, epoll_manager.clone())?; + let event_ctx = EventContext::new(api_event_fd)?; + + Ok(Vmm { + event_ctx, + epoll_manager, + vm, + vcpu_seccomp_filter, + vmm_seccomp_filter, + }) + } + + /// Get a reference to a virtual machine managed by the VMM. + pub fn get_vm_by_id(&self, _id: &str) -> Option<&Vm> { + Some(&self.vm) + } + + /// Get a mutable reference to a virtual machine managed by the VMM. + pub fn get_vm_by_id_mut(&mut self, _id: &str) -> Option<&mut Vm> { + Some(&mut self.vm) + } + + /// Get the seccomp rules for vCPU threads. + pub fn vcpu_seccomp_filter(&self) -> BpfProgram { + self.vcpu_seccomp_filter.clone() + } + + /// Get the seccomp rules for VMM threads. + pub fn vmm_seccomp_filter(&self) -> BpfProgram { + self.vmm_seccomp_filter.clone() + } + + /// Run the event loop to service API requests. + /// + /// # Arguments + /// + /// * `vmm` - An Arc reference to the global Vmm instance. + /// * `service` - VMM Service provider. + pub fn run_vmm_event_loop(vmm: Arc>, mut service: VmmService) -> i32 { + let epoll_mgr = vmm.lock().unwrap().epoll_manager.clone(); + let mut event_mgr = + EventManager::new(&vmm, epoll_mgr).expect("Cannot create epoll manager"); + + 'poll: loop { + match event_mgr.handle_events(-1) { + Ok(_) => { + // Check whether there are pending vmm events. + if event_mgr.fetch_vmm_event_count() == 0 { + continue; + } + + let mut v = vmm.lock().unwrap(); + if v.event_ctx.api_event_flag { + // The run_vmm_action() needs to access event_mgr, so it could + // not be handled in EpollHandler::handle_events(). It has been + // delayed to the main loop. + v.event_ctx.api_event_flag = false; + service + .run_vmm_action(&mut v, &mut event_mgr) + .unwrap_or_else(|_| { + warn!("got spurious notification from api thread"); + }); + } + if v.event_ctx.exit_evt_flag { + info!("Gracefully terminated VMM control loop"); + return v.stop(EXIT_CODE_OK as i32); + } + } + Err(e) => { + error!("Abruptly exited VMM control loop: {:?}", e); + if let EpollError::EpollMgr(dbs_utils::epoll_manager::Error::Epoll(e)) = e { + if e.errno() == libc::EAGAIN || e.errno() == libc::EINTR { + continue 'poll; + } + } + return vmm.lock().unwrap().stop(EXIT_CODE_GENERIC_ERROR as i32); + } + } + } + } + + /// Waits for all vCPUs to exit and terminates the Dragonball process. + fn stop(&mut self, exit_code: i32) -> i32 { + info!("Vmm is stopping."); + if let Some(vm) = self.get_vm_by_id_mut("") { + if vm.is_vm_initialized() { + if let Err(e) = vm.remove_devices() { + warn!("failed to remove devices: {:?}", e); + } + + if let Err(e) = vm.reset_console() { + warn!("Cannot set canonical mode for the terminal. {:?}", e); + } + + // Now, we use exit_code instead of invoking _exit to + // terminate process, so all of vcpu threads should be stopped + // prior to vmm event loop. + match vm.vcpu_manager() { + Ok(mut mgr) => { + if let Err(e) = mgr.exit_all_vcpus() { + warn!("Failed to exit vcpu thread. {:?}", e); + } + } + Err(e) => warn!("Failed to get vcpu manager {:?}", e), + } + + // save exit state to VM, instead of exit process. + vm.vm_exit(exit_code); + } + } + + exit_code + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + pub fn create_vmm_instance() -> Vmm { + let info = Arc::new(RwLock::new(InstanceInfo::default())); + let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); + let seccomp_filter: BpfProgram = Vec::new(); + let epoll_manager = EpollManager::default(); + + Vmm::new_with_epoll_manager( + info, + event_fd, + epoll_manager, + seccomp_filter.clone(), + seccomp_filter, + None, + ) + .unwrap() + } + + #[test] + fn test_create_vmm_instance() { + create_vmm_instance(); + } +} From 95fa0c70c33c33781afaeef0349b1b199975a2e4 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 00:29:41 +0800 Subject: [PATCH 05/24] dragonball: add start microvm support We add microvm start related support in thie pull request. Signed-off-by: Liu Jiang Signed-off-by: wllenyj Signed-off-by: jingshan Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/boot_source.rs | 4 - src/dragonball/src/api/v1/vmm_action.rs | 60 ++++++++++-- src/dragonball/src/error.rs | 6 +- src/dragonball/src/event_manager.rs | 12 +-- src/dragonball/src/vm/aarch64.rs | 13 +++ src/dragonball/src/vm/mod.rs | 119 ++++++++++++++++++++--- src/dragonball/src/vm/x86_64.rs | 17 ++++ src/dragonball/src/vmm.rs | 6 +- 8 files changed, 202 insertions(+), 35 deletions(-) diff --git a/src/dragonball/src/api/v1/boot_source.rs b/src/dragonball/src/api/v1/boot_source.rs index e7de03043..0094c8d7c 100644 --- a/src/dragonball/src/api/v1/boot_source.rs +++ b/src/dragonball/src/api/v1/boot_source.rs @@ -35,10 +35,6 @@ pub struct BootSourceConfig { /// Errors associated with actions on `BootSourceConfig`. #[derive(Debug, thiserror::Error)] pub enum BootSourceConfigError { - /// The virutal machine instance ID is invalid. - #[error("the virtual machine instance ID is invalid")] - InvalidVMID, - /// The kernel file cannot be opened. #[error( "the kernel file cannot be opened due to invalid kernel path or invalid permissions: {0}" diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 1d7ac7e63..7f8a8b98d 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -12,7 +12,7 @@ use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use log::{debug, error, warn}; use vmm_sys_util::eventfd::EventFd; -use crate::error::Result; +use crate::error::{Result, StartMicrovmError, StopMicrovmError}; use crate::event_manager::EventManager; use crate::vm::{KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; @@ -22,19 +22,39 @@ use super::*; /// Wrapper for all errors associated with VMM actions. #[derive(Debug, thiserror::Error)] pub enum VmmActionError { + /// Invalid virtual machine instance ID. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + /// The action `ConfigureBootSource` failed either because of bad user input or an internal /// error. #[error("failed to configure boot source for VM: {0}")] BootSource(#[source] BootSourceConfigError), + + /// The action `StartMicroVm` failed either because of bad user input or an internal error. + #[error("failed to boot the VM: {0}")] + StartMicroVm(#[source] StartMicroVmError), + + /// The action `StopMicroVm` failed either because of bad user input or an internal error. + #[error("failed to shutdown the VM: {0}")] + StopMicrovm(#[source] StopMicrovmError), } /// This enum represents the public interface of the VMM. Each action contains various /// bits of information (ids, paths, etc.). #[derive(Clone, Debug, PartialEq)] pub enum VmmAction { - /// Configure the boot source of the microVM using as input the `ConfigureBootSource`. This - /// action can only be called before the microVM has booted. + /// Configure the boot source of the microVM using `BootSourceConfig`. + /// This action can only be called before the microVM has booted. ConfigureBootSource(BootSourceConfig), + + /// Launch the microVM. This action can only be called before the microVM has booted. + StartMicroVm, + + /// Shutdown the vmicroVM. This action can only be called after the microVM has booted. + /// When vmm is used as the crate by the other process, which is need to + /// shutdown the vcpu threads and destory all of the object. + ShutdownMicroVm, } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -75,7 +95,7 @@ impl VmmService { } /// Handle requests from the HTTP API Server and send back replies. - pub fn run_vmm_action(&mut self, vmm: &mut Vmm, _event_mgr: &mut EventManager) -> Result<()> { + pub fn run_vmm_action(&mut self, vmm: &mut Vmm, event_mgr: &mut EventManager) -> Result<()> { let request = match self.from_api.try_recv() { Ok(t) => *t, Err(TryRecvError::Empty) => { @@ -92,6 +112,8 @@ impl VmmService { VmmAction::ConfigureBootSource(boot_source_body) => { self.configure_boot_source(vmm, boot_source_body) } + VmmAction::StartMicroVm => self.start_microvm(vmm, event_mgr), + VmmAction::ShutdownMicroVm => self.shutdown_microvm(vmm), }; debug!("send vmm response: {:?}", response); @@ -113,12 +135,14 @@ impl VmmService { boot_source_config: BootSourceConfig, ) -> VmmRequestResult { use super::BootSourceConfigError::{ - InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, InvalidVMID, + InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, UpdateNotAllowedPostBoot, }; use super::VmmActionError::BootSource; - let vm = vmm.get_vm_by_id_mut("").ok_or(BootSource(InvalidVMID))?; + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(BootSource(UpdateNotAllowedPostBoot)); } @@ -145,4 +169,28 @@ impl VmmService { Ok(VmmData::Empty) } + + fn start_microvm(&mut self, vmm: &mut Vmm, event_mgr: &mut EventManager) -> VmmRequestResult { + use self::StartMicrovmError::MicroVMAlreadyRunning; + use self::VmmActionError::StartMicrovm; + + let vmm_seccomp_filter = vmm.vmm_seccomp_filter(); + let vcpu_seccomp_filter = vmm.vcpu_seccomp_filter(); + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(StartMicrovm(MicroVMAlreadyRunning)); + } + + vm.start_microvm(event_mgr, vmm_seccomp_filter, vcpu_seccomp_filter) + .map(|_| VmmData::Empty) + .map_err(StartMicrovm) + } + + fn shutdown_microvm(&mut self, vmm: &mut Vmm) -> VmmRequestResult { + vmm.event_ctx.exit_evt_triggered = true; + + Ok(VmmData::Empty) + } } diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 9cb27fdd7..f6b37c867 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -72,11 +72,15 @@ pub enum Error { /// Errors associated with starting the instance. #[derive(Debug, thiserror::Error)] -pub enum StartMicrovmError { +pub enum StartMicroVmError { /// Cannot read from an Event file descriptor. #[error("failure while reading from EventFd file descriptor")] EventFd, + /// Cannot add event to Epoll. + #[error("failure while registering epoll event for file descriptor")] + RegisterEvent, + /// The start command was issued more than once. #[error("the virtual machine is already running")] MicroVMAlreadyRunning, diff --git a/src/dragonball/src/event_manager.rs b/src/dragonball/src/event_manager.rs index cd0da10c8..9a2ad9d1c 100644 --- a/src/dragonball/src/event_manager.rs +++ b/src/dragonball/src/event_manager.rs @@ -27,8 +27,8 @@ pub(crate) const EPOLL_EVENT_API_REQUEST: u32 = 1; /// Shared information between vmm::vmm_thread_event_loop() and VmmEpollHandler. pub(crate) struct EventContext { pub api_event_fd: EventFd, - pub api_event_flag: bool, - pub exit_evt_flag: bool, + pub api_event_triggered: bool, + pub exit_evt_triggered: bool, } impl EventContext { @@ -36,8 +36,8 @@ impl EventContext { pub fn new(api_event_fd: EventFd) -> Result { Ok(EventContext { api_event_fd, - api_event_flag: false, - exit_evt_flag: false, + api_event_triggered: false, + exit_evt_triggered: false, }) } } @@ -131,7 +131,7 @@ impl MutEventSubscriber for VmmEpollHandler { if let Err(e) = vmm.event_ctx.api_event_fd.read() { error!("event_manager: failed to read API eventfd, {:?}", e); } - vmm.event_ctx.api_event_flag = true; + vmm.event_ctx.api_event_triggered = true; self.vmm_event_count.fetch_add(1, Ordering::AcqRel); } EPOLL_EVENT_EXIT => { @@ -144,7 +144,7 @@ impl MutEventSubscriber for VmmEpollHandler { } None => warn!("event_manager: leftover exit event in epoll context!"), } - vmm.event_ctx.exit_evt_flag = true; + vmm.event_ctx.exit_evt_triggered = true; self.vmm_event_count.fetch_add(1, Ordering::AcqRel); } _ => error!("event_manager: unknown epoll slot number {}", events.data()), diff --git a/src/dragonball/src/vm/aarch64.rs b/src/dragonball/src/vm/aarch64.rs index 739b1515c..25450f380 100644 --- a/src/dragonball/src/vm/aarch64.rs +++ b/src/dragonball/src/vm/aarch64.rs @@ -22,6 +22,7 @@ use vmm_sys_util::eventfd::EventFd; use super::{Vm, VmError}; use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; use crate::error::{Error, StartMicrovmError}; +use crate::event_manager::EventManager; /// Configures the system and should be called once per vm before starting vcpu threads. /// For aarch64, we only setup the FDT. @@ -142,4 +143,16 @@ impl Vm { ) .map_err(StartMicrovmError::ConfigureSystem) } + + pub(crate) fn register_events( + &mut self, + event_mgr: &mut EventManager, + ) -> std::result::Result<(), StartMicrovmError> { + let reset_evt = self.get_reset_eventfd().ok_or(StartMicrovmError::EventFd)?; + event_mgr + .register_exit_eventfd(reset_evt) + .map_err(|_| StartMicrovmError::RegisterEvent)?; + + Ok(()) + } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index fff044e3a..1089c99bf 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -31,6 +31,7 @@ use crate::api::v1::{InstanceInfo, InstanceState}; use crate::device_manager::console_manager::DmesgWriter; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; use crate::error::{LoadInitrdError, Result, StartMicrovmError, StopMicrovmError}; +use crate::event_manager::EventManager; use crate::kvm_context::KvmContext; use crate::resource_manager::ResourceManager; use crate::vcpu::{VcpuManager, VcpuManagerError}; @@ -355,20 +356,7 @@ impl Vm { if !self.is_vm_initialized() { Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr)) } else { - #[cfg(feature = "hotplug")] - { - if self.upcall_client().is_none() { - Err(StartMicrovmError::UpcallMissVsock) - } else if self.is_upcall_client_ready() { - Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) - } else { - Err(StartMicrovmError::UpcallNotReady) - } - } - #[cfg(not(feature = "hotplug"))] - { - Err(StartMicrovmError::MicroVMAlreadyRunning) - } + self.create_device_hotplug_context(epoll_mgr) } } @@ -671,12 +659,70 @@ impl Vm { ) .map_err(StartMicrovmError::KernelLoader); } + + /// Set up the initial microVM state and start the vCPU threads. + /// + /// This is the main entrance of the Vm object, to bring up the virtual machine instance into + /// running state. + pub fn start_microvm( + &mut self, + event_mgr: &mut EventManager, + vmm_seccomp_filter: BpfProgram, + vcpu_seccomp_filter: BpfProgram, + ) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM: received instance start command"); + if self.is_vm_initialized() { + return Err(StartMicrovmError::MicroVMAlreadyRunning); + } + + let request_ts = TimestampUs::default(); + self.start_instance_request_ts = request_ts.time_us; + self.start_instance_request_cpu_ts = request_ts.cputime_us; + + self.init_dmesg_logger(); + self.check_health()?; + + // Use expect() to crash if the other thread poisoned this lock. + self.shared_info + .write() + .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock") + .state = InstanceState::Starting; + + self.init_guest_memory()?; + let vm_as = self.vm_as().cloned().ok_or(StartMicrovmError::AddressManagerError( + AddressManagerError::GuestMemoryNotInitialized, + ))?; + + self.init_vcpu_manager(vm_as.clone(), vcpu_seccomp_filter) + .map_err(StartMicrovmError::Vcpu)?; + self.init_microvm(event_mgr.epoll_manager(), vm_as.clone(), request_ts)?; + self.init_configure_system(&vm_as)?; + self.init_upcall()?; + + info!(self.logger, "VM: register events"); + self.register_events(event_mgr)?; + + info!(self.logger, "VM: start vcpus"); + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .start_boot_vcpus(vmm_seccomp_filter) + .map_err(StartMicrovmError::Vcpu)?; + + // Use expect() to crash if the other thread poisoned this lock. + self.shared_info + .write() + .expect("Failed to start microVM because shared info couldn't be written due to poisoned lock") + .state = InstanceState::Running; + + info!(self.logger, "VM started"); + Ok(()) + } } #[cfg(feature = "hotplug")] impl Vm { /// initialize upcall client for guest os - pub(crate) fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + fn new_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { // get vsock inner connector for upcall let inner_connector = self .device_manager @@ -698,8 +744,51 @@ impl Vm { Ok(()) } + fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + info!(self.logger, "VM upcall init"); + if let Err(e) = self.new_upcall() { + info!( + self.logger, + "VM upcall init failed, no support hotplug: {}", e + ); + Err(e) + } else { + self.vcpu_manager() + .map_err(StartMicrovmError::Vcpu)? + .set_upcall_channel(self.upcall_client().clone()); + Ok(()) + } + } + /// Get upcall client. pub fn upcall_client(&self) -> &Option>> { &self.upcall_client } + + fn create_device_hotplug_context( + &self, + epoll_mgr: Option, + ) -> std::result::Result { + if self.upcall_client().is_none() { + Err(StartMicrovmError::UpcallMissVsock) + } else if self.is_upcall_client_ready() { + Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) + } else { + Err(StartMicrovmError::UpcallNotReady) + } + } +} + +#[cfg(not(feature = "hotplug"))] +impl Vm { + fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + Ok(()) + } + + fn create_device_hotplug_context( + &self, + _epoll_mgr: Option, + ) -> std::result::Result { + Err(StartMicrovmError::MicroVMAlreadyRunning) + } } diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 74336b6da..6bd74acc0 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -21,6 +21,7 @@ use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory}; use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; use crate::error::{Error, Result, StartMicrovmError}; +use crate::event_manager::EventManager; use crate::vm::{Vm, VmError}; /// Configures the system and should be called once per vm before starting vcpu @@ -273,4 +274,20 @@ impl Vm { .create_pit2(pit_config) .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) } + + pub(crate) fn register_events( + &mut self, + event_mgr: &mut EventManager, + ) -> std::result::Result<(), StartMicrovmError> { + let reset_evt = self + .device_manager + .get_reset_eventfd() + .map_err(StartMicrovmError::DeviceManager)?; + event_mgr + .register_exit_eventfd(&reset_evt) + .map_err(|_| StartMicrovmError::RegisterEvent)?; + self.reset_eventfd = Some(reset_evt); + + Ok(()) + } } diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs index a2c75ff71..362415dbd 100644 --- a/src/dragonball/src/vmm.rs +++ b/src/dragonball/src/vmm.rs @@ -124,18 +124,18 @@ impl Vmm { } let mut v = vmm.lock().unwrap(); - if v.event_ctx.api_event_flag { + if v.event_ctx.api_event_triggered { // The run_vmm_action() needs to access event_mgr, so it could // not be handled in EpollHandler::handle_events(). It has been // delayed to the main loop. - v.event_ctx.api_event_flag = false; + v.event_ctx.api_event_triggered = false; service .run_vmm_action(&mut v, &mut event_mgr) .unwrap_or_else(|_| { warn!("got spurious notification from api thread"); }); } - if v.event_ctx.exit_evt_flag { + if v.event_ctx.exit_evt_triggered { info!("Gracefully terminated VMM control loop"); return v.stop(EXIT_CODE_OK as i32); } From 89b9ba86032691f92edfc8b50d6b22067a836228 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 01:42:19 +0800 Subject: [PATCH 06/24] dragonball: add set_vm_configuration api Set virtual machine configuration configurations. Signed-off-by: wllenyj --- src/dragonball/src/api/v1/machine_config.rs | 86 +++++++++++ src/dragonball/src/api/v1/mod.rs | 4 + src/dragonball/src/api/v1/vmm_action.rs | 154 +++++++++++++++++++- src/dragonball/src/vm/mod.rs | 9 +- 4 files changed, 248 insertions(+), 5 deletions(-) create mode 100644 src/dragonball/src/api/v1/machine_config.rs diff --git a/src/dragonball/src/api/v1/machine_config.rs b/src/dragonball/src/api/v1/machine_config.rs new file mode 100644 index 000000000..e4ae22867 --- /dev/null +++ b/src/dragonball/src/api/v1/machine_config.rs @@ -0,0 +1,86 @@ +// Copyright (C) 2022 Alibaba Cloud. All rights reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/// We only support this number of vcpus for now. Mostly because we have set all vcpu related metrics as u8 +/// and breaking u8 will take extra efforts. +pub const MAX_SUPPORTED_VCPUS: u8 = 254; + +/// Memory hotplug value should have alignment in this size (unit: MiB) +pub const MEMORY_HOTPLUG_ALIGHMENT: u8 = 64; + +/// Errors associated with configuring the microVM. +#[derive(Debug, PartialEq, thiserror::Error)] +pub enum VmConfigError { + /// Cannot update the configuration of the microvm post boot. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The max vcpu count is invalid. + #[error("the vCPU number shouldn't large than {}", MAX_SUPPORTED_VCPUS)] + VcpuCountExceedsMaximum, + + /// The vcpu count is invalid. When hyperthreading is enabled, the `cpu_count` must be either + /// 1 or an even number. + #[error( + "the vCPU number '{0}' can only be 1 or an even number when hyperthreading is enabled" + )] + InvalidVcpuCount(u8), + + /// The threads_per_core is invalid. It should be either 1 or 2. + #[error("the threads_per_core number '{0}' can only be 1 or 2")] + InvalidThreadsPerCore(u8), + + /// The cores_per_die is invalid. It should be larger than 0. + #[error("the cores_per_die number '{0}' can only be larger than 0")] + InvalidCoresPerDie(u8), + + /// The dies_per_socket is invalid. It should be larger than 0. + #[error("the dies_per_socket number '{0}' can only be larger than 0")] + InvalidDiesPerSocket(u8), + + /// The socket number is invalid. It should be either 1 or 2. + #[error("the socket number '{0}' can only be 1 or 2")] + InvalidSocket(u8), + + /// max vcpu count inferred from cpu topology(threads_per_core * cores_per_die * dies_per_socket * sockets) should be larger or equal to vcpu_count + #[error("the max vcpu count inferred from cpu topology '{0}' (threads_per_core * cores_per_die * dies_per_socket * sockets) should be larger or equal to vcpu_count")] + InvalidCpuTopology(u8), + + /// The max vcpu count is invalid. + #[error( + "the max vCPU number '{0}' shouldn't less than vCPU count and can only be 1 or an even number when hyperthreading is enabled" + )] + InvalidMaxVcpuCount(u8), + + /// The memory size is invalid. The memory can only be an unsigned integer. + #[error("the memory size 0x{0:x}MiB is invalid")] + InvalidMemorySize(usize), + + /// The hotplug memory size is invalid. The memory can only be an unsigned integer. + #[error( + "the hotplug memory size '{0}' (MiB) is invalid, must be multiple of {}", + MEMORY_HOTPLUG_ALIGHMENT + )] + InvalidHotplugMemorySize(usize), + + /// The memory type is invalid. + #[error("the memory type '{0}' is invalid")] + InvalidMemType(String), + + /// The memory file path is invalid. + #[error("the memory file path is invalid")] + InvalidMemFilePath(String), + + /// NUMA region memory size is invalid + #[error("Total size of memory in NUMA regions: {0}, should matches memory size in config")] + InvalidNumaRegionMemorySize(usize), + + /// NUMA region vCPU count is invalid + #[error("Total counts of vCPUs in NUMA regions: {0}, should matches max vcpu count in config")] + InvalidNumaRegionCpuCount(u16), + + /// NUMA region vCPU count is invalid + #[error("Max id of vCPUs in NUMA regions: {0}, should matches max vcpu count in config")] + InvalidNumaRegionCpuMaxId(u16), +} diff --git a/src/dragonball/src/api/v1/mod.rs b/src/dragonball/src/api/v1/mod.rs index c1ab3f5d3..2077c982e 100644 --- a/src/dragonball/src/api/v1/mod.rs +++ b/src/dragonball/src/api/v1/mod.rs @@ -15,3 +15,7 @@ pub use self::boot_source::{BootSourceConfig, BootSourceConfigError, DEFAULT_KER /// Wrapper over the microVM general information. mod instance_info; pub use self::instance_info::{InstanceInfo, InstanceState}; + +/// Wrapper for configuring the memory and CPU of the microVM. +mod machine_config; +pub use self::machine_config::{VmConfigError, MAX_SUPPORTED_VCPUS}; diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 7f8a8b98d..754d877b4 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -9,12 +9,12 @@ use std::fs::File; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; -use log::{debug, error, warn}; +use log::{debug, error, info, warn}; use vmm_sys_util::eventfd::EventFd; use crate::error::{Result, StartMicrovmError, StopMicrovmError}; use crate::event_manager::EventManager; -use crate::vm::{KernelConfigInfo, VmConfigInfo}; +use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; use super::*; @@ -38,6 +38,11 @@ pub enum VmmActionError { /// The action `StopMicroVm` failed either because of bad user input or an internal error. #[error("failed to shutdown the VM: {0}")] StopMicrovm(#[source] StopMicrovmError), + + /// One of the actions `GetVmConfiguration` or `SetVmConfiguration` failed either because of bad + /// input or an internal error. + #[error("failed to set configuration for the VM: {0}")] + MachineConfig(#[source] VmConfigError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -55,6 +60,13 @@ pub enum VmmAction { /// When vmm is used as the crate by the other process, which is need to /// shutdown the vcpu threads and destory all of the object. ShutdownMicroVm, + + /// Get the configuration of the microVM. + GetVmConfiguration, + + /// Set the microVM configuration (memory & vcpu) using `VmConfig` as input. This + /// action can only be called before the microVM has booted. + SetVmConfiguration(VmConfigInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -63,6 +75,8 @@ pub enum VmmAction { pub enum VmmData { /// No data is sent on the channel. Empty, + /// The microVM configuration represented by `VmConfigInfo`. + MachineConfiguration(Box), } /// Request data type used to communicate between the API and the VMM. @@ -114,6 +128,12 @@ impl VmmService { } VmmAction::StartMicroVm => self.start_microvm(vmm, event_mgr), VmmAction::ShutdownMicroVm => self.shutdown_microvm(vmm), + VmmAction::GetVmConfiguration => Ok(VmmData::MachineConfiguration(Box::new( + self.machine_config.clone(), + ))), + VmmAction::SetVmConfiguration(machine_config) => { + self.set_vm_configuration(vmm, machine_config) + } }; debug!("send vmm response: {:?}", response); @@ -193,4 +213,134 @@ impl VmmService { Ok(VmmData::Empty) } + + /// Set virtual machine configuration configurations. + pub fn set_vm_configuration( + &mut self, + vmm: &mut Vmm, + machine_config: VmConfigInfo, + ) -> VmmRequestResult { + use self::VmConfigError::*; + use self::VmmActionError::MachineConfig; + + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(MachineConfig(UpdateNotAllowedPostBoot)); + } + + // If the check is successful, set it up together. + let mut config = vm.vm_config().clone(); + if config.vcpu_count != machine_config.vcpu_count { + let vcpu_count = machine_config.vcpu_count; + // Check that the vcpu_count value is >=1. + if vcpu_count == 0 { + return Err(MachineConfig(InvalidVcpuCount(vcpu_count))); + } + config.vcpu_count = vcpu_count; + } + + if config.cpu_topology != machine_config.cpu_topology { + let cpu_topology = &machine_config.cpu_topology; + // Check if dies_per_socket, cores_per_die, threads_per_core and socket number is valid + if cpu_topology.threads_per_core < 1 || cpu_topology.threads_per_core > 2 { + return Err(MachineConfig(InvalidThreadsPerCore( + cpu_topology.threads_per_core, + ))); + } + let vcpu_count_from_topo = cpu_topology + .sockets + .checked_mul(cpu_topology.dies_per_socket) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? + .checked_mul(cpu_topology.cores_per_die) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? + .checked_mul(cpu_topology.threads_per_core) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))?; + if vcpu_count_from_topo > MAX_SUPPORTED_VCPUS { + return Err(MachineConfig(VcpuCountExceedsMaximum)); + } + if vcpu_count_from_topo < config.vcpu_count { + return Err(MachineConfig(InvalidCpuTopology(vcpu_count_from_topo))); + } + config.cpu_topology = cpu_topology.clone(); + } else { + // the same default + let mut default_cpu_topology = CpuTopology { + threads_per_core: 1, + cores_per_die: config.vcpu_count, + dies_per_socket: 1, + sockets: 1, + }; + if machine_config.max_vcpu_count > config.vcpu_count { + default_cpu_topology.cores_per_die = machine_config.max_vcpu_count; + } + config.cpu_topology = default_cpu_topology; + } + let cpu_topology = &config.cpu_topology; + let max_vcpu_from_topo = cpu_topology.threads_per_core + * cpu_topology.cores_per_die + * cpu_topology.dies_per_socket + * cpu_topology.sockets; + // If the max_vcpu_count inferred by cpu_topology is not equal to + // max_vcpu_count, max_vcpu_count will be changed. currently, max vcpu size + // is used when cpu_topology is not defined and help define the cores_per_die + // for the default cpu topology. + let mut max_vcpu_count = machine_config.max_vcpu_count; + if max_vcpu_count < config.vcpu_count { + return Err(MachineConfig(InvalidMaxVcpuCount(max_vcpu_count))); + } + if max_vcpu_from_topo != max_vcpu_count { + max_vcpu_count = max_vcpu_from_topo; + info!("Since max_vcpu_count is not equal to cpu topo information, we have changed the max vcpu count to {}", max_vcpu_from_topo); + } + config.max_vcpu_count = max_vcpu_count; + + config.cpu_pm = machine_config.cpu_pm; + config.mem_type = machine_config.mem_type; + + let mem_size_mib_value = machine_config.mem_size_mib; + // Support 1TB memory at most, 2MB aligned for huge page. + if mem_size_mib_value == 0 || mem_size_mib_value > 0x10_0000 || mem_size_mib_value % 2 != 0 + { + return Err(MachineConfig(InvalidMemorySize(mem_size_mib_value))); + } + config.mem_size_mib = mem_size_mib_value; + + config.mem_file_path = machine_config.mem_file_path.clone(); + + let reserve_memory_bytes = machine_config.reserve_memory_bytes; + // Reserved memory must be 2MB aligned and less than half of the total memory. + if reserve_memory_bytes % 0x200000 != 0 + || reserve_memory_bytes > (config.mem_size_mib as u64) << 20 + { + return Err(MachineConfig(InvalidReservedMemorySize( + reserve_memory_bytes as usize >> 20, + ))); + } + config.reserve_memory_bytes = reserve_memory_bytes; + if config.mem_type == "hugetlbfs" && config.mem_file_path.is_empty() { + return Err(MachineConfig(InvalidMemFilePath("".to_owned()))); + } + config.vpmu_feature = machine_config.vpmu_feature; + + let vm_id = vm.shared_info().read().unwrap().id.clone(); + let serial_path = match machine_config.serial_path { + Some(value) => value, + None => { + if config.serial_path.is_none() { + String::from("/run/dragonball/") + &vm_id + "_com1" + } else { + // Safe to unwrap() because we have checked it has a value. + config.serial_path.as_ref().unwrap().clone() + } + } + }; + config.serial_path = Some(serial_path); + + vm.set_vm_config(config.clone()); + self.machine_config = config; + + Ok(VmmData::Empty) + } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 1089c99bf..7c83aabb4 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -689,9 +689,12 @@ impl Vm { .state = InstanceState::Starting; self.init_guest_memory()?; - let vm_as = self.vm_as().cloned().ok_or(StartMicrovmError::AddressManagerError( - AddressManagerError::GuestMemoryNotInitialized, - ))?; + let vm_as = self + .vm_as() + .cloned() + .ok_or(StartMicrovmError::AddressManagerError( + AddressManagerError::GuestMemoryNotInitialized, + ))?; self.init_vcpu_manager(vm_as.clone(), vcpu_seccomp_filter) .map_err(StartMicrovmError::Vcpu)?; From a1593322bd013e12127fd24a0a6216ee9996d1ef Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 02:10:58 +0800 Subject: [PATCH 07/24] dragonball: add vsock api to api server Enables vsock to use the api for device configuration. Signed-off-by: wllenyj --- src/dragonball/src/api/v1/vmm_action.rs | 50 +++++++++++++++++++++++++ src/dragonball/src/vm/mod.rs | 5 +++ 2 files changed, 55 insertions(+) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 754d877b4..d4d598da8 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -17,6 +17,9 @@ use crate::event_manager::EventManager; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; +#[cfg(feature = "virtio-vsock")] +use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; + use super::*; /// Wrapper for all errors associated with VMM actions. @@ -43,6 +46,11 @@ pub enum VmmActionError { /// input or an internal error. #[error("failed to set configuration for the VM: {0}")] MachineConfig(#[source] VmConfigError), + + #[cfg(feature = "virtio-vsock")] + /// The action `InsertVsockDevice` failed either because of bad user input or an internal error. + #[error("failed to add virtio-vsock device: {0}")] + Vsock(#[source] VsockDeviceError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -67,6 +75,12 @@ pub enum VmmAction { /// Set the microVM configuration (memory & vcpu) using `VmConfig` as input. This /// action can only be called before the microVM has booted. SetVmConfiguration(VmConfigInfo), + + #[cfg(feature = "virtio-vsock")] + /// Add a new vsock device or update one that already exists using the + /// `VsockDeviceConfig` as input. This action can only be called before the microVM has + /// booted. The response is sent using the `OutcomeSender`. + InsertVsockDevice(VsockDeviceConfigInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -134,6 +148,8 @@ impl VmmService { VmmAction::SetVmConfiguration(machine_config) => { self.set_vm_configuration(vmm, machine_config) } + #[cfg(feature = "virtio-vsock")] + VmmAction::InsertVsockDevice(vsock_cfg) => self.add_vsock_device(vmm, vsock_cfg), }; debug!("send vmm response: {:?}", response); @@ -343,4 +359,38 @@ impl VmmService { Ok(VmmData::Empty) } + + #[cfg(feature = "virtio-vsock")] + fn add_vsock_device(&self, vmm: &mut Vmm, config: VsockDeviceConfigInfo) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + if vm.is_vm_initialized() { + return Err(VmmActionError::Vsock( + VsockDeviceError::UpdateNotAllowedPostBoot, + )); + } + + // VMADDR_CID_ANY (-1U) means any address for binding; + // VMADDR_CID_HYPERVISOR (0) is reserved for services built into the hypervisor; + // VMADDR_CID_RESERVED (1) must not be used; + // VMADDR_CID_HOST (2) is the well-known address of the host. + if config.guest_cid <= 2 { + return Err(VmmActionError::Vsock(VsockDeviceError::GuestCIDInvalid( + config.guest_cid, + ))); + } + + info!("add_vsock_device: {:?}", config); + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + VmmActionError::Vsock(VsockDeviceError::UpdateNotAllowedPostBoot) + })?; + + vm.device_manager_mut() + .vsock_manager + .insert_device(ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Vsock) + } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 7c83aabb4..4063b7770 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -250,6 +250,11 @@ impl Vm { &self.device_manager } + /// Gets a mutable reference to the device manager by this VM. + pub fn device_manager_mut(&mut self) -> &mut DeviceManager { + &mut self.device_manager + } + /// Get a reference to EpollManager. pub fn epoll_manager(&self) -> &EpollManager { &self.epoll_manager From 090de2dae2d19a08d32f91bfaba167a715a56682 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Tue, 24 May 2022 00:11:25 +0800 Subject: [PATCH 08/24] dragonball: fix the clippy errors. fix clippy errors and do fmt in this PR. Signed-off-by: wllenyj --- src/dragonball/src/address_space_manager.rs | 20 +++++++++---------- src/dragonball/src/config_manager.rs | 15 +++++++++++++- .../src/device_manager/console_manager.rs | 14 ++++++------- src/dragonball/src/vcpu/vcpu_manager.rs | 14 +++++++------ 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs index 2e4276626..5a54c0da0 100644 --- a/src/dragonball/src/address_space_manager.rs +++ b/src/dragonball/src/address_space_manager.rs @@ -642,7 +642,7 @@ impl AddressSpaceMgr { let node = self .numa_nodes .entry(guest_numa_node_id) - .or_insert(NumaNode::new()); + .or_insert_with(NumaNode::new); node.add_info(&NumaNodeInfo { base: region.start_addr(), size: region.len(), @@ -736,7 +736,7 @@ mod tests { .unwrap(); val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x1)).unwrap(); assert_eq!(val, 0xa5); - val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x0)).unwrap(); + val = gmem.read_obj(GuestAddress(GUEST_MEM_START)).unwrap(); assert_eq!(val, 1); val = gmem.read_obj(GuestAddress(GUEST_MEM_START + 0x2)).unwrap(); assert_eq!(val, 3); @@ -837,7 +837,7 @@ mod tests { size: mem_size >> 20, host_numa_node_id: None, guest_numa_node_id: Some(0), - vcpu_ids: cpu_vec.clone(), + vcpu_ids: cpu_vec, }]; let mut builder = AddressSpaceMgrBuilder::new("hugeshmem", "").unwrap(); builder.toggle_prealloc(true); @@ -852,9 +852,9 @@ mod tests { assert_eq!(builder.mem_type, "shmem"); assert_eq!(builder.mem_file, "/tmp/shmem"); assert_eq!(builder.mem_index, 0); - assert_eq!(builder.mem_suffix, true); - assert_eq!(builder.mem_prealloc, false); - assert_eq!(builder.dirty_page_logging, false); + assert!(builder.mem_suffix); + assert!(!builder.mem_prealloc); + assert!(!builder.dirty_page_logging); assert!(builder.vmfd.is_none()); assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem0"); @@ -867,10 +867,10 @@ mod tests { assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); assert_eq!(builder.mem_index, 3); - builder.toggle_prealloc(true); - builder.toggle_dirty_page_logging(true); - assert_eq!(builder.mem_prealloc, true); - assert_eq!(builder.dirty_page_logging, true); + builder.set_prealloc(true); + builder.set_dirty_page_logging(true); + assert!(builder.mem_prealloc); + assert!(builder.dirty_page_logging); } #[test] diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs index de16ab0fb..e25b792e7 100644 --- a/src/dragonball/src/config_manager.rs +++ b/src/dragonball/src/config_manager.rs @@ -178,6 +178,15 @@ where info_list: Vec>, } +impl Default for DeviceConfigInfos +where + T: ConfigItem + Clone, +{ + fn default() -> Self { + Self::new() + } +} + impl DeviceConfigInfos where T: ConfigItem + Clone, @@ -221,12 +230,16 @@ where } } - #[allow(dead_code)] /// Get number of device configuration information objects. pub fn len(&self) -> usize { self.info_list.len() } + /// Returns true if the device configuration information objects is empty. + pub fn is_empty(&self) -> bool { + self.info_list.len() == 0 + } + /// Add a device configuration information object at the tail. pub fn push(&mut self, info: DeviceConfigInfo) { self.info_list.push(info); diff --git a/src/dragonball/src/device_manager/console_manager.rs b/src/dragonball/src/device_manager/console_manager.rs index 4ebad5816..1e3b2a2f2 100644 --- a/src/dragonball/src/device_manager/console_manager.rs +++ b/src/dragonball/src/device_manager/console_manager.rs @@ -426,13 +426,13 @@ mod tests { }; writer.flush().unwrap(); - writer.write("".as_bytes()).unwrap(); - writer.write("\n".as_bytes()).unwrap(); - writer.write("\n\n".as_bytes()).unwrap(); - writer.write("\n\n\n".as_bytes()).unwrap(); - writer.write("12\n23\n34\n56".as_bytes()).unwrap(); - writer.write("78".as_bytes()).unwrap(); - writer.write("90\n".as_bytes()).unwrap(); + writer.write_all("".as_bytes()).unwrap(); + writer.write_all("\n".as_bytes()).unwrap(); + writer.write_all("\n\n".as_bytes()).unwrap(); + writer.write_all("\n\n\n".as_bytes()).unwrap(); + writer.write_all("12\n23\n34\n56".as_bytes()).unwrap(); + writer.write_all("78".as_bytes()).unwrap(); + writer.write_all("90\n".as_bytes()).unwrap(); writer.flush().unwrap(); } diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 189dfc615..278a88ece 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -784,14 +784,13 @@ impl VcpuManager { mod hotplug { use std::cmp::Ordering; - use super::*; - #[cfg(not(test))] - use dbs_upcall::CpuDevRequest; - use dbs_upcall::{DevMgrRequest, DevMgrResponse, UpcallClientRequest, UpcallClientResponse}; + use dbs_upcall::{CpuDevRequest, DevMgrRequest}; - #[cfg(all(target_arch = "x86_64", not(test)))] + use super::*; + + #[cfg(all(target_arch = "x86_64"))] use dbs_boot::mptable::APIC_VERSION; - #[cfg(all(target_arch = "aarch64", not(test)))] + #[cfg(all(target_arch = "aarch64"))] const APIC_VERSION: u8 = 0; #[cfg(feature = "dbs-upcall")] @@ -933,6 +932,9 @@ mod hotplug { upcall_client: Arc>, request: DevMgrRequest, ) -> std::result::Result<(), VcpuManagerError> { + // This is used to fix clippy warnings. + use dbs_upcall::{DevMgrResponse, UpcallClientRequest, UpcallClientResponse}; + let vcpu_state_event = self.vcpu_state_event.try_clone().unwrap(); let vcpu_state_sender = self.vcpu_state_sender.clone(); From 87d38ae49fa0dc69d0956e50522d1cc207d0975b Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Fri, 10 Jun 2022 16:39:52 +0800 Subject: [PATCH 09/24] Doc: add document for Dragonball API add detailed explanation for Dragonball API Signed-off-by: Chao Wu --- src/dragonball/docs/api.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/dragonball/docs/api.md b/src/dragonball/docs/api.md index cd2bc2db8..dab49835a 100644 --- a/src/dragonball/docs/api.md +++ b/src/dragonball/docs/api.md @@ -3,5 +3,25 @@ We provide plenty API for Kata runtime to interact with `Dragonball` virtual machine manager. This document provides the introduction for each of them. -TODO: Details will be added in the Part III PR for `Dragonball` +## `ConfigureBootSource` +Configure the boot source of the VM using `BootSourceConfig`. This action can only be called before the VM has booted. + +### Boot Source Config +1. `kernel_path`: Path of the kernel image. `Dragonball` only supports compressed kernel image for now. +2. `initrd_path`: Path of the initrd (could be None) +3. `boot_args`: Boot arguments passed to the kernel (could be None) + +## `SetVmConfiguration` +Set virtual machine configuration using `VmConfigInfo` to initialize VM. + +### VM Config Info +1. `vcpu_count`: Number of vCPU to start. Currently we only support up to 255 vCPUs. +2. `max_vcpu_count`: Max number of vCPU can be added through CPU hotplug. +3. `cpu_pm`: CPU power management. +4. `cpu_topology`: CPU topology information (including `threads_per_core`, `cores_per_die`, `dies_per_socket` and `sockets`). +5. `vpmu_feature`: `vPMU` feature level. +6. `mem_type`: Memory type that can be either `hugetlbfs` or `shmem`, default is `shmem`. +7. `mem_file_path` : Memory file path. +8. `mem_size_mib`: The memory size in MiB. The maximum memory size is 1TB. +9. `serial_path`: Optional sock path. From 3d20387a25a8910413185a8317d073f087e71a6e Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 17:17:02 +0800 Subject: [PATCH 10/24] dragonball: add virtio-blk device support Virtio-blk devices are supported. Signed-off-by: wllenyj --- src/dragonball/Cargo.toml | 1 + src/dragonball/src/api/v1/vmm_action.rs | 102 +++ .../src/device_manager/blk_dev_mgr.rs | 795 ++++++++++++++++++ src/dragonball/src/device_manager/mod.rs | 45 +- src/dragonball/src/error.rs | 5 + 5 files changed, 945 insertions(+), 3 deletions(-) create mode 100644 src/dragonball/src/device_manager/blk_dev_mgr.rs diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index d1b87beda..fcedd492c 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -49,6 +49,7 @@ acpi = [] atomic-guest-memory = [] hotplug = ["virtio-vsock"] virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"] +virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"] [patch.'crates-io'] dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "7a8e832b53d66994d6a16f0513d69f540583dcd0" } diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index d4d598da8..253a6854c 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -17,6 +17,10 @@ use crate::event_manager::EventManager; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; +#[cfg(feature = "virtio-blk")] +use crate::device_manager::blk_dev_mgr::{ + BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, +}; #[cfg(feature = "virtio-vsock")] use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; @@ -29,6 +33,10 @@ pub enum VmmActionError { #[error("the virtual machine instance ID is invalid")] InvalidVMID, + /// Failed to hotplug, due to Upcall not ready. + #[error("Upcall not ready, can't hotplug device.")] + UpcallNotReady, + /// The action `ConfigureBootSource` failed either because of bad user input or an internal /// error. #[error("failed to configure boot source for VM: {0}")] @@ -51,6 +59,11 @@ pub enum VmmActionError { /// The action `InsertVsockDevice` failed either because of bad user input or an internal error. #[error("failed to add virtio-vsock device: {0}")] Vsock(#[source] VsockDeviceError), + + #[cfg(feature = "virtio-blk")] + /// Block device related errors. + #[error("virtio-blk device error: {0}")] + Block(#[source] BlockDeviceError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -81,6 +94,20 @@ pub enum VmmAction { /// `VsockDeviceConfig` as input. This action can only be called before the microVM has /// booted. The response is sent using the `OutcomeSender`. InsertVsockDevice(VsockDeviceConfigInfo), + + #[cfg(feature = "virtio-blk")] + /// Add a new block device or update one that already exists using the `BlockDeviceConfig` as + /// input. This action can only be called before the microVM has booted. + InsertBlockDevice(BlockDeviceConfigInfo), + + #[cfg(feature = "virtio-blk")] + /// Remove a new block device for according to given drive_id + RemoveBlockDevice(String), + + #[cfg(feature = "virtio-blk")] + /// Update a block device, after microVM start. Currently, the only updatable properties + /// are the RX and TX rate limiters. + UpdateBlockDevice(BlockDeviceConfigUpdateInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -150,6 +177,18 @@ impl VmmService { } #[cfg(feature = "virtio-vsock")] VmmAction::InsertVsockDevice(vsock_cfg) => self.add_vsock_device(vmm, vsock_cfg), + #[cfg(feature = "virtio-blk")] + VmmAction::InsertBlockDevice(block_device_config) => { + self.add_block_device(vmm, event_mgr, block_device_config) + } + #[cfg(feature = "virtio-blk")] + VmmAction::UpdateBlockDevice(blk_update) => { + self.update_blk_rate_limiters(vmm, blk_update) + } + #[cfg(feature = "virtio-blk")] + VmmAction::RemoveBlockDevice(drive_id) => { + self.remove_block_device(vmm, event_mgr, &drive_id) + } }; debug!("send vmm response: {:?}", response); @@ -393,4 +432,67 @@ impl VmmService { .map(|_| VmmData::Empty) .map_err(VmmActionError::Vsock) } + + #[cfg(feature = "virtio-blk")] + // Only call this function as part of the API. + // If the drive_id does not exist, a new Block Device Config is added to the list. + fn add_block_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: BlockDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicrovmError::UpcallNotReady = e { + return VmmActionError::UpcallNotReady; + } + VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot) + })?; + + BlockDeviceMgr::insert_device(vm.device_manager_mut(), ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } + + #[cfg(feature = "virtio-blk")] + /// Updates configuration for an emulated net device as described in `config`. + fn update_blk_rate_limiters( + &mut self, + vmm: &mut Vmm, + config: BlockDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + + BlockDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } + + #[cfg(feature = "virtio-blk")] + // Only call this function as part of the API. + // If the drive_id does not exist, a new Block Device Config is added to the list. + fn remove_block_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + drive_id: &str, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|_| VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot))?; + + BlockDeviceMgr::remove_device(vm.device_manager_mut(), ctx, drive_id) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::Block) + } } diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs new file mode 100644 index 000000000..8a1b2d02f --- /dev/null +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -0,0 +1,795 @@ +// Copyright 2020-2022 Alibaba, Inc. or its affiliates. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +//! Device manager for virtio-blk and vhost-user-blk devices. +use std::collections::{vec_deque, VecDeque}; +use std::convert::TryInto; +use std::fs::OpenOptions; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::block::{aio::Aio, io_uring::IoUring, Block, LocalFile, Ufile}; +use serde_derive::{Deserialize, Serialize}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo}; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::vm::KernelConfigInfo; + +use super::DbsMmioV2Device; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; + +macro_rules! info( + ($l:expr, $($args:tt)+) => { + slog::info!($l, $($args)+; slog::o!("subsystem" => "block_manager")) + }; +); + +macro_rules! error( + ($l:expr, $($args:tt)+) => { + slog::error!($l, $($args)+; slog::o!("subsystem" => "block_manager")) + }; +); + +macro_rules! get_bucket_update { + ($self:ident, $rate_limiter: ident, $metric: ident) => {{ + match &$self.$rate_limiter { + Some(rl_cfg) => { + let tb_cfg = &rl_cfg.$metric; + dbs_utils::rate_limiter::RateLimiter::make_bucket( + tb_cfg.size, + tb_cfg.one_time_burst, + tb_cfg.refill_time, + ) + // Updated active rate-limiter. + .map(dbs_utils::rate_limiter::BucketUpdate::Update) + // Updated/deactivated rate-limiter + .unwrap_or(dbs_utils::rate_limiter::BucketUpdate::Disabled) + } + // No update to the rate-limiter. + None => dbs_utils::rate_limiter::BucketUpdate::None, + } + }}; +} + +/// Default queue size for VirtIo block devices. +pub const QUEUE_SIZE: u16 = 128; + +/// Errors associated with the operations allowed on a drive. +#[derive(Debug, thiserror::Error)] +pub enum BlockDeviceError { + /// Invalid VM instance ID. + #[error("invalid VM instance id")] + InvalidVMID, + + /// The block device path is invalid. + #[error("invalid block device path '{0}'")] + InvalidBlockDevicePath(PathBuf), + + /// The block device type is invalid. + #[error("invalid block device type")] + InvalidBlockDeviceType, + + /// The block device path was already used for a different drive. + #[error("block device path '{0}' already exists")] + BlockDevicePathAlreadyExists(PathBuf), + + /// The device id doesn't exist. + #[error("invalid block device id '{0}'")] + InvalidDeviceId(String), + + /// Cannot perform the requested operation after booting the microVM. + #[error("block device does not support runtime update")] + UpdateNotAllowedPostBoot, + + /// A root block device was already added. + #[error("could not add multiple virtual machine root devices")] + RootBlockDeviceAlreadyAdded, + + /// Failed to send patch message to block epoll handler. + #[error("could not send patch message to the block epoll handler")] + BlockEpollHanderSendFail, + + /// Failure from device manager, + #[error("device manager errors: {0}")] + DeviceManager(#[from] DeviceMgrError), + + /// Failure from virtio subsystem. + #[error(transparent)] + Virtio(virtio::Error), + + /// Unable to seek the block device backing file due to invalid permissions or + /// the file was deleted/corrupted. + #[error("cannot create block device: {0}")] + CreateBlockDevice(#[source] virtio::Error), + + /// Cannot open the block device backing file. + #[error("cannot open the block device backing file: {0}")] + OpenBlockDevice(#[source] std::io::Error), + + /// Cannot initialize a MMIO Block Device or add a device to the MMIO Bus. + #[error("failure while registering block device: {0}")] + RegisterBlockDevice(#[source] DeviceMgrError), +} + +/// Type of low level storage device/protocol for virtio-blk devices. +#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)] +pub enum BlockDeviceType { + /// Unknown low level device type. + Unknown, + /// Vhost-user-blk based low level device. + /// SPOOL is a reliable NVMe virtualization system for the cloud environment. + /// You could learn more SPOOL here: https://www.usenix.org/conference/atc20/presentation/xue + Spool, + /// Local disk/file based low level device. + RawBlock, +} + +impl BlockDeviceType { + /// Get type of low level storage device/protocol by parsing `path`. + pub fn get_type(path: &str) -> BlockDeviceType { + // SPOOL path should be started with "spool", e.g. "spool:/device1" + if path.starts_with("spool:/") { + BlockDeviceType::Spool + } else { + BlockDeviceType::RawBlock + } + } +} + +/// Configuration information for a block device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct BlockDeviceConfigUpdateInfo { + /// Unique identifier of the drive. + pub drive_id: String, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, +} + +impl BlockDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the bandwidth rate limiter. + pub fn bytes(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the ops rate limiter. + pub fn ops(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, ops) + } +} + +/// Configuration information for a block device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct BlockDeviceConfigInfo { + /// Unique identifier of the drive. + pub drive_id: String, + /// Type of low level storage/protocol. + pub device_type: BlockDeviceType, + /// Path of the drive. + pub path_on_host: PathBuf, + /// If set to true, it makes the current device the root block device. + /// Setting this flag to true will mount the block device in the + /// guest under /dev/vda unless the part_uuid is present. + pub is_root_device: bool, + /// Part-UUID. Represents the unique id of the boot partition of this device. + /// It is optional and it will be used only if the `is_root_device` field is true. + pub part_uuid: Option, + /// If set to true, the drive is opened in read-only mode. Otherwise, the + /// drive is opened as read-write. + pub is_read_only: bool, + /// If set to false, the drive is opened with buffered I/O mode. Otherwise, the + /// drive is opened with direct I/O mode. + pub is_direct: bool, + /// Don't close `path_on_host` file when dropping the device. + pub no_drop: bool, + /// Block device multi-queue + pub num_queues: usize, + /// Virtio queue size. + pub queue_size: u16, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl std::default::Default for BlockDeviceConfigInfo { + fn default() -> Self { + Self { + drive_id: String::default(), + device_type: BlockDeviceType::RawBlock, + path_on_host: PathBuf::default(), + is_root_device: false, + part_uuid: None, + is_read_only: false, + is_direct: Self::default_direct(), + no_drop: Self::default_no_drop(), + num_queues: Self::default_num_queues(), + queue_size: 256, + rate_limiter: None, + use_shared_irq: None, + use_generic_irq: None, + } + } +} + +impl BlockDeviceConfigInfo { + /// Get default queue numbers + pub fn default_num_queues() -> usize { + 1 + } + + /// Get default value of is_direct switch + pub fn default_direct() -> bool { + true + } + + /// Get default value of no_drop switch + pub fn default_no_drop() -> bool { + false + } + + /// Get type of low level storage/protocol. + pub fn device_type(&self) -> BlockDeviceType { + self.device_type + } + + /// Returns a reference to `path_on_host`. + pub fn path_on_host(&self) -> &PathBuf { + &self.path_on_host + } + + /// Returns a reference to the part_uuid. + pub fn get_part_uuid(&self) -> Option<&String> { + self.part_uuid.as_ref() + } + + /// Checks whether the drive had read only permissions. + pub fn is_read_only(&self) -> bool { + self.is_read_only + } + + /// Checks whether the drive uses direct I/O + pub fn is_direct(&self) -> bool { + self.is_direct + } + + /// Get number and size of queues supported. + pub fn queue_sizes(&self) -> Vec { + (0..self.num_queues) + .map(|_| self.queue_size) + .collect::>() + } +} + +impl ConfigItem for BlockDeviceConfigInfo { + type Err = BlockDeviceError; + + fn id(&self) -> &str { + &self.drive_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), BlockDeviceError> { + if self.drive_id == other.drive_id { + Ok(()) + } else if self.path_on_host == other.path_on_host { + Err(BlockDeviceError::BlockDevicePathAlreadyExists( + self.path_on_host.clone(), + )) + } else { + Ok(()) + } + } +} + +impl std::fmt::Debug for BlockDeviceInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.config) + } +} + +/// Block Device Info +pub type BlockDeviceInfo = DeviceConfigInfo; + +/// Wrapper for the collection that holds all the Block Devices Configs +//#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +#[derive(Clone)] +pub struct BlockDeviceMgr { + /// A list of `BlockDeviceInfo` objects. + info_list: VecDeque, + has_root_block: bool, + has_part_uuid_root: bool, + read_only_root: bool, + part_uuid: Option, + use_shared_irq: bool, +} + +impl BlockDeviceMgr { + /// returns a front-to-back iterator. + pub fn iter(&self) -> vec_deque::Iter { + self.info_list.iter() + } + + /// Checks whether any of the added BlockDevice is the root. + pub fn has_root_block_device(&self) -> bool { + self.has_root_block + } + + /// Checks whether the root device is configured using a part UUID. + pub fn has_part_uuid_root(&self) -> bool { + self.has_part_uuid_root + } + + /// Checks whether the root device has read-only permisssions. + pub fn is_read_only_root(&self) -> bool { + self.read_only_root + } + + /// Gets the index of the device with the specified `drive_id` if it exists in the list. + pub fn get_index_of_drive_id(&self, id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(id)) + } + + /// Gets the 'BlockDeviceConfigInfo' of the device with the specified `drive_id` if it exists in the list. + pub fn get_config_of_drive_id(&self, drive_id: &str) -> Option { + match self.get_index_of_drive_id(drive_id) { + Some(index) => { + let config = self.info_list.get(index).unwrap().config.clone(); + Some(config) + } + None => None, + } + } + + /// Inserts `block_device_config` in the block device configuration list. + /// If an entry with the same id already exists, it will attempt to update + /// the existing entry. + /// Inserting a secondary root block device will fail. + pub fn insert_device( + device_mgr: &mut DeviceManager, + mut ctx: DeviceOpContext, + config: BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(BlockDeviceError::UpdateNotAllowedPostBoot); + } + + let mgr = &mut device_mgr.block_manager; + + // If the id of the drive already exists in the list, the operation is update. + match mgr.get_index_of_drive_id(config.id()) { + Some(index) => { + // No support for runtime update yet. + if ctx.is_hotplug { + Err(BlockDeviceError::BlockDevicePathAlreadyExists( + config.path_on_host.clone(), + )) + } else { + for (idx, info) in mgr.info_list.iter().enumerate() { + if idx != index { + info.config.check_conflicts(&config)?; + } + } + mgr.update(index, config) + } + } + None => { + for info in mgr.info_list.iter() { + info.config.check_conflicts(&config)?; + } + let config2 = config.clone(); + let index = mgr.create(config2)?; + if !ctx.is_hotplug { + return Ok(()); + } + + match config.device_type { + BlockDeviceType::RawBlock => { + let device = Self::create_blk_device(&config, &mut ctx) + .map_err(BlockDeviceError::Virtio)?; + let dev = DeviceManager::create_mmio_virtio_device( + device, + &mut ctx, + config.use_shared_irq.unwrap_or(mgr.use_shared_irq), + config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BlockDeviceError::DeviceManager)?; + mgr.update_device_by_index(index, Arc::clone(&dev))?; + // live-upgrade need save/restore device from info.device. + mgr.info_list[index].set_device(dev.clone()); + ctx.insert_hotplug_mmio_device(&dev, None).map_err(|e| { + let logger = ctx.logger().new(slog::o!()); + BlockDeviceMgr::remove_device(device_mgr, ctx, &config.drive_id) + .unwrap(); + error!( + logger, + "failed to hot-add virtio block device {}, {:?}", + &config.drive_id, + e + ); + BlockDeviceError::DeviceManager(e) + }) + } + _ => Err(BlockDeviceError::InvalidBlockDeviceType), + } + } + } + } + + /// Attaches all block devices from the BlockDevicesConfig. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), BlockDeviceError> { + for info in self.info_list.iter_mut() { + match info.config.device_type { + BlockDeviceType::RawBlock => { + info!( + ctx.logger(), + "attach virtio-blk device, drive_id {}, path {}", + info.config.drive_id, + info.config.path_on_host.to_str().unwrap_or("") + ); + let device = Self::create_blk_device(&info.config, ctx) + .map_err(BlockDeviceError::Virtio)?; + let device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(BlockDeviceError::RegisterBlockDevice)?; + info.device = Some(device); + } + _ => { + return Err(BlockDeviceError::OpenBlockDevice( + std::io::Error::from_raw_os_error(libc::EINVAL), + )); + } + } + } + + Ok(()) + } + + /// Removes all virtio-blk devices + pub fn remove_devices(&mut self, ctx: &mut DeviceOpContext) -> Result<(), DeviceMgrError> { + while let Some(mut info) = self.info_list.pop_back() { + info!(ctx.logger(), "remove drive {}", info.config.drive_id); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, ctx)?; + } + } + + Ok(()) + } + + fn remove(&mut self, drive_id: &str) -> Option { + match self.get_index_of_drive_id(drive_id) { + Some(index) => self.info_list.remove(index), + None => None, + } + } + + /// remove a block device, it basically is the inverse operation of `insert_device`` + pub fn remove_device( + dev_mgr: &mut DeviceManager, + mut ctx: DeviceOpContext, + drive_id: &str, + ) -> std::result::Result<(), BlockDeviceError> { + if !cfg!(feature = "hotplug") { + return Err(BlockDeviceError::UpdateNotAllowedPostBoot); + } + + let mgr = &mut dev_mgr.block_manager; + match mgr.remove(drive_id) { + Some(mut info) => { + info!(ctx.logger(), "remove drive {}", info.config.drive_id); + if let Some(device) = info.device.take() { + DeviceManager::destroy_mmio_virtio_device(device, &mut ctx) + .map_err(BlockDeviceError::DeviceManager)?; + } + } + None => return Err(BlockDeviceError::InvalidDeviceId(drive_id.to_owned())), + } + + Ok(()) + } + + fn create_blk_device( + cfg: &BlockDeviceConfigInfo, + ctx: &mut DeviceOpContext, + ) -> std::result::Result>, virtio::Error> { + let epoll_mgr = ctx.epoll_mgr.clone().ok_or(virtio::Error::InvalidInput)?; + + // Safe to unwrap() because we have verified it when parsing device type. + //let path = cfg.path_on_host.to_str().unwrap(); + let mut block_files: Vec> = vec![]; + + match cfg.device_type { + BlockDeviceType::RawBlock => { + let custom_flags = if cfg.is_direct() { + info!( + ctx.logger(), + "Open block device \"{}\" in direct mode.", + cfg.path_on_host().display() + ); + libc::O_DIRECT + } else { + info!( + ctx.logger(), + "Open block device \"{}\" in buffer mode.", + cfg.path_on_host().display(), + ); + 0 + }; + let io_uring_supported = IoUring::is_supported(); + for i in 0..cfg.num_queues { + let queue_size = cfg.queue_sizes()[i] as u32; + let file = OpenOptions::new() + .read(true) + .custom_flags(custom_flags) + .write(!cfg.is_read_only()) + .open(cfg.path_on_host())?; + info!(ctx.logger(), "Queue {}: block file opened", i); + + if io_uring_supported { + info!( + ctx.logger(), + "Queue {}: Using io_uring Raw disk file, queue size {}.", i, queue_size + ); + let io_engine = IoUring::new(file.as_raw_fd(), queue_size)?; + block_files.push(Box::new(LocalFile::new(file, cfg.no_drop, io_engine)?)); + } else { + info!( + ctx.logger(), + "Queue {}: Since io_uring_supported is not enabled, change to default support of Aio Raw disk file, queue size {}", i, queue_size + ); + let io_engine = Aio::new(file.as_raw_fd(), queue_size)?; + block_files.push(Box::new(LocalFile::new(file, cfg.no_drop, io_engine)?)); + } + } + } + _ => { + error!( + ctx.logger(), + "invalid block device type: {:?}", cfg.device_type + ); + return Err(virtio::Error::InvalidInput); + } + }; + + let mut limiters = vec![]; + for _i in 0..cfg.num_queues { + if let Some(limiter) = cfg.rate_limiter.clone().map(|mut v| { + v.resize(cfg.num_queues as u64); + v.try_into().unwrap() + }) { + limiters.push(limiter); + } + } + + Ok(Box::new(Block::new( + block_files, + cfg.is_read_only, + Arc::new(cfg.queue_sizes()), + epoll_mgr, + limiters, + )?)) + } + + /// Generated guest kernel commandline related to root block device. + pub fn generate_kernel_boot_args( + &self, + kernel_config: &mut KernelConfigInfo, + ) -> std::result::Result<(), DeviceMgrError> { + // Respect user configuration if kernel_cmdline contains "root=", + // special attention for the case when kernel command line starting with "root=xxx" + let old_kernel_cmdline = format!(" {}", kernel_config.kernel_cmdline().as_str()); + if !old_kernel_cmdline.contains(" root=") && self.has_root_block { + let cmdline = kernel_config.kernel_cmdline_mut(); + if let Some(ref uuid) = self.part_uuid { + cmdline + .insert("root", &format!("PART_UUID={}", uuid)) + .map_err(DeviceMgrError::Cmdline)?; + } else { + cmdline + .insert("root", "/dev/vda") + .map_err(DeviceMgrError::Cmdline)?; + } + if self.read_only_root { + if old_kernel_cmdline.contains(" rw") { + return Err(DeviceMgrError::InvalidOperation); + } + cmdline.insert_str("ro").map_err(DeviceMgrError::Cmdline)?; + } + } + + Ok(()) + } + + /// insert a block device's config. return index on success. + fn create( + &mut self, + block_device_config: BlockDeviceConfigInfo, + ) -> std::result::Result { + self.check_data_file_present(&block_device_config)?; + if self + .get_index_of_drive_path(&block_device_config.path_on_host) + .is_some() + { + return Err(BlockDeviceError::BlockDevicePathAlreadyExists( + block_device_config.path_on_host, + )); + } + + // check whether the Device Config belongs to a root device + // we need to satisfy the condition by which a VMM can only have on root device + if block_device_config.is_root_device { + if self.has_root_block { + return Err(BlockDeviceError::RootBlockDeviceAlreadyAdded); + } else { + self.has_root_block = true; + self.read_only_root = block_device_config.is_read_only; + self.has_part_uuid_root = block_device_config.part_uuid.is_some(); + self.part_uuid = block_device_config.part_uuid.clone(); + // Root Device should be the first in the list whether or not PART_UUID is specified + // in order to avoid bugs in case of switching from part_uuid boot scenarios to + // /dev/vda boot type. + self.info_list + .push_front(BlockDeviceInfo::new(block_device_config)); + Ok(0) + } + } else { + self.info_list + .push_back(BlockDeviceInfo::new(block_device_config)); + Ok(self.info_list.len() - 1) + } + } + + /// Updates a Block Device Config. The update fails if it would result in two + /// root block devices. + fn update( + &mut self, + mut index: usize, + new_config: BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + // Check if the path exists + self.check_data_file_present(&new_config)?; + if let Some(idx) = self.get_index_of_drive_path(&new_config.path_on_host) { + if idx != index { + return Err(BlockDeviceError::BlockDevicePathAlreadyExists( + new_config.path_on_host.clone(), + )); + } + } + + if self.info_list.get(index).is_none() { + return Err(InvalidDeviceId(index.to_string())); + } + // Check if the root block device is being updated. + if self.info_list[index].config.is_root_device { + self.has_root_block = new_config.is_root_device; + self.read_only_root = new_config.is_root_device && new_config.is_read_only; + self.has_part_uuid_root = new_config.part_uuid.is_some(); + self.part_uuid = new_config.part_uuid.clone(); + } else if new_config.is_root_device { + // Check if a second root block device is being added. + if self.has_root_block { + return Err(BlockDeviceError::RootBlockDeviceAlreadyAdded); + } else { + // One of the non-root blocks is becoming root. + self.has_root_block = true; + self.read_only_root = new_config.is_read_only; + self.has_part_uuid_root = new_config.part_uuid.is_some(); + self.part_uuid = new_config.part_uuid.clone(); + + // Make sure the root device is on the first position. + self.info_list.swap(0, index); + // Block config to be updated has moved to first position. + index = 0; + } + } + // Update the config. + self.info_list[index].config = new_config; + + Ok(()) + } + + fn check_data_file_present( + &self, + block_device_config: &BlockDeviceConfigInfo, + ) -> std::result::Result<(), BlockDeviceError> { + if block_device_config.device_type == BlockDeviceType::RawBlock + && !block_device_config.path_on_host.exists() + { + Err(BlockDeviceError::InvalidBlockDevicePath( + block_device_config.path_on_host.clone(), + )) + } else { + Ok(()) + } + } + + fn get_index_of_drive_path(&self, drive_path: &Path) -> Option { + self.info_list + .iter() + .position(|info| info.config.path_on_host.eq(drive_path)) + } + + /// update devce information in `info_list`. The caller of this method is + /// `insert_device` when hotplug is true. + pub fn update_device_by_index( + &mut self, + index: usize, + device: Arc, + ) -> Result<(), BlockDeviceError> { + if let Some(info) = self.info_list.get_mut(index) { + info.device = Some(device); + return Ok(()); + } + + Err(BlockDeviceError::InvalidDeviceId("".to_owned())) + } + + /// Update the ratelimiter settings of a virtio blk device. + pub fn update_device_ratelimiters( + device_mgr: &mut DeviceManager, + new_cfg: BlockDeviceConfigUpdateInfo, + ) -> std::result::Result<(), BlockDeviceError> { + let mgr = &mut device_mgr.block_manager; + match mgr.get_index_of_drive_id(&new_cfg.drive_id) { + Some(index) => { + let config = &mut mgr.info_list[index].config; + config.rate_limiter = new_cfg.rate_limiter.clone(); + let device = mgr.info_list[index] + .device + .as_mut() + .ok_or_else(|| BlockDeviceError::InvalidDeviceId("".to_owned()))?; + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(blk_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return blk_dev + .set_patch_rate_limiters(new_cfg.bytes(), new_cfg.ops()) + .map(|_p| ()) + .map_err(|_e| BlockDeviceError::BlockEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(BlockDeviceError::InvalidDeviceId(new_cfg.drive_id)), + } + } +} + +impl Default for BlockDeviceMgr { + /// Constructor for the BlockDeviceConfigs. It initializes an empty LinkedList. + fn default() -> BlockDeviceMgr { + BlockDeviceMgr { + info_list: VecDeque::::new(), + has_root_block: false, + has_part_uuid_root: false, + read_only_root: false, + part_uuid: None, + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 07698d4b8..11a742bbb 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -59,6 +59,12 @@ pub mod vsock_dev_mgr; #[cfg(feature = "virtio-vsock")] use self::vsock_dev_mgr::VsockDeviceMgr; +#[cfg(feature = "virtio-blk")] +/// virtio-block device manager +pub mod blk_dev_mgr; +#[cfg(feature = "virtio-blk")] +use self::blk_dev_mgr::BlockDeviceMgr; + macro_rules! info( ($l:expr, $($args:tt)+) => { slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager")) @@ -71,21 +77,27 @@ pub enum DeviceMgrError { /// Invalid operation. #[error("invalid device manager operation")] InvalidOperation, + /// Failed to get device resource. #[error("failed to get device assigned resources")] GetDeviceResource, + /// Appending to kernel command line failed. #[error("failed to add kernel command line parameter for device: {0}")] Cmdline(#[source] linux_loader::cmdline::Error), + /// Failed to manage console devices. #[error(transparent)] ConsoleManager(console_manager::ConsoleManagerError), + /// Failed to create the device. #[error("failed to create virtual device: {0}")] CreateDevice(#[source] io::Error), + /// Failed to perform an operation on the bus. #[error(transparent)] IoManager(IoManagerError), + /// Failure from legacy device manager. #[error(transparent)] LegacyManager(legacy::Error), @@ -409,6 +421,11 @@ pub struct DeviceManager { pub(crate) mmio_device_info: HashMap<(DeviceType, String), MMIODeviceInfo>, #[cfg(feature = "virtio-vsock")] pub(crate) vsock_manager: VsockDeviceMgr, + + #[cfg(feature = "virtio-blk")] + // If there is a Root Block Device, this should be added as the first element of the list. + // This is necessary because we want the root to always be mounted on /dev/vda. + pub(crate) block_manager: BlockDeviceMgr, } impl DeviceManager { @@ -433,6 +450,8 @@ impl DeviceManager { mmio_device_info: HashMap::new(), #[cfg(feature = "virtio-vsock")] vsock_manager: VsockDeviceMgr::default(), + #[cfg(feature = "virtio-blk")] + block_manager: BlockDeviceMgr::default(), } } @@ -553,9 +572,18 @@ impl DeviceManager { self.create_legacy_devices(&mut ctx)?; self.init_legacy_devices(dmesg_fifo, com1_sock_path, &mut ctx)?; + #[cfg(feature = "virtio-blk")] + self.block_manager + .attach_devices(&mut ctx) + .map_err(StartMicrovmError::BlockDeviceError)?; + #[cfg(feature = "virtio-vsock")] self.vsock_manager.attach_devices(&mut ctx)?; + #[cfg(feature = "virtio-blk")] + self.block_manager + .generate_kernel_boot_args(kernel_config) + .map_err(StartMicrovmError::DeviceManager)?; ctx.generate_kernel_boot_args(kernel_config) .map_err(StartMicrovmError::DeviceManager)?; @@ -570,10 +598,21 @@ impl DeviceManager { /// Remove all devices when shutdown the associated virtual machine pub fn remove_devices( &mut self, - _vm_as: GuestAddressSpaceImpl, - _epoll_mgr: EpollManager, - _address_space: Option<&AddressSpace>, + vm_as: GuestAddressSpaceImpl, + epoll_mgr: EpollManager, + address_space: Option<&AddressSpace>, ) -> Result<()> { + // create context for removing devices + let mut ctx = DeviceOpContext::new( + Some(epoll_mgr), + self, + Some(vm_as), + address_space.cloned(), + true, + ); + + #[cfg(feature = "virtio-blk")] + self.block_manager.remove_devices(&mut ctx)?; Ok(()) } } diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index f6b37c867..9c9c46564 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -158,6 +158,11 @@ pub enum StartMicroVmError { /// Upcall connect Error. #[error("failure while connecting the upcall client: {0}")] UpcallConnectError(#[source] dbs_upcall::UpcallClientError), + + #[cfg(feature = "virtio-blk")] + /// Virtio-blk errors. + #[error("virtio-blk errors: {0}")] + BlockDeviceError(#[source] device_manager::blk_dev_mgr::BlockDeviceError), } /// Errors associated with starting the instance. From 948381bdbee2bd2589f40702d688d353ba325729 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 18:08:24 +0800 Subject: [PATCH 11/24] dragonball: add virtio-net device support Virtio-net devices are supported. Signed-off-by: wllenyj --- src/dragonball/Cargo.toml | 1 + src/dragonball/src/api/v1/vmm_action.rs | 71 ++++ src/dragonball/src/config_manager.rs | 22 + .../src/device_manager/blk_dev_mgr.rs | 25 +- src/dragonball/src/device_manager/mod.rs | 16 + .../src/device_manager/virtio_net_dev_mgr.rs | 386 ++++++++++++++++++ src/dragonball/src/error.rs | 5 + 7 files changed, 504 insertions(+), 22 deletions(-) create mode 100644 src/dragonball/src/device_manager/virtio_net_dev_mgr.rs diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index fcedd492c..d44e115ca 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -50,6 +50,7 @@ atomic-guest-memory = [] hotplug = ["virtio-vsock"] virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"] virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"] +virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"] [patch.'crates-io'] dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "7a8e832b53d66994d6a16f0513d69f540583dcd0" } diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 253a6854c..0a021341a 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -21,6 +21,11 @@ use crate::vmm::Vmm; use crate::device_manager::blk_dev_mgr::{ BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, }; +#[cfg(feature = "virtio-net")] +use crate::device_manager::virtio_net_dev_mgr::{ + VirtioNetDeviceConfigInfo, VirtioNetDeviceConfigUpdateInfo, VirtioNetDeviceError, + VirtioNetDeviceMgr, +}; #[cfg(feature = "virtio-vsock")] use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; @@ -64,6 +69,11 @@ pub enum VmmActionError { /// Block device related errors. #[error("virtio-blk device error: {0}")] Block(#[source] BlockDeviceError), + + #[cfg(feature = "virtio-net")] + /// Net device related errors. + #[error("virtio-net device error: {0}")] + VirtioNet(#[source] VirtioNetDeviceError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -108,6 +118,17 @@ pub enum VmmAction { /// Update a block device, after microVM start. Currently, the only updatable properties /// are the RX and TX rate limiters. UpdateBlockDevice(BlockDeviceConfigUpdateInfo), + + #[cfg(feature = "virtio-net")] + /// Add a new network interface config or update one that already exists using the + /// `NetworkInterfaceConfig` as input. This action can only be called before the microVM has + /// booted. The response is sent using the `OutcomeSender`. + InsertNetworkDevice(VirtioNetDeviceConfigInfo), + + #[cfg(feature = "virtio-net")] + /// Update a network interface, after microVM start. Currently, the only updatable properties + /// are the RX and TX rate limiters. + UpdateNetworkInterface(VirtioNetDeviceConfigUpdateInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -189,6 +210,14 @@ impl VmmService { VmmAction::RemoveBlockDevice(drive_id) => { self.remove_block_device(vmm, event_mgr, &drive_id) } + #[cfg(feature = "virtio-net")] + VmmAction::InsertNetworkDevice(virtio_net_cfg) => { + self.add_virtio_net_device(vmm, event_mgr, virtio_net_cfg) + } + #[cfg(feature = "virtio-net")] + VmmAction::UpdateNetworkInterface(netif_update) => { + self.update_net_rate_limiters(vmm, netif_update) + } }; debug!("send vmm response: {:?}", response); @@ -495,4 +524,46 @@ impl VmmService { .map(|_| VmmData::Empty) .map_err(VmmActionError::Block) } + + #[cfg(feature = "virtio-net")] + fn add_virtio_net_device( + &mut self, + vmm: &mut Vmm, + event_mgr: &mut EventManager, + config: VirtioNetDeviceConfigInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + let ctx = vm + .create_device_op_context(Some(event_mgr.epoll_manager())) + .map_err(|e| { + if let StartMicrovmError::MicroVMAlreadyRunning = e { + VmmActionError::VirtioNet(VirtioNetDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicrovmError::UpcallNotReady = e { + VmmActionError::UpcallNotReady + } else { + VmmActionError::StartMicrovm(e) + } + })?; + + VirtioNetDeviceMgr::insert_device(vm.device_manager_mut(), ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::VirtioNet) + } + + #[cfg(feature = "virtio-net")] + fn update_net_rate_limiters( + &mut self, + vmm: &mut Vmm, + config: VirtioNetDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + + VirtioNetDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::VirtioNet) + } } diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs index e25b792e7..cbfb79041 100644 --- a/src/dragonball/src/config_manager.rs +++ b/src/dragonball/src/config_manager.rs @@ -10,6 +10,28 @@ use dbs_device::DeviceIo; use dbs_utils::rate_limiter::{RateLimiter, TokenBucket}; use serde_derive::{Deserialize, Serialize}; +macro_rules! get_bucket_update { + ($self:ident, $rate_limiter: ident, $metric: ident) => {{ + match &$self.$rate_limiter { + Some(rl_cfg) => { + let tb_cfg = &rl_cfg.$metric; + dbs_utils::rate_limiter::RateLimiter::make_bucket( + tb_cfg.size, + tb_cfg.one_time_burst, + tb_cfg.refill_time, + ) + // Updated active rate-limiter. + .map(dbs_utils::rate_limiter::BucketUpdate::Update) + // Updated/deactivated rate-limiter + .unwrap_or(dbs_utils::rate_limiter::BucketUpdate::Disabled) + } + // No update to the rate-limiter. + None => dbs_utils::rate_limiter::BucketUpdate::None, + } + }}; +} +pub(crate) use get_bucket_update; + /// Trait for generic configuration information. pub trait ConfigItem { /// Related errors. diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs index 8a1b2d02f..38e46cd59 100644 --- a/src/dragonball/src/device_manager/blk_dev_mgr.rs +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -20,7 +20,9 @@ use dbs_virtio_devices::block::{aio::Aio, io_uring::IoUring, Block, LocalFile, U use serde_derive::{Deserialize, Serialize}; use crate::address_space_manager::GuestAddressSpaceImpl; -use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo}; +use crate::config_manager::{ + get_bucket_update, ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo, +}; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; use crate::vm::KernelConfigInfo; @@ -43,27 +45,6 @@ macro_rules! error( }; ); -macro_rules! get_bucket_update { - ($self:ident, $rate_limiter: ident, $metric: ident) => {{ - match &$self.$rate_limiter { - Some(rl_cfg) => { - let tb_cfg = &rl_cfg.$metric; - dbs_utils::rate_limiter::RateLimiter::make_bucket( - tb_cfg.size, - tb_cfg.one_time_burst, - tb_cfg.refill_time, - ) - // Updated active rate-limiter. - .map(dbs_utils::rate_limiter::BucketUpdate::Update) - // Updated/deactivated rate-limiter - .unwrap_or(dbs_utils::rate_limiter::BucketUpdate::Disabled) - } - // No update to the rate-limiter. - None => dbs_utils::rate_limiter::BucketUpdate::None, - } - }}; -} - /// Default queue size for VirtIo block devices. pub const QUEUE_SIZE: u16 = 128; diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 11a742bbb..9313919f8 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -65,6 +65,12 @@ pub mod blk_dev_mgr; #[cfg(feature = "virtio-blk")] use self::blk_dev_mgr::BlockDeviceMgr; +#[cfg(feature = "virtio-net")] +/// Device manager for virtio-net devices. +pub mod virtio_net_dev_mgr; +#[cfg(feature = "virtio-net")] +use self::virtio_net_dev_mgr::VirtioNetDeviceMgr; + macro_rules! info( ($l:expr, $($args:tt)+) => { slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager")) @@ -426,6 +432,9 @@ pub struct DeviceManager { // If there is a Root Block Device, this should be added as the first element of the list. // This is necessary because we want the root to always be mounted on /dev/vda. pub(crate) block_manager: BlockDeviceMgr, + + #[cfg(feature = "virtio-net")] + pub(crate) virtio_net_manager: VirtioNetDeviceMgr, } impl DeviceManager { @@ -452,6 +461,8 @@ impl DeviceManager { vsock_manager: VsockDeviceMgr::default(), #[cfg(feature = "virtio-blk")] block_manager: BlockDeviceMgr::default(), + #[cfg(feature = "virtio-net")] + virtio_net_manager: VirtioNetDeviceMgr::default(), } } @@ -577,6 +588,11 @@ impl DeviceManager { .attach_devices(&mut ctx) .map_err(StartMicrovmError::BlockDeviceError)?; + #[cfg(feature = "virtio-net")] + self.virtio_net_manager + .attach_devices(&mut ctx) + .map_err(StartMicrovmError::VirtioNetDeviceError)?; + #[cfg(feature = "virtio-vsock")] self.vsock_manager.attach_devices(&mut ctx)?; diff --git a/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs new file mode 100644 index 000000000..65f29fbcd --- /dev/null +++ b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs @@ -0,0 +1,386 @@ +// Copyright 2020-2022 Alibaba, Inc. or its affiliates. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +use std::convert::TryInto; +use std::sync::Arc; + +use dbs_utils::net::{MacAddr, Tap, TapError}; +use dbs_utils::rate_limiter::BucketUpdate; +use dbs_virtio_devices as virtio; +use dbs_virtio_devices::net::Net; +use dbs_virtio_devices::Error as VirtioError; +use serde_derive::{Deserialize, Serialize}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ + get_bucket_update, ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, +}; +use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; + +use super::DbsMmioV2Device; + +/// Default number of virtio queues, one rx/tx pair. +pub const NUM_QUEUES: usize = 2; +/// Default size of virtio queues. +pub const QUEUE_SIZE: u16 = 256; +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; + +/// Errors associated with virtio net device operations. +#[derive(Debug, thiserror::Error)] +pub enum VirtioNetDeviceError { + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// The iface ID is invalid. + #[error("invalid virtio-net iface id '{0}'")] + InvalidIfaceId(String), + + /// Invalid queue number configuration for virtio_net device. + #[error("invalid queue number {0} for virtio-net device")] + InvalidQueueNum(usize), + + /// Failure from device manager, + #[error("failure in device manager operations, {0}")] + DeviceManager(#[source] DeviceMgrError), + + /// The Context Identifier is already in use. + #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// The MAC address is already in use. + #[error("the guest MAC address {0} is already in use")] + GuestMacAddressInUse(String), + + /// The host device name is already in use. + #[error("the host device name {0} is already in use")] + HostDeviceNameInUse(String), + + /// Cannot open/create tap device. + #[error("cannot open TAP device")] + OpenTap(#[source] TapError), + + /// Failure from virtio subsystem. + #[error(transparent)] + Virtio(VirtioError), + + /// Failed to send patch message to net epoll handler. + #[error("could not send patch message to the net epoll handler")] + NetEpollHanderSendFail, + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// Split this at some point. + /// Internal errors are due to resource exhaustion. + /// Users errors are due to invalid permissions. + #[error("cannot create network device: {0}")] + CreateNetDevice(#[source] VirtioError), + + /// Cannot initialize a MMIO Network Device or add a device to the MMIO Bus. + #[error("failure while registering network device: {0}")] + RegisterNetDevice(#[source] DeviceMgrError), +} + +/// Configuration information for virtio net devices. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct VirtioNetDeviceConfigUpdateInfo { + /// ID of the guest network interface. + pub iface_id: String, + /// Rate Limiter for received packages. + pub rx_rate_limiter: Option, + /// Rate Limiter for transmitted packages. + pub tx_rate_limiter: Option, +} + +impl VirtioNetDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the RX bandwidth rate limiter. + pub fn rx_bytes(&self) -> BucketUpdate { + get_bucket_update!(self, rx_rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the RX ops rate limiter. + pub fn rx_ops(&self) -> BucketUpdate { + get_bucket_update!(self, rx_rate_limiter, ops) + } + /// Provides a `BucketUpdate` description for the TX bandwidth rate limiter. + pub fn tx_bytes(&self) -> BucketUpdate { + get_bucket_update!(self, tx_rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the TX ops rate limiter. + pub fn tx_ops(&self) -> BucketUpdate { + get_bucket_update!(self, tx_rate_limiter, ops) + } +} + +/// Configuration information for virtio net devices. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)] +pub struct VirtioNetDeviceConfigInfo { + /// ID of the guest network interface. + pub iface_id: String, + /// Host level path for the guest network interface. + pub host_dev_name: String, + /// Number of virtqueues to use. + pub num_queues: usize, + /// Size of each virtqueue. Unit: byte. + pub queue_size: u16, + /// Guest MAC address. + pub guest_mac: Option, + /// Rate Limiter for received packages. + pub rx_rate_limiter: Option, + /// Rate Limiter for transmitted packages. + pub tx_rate_limiter: Option, + /// allow duplicate mac + pub allow_duplicate_mac: bool, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl VirtioNetDeviceConfigInfo { + /// Returns the tap device that `host_dev_name` refers to. + pub fn open_tap(&self) -> std::result::Result { + Tap::open_named(self.host_dev_name.as_str(), false).map_err(VirtioNetDeviceError::OpenTap) + } + + /// Returns a reference to the mac address. It the mac address is not configured, it + /// return None. + pub fn guest_mac(&self) -> Option<&MacAddr> { + self.guest_mac.as_ref() + } + + ///Rx and Tx queue and max queue sizes + pub fn queue_sizes(&self) -> Vec { + let mut queue_size = self.queue_size; + if queue_size == 0 { + queue_size = QUEUE_SIZE; + } + let num_queues = if self.num_queues > 0 { + self.num_queues + } else { + NUM_QUEUES + }; + + (0..num_queues).map(|_| queue_size).collect::>() + } +} + +impl ConfigItem for VirtioNetDeviceConfigInfo { + type Err = VirtioNetDeviceError; + + fn id(&self) -> &str { + &self.iface_id + } + + fn check_conflicts(&self, other: &Self) -> Result<(), VirtioNetDeviceError> { + if self.iface_id == other.iface_id { + Err(VirtioNetDeviceError::DeviceIDAlreadyExist( + self.iface_id.clone(), + )) + } else if !other.allow_duplicate_mac + && self.guest_mac.is_some() + && self.guest_mac == other.guest_mac + { + Err(VirtioNetDeviceError::GuestMacAddressInUse( + self.guest_mac.as_ref().unwrap().to_string(), + )) + } else if self.host_dev_name == other.host_dev_name { + Err(VirtioNetDeviceError::HostDeviceNameInUse( + self.host_dev_name.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Virtio Net Device Info +pub type VirtioNetDeviceInfo = DeviceConfigInfo; + +/// Device manager to manage all virtio net devices. +pub struct VirtioNetDeviceMgr { + pub(crate) info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl VirtioNetDeviceMgr { + /// Gets the index of the device with the specified `drive_id` if it exists in the list. + pub fn get_index_of_iface_id(&self, if_id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.iface_id.eq(if_id)) + } + + /// Insert or update a virtio net device into the manager. + pub fn insert_device( + device_mgr: &mut DeviceManager, + mut ctx: DeviceOpContext, + config: VirtioNetDeviceConfigInfo, + ) -> std::result::Result<(), VirtioNetDeviceError> { + if config.num_queues % 2 != 0 { + return Err(VirtioNetDeviceError::InvalidQueueNum(config.num_queues)); + } + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(VirtioNetDeviceError::UpdateNotAllowedPostBoot); + } + + let mgr = &mut device_mgr.virtio_net_manager; + + slog::info!( + ctx.logger(), + "add virtio-net device configuration"; + "subsystem" => "net_dev_mgr", + "id" => &config.iface_id, + "host_dev_name" => &config.host_dev_name, + ); + + let device_index = mgr.info_list.insert_or_update(&config)?; + + if ctx.is_hotplug { + slog::info!( + ctx.logger(), + "attach virtio-net device"; + "subsystem" => "net_dev_mgr", + "id" => &config.iface_id, + "host_dev_name" => &config.host_dev_name, + ); + + match Self::create_device(&config, &mut ctx) { + Ok(device) => { + let dev = DeviceManager::create_mmio_virtio_device( + device, + &mut ctx, + config.use_shared_irq.unwrap_or(mgr.use_shared_irq), + config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(VirtioNetDeviceError::DeviceManager)?; + ctx.insert_hotplug_mmio_device(&dev.clone(), None) + .map_err(VirtioNetDeviceError::DeviceManager)?; + // live-upgrade need save/restore device from info.device. + mgr.info_list[device_index].set_device(dev); + } + Err(e) => { + mgr.info_list.remove(device_index); + return Err(VirtioNetDeviceError::Virtio(e)); + } + } + } + + Ok(()) + } + + /// Update the ratelimiter settings of a virtio net device. + pub fn update_device_ratelimiters( + device_mgr: &mut DeviceManager, + new_cfg: VirtioNetDeviceConfigUpdateInfo, + ) -> std::result::Result<(), VirtioNetDeviceError> { + let mgr = &mut device_mgr.virtio_net_manager; + match mgr.get_index_of_iface_id(&new_cfg.iface_id) { + Some(index) => { + let config = &mut mgr.info_list[index].config; + config.rx_rate_limiter = new_cfg.rx_rate_limiter.clone(); + config.tx_rate_limiter = new_cfg.tx_rate_limiter.clone(); + let device = mgr.info_list[index].device.as_mut().ok_or_else(|| { + VirtioNetDeviceError::InvalidIfaceId(new_cfg.iface_id.clone()) + })?; + + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(net_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return net_dev + .set_patch_rate_limiters( + new_cfg.rx_bytes(), + new_cfg.rx_ops(), + new_cfg.tx_bytes(), + new_cfg.tx_ops(), + ) + .map(|_p| ()) + .map_err(|_e| VirtioNetDeviceError::NetEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(VirtioNetDeviceError::InvalidIfaceId( + new_cfg.iface_id.clone(), + )), + } + } + + /// Attach all configured vsock device to the virtual machine instance. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), VirtioNetDeviceError> { + for info in self.info_list.iter_mut() { + slog::info!( + ctx.logger(), + "attach virtio-net device"; + "subsystem" => "net_dev_mgr", + "id" => &info.config.iface_id, + "host_dev_name" => &info.config.host_dev_name, + ); + + let device = Self::create_device(&info.config, ctx) + .map_err(VirtioNetDeviceError::CreateNetDevice)?; + let device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(VirtioNetDeviceError::RegisterNetDevice)?; + info.set_device(device); + } + + Ok(()) + } + + fn create_device( + cfg: &VirtioNetDeviceConfigInfo, + ctx: &mut DeviceOpContext, + ) -> std::result::Result>, virtio::Error> { + let epoll_mgr = ctx.epoll_mgr.clone().ok_or(virtio::Error::InvalidInput)?; + let rx_rate_limiter = match cfg.rx_rate_limiter.as_ref() { + Some(rl) => Some(rl.try_into().map_err(virtio::Error::IOError)?), + None => None, + }; + let tx_rate_limiter = match cfg.tx_rate_limiter.as_ref() { + Some(rl) => Some(rl.try_into().map_err(virtio::Error::IOError)?), + None => None, + }; + + let net_device = Net::new( + cfg.host_dev_name.clone(), + cfg.guest_mac(), + Arc::new(cfg.queue_sizes()), + epoll_mgr, + rx_rate_limiter, + tx_rate_limiter, + )?; + + Ok(Box::new(net_device)) + } +} + +impl Default for VirtioNetDeviceMgr { + /// Create a new virtio net device manager. + fn default() -> Self { + VirtioNetDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 9c9c46564..667405c5e 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -163,6 +163,11 @@ pub enum StartMicroVmError { /// Virtio-blk errors. #[error("virtio-blk errors: {0}")] BlockDeviceError(#[source] device_manager::blk_dev_mgr::BlockDeviceError), + + #[cfg(feature = "virtio-net")] + /// Virtio-net errors. + #[error("virtio-net errors: {0}")] + VirtioNetDeviceError(#[source] device_manager::virtio_net_dev_mgr::VirtioNetDeviceError), } /// Errors associated with starting the instance. From 11b3f95140c7d840e0cac66355fa22c92b2777b5 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Mon, 16 May 2022 21:24:09 +0800 Subject: [PATCH 12/24] dragonball: add virtio-fs device support Virtio-fs devices are supported. Fixes: #4257 Signed-off-by: wllenyj --- src/dragonball/Cargo.toml | 2 + src/dragonball/src/api/v1/vmm_action.rs | 95 ++++ .../src/device_manager/blk_dev_mgr.rs | 2 +- .../src/device_manager/fs_dev_mgr.rs | 523 ++++++++++++++++++ .../device_manager/memory_region_handler.rs | 110 ++++ src/dragonball/src/device_manager/mod.rs | 23 + src/dragonball/src/error.rs | 5 + 7 files changed, 759 insertions(+), 1 deletion(-) create mode 100644 src/dragonball/src/device_manager/fs_dev_mgr.rs create mode 100644 src/dragonball/src/device_manager/memory_region_handler.rs diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index d44e115ca..0f4aa582f 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -51,6 +51,8 @@ hotplug = ["virtio-vsock"] virtio-vsock = ["dbs-virtio-devices/virtio-vsock", "virtio-queue"] virtio-blk = ["dbs-virtio-devices/virtio-blk", "virtio-queue"] virtio-net = ["dbs-virtio-devices/virtio-net", "virtio-queue"] +# virtio-fs only work on atomic-guest-memory +virtio-fs = ["dbs-virtio-devices/virtio-fs", "virtio-queue", "atomic-guest-memory"] [patch.'crates-io'] dbs-device = { git = "https://github.com/openanolis/dragonball-sandbox.git", rev = "7a8e832b53d66994d6a16f0513d69f540583dcd0" } diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 0a021341a..dfba2faf1 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -21,6 +21,10 @@ use crate::vmm::Vmm; use crate::device_manager::blk_dev_mgr::{ BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, }; +#[cfg(feature = "virtio-fs")] +use crate::device_manager::fs_dev_mgr::{ + FsDeviceConfigInfo, FsDeviceConfigUpdateInfo, FsDeviceError, FsDeviceMgr, FsMountConfigInfo, +}; #[cfg(feature = "virtio-net")] use crate::device_manager::virtio_net_dev_mgr::{ VirtioNetDeviceConfigInfo, VirtioNetDeviceConfigUpdateInfo, VirtioNetDeviceError, @@ -74,6 +78,11 @@ pub enum VmmActionError { /// Net device related errors. #[error("virtio-net device error: {0}")] VirtioNet(#[source] VirtioNetDeviceError), + + #[cfg(feature = "virtio-fs")] + /// The action `InsertFsDevice` failed either because of bad user input or an internal error. + #[error("virtio-fs device: {0}")] + FsDevice(#[source] FsDeviceError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -129,6 +138,22 @@ pub enum VmmAction { /// Update a network interface, after microVM start. Currently, the only updatable properties /// are the RX and TX rate limiters. UpdateNetworkInterface(VirtioNetDeviceConfigUpdateInfo), + + #[cfg(feature = "virtio-fs")] + /// Add a new shared fs device or update one that already exists using the + /// `FsDeviceConfig` as input. This action can only be called before the microVM has + /// booted. + InsertFsDevice(FsDeviceConfigInfo), + + #[cfg(feature = "virtio-fs")] + /// Attach a new virtiofs Backend fs or detach an existing virtiofs Backend fs using the + /// `FsMountConfig` as input. This action can only be called _after_ the microVM has + /// booted. + ManipulateFsBackendFs(FsMountConfigInfo), + + #[cfg(feature = "virtio-fs")] + /// Update fs rate limiter, after microVM start. + UpdateFsDevice(FsDeviceConfigUpdateInfo), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -218,6 +243,17 @@ impl VmmService { VmmAction::UpdateNetworkInterface(netif_update) => { self.update_net_rate_limiters(vmm, netif_update) } + #[cfg(feature = "virtio-fs")] + VmmAction::InsertFsDevice(fs_cfg) => self.add_fs_device(vmm, fs_cfg), + + #[cfg(feature = "virtio-fs")] + VmmAction::ManipulateFsBackendFs(fs_mount_cfg) => { + self.manipulate_fs_backend_fs(vmm, fs_mount_cfg) + } + #[cfg(feature = "virtio-fs")] + VmmAction::UpdateFsDevice(fs_update_cfg) => { + self.update_fs_rate_limiters(vmm, fs_update_cfg) + } }; debug!("send vmm response: {:?}", response); @@ -566,4 +602,63 @@ impl VmmService { .map(|_| VmmData::Empty) .map_err(VmmActionError::VirtioNet) } + + #[cfg(feature = "virtio-fs")] + fn add_fs_device(&mut self, vmm: &mut Vmm, config: FsDeviceConfigInfo) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + let hotplug = vm.is_vm_initialized(); + if !cfg!(feature = "hotplug") && hotplug { + return Err(VmmActionError::FsDevice( + FsDeviceError::UpdateNotAllowedPostBoot, + )); + } + + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + VmmActionError::FsDevice(FsDeviceError::UpdateNotAllowedPostBoot) + })?; + FsDeviceMgr::insert_device(vm.device_manager_mut(), ctx, config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } + + #[cfg(feature = "virtio-fs")] + fn manipulate_fs_backend_fs( + &self, + vmm: &mut Vmm, + config: FsMountConfigInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); + } + + FsDeviceMgr::manipulate_backend_fs(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } + + #[cfg(feature = "virtio-fs")] + fn update_fs_rate_limiters( + &self, + vmm: &mut Vmm, + config: FsDeviceConfigUpdateInfo, + ) -> VmmRequestResult { + let vm = vmm + .get_vm_by_id_mut("") + .ok_or(VmmActionError::InvalidVMID)?; + + if !vm.is_vm_initialized() { + return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); + } + + FsDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) + .map(|_| VmmData::Empty) + .map_err(VmmActionError::FsDevice) + } } diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs index 38e46cd59..a624f68db 100644 --- a/src/dragonball/src/device_manager/blk_dev_mgr.rs +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -176,7 +176,7 @@ pub struct BlockDeviceConfigInfo { pub no_drop: bool, /// Block device multi-queue pub num_queues: usize, - /// Virtio queue size. + /// Virtio queue size. Size: byte pub queue_size: u16, /// Rate Limiter for I/O operations. pub rate_limiter: Option, diff --git a/src/dragonball/src/device_manager/fs_dev_mgr.rs b/src/dragonball/src/device_manager/fs_dev_mgr.rs new file mode 100644 index 000000000..c8fb55366 --- /dev/null +++ b/src/dragonball/src/device_manager/fs_dev_mgr.rs @@ -0,0 +1,523 @@ +// Copyright 2020-2022 Alibaba Cloud. All Rights Reserved. +// Copyright 2019 Intel Corporation. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::convert::TryInto; + +use dbs_utils::epoll_manager::EpollManager; +use dbs_virtio_devices::{self as virtio, Error as VirtIoError}; +use serde_derive::{Deserialize, Serialize}; +use slog::{error, info}; + +use crate::address_space_manager::GuestAddressSpaceImpl; +use crate::config_manager::{ + get_bucket_update, ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, +}; +use crate::device_manager::{ + DbsMmioV2Device, DeviceManager, DeviceMgrError, DeviceOpContext, DeviceVirtioRegionHandler, +}; + +use super::DbsVirtioDevice; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; +// The flag of whether to use the generic irq. +const USE_GENERIC_IRQ: bool = true; +// Default cache size is 2 Gi since this is a typical VM memory size. +const DEFAULT_CACHE_SIZE: u64 = 2 * 1024 * 1024 * 1024; + +/// Errors associated with `FsDeviceConfig`. +#[derive(Debug, thiserror::Error)] +pub enum FsDeviceError { + /// Invalid fs, "virtio" or "vhostuser" is allowed. + #[error("the fs type is invalid, virtio or vhostuser is allowed")] + InvalidFs, + + /// Cannot access address space. + #[error("Cannot access address space.")] + AddressSpaceNotInitialized, + + /// Cannot convert RateLimterConfigInfo into RateLimiter. + #[error("failure while converting RateLimterConfigInfo into RateLimiter: {0}")] + RateLimterConfigInfoTryInto(#[source] std::io::Error), + + /// The fs device tag was already used for a different fs. + #[error("VirtioFs device tag {0} already exists")] + FsDeviceTagAlreadyExists(String), + + /// The fs device path was already used for a different fs. + #[error("VirtioFs device tag {0} already exists")] + FsDevicePathAlreadyExists(String), + + /// The update is not allowed after booting the microvm. + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// The attachbackendfs operation fails. + #[error("Fs device attach a backend fs failed")] + AttachBackendFailed(String), + + /// attach backend fs must be done when vm is running. + #[error("vm is not running when attaching a backend fs")] + MicroVMNotRunning, + + /// The mount tag doesn't exist. + #[error("fs tag'{0}' doesn't exist")] + TagNotExists(String), + + /// Failed to send patch message to VirtioFs epoll handler. + #[error("could not send patch message to the VirtioFs epoll handler")] + VirtioFsEpollHanderSendFail, + + /// Creating a shared-fs device fails (if the vhost-user socket cannot be open.) + #[error("cannot create shared-fs device: {0}")] + CreateFsDevice(#[source] VirtIoError), + + /// Cannot initialize a shared-fs device or add a device to the MMIO Bus. + #[error("failure while registering shared-fs device: {0}")] + RegisterFsDevice(#[source] DeviceMgrError), + + /// The device manager errors. + #[error("DeviceManager error: {0}")] + DeviceManager(#[source] DeviceMgrError), +} + +/// Configuration information for a vhost-user-fs device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct FsDeviceConfigInfo { + /// vhost-user socket path. + pub sock_path: String, + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Number of virtqueues to use. + pub num_queues: usize, + /// Size of each virtqueue. Unit: byte. + pub queue_size: u16, + /// DAX cache window size + pub cache_size: u64, + /// Number of thread pool workers. + pub thread_pool_size: u16, + /// The caching policy the file system should use (auto, always or never). + /// This cache policy is set for virtio-fs, visit https://gitlab.com/virtio-fs/virtiofsd to get further information. + pub cache_policy: String, + /// Writeback cache + pub writeback_cache: bool, + /// Enable no_open or not + pub no_open: bool, + /// Enable xattr or not + pub xattr: bool, + /// Drop CAP_SYS_RESOURCE or not + pub drop_sys_resource: bool, + /// virtio fs or vhostuser fs. + pub mode: String, + /// Enable kill_priv_v2 or not + pub fuse_killpriv_v2: bool, + /// Enable no_readdir or not + pub no_readdir: bool, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, + /// Use shared irq + pub use_shared_irq: Option, + /// Use generic irq + pub use_generic_irq: Option, +} + +impl std::default::Default for FsDeviceConfigInfo { + fn default() -> Self { + Self { + sock_path: String::default(), + tag: String::default(), + num_queues: 1, + queue_size: 1024, + cache_size: DEFAULT_CACHE_SIZE, + thread_pool_size: 0, + cache_policy: Self::default_cache_policy(), + writeback_cache: Self::default_writeback_cache(), + no_open: Self::default_no_open(), + fuse_killpriv_v2: Self::default_fuse_killpriv_v2(), + no_readdir: Self::default_no_readdir(), + xattr: Self::default_xattr(), + drop_sys_resource: Self::default_drop_sys_resource(), + mode: Self::default_fs_mode(), + rate_limiter: Some(RateLimiterConfigInfo::default()), + use_shared_irq: None, + use_generic_irq: None, + } + } +} + +impl FsDeviceConfigInfo { + /// The default mode is set to 'virtio' for 'virtio-fs' device. + pub fn default_fs_mode() -> String { + String::from("virtio") + } + + /// The default cache policy + pub fn default_cache_policy() -> String { + "always".to_string() + } + + /// The default setting of writeback cache + pub fn default_writeback_cache() -> bool { + true + } + + /// The default setting of no_open + pub fn default_no_open() -> bool { + true + } + + /// The default setting of killpriv_v2 + pub fn default_fuse_killpriv_v2() -> bool { + false + } + + /// The default setting of xattr + pub fn default_xattr() -> bool { + false + } + + /// The default setting of drop_sys_resource + pub fn default_drop_sys_resource() -> bool { + false + } + + /// The default setting of no_readdir + pub fn default_no_readdir() -> bool { + false + } + + /// The default setting of rate limiter + pub fn default_fs_rate_limiter() -> Option { + None + } +} + +/// Configuration information for virtio-fs. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct FsDeviceConfigUpdateInfo { + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Rate Limiter for I/O operations. + pub rate_limiter: Option, +} + +impl FsDeviceConfigUpdateInfo { + /// Provides a `BucketUpdate` description for the bandwidth rate limiter. + pub fn bytes(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, bandwidth) + } + /// Provides a `BucketUpdate` description for the ops rate limiter. + pub fn ops(&self) -> dbs_utils::rate_limiter::BucketUpdate { + get_bucket_update!(self, rate_limiter, ops) + } +} + +impl ConfigItem for FsDeviceConfigInfo { + type Err = FsDeviceError; + + fn id(&self) -> &str { + &self.tag + } + + fn check_conflicts(&self, other: &Self) -> Result<(), FsDeviceError> { + if self.tag == other.tag { + Err(FsDeviceError::FsDeviceTagAlreadyExists(self.tag.clone())) + } else if self.mode.as_str() == "vhostuser" && self.sock_path == other.sock_path { + Err(FsDeviceError::FsDevicePathAlreadyExists( + self.sock_path.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Configuration information of manipulating backend fs for a virtiofs device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct FsMountConfigInfo { + /// Mount operations, mount, update, umount + pub ops: String, + /// The backend fs type to mount. + pub fstype: Option, + /// the source file/directory the backend fs points to + pub source: Option, + /// where the backend fs gets mounted + pub mountpoint: String, + /// backend fs config content in json format + pub config: Option, + /// virtiofs mount tag name used inside the guest. + /// used as the device name during mount. + pub tag: String, + /// Path to file that contains file lists that should be prefetched by rafs + pub prefetch_list_path: Option, + /// What size file supports dax + pub dax_threshold_size_kb: Option, +} + +pub(crate) type FsDeviceInfo = DeviceConfigInfo; + +impl ConfigItem for FsDeviceInfo { + type Err = FsDeviceError; + fn id(&self) -> &str { + &self.config.tag + } + + fn check_conflicts(&self, other: &Self) -> Result<(), FsDeviceError> { + if self.config.tag == other.config.tag { + Err(FsDeviceError::FsDeviceTagAlreadyExists( + self.config.tag.clone(), + )) + } else if self.config.sock_path == other.config.sock_path { + Err(FsDeviceError::FsDevicePathAlreadyExists( + self.config.sock_path.clone(), + )) + } else { + Ok(()) + } + } +} + +/// Wrapper for the collection that holds all the Fs Devices Configs +pub struct FsDeviceMgr { + /// A list of `FsDeviceConfig` objects. + pub(crate) info_list: DeviceConfigInfos, + pub(crate) use_shared_irq: bool, +} + +impl FsDeviceMgr { + /// Inserts `fs_cfg` in the shared-fs device configuration list. + pub fn insert_device( + device_mgr: &mut DeviceManager, + ctx: DeviceOpContext, + fs_cfg: FsDeviceConfigInfo, + ) -> std::result::Result<(), FsDeviceError> { + // It's too complicated to manage life cycle of shared-fs service process for hotplug. + if ctx.is_hotplug { + error!( + ctx.logger(), + "no support of shared-fs device hotplug"; + "subsystem" => "shared-fs", + "tag" => &fs_cfg.tag, + ); + return Err(FsDeviceError::UpdateNotAllowedPostBoot); + } + + info!( + ctx.logger(), + "add shared-fs device configuration"; + "subsystem" => "shared-fs", + "tag" => &fs_cfg.tag, + ); + device_mgr + .fs_manager + .lock() + .unwrap() + .info_list + .insert_or_update(&fs_cfg)?; + + Ok(()) + } + + /// Attaches all vhost-user-fs devices from the FsDevicesConfig. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), FsDeviceError> { + let epoll_mgr = ctx + .epoll_mgr + .clone() + .ok_or(FsDeviceError::CreateFsDevice(virtio::Error::InvalidInput))?; + + for info in self.info_list.iter_mut() { + let device = Self::create_fs_device(&info.config, ctx, epoll_mgr.clone())?; + let mmio_device = DeviceManager::create_mmio_virtio_device( + device, + ctx, + info.config.use_shared_irq.unwrap_or(self.use_shared_irq), + info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), + ) + .map_err(FsDeviceError::RegisterFsDevice)?; + + info.set_device(mmio_device); + } + + Ok(()) + } + + fn create_fs_device( + config: &FsDeviceConfigInfo, + ctx: &mut DeviceOpContext, + epoll_mgr: EpollManager, + ) -> std::result::Result { + match config.mode.as_str() { + "virtio" => Self::attach_virtio_fs_devices(config, ctx, epoll_mgr), + _ => Err(FsDeviceError::CreateFsDevice(virtio::Error::InvalidInput)), + } + } + + fn attach_virtio_fs_devices( + config: &FsDeviceConfigInfo, + ctx: &mut DeviceOpContext, + epoll_mgr: EpollManager, + ) -> std::result::Result { + info!( + ctx.logger(), + "add virtio-fs device configuration"; + "subsystem" => "virito-fs", + "tag" => &config.tag, + "dax_window_size" => &config.cache_size, + ); + + let limiter = if let Some(rlc) = config.rate_limiter.clone() { + Some( + rlc.try_into() + .map_err(FsDeviceError::RateLimterConfigInfoTryInto)?, + ) + } else { + None + }; + + let vm_as = ctx.get_vm_as().map_err(|e| { + error!(ctx.logger(), "virtio-fs get vm_as error: {:?}", e; + "subsystem" => "virito-fs"); + FsDeviceError::DeviceManager(e) + })?; + let address_space = match ctx.address_space.as_ref() { + Some(address_space) => address_space.clone(), + None => { + error!(ctx.logger(), "virtio-fs get address_space error"; "subsystem" => "virito-fs"); + return Err(FsDeviceError::AddressSpaceNotInitialized); + } + }; + let handler = DeviceVirtioRegionHandler { + vm_as, + address_space, + }; + + let device = Box::new( + virtio::fs::VirtioFs::new( + &config.tag, + config.num_queues, + config.queue_size, + config.cache_size, + &config.cache_policy, + config.thread_pool_size, + config.writeback_cache, + config.no_open, + config.fuse_killpriv_v2, + config.xattr, + config.drop_sys_resource, + config.no_readdir, + Box::new(handler), + epoll_mgr, + limiter, + ) + .map_err(FsDeviceError::CreateFsDevice)?, + ); + + Ok(device) + } + + /// Attach a backend fs to a VirtioFs device or detach a backend + /// fs from a Virtiofs device + pub fn manipulate_backend_fs( + device_mgr: &mut DeviceManager, + config: FsMountConfigInfo, + ) -> std::result::Result<(), FsDeviceError> { + let mut found = false; + + let mgr = &mut device_mgr.fs_manager.lock().unwrap(); + for info in mgr + .info_list + .iter() + .filter(|info| info.config.tag.as_str() == config.tag.as_str()) + { + found = true; + if let Some(device) = info.device.as_ref() { + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let mut guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device_mut(); + if let Some(virtio_fs_dev) = inner_dev + .as_any_mut() + .downcast_mut::>() + { + return virtio_fs_dev + .manipulate_backend_fs( + config.source, + config.fstype, + &config.mountpoint, + config.config, + &config.ops, + config.prefetch_list_path, + config.dax_threshold_size_kb, + ) + .map(|_p| ()) + .map_err(|e| FsDeviceError::AttachBackendFailed(e.to_string())); + } + } + } + } + if !found { + Err(FsDeviceError::AttachBackendFailed( + "fs tag not found".to_string(), + )) + } else { + Ok(()) + } + } + + /// Gets the index of the device with the specified `tag` if it exists in the list. + pub fn get_index_of_tag(&self, tag: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(tag)) + } + + /// Update the ratelimiter settings of a virtio fs device. + pub fn update_device_ratelimiters( + device_mgr: &mut DeviceManager, + new_cfg: FsDeviceConfigUpdateInfo, + ) -> std::result::Result<(), FsDeviceError> { + let mgr = &mut device_mgr.fs_manager.lock().unwrap(); + match mgr.get_index_of_tag(&new_cfg.tag) { + Some(index) => { + let config = &mut mgr.info_list[index].config; + config.rate_limiter = new_cfg.rate_limiter.clone(); + let device = mgr.info_list[index] + .device + .as_mut() + .ok_or_else(|| FsDeviceError::TagNotExists("".to_owned()))?; + + if let Some(mmio_dev) = device.as_any().downcast_ref::() { + let guard = mmio_dev.state(); + let inner_dev = guard.get_inner_device(); + if let Some(fs_dev) = inner_dev + .as_any() + .downcast_ref::>() + { + return fs_dev + .set_patch_rate_limiters(new_cfg.bytes(), new_cfg.ops()) + .map(|_p| ()) + .map_err(|_e| FsDeviceError::VirtioFsEpollHanderSendFail); + } + } + Ok(()) + } + None => Err(FsDeviceError::TagNotExists(new_cfg.tag)), + } + } +} + +impl Default for FsDeviceMgr { + /// Create a new `FsDeviceMgr` object.. + fn default() -> Self { + FsDeviceMgr { + info_list: DeviceConfigInfos::new(), + use_shared_irq: USE_SHARED_IRQ, + } + } +} diff --git a/src/dragonball/src/device_manager/memory_region_handler.rs b/src/dragonball/src/device_manager/memory_region_handler.rs new file mode 100644 index 000000000..2be149ef9 --- /dev/null +++ b/src/dragonball/src/device_manager/memory_region_handler.rs @@ -0,0 +1,110 @@ +// Copyright 2022 Alibaba, Inc. or its affiliates. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 + +use std::io; +use std::sync::Arc; + +use dbs_address_space::{AddressSpace, AddressSpaceRegion, AddressSpaceRegionType}; +use dbs_virtio_devices::{Error as VirtIoError, VirtioRegionHandler}; +use log::{debug, error}; +use vm_memory::{FileOffset, GuestAddressSpace, GuestMemoryRegion, GuestRegionMmap}; + +use crate::address_space_manager::GuestAddressSpaceImpl; + +/// This struct implements the VirtioRegionHandler trait, which inserts the memory +/// region of the virtio device into vm_as and address_space. +/// +/// * After region is inserted into the vm_as, the virtio device can read guest memory +/// data using vm_as.get_slice with GuestAddress. +/// +/// * Insert virtio memory into address_space so that the correct guest last address can +/// be found when initializing the e820 table. The e820 table is a table that describes +/// guest memory prepared before the guest startup. we need to config the correct guest +/// memory address and length in the table. The virtio device memory belongs to the MMIO +/// space and does not belong to the Guest Memory space. Therefore, it cannot be configured +/// into the e820 table. When creating AddressSpaceRegion we use +/// AddressSpaceRegionType::ReservedMemory type, in this way, address_space will know that +/// this region a special memory, it will don't put the this memory in e820 table. +/// +/// This function relies on the atomic-guest-memory feature. Without this feature enabled, memory +/// regions cannot be inserted into vm_as. Because the insert_region interface of vm_as does +/// not insert regions in place, but returns an array of inserted regions. We need to manually +/// replace this array of regions with vm_as, and that's what atomic-guest-memory feature does. +/// So we rely on the atomic-guest-memory feature here +pub struct DeviceVirtioRegionHandler { + pub(crate) vm_as: GuestAddressSpaceImpl, + pub(crate) address_space: AddressSpace, +} + +impl DeviceVirtioRegionHandler { + fn insert_address_space( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + let file_offset = match region.file_offset() { + // TODO: use from_arc + Some(f) => Some(FileOffset::new(f.file().try_clone()?, 0)), + None => None, + }; + + let as_region = Arc::new(AddressSpaceRegion::build( + AddressSpaceRegionType::DAXMemory, + region.start_addr(), + region.size() as u64, + None, + file_offset, + region.flags(), + false, + )); + + self.address_space.insert_region(as_region).map_err(|e| { + error!("inserting address apace error: {}", e); + // dbs-virtio-devices should not depend on dbs-address-space. + // So here io::Error is used instead of AddressSpaceError directly. + VirtIoError::IOError(io::Error::new( + io::ErrorKind::Other, + format!( + "invalid address space region ({0:#x}, {1:#x})", + region.start_addr().0, + region.len() + ), + )) + })?; + Ok(()) + } + + fn insert_vm_as( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + let vm_as_new = self.vm_as.memory().insert_region(region).map_err(|e| { + error!( + "DeviceVirtioRegionHandler failed to insert guest memory region: {:?}.", + e + ); + VirtIoError::InsertMmap(e) + })?; + // Do not expect poisoned lock here, so safe to unwrap(). + self.vm_as.lock().unwrap().replace(vm_as_new); + + Ok(()) + } +} + +impl VirtioRegionHandler for DeviceVirtioRegionHandler { + fn insert_region( + &mut self, + region: Arc, + ) -> std::result::Result<(), VirtIoError> { + debug!( + "add geust memory region to address_space/vm_as, new region: {:?}", + region + ); + + self.insert_address_space(region.clone())?; + self.insert_vm_as(region)?; + + Ok(()) + } +} diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 9313919f8..3cb0477e3 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -71,6 +71,16 @@ pub mod virtio_net_dev_mgr; #[cfg(feature = "virtio-net")] use self::virtio_net_dev_mgr::VirtioNetDeviceMgr; +#[cfg(feature = "virtio-fs")] +/// virtio-block device manager +pub mod fs_dev_mgr; +#[cfg(feature = "virtio-fs")] +use self::fs_dev_mgr::FsDeviceMgr; +#[cfg(feature = "virtio-fs")] +mod memory_region_handler; +#[cfg(feature = "virtio-fs")] +pub use self::memory_region_handler::*; + macro_rules! info( ($l:expr, $($args:tt)+) => { slog::info!($l, $($args)+; slog::o!("subsystem" => "device_manager")) @@ -435,6 +445,9 @@ pub struct DeviceManager { #[cfg(feature = "virtio-net")] pub(crate) virtio_net_manager: VirtioNetDeviceMgr, + + #[cfg(feature = "virtio-fs")] + fs_manager: Arc>, } impl DeviceManager { @@ -463,6 +476,8 @@ impl DeviceManager { block_manager: BlockDeviceMgr::default(), #[cfg(feature = "virtio-net")] virtio_net_manager: VirtioNetDeviceMgr::default(), + #[cfg(feature = "virtio-fs")] + fs_manager: Arc::new(Mutex::new(FsDeviceMgr::default())), } } @@ -588,6 +603,14 @@ impl DeviceManager { .attach_devices(&mut ctx) .map_err(StartMicrovmError::BlockDeviceError)?; + #[cfg(feature = "virtio-fs")] + { + let mut fs_manager = self.fs_manager.lock().unwrap(); + fs_manager + .attach_devices(&mut ctx) + .map_err(StartMicrovmError::FsDeviceError)?; + } + #[cfg(feature = "virtio-net")] self.virtio_net_manager .attach_devices(&mut ctx) diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 667405c5e..2c6fbf6c4 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -168,6 +168,11 @@ pub enum StartMicroVmError { /// Virtio-net errors. #[error("virtio-net errors: {0}")] VirtioNetDeviceError(#[source] device_manager::virtio_net_dev_mgr::VirtioNetDeviceError), + + #[cfg(feature = "virtio-fs")] + /// Virtio-fs errors. + #[error("virtio-fs errors: {0}")] + FsDeviceError(#[source] device_manager::fs_dev_mgr::FsDeviceError), } /// Errors associated with starting the instance. From 38957fe00b71a79fe1e2f16063a334e6ee79074c Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Sun, 3 Jul 2022 23:34:26 +0800 Subject: [PATCH 13/24] UT: fix compile error in unit tests fix compile error in unit tests for DummyConfigInfo. Signed-off-by: Chao Wu --- src/dragonball/src/address_space_manager.rs | 4 ++-- src/dragonball/src/api/v1/vmm_action.rs | 1 - src/dragonball/src/config_manager.rs | 3 +-- src/dragonball/src/vcpu/vcpu_impl.rs | 9 --------- 4 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/dragonball/src/address_space_manager.rs b/src/dragonball/src/address_space_manager.rs index 5a54c0da0..9992833e0 100644 --- a/src/dragonball/src/address_space_manager.rs +++ b/src/dragonball/src/address_space_manager.rs @@ -867,8 +867,8 @@ mod tests { assert_eq!(&builder.get_next_mem_file(), "/tmp/shmem"); assert_eq!(builder.mem_index, 3); - builder.set_prealloc(true); - builder.set_dirty_page_logging(true); + builder.toggle_prealloc(true); + builder.toggle_dirty_page_logging(true); assert!(builder.mem_prealloc); assert!(builder.dirty_page_logging); } diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index dfba2faf1..2e284d79b 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -10,7 +10,6 @@ use std::fs::File; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use log::{debug, error, info, warn}; -use vmm_sys_util::eventfd::EventFd; use crate::error::{Result, StartMicrovmError, StopMicrovmError}; use crate::event_manager::EventManager; diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs index cbfb79041..a80fd14c7 100644 --- a/src/dragonball/src/config_manager.rs +++ b/src/dragonball/src/config_manager.rs @@ -30,7 +30,6 @@ macro_rules! get_bucket_update { } }}; } -pub(crate) use get_bucket_update; /// Trait for generic configuration information. pub trait ConfigItem { @@ -414,7 +413,7 @@ mod tests { Exist, } - #[derive(Clone, Debug)] + #[derive(Clone, Debug, Default)] pub struct DummyConfigInfo { id: String, content: String, diff --git a/src/dragonball/src/vcpu/vcpu_impl.rs b/src/dragonball/src/vcpu/vcpu_impl.rs index 2dbccf66d..513fa435f 100644 --- a/src/dragonball/src/vcpu/vcpu_impl.rs +++ b/src/dragonball/src/vcpu/vcpu_impl.rs @@ -966,15 +966,6 @@ pub mod tests { fn test_vcpu_check_io_port_info() { let (vcpu, _receiver) = create_vcpu(); - // boot complete signal - let res = vcpu - .check_io_port_info( - MAGIC_IOPORT_SIGNAL_GUEST_BOOT_COMPLETE, - &[MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE], - ) - .unwrap(); - assert!(res); - // debug info signal let res = vcpu .check_io_port_info(MAGIC_IOPORT_DEBUG_INFO, &[0, 0, 0, 0]) From dd003ebe0e690e85cf7d68e05a7e4dc782237f35 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 4 Jul 2022 11:37:42 +0800 Subject: [PATCH 14/24] Dragonball: change error name and fix compile error Change error name from `StartMicrovm` to `StartMicroVm`, `StartMicrovmError` to `StartMicroVmError`. Besides, we fix a compile error in config_manager. Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 18 ++--- src/dragonball/src/config_manager.rs | 2 + .../src/device_manager/blk_dev_mgr.rs | 5 +- .../src/device_manager/fs_dev_mgr.rs | 3 +- src/dragonball/src/device_manager/mod.rs | 28 +++---- .../src/device_manager/virtio_net_dev_mgr.rs | 3 +- .../src/device_manager/vsock_dev_mgr.rs | 20 ++--- src/dragonball/src/lib.rs | 2 +- src/dragonball/src/vm/mod.rs | 74 +++++++++---------- src/dragonball/src/vm/x86_64.rs | 36 ++++----- 10 files changed, 97 insertions(+), 94 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 2e284d79b..6dedbfc28 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -11,7 +11,7 @@ use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use log::{debug, error, info, warn}; -use crate::error::{Result, StartMicrovmError, StopMicrovmError}; +use crate::error::{Result, StartMicroVmError, StopMicrovmError}; use crate::event_manager::EventManager; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; @@ -310,8 +310,8 @@ impl VmmService { } fn start_microvm(&mut self, vmm: &mut Vmm, event_mgr: &mut EventManager) -> VmmRequestResult { - use self::StartMicrovmError::MicroVMAlreadyRunning; - use self::VmmActionError::StartMicrovm; + use self::StartMicroVmError::MicroVMAlreadyRunning; + use self::VmmActionError::StartMicroVm; let vmm_seccomp_filter = vmm.vmm_seccomp_filter(); let vcpu_seccomp_filter = vmm.vcpu_seccomp_filter(); @@ -319,12 +319,12 @@ impl VmmService { .get_vm_by_id_mut("") .ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { - return Err(StartMicrovm(MicroVMAlreadyRunning)); + return Err(StartMicroVm(MicroVMAlreadyRunning)); } vm.start_microvm(event_mgr, vmm_seccomp_filter, vcpu_seccomp_filter) .map(|_| VmmData::Empty) - .map_err(StartMicrovm) + .map_err(StartMicroVm) } fn shutdown_microvm(&mut self, vmm: &mut Vmm) -> VmmRequestResult { @@ -512,7 +512,7 @@ impl VmmService { let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { - if let StartMicrovmError::UpcallNotReady = e { + if let StartMicroVmError::UpcallNotReady = e { return VmmActionError::UpcallNotReady; } VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot) @@ -573,12 +573,12 @@ impl VmmService { let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { - if let StartMicrovmError::MicroVMAlreadyRunning = e { + if let StartMicroVmError::MicroVMAlreadyRunning = e { VmmActionError::VirtioNet(VirtioNetDeviceError::UpdateNotAllowedPostBoot) - } else if let StartMicrovmError::UpcallNotReady = e { + } else if let StartMicroVmError::UpcallNotReady = e { VmmActionError::UpcallNotReady } else { - VmmActionError::StartMicrovm(e) + VmmActionError::StartMicroVm(e) } })?; diff --git a/src/dragonball/src/config_manager.rs b/src/dragonball/src/config_manager.rs index a80fd14c7..f855be126 100644 --- a/src/dragonball/src/config_manager.rs +++ b/src/dragonball/src/config_manager.rs @@ -10,6 +10,8 @@ use dbs_device::DeviceIo; use dbs_utils::rate_limiter::{RateLimiter, TokenBucket}; use serde_derive::{Deserialize, Serialize}; +/// Get bucket update for rate limiter. +#[macro_export] macro_rules! get_bucket_update { ($self:ident, $rate_limiter: ident, $metric: ident) => {{ match &$self.$rate_limiter { diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs index a624f68db..0d35c4f95 100644 --- a/src/dragonball/src/device_manager/blk_dev_mgr.rs +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -20,10 +20,9 @@ use dbs_virtio_devices::block::{aio::Aio, io_uring::IoUring, Block, LocalFile, U use serde_derive::{Deserialize, Serialize}; use crate::address_space_manager::GuestAddressSpaceImpl; -use crate::config_manager::{ - get_bucket_update, ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo, -}; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo}; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::get_bucket_update; use crate::vm::KernelConfigInfo; use super::DbsMmioV2Device; diff --git a/src/dragonball/src/device_manager/fs_dev_mgr.rs b/src/dragonball/src/device_manager/fs_dev_mgr.rs index c8fb55366..d699dcbde 100644 --- a/src/dragonball/src/device_manager/fs_dev_mgr.rs +++ b/src/dragonball/src/device_manager/fs_dev_mgr.rs @@ -12,11 +12,12 @@ use slog::{error, info}; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::config_manager::{ - get_bucket_update, ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, + ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, }; use crate::device_manager::{ DbsMmioV2Device, DeviceManager, DeviceMgrError, DeviceOpContext, DeviceVirtioRegionHandler, }; +use crate::get_bucket_update; use super::DbsVirtioDevice; diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 3cb0477e3..53a1e2df9 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -40,7 +40,7 @@ use dbs_upcall::{ use dbs_virtio_devices::vsock::backend::VsockInnerConnector; use crate::address_space_manager::GuestAddressSpaceImpl; -use crate::error::StartMicrovmError; +use crate::error::StartMicroVmError; use crate::resource_manager::ResourceManager; use crate::vm::{KernelConfigInfo, Vm}; use crate::IoManagerCached; @@ -503,7 +503,7 @@ impl DeviceManager { pub fn create_legacy_devices( &mut self, ctx: &mut DeviceOpContext, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { #[cfg(target_arch = "x86_64")] { let mut tx = ctx.io_context.begin_tx(); @@ -517,7 +517,7 @@ impl DeviceManager { } Err(e) => { ctx.io_context.cancel_tx(tx); - return Err(StartMicrovmError::LegacyDevice(e)); + return Err(StartMicroVmError::LegacyDevice(e)); } } } @@ -531,10 +531,10 @@ impl DeviceManager { dmesg_fifo: Option>, com1_sock_path: Option, _ctx: &mut DeviceOpContext, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { // Connect serial ports to the console and dmesg_fifo. self.set_guest_kernel_log_stream(dmesg_fifo) - .map_err(|_| StartMicrovmError::EventFd)?; + .map_err(|_| StartMicroVmError::EventFd)?; info!(self.logger, "init console path: {:?}", com1_sock_path); if let Some(path) = com1_sock_path { @@ -542,13 +542,13 @@ impl DeviceManager { let com1 = legacy_manager.get_com1_serial(); self.con_manager .create_socket_console(com1, path) - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; } } else if let Some(legacy_manager) = self.legacy_manager.as_ref() { let com1 = legacy_manager.get_com1_serial(); self.con_manager .create_stdio_console(com1) - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; } Ok(()) @@ -586,7 +586,7 @@ impl DeviceManager { com1_sock_path: Option, dmesg_fifo: Option>, address_space: Option<&AddressSpace>, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { let mut ctx = DeviceOpContext::new( Some(epoll_mgr), self, @@ -601,20 +601,20 @@ impl DeviceManager { #[cfg(feature = "virtio-blk")] self.block_manager .attach_devices(&mut ctx) - .map_err(StartMicrovmError::BlockDeviceError)?; + .map_err(StartMicroVmError::BlockDeviceError)?; #[cfg(feature = "virtio-fs")] { let mut fs_manager = self.fs_manager.lock().unwrap(); fs_manager .attach_devices(&mut ctx) - .map_err(StartMicrovmError::FsDeviceError)?; + .map_err(StartMicroVmError::FsDeviceError)?; } #[cfg(feature = "virtio-net")] self.virtio_net_manager .attach_devices(&mut ctx) - .map_err(StartMicrovmError::VirtioNetDeviceError)?; + .map_err(StartMicroVmError::VirtioNetDeviceError)?; #[cfg(feature = "virtio-vsock")] self.vsock_manager.attach_devices(&mut ctx)?; @@ -622,15 +622,15 @@ impl DeviceManager { #[cfg(feature = "virtio-blk")] self.block_manager .generate_kernel_boot_args(kernel_config) - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; ctx.generate_kernel_boot_args(kernel_config) - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; Ok(()) } /// Start all registered devices when booting the associated virtual machine. - pub fn start_devices(&mut self) -> std::result::Result<(), StartMicrovmError> { + pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { Ok(()) } diff --git a/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs index 65f29fbcd..3e81f2948 100644 --- a/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs +++ b/src/dragonball/src/device_manager/virtio_net_dev_mgr.rs @@ -18,9 +18,10 @@ use serde_derive::{Deserialize, Serialize}; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::config_manager::{ - get_bucket_update, ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, + ConfigItem, DeviceConfigInfo, DeviceConfigInfos, RateLimiterConfigInfo, }; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; +use crate::get_bucket_update; use super::DbsMmioV2Device; diff --git a/src/dragonball/src/device_manager/vsock_dev_mgr.rs b/src/dragonball/src/device_manager/vsock_dev_mgr.rs index cec58d7de..5e49bbfb3 100644 --- a/src/dragonball/src/device_manager/vsock_dev_mgr.rs +++ b/src/dragonball/src/device_manager/vsock_dev_mgr.rs @@ -17,7 +17,7 @@ use dbs_virtio_devices::vsock::Vsock; use dbs_virtio_devices::Error as VirtioError; use serde_derive::{Deserialize, Serialize}; -use super::StartMicrovmError; +use super::StartMicroVmError; use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; use crate::device_manager::{DeviceManager, DeviceOpContext}; @@ -185,11 +185,11 @@ impl VsockDeviceMgr { pub fn attach_devices( &mut self, ctx: &mut DeviceOpContext, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { let epoll_mgr = ctx .epoll_mgr .clone() - .ok_or(StartMicrovmError::CreateVsockDevice( + .ok_or(StartMicroVmError::CreateVsockDevice( virtio::Error::InvalidInput, ))?; @@ -209,32 +209,32 @@ impl VsockDeviceMgr { epoll_mgr.clone(), ) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?, + .map_err(StartMicroVmError::CreateVsockDevice)?, ); if let Some(uds_path) = info.config.uds_path.as_ref() { let unix_backend = VsockUnixStreamBackend::new(uds_path.clone()) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?; + .map_err(StartMicroVmError::CreateVsockDevice)?; device .add_backend(Box::new(unix_backend), true) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?; + .map_err(StartMicroVmError::CreateVsockDevice)?; } if let Some(tcp_addr) = info.config.tcp_addr.as_ref() { let tcp_backend = VsockTcpBackend::new(tcp_addr.clone()) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?; + .map_err(StartMicroVmError::CreateVsockDevice)?; device .add_backend(Box::new(tcp_backend), false) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?; + .map_err(StartMicroVmError::CreateVsockDevice)?; } // add inner backend to the the first added vsock device if let Some(inner_backend) = self.default_inner_backend.take() { device .add_backend(Box::new(inner_backend), false) .map_err(VirtioError::VirtioVsockError) - .map_err(StartMicrovmError::CreateVsockDevice)?; + .map_err(StartMicroVmError::CreateVsockDevice)?; } let device = DeviceManager::create_mmio_virtio_device_with_features( device, @@ -243,7 +243,7 @@ impl VsockDeviceMgr { info.config.use_shared_irq.unwrap_or(self.use_shared_irq), info.config.use_generic_irq.unwrap_or(USE_GENERIC_IRQ), ) - .map_err(StartMicrovmError::RegisterVsockDevice)?; + .map_err(StartMicroVmError::RegisterVsockDevice)?; info.device = Some(device); } diff --git a/src/dragonball/src/lib.rs b/src/dragonball/src/lib.rs index bd58159ac..7371e8213 100644 --- a/src/dragonball/src/lib.rs +++ b/src/dragonball/src/lib.rs @@ -36,7 +36,7 @@ mod event_manager; mod io_manager; mod vmm; -pub use self::error::StartMicrovmError; +pub use self::error::StartMicroVmError; pub use self::io_manager::IoManagerCached; pub use self::vmm::Vmm; diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 4063b7770..8c3cb21c9 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -30,7 +30,7 @@ use crate::address_space_manager::{ use crate::api::v1::{InstanceInfo, InstanceState}; use crate::device_manager::console_manager::DmesgWriter; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; -use crate::error::{LoadInitrdError, Result, StartMicrovmError, StopMicrovmError}; +use crate::error::{LoadInitrdError, Result, StartMicroVmError, StopMicrovmError}; use crate::event_manager::EventManager; use crate::kvm_context::KvmContext; use crate::resource_manager::ResourceManager; @@ -357,7 +357,7 @@ impl Vm { pub fn create_device_op_context( &mut self, epoll_mgr: Option, - ) -> std::result::Result { + ) -> std::result::Result { if !self.is_vm_initialized() { Ok(DeviceOpContext::create_boot_ctx(self, epoll_mgr)) } else { @@ -365,9 +365,9 @@ impl Vm { } } - pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicrovmError> { + pub(crate) fn check_health(&self) -> std::result::Result<(), StartMicroVmError> { if self.kernel_config.is_none() { - return Err(StartMicrovmError::MissingKernelConfig); + return Err(StartMicroVmError::MissingKernelConfig); } Ok(()) } @@ -432,25 +432,25 @@ impl Vm { pub(crate) fn init_devices( &mut self, epoll_manager: EpollManager, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM: initializing devices ..."); let com1_sock_path = self.vm_config.serial_path.clone(); let kernel_config = self .kernel_config .as_mut() - .ok_or(StartMicrovmError::MissingKernelConfig)?; + .ok_or(StartMicroVmError::MissingKernelConfig)?; info!(self.logger, "VM: create interrupt manager"); self.device_manager .create_interrupt_manager() - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; info!(self.logger, "VM: create devices"); let vm_as = self.address_space .get_vm_as() - .ok_or(StartMicrovmError::AddressManagerError( + .ok_or(StartMicroVmError::AddressManagerError( AddressManagerError::GuestMemoryNotInitialized, ))?; self.device_manager.create_devices( @@ -501,7 +501,7 @@ impl Vm { Box::new(DmesgWriter::new(&self.logger)) } - pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicrovmError> { + pub(crate) fn init_guest_memory(&mut self) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM: initializing guest memory..."); // We are not allowing reinitialization of vm guest memory. if self.address_space.is_initialized() { @@ -512,7 +512,7 @@ impl Vm { let mem_size = (self.vm_config.mem_size_mib as u64) << 20; let reserve_memory_bytes = self.vm_config.reserve_memory_bytes; if reserve_memory_bytes > (mem_size >> 1) as u64 { - return Err(StartMicrovmError::ConfigureInvalid(String::from( + return Err(StartMicroVmError::ConfigureInvalid(String::from( "invalid reserve_memory_bytes", ))); } @@ -553,11 +553,11 @@ impl Vm { ); let mut address_space_param = AddressSpaceMgrBuilder::new(&mem_type, &mem_file_path) - .map_err(StartMicrovmError::AddressManagerError)?; + .map_err(StartMicroVmError::AddressManagerError)?; address_space_param.set_kvm_vm_fd(self.vm_fd.clone()); self.address_space .create_address_space(&self.resource_manager, &numa_regions, address_space_param) - .map_err(StartMicrovmError::AddressManagerError)?; + .map_err(StartMicroVmError::AddressManagerError)?; info!(self.logger, "VM: initializing guest memory done"); Ok(()) @@ -566,18 +566,18 @@ impl Vm { fn init_configure_system( &mut self, vm_as: &GuestAddressSpaceImpl, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { let vm_memory = vm_as.memory(); let kernel_config = self .kernel_config .as_ref() - .ok_or(StartMicrovmError::MissingKernelConfig)?; + .ok_or(StartMicroVmError::MissingKernelConfig)?; //let cmdline = kernel_config.cmdline.clone(); let initrd: Option = match kernel_config.initrd_file() { Some(f) => { let initrd_file = f.try_clone(); if initrd_file.is_err() { - return Err(StartMicrovmError::InitrdLoader( + return Err(StartMicroVmError::InitrdLoader( LoadInitrdError::ReadInitrd(io::Error::from(io::ErrorKind::InvalidData)), )); } @@ -638,12 +638,12 @@ impl Vm { fn load_kernel( &mut self, vm_memory: &GuestMemoryImpl, - ) -> std::result::Result { + ) -> std::result::Result { // This is the easy way out of consuming the value of the kernel_cmdline. let kernel_config = self .kernel_config .as_mut() - .ok_or(StartMicrovmError::MissingKernelConfig)?; + .ok_or(StartMicroVmError::MissingKernelConfig)?; let high_mem_addr = GuestAddress(dbs_boot::get_kernel_start()); #[cfg(target_arch = "x86_64")] @@ -653,7 +653,7 @@ impl Vm { kernel_config.kernel_file_mut(), Some(high_mem_addr), ) - .map_err(StartMicrovmError::KernelLoader); + .map_err(StartMicroVmError::KernelLoader); #[cfg(target_arch = "aarch64")] return linux_loader::loader::pe::PE::load( @@ -662,7 +662,7 @@ impl Vm { kernel_config.kernel_file_mut(), Some(high_mem_addr), ) - .map_err(StartMicrovmError::KernelLoader); + .map_err(StartMicroVmError::KernelLoader); } /// Set up the initial microVM state and start the vCPU threads. @@ -674,10 +674,10 @@ impl Vm { event_mgr: &mut EventManager, vmm_seccomp_filter: BpfProgram, vcpu_seccomp_filter: BpfProgram, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM: received instance start command"); if self.is_vm_initialized() { - return Err(StartMicrovmError::MicroVMAlreadyRunning); + return Err(StartMicroVmError::MicroVMAlreadyRunning); } let request_ts = TimestampUs::default(); @@ -697,12 +697,12 @@ impl Vm { let vm_as = self .vm_as() .cloned() - .ok_or(StartMicrovmError::AddressManagerError( + .ok_or(StartMicroVmError::AddressManagerError( AddressManagerError::GuestMemoryNotInitialized, ))?; self.init_vcpu_manager(vm_as.clone(), vcpu_seccomp_filter) - .map_err(StartMicrovmError::Vcpu)?; + .map_err(StartMicroVmError::Vcpu)?; self.init_microvm(event_mgr.epoll_manager(), vm_as.clone(), request_ts)?; self.init_configure_system(&vm_as)?; self.init_upcall()?; @@ -712,9 +712,9 @@ impl Vm { info!(self.logger, "VM: start vcpus"); self.vcpu_manager() - .map_err(StartMicrovmError::Vcpu)? + .map_err(StartMicroVmError::Vcpu)? .start_boot_vcpus(vmm_seccomp_filter) - .map_err(StartMicrovmError::Vcpu)?; + .map_err(StartMicroVmError::Vcpu)?; // Use expect() to crash if the other thread poisoned this lock. self.shared_info @@ -730,29 +730,29 @@ impl Vm { #[cfg(feature = "hotplug")] impl Vm { /// initialize upcall client for guest os - fn new_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + fn new_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { // get vsock inner connector for upcall let inner_connector = self .device_manager .get_vsock_inner_connector() - .ok_or(StartMicrovmError::UpcallMissVsock)?; + .ok_or(StartMicroVmError::UpcallMissVsock)?; let mut upcall_client = UpcallClient::new( inner_connector, self.epoll_manager.clone(), DevMgrService::default(), ) - .map_err(StartMicrovmError::UpcallInitError)?; + .map_err(StartMicroVmError::UpcallInitError)?; upcall_client .connect() - .map_err(StartMicrovmError::UpcallConnectError)?; + .map_err(StartMicroVmError::UpcallConnectError)?; self.upcall_client = Some(Arc::new(upcall_client)); info!(self.logger, "upcall client init success"); Ok(()) } - fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + fn init_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM upcall init"); if let Err(e) = self.new_upcall() { info!( @@ -762,7 +762,7 @@ impl Vm { Err(e) } else { self.vcpu_manager() - .map_err(StartMicrovmError::Vcpu)? + .map_err(StartMicroVmError::Vcpu)? .set_upcall_channel(self.upcall_client().clone()); Ok(()) } @@ -776,27 +776,27 @@ impl Vm { fn create_device_hotplug_context( &self, epoll_mgr: Option, - ) -> std::result::Result { + ) -> std::result::Result { if self.upcall_client().is_none() { - Err(StartMicrovmError::UpcallMissVsock) + Err(StartMicroVmError::UpcallMissVsock) } else if self.is_upcall_client_ready() { Ok(DeviceOpContext::create_hotplug_ctx(self, epoll_mgr)) } else { - Err(StartMicrovmError::UpcallNotReady) + Err(StartMicroVmError::UpcallNotReady) } } } #[cfg(not(feature = "hotplug"))] impl Vm { - fn init_upcall(&mut self) -> std::result::Result<(), StartMicrovmError> { + fn init_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { Ok(()) } fn create_device_hotplug_context( &self, _epoll_mgr: Option, - ) -> std::result::Result { - Err(StartMicrovmError::MicroVMAlreadyRunning) + ) -> std::result::Result { + Err(StartMicroVmError::MicroVMAlreadyRunning) } } diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 6bd74acc0..3fcebdc20 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -20,7 +20,7 @@ use slog::info; use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemory}; use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; -use crate::error::{Error, Result, StartMicrovmError}; +use crate::error::{Error, Result, StartMicroVmError}; use crate::event_manager::EventManager; use crate::vm::{Vm, VmError}; @@ -183,7 +183,7 @@ impl Vm { epoll_mgr: EpollManager, vm_as: GuestAddressSpaceImpl, request_ts: TimestampUs, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM: start initializing microvm ..."); self.init_tss()?; @@ -195,9 +195,9 @@ impl Vm { let reset_event_fd = self.device_manager.get_reset_eventfd().unwrap(); self.vcpu_manager() - .map_err(StartMicrovmError::Vcpu)? + .map_err(StartMicroVmError::Vcpu)? .set_reset_event_fd(reset_event_fd) - .map_err(StartMicrovmError::Vcpu)?; + .map_err(StartMicroVmError::Vcpu)?; if self.vm_config.cpu_pm == "on" { // TODO: add cpu_pm support. issue #4590. @@ -207,9 +207,9 @@ impl Vm { let vm_memory = vm_as.memory(); let kernel_loader_result = self.load_kernel(vm_memory.deref())?; self.vcpu_manager() - .map_err(StartMicrovmError::Vcpu)? + .map_err(StartMicroVmError::Vcpu)? .create_boot_vcpus(request_ts, kernel_loader_result.kernel_load) - .map_err(StartMicrovmError::Vcpu)?; + .map_err(StartMicroVmError::Vcpu)?; info!(self.logger, "VM: initializing microvm done"); Ok(()) @@ -224,10 +224,10 @@ impl Vm { vm_memory: &GuestMemoryImpl, cmdline: &Cmdline, initrd: Option, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { let cmdline_addr = GuestAddress(dbs_boot::layout::CMDLINE_START); linux_loader::loader::load_cmdline(vm_memory, cmdline_addr, cmdline) - .map_err(StartMicrovmError::LoadCommandline)?; + .map_err(StartMicroVmError::LoadCommandline)?; configure_system( vm_memory, @@ -239,27 +239,27 @@ impl Vm { self.vm_config.max_vcpu_count, self.vm_config.reserve_memory_bytes, ) - .map_err(StartMicrovmError::ConfigureSystem) + .map_err(StartMicroVmError::ConfigureSystem) } /// Initializes the guest memory. - pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicrovmError> { + pub(crate) fn init_tss(&mut self) -> std::result::Result<(), StartMicroVmError> { self.vm_fd .set_tss_address(dbs_boot::layout::KVM_TSS_ADDRESS.try_into().unwrap()) - .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) } /// Creates the irq chip and an in-kernel device model for the PIT. pub(crate) fn setup_interrupt_controller( &mut self, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { self.vm_fd .create_irq_chip() - .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) } /// Creates an in-kernel device model for the PIT. - pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicrovmError> { + pub(crate) fn create_pit(&self) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM: create pit"); // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. @@ -272,20 +272,20 @@ impl Vm { // correct amount of memory from our pointer, and we verify the return result. self.vm_fd .create_pit2(pit_config) - .map_err(|e| StartMicrovmError::ConfigureVm(VmError::VmSetup(e))) + .map_err(|e| StartMicroVmError::ConfigureVm(VmError::VmSetup(e))) } pub(crate) fn register_events( &mut self, event_mgr: &mut EventManager, - ) -> std::result::Result<(), StartMicrovmError> { + ) -> std::result::Result<(), StartMicroVmError> { let reset_evt = self .device_manager .get_reset_eventfd() - .map_err(StartMicrovmError::DeviceManager)?; + .map_err(StartMicroVmError::DeviceManager)?; event_mgr .register_exit_eventfd(&reset_evt) - .map_err(|_| StartMicrovmError::RegisterEvent)?; + .map_err(|_| StartMicroVmError::RegisterEvent)?; self.reset_eventfd = Some(reset_evt); Ok(()) From d88b1bf01c152cae589cb13c238c3c85cc67dcb0 Mon Sep 17 00:00:00 2001 From: wllenyj Date: Thu, 26 May 2022 17:29:50 +0800 Subject: [PATCH 15/24] dragonball: update vsock dependency 1. fix vsock device init failed 2. fix VsockDeviceConfigInfo not found Signed-off-by: wllenyj --- src/dragonball/src/api/v1/mod.rs | 4 +--- src/dragonball/src/api/v1/vmm_action.rs | 8 ++++---- src/dragonball/src/device_manager/vsock_dev_mgr.rs | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/dragonball/src/api/v1/mod.rs b/src/dragonball/src/api/v1/mod.rs index 2077c982e..99e3075eb 100644 --- a/src/dragonball/src/api/v1/mod.rs +++ b/src/dragonball/src/api/v1/mod.rs @@ -4,9 +4,7 @@ //! API Version 1 related data structures to configure the vmm. mod vmm_action; -pub use self::vmm_action::{ - VmmAction, VmmActionError, VmmData, VmmRequest, VmmResponse, VmmService, -}; +pub use self::vmm_action::*; /// Wrapper for configuring the microVM boot source. mod boot_source; diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 6dedbfc28..2962994b4 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -17,20 +17,20 @@ use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; #[cfg(feature = "virtio-blk")] -use crate::device_manager::blk_dev_mgr::{ +pub use crate::device_manager::blk_dev_mgr::{ BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, }; #[cfg(feature = "virtio-fs")] -use crate::device_manager::fs_dev_mgr::{ +pub use crate::device_manager::fs_dev_mgr::{ FsDeviceConfigInfo, FsDeviceConfigUpdateInfo, FsDeviceError, FsDeviceMgr, FsMountConfigInfo, }; #[cfg(feature = "virtio-net")] -use crate::device_manager::virtio_net_dev_mgr::{ +pub use crate::device_manager::virtio_net_dev_mgr::{ VirtioNetDeviceConfigInfo, VirtioNetDeviceConfigUpdateInfo, VirtioNetDeviceError, VirtioNetDeviceMgr, }; #[cfg(feature = "virtio-vsock")] -use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; +pub use crate::device_manager::vsock_dev_mgr::{VsockDeviceConfigInfo, VsockDeviceError}; use super::*; diff --git a/src/dragonball/src/device_manager/vsock_dev_mgr.rs b/src/dragonball/src/device_manager/vsock_dev_mgr.rs index 5e49bbfb3..4f0f07413 100644 --- a/src/dragonball/src/device_manager/vsock_dev_mgr.rs +++ b/src/dragonball/src/device_manager/vsock_dev_mgr.rs @@ -88,6 +88,20 @@ pub struct VsockDeviceConfigInfo { pub use_generic_irq: Option, } +impl Default for VsockDeviceConfigInfo { + fn default() -> Self { + Self { + id: String::default(), + guest_cid: 0, + uds_path: None, + tcp_addr: None, + queue_size: Vec::from(QUEUE_SIZES), + use_shared_irq: None, + use_generic_irq: None, + } + } +} + impl VsockDeviceConfigInfo { /// Get number and size of queues supported. pub fn queue_sizes(&self) -> Vec { From bde6609b93c33db1c86e7cf3d40a0b9abce5110f Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 4 Jul 2022 19:47:14 +0800 Subject: [PATCH 16/24] hotplug: add room for other hotplug solution Add room in the code for other hotplug solution without upcall Signed-off-by: Chao Wu --- src/dragonball/src/device_manager/mod.rs | 4 ++-- src/dragonball/src/error.rs | 4 ++-- src/dragonball/src/vcpu/vcpu_manager.rs | 12 +++++++----- src/dragonball/src/vm/mod.rs | 22 ++++++++++++++++++---- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 53a1e2df9..af19fdeac 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -323,11 +323,11 @@ impl DeviceOpContext { } } -#[cfg(not(feature = "hotplug"))] +#[cfg(all(feature = "hotplug", not(feature = "dbs-upcall")))] impl DeviceOpContext { pub(crate) fn insert_hotplug_mmio_device( &self, - _dev: &Arc, + _dev: &Arc, _callback: Option<()>, ) -> Result<()> { Err(DeviceMgrError::InvalidOperation) diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 2c6fbf6c4..b4ee40119 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -149,12 +149,12 @@ pub enum StartMicroVmError { #[error("vCPU related error: {0}")] Vcpu(#[source] vcpu::VcpuManagerError), - #[cfg(feature = "hotplug")] + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] /// Upcall initialize Error. #[error("failure while initializing the upcall client: {0}")] UpcallInitError(#[source] dbs_upcall::UpcallClientError), - #[cfg(feature = "hotplug")] + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] /// Upcall connect Error. #[error("failure while connecting the upcall client: {0}")] UpcallConnectError(#[source] dbs_upcall::UpcallClientError), diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 278a88ece..f6f3e93ff 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -147,6 +147,7 @@ pub enum VcpuResizeError { #[error("Removable vcpu not enough, removable vcpu num: {0}, number to remove: {1}, present vcpu count {2}")] LackRemovableVcpus(u16, u16, u16), + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] /// Cannot update the configuration by upcall channel. #[error("cannot update the configuration by upcall channel: {0}")] Upcall(#[source] dbs_upcall::UpcallClientError), @@ -782,13 +783,14 @@ impl VcpuManager { #[cfg(feature = "hotplug")] mod hotplug { + #[cfg(feature = "dbs-upcall")] + use super::*; + #[cfg(feature = "dbs-upcall")] + use dbs_upcall::{CpuDevRequest, DevMgrRequest}; + #[cfg(feature = "dbs-upcall")] use std::cmp::Ordering; - use dbs_upcall::{CpuDevRequest, DevMgrRequest}; - - use super::*; - - #[cfg(all(target_arch = "x86_64"))] + #[cfg(all(target_arch = "x86_64", feature = "dbs-upcall"))] use dbs_boot::mptable::APIC_VERSION; #[cfg(all(target_arch = "aarch64"))] const APIC_VERSION: u8 = 0; diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 8c3cb21c9..31b41baed 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -20,7 +20,7 @@ use slog::{error, info}; use vm_memory::{Bytes, GuestAddress, GuestAddressSpace}; use vmm_sys_util::eventfd::EventFd; -#[cfg(feature = "hotplug")] +#[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] use dbs_upcall::{DevMgrService, UpcallClient}; use crate::address_space_manager::{ @@ -195,7 +195,7 @@ pub struct Vm { #[cfg(target_arch = "aarch64")] irqchip_handle: Option>, - #[cfg(feature = "hotplug")] + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: Option>>, } @@ -240,7 +240,7 @@ impl Vm { #[cfg(target_arch = "aarch64")] irqchip_handle: None, - #[cfg(feature = "hotplug")] + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: None, }) } @@ -305,7 +305,7 @@ impl Vm { /// returns true if system upcall service is ready pub fn is_upcall_client_ready(&self) -> bool { - #[cfg(feature = "hotplug")] + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] { if let Some(upcall_client) = self.upcall_client() { return upcall_client.is_ready(); @@ -705,6 +705,7 @@ impl Vm { .map_err(StartMicroVmError::Vcpu)?; self.init_microvm(event_mgr.epoll_manager(), vm_as.clone(), request_ts)?; self.init_configure_system(&vm_as)?; + #[cfg(feature = "dbs-upcall")] self.init_upcall()?; info!(self.logger, "VM: register events"); @@ -730,6 +731,7 @@ impl Vm { #[cfg(feature = "hotplug")] impl Vm { /// initialize upcall client for guest os + #[cfg(feature = "dbs-upcall")] fn new_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { // get vsock inner connector for upcall let inner_connector = self @@ -752,6 +754,7 @@ impl Vm { Ok(()) } + #[cfg(feature = "dbs-upcall")] fn init_upcall(&mut self) -> std::result::Result<(), StartMicroVmError> { info!(self.logger, "VM upcall init"); if let Err(e) = self.new_upcall() { @@ -769,10 +772,12 @@ impl Vm { } /// Get upcall client. + #[cfg(feature = "dbs-upcall")] pub fn upcall_client(&self) -> &Option>> { &self.upcall_client } + #[cfg(feature = "dbs-upcall")] fn create_device_hotplug_context( &self, epoll_mgr: Option, @@ -785,6 +790,15 @@ impl Vm { Err(StartMicroVmError::UpcallNotReady) } } + + // We will support hotplug without upcall in future stages. + #[cfg(not(feature = "dbs-upcall"))] + fn create_device_hotplug_context( + &self, + _epoll_mgr: Option, + ) -> std::result::Result { + Err(StartMicroVmError::MicroVMAlreadyRunning) + } } #[cfg(not(feature = "hotplug"))] From cb54ac6c6e2182b32acda86980073c5621b5dc53 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 4 Jul 2022 20:27:04 +0800 Subject: [PATCH 17/24] memory: remove reserve_memory_bytes This is currently an unsupported feature and we will remove it from the current code. Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 10 ---------- src/dragonball/src/vm/mod.rs | 13 +------------ src/dragonball/src/vm/x86_64.rs | 13 ------------- 3 files changed, 1 insertion(+), 35 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 2962994b4..1bd40cad0 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -428,16 +428,6 @@ impl VmmService { config.mem_file_path = machine_config.mem_file_path.clone(); - let reserve_memory_bytes = machine_config.reserve_memory_bytes; - // Reserved memory must be 2MB aligned and less than half of the total memory. - if reserve_memory_bytes % 0x200000 != 0 - || reserve_memory_bytes > (config.mem_size_mib as u64) << 20 - { - return Err(MachineConfig(InvalidReservedMemorySize( - reserve_memory_bytes as usize >> 20, - ))); - } - config.reserve_memory_bytes = reserve_memory_bytes; if config.mem_type == "hugetlbfs" && config.mem_file_path.is_empty() { return Err(MachineConfig(InvalidMemFilePath("".to_owned()))); } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 31b41baed..e7248a811 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -125,8 +125,6 @@ pub struct VmConfigInfo { pub mem_file_path: String, /// The memory size in MiB. pub mem_size_mib: usize, - /// reserve memory bytes - pub reserve_memory_bytes: u64, /// sock path pub serial_path: Option, @@ -149,7 +147,6 @@ impl Default for VmConfigInfo { mem_type: String::from("shmem"), mem_file_path: String::from(""), mem_size_mib: 128, - reserve_memory_bytes: 0, serial_path: None, } } @@ -510,12 +507,6 @@ impl Vm { // vcpu boot up require local memory. reserve 100 MiB memory let mem_size = (self.vm_config.mem_size_mib as u64) << 20; - let reserve_memory_bytes = self.vm_config.reserve_memory_bytes; - if reserve_memory_bytes > (mem_size >> 1) as u64 { - return Err(StartMicroVmError::ConfigureInvalid(String::from( - "invalid reserve_memory_bytes", - ))); - } let mem_type = self.vm_config.mem_type.clone(); let mut mem_file_path = String::from(""); @@ -543,12 +534,10 @@ impl Vm { info!( self.logger, - "VM: mem_type:{} mem_file_path:{}, mem_size:{}, reserve_memory_bytes:{}, \ - numa_regions:{:?}", + "VM: mem_type:{} mem_file_path:{}, mem_size:{}, numa_regions:{:?}", mem_type, mem_file_path, mem_size, - reserve_memory_bytes, numa_regions, ); diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 3fcebdc20..96ca0acb1 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -48,7 +48,6 @@ fn configure_system( initrd: &Option, boot_cpus: u8, max_cpus: u8, - rsv_mem_bytes: u64, ) -> super::Result<()> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; @@ -111,17 +110,6 @@ fn configure_system( } } - // reserve memory from microVM. - if rsv_mem_bytes > 0 { - add_e820_entry( - &mut params.0, - mem_end.raw_value().max(mmio_end.raw_value()) + 1, - rsv_mem_bytes, - bootparam::E820_RESERVED, - ) - .map_err(Error::BootSystem)?; - } - let zero_page_addr = GuestAddress(layout::ZERO_PAGE_START); guest_mem .checked_offset(zero_page_addr, mem::size_of::()) @@ -237,7 +225,6 @@ impl Vm { &initrd, self.vm_config.vcpu_count, self.vm_config.max_vcpu_count, - self.vm_config.reserve_memory_bytes, ) .map_err(StartMicroVmError::ConfigureSystem) } From b646d7cb37d77fb1b928490c27e64942254da596 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 4 Jul 2022 20:59:45 +0800 Subject: [PATCH 18/24] config: remove ht_enabled Since cpu topology could tell whether hyper thread is enabled or not, we removed ht_enabled config from VmConfigInfo Signed-off-by: Chao Wu --- src/dragonball/src/vm/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index e7248a811..7afdefedc 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -110,8 +110,6 @@ pub struct VmConfigInfo { pub vcpu_count: u8, /// Max number of vcpu can be added pub max_vcpu_count: u8, - /// Enable or disable hyperthreading. - pub ht_enabled: bool, /// cpu power management. pub cpu_pm: String, /// cpu topology information @@ -135,7 +133,6 @@ impl Default for VmConfigInfo { VmConfigInfo { vcpu_count: 1, max_vcpu_count: 1, - ht_enabled: false, cpu_pm: String::from("on"), cpu_topology: CpuTopology { threads_per_core: 1, From 5ea35ddcdce13a79c2b2fed02fbc1bf6c8d602a9 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 4 Jul 2022 23:50:25 +0800 Subject: [PATCH 19/24] refractor: remove redundant by_id remove redundant by_id in get_vm_by_id_mut and get_vm_by_id. They are optimized to get_vm_mut and get_vm. Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 48 +++++++------------------ src/dragonball/src/event_manager.rs | 2 +- src/dragonball/src/vmm.rs | 6 ++-- 3 files changed, 16 insertions(+), 40 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 1bd40cad0..ae9db4fb4 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -279,9 +279,7 @@ impl VmmService { }; use super::VmmActionError::BootSource; - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(BootSource(UpdateNotAllowedPostBoot)); } @@ -315,9 +313,7 @@ impl VmmService { let vmm_seccomp_filter = vmm.vmm_seccomp_filter(); let vcpu_seccomp_filter = vmm.vcpu_seccomp_filter(); - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(StartMicroVm(MicroVMAlreadyRunning)); } @@ -342,9 +338,7 @@ impl VmmService { use self::VmConfigError::*; use self::VmmActionError::MachineConfig; - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(MachineConfig(UpdateNotAllowedPostBoot)); } @@ -455,9 +449,7 @@ impl VmmService { #[cfg(feature = "virtio-vsock")] fn add_vsock_device(&self, vmm: &mut Vmm, config: VsockDeviceConfigInfo) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(VmmActionError::Vsock( VsockDeviceError::UpdateNotAllowedPostBoot, @@ -496,9 +488,7 @@ impl VmmService { event_mgr: &mut EventManager, config: BlockDeviceConfigInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { @@ -520,9 +510,7 @@ impl VmmService { vmm: &mut Vmm, config: BlockDeviceConfigUpdateInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; BlockDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) .map(|_| VmmData::Empty) @@ -538,9 +526,7 @@ impl VmmService { event_mgr: &mut EventManager, drive_id: &str, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|_| VmmActionError::Block(BlockDeviceError::UpdateNotAllowedPostBoot))?; @@ -557,9 +543,7 @@ impl VmmService { event_mgr: &mut EventManager, config: VirtioNetDeviceConfigInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; let ctx = vm .create_device_op_context(Some(event_mgr.epoll_manager())) .map_err(|e| { @@ -583,9 +567,7 @@ impl VmmService { vmm: &mut Vmm, config: VirtioNetDeviceConfigUpdateInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; VirtioNetDeviceMgr::update_device_ratelimiters(vm.device_manager_mut(), config) .map(|_| VmmData::Empty) @@ -594,9 +576,7 @@ impl VmmService { #[cfg(feature = "virtio-fs")] fn add_fs_device(&mut self, vmm: &mut Vmm, config: FsDeviceConfigInfo) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; let hotplug = vm.is_vm_initialized(); if !cfg!(feature = "hotplug") && hotplug { return Err(VmmActionError::FsDevice( @@ -619,9 +599,7 @@ impl VmmService { vmm: &mut Vmm, config: FsMountConfigInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if !vm.is_vm_initialized() { return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); @@ -638,9 +616,7 @@ impl VmmService { vmm: &mut Vmm, config: FsDeviceConfigUpdateInfo, ) -> VmmRequestResult { - let vm = vmm - .get_vm_by_id_mut("") - .ok_or(VmmActionError::InvalidVMID)?; + let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if !vm.is_vm_initialized() { return Err(VmmActionError::FsDevice(FsDeviceError::MicroVMNotRunning)); diff --git a/src/dragonball/src/event_manager.rs b/src/dragonball/src/event_manager.rs index 9a2ad9d1c..f07b78650 100644 --- a/src/dragonball/src/event_manager.rs +++ b/src/dragonball/src/event_manager.rs @@ -135,7 +135,7 @@ impl MutEventSubscriber for VmmEpollHandler { self.vmm_event_count.fetch_add(1, Ordering::AcqRel); } EPOLL_EVENT_EXIT => { - let vm = vmm.get_vm_by_id("").unwrap(); + let vm = vmm.get_vm().unwrap(); match vm.get_reset_eventfd() { Some(ev) => { if let Err(e) = ev.read() { diff --git a/src/dragonball/src/vmm.rs b/src/dragonball/src/vmm.rs index 362415dbd..a25543e34 100644 --- a/src/dragonball/src/vmm.rs +++ b/src/dragonball/src/vmm.rs @@ -85,12 +85,12 @@ impl Vmm { } /// Get a reference to a virtual machine managed by the VMM. - pub fn get_vm_by_id(&self, _id: &str) -> Option<&Vm> { + pub fn get_vm(&self) -> Option<&Vm> { Some(&self.vm) } /// Get a mutable reference to a virtual machine managed by the VMM. - pub fn get_vm_by_id_mut(&mut self, _id: &str) -> Option<&mut Vm> { + pub fn get_vm_mut(&mut self) -> Option<&mut Vm> { Some(&mut self.vm) } @@ -156,7 +156,7 @@ impl Vmm { /// Waits for all vCPUs to exit and terminates the Dragonball process. fn stop(&mut self, exit_code: i32) -> i32 { info!("Vmm is stopping."); - if let Some(vm) = self.get_vm_by_id_mut("") { + if let Some(vm) = self.get_vm_mut() { if vm.is_vm_initialized() { if let Err(e) = vm.remove_devices() { warn!("failed to remove devices: {:?}", e); From 6a1fe85f1092ee4e8580bffeefd54f2f65e88e7c Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 6 Jul 2022 01:01:05 +0800 Subject: [PATCH 20/24] vfio: add vfio as TODO We add vfio as TODO in this commit and create a github issue for this. Signed-off-by: Chao Wu --- src/dragonball/src/device_manager/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index af19fdeac..b8e1ea996 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -631,6 +631,7 @@ impl DeviceManager { /// Start all registered devices when booting the associated virtual machine. pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { + // TODO: add vfio support here. issue #4589. Ok(()) } From 5d3b53ee7beb3107277df849db9598347ed737ea Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 6 Jul 2022 01:01:39 +0800 Subject: [PATCH 21/24] downtime: add downtime support add downtime support in `resume_all_vcpus_with_downtime` Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/instance_info.rs | 4 ++++ src/dragonball/src/vm/mod.rs | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/dragonball/src/api/v1/instance_info.rs b/src/dragonball/src/api/v1/instance_info.rs index d457b6124..ae159aa61 100644 --- a/src/dragonball/src/api/v1/instance_info.rs +++ b/src/dragonball/src/api/v1/instance_info.rs @@ -54,6 +54,8 @@ pub struct InstanceInfo { pub async_state: AsyncState, /// List of tids of vcpu threads (vcpu index, tid) pub tids: Vec<(u8, u32)>, + /// Last instance downtime + pub last_instance_downtime: u64, } impl InstanceInfo { @@ -66,6 +68,7 @@ impl InstanceInfo { pid: std::process::id(), async_state: AsyncState::Uninitialized, tids: Vec::new(), + last_instance_downtime: 0, } } } @@ -79,6 +82,7 @@ impl Default for InstanceInfo { pid: std::process::id(), async_state: AsyncState::Uninitialized, tids: Vec::new(), + last_instance_downtime: 0, } } } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 7afdefedc..e372af51e 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -420,6 +420,18 @@ impl Vm { pub fn resume_all_vcpus_with_downtime(&mut self) -> std::result::Result<(), VcpuManagerError> { self.vcpu_manager()?.resume_all_vcpus()?; + if self.start_instance_downtime != 0 { + let now = TimestampUs::default(); + let downtime = now.time_us - self.start_instance_downtime; + info!(self.logger, "VM: instance downtime: {} us", downtime); + self.start_instance_downtime = 0; + if let Ok(mut info) = self.shared_info.write() { + info.last_instance_downtime = downtime; + } else { + error!(self.logger, "Failed to update live upgrade downtime, couldn't be written due to poisoned lock"); + } + } + Ok(()) } From e14e98bbeb6a2d0c141591275e55813c8d8a38e4 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 6 Jul 2022 11:20:50 +0800 Subject: [PATCH 22/24] cpu_topo: add handle_cpu_topology function add handle_cpu_topology funciton to make it easier to understand the set_vm_configuration function. Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 56 ++++++++++++++----------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index ae9db4fb4..4510b7e03 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -16,6 +16,9 @@ use crate::event_manager::EventManager; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; +use self::VmConfigError::*; +use self::VmmActionError::MachineConfig; + #[cfg(feature = "virtio-blk")] pub use crate::device_manager::blk_dev_mgr::{ BlockDeviceConfigInfo, BlockDeviceConfigUpdateInfo, BlockDeviceError, BlockDeviceMgr, @@ -335,9 +338,6 @@ impl VmmService { vmm: &mut Vmm, machine_config: VmConfigInfo, ) -> VmmRequestResult { - use self::VmConfigError::*; - use self::VmmActionError::MachineConfig; - let vm = vmm.get_vm_mut().ok_or(VmmActionError::InvalidVMID)?; if vm.is_vm_initialized() { return Err(MachineConfig(UpdateNotAllowedPostBoot)); @@ -356,27 +356,7 @@ impl VmmService { if config.cpu_topology != machine_config.cpu_topology { let cpu_topology = &machine_config.cpu_topology; - // Check if dies_per_socket, cores_per_die, threads_per_core and socket number is valid - if cpu_topology.threads_per_core < 1 || cpu_topology.threads_per_core > 2 { - return Err(MachineConfig(InvalidThreadsPerCore( - cpu_topology.threads_per_core, - ))); - } - let vcpu_count_from_topo = cpu_topology - .sockets - .checked_mul(cpu_topology.dies_per_socket) - .ok_or(MachineConfig(VcpuCountExceedsMaximum))? - .checked_mul(cpu_topology.cores_per_die) - .ok_or(MachineConfig(VcpuCountExceedsMaximum))? - .checked_mul(cpu_topology.threads_per_core) - .ok_or(MachineConfig(VcpuCountExceedsMaximum))?; - if vcpu_count_from_topo > MAX_SUPPORTED_VCPUS { - return Err(MachineConfig(VcpuCountExceedsMaximum)); - } - if vcpu_count_from_topo < config.vcpu_count { - return Err(MachineConfig(InvalidCpuTopology(vcpu_count_from_topo))); - } - config.cpu_topology = cpu_topology.clone(); + config.cpu_topology = handle_cpu_topology(cpu_topology, config.vcpu_count)?.clone(); } else { // the same default let mut default_cpu_topology = CpuTopology { @@ -627,3 +607,31 @@ impl VmmService { .map_err(VmmActionError::FsDevice) } } + +fn handle_cpu_topology( + cpu_topology: &CpuTopology, + vcpu_count: u8, +) -> std::result::Result<&CpuTopology, VmmActionError> { + // Check if dies_per_socket, cores_per_die, threads_per_core and socket number is valid + if cpu_topology.threads_per_core < 1 || cpu_topology.threads_per_core > 2 { + return Err(MachineConfig(InvalidThreadsPerCore( + cpu_topology.threads_per_core, + ))); + } + let vcpu_count_from_topo = cpu_topology + .sockets + .checked_mul(cpu_topology.dies_per_socket) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? + .checked_mul(cpu_topology.cores_per_die) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))? + .checked_mul(cpu_topology.threads_per_core) + .ok_or(MachineConfig(VcpuCountExceedsMaximum))?; + if vcpu_count_from_topo > MAX_SUPPORTED_VCPUS { + return Err(MachineConfig(VcpuCountExceedsMaximum)); + } + if vcpu_count_from_topo < vcpu_count { + return Err(MachineConfig(InvalidCpuTopology(vcpu_count_from_topo))); + } + + Ok(cpu_topology) +} From 47a4142e0d88eaf888269ae98b396911a44a23a2 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 6 Jul 2022 11:38:12 +0800 Subject: [PATCH 23/24] fs: change vhostuser and virtio into const change fs mode vhostuser and virtio into const. Signed-off-by: Chao Wu --- src/dragonball/src/device_manager/fs_dev_mgr.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/dragonball/src/device_manager/fs_dev_mgr.rs b/src/dragonball/src/device_manager/fs_dev_mgr.rs index d699dcbde..088dc980f 100644 --- a/src/dragonball/src/device_manager/fs_dev_mgr.rs +++ b/src/dragonball/src/device_manager/fs_dev_mgr.rs @@ -27,6 +27,10 @@ const USE_SHARED_IRQ: bool = true; const USE_GENERIC_IRQ: bool = true; // Default cache size is 2 Gi since this is a typical VM memory size. const DEFAULT_CACHE_SIZE: u64 = 2 * 1024 * 1024 * 1024; +// We have 2 supported fs device mode, vhostuser and virtio +const VHOSTUSER_FS_MODE: &str = "vhostuser"; +// We have 2 supported fs device mode, vhostuser and virtio +const VIRTIO_FS_MODE: &str = "virtio"; /// Errors associated with `FsDeviceConfig`. #[derive(Debug, thiserror::Error)] @@ -152,7 +156,7 @@ impl std::default::Default for FsDeviceConfigInfo { impl FsDeviceConfigInfo { /// The default mode is set to 'virtio' for 'virtio-fs' device. pub fn default_fs_mode() -> String { - String::from("virtio") + String::from(VIRTIO_FS_MODE) } /// The default cache policy @@ -227,7 +231,7 @@ impl ConfigItem for FsDeviceConfigInfo { fn check_conflicts(&self, other: &Self) -> Result<(), FsDeviceError> { if self.tag == other.tag { Err(FsDeviceError::FsDeviceTagAlreadyExists(self.tag.clone())) - } else if self.mode.as_str() == "vhostuser" && self.sock_path == other.sock_path { + } else if self.mode.as_str() == VHOSTUSER_FS_MODE && self.sock_path == other.sock_path { Err(FsDeviceError::FsDevicePathAlreadyExists( self.sock_path.clone(), )) @@ -354,8 +358,8 @@ impl FsDeviceMgr { ctx: &mut DeviceOpContext, epoll_mgr: EpollManager, ) -> std::result::Result { - match config.mode.as_str() { - "virtio" => Self::attach_virtio_fs_devices(config, ctx, epoll_mgr), + match &config.mode as &str { + VIRTIO_FS_MODE => Self::attach_virtio_fs_devices(config, ctx, epoll_mgr), _ => Err(FsDeviceError::CreateFsDevice(virtio::Error::InvalidInput)), } } From 9cee52153b263d9a7267f9a6df1fc493b676b941 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 6 Jul 2022 11:58:21 +0800 Subject: [PATCH 24/24] fmt: do cargo fmt and add a dependency for blk_dev fmt: do cargo fmt and add a dependency for blk_dev Signed-off-by: Chao Wu --- src/dragonball/src/device_manager/blk_dev_mgr.rs | 1 + src/dragonball/src/vcpu/aarch64.rs | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dragonball/src/device_manager/blk_dev_mgr.rs b/src/dragonball/src/device_manager/blk_dev_mgr.rs index 0d35c4f95..9b3ee9041 100644 --- a/src/dragonball/src/device_manager/blk_dev_mgr.rs +++ b/src/dragonball/src/device_manager/blk_dev_mgr.rs @@ -21,6 +21,7 @@ use serde_derive::{Deserialize, Serialize}; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::config_manager::{ConfigItem, DeviceConfigInfo, RateLimiterConfigInfo}; +use crate::device_manager::blk_dev_mgr::BlockDeviceError::InvalidDeviceId; use crate::device_manager::{DeviceManager, DeviceMgrError, DeviceOpContext}; use crate::get_bucket_update; use crate::vm::KernelConfigInfo; diff --git a/src/dragonball/src/vcpu/aarch64.rs b/src/dragonball/src/vcpu/aarch64.rs index 759774a62..054a1f65d 100644 --- a/src/dragonball/src/vcpu/aarch64.rs +++ b/src/dragonball/src/vcpu/aarch64.rs @@ -6,20 +6,20 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. +use std::ops::Deref; use std::sync::mpsc::{channel, Sender}; use std::sync::Arc; -use std::ops::Deref; use crate::IoManagerCached; -use dbs_utils::time::TimestampUs; use dbs_arch::regs; use dbs_boot::get_fdt_addr; +use dbs_utils::time::TimestampUs; use kvm_ioctls::{VcpuFd, VmFd}; use vm_memory::{Address, GuestAddress, GuestAddressSpace}; use vmm_sys_util::eventfd::EventFd; use crate::address_space_manager::GuestAddressSpaceImpl; -use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuStateEvent, VcpuError}; +use crate::vcpu::vcpu_impl::{Result, Vcpu, VcpuError, VcpuStateEvent}; use crate::vcpu::VcpuConfig; #[allow(unused)] @@ -111,8 +111,7 @@ impl Vcpu { .map_err(VcpuError::REGSConfiguration)?; } - self.mpidr = - regs::read_mpidr(&self.fd).map_err(VcpuError::REGSConfiguration)?; + self.mpidr = regs::read_mpidr(&self.fd).map_err(VcpuError::REGSConfiguration)?; Ok(()) }