From 0f402a14f96533775b985626cd4fe83833096649 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Thu, 7 Dec 2023 15:00:49 +0800 Subject: [PATCH 1/4] dragonball: add InsertHostDevice vmm action Introduce a new vmm action InsertHostDevice to passthrough host pci devices like NIC or GPU devices into guest so that users could have high performance usage of those devices. fixes: #8741 Signed-off-by: Gerry Liu Signed-off-by: Zizheng Bian Signed-off-by: Zha Bin Signed-off-by: Helin Guo Signed-off-by: Chao Wu --- src/dragonball/Cargo.lock | 53 ++ src/dragonball/Cargo.toml | 4 + src/dragonball/src/api/v1/vmm_action.rs | 31 +- .../src/dbs_device/src/resources.rs | 4 +- src/dragonball/src/dbs_pci/src/lib.rs | 1 + src/dragonball/src/device_manager/mod.rs | 45 +- .../src/device_manager/vfio_dev_mgr/mod.rs | 624 ++++++++++++++++++ .../device_manager/vfio_dev_mgr/pci_vfio.rs | 169 +++++ src/dragonball/src/error.rs | 10 + src/dragonball/src/resource_manager.rs | 2 +- src/dragonball/src/vm/mod.rs | 2 +- src/dragonball/src/vm/x86_64.rs | 24 +- src/runtime-rs/Cargo.lock | 1 + 13 files changed, 961 insertions(+), 9 deletions(-) create mode 100644 src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs create mode 100644 src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs diff --git a/src/dragonball/Cargo.lock b/src/dragonball/Cargo.lock index 7915c83cd..bb758d1f8 100644 --- a/src/dragonball/Cargo.lock +++ b/src/dragonball/Cargo.lock @@ -344,6 +344,26 @@ dependencies = [ "vmm-sys-util", ] +[[package]] +name = "dbs-pci" +version = "0.1.0" +dependencies = [ + "byteorder", + "dbs-allocator", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "downcast-rs", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vfio-ioctls", + "vm-memory", +] + [[package]] name = "dbs-upcall" version = "0.3.0" @@ -398,6 +418,7 @@ dependencies = [ "serde_json", "thiserror", "threadpool", + "timerfd", "vhost", "virtio-bindings", "virtio-queue", @@ -454,6 +475,12 @@ 
dependencies = [ "winapi", ] +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "dragonball" version = "0.1.0" @@ -469,6 +496,7 @@ dependencies = [ "dbs-device", "dbs-interrupt", "dbs-legacy-devices", + "dbs-pci", "dbs-upcall", "dbs-utils", "dbs-virtio-devices", @@ -494,6 +522,8 @@ dependencies = [ "test-utils", "thiserror", "tracing", + "vfio-bindings", + "vfio-ioctls", "virtio-queue", "vm-memory", "vmm-sys-util", @@ -2074,6 +2104,29 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vfio-bindings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce" + +[[package]] +name = "vfio-ioctls" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4" +dependencies = [ + "byteorder", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vm-memory", + "vmm-sys-util", +] + [[package]] name = "vhost" version = "0.6.1" diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index ca41c613f..3b45ca4dc 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -25,6 +25,7 @@ dbs-utils = { path = "./src/dbs_utils" } dbs-virtio-devices = { path = "./src/dbs_virtio_devices", optional = true, features = [ "virtio-mmio", ] } +dbs-pci = { path = "./src/dbs_pci", optional = true } derivative = "2.2.0" kvm-bindings = "0.6.0" kvm-ioctls = "0.12.0" @@ -48,6 +49,8 @@ virtio-queue = { version = "0.7.0", optional = true } vm-memory = { version = "0.10.0", features = ["backend-mmap"] } crossbeam-channel = "0.5.6" 
fuse-backend-rs = "0.10.5" +vfio-bindings = { version = "0.3.0", optional = true } +vfio-ioctls = { version = "0.1.0", optional = true } [dev-dependencies] slog-async = "2.7.0" @@ -77,3 +80,4 @@ vhost-net = ["dbs-virtio-devices/vhost-net"] vhost-user-fs = ["dbs-virtio-devices/vhost-user-fs"] vhost-user-net = ["dbs-virtio-devices/vhost-user-net"] vhost-user-blk = ["dbs-virtio-devices/vhost-user-blk"] +host-device = ["dep:vfio-bindings", "dep:vfio-ioctls", "dep:dbs-pci"] diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 0d2f678ed..a2d341c7c 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -36,6 +36,8 @@ pub use crate::device_manager::fs_dev_mgr::{ }; #[cfg(feature = "virtio-mem")] pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError}; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError, VfioDeviceHostInfo}; #[cfg(feature = "vhost-net")] pub use crate::device_manager::vhost_net_dev_mgr::{ VhostNetDeviceConfigInfo, VhostNetDeviceError, VhostNetDeviceMgr, @@ -148,11 +150,16 @@ pub enum VmmActionError { /// End tracing Failed. #[error("End tracing failed: {0}")] EndTracingFailed(#[source] TraceError), + + #[cfg(feature = "host-device")] + /// The action `InsertHostDevice` failed either because of bad user input or an internal error. + #[error("failed to add VFIO passthrough device: {0:?}")] + HostDeviceConfig(#[source] VfioDeviceError), } /// This enum represents the public interface of the VMM. Each action contains various /// bits of information (ids, paths, etc.). -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq)] pub enum VmmAction { /// Configure the boot source of the microVM using `BootSourceConfig`. /// This action can only be called before the microVM has booted. 
@@ -245,6 +252,10 @@ pub enum VmmAction { /// Add a new balloon device or update one that already exists using the `BalloonDeviceConfig` /// as input. InsertBalloonDevice(BalloonDeviceConfigInfo), + + #[cfg(feature = "host-device")] + /// Add a VFIO assignment host device or update one that already exists + InsertHostDevice(HostDeviceConfig), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -371,6 +382,8 @@ impl VmmService { VmmAction::InsertBalloonDevice(balloon_cfg) => { self.add_balloon_device(vmm, event_mgr, balloon_cfg) } + #[cfg(feature = "host-device")] + VmmAction::InsertHostDevice(hostdev_cfg) => self.add_vfio_device(vmm, hostdev_cfg), }; debug!("send vmm response: {:?}", response); @@ -813,6 +826,22 @@ impl VmmService { .map_err(VmmActionError::FsDevice) } + #[cfg(feature = "host-device")] + fn add_vfio_device(&self, vmm: &mut Vmm, config: HostDeviceConfig) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + info!("add_vfio_device: {:?}", config); + + vm.device_manager() + .vfio_manager + .lock() + .unwrap() + .insert_device(config.into()) + .map_err(VmmActionError::HostDeviceConfig)?; + Ok(VmmData::Empty) + } + #[cfg(feature = "hotplug")] #[instrument(skip(self))] fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { diff --git a/src/dragonball/src/dbs_device/src/resources.rs b/src/dragonball/src/dbs_device/src/resources.rs index e87b0fe87..4f5290745 100644 --- a/src/dragonball/src/dbs_device/src/resources.rs +++ b/src/dragonball/src/dbs_device/src/resources.rs @@ -202,7 +202,7 @@ pub enum Resource { size: u32, }, /// Network Interface Card MAC address. - MacAddresss(String), + MacAddress(String), /// KVM memslot index. KvmMemSlot(u32), } @@ -310,7 +310,7 @@ impl DeviceResources { /// Get the first resource information for NIC MAC address. 
pub fn get_mac_address(&self) -> Option { for entry in self.0.iter().as_ref() { - if let Resource::MacAddresss(addr) = entry { + if let Resource::MacAddress(addr) = entry { return Some(addr.clone()); } } diff --git a/src/dragonball/src/dbs_pci/src/lib.rs b/src/dragonball/src/dbs_pci/src/lib.rs index 92ef8b4ea..7a0119f3c 100644 --- a/src/dragonball/src/dbs_pci/src/lib.rs +++ b/src/dragonball/src/dbs_pci/src/lib.rs @@ -54,6 +54,7 @@ mod msix; pub use msix::{MsixCap, MsixState, MSIX_TABLE_ENTRY_SIZE}; mod vfio; +pub use vfio::{VfioPciDevice, VfioPciError, VENDOR_NVIDIA}; /// Error codes related to PCI root/bus/device operations. #[derive(Debug, thiserror::Error)] diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 090c13524..90630a7f4 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -107,6 +107,11 @@ use self::balloon_dev_mgr::BalloonDeviceMgr; pub mod vhost_net_dev_mgr; #[cfg(feature = "vhost-net")] use self::vhost_net_dev_mgr::VhostNetDeviceMgr; +#[cfg(feature = "host-device")] +/// Device manager for PCI/MMIO VFIO devices. +pub mod vfio_dev_mgr; +#[cfg(feature = "host-device")] +use self::vfio_dev_mgr::VfioDeviceMgr; #[cfg(feature = "vhost-user-net")] /// Device manager for vhost-user-net devices. 
@@ -273,6 +278,8 @@ pub struct DeviceOpContext { upcall_client: Option>>, #[cfg(feature = "dbs-virtio-devices")] virtio_devices: Vec>, + #[cfg(feature = "host-device")] + vfio_manager: Option>>, vm_config: Option, shared_info: Arc>, } @@ -313,6 +320,8 @@ impl DeviceOpContext { virtio_devices: Vec::new(), vm_config, shared_info, + #[cfg(feature = "host-device")] + vfio_manager: None, } } @@ -435,6 +444,13 @@ impl DeviceOpContext { } } +#[cfg(feature = "host-device")] +impl DeviceOpContext { + pub(crate) fn set_vfio_manager(&mut self, vfio_device_mgr: Arc>) { + self.vfio_manager = Some(vfio_device_mgr); + } +} + #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] impl DeviceOpContext { pub(crate) fn create_hotplug_ctx(vm: &Vm, epoll_mgr: Option) -> Self { @@ -555,6 +571,8 @@ pub struct DeviceManager { #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr, + #[cfg(feature = "host-device")] + pub(crate) vfio_manager: Arc>, } impl DeviceManager { @@ -571,7 +589,7 @@ impl DeviceManager { io_lock: Arc::new(Mutex::new(())), irq_manager: Arc::new(KvmIrqManager::new(vm_fd.clone())), res_manager, - vm_fd, + vm_fd: vm_fd.clone(), logger: logger.new(slog::o!()), shared_info, @@ -595,6 +613,8 @@ impl DeviceManager { vhost_net_manager: VhostNetDeviceMgr::default(), #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr::default(), + #[cfg(feature = "host-device")] + vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, logger))), } } @@ -775,6 +795,14 @@ impl DeviceManager { .attach_devices(&mut ctx) .map_err(StartMicroVmError::VhostUserNetDeviceError)?; + #[cfg(feature = "host-device")] + { + // It is safe because we don't expect poison lock. + let mut vfio_manager = self.vfio_manager.lock().unwrap(); + vfio_manager.attach_devices(&mut ctx)?; + ctx.set_vfio_manager(self.vfio_manager.clone()) + } + // Ensure that all devices are attached before kernel boot args are // generated. 
ctx.generate_kernel_boot_args(kernel_config) @@ -792,8 +820,17 @@ impl DeviceManager { } /// Start all registered devices when booting the associated virtual machine. - pub fn start_devices(&mut self) -> std::result::Result<(), StartMicroVmError> { - // TODO: add vfio support here. issue #4589. + pub fn start_devices( + &mut self, + vm_as: &GuestAddressSpaceImpl, + ) -> std::result::Result<(), StartMicroVmError> { + // It is safe because we don't expect poison lock. + #[cfg(feature = "host-device")] + self.vfio_manager + .lock() + .unwrap() + .start_devices(vm_as) + .map_err(StartMicroVmError::RegisterDMAAddress)?; Ok(()) } @@ -1202,6 +1239,8 @@ mod tests { vhost_net_manager: VhostNetDeviceMgr::default(), #[cfg(feature = "vhost-user-net")] vhost_user_net_manager: VhostUserNetDeviceMgr::default(), + #[cfg(feature = "host-device")] + vfio_manager: Arc::new(Mutex::new(VfioDeviceMgr::new(vm_fd, &logger))), logger, shared_info, diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs new file mode 100644 index 000000000..2828996a9 --- /dev/null +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs @@ -0,0 +1,624 @@ +// Copyright 2023 Alibaba, Inc. or its affiliates. All Rights Reserved. +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. +//! Device manager for host passthrough devices. 
+// We allow missing_docs temporarily, because Rust can't use this declaration in a macro. + +#![allow(missing_docs)] +mod pci_vfio; +pub use pci_vfio::PciSystemManager; + +use std::collections::HashMap; +use std::ops::Deref; +use std::os::fd::RawFd; +use std::path::Path; +use std::sync::{Arc, Weak}; + +use super::StartMicroVmError; +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext}; +use crate::resource_manager::{ResourceError, ResourceManager}; +use dbs_device::resources::Resource::LegacyIrq; +use dbs_device::resources::ResourceConstraint; +use dbs_device::DeviceIo; +use dbs_interrupt::KvmIrqManager; +#[cfg(target_arch = "aarch64")] +use dbs_pci::ECAM_SPACE_LENGTH; +use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA}; +use kvm_ioctls::{DeviceFd, VmFd}; +use serde_derive::{Deserialize, Serialize}; +use vfio_ioctls::{VfioContainer, VfioDevice}; +use vm_memory::{ + Address, GuestAddressSpace, GuestMemory, GuestMemoryRegion, GuestRegionMmap, + MemoryRegionAddress, +}; + +// The flag of whether to use the shared irq. +const USE_SHARED_IRQ: bool = true; + +/// Errors associated with the operations allowed on a host device +#[derive(Debug, thiserror::Error)] +pub enum VfioDeviceError { + /// Internal error. + #[error("VFIO subsystem internal error")] + InternalError, + + /// The virtual machine instance ID is invalid. + #[error("the virtual machine instance ID is invalid")] + InvalidVMID, + + /// Cannot open host device due to invalid bus::slot::function + #[error("can't open host device for VFIO")] + CannotOpenVfioDevice, + + /// The device ID is already in use. 
+ #[error("the device ID {0} already exists")] + DeviceIDAlreadyExist(String), + + /// Host device string (bus::slot::function) is already in use + #[error("device '{0}' is already in use")] + DeviceAlreadyInUse(String), + + /// The configuration of vfio device is invalid. + #[error("The configuration of vfio device is invalid")] + InvalidConfig, + + /// No resource available + #[error("no resource available for VFIO device")] + NoResource, + + /// Cannot perform the requested operation after booting the microVM + #[error("update operation is not allowed after boot")] + UpdateNotAllowedPostBoot, + + /// Failed to create kvm device + #[error("failed to create kvm device: {0:?}")] + CreateKvmDevice(#[source] vmm_sys_util::errno::Error), + + /// Failed to restore vfio mlock count + #[error("failure while restoring vfio mlock count: {0:?}")] + RestoreMlockCount(#[source] std::io::Error), + + /// Failure in device manager while managing VFIO device + #[error("failure in device manager while managing VFIO device, {0:?}")] + VfioDeviceMgr(#[source] DeviceMgrError), + + /// Failure in VFIO IOCTL subsystem. + #[error("failure while configuring VFIO device, {0:?}")] + VfioIoctlError(#[source] vfio_ioctls::VfioError), + + /// Failure in VFIO PCI subsystem. + #[error("failure while managing PCI VFIO device: {0:?}")] + VfioPciError(#[source] dbs_pci::VfioPciError), + + /// Failure in PCI subsystem. + #[error("PCI subsystem failed to manage the device: {0:?}")] + PciError(#[source] dbs_pci::Error), + + /// Failed to get vfio host info + #[error("PCI get host info failed: {0}")] + GetHostInfo(String), + + /// Invalid PCI device ID + #[error("invalid PCI device ID: {0}")] + InvalidDeviceID(u32), + + /// Failed to allocate device resource + #[error("failure while allocate device resource: {0:?}")] + AllocateDeviceResource(#[source] ResourceError), + + /// Vfio container not found + #[error("vfio container not found")] + VfioContainerNotFound, + + /// Generic IO error. 
+ #[error("Generic IO error, {0}")] + IoError(#[source] std::io::Error), +} + +type Result = std::result::Result; + +/// Host info for vfio device +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct VfioDeviceHostInfo { + pub group_id: u32, + pub group_fd: RawFd, + pub device_fd: RawFd, +} + +/// Configuration information for a VFIO PCI device. +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, Default)] +pub struct VfioPciDeviceConfig { + /// PCI device information: "bus:slot:function" + pub bus_slot_func: String, + /// PCI vendor and device id + /// high 16bit : low 16bit = device_id : vendor_id + pub vendor_device_id: u32, + /// Device ID used in guest, guest_dev_id = slot + pub guest_dev_id: Option, + /// Clique ID for Nvidia GPUs and RDMA NICs + pub clique_id: Option, +} + +impl VfioPciDeviceConfig { + /// default pci domain is 0 + pub fn host_pci_domain(&self) -> u32 { + 0 + } + + pub fn valid_vendor_device(&self) -> bool { + if self.vendor_device_id == 0 { + return true; + } + // vendor_device_id high 16bit : low 16bit = device_id : vendor_id + self.vendor_device_id != 0 + && (self.vendor_device_id & 0xffff) != 0 + && ((self.vendor_device_id >> 16) & 0xffff) != 0 + } +} + +/// Configuration for a specific Vfio Device +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub enum VfioDevConfig { + Pci(VfioPciDeviceConfig), +} + +impl Default for VfioDevConfig { + fn default() -> Self { + Self::Pci(Default::default()) + } +} + +/// Configuration information for a VFIO device. 
+#[derive(Clone, Debug, PartialEq, Deserialize, Serialize, Default)] +pub struct HostDeviceConfig { + /// Unique identifier of the hostdev + pub hostdev_id: String, + /// Sysfs path for device + pub sysfs_path: String, + /// Device specific config + pub dev_config: VfioPciDeviceConfig, +} + +impl ConfigItem for HostDeviceConfig { + type Err = VfioDeviceError; + + fn id(&self) -> &str { + &self.hostdev_id + } + + fn check_conflicts(&self, other: &Self) -> Result<()> { + if self.hostdev_id == other.hostdev_id { + return Err(VfioDeviceError::DeviceIDAlreadyExist( + self.hostdev_id.clone(), + )); + } + + if !self.sysfs_path.is_empty() && self.sysfs_path == other.sysfs_path { + return Err(VfioDeviceError::DeviceAlreadyInUse(self.sysfs_path.clone())); + } + + if !self.dev_config.bus_slot_func.is_empty() + && self.dev_config.bus_slot_func == other.dev_config.bus_slot_func + { + return Err(VfioDeviceError::DeviceAlreadyInUse( + self.dev_config.bus_slot_func.clone(), + )); + } + + Ok(()) + } +} + +/// Vfio device info +pub type VfioDeviceInfo = DeviceConfigInfo; + +/// A device manager to manage all VFIO devices. +pub struct VfioDeviceMgr { + vm_fd: Arc, + info_list: DeviceConfigInfos, + locked_vm_size: u64, + vfio_container: Option>, + pci_vfio_manager: Option>, + pci_legacy_irqs: Option>, + nvidia_shared_irq: Option, + logger: slog::Logger, +} + +impl VfioDeviceMgr { + /// Create a new VFIO device manager. + pub fn new(vm_fd: Arc, logger: &slog::Logger) -> Self { + VfioDeviceMgr { + vm_fd, + info_list: DeviceConfigInfos::new(), + locked_vm_size: 0, + vfio_container: None, + pci_vfio_manager: None, + pci_legacy_irqs: Some(HashMap::new()), + nvidia_shared_irq: None, + logger: logger.new(slog::o!()), + } + } + + /// Insert or update a VFIO device into the manager. 
+ pub fn insert_device(&mut self, config: HostDeviceConfig) -> Result<()> { + let _device_index = self.info_list.insert_or_update(&config)?; + Ok(()) + } + + /// Attach all configured VFIO device to the virtual machine instance. + pub fn attach_devices( + &mut self, + ctx: &mut DeviceOpContext, + ) -> std::result::Result<(), StartMicroVmError> { + for (idx, info) in self.info_list.clone().iter().enumerate() { + self.create_device(&info.config, ctx, idx) + .map_err(StartMicroVmError::CreateVfioDevice)?; + } + Ok(()) + } + + /// Start all VFIO devices. + pub fn start_devices(&mut self, vm_as: &GuestAddressSpaceImpl) -> Result<()> { + if self.vfio_container.is_some() { + let vm_memory = vm_as.memory(); + self.register_memory(vm_memory.deref())?; + } + Ok(()) + } + + pub(crate) fn get_kvm_dev_fd(&self) -> Result { + let mut kvm_vfio_dev = kvm_bindings::kvm_create_device { + type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_VFIO, + fd: 0, + flags: 0, + }; + let kvm_dev_fd = self + .vm_fd + .create_device(&mut kvm_vfio_dev) + .map_err(|e| VfioDeviceError::IoError(std::io::Error::from_raw_os_error(e.errno())))?; + Ok(kvm_dev_fd) + } + + /// Get vfio container object. You should call get_vfio_manager to get vfio_manager Firstly. 
+ pub fn get_vfio_container(&mut self) -> Result> { + if let Some(vfio_container) = self.vfio_container.as_ref() { + Ok(vfio_container.clone()) + } else { + let kvm_dev_fd = Arc::new(self.get_kvm_dev_fd()?); + let vfio_container = + Arc::new(VfioContainer::new(kvm_dev_fd).map_err(VfioDeviceError::VfioIoctlError)?); + self.vfio_container = Some(vfio_container.clone()); + + Ok(vfio_container) + } + } + + fn create_device( + &mut self, + cfg: &HostDeviceConfig, + ctx: &mut DeviceOpContext, + idx: usize, + ) -> Result> { + let sysfs_path = Self::build_sysfs_path(cfg)?; + let device = self.attach_pci_vfio_device(ctx, sysfs_path, &cfg.dev_config)?; + self.info_list[idx].device = Some(device.clone()); + Ok(device) + } + /// Gets the index of the device with the specified `hostdev_id` if it exists in the list. + fn get_index_of_hostdev_id(&self, id: &str) -> Option { + self.info_list + .iter() + .position(|info| info.config.id().eq(id)) + } + + /// Register guest memory to the VFIO container. + /// + /// # Arguments + /// * `guest_mem`: guest memory configuration object. 
+ pub(crate) fn register_memory(&mut self, vm_memory: &GuestMemoryImpl) -> Result<()> { + for region in vm_memory.iter() { + self.register_memory_region(region)?; + } + Ok(()) + } + + pub(crate) fn register_memory_region(&mut self, region: &GuestRegionMmap) -> Result<()> { + let gpa = region.start_addr().raw_value(); + let size = region.len(); + let user_addr = region + .get_host_address(MemoryRegionAddress(0)) + .expect("guest memory region should be mapped and has HVA.") + as u64; + let readonly = region.prot() & libc::PROT_WRITE == 0; + self.register_region(gpa, size, user_addr, readonly) + } + + pub(crate) fn register_region( + &mut self, + iova: u64, + size: u64, + user_addr: u64, + readonly: bool, + ) -> Result<()> { + slog::info!( + self.logger, + "map guest physical memory"; + "subsystem" => "vfio_dev_mgr", + "iova" => iova, + "size" => size, + "user_addr" => user_addr, + "readonly" => readonly, + ); + //FIXME: add readonly flag when related commit is pushed to upstream vfio-ioctls + self.get_vfio_container()? + .vfio_dma_map(iova, size, user_addr) + .map_err(VfioDeviceError::VfioIoctlError)?; + self.locked_vm_size += size; + Ok(()) + } + + /// Clear locked size because iommu table is cleared + pub(crate) fn clear_locked_size(&mut self) { + self.locked_vm_size = 0; + } + + pub(crate) fn unregister_region(&mut self, region: &GuestRegionMmap) -> Result<()> { + let gpa = region.start_addr().raw_value(); + let size = region.len(); + + self.get_vfio_container()? 
+ .vfio_dma_unmap(gpa, size) + .map_err(VfioDeviceError::VfioIoctlError)?; + + self.locked_vm_size -= size; + Ok(()) + } + pub(crate) fn update_memory(&mut self, region: &GuestRegionMmap) -> Result<()> { + if self.locked_vm_size != 0 { + self.register_memory_region(region)?; + } + Ok(()) + } + pub(crate) fn build_sysfs_path(cfg: &HostDeviceConfig) -> Result { + if cfg.sysfs_path.is_empty() { + let (bdf, domain) = ( + &cfg.dev_config.bus_slot_func, + cfg.dev_config.host_pci_domain(), + ); + let len = bdf.split(':').count(); + if len == 0 { + Err(VfioDeviceError::InvalidConfig) + } else if len == 2 { + Ok(format!("/sys/bus/pci/devices/{:04}:{}", domain, bdf)) + } else { + Ok(format!("/sys/bus/pci/devices/{}", bdf)) + } + } else { + Ok(cfg.sysfs_path.clone()) + } + } + + /// Get all PCI devices' legacy irqs + pub fn get_pci_legacy_irqs(&self) -> Option<&HashMap> { + self.pci_legacy_irqs.as_ref() + } +} + +impl VfioDeviceMgr { + pub(super) fn attach_pci_vfio_device( + &mut self, + ctx: &mut DeviceOpContext, + sysfs_path: String, + cfg: &VfioPciDeviceConfig, + ) -> Result> { + slog::info!( + ctx.logger(), + "attach vfio pci device"; + "subsystem" => "vfio_dev_mgr", + "host_bdf" => &cfg.bus_slot_func, + ); + // safe to get pci_manager + let pci_manager = self.create_pci_manager( + ctx.irq_manager.clone(), + ctx.io_context.clone(), + ctx.res_manager.clone(), + )?; + let pci_bus = pci_manager.pci_root_bus(); + let id = pci_manager + .new_device_id(cfg.guest_dev_id) + .ok_or(VfioDeviceError::NoResource)?; + slog::info!( + ctx.logger(), + "PCI:{} vfio pci device id: {}, vendor_device: 0x{:x}", + &sysfs_path, id, cfg.vendor_device_id; + "subsystem" => "vfio_dev_mgr", + "guest_bdf" => id, + ); + if !cfg.valid_vendor_device() { + return Err(VfioDeviceError::InvalidConfig); + } + let vfio_container = self.get_vfio_container()?; + let vfio_dev = VfioDevice::new(Path::new(&sysfs_path), vfio_container.clone()) + .map_err(VfioDeviceError::VfioIoctlError)?; + // Use Weak::clone to 
break cycle reference: + // + // reference 1: VfioPciDevice reference to PciBus + // reference 2: VfioPciDevice -> PciManager -> PciBus -> VfioPciDevice + let vfio_pci_device = Arc::new( + VfioPciDevice::create( + id, + sysfs_path, + Arc::downgrade(&pci_bus), + vfio_dev, + Arc::downgrade(self.get_pci_manager().unwrap()), + ctx.vm_fd.clone(), + cfg.vendor_device_id, + cfg.clique_id, + vfio_container, + ) + .map_err(VfioDeviceError::VfioPciError)?, + ); + let mut requires = Vec::new(); + vfio_pci_device.get_resource_requirements(&mut requires); + let vendor_id = vfio_pci_device.vendor_id(); + if vendor_id == VENDOR_NVIDIA && self.nvidia_shared_irq.is_some() { + requires.retain(|x| !matches!(x, ResourceConstraint::LegacyIrq { irq: _ })); + } + let mut resource = ctx + .res_manager + .allocate_device_resources(&requires, USE_SHARED_IRQ) + .or(Err(VfioDeviceError::NoResource))?; + if vendor_id == VENDOR_NVIDIA { + if let Some(irq) = self.nvidia_shared_irq { + resource.append(LegacyIrq(irq)); + } else { + self.nvidia_shared_irq = resource.get_legacy_irq(); + } + } + vfio_pci_device + .activate( + Arc::downgrade(&vfio_pci_device) as Weak, + resource, + ) + .map_err(VfioDeviceError::VfioPciError)?; + if let Some(irq) = vfio_pci_device.get_assigned_resources().get_legacy_irq() { + self.pci_legacy_irqs + .as_mut() + .map(|v| v.insert(vfio_pci_device.device_id(), irq as u8)); + } + // PciBus reference to VfioPciDevice + pci_bus + .register_device(vfio_pci_device.clone()) + .map_err(VfioDeviceError::PciError)?; + Ok(vfio_pci_device) + } + + pub(crate) fn create_pci_manager( + &mut self, + irq_manager: Arc, + io_context: DeviceManagerContext, + res_manager: Arc, + ) -> Result<&mut Arc> { + if self.pci_vfio_manager.is_none() { + let mut mgr = PciSystemManager::new(irq_manager, io_context, res_manager.clone())?; + let requirements = mgr.resource_requirements(); + let resources = res_manager + .allocate_device_resources(&requirements, USE_SHARED_IRQ) + 
.or(Err(VfioDeviceError::NoResource))?; + mgr.activate(resources)?; + self.pci_vfio_manager = Some(Arc::new(mgr)); + } + Ok(self.pci_vfio_manager.as_mut().unwrap()) + } + + /// Get the PCI manager to support PCI device passthrough + pub fn get_pci_manager(&mut self) -> Option<&mut Arc> { + self.pci_vfio_manager.as_mut() + } +} + +#[cfg(all(test, feature = "test-mock"))] +mod tests { + use kvm_ioctls::Kvm; + use logger::LOGGER; + use vm_memory::{GuestAddress, GuestMemoryMmap, MmapRegion}; + + use super::*; + use crate::config_manager::DeviceInfoGroup; + use crate::test_utils::tests::create_vm_for_test; + + type VfioDeviceInfo = DeviceInfoGroup; + + fn get_vfio_dev_mgr() -> VfioDeviceMgr { + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let logger = Arc::new(LOGGER.new_logger(slog::o!())); + VfioDeviceMgr::new(vm_fd, &logger) + } + + #[test] + fn test_register_memory() { + let mut mgr = get_vfio_dev_mgr(); + // mock for vfio_dma_map. + let mut vfio_container = VfioContainer::default(); + vfio_container.vfio_dma_map = true; + vfio_container.vfio_dma_unmap = true; + mgr.vfio_container = Some(Arc::new(vfio_container)); + let region_size = 0x1000; + let region1 = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x4000)) + .unwrap(); + let region2 = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0xc000)) + .unwrap(); + let regions = vec![region1, region2]; + let gmm = Arc::new(GuestMemoryMmap::from_regions(regions).unwrap()); + assert!(mgr.register_memory(&gmm.clone()).is_ok()); + assert_eq!(mgr.locked_vm_size, region_size as u64 * 2); + for region in gmm.iter() { + mgr.unregister_region(region).unwrap(); + } + assert_eq!(mgr.locked_vm_size, 0); + } + + #[test] + fn test_register_region() { + let kvm = Kvm::new().unwrap(); + let vm_fd = Arc::new(kvm.create_vm().unwrap()); + let logger = Arc::new(LOGGER.new_logger(slog::o!())); + let mut mgr = VfioDeviceMgr::new(vm_fd, &logger); + // 
mock for vfio_dma_map. + let mut vfio_container = VfioContainer::default(); + vfio_container.vfio_dma_map = true; + vfio_container.vfio_dma_unmap = true; + mgr.vfio_container = Some(Arc::new(vfio_container)); + let region_size = 0x400000; + let region = + GuestRegionMmap::new(MmapRegion::new(region_size).unwrap(), GuestAddress(0x0000)) + .unwrap(); + let gpa = region.start_addr().raw_value(); + let size = region.len() as u64; + let user_addr = region.get_host_address(MemoryRegionAddress(0)).unwrap() as u64; + let readonly = region.prot() & libc::PROT_WRITE == 0; + mgr.register_region(gpa, size, user_addr, readonly).unwrap(); + assert_eq!(mgr.locked_vm_size, region_size as u64); + assert!(mgr.unregister_region(®ion).is_ok()); + assert_eq!(mgr.locked_vm_size, 0); + } + + #[test] + fn test_vfio_attach_pci_vfio_devices() { + let vm = create_vm_for_test(); + let mut mgr = vm.device_manager.vfio_manager.lock().unwrap(); + let config = VfioDeviceConfigInfo { + hostdev_id: "hostdev_1".to_string(), + sysfs_path: "uuid1".to_string(), + bus_slot_func: "0:0:1".to_string(), + mode: "pci".to_string(), + vendor_device_id: 0, + guest_dev_id: None, + clique_id: None, + }; + let mut device_op_ctx = DeviceOpContext::new( + Some(vm.epoll_manager.clone()), + &vm.device_manager, + Some(vm.vm_as().unwrap().clone()), + vm.address_space.address_space.clone(), + false, + None, + vm.address_space.get_base_to_slot_map(), + vm.shared_info().clone(), + ); + // Invalid resources. + assert!(matches!( + mgr.attach_pci_vfio_devices(&mut device_op_ctx, &config), + Err(VfioDeviceError::VfioPciError(_)) + )); + } +} diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs new file mode 100644 index 000000000..78f03db2f --- /dev/null +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/pci_vfio.rs @@ -0,0 +1,169 @@ +// Copyright (C) 2023 Alibaba Cloud. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +use std::sync::Arc; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use dbs_device::resources::Resource; +use dbs_device::resources::{DeviceResources, ResourceConstraint}; +use dbs_interrupt::KvmIrqManager; +#[cfg(target_arch = "aarch64")] +use dbs_pci::ECAM_SPACE_LENGTH; +use dbs_pci::{create_pci_root_bus, PciBus, PciDevice, PciRootDevice, PciSystemContext}; + +use super::{Result, VfioDeviceError}; +#[cfg(target_arch = "aarch64")] +use crate::device_manager::vfio_dev_mgr::USE_SHARED_IRQ; +use crate::device_manager::DeviceManagerContext; +use crate::resource_manager::ResourceManager; + +/// we only support one pci bus +pub const PCI_BUS_DEFAULT: u8 = 0; + +/// PCI pass-through device manager. +#[derive(Clone)] +pub struct PciSystemManager { + pub irq_manager: Arc, + pub io_context: DeviceManagerContext, + pub pci_root: Arc, + pub pci_root_bus: Arc, +} + +impl PciSystemManager { + /// Create a new PCI pass-through device manager. + pub fn new( + irq_manager: Arc, + io_context: DeviceManagerContext, + res_manager: Arc, + ) -> std::result::Result { + let resources = PciSystemManager::allocate_root_device_resources(res_manager)?; + let pci_root = Arc::new( + PciRootDevice::create(PCI_BUS_DEFAULT, resources).map_err(VfioDeviceError::PciError)?, + ); + let pci_root_bus = + create_pci_root_bus(PCI_BUS_DEFAULT).map_err(VfioDeviceError::PciError)?; + + Ok(PciSystemManager { + irq_manager, + io_context, + pci_root, + pci_root_bus, + }) + } + + // The x86 pci root device is a pio device with a fixed pio base address and length. 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn allocate_root_device_resources( + _res_manager: Arc, + ) -> Result { + let mut resources = DeviceResources::new(); + resources.append(Resource::PioAddressRange { + // PCI CONFIG_ADDRESS port address 0xcf8 and uses 32 bits + // PCI CONFIG_DATA port address 0xcfc and uses 32 bits + // so the resource registered begins at 0xcf8 and takes 8 bytes as size + base: 0xcf8, + size: 0x8, + }); + Ok(resources) + } + + // The pci root device of arm is a mmio device, and its reg range is ECAM space, + // which needs to be dynamically applied from the resource pool. In addition, + // the ECAM space is used to enumerate and identify PCI devices. + #[cfg(target_arch = "aarch64")] + fn allocate_root_device_resources( + res_manager: Arc, + ) -> Result { + let requests = vec![ResourceConstraint::MmioAddress { + range: Some((0x0, 0xffff_ffff)), + align: 4096, + size: ECAM_SPACE_LENGTH, + }]; + let resources = res_manager + .allocate_device_resources(&requests, USE_SHARED_IRQ) + .map_err(VfioDeviceError::AllocateDeviceResource)?; + Ok(resources) + } + + /// Activate the PCI subsystem. + pub fn activate(&mut self, resources: DeviceResources) -> Result<()> { + let bus_id = self.pci_root_bus.bus_id(); + + self.pci_root + .add_bus(self.pci_root_bus.clone(), bus_id) + .map_err(VfioDeviceError::PciError)?; + PciRootDevice::activate(self.pci_root.clone(), &mut self.io_context) + .map_err(VfioDeviceError::PciError)?; + + self.pci_root_bus + .assign_resources(resources) + .map_err(VfioDeviceError::PciError)?; + + Ok(()) + } + + /// Get resource requirements of the PCI subsystem. + #[allow(clippy::vec_init_then_push)] + pub fn resource_requirements(&self) -> Vec { + let mut requests = Vec::new(); + + // allocate 512MB MMIO address below 4G. + requests.push(ResourceConstraint::MmioAddress { + range: Some((0x0, 0xffff_ffff)), + align: 4096, + size: 512u64 << 20, + }); + // allocate 2048GB MMIO address above 4G.
+ requests.push(ResourceConstraint::MmioAddress { + range: Some((0x1_0000_0000, 0xffff_ffff_ffff_ffff)), + align: 4096, + size: 2048u64 << 30, + }); + // allocate 8KB IO port + requests.push(ResourceConstraint::PioAddress { + range: None, + align: 1, + size: 8u16 << 10, + }); + + requests + } + + /// Get the PCI root bus. + pub fn pci_root_bus(&self) -> Arc { + self.pci_root_bus.clone() + } + + /// Allocate a PCI device id. + pub fn new_device_id(&self, device_id: Option) -> Option { + self.pci_root_bus.allocate_device_id(device_id) + } + + pub fn free_device_id(&self, device_id: u32) -> Option> { + self.pci_root_bus.free_device_id(device_id) + } + + /// Obtain ECAM space resources, that is, pci root device resources. + #[cfg(target_arch = "aarch64")] + pub fn get_ecam_space(&self) -> DeviceResources { + self.pci_root.get_device_resources() + } + + /// Obtain BAR space resources, that is, pci root bus resources. + #[cfg(target_arch = "aarch64")] + pub fn get_bar_space(&self) -> DeviceResources { + self.pci_root_bus.get_device_resources() + } +} + +impl PciSystemContext for PciSystemManager { + type D = DeviceManagerContext; + + fn get_device_manager_context(&self) -> Self::D { + self.io_context.clone() + } + + fn get_interrupt_manager(&self) -> Arc { + self.irq_manager.clone() + } +} diff --git a/src/dragonball/src/error.rs b/src/dragonball/src/error.rs index 6dc427a2a..c2885de28 100644 --- a/src/dragonball/src/error.rs +++ b/src/dragonball/src/error.rs @@ -14,6 +14,8 @@ use dbs_arch::pmu::PmuError; #[cfg(feature = "dbs-virtio-devices")] use dbs_virtio_devices::Error as VirtioError; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::VfioDeviceError; use crate::{address_space_manager, device_manager, resource_manager, vcpu, vm}; /// Shorthand result type for internal VMM commands. 
@@ -205,6 +207,14 @@ pub enum StartMicroVmError { VhostUserNetDeviceError( #[source] device_manager::vhost_user_net_dev_mgr::VhostUserNetDeviceError, ), + #[cfg(feature = "host-device")] + /// Failed to create VFIO device + #[error("cannot create VFIO device {0:?}")] + CreateVfioDevice(#[source] VfioDeviceError), + #[cfg(feature = "host-device")] + /// Failed to register DMA memory address range. + #[error("failure while registering DMA address range: {0:?}")] + RegisterDMAAddress(#[source] VfioDeviceError), } /// Errors associated with starting the instance. diff --git a/src/dragonball/src/resource_manager.rs b/src/dragonball/src/resource_manager.rs index b0f96e252..eb3897b46 100644 --- a/src/dragonball/src/resource_manager.rs +++ b/src/dragonball/src/resource_manager.rs @@ -565,7 +565,7 @@ impl ResourceManager { Resource::LegacyIrq(base) => self.free_legacy_irq(*base), Resource::MsiIrq { ty: _, base, size } => self.free_msi_irq(*base, *size), Resource::KvmMemSlot(slot) => self.free_kvm_mem_slot(*slot), - Resource::MacAddresss(_) => Ok(()), + Resource::MacAddress(_) => Ok(()), }; result?; } diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 2964936b7..ced8e3e98 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -494,7 +494,7 @@ impl Vm { )?; info!(self.logger, "VM: start devices"); - self.device_manager.start_devices()?; + self.device_manager.start_devices(vm_as)?; info!(self.logger, "VM: initializing devices done"); Ok(()) diff --git a/src/dragonball/src/vm/x86_64.rs b/src/dragonball/src/vm/x86_64.rs index 4aedeafd8..fca35f829 100644 --- a/src/dragonball/src/vm/x86_64.rs +++ b/src/dragonball/src/vm/x86_64.rs @@ -6,6 +6,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
+use std::collections::HashMap; use std::convert::TryInto; use std::ops::Deref; @@ -48,6 +49,7 @@ fn configure_system( initrd: &Option, boot_cpus: u8, max_cpus: u8, + pci_legacy_irqs: Option<&HashMap>, ) -> super::Result<()> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; @@ -59,7 +61,8 @@ fn configure_system( let himem_start = GuestAddress(layout::HIMEM_START); // Note that this puts the mptable at the last 1k of Linux's 640k base RAM - mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, None).map_err(Error::MpTableSetup)?; + mptable::setup_mptable(guest_mem, boot_cpus, max_cpus, pci_legacy_irqs) + .map_err(Error::MpTableSetup)?; let mut params: BootParamsWrapper = BootParamsWrapper(bootparam::boot_params::default()); @@ -219,6 +222,24 @@ impl Vm { .as_bytes_with_nul() .len(); + #[cfg(feature = "host-device")] + { + // Don't expect poisoned lock here. + let vfio_manager = self.device_manager.vfio_manager.lock().unwrap(); + configure_system( + vm_memory, + self.address_space.address_space(), + cmdline_addr, + cmdline_size, + &initrd, + self.vm_config.vcpu_count, + self.vm_config.max_vcpu_count, + vfio_manager.get_pci_legacy_irqs(), + ) + .map_err(StartMicroVmError::ConfigureSystem) + } + + #[cfg(not(feature = "host-device"))] configure_system( vm_memory, self.address_space.address_space(), @@ -227,6 +248,7 @@ impl Vm { &initrd, self.vm_config.vcpu_count, self.vm_config.max_vcpu_count, + None, ) .map_err(StartMicroVmError::ConfigureSystem) } diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 375b7c91e..99c38e33c 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -829,6 +829,7 @@ dependencies = [ "serde_json", "thiserror", "threadpool", + "timerfd", "vhost", "virtio-bindings", "virtio-queue", From a3f7601f5a2a15b6326d1b8781879f0315fb74b2 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Mon, 25 Dec 2023 19:43:27 +0800 Subject: [PATCH 2/4] dragonball: add pci hotplug / hot-unplug 
support Introduce two new vmm action to implement pci hotplug and pci hot-unplug: PrepareRemoveHostDevice and RemoveHostDevice. PrepareRemoveHostDevice is to call upcall to unregister the pci device in the guest kernel. RemoveHostDevice should be called after PrepareRemoveHostDevice, it is used to clean the PCI resource in the Dragonball side. fixes: #8741 Signed-off-by: Gerry Liu Signed-off-by: Zizheng Bian Signed-off-by: Zha Bin Signed-off-by: Helin Guo Signed-off-by: Chao Wu --- src/dragonball/src/api/v1/vmm_action.rs | 106 ++++++++- src/dragonball/src/dbs_pci/src/vfio.rs | 37 +++- .../src/dbs_upcall/src/dev_mgr_service.rs | 26 +++ src/dragonball/src/dbs_upcall/src/lib.rs | 2 +- .../dbs_virtio_devices/src/mmio/mmio_v2.rs | 3 +- src/dragonball/src/device_manager/mod.rs | 77 ++++++- .../src/device_manager/vfio_dev_mgr/mod.rs | 208 +++++++++++++++++- src/dragonball/src/vm/mod.rs | 10 +- 8 files changed, 450 insertions(+), 19 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index a2d341c7c..5791cb8b9 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -9,13 +9,14 @@ use std::fs::File; use std::sync::{Arc, Mutex}; -use crossbeam_channel::{Receiver, Sender, TryRecvError}; +use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; use log::{debug, error, info, warn}; use tracing::instrument; use crate::error::{Result, StartMicroVmError, StopMicrovmError}; use crate::event_manager::EventManager; use crate::tracer::{DragonballTracer, TraceError, TraceInfo}; +use crate::vcpu::VcpuManagerError; use crate::vm::{CpuTopology, KernelConfigInfo, VmConfigInfo}; use crate::vmm::Vmm; @@ -155,6 +156,10 @@ pub enum VmmActionError { /// The action `InsertHostDevice` failed either because of bad user input or an internal error. 
#[error("failed to add VFIO passthrough device: {0:?}")] HostDeviceConfig(#[source] VfioDeviceError), + #[cfg(feature = "host-device")] + /// The action 'RemoveHostDevice' failed because of vcpu manager internal error. + #[error("remove host device error: {0}")] + RemoveHostDevice(#[source] VcpuManagerError), } /// This enum represents the public interface of the VMM. Each action contains various @@ -256,6 +261,14 @@ pub enum VmmAction { #[cfg(feature = "host-device")] /// Add a VFIO assignment host device or update that already exists InsertHostDevice(HostDeviceConfig), + + #[cfg(feature = "host-device")] + /// Prepare to remove a VFIO assignment host device that already exists + PrepareRemoveHostDevice(String), + + #[cfg(feature = "host-device")] + /// Remove a VFIO assignment host device that already exists + RemoveHostDevice(String), } /// The enum represents the response sent by the VMM in case of success. The response is either @@ -268,6 +281,8 @@ pub enum VmmData { MachineConfiguration(Box), /// Prometheus Metrics represented by String. HypervisorMetrics(String), + /// Sync Hotplug + SyncHotplug((Sender>, Receiver>)), } /// Request data type used to communicate between the API and the VMM. @@ -384,6 +399,12 @@ impl VmmService { } #[cfg(feature = "host-device")] VmmAction::InsertHostDevice(hostdev_cfg) => self.add_vfio_device(vmm, hostdev_cfg), + #[cfg(feature = "host-device")] + VmmAction::PrepareRemoveHostDevice(hostdev_id) => { + self.prepare_remove_vfio_device(vmm, &hostdev_id) + } + #[cfg(feature = "host-device")] + VmmAction::RemoveHostDevice(hostdev_cfg) => self.remove_vfio_device(vmm, &hostdev_cfg), }; debug!("send vmm response: {:?}", response); @@ -552,6 +573,8 @@ impl VmmService { // - Some(path), legacy_manager will create_socket_console on that path.
config.serial_path = machine_config.serial_path; + config.pci_hotplug_enabled = machine_config.pci_hotplug_enabled; + vm.set_vm_config(config.clone()); self.machine_config = config; @@ -833,15 +856,94 @@ impl VmmService { ))?; info!("add_vfio_device: {:?}", config); + let mut ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + vm.device_manager() .vfio_manager .lock() .unwrap() - .insert_device(config.into()) + .insert_device(&mut ctx, config.into()) .map_err(VmmActionError::HostDeviceConfig)?; Ok(VmmData::Empty) } + // using upcall to unplug the pci device in the guest + #[cfg(feature = "host-device")] + fn prepare_remove_vfio_device(&mut self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + + info!("prepare_remove_vfio_device: {:?}", hostdev_id); + let ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + let (sender, receiver) = unbounded(); + + // It is safe because we don't expect poison lock. 
+ let vfio_manager = vm.device_manager.vfio_manager.lock().unwrap(); + + vfio_manager + .prepare_remove_device(&ctx, hostdev_id, sender.clone()) + .map(|_| VmmData::SyncHotplug((sender, receiver))) + .map_err(VmmActionError::HostDeviceConfig) + } + + #[cfg(feature = "host-device")] + fn remove_vfio_device(&self, vmm: &mut Vmm, hostdev_id: &str) -> VmmRequestResult { + let vm = vmm.get_vm_mut().ok_or(VmmActionError::HostDeviceConfig( + VfioDeviceError::InvalidVMID, + ))?; + + info!("remove_vfio_device: {:?}", hostdev_id); + let mut ctx = vm.create_device_op_context(None).map_err(|e| { + info!("create device op context error: {:?}", e); + if let StartMicroVmError::MicroVMAlreadyRunning = e { + VmmActionError::HostDeviceConfig(VfioDeviceError::UpdateNotAllowedPostBoot) + } else if let StartMicroVmError::UpcallServerNotReady = e { + VmmActionError::UpcallServerNotReady + } else { + VmmActionError::StartMicroVm(e) + } + })?; + + // It is safe because we don't expect poison lock. + let mut vfio_manager = vm.device_manager.vfio_manager.lock().unwrap(); + + vfio_manager + .remove_device(&mut ctx, hostdev_id) + .map_err(VmmActionError::HostDeviceConfig)?; + + // we need to revalidate io_manager cache in all vcpus + // in order to drop old io_manager and close device's fd + vm.vcpu_manager() + .map_err(VmmActionError::RemoveHostDevice)? + .revalidate_all_vcpus_cache() + .map_err(VmmActionError::RemoveHostDevice)?; + + // FIXME: we should clear corresponding information because vfio module in + // host kernel will clear iommu table in this scenario. 
+ + Ok(VmmData::Empty) + } + #[cfg(feature = "hotplug")] #[instrument(skip(self))] fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { diff --git a/src/dragonball/src/dbs_pci/src/vfio.rs b/src/dragonball/src/dbs_pci/src/vfio.rs index 7ca6fcc14..76284b1f0 100644 --- a/src/dragonball/src/dbs_pci/src/vfio.rs +++ b/src/dragonball/src/dbs_pci/src/vfio.rs @@ -3,6 +3,7 @@ // // SPDX-License-Identifier: Apache-2.0 +use std::any::Any; use std::io; use std::os::unix::io::AsRawFd; use std::ptr::null_mut; @@ -884,7 +885,7 @@ impl Region { } } -struct VfioPciDeviceState { +pub struct VfioPciDeviceState { vfio_path: String, interrupt: Interrupt, vfio_dev: Arc, @@ -947,6 +948,10 @@ impl VfioPciDeviceState { }) } + pub fn vfio_dev(&self) -> &Arc { + &self.vfio_dev + } + fn read_config_byte(&self, offset: u32) -> u8 { let mut data: [u8; 1] = [0]; self.vfio_dev @@ -1314,6 +1319,23 @@ impl VfioPciDeviceState { Ok(()) } + fn free_register_resources(&self) -> Result<()> { + let mut register_resources = DeviceResources::new(); + for region in self.regions.iter() { + let resources = region.to_resources(); + for res in resources.get_all_resources() { + register_resources.append(res.clone()); + } + } + + self.bus + .upgrade() + .ok_or(VfioPciError::BusIsDropped)? 
+ .free_resources(register_resources); + + Ok(()) + } + fn unregister_regions(&mut self, vm: &Arc) -> Result<()> { // This routine handle VfioPciDevice dropped but not unmap memory if self.context.upgrade().is_none() { @@ -1661,7 +1683,7 @@ impl VfioPciDevice { Ok(()) } - fn state(&self) -> MutexGuard> { + pub fn state(&self) -> MutexGuard> { // Don't expect poisoned lock self.state .lock() @@ -1687,6 +1709,14 @@ impl VfioPciDevice { .expect("poisoned lock for VFIO PCI device") .read_config_word(PCI_CONFIG_VENDOR_OFFSET) } + + pub fn clear_device(&self) -> Result<()> { + let mut state = self.state(); + state.free_register_resources()?; + let _ = state.unregister_regions(&self.vm_fd); + + Ok(()) + } } impl DeviceIo for VfioPciDevice { @@ -1784,7 +1814,8 @@ impl DeviceIo for VfioPciDevice { fn get_trapped_io_resources(&self) -> DeviceResources { self.state().trapped_resources.clone() } - fn as_any(&self) -> &dyn std::any::Any { + + fn as_any(&self) -> &dyn Any { self } } diff --git a/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs index f61882810..5a4068fd0 100755 --- a/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs +++ b/src/dragonball/src/dbs_upcall/src/dev_mgr_service.rs @@ -45,6 +45,16 @@ struct DevMgrMsgHeader { pub msg_flags: u32, } +/// Command struct to add/del a PCI Device. +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct PciDevRequest { + /// PCI bus number + pub busno: u8, + /// Combined device number and function number + pub devfn: u8, +} + /// Command struct to add/del a MMIO Virtio Device. 
#[repr(C)] #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -128,6 +138,10 @@ pub enum DevMgrRequest { AddVcpu(CpuDevRequest), /// Del a VCPU DelVcpu(CpuDevRequest), + /// Add a PCI device + AddPciDev(PciDevRequest), + /// Delete a PCI device + DelPciDev(PciDevRequest), } impl DevMgrRequest { @@ -167,6 +181,18 @@ impl DevMgrRequest { let vcpu_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut CpuDevRequest) }; *vcpu_dev = s.clone(); } + DevMgrRequest::AddPciDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::AddPci as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) }; + *pci_dev = *s; + } + DevMgrRequest::DelPciDev(s) => { + msg_hdr.msg_type = DevMgrMsgType::DelPci as u32; + msg_hdr.msg_size = mem::size_of::() as u32; + let pci_dev = unsafe { &mut *(buffer[size_hdr..].as_ptr() as *mut PciDevRequest) }; + *pci_dev = *s; + } } buffer diff --git a/src/dragonball/src/dbs_upcall/src/lib.rs b/src/dragonball/src/dbs_upcall/src/lib.rs index 8e03c4e01..d6daae681 100755 --- a/src/dragonball/src/dbs_upcall/src/lib.rs +++ b/src/dragonball/src/dbs_upcall/src/lib.rs @@ -23,7 +23,7 @@ use log::{debug, error, info, trace, warn}; use timerfd::{SetTimeFlags, TimerFd, TimerState}; pub use crate::dev_mgr_service::{ - CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, + CpuDevRequest, DevMgrRequest, DevMgrResponse, DevMgrService, MmioDevRequest, PciDevRequest, }; const SERVER_PORT: u32 = 0xDB; diff --git a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs index fad6bbb89..23fa7ee93 100644 --- a/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs +++ b/src/dragonball/src/dbs_virtio_devices/src/mmio/mmio_v2.rs @@ -2,6 +2,7 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause +use std::any::Any; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex, MutexGuard}; @@ -484,7 
+485,7 @@ where resources } - fn as_any(&self) -> &dyn std::any::Any { + fn as_any(&self) -> &dyn Any { self } } diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 90630a7f4..e3a964122 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -36,9 +36,11 @@ use dbs_virtio_devices::{ VirtioDevice, }; +#[cfg(feature = "host-device")] +use dbs_pci::VfioPciDevice; #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] use dbs_upcall::{ - DevMgrRequest, DevMgrService, MmioDevRequest, UpcallClient, UpcallClientError, + DevMgrRequest, DevMgrService, MmioDevRequest, PciDevRequest, UpcallClient, UpcallClientError, UpcallClientRequest, UpcallClientResponse, }; #[cfg(feature = "hotplug")] @@ -46,6 +48,8 @@ use dbs_virtio_devices::vsock::backend::VsockInnerConnector; use crate::address_space_manager::GuestAddressSpaceImpl; use crate::api::v1::InstanceInfo; +#[cfg(feature = "host-device")] +use crate::device_manager::vfio_dev_mgr::PciSystemManager; use crate::error::StartMicroVmError; use crate::resource_manager::ResourceManager; use crate::vm::{KernelConfigInfo, Vm, VmConfigInfo}; @@ -169,6 +173,11 @@ pub enum DeviceMgrError { /// Failed to free device resource. #[error("failed to free device resources: {0}")] ResourceError(#[source] crate::resource_manager::ResourceError), + + #[cfg(feature = "host-device")] + /// Error from Vfio Pci + #[error("failed to do vfio pci operation: {0:?}")] + VfioPci(#[source] dbs_pci::VfioPciError), } /// Specialized version of `std::result::Result` for device manager operations. 
@@ -273,6 +282,8 @@ pub struct DeviceOpContext { address_space: Option, logger: slog::Logger, is_hotplug: bool, + #[cfg(all(feature = "hotplug", feature = "host-device"))] + pci_hotplug_enabled: bool, #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: Option>>, @@ -304,6 +315,12 @@ impl DeviceOpContext { }; let logger = device_mgr.logger.new(slog::o!()); + #[cfg(all(feature = "hotplug", feature = "host-device"))] + let pci_hotplug_enabled = vm_config + .clone() + .map(|c| c.pci_hotplug_enabled) + .unwrap_or(false); + DeviceOpContext { epoll_mgr, io_context, @@ -314,6 +331,8 @@ impl DeviceOpContext { address_space, logger, is_hotplug, + #[cfg(all(feature = "hotplug", feature = "host-device"))] + pci_hotplug_enabled, #[cfg(all(feature = "hotplug", feature = "dbs-upcall"))] upcall_client: None, #[cfg(feature = "dbs-virtio-devices")] @@ -526,6 +545,37 @@ impl DeviceOpContext { self.call_hotplug_device(req, callback) } + + #[cfg(feature = "host-device")] + pub(crate) fn insert_hotplug_pci_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug || !self.pci_hotplug_enabled { + return Err(DeviceMgrError::InvalidOperation); + } + + let (busno, devfn) = DeviceManager::get_pci_device_info(dev)?; + let req = DevMgrRequest::AddPciDev(PciDevRequest { busno, devfn }); + + self.call_hotplug_device(req, callback) + } + + #[cfg(feature = "host-device")] + pub(crate) fn remove_hotplug_pci_device( + &self, + dev: &Arc, + callback: Option>, + ) -> Result<()> { + if !self.is_hotplug || !self.pci_hotplug_enabled { + return Err(DeviceMgrError::InvalidOperation); + } + let (busno, devfn) = DeviceManager::get_pci_device_info(dev)?; + let req = DevMgrRequest::DelPciDev(PciDevRequest { busno, devfn }); + + self.call_hotplug_device(req, callback) + } } #[cfg(all(feature = "hotplug", feature = "acpi"))] @@ -1152,6 +1202,30 @@ impl DeviceManager { Ok(()) } } + + #[cfg(feature = "host-device")] + fn get_pci_device_info(device: &Arc) -> 
Result<(u8, u8)> { + if let Some(pci_dev) = device + .as_any() + .downcast_ref::>() + { + // reference from kernel: include/uapi/linux/pci.h + let busno = pci_dev.bus_id().map_err(DeviceMgrError::VfioPci)?; + let slot = pci_dev.device_id(); + let func = 0; + // The slot/function address of each device is encoded + // in a single byte as follows: + // + // 7:3 = slot + // 2:0 = function + // together those 8 bits combined as devfn value + let devfn = (((slot) & 0x1f) << 3) | ((func) & 0x07); + + return Ok((busno, devfn)); + } + + Err(DeviceMgrError::GetDeviceResource) + } } #[cfg(feature = "hotplug")] @@ -1282,6 +1356,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config.clone()); vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs index 2828996a9..cb5e7dbdd 100644 --- a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs @@ -18,19 +18,17 @@ use std::os::fd::RawFd; use std::path::Path; use std::sync::{Arc, Weak}; -use super::StartMicroVmError; -use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; -use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; -use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext}; -use crate::resource_manager::{ResourceError, ResourceManager}; +use crossbeam_channel::Sender; use dbs_device::resources::Resource::LegacyIrq; -use dbs_device::resources::ResourceConstraint; +use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint}; use dbs_device::DeviceIo; use dbs_interrupt::KvmIrqManager; #[cfg(target_arch = "aarch64")] use dbs_pci::ECAM_SPACE_LENGTH; use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA}; +use dbs_upcall::{DevMgrResponse, UpcallClientResponse}; use kvm_ioctls::{DeviceFd, VmFd}; +use log::error; use serde_derive::{Deserialize, 
Serialize}; use vfio_ioctls::{VfioContainer, VfioDevice}; use vm_memory::{ @@ -38,6 +36,12 @@ use vm_memory::{ MemoryRegionAddress, }; +use super::StartMicroVmError; +use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; +use crate::config_manager::{ConfigItem, DeviceConfigInfo, DeviceConfigInfos}; +use crate::device_manager::{DeviceManagerContext, DeviceMgrError, DeviceOpContext}; +use crate::resource_manager::{ResourceError, ResourceManager}; + // The flag of whether to use the shared irq. const USE_SHARED_IRQ: bool = true; @@ -245,9 +249,35 @@ impl VfioDeviceMgr { } } - /// Insert or update a VFIO device into the manager. - pub fn insert_device(&mut self, config: HostDeviceConfig) -> Result<()> { - let _device_index = self.info_list.insert_or_update(&config)?; + /// Insert or update a VFIO device into the manager. + pub fn insert_device( + &mut self, + ctx: &mut DeviceOpContext, + config: HostDeviceConfig, + ) -> Result<()> { + if !cfg!(feature = "hotplug") && ctx.is_hotplug { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + slog::info!( + ctx.logger(), + "add VFIO device configuration"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => &config.hostdev_id, + "bdf" => &config.dev_config.bus_slot_func, + ); + let device_index = self.info_list.insert_or_update(&config)?; + // Handle device hotplug case + if ctx.is_hotplug { + slog::info!( + ctx.logger(), + "attach VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => &config.hostdev_id, + "bdf" => &config.dev_config.bus_slot_func, + ); + self.add_device(ctx, &config, device_index)?; + } + Ok(()) } @@ -256,6 +286,17 @@ impl VfioDeviceMgr { &mut self, ctx: &mut DeviceOpContext, ) -> std::result::Result<(), StartMicroVmError> { + // create and attach pci root bus + #[cfg(all(feature = "hotplug", feature = "host-device"))] + if ctx.pci_hotplug_enabled { + let _ = self + .create_pci_manager( + ctx.irq_manager.clone(), + ctx.io_context.clone(), +
ctx.res_manager.clone(), + ) + .map_err(StartMicroVmError::CreateVfioDevice)?; + } for (idx, info) in self.info_list.clone().iter().enumerate() { self.create_device(&info.config, ctx, idx) .map_err(StartMicroVmError::CreateVfioDevice)?; @@ -263,6 +304,85 @@ impl VfioDeviceMgr { Ok(()) } + pub fn remove_device(&mut self, ctx: &mut DeviceOpContext, hostdev_id: &str) -> Result<()> { + if !cfg!(feature = "hotplug") { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + + slog::info!( + ctx.logger(), + "remove VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => hostdev_id, + ); + let device_index = self + .get_index_of_hostdev_id(hostdev_id) + .ok_or(VfioDeviceError::InvalidConfig)?; + let mut info = self + .info_list + .remove(device_index) + .ok_or(VfioDeviceError::InvalidConfig)?; + + self.remove_vfio_device(ctx, &mut info) + } + + /// prepare to remove device + pub fn prepare_remove_device( + &self, + ctx: &DeviceOpContext, + hostdev_id: &str, + result_sender: Sender>, + ) -> Result<()> { + if !cfg!(feature = "hotplug") { + return Err(VfioDeviceError::UpdateNotAllowedPostBoot); + } + + slog::info!( + ctx.logger(), + "prepare remove VFIO device"; + "subsystem" => "vfio_dev_mgr", + "hostdev_id" => hostdev_id, + ); + + let device_index = self + .get_index_of_hostdev_id(hostdev_id) + .ok_or(VfioDeviceError::InvalidConfig)?; + + let info = &self.info_list[device_index]; + if let Some(dev) = info.device.as_ref() { + let callback: Option> = + Some(Box::new(move |result| match result { + UpcallClientResponse::DevMgr(response) => { + if let DevMgrResponse::Other(resp) = response { + if let Err(e) = result_sender.send(Some(resp.result)) { + error!("send upcall result failed, due to {:?}!", e); + } + } + } + UpcallClientResponse::UpcallReset => { + if let Err(e) = result_sender.send(None) { + error!("send upcall result failed, due to {:?}!", e); + } + } + #[cfg(test)] + UpcallClientResponse::FakeResponse => {} + })); + ctx.remove_hotplug_pci_device(dev, 
callback) + .map_err(VfioDeviceError::VfioDeviceMgr)? + } + Ok(()) + } + + fn remove_vfio_device( + &mut self, + ctx: &mut DeviceOpContext, + info: &mut DeviceConfigInfo, + ) -> Result<()> { + let device = info.device.take().ok_or(VfioDeviceError::InvalidConfig)?; + self.remove_pci_vfio_device(&device, ctx)?; + Ok(()) + } + /// Start all VFIO devices. pub fn start_devices(&mut self, vm_as: &GuestAddressSpaceImpl) -> Result<()> { if self.vfio_container.is_some() { @@ -310,6 +430,26 @@ impl VfioDeviceMgr { self.info_list[idx].device = Some(device.clone()); Ok(device) } + + fn add_device( + &mut self, + ctx: &mut DeviceOpContext, + cfg: &HostDeviceConfig, + idx: usize, + ) -> Result<()> { + let dev = self.create_device(cfg, ctx, idx)?; + if self.locked_vm_size == 0 && self.vfio_container.is_some() { + let vm_as = ctx + .get_vm_as() + .map_err(|_| VfioDeviceError::InternalError)?; + let vm_memory = vm_as.memory(); + + self.register_memory(vm_memory.deref())?; + } + ctx.insert_hotplug_pci_device(&dev, None) + .map_err(VfioDeviceError::VfioDeviceMgr) + } + /// Gets the index of the device with the specified `hostdev_id` if it exists in the list. 
fn get_index_of_hostdev_id(&self, id: &str) -> Option { self.info_list @@ -379,12 +519,14 @@ impl VfioDeviceMgr { self.locked_vm_size -= size; Ok(()) } + pub(crate) fn update_memory(&mut self, region: &GuestRegionMmap) -> Result<()> { if self.locked_vm_size != 0 { self.register_memory_region(region)?; } Ok(()) } + pub(crate) fn build_sysfs_path(cfg: &HostDeviceConfig) -> Result { if cfg.sysfs_path.is_empty() { let (bdf, domain) = ( @@ -499,6 +641,52 @@ impl VfioDeviceMgr { Ok(vfio_pci_device) } + fn remove_pci_vfio_device( + &mut self, + device: &Arc, + ctx: &mut DeviceOpContext, + ) -> Result<()> { + // safe to unwrap because type is decided + let vfio_pci_device = device + .as_any() + .downcast_ref::>() + .unwrap(); + + let device_id = vfio_pci_device.device_id() as u32; + + // safe to unwrap because pci vfio manager is already created + let _ = self + .pci_vfio_manager + .as_mut() + .unwrap() + .free_device_id(device_id) + .ok_or(VfioDeviceError::InvalidDeviceID(device_id))?; + + let resources = vfio_pci_device.get_assigned_resources(); + let vendor_id = vfio_pci_device.vendor_id(); + let filtered_resources = if vendor_id == VENDOR_NVIDIA { + let mut filtered_resources = DeviceResources::new(); + for resource in resources.get_all_resources() { + if let Resource::LegacyIrq(_) = resource { + continue; + } else { + filtered_resources.append(resource.clone()) + } + } + filtered_resources + } else { + resources + }; + + ctx.res_manager.free_device_resources(&filtered_resources); + + vfio_pci_device + .clear_device() + .map_err(VfioDeviceError::VfioPciError)?; + + Ok(()) + } + pub(crate) fn create_pci_manager( &mut self, irq_manager: Arc, @@ -516,7 +704,7 @@ impl VfioDeviceMgr { } Ok(self.pci_vfio_manager.as_mut().unwrap()) } - + /// Get the PCI manager to support PCI device passthrough pub fn get_pci_manager(&mut self) -> Option<&mut Arc> { self.pci_vfio_manager.as_mut() diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 
ced8e3e98..22e3619de 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -138,6 +138,9 @@ pub struct VmConfigInfo { /// sock path pub serial_path: Option, + + /// Enable PCI device hotplug or not + pub pci_hotplug_enabled: bool, } impl Default for VmConfigInfo { @@ -157,6 +160,7 @@ impl Default for VmConfigInfo { mem_file_path: String::from(""), mem_size_mib: 128, serial_path: None, + pci_hotplug_enabled: false, } } } @@ -182,7 +186,7 @@ pub struct Vm { shared_info: Arc>, address_space: AddressSpaceMgr, - device_manager: DeviceManager, + pub device_manager: DeviceManager, dmesg_fifo: Option>, kernel_config: Option, logger: slog::Logger, @@ -928,6 +932,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; let mut vm = create_vm_instance(); @@ -960,6 +965,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); assert!(vm.init_guest_memory().is_ok()); @@ -1008,6 +1014,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); @@ -1084,6 +1091,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); From f9e0a4bd7ef6e04be53fadc6f7ca48a739208908 Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Wed, 27 Dec 2023 15:11:40 +0800 Subject: [PATCH 3/4] upcall: introduce pci device add & del kernel patch add pci add and del guest kernel patch as the extension in the upcall device manager server side. 
also, bump config version to 120 since we need to add config for dragonball pci in upcall fixes: #8741 Signed-off-by: Gerry Liu Signed-off-by: Helin Guo Signed-off-by: Chao Wu --- .../dragonball-experimental/upcall.conf | 1 + tools/packaging/kernel/kata_config_version | 2 +- ...l-add-pci-hotplug-hot-unplug-support.patch | 173 ++++++++++++++++++ 3 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch diff --git a/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf b/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf index 75596e687..a7e608b14 100644 --- a/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf +++ b/tools/packaging/kernel/configs/fragments/build-type/dragonball-experimental/upcall.conf @@ -4,3 +4,4 @@ CONFIG_DRAGONBALL_UPCALL_SRV=y CONFIG_DRAGONBALL_DEVICE_MANAGER=y CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO=y CONFIG_DRAGONBALL_HOTPLUG_CPU=y +CONFIG_DRAGONBALL_HOTPLUG_PCI=y diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index 078fa0fe5..52bd8e43a 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -119 +120 diff --git a/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch new file mode 100644 index 000000000..d4171e64a --- /dev/null +++ b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0008-upcall-add-pci-hotplug-hot-unplug-support.patch @@ -0,0 +1,173 @@ +From 4ed40d8ce3793129ba9c0b7b663a5e137aceb70c Mon Sep 17 00:00:00 2001 +From: Chao Wu +Date: Wed, 27 Dec 2023 14:43:47 +0800 +Subject: [PATCH] upcall: add pci hotplug
/ hot-unplug support + +add two new upcall functions add_pci_dev and del_pci_dev, mainly for hotplugging +and hot-unplugging pci device in the guest kernel through the upcall server. + +Users could implement upcall client side with add_pci or del_pci command and trigger +those commands in the hypervisor side. + +As always, Dragonball hypervisor will implement the client side to do pci hotplug and +hot-unplug as an example + +Signed-off-by: Gerry Liu +Signed-off-by: Helin Guo +Signed-off-by: Chao Wu +--- + drivers/misc/dragonball/upcall_srv/Kconfig | 11 +++ + .../upcall_srv/dragonball_device_manager.c | 90 +++++++++++++++++++ + 2 files changed, 101 insertions(+) + +diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig +index fc83f03c2edd..19a6ca957ea6 100644 +--- a/drivers/misc/dragonball/upcall_srv/Kconfig ++++ b/drivers/misc/dragonball/upcall_srv/Kconfig +@@ -47,3 +47,14 @@ config DRAGONBALL_HOTPLUG_CPU + structure with command and parameter to hot-pluging an vCPU. + + If unsure, say N. ++ ++config DRAGONBALL_HOTPLUG_PCI ++ bool "PCI hotplug/hotunplug support" ++ depends on DRAGONBALL_DEVICE_MANAGER ++ default y ++ help ++ This configure implements a PCI hotplug/hotunplug support, vmm ++ should send hotplug request by vsock which follow special data ++ structure with command and parameter to hot-pluging a PCI device. ++ ++ If unsure, say N. 
+diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +index 088d38623b8d..3544afefa2a9 100644 +--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c ++++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -90,6 +91,12 @@ struct devmgr_req { + uint8_t apic_ids[256]; + #endif + } cpu_dev_info; ++#endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ struct { ++ uint8_t busno; ++ uint8_t devfn; ++ } pci_dev_info; + #endif + } msg_load; + }; +@@ -117,6 +124,9 @@ struct devmgr_reply { + #endif + #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + struct cpu_dev_reply_info cpu_dev_info; ++#endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ struct {} pci_dev_info; + #endif + } msg_load; + }; +@@ -286,6 +296,82 @@ static int del_mmio_dev(struct devmgr_req *req, + } + #endif + ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++static int add_pci_dev(struct devmgr_req *req, ++ struct devmgr_reply *rep) ++{ ++ int ret = 0; ++ struct devmgr_msg_header *rep_mh = &rep->msg_header; ++ uint8_t busno = req->msg_load.pci_dev_info.busno; ++ uint8_t devfn = req->msg_load.pci_dev_info.devfn; ++ struct pci_bus *bus; ++ struct pci_dev *dev; ++ ++ pr_info("add pci device of busno: %02x, devfn: %02x\n", busno, devfn); ++ ++ pci_lock_rescan_remove(); ++ ++ /* It is similar to pci_rescan_bus */ ++ ++ bus = pci_find_bus(0, busno); ++ if (!bus) { ++ pr_err("Could not find PCI bus for busno %02x\n", busno); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ pci_scan_slot(bus, devfn); ++ dev = pci_get_slot(bus, devfn); ++ if (!dev) { ++ pr_err("Could not find PCI device for slot %02x\n", devfn); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ pci_bus_claim_resources(bus); ++ ++ pci_bus_add_devices(bus); ++ ++ pci_dev_put(dev); ++ ++out: ++ pci_unlock_rescan_remove(); ++ if (!ret) ++ 
_fill_msg_header(rep_mh, 0, ADD_PCI, 0); ++ return ret; ++} ++ ++static int del_pci_dev(struct devmgr_req *req, ++ struct devmgr_reply *rep) ++{ ++ int ret = 0; ++ struct devmgr_msg_header *rep_mh = &rep->msg_header; ++ uint8_t busno = req->msg_load.pci_dev_info.busno; ++ uint8_t devfn = req->msg_load.pci_dev_info.devfn; ++ struct pci_dev *dev; ++ ++ pr_info("remove pci device of busno: %02x, devfn: %02x\n", busno, devfn); ++ ++ pci_lock_rescan_remove(); ++ ++ dev = pci_get_domain_bus_and_slot(0, busno, devfn); ++ ++ if (!dev) { ++ pr_err("Could not find PCI device for slot %02x\n", devfn); ++ ret = -ENODEV; ++ goto out; ++ } ++ ++ pci_stop_and_remove_bus_device(dev); ++ ++ pci_dev_put(dev); ++out: ++ pci_unlock_rescan_remove(); ++ if (!ret) ++ _fill_msg_header(rep_mh, 0, DEL_PCI, 0); ++ return ret; ++} ++#endif + + #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + #if defined(CONFIG_X86_64) +@@ -522,6 +608,10 @@ static struct { + {ADD_CPU, add_cpu_dev}, + {DEL_CPU, del_cpu_dev}, + #endif ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_PCI) ++ {ADD_PCI, add_pci_dev}, ++ {DEL_PCI, del_pci_dev}, ++#endif + }; + + static action_route_t get_action(struct devmgr_req *req) +-- +2.31.1 + From 71c322c293b38a814013ab819bc5863f5d24e38b Mon Sep 17 00:00:00 2001 From: Chao Wu Date: Thu, 28 Dec 2023 21:50:03 +0800 Subject: [PATCH 4/4] runtime-rs: fix ci complaints vfio commits introduce quite a lot of changes in runtime-rs, this commit is for all the changes related to ci, including compilation errors and so on.
Signed-off-by: Chao Wu --- src/agent/Cargo.lock | 322 +++++++++++++++++- src/dragonball/src/api/v1/vmm_action.rs | 4 +- .../src/dbs_device/src/resources.rs | 2 +- src/dragonball/src/dbs_pci/src/lib.rs | 2 + src/dragonball/src/dbs_pci/src/vfio.rs | 2 +- src/dragonball/src/device_manager/mod.rs | 19 ++ .../src/device_manager/vfio_dev_mgr/mod.rs | 18 +- src/dragonball/src/test_utils.rs | 1 + src/dragonball/src/vcpu/vcpu_manager.rs | 2 + src/dragonball/src/vm/mod.rs | 1 + src/libs/kata-types/src/annotations/mod.rs | 2 +- src/runtime-rs/Cargo.lock | 52 +++ src/runtime-rs/crates/hypervisor/Cargo.toml | 2 +- 13 files changed, 408 insertions(+), 21 deletions(-) diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index 4b6069409..e4b9899cb 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.5" @@ -47,6 +58,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5d78ce20460b82d3fa150275ed9d55e21064fc7951177baacf86a145c4a4b1f" +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "async-broadcast" version = "0.5.1" @@ -246,6 +263,18 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = 
[ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -270,6 +299,51 @@ dependencies = [ "log", ] +[[package]] +name = "borsh" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4114279215a005bc675e386011e594e1d9b800918cea18fcadadcce864a2046b" +dependencies = [ + "borsh-derive", + "hashbrown", +] + +[[package]] +name = "borsh-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0754613691538d51f329cce9af41d7b7ca150bc973056f1156611489475f54f7" +dependencies = [ + "borsh-derive-internal", + "borsh-schema-derive-internal", + "proc-macro-crate 0.1.5", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "borsh-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afb438156919598d2c7bad7e1c0adf3d26ed3840dbc010db1a882a65583ca2fb" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "borsh-schema-derive-internal" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634205cc43f74a1b9046ef87c4540ebda95696ec0f315024860cad7c5b0f5ccd" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "bumpalo" version = "3.10.0" @@ -278,9 +352,36 @@ checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" [[package]] name = "byte-unit" -version = "3.1.4" +version = "5.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8" +checksum = "d405b41420a161b4e1dd5a52e3349f41b4dae9a39be02aff1d67fe53256430ac" +dependencies = [ + "rust_decimal", + "serde", + "utf8-width", +] + +[[package]] +name = "bytecheck" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] [[package]] name = "byteorder" @@ -489,10 +590,32 @@ dependencies = [ ] [[package]] -name = "crossbeam-utils" -version = "0.8.16" +name = "crossbeam-deque" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" dependencies = [ "cfg-if 1.0.0", ] @@ -735,6 +858,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.21" @@ -890,6 +1019,9 @@ name = "hashbrown" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1229,6 +1361,7 @@ 
dependencies = [ "serde_json", "slog", "slog-scope", + "sysinfo", "thiserror", "toml", ] @@ -1241,9 +1374,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libseccomp" @@ -1519,6 +1652,15 @@ dependencies = [ "memoffset 0.7.1", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -1816,6 +1958,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +[[package]] +name = "proc-macro-crate" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d6ea3c4595b96363c13943497db34af4460fb474a95c43f4446ad341b8c9785" +dependencies = [ + "toml", +] + [[package]] name = "proc-macro-crate" version = "1.2.1" @@ -2022,6 +2173,26 @@ dependencies = [ "ttrpc-codegen", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quote" version = "1.0.27" @@ -2031,6 +2202,12 @@ dependencies = [ 
"proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -2061,6 +2238,26 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.2.13" @@ -2134,6 +2331,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "rend" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.11.18" @@ -2171,6 +2377,34 @@ dependencies = [ "winreg", ] +[[package]] +name = "rkyv" +version = "0.7.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" +dependencies = [ + "bitvec", + "bytecheck", + "hashbrown", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rlimit" version = "0.5.4" @@ -2195,6 +2429,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "rust_decimal" 
+version = "1.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd" +dependencies = [ + "arrayvec", + "borsh", + "bytes 1.1.0", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustix" version = "0.37.3" @@ -2291,6 +2541,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" version = "2.9.2" @@ -2459,6 +2715,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "slab" version = "0.4.6" @@ -2596,12 +2858,33 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysinfo" +version = "0.29.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd727fc423c2060f6c92d9534cef765c65a6ed3f428a03d7def74a8c4348e666" +dependencies = [ + "cfg-if 1.0.0", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "winapi", +] + [[package]] name = "take_mut" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.3.0" @@ -3024,6 +3307,18 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8-width" +version = "0.1.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" + +[[package]] +name = "uuid" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" + [[package]] name = "valuable" version = "0.1.0" @@ -3407,6 +3702,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "0.2.3" @@ -3473,7 +3777,7 @@ version = "3.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d1794a946878c0e807f55a397187c11fc7a038ba5d868e7db4f3bd7760bc9d" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.2.1", "proc-macro2", "quote", "regex", @@ -3512,7 +3816,7 @@ version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "934d7a7dfc310d6ee06c87ffe88ef4eca7d3e37bb251dece2ef93da8f17d8ecd" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.2.1", "proc-macro2", "quote", "syn 1.0.109", diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index 5791cb8b9..586b998cb 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -38,7 +38,7 @@ pub use crate::device_manager::fs_dev_mgr::{ #[cfg(feature = "virtio-mem")] pub use crate::device_manager::mem_dev_mgr::{MemDeviceConfigInfo, MemDeviceError}; #[cfg(feature = "host-device")] -use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError, VfioDeviceHostInfo}; +use crate::device_manager::vfio_dev_mgr::{HostDeviceConfig, VfioDeviceError}; #[cfg(feature = "vhost-net")] pub use crate::device_manager::vhost_net_dev_mgr::{ VhostNetDeviceConfigInfo, 
VhostNetDeviceError, VhostNetDeviceMgr, @@ -871,7 +871,7 @@ impl VmmService { .vfio_manager .lock() .unwrap() - .insert_device(&mut ctx, config.into()) + .insert_device(&mut ctx, config) .map_err(VmmActionError::HostDeviceConfig)?; Ok(VmmData::Empty) } diff --git a/src/dragonball/src/dbs_device/src/resources.rs b/src/dragonball/src/dbs_device/src/resources.rs index 4f5290745..b834c16c3 100644 --- a/src/dragonball/src/dbs_device/src/resources.rs +++ b/src/dragonball/src/dbs_device/src/resources.rs @@ -403,7 +403,7 @@ pub(crate) mod tests { resource.append(entry.clone()); assert_eq!(entry, resource[6]); - let entry = Resource::MacAddresss(MAC_ADDRESS.to_string()); + let entry = Resource::MacAddress(MAC_ADDRESS.to_string()); resource.append(entry.clone()); assert_eq!(entry, resource[7]); diff --git a/src/dragonball/src/dbs_pci/src/lib.rs b/src/dragonball/src/dbs_pci/src/lib.rs index 7a0119f3c..625682c1f 100644 --- a/src/dragonball/src/dbs_pci/src/lib.rs +++ b/src/dragonball/src/dbs_pci/src/lib.rs @@ -40,6 +40,8 @@ pub use configuration::{ mod device; pub use device::PciDevice; +#[cfg(target_arch = "aarch64")] +pub use device::{PciBusResources, ECAM_SPACE_LENGTH}; mod root_bus; pub use root_bus::create_pci_root_bus; diff --git a/src/dragonball/src/dbs_pci/src/vfio.rs b/src/dragonball/src/dbs_pci/src/vfio.rs index 76284b1f0..c828c4ccb 100644 --- a/src/dragonball/src/dbs_pci/src/vfio.rs +++ b/src/dragonball/src/dbs_pci/src/vfio.rs @@ -229,7 +229,7 @@ impl Interrupt { fn get_irq_pin(&self) -> u32 { if let Some(legacy_irq) = self.legacy_irq { - (PciInterruptPin::IntA as u32) << 8 | self.legacy_irq.unwrap() + (PciInterruptPin::IntA as u32) << 8 | legacy_irq } else { 0 } diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index e3a964122..d50294471 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -20,6 +20,8 @@ use dbs_device::resources::Resource; use dbs_device::DeviceIo; use 
dbs_interrupt::KvmIrqManager; use dbs_legacy_devices::ConsoleHandler; +#[cfg(all(feature = "host-device", target_arch = "aarch64"))] +use dbs_pci::PciBusResources; use dbs_utils::epoll_manager::EpollManager; use kvm_ioctls::VmFd; @@ -1030,6 +1032,23 @@ impl DeviceManager { Err(DeviceMgrError::GetDeviceResource) } + + /// Get pci bus resources for creating fdt. + #[cfg(feature = "host-device")] + pub fn get_pci_bus_resources(&self) -> Option { + let mut vfio_dev_mgr = self.vfio_manager.lock().unwrap(); + let vfio_pci_mgr = vfio_dev_mgr.get_pci_manager(); + if vfio_pci_mgr.is_none() { + return None; + } + let pci_manager = vfio_pci_mgr.unwrap(); + let ecam_space = pci_manager.get_ecam_space(); + let bar_space = pci_manager.get_bar_space(); + Some(PciBusResources { + ecam_space, + bar_space, + }) + } } #[cfg(feature = "dbs-virtio-devices")] diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs index cb5e7dbdd..c3c3b6bac 100644 --- a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs @@ -23,12 +23,10 @@ use dbs_device::resources::Resource::LegacyIrq; use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint}; use dbs_device::DeviceIo; use dbs_interrupt::KvmIrqManager; -#[cfg(target_arch = "aarch64")] -use dbs_pci::ECAM_SPACE_LENGTH; use dbs_pci::{VfioPciDevice, VENDOR_NVIDIA}; use dbs_upcall::{DevMgrResponse, UpcallClientResponse}; use kvm_ioctls::{DeviceFd, VmFd}; -use log::error; +use log::{debug, error}; use serde_derive::{Deserialize, Serialize}; use vfio_ioctls::{VfioContainer, VfioDevice}; use vm_memory::{ @@ -116,6 +114,10 @@ pub enum VfioDeviceError { #[error("failure while allocate device resource: {0:?}")] AllocateDeviceResource(#[source] ResourceError), + /// Failed to free device resource + #[error("failure while freeing device resource: {0:?}")] + FreeDeviceResource(#[source] ResourceError), + /// Vfio 
container not found #[error("vfio container not found")] VfioContainerNotFound, @@ -364,8 +366,10 @@ impl VfioDeviceMgr { error!("send upcall result failed, due to {:?}!", e); } } - #[cfg(test)] - UpcallClientResponse::FakeResponse => {} + #[allow(unreachable_patterns)] + _ => { + debug!("this arm should only be triggered under test"); + } })); ctx.remove_hotplug_pci_device(dev, callback) .map_err(VfioDeviceError::VfioDeviceMgr)? @@ -678,7 +682,9 @@ impl VfioDeviceMgr { resources }; - ctx.res_manager.free_device_resources(&filtered_resources); + ctx.res_manager + .free_device_resources(&filtered_resources) + .map_err(VfioDeviceError::FreeDeviceResource)?; vfio_pci_device .clear_device() diff --git a/src/dragonball/src/test_utils.rs b/src/dragonball/src/test_utils.rs index dec006f43..58612cbaf 100644 --- a/src/dragonball/src/test_utils.rs +++ b/src/dragonball/src/test_utils.rs @@ -39,6 +39,7 @@ pub mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 45d0541f4..a7164eb03 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -1133,6 +1133,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config); vm.init_guest_memory().unwrap(); @@ -1181,6 +1182,7 @@ mod tests { sockets: 1, }, vpmu_feature: 0, + pci_hotplug_enabled: false, }; vm.set_vm_config(vm_config.clone()); vm.init_guest_memory().unwrap(); diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 22e3619de..2ac1cc39d 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -186,6 +186,7 @@ pub struct Vm { shared_info: Arc>, address_space: AddressSpaceMgr, + /// device manager for Dragonball pub device_manager: DeviceManager, dmesg_fifo: Option>, kernel_config: Option, diff --git 
a/src/libs/kata-types/src/annotations/mod.rs b/src/libs/kata-types/src/annotations/mod.rs index fd316c2e2..4024ce2cb 100644 --- a/src/libs/kata-types/src/annotations/mod.rs +++ b/src/libs/kata-types/src/annotations/mod.rs @@ -705,7 +705,7 @@ impl Annotation { } // Hypervisor Memory related annotations KATA_ANNO_CFG_HYPERVISOR_DEFAULT_MEMORY => { - match byte_unit::Byte::parse_str(value,true) { + match byte_unit::Byte::parse_str(value, true) { Ok(mem_bytes) => { let memory_size = mem_bytes .get_adjusted_unit(byte_unit::Unit::MiB) diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 99c38e33c..b593214b3 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -775,6 +775,26 @@ dependencies = [ "vmm-sys-util 0.11.1", ] +[[package]] +name = "dbs-pci" +version = "0.1.0" +dependencies = [ + "byteorder", + "dbs-allocator", + "dbs-boot", + "dbs-device", + "dbs-interrupt", + "downcast-rs", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vfio-ioctls", + "vm-memory", +] + [[package]] name = "dbs-upcall" version = "0.3.0" @@ -906,6 +926,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + [[package]] name = "dragonball" version = "0.1.0" @@ -921,6 +947,7 @@ dependencies = [ "dbs-device", "dbs-interrupt", "dbs-legacy-devices", + "dbs-pci", "dbs-upcall", "dbs-utils", "dbs-virtio-devices", @@ -943,6 +970,8 @@ dependencies = [ "slog-scope", "thiserror", "tracing", + "vfio-bindings", + "vfio-ioctls", "virtio-queue", "vm-memory", "vmm-sys-util 0.11.1", @@ -4147,6 +4176,29 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vfio-bindings" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43449b404c488f70507dca193debd4bea361fe8089869b947adc19720e464bce" + +[[package]] +name = "vfio-ioctls" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "068bac78842164a8ecc1d1a84a8d8a9168ab29fa3c96942689e286a30ae22ac4" +dependencies = [ + "byteorder", + "kvm-bindings", + "kvm-ioctls", + "libc", + "log", + "thiserror", + "vfio-bindings", + "vm-memory", + "vmm-sys-util 0.11.1", +] + [[package]] name = "vhost" version = "0.6.1" diff --git a/src/runtime-rs/crates/hypervisor/Cargo.toml b/src/runtime-rs/crates/hypervisor/Cargo.toml index a8520b15b..ef47fe191 100644 --- a/src/runtime-rs/crates/hypervisor/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/Cargo.toml @@ -35,7 +35,7 @@ kata-types = { path = "../../../libs/kata-types" } logging = { path = "../../../libs/logging" } shim-interface = { path = "../../../libs/shim-interface" } -dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall","virtio-mem", "virtio-balloon", "vhost-user-net"] } +dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs", "vhost-net", "dbs-upcall", "virtio-mem", "virtio-balloon", "vhost-user-net", "host-device"] } ch-config = { path = "ch-config", optional = true } tests_utils = { path = "../../tests/utils" }