CCv0: Merge main into CCv0 branch

Merge remote-tracking branch 'upstream/main' into CCv0

Fixes: #5932
Signed-off-by: Megan Wright <megan.wright@ibm.com>
This commit is contained in:
Megan Wright
2022-12-20 09:34:27 +00:00
52 changed files with 2246 additions and 167 deletions

1
.gitignore vendored
View File

@@ -13,4 +13,3 @@ src/agent/protocols/src/*.rs
!src/agent/protocols/src/lib.rs
build
src/tools/log-parser/kata-log-parser

View File

@@ -1 +1 @@
3.1.0-alpha0
3.1.0-alpha1

View File

@@ -481,7 +481,7 @@ When using the file system type virtio-fs (default), `virtiofsd` is required
```bash
$ pushd kata-containers/tools/packaging/static-build/virtiofsd
$ ./build-static-virtiofsd.sh
$ ./build.sh
$ popd
```

View File

@@ -257,6 +257,48 @@ This launches a BusyBox container named `hello`, and it will be removed by `--rm
The `--cni` flag enables CNI networking for the container. Without this flag, a container with just a
loopback interface is created.
### Launch containers using `ctr` command line with rootfs bundle
#### Get rootfs
Use the following script to create the rootfs:
```bash
ctr i pull quay.io/prometheus/busybox:latest
ctr i export rootfs.tar quay.io/prometheus/busybox:latest
rootfs_tar=rootfs.tar
bundle_dir="./bundle"
mkdir -p "${bundle_dir}"
# extract busybox rootfs
rootfs_dir="${bundle_dir}/rootfs"
mkdir -p "${rootfs_dir}"
layers_dir="$(mktemp -d)"
tar -C "${layers_dir}" -pxf "${rootfs_tar}"
for ((i=0;i<$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers | length");i++)); do
tar -C ${rootfs_dir} -xf ${layers_dir}/$(cat ${layers_dir}/manifest.json | jq -r ".[].Layers[${i}]")
done
```
#### Get `config.json`
Use `runc spec` to generate `config.json`:
```bash
cd ./bundle/rootfs
runc spec
mv config.json ../
```
Change the root `path` in `config.json` to the absolute path of rootfs
```JSON
"root":{
"path":"/root/test/bundle/rootfs",
"readonly": false
},
```
#### Run container
```bash
sudo ctr run -d --runtime io.containerd.run.kata.v2 --config bundle/config.json hello
sudo ctr t exec --exec-id ${ID} -t hello sh
```
### Launch Pods with `crictl` command line
With the `crictl` command line of `cri-tools`, you can specify runtime class with `-r` or `--runtime` flag.

View File

@@ -197,11 +197,6 @@ vhost_user_store_path = "<Path of the base directory for vhost-user device>"
> under `[hypervisor.qemu]` section.
For the subdirectories of `vhost_user_store_path`: `block` is used for block
device; `block/sockets` is where we expect UNIX domain sockets for vhost-user
block devices to live; `block/devices` is where simulated block device nodes
for vhost-user block devices are created.
For the subdirectories of `vhost_user_store_path`:
- `block` is used for block device;
- `block/sockets` is where we expect UNIX domain sockets for vhost-user

View File

@@ -406,19 +406,10 @@ impl VmmService {
}
config.vpmu_feature = machine_config.vpmu_feature;
let vm_id = vm.shared_info().read().unwrap().id.clone();
let serial_path = match machine_config.serial_path {
Some(value) => value,
None => {
if config.serial_path.is_none() {
String::from("/run/dragonball/") + &vm_id + "_com1"
} else {
// Safe to unwrap() because we have checked it has a value.
config.serial_path.as_ref().unwrap().clone()
}
}
};
config.serial_path = Some(serial_path);
// If serial_path is:
// - None, legacy_manager will create_stdio_console.
// - Some(path), legacy_manager will create_socket_console on that path.
config.serial_path = machine_config.serial_path;
vm.set_vm_config(config.clone());
self.machine_config = config;

View File

@@ -595,23 +595,17 @@ impl DeviceManager {
.map_err(|_| StartMicroVmError::EventFd)?;
info!(self.logger, "init console path: {:?}", com1_sock_path);
if let Some(legacy_manager) = self.legacy_manager.as_ref() {
if let Some(path) = com1_sock_path {
// Currently, the `com1_sock_path` "stdio" is only reserved for creating the stdio console
if path != "stdio" {
let com1 = legacy_manager.get_com1_serial();
self.con_manager
.create_socket_console(com1, path)
.map_err(StartMicroVmError::DeviceManager)?;
return Ok(());
}
}
let com1 = legacy_manager.get_com1_serial();
self.con_manager
.create_stdio_console(com1)
.map_err(StartMicroVmError::DeviceManager)?;
if let Some(path) = com1_sock_path {
self.con_manager
.create_socket_console(com1, path)
.map_err(StartMicroVmError::DeviceManager)?;
} else {
self.con_manager
.create_stdio_console(com1)
.map_err(StartMicroVmError::DeviceManager)?;
}
}
Ok(())

159
src/libs/Cargo.lock generated
View File

@@ -203,6 +203,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "futures"
version = "0.3.21"
@@ -344,6 +350,82 @@ dependencies = [
"libc",
]
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "http"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
dependencies = [
"bytes 1.1.0",
"fnv",
"itoa",
]
[[package]]
name = "http-body"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
dependencies = [
"bytes 1.1.0",
"http",
"pin-project-lite",
]
[[package]]
name = "httparse"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
[[package]]
name = "httpdate"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "hyper"
version = "0.14.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c"
dependencies = [
"bytes 1.1.0",
"futures-channel",
"futures-core",
"futures-util",
"http",
"http-body",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"socket2",
"tokio",
"tower-service",
"tracing",
"want",
]
[[package]]
name = "hyperlocal"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fafdf7b2b2de7c9784f76e02c0935e65a8117ec3b768644379983ab333ac98c"
dependencies = [
"futures-util",
"hex",
"hyper",
"pin-project",
"tokio",
]
[[package]]
name = "indexmap"
version = "1.8.1"
@@ -647,6 +729,26 @@ dependencies = [
"indexmap",
]
[[package]]
name = "pin-project"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad29a609b6bcd67fee905812e544992d216af9d755757c05ed2d0e15a74c6ecc"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pin-project-lite"
version = "0.2.8"
@@ -955,6 +1057,16 @@ dependencies = [
"syn",
]
[[package]]
name = "shim-interface"
version = "0.1.0"
dependencies = [
"anyhow",
"hyper",
"hyperlocal",
"tokio",
]
[[package]]
name = "slab"
version = "0.4.6"
@@ -1010,9 +1122,9 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
[[package]]
name = "socket2"
version = "0.4.4"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0"
checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd"
dependencies = [
"libc",
"winapi",
@@ -1115,6 +1227,7 @@ dependencies = [
"libc",
"memchr",
"mio",
"num_cpus",
"pin-project-lite",
"socket2",
"tokio-macros",
@@ -1154,6 +1267,38 @@ dependencies = [
"serde",
]
[[package]]
name = "tower-service"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "tracing"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09"
dependencies = [
"cfg-if",
"pin-project-lite",
"tracing-core",
]
[[package]]
name = "tracing-core"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f"
dependencies = [
"lazy_static",
]
[[package]]
name = "try-lock"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
[[package]]
name = "ttrpc"
version = "0.6.1"
@@ -1222,6 +1367,16 @@ dependencies = [
"nix 0.23.1",
]
[[package]]
name = "want"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
dependencies = [
"log",
"try-lock",
]
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"

View File

@@ -6,6 +6,7 @@ members = [
"oci",
"protocols",
"safe-path",
"shim-interface",
"test-utils",
]
resolver = "2"

View File

@@ -0,0 +1,18 @@
[package]
name = "shim-interface"
version = "0.1.0"
description = "A library to provide service interface of Kata Containers"
keywords = ["kata", "container", "http"]
categories = ["services"]
authors = ["The Kata Containers community <kata-dev@lists.katacontainers.io>"]
repository = "https://github.com/kata-containers/kata-containers.git"
homepage = "https://katacontainers.io/"
readme = "README.md"
license = "Apache-2.0"
edition = "2018"
[dependencies]
anyhow = "^1.0"
tokio = { version = "1.8.0", features = ["rt-multi-thread"] }
hyper = { version = "0.14.20", features = ["stream", "server", "http1"] }
hyperlocal = "0.8"

View File

@@ -0,0 +1,66 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
//! shim-interface is a common library for different components of Kata Containers
//! to make function call through services inside the runtime(runtime-rs runtime).
//!
//! Shim management:
//! Currently, inside the shim, there is a shim management server running as the shim
//! starts, working as a RESTful server. To make function call in runtime from another
//! binary, using the utilities provided in this library is one of the methods.
//!
//! You may create a client by constructing a MgmtClient and letting it make specific
//! HTTP request to the server. The server inside shim will multiplex the request
//! to its corresponding handler and run certain methods.
use std::path::Path;
use anyhow::{anyhow, Result};
pub mod shim_mgmt;
/// Base directory for sandbox storage; per-sandbox paths are formed by
/// joining the sandbox id onto it.
pub const KATA_PATH: &str = "/run/kata";
/// File name of the shim management socket.
pub const SHIM_MGMT_SOCK_NAME: &str = "shim-monitor.sock";

/// Returns the sandbox storage path as an owned `String` (a copy of `KATA_PATH`).
pub fn sb_storage_path() -> String {
    KATA_PATH.to_owned()
}
/// Builds the address of the unix domain socket (UDS) used to talk to the
/// shim management service over HTTP.
///
/// The result has the form `unix://<sb_storage_path()>/<sid>/shim-monitor.sock`,
/// e.g. `unix:///run/kata/{sid}/shim-monitor.sock`.
///
/// # Errors
///
/// Returns an error when `sid` is empty, or when the joined socket path
/// cannot be represented as a UTF-8 string.
pub fn mgmt_socket_addr(sid: &str) -> Result<String> {
    // An empty id would resolve to the storage root itself, so reject it early.
    if sid.is_empty() {
        return Err(anyhow!(
            "Empty sandbox id for acquiring socket address for shim_mgmt"
        ));
    }

    let sock_path = Path::new(&sb_storage_path())
        .join(sid)
        .join(SHIM_MGMT_SOCK_NAME);

    match sock_path.to_str() {
        Some(path) => Ok(format!("unix://{}", path)),
        None => Err(anyhow!("Bad socket path")),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A non-empty sandbox id yields the socket address under
    /// `/run/kata/<sid>/`, while an empty id is rejected with an error.
    #[test]
    fn test_mgmt_socket_addr() {
        let expected = "unix:///run/kata/414123/shim-monitor.sock";
        let actual = mgmt_socket_addr("414123").unwrap();
        assert_eq!(actual, expected);

        let empty_sid = "";
        assert!(mgmt_socket_addr(empty_sid).is_err());
    }
}

View File

@@ -1,3 +1,4 @@
#![allow(dead_code)]
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
@@ -10,7 +11,7 @@
use std::{path::Path, path::PathBuf, time::Duration};
use super::server::mgmt_socket_addr;
use crate::mgmt_socket_addr;
use anyhow::{anyhow, Context, Result};
use hyper::{Body, Client, Method, Request, Response};
use hyperlocal::{UnixClientExt, UnixConnector, Uri};
@@ -30,8 +31,8 @@ pub struct MgmtClient {
impl MgmtClient {
/// Construct a new client connecting to shim mgmt server
pub fn new(sid: String, timeout: Option<Duration>) -> Result<Self> {
let unix_socket_path = mgmt_socket_addr(sid);
pub fn new(sid: &str, timeout: Option<Duration>) -> Result<Self> {
let unix_socket_path = mgmt_socket_addr(sid).context("Failed to get unix socket path")?;
let s_addr = unix_socket_path
.strip_prefix("unix:")
.context("failed to strix prefix")?;

View File

@@ -0,0 +1,24 @@
// Copyright (c) 2022 Alibaba Cloud
//
// SPDX-License-Identifier: Apache-2.0
//
/// The shim management client module
pub mod client;
/// The key under which a direct volume path is passed
pub const DIRECT_VOLUME_PATH_KEY: &str = "path";
/// URL path for querying direct volume stats
pub const DIRECT_VOLUME_STATS_URL: &str = "/direct-volume/stats";
/// URL path for resizing a direct volume
pub const DIRECT_VOLUME_RESIZE_URL: &str = "/direct-volume/resize";
/// URL path for querying the agent's socket
pub const AGENT_URL: &str = "/agent-url";
/// URL path for operations on the guest iptables (IPv4)
pub const IP_TABLE_URL: &str = "/iptables";
/// URL path for operations on the guest ip6tables (IPv6)
pub const IP6_TABLE_URL: &str = "/ip6tables";
/// URL path for querying metrics inside the shim
pub const METRICS_URL: &str = "/metrics";
/// Error message used when the shim management server cannot be created
pub const ERR_NO_SHIM_SERVER: &str = "Failed to create shim management server";

View File

@@ -28,6 +28,7 @@ rand = "0.8.4"
kata-sys-util = { path = "../../../libs/kata-sys-util" }
kata-types = { path = "../../../libs/kata-types" }
logging = { path = "../../../libs/logging" }
shim-interface = { path = "../../../libs/shim-interface" }
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs"] }

View File

@@ -20,7 +20,8 @@ use kata_types::{
capabilities::{Capabilities, CapabilityBits},
config::hypervisor::Hypervisor as HypervisorConfig,
};
use persist::{sandbox_persist::Persist, KATA_PATH};
use persist::sandbox_persist::Persist;
use shim_interface::KATA_PATH;
use std::{collections::HashSet, fs::create_dir_all, path::PathBuf};
const DRAGONBALL_KERNEL: &str = "vmlinux";

View File

@@ -14,7 +14,7 @@ use kata_types::capabilities::Capabilities;
use super::inner::DragonballInner;
use crate::{utils, VcpuThreadIds, VmmState};
use persist::KATA_PATH;
use shim_interface::KATA_PATH;
const DEFAULT_HYBRID_VSOCK_NAME: &str = "kata.hvsock";
fn get_vsock_path(root: &str) -> String {

View File

@@ -10,6 +10,7 @@ async-trait = "0.1.48"
anyhow = "^1.0"
kata-sys-util = { path = "../../../libs/kata-sys-util"}
kata-types = { path = "../../../libs/kata-types" }
shim-interface = { path = "../../../libs/shim-interface" }
libc = "0.2"
serde = { version = "1.0.138", features = ["derive"] }
serde_json = "1.0.82"

View File

@@ -7,9 +7,9 @@
pub mod sandbox_persist;
use anyhow::{anyhow, Context, Ok, Result};
use serde::de;
use shim_interface::KATA_PATH;
use std::{fs::File, io::BufReader};
pub const KATA_PATH: &str = "/run/kata";
pub const PERSIST_FILE: &str = "state.json";
use kata_sys_util::validate::verify_id;
use safe_path::scoped_join;

View File

@@ -68,11 +68,14 @@ impl ResourceManager {
pub async fn handler_rootfs(
&self,
cid: &str,
root: &oci::Root,
bundle_path: &str,
rootfs_mounts: &[Mount],
) -> Result<Arc<dyn Rootfs>> {
let inner = self.inner.read().await;
inner.handler_rootfs(cid, bundle_path, rootfs_mounts).await
inner
.handler_rootfs(cid, root, bundle_path, rootfs_mounts)
.await
}
pub async fn handler_volumes(

View File

@@ -196,6 +196,7 @@ impl ResourceManagerInner {
pub async fn handler_rootfs(
&self,
cid: &str,
root: &oci::Root,
bundle_path: &str,
rootfs_mounts: &[Mount],
) -> Result<Arc<dyn Rootfs>> {
@@ -205,6 +206,7 @@ impl ResourceManagerInner {
self.hypervisor.as_ref(),
&self.sid,
cid,
root,
bundle_path,
rootfs_mounts,
)

View File

@@ -51,16 +51,37 @@ impl RootFsResource {
}
}
#[allow(clippy::too_many_arguments)]
pub async fn handler_rootfs(
&self,
share_fs: &Option<Arc<dyn ShareFs>>,
hypervisor: &dyn Hypervisor,
sid: &str,
cid: &str,
root: &oci::Root,
bundle_path: &str,
rootfs_mounts: &[Mount],
) -> Result<Arc<dyn Rootfs>> {
match rootfs_mounts {
// if rootfs_mounts is empty
mounts_vec if mounts_vec.is_empty() => {
if let Some(share_fs) = share_fs {
let share_fs_mount = share_fs.get_share_fs_mount();
// share fs rootfs
Ok(Arc::new(
share_fs_rootfs::ShareFsRootfs::new(
&share_fs_mount,
cid,
root.path.as_str(),
None,
)
.await
.context("new share fs rootfs")?,
))
} else {
return Err(anyhow!("share fs is unavailable"));
}
}
mounts_vec if is_single_layer_rootfs(mounts_vec) => {
// Safe as single_layer_rootfs must have one layer
let layer = &mounts_vec[0];
@@ -86,7 +107,7 @@ impl RootFsResource {
&share_fs_mount,
cid,
bundle_path,
layer,
Some(layer),
)
.await
.context("new share fs rootfs")?,

View File

@@ -23,14 +23,18 @@ impl ShareFsRootfs {
share_fs_mount: &Arc<dyn ShareFsMount>,
cid: &str,
bundle_path: &str,
rootfs: &Mount,
rootfs: Option<&Mount>,
) -> Result<Self> {
let bundle_rootfs = format!("{}/{}", bundle_path, ROOTFS);
rootfs.mount(&bundle_rootfs).context(format!(
"mount rootfs from {:?} to {}",
&rootfs, &bundle_rootfs
))?;
let bundle_rootfs = if let Some(rootfs) = rootfs {
let bundle_rootfs = format!("{}/{}", bundle_path, ROOTFS);
rootfs.mount(&bundle_rootfs).context(format!(
"mount rootfs from {:?} to {}",
&rootfs, &bundle_rootfs
))?;
bundle_rootfs
} else {
bundle_path.to_string()
};
let mount_result = share_fs_mount
.share_rootfs(ShareFsRootfsConfig {
cid: cid.to_string(),

View File

@@ -18,6 +18,7 @@ common = { path = "./common" }
kata-types = { path = "../../../libs/kata-types" }
logging = { path = "../../../libs/logging"}
oci = { path = "../../../libs/oci" }
shim-interface = { path = "../../../libs/shim-interface" }
persist = { path = "../persist" }
hypervisor = { path = "../hypervisor" }
# runtime handler

View File

@@ -11,6 +11,6 @@ logging::logger_with_subsystem!(sl, "runtimes");
pub mod manager;
pub use manager::RuntimeHandlerManager;
pub use shim_interface;
mod shim_mgmt;
pub use shim_mgmt::{client::MgmtClient, server::sb_storage_path};
mod static_resource;

View File

@@ -19,6 +19,7 @@ use kata_types::{annotations::Annotation, config::TomlConfig};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use persist::sandbox_persist::Persist;
use shim_interface::shim_mgmt::ERR_NO_SHIM_SERVER;
use tokio::sync::{mpsc::Sender, RwLock};
#[cfg(feature = "virt")]
use virt_container::{
@@ -117,7 +118,9 @@ impl RuntimeHandlerManagerInner {
let shim_mgmt_svr = MgmtServer::new(
&self.id,
self.runtime_instance.as_ref().unwrap().sandbox.clone(),
);
)
.context(ERR_NO_SHIM_SERVER)?;
tokio::task::spawn(Arc::new(shim_mgmt_svr).run());
info!(sl!(), "shim management http server starts");

View File

@@ -12,7 +12,7 @@ use common::Sandbox;
use hyper::{Body, Method, Request, Response, StatusCode};
use std::sync::Arc;
use super::server::{AGENT_URL, IP6_TABLE_URL, IP_TABLE_URL};
use shim_interface::shim_mgmt::{AGENT_URL, IP6_TABLE_URL, IP_TABLE_URL};
// main router for response, this works as a multiplexer on
// http arrival which invokes the corresponding handler function

View File

@@ -4,6 +4,11 @@
// SPDX-License-Identifier: Apache-2.0
//
pub mod client;
//! The server side of shim management implementation, receive HTTP
//! requests and multiplex them to corresponding functions inside shim
//!
//! To call services in a RESTful convention, use the client
//! from libs/shim-interface library
mod handlers;
pub mod server;

View File

@@ -1,5 +1,5 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
// Copyright (c) 2022 Alibaba Cloud
// Copyright (c) 2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
@@ -16,21 +16,11 @@ use std::{fs, path::Path, sync::Arc};
use anyhow::{Context, Result};
use common::Sandbox;
use hyper::{server::conn::Http, service::service_fn};
use persist::KATA_PATH;
use shim_interface::{mgmt_socket_addr, shim_mgmt::ERR_NO_SHIM_SERVER};
use tokio::net::UnixListener;
use super::handlers::handler_mux;
pub(crate) const DIRECT_VOLUMN_PATH_KEY: &str = "path";
pub(crate) const DIRECT_VOLUMN_STATS_URL: &str = "/direct-volumn/stats";
pub(crate) const DIRECT_VOLUMN_RESIZE_URL: &str = "/direct-volumn/resize";
pub(crate) const AGENT_URL: &str = "/agent-url";
pub(crate) const IP_TABLE_URL: &str = "/iptables";
pub(crate) const IP6_TABLE_URL: &str = "/ip6tables";
pub(crate) const METRICS_URL: &str = "/metrics";
const SHIM_MGMT_SOCK_NAME: &str = "shim-monitor.sock";
/// The shim management server instance
pub struct MgmtServer {
/// socket address(with prefix like hvsock://)
@@ -42,11 +32,11 @@ pub struct MgmtServer {
impl MgmtServer {
/// construct a new management server
pub fn new(sid: &str, sandbox: Arc<dyn Sandbox>) -> Self {
Self {
s_addr: mgmt_socket_addr(sid.to_owned()),
pub fn new(sid: &str, sandbox: Arc<dyn Sandbox>) -> Result<Self> {
Ok(Self {
s_addr: mgmt_socket_addr(sid).context(ERR_NO_SHIM_SERVER)?,
sandbox,
}
})
}
// TODO(when metrics is supported): write metric addresses to fs
@@ -75,21 +65,6 @@ impl MgmtServer {
}
}
// return sandbox's storage path
pub fn sb_storage_path() -> String {
String::from(KATA_PATH)
}
// returns the address of the unix domain socket(UDS) for communication with shim
// management service using http
// normally returns "unix:///run/kata/{sid}/shim_monitor.sock"
pub fn mgmt_socket_addr(sid: String) -> String {
let p = Path::new(&sb_storage_path())
.join(sid)
.join(SHIM_MGMT_SOCK_NAME);
format!("unix://{}", p.to_string_lossy())
}
// from path, return a unix listener corresponding to that path,
// if the path(socket file) is not created, we create that here
async fn listener_from_path(path: String) -> Result<UnixListener> {
@@ -104,15 +79,3 @@ async fn listener_from_path(path: String) -> Result<UnixListener> {
info!(sl!(), "mgmt-svr: binding to path {}", path);
UnixListener::bind(file_path).context("bind address")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn mgmt_svr_test_sock_addr() {
let sid = String::from("414123");
let addr = mgmt_socket_addr(sid);
assert_eq!(addr, "unix:///run/kata/414123/shim-monitor.sock");
}
}

View File

@@ -17,6 +17,7 @@ use common::{
},
};
use kata_sys_util::k8s::update_ephemeral_storage_type;
use oci::{LinuxResources, Process as OCIProcess};
use resource::ResourceManager;
use tokio::sync::RwLock;
@@ -85,23 +86,30 @@ impl Container {
let sandbox_pidns = is_pid_namespace_enabled(&spec);
amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?;
// get mutable root from oci spec
let mut root = match spec.root.as_mut() {
Some(root) => root,
None => return Err(anyhow!("spec miss root field")),
};
// handler rootfs
let rootfs = self
.resource_manager
.handler_rootfs(&config.container_id, &config.bundle, &config.rootfs_mounts)
.handler_rootfs(
&config.container_id,
root,
&config.bundle,
&config.rootfs_mounts,
)
.await
.context("handler rootfs")?;
// update rootfs
match spec.root.as_mut() {
Some(root) => {
root.path = rootfs
.get_guest_rootfs_path()
.await
.context("get guest rootfs path")?
}
None => return Err(anyhow!("spec miss root field")),
};
root.path = rootfs
.get_guest_rootfs_path()
.await
.context("get guest rootfs path")?;
let mut storages = vec![];
if let Some(storage) = rootfs.get_storage().await {
storages.push(storage);

View File

@@ -16,5 +16,6 @@ ttrpc = { version = "0.6.1" }
common = { path = "../runtimes/common" }
containerd-shim-protos = { version = "0.2.0", features = ["async"]}
logging = { path = "../../../libs/logging"}
shim-interface = { path = "../../../libs/shim-interface" }
runtimes = { path = "../runtimes" }
persist = { path = "../persist" }

View File

@@ -28,7 +28,7 @@ use ttrpc::asynchronous::Server;
use crate::task_service::TaskService;
/// message buffer size
const MESSAGE_BUFFER_SIZE: usize = 8;
use persist::KATA_PATH;
use shim_interface::KATA_PATH;
pub struct ServiceManager {
receiver: Option<Receiver<Message>>,

View File

@@ -825,7 +825,7 @@ func (s *service) Kill(ctx context.Context, r *taskAPI.KillRequest) (_ *ptypes.E
// Pids returns all pids inside the container
// Since for kata, it cannot get the process's pid from VM,
// thus only return the Shim's pid directly.
// thus only return the hypervisor's pid directly.
func (s *service) Pids(ctx context.Context, r *taskAPI.PidsRequest) (_ *taskAPI.PidsResponse, err error) {
shimLog.WithField("container", r.ID).Debug("Pids() start")
defer shimLog.WithField("container", r.ID).Debug("Pids() end")
@@ -927,7 +927,7 @@ func (s *service) Connect(ctx context.Context, r *taskAPI.ConnectRequest) (_ *ta
return &taskAPI.ConnectResponse{
ShimPid: s.pid,
//Since kata cannot get the container's pid in VM, thus only return the shim's pid.
//Since kata cannot get the container's pid in VM, thus only return the hypervisor's pid.
TaskPid: s.hpid,
}, nil
}

View File

@@ -1517,7 +1517,10 @@ dependencies = [
"reqwest",
"runtimes",
"semver",
"serde",
"serde_json",
"strum",
"strum_macros",
"tempfile",
"thiserror",
]
@@ -2858,18 +2861,18 @@ checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4"
[[package]]
name = "serde"
version = "1.0.145"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b"
checksum = "256b9932320c590e707b94576e3cc1f7c9024d0ee6612dfbcf1cb106cbe8e055"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.145"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c"
checksum = "b4eae9b04cbffdfd550eb462ed33bc6a1b68c935127d008b27444d08380f94e4"
dependencies = [
"proc-macro2",
"quote",
@@ -3043,9 +3046,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "syn"
version = "1.0.102"
version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1"
checksum = "60b9b43d45702de4c839cb9b51d9f529c5dd26a4aff255b42b1ebc03e88ee908"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -19,8 +19,11 @@ serde_json = "1.0.85"
thiserror = "1.0.35"
privdrop = "0.5.2"
nix = "0.25.0"
strum = "0.24.1"
strum_macros = "0.24.3"
runtimes = { path = "../../runtime-rs/crates/runtimes" }
serde = "1.0.149"
[target.'cfg(target_arch = "s390x")'.dependencies]
reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] }

View File

@@ -3,8 +3,6 @@
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
#[cfg(target_arch = "aarch64")]
@@ -32,7 +30,3 @@ pub use x86_64 as arch_specific;
target_arch = "x86_64"
)))]
compile_error!("unknown architecture");
pub fn check() -> Result<()> {
arch_specific::check()
}

View File

@@ -8,6 +8,7 @@ pub use arch_specific::*;
mod arch_specific {
use crate::check;
use crate::types::*;
use anyhow::{anyhow, Result};
const PROC_CPUINFO: &str = "/proc/cpuinfo";
@@ -16,8 +17,19 @@ mod arch_specific {
const CPU_FLAGS_INTEL: &[&str] = &["lm", "sse4_1", "vmx"];
const CPU_ATTRIBS_INTEL: &[&str] = &["GenuineIntel"];
// check cpu
fn check_cpu() -> Result<()> {
// List of check functions
// Each entry pairs a check type with a description, the function that
// implements it, and a permission type. Currently the table has a single
// entry mapping CheckType::CheckCpu to check_cpu with
// PermissionType::NonPrivileged.
static CHECK_LIST: &[CheckItem] = &[CheckItem {
name: CheckType::CheckCpu,
descr: "This parameter performs the cpu check",
fp: check_cpu,
perm: PermissionType::NonPrivileged,
}];
// Returns the static CHECK_LIST table so callers can look up or list the
// available checks.
pub fn get_checks() -> &'static [CheckItem<'static>] {
CHECK_LIST
}
fn check_cpu(_args: &str) -> Result<()> {
println!("INFO: check CPU: x86_64");
let cpu_info = check::get_single_cpu_info(PROC_CPUINFO, CPUINFO_DELIMITER)?;
@@ -42,14 +54,4 @@ mod arch_specific {
Ok(())
}
pub fn check() -> Result<()> {
println!("INFO: check: x86_64");
let _cpu_result = check_cpu();
// TODO: collect outcome of tests to determine if checks pass or not
Ok(())
}
}

View File

@@ -58,6 +58,15 @@ pub enum CheckSubCommand {
/// Only compare the current and latest available versions
CheckVersionOnly,
/// List official release packages
OnlyListReleases,
/// List all official and pre-release packages
IncludeAllReleases,
/// List all available checks
List,
}
#[derive(Debug, Args)]

View File

@@ -7,11 +7,27 @@
use anyhow::{anyhow, Result};
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
use serde_json::Value;
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
// One release entry, deserialized from the JSON array returned by the
// endpoint at KATA_GITHUB_RELEASE_URL.
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Release {
// Tag name of the release; printed as the release identifier.
tag_name: String,
// True for pre-releases; used to tell official releases from pre-releases.
prerelease: bool,
// Value of the `created_at` field, kept as a string and printed as the
// creation time.
created_at: String,
// Value of the `tarball_url` field, printed alongside each release.
tarball_url: String,
}
const KATA_GITHUB_URL: &str =
"https://api.github.com/repos/kata-containers/kata-containers/releases/latest";
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
const KATA_GITHUB_RELEASE_URL: &str =
"https://api.github.com/repos/kata-containers/kata-containers/releases";
const JSON_TYPE: &str = "application/json";
const USER_AGT: &str = "kata";
#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
fn get_cpu_info(cpu_info_file: &str) -> Result<String> {
@@ -104,16 +120,14 @@ pub fn run_network_checks() -> Result<()> {
Ok(())
}
fn get_kata_version_by_url(url: &str) -> std::result::Result<String, reqwest::Error> {
let content = reqwest::blocking::Client::new()
.get(url)
.header(CONTENT_TYPE, "application/json")
.header(USER_AGENT, "kata")
fn get_kata_all_releases_by_url() -> std::result::Result<Vec<Release>, reqwest::Error> {
let releases: Vec<Release> = reqwest::blocking::Client::new()
.get(KATA_GITHUB_RELEASE_URL)
.header(CONTENT_TYPE, JSON_TYPE)
.header(USER_AGENT, USER_AGT)
.send()?
.json::<HashMap<String, Value>>()?;
let version = content["tag_name"].as_str().unwrap();
Ok(version.to_string())
.json()?;
Ok(releases)
}
fn handle_reqwest_error(e: reqwest::Error) -> anyhow::Error {
@@ -136,10 +150,37 @@ fn handle_reqwest_error(e: reqwest::Error) -> anyhow::Error {
anyhow!(e).context("unknown http connection failure: {:?}")
}
pub fn check_version() -> Result<()> {
let version = get_kata_version_by_url(KATA_GITHUB_URL).map_err(handle_reqwest_error)?;
pub fn check_all_releases() -> Result<()> {
let releases: Vec<Release> = get_kata_all_releases_by_url().map_err(handle_reqwest_error)?;
println!("Version: {}", version);
for release in releases {
if !release.prerelease {
println!(
"Official : Release {:15}; created {} ; {}",
release.tag_name, release.created_at, release.tarball_url
);
} else {
println!(
"PreRelease: Release {:15}; created {} ; {}",
release.tag_name, release.created_at, release.tarball_url
);
}
}
Ok(())
}
/// Fetches the list of releases and prints only those that are not marked
/// as pre-releases, one line per release.
///
/// # Errors
///
/// Returns the error produced by `handle_reqwest_error` when the release
/// list cannot be fetched or decoded.
pub fn check_official_releases() -> Result<()> {
    let all_releases: Vec<Release> =
        get_kata_all_releases_by_url().map_err(handle_reqwest_error)?;

    println!("Official Releases...");
    all_releases
        .iter()
        .filter(|rel| !rel.prerelease)
        .for_each(|rel| {
            println!(
                "Release {:15}; created {} ; {}",
                rel.tag_name, rel.created_at, rel.tarball_url
            );
        });

    Ok(())
}
@@ -149,6 +190,23 @@ pub fn check_version() -> Result<()> {
mod tests {
use super::*;
use semver::Version;
use serde_json::Value;
use std::collections::HashMap;
const KATA_GITHUB_URL: &str =
"https://api.github.com/repos/kata-containers/kata-containers/releases/latest";
// Test helper: requests `url`, decodes the response body as a JSON object and
// returns its "tag_name" value as a String.
// Panics if "tag_name" is absent (map indexing) or is not a JSON string (unwrap).
fn get_kata_version_by_url(url: &str) -> std::result::Result<String, reqwest::Error> {
    let response = reqwest::blocking::Client::new()
        .get(url)
        .header(CONTENT_TYPE, JSON_TYPE)
        .header(USER_AGENT, USER_AGT)
        .send()?;
    let fields = response.json::<HashMap<String, Value>>()?;

    let tag = fields["tag_name"].as_str().unwrap();
    Ok(tag.to_string())
}
#[test]
fn test_get_cpu_info_empty_input() {

View File

@@ -7,6 +7,7 @@ mod arch;
mod args;
mod check;
mod ops;
mod types;
mod utils;
use anyhow::Result;

View File

@@ -3,23 +3,77 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::arch;
use crate::check;
use crate::ops::version;
use crate::arch::x86_64::get_checks;
use crate::args::{CheckArgument, CheckSubCommand, IptablesCommand, MetricsCommand};
use anyhow::Result;
use crate::check;
use crate::ops::version;
use crate::types::*;
use anyhow::{anyhow, Result};
const NAME: &str = "kata-ctl";
// Looks up the handler function registered for the check passed as argument.
// Returns an "Invalid command" error when no entry of get_checks() matches.
fn get_builtin_check_func(name: CheckType) -> Result<BuiltinCmdFp> {
    get_checks()
        .into_iter()
        .find(|item| item.name == name)
        .map(|item| item.fp)
        .ok_or_else(|| anyhow!("Invalid command: {:?}", name))
}
// This function is called from each 'kata-ctl check' argument section.
// It resolves the handler registered for `check` and invokes it with `args`.
// A failed lookup, or an error returned by the handler itself, is passed
// back to the caller unchanged.
fn handle_builtin_check(check: CheckType, args: &str) -> Result<()> {
    // `?` propagates the lookup error directly; no manual match is needed.
    let handler = get_builtin_check_func(check)?;
    handler(args)
}
// Builds one summary line per entry of get_checks(), in the same order:
// "<name> (<description>. Mode: <permission>)".
fn get_client_cmd_details() -> Vec<String> {
    get_checks()
        .into_iter()
        .map(|item| format!("{} ({}. Mode: {})", item.name, item.descr, item.perm))
        .collect()
}
// Prints the list of available checks, one per line, followed by a blank
// line. When there are no checks a notice is printed instead. Always
// returns Ok(()).
fn print_check_list() -> Result<()> {
    let details = get_client_cmd_details();

    if !details.is_empty() {
        for line in &details {
            println!(" - {}", line);
        }
        println!();
    } else {
        println!("Checks not found!\n");
    }

    Ok(())
}
pub fn handle_check(checkcmd: CheckArgument) -> Result<()> {
let command = checkcmd.command;
match command {
CheckSubCommand::All => {
// run architecture-specific tests
arch::check()?;
handle_builtin_check(CheckType::CheckCpu, "")?;
// run code that uses network checks
check::run_network_checks()?;
@@ -27,12 +81,33 @@ pub fn handle_check(checkcmd: CheckArgument) -> Result<()> {
CheckSubCommand::NoNetworkChecks => {
// run architecture-specific tests
arch::check()?;
handle_builtin_check(CheckType::CheckCpu, "")?;
}
CheckSubCommand::CheckVersionOnly => {
// retrieve latest release
check::check_version()?;
handle_version()?;
}
CheckSubCommand::List => {
print_check_list()?;
}
CheckSubCommand::OnlyListReleases => {
// retrieve official release
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
check::check_official_releases()?;
}
CheckSubCommand::IncludeAllReleases => {
// retrieve ALL releases including prerelease
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
check::check_all_releases()?;
}
}
@@ -65,6 +140,7 @@ pub fn handle_metrics(_args: MetricsCommand) -> Result<()> {
pub fn handle_version() -> Result<()> {
let version = version::get().unwrap();
println!("{} version {:?} (type: rust)", NAME, version);
Ok(())
}

View File

@@ -0,0 +1,35 @@
// Copyright (c) 2022 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
use anyhow::Result;
use strum_macros::EnumString;
/// Builtin check command handler type.
///
/// A handler receives its arguments as a single string slice and reports
/// success or failure through `Result<()>`.
pub type BuiltinCmdFp = fn(args: &str) -> Result<()>;
/// CheckType encodes the name of each check provided by kata-ctl.
///
/// The strum `Display` derive lets a value be formatted with `{}`, and
/// `EnumString` adds parsing from a string (`FromStr`).
#[derive(Debug, strum_macros::Display, EnumString, PartialEq)]
pub enum CheckType {
CheckCpu,
CheckNetwork,
}
/// PermissionType is used to show whether a check needs to run with elevated (super-user)
/// privileges, or whether it can run as normal user.
///
/// The strum `Display` derive lets a value be formatted with `{}`, and
/// `EnumString` adds parsing from a string (`FromStr`).
#[derive(strum_macros::Display, EnumString, PartialEq)]
pub enum PermissionType {
Privileged,
NonPrivileged,
}
/// CheckItem is used to encode the check metadata that each architecture
/// returns in a list of CheckItem's using the architecture implementation
/// of get_checks().
pub struct CheckItem<'a> {
// Which check this entry describes.
pub name: CheckType,
// Human-readable description of the check.
pub descr: &'a str,
// Handler function that runs the check.
pub fp: BuiltinCmdFp,
// Whether the check needs elevated privileges.
pub perm: PermissionType,
}

View File

@@ -1,9 +1,11 @@
**/*.tar.gz
image-builder/nsdax
dracut/Dockerfile
dracut/dracut.conf.d/15-extra-libs.conf
/.*.done
/*_rootfs
/kata-centos-dnf.conf
/kata-containers-initrd.img
/kata-containers.img
*_rootfs
kata-centos-dnf.conf
kata-containers-initrd.img
kata-containers.img
rootfs-builder/centos/RPM-GPG-KEY-*
typescript

View File

@@ -4,6 +4,11 @@
*.snap
*.tar.gz
*.tar.xz
*.gz
*.dsc
*.deb
*.buildinfo
*.changes
debian.series
parts/
prime/
@@ -11,3 +16,4 @@ sha256sums.asc
snap/.snapcraft/
stage/
typescript
kata-linux-*

View File

@@ -541,6 +541,16 @@ main() {
[ -z "${subcmd}" ] && usage 1
if [[ ${build_type} == "experimental" ]] && [[ ${hypervisor_target} == "dragonball" ]]; then
build_type="dragonball-experimental"
if [ -n "$kernel_version" ]; then
kernel_major_version=$(get_major_kernel_version "${kernel_version}")
if [[ ${kernel_major_version} != "5.10" ]]; then
info "dragonball-experimental kernel patches are only tested on 5.10.x kernel now, other kernel version may cause confliction"
fi
fi
fi
# If no kernel version was given, take it from versions.yaml
if [ -z "$kernel_version" ]; then
if [[ ${build_type} == "experimental" ]]; then
@@ -556,6 +566,8 @@ main() {
kernel_version=$(get_from_kata_deps "assets.kernel-experimental.tag")
;;
esac
elif [[ ${build_type} == "dragonball-experimental" ]]; then
kernel_version=$(get_from_kata_deps "assets.dragonball-kernel-experimental.version")
elif [[ "${conf_guest}" != "" ]]; then
#If specifying a tag for kernel_version, must be formatted version-like to avoid unintended parsing issues
kernel_version=$(get_from_kata_deps "assets.kernel.${conf_guest}.version" 2>/dev/null || true)

View File

@@ -0,0 +1,6 @@
# Add support for upcall and device manager service
CONFIG_DRAGONBALL_DRIVERS=y
CONFIG_DRAGONBALL_UPCALL_SRV=y
CONFIG_DRAGONBALL_DEVICE_MANAGER=y
CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO=y
CONFIG_DRAGONBALL_HOTPLUG_CPU=y

View File

@@ -6,4 +6,5 @@ CONFIG_EFI_SECRET=m
CONFIG_EFI_STUB=y
CONFIG_MODULE_SIG=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_VIRT_DRIVERS=y

View File

@@ -1 +1 @@
97
98

View File

@@ -0,0 +1,520 @@
From 691186a091ecfc1777531a61594b88394d384cff Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 9 Nov 2022 11:38:36 +0800
Subject: [PATCH 1/4] upcall: establish upcall server
Upcall is a direct communication tool between hypervisor and guest. This
patch introduces the server side in the upcall system.
At the start of the upcall server, a kthread `db-vsock-srv` will be
created. In this kthread, a vsock listener is established on a specific
port (currently 0xDB, where DB refers to Dragonball). After the socket
is created, it will start accepting connections from the client side.
If the connection is established, upcall server will try to get cmd from
the client and that cmd could determine which upcall service will handle
the request from the client.
Besides, different service needs to be registered into upcall server so
that it could handle the request from the client. There is a
`register_db_vsock_service` in this commit provided for every service to
register service into service_entry list during initialization and we will
introduce device manager service in the following commits.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/Kconfig | 1 +
drivers/misc/Makefile | 1 +
drivers/misc/dragonball/Kconfig | 21 ++
drivers/misc/dragonball/Makefile | 6 +
drivers/misc/dragonball/upcall_srv/Kconfig | 14 +
drivers/misc/dragonball/upcall_srv/Makefile | 13 +
.../upcall_srv/dragonball_upcall_srv.c | 323 ++++++++++++++++++
include/dragonball/upcall_srv.h | 42 +++
8 files changed, 421 insertions(+)
create mode 100644 drivers/misc/dragonball/Kconfig
create mode 100644 drivers/misc/dragonball/Makefile
create mode 100644 drivers/misc/dragonball/upcall_srv/Kconfig
create mode 100644 drivers/misc/dragonball/upcall_srv/Makefile
create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
create mode 100644 include/dragonball/upcall_srv.h
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index fafa8b0d8099..e05a14f77510 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -481,4 +481,5 @@ source "drivers/misc/ocxl/Kconfig"
source "drivers/misc/cardreader/Kconfig"
source "drivers/misc/habanalabs/Kconfig"
source "drivers/misc/uacce/Kconfig"
+source "drivers/misc/dragonball/Kconfig"
endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index d23231e73330..e0a1af9c05b2 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -57,3 +57,4 @@ obj-$(CONFIG_HABANA_AI) += habanalabs/
obj-$(CONFIG_UACCE) += uacce/
obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o
obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o
+obj-$(CONFIG_DRAGONBALL_DRIVERS) += dragonball/
diff --git a/drivers/misc/dragonball/Kconfig b/drivers/misc/dragonball/Kconfig
new file mode 100644
index 000000000000..f81be3721908
--- /dev/null
+++ b/drivers/misc/dragonball/Kconfig
@@ -0,0 +1,21 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers
+#
+
+menuconfig DRAGONBALL_DRIVERS
+ bool "Alibaba Dragonball Secure Container Runtime Drivers"
+ depends on X86_64 || ARM64
+ default n
+ help
+ Alibaba Dragonball is a secure container runtime with an embedded micro-vmm
+ to securely isolate container workloads.
+
+ Say Y here to get to see options for various misc drivers to support the
+ Alibaba Dragonball secure container runtime. This option alone does not
+ add any kernel code.
+
+ If unsure, say N.
+
+if DRAGONBALL_DRIVERS
+source "drivers/misc/dragonball/upcall_srv/Kconfig"
+endif # DRAGONBALL_DRIVERS
diff --git a/drivers/misc/dragonball/Makefile b/drivers/misc/dragonball/Makefile
new file mode 100644
index 000000000000..b7bd86d73ade
--- /dev/null
+++ b/drivers/misc/dragonball/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Dragonball misc drivers
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += upcall_srv/
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
new file mode 100644
index 000000000000..b00bf1f8637d
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -0,0 +1,14 @@
+#
+# Alibaba Dragonball Secure Container Runtime Drivers for vsock
+#
+
+config DRAGONBALL_UPCALL_SRV
+ bool "Dragonball in-kernel Virtual Sockets Server"
+ depends on VIRTIO_VSOCKETS
+ default y
+ help
+ This configure implements an in-kernel vsock server to dispatch Dragonball
+ requests to registered service handlers, based on the reliable Virtual
+ Sockets communication channels between guest and host/vmm.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
new file mode 100644
index 000000000000..4102e6c7edef
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the in-kernel vsock server.
+#
+# Copyright (C) 2022 Alibaba Cloud, Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+
+obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += dragonball_upcall_srv.o
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
new file mode 100644
index 000000000000..1670bd8597f0
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/upcall_srv/dragonball_upcall_srv.c
+ * Dragonball upcall server
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-upcall-srv: " fmt
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vm_sockets.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <dragonball/upcall_srv.h>
+
+struct db_conn_info {
+ struct work_struct work;
+ struct socket *sock;
+};
+
+struct db_service_entry {
+ char cmd;
+ db_vsock_svc_handler_t handler;
+ struct list_head list;
+};
+
+/* Protects registered command. */
+static DEFINE_RWLOCK(db_service_lock);
+static LIST_HEAD(db_service_list);
+
+static struct task_struct *db_service_task;
+static unsigned int db_server_port = DB_SERVER_PORT;
+
+struct socket *db_create_vsock_listener(unsigned int port)
+{
+ struct socket *sock;
+ int ret = 0;
+
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = port,
+ .svm_cid = VMADDR_CID_ANY,
+ }
+ };
+
+ ret = sock_create_kern(&init_net, AF_VSOCK, SOCK_STREAM, 0, &sock);
+ if (ret) {
+ pr_err("Server vsock create failed, err: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ ret = sock->ops->bind(sock, &addr.sa, sizeof(addr.svm));
+ if (ret) {
+ pr_err("Server vsock bind failed, err: %d\n", ret);
+ goto err;
+ }
+ ret = sock->ops->listen(sock, 10);
+ if (ret < 0) {
+ pr_err("Server vsock listen error: %d\n", ret);
+ goto err;
+ }
+
+ return sock;
+err:
+ sock_release(sock);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(db_create_vsock_listener);
+
+int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len)
+{
+ struct kvec vec;
+ struct msghdr msgh;
+
+ vec.iov_base = buf;
+ vec.iov_len = len;
+ memset(&msgh, 0, sizeof(msgh));
+
+ return kernel_sendmsg(sock, &msgh, &vec, 1, len);
+}
+EXPORT_SYMBOL_GPL(db_vsock_sendmsg);
+
+int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len, int flags)
+{
+ struct kvec vec;
+ struct msghdr msgh;
+
+ memset(&vec, 0, sizeof(vec));
+ memset(&msgh, 0, sizeof(msgh));
+ vec.iov_base = buf;
+ vec.iov_len = len;
+
+ return kernel_recvmsg(sock, &msgh, &vec, 1, len, flags);
+}
+EXPORT_SYMBOL_GPL(db_vsock_recvmsg);
+
+static int db_vsock_recvcmd(struct socket *cli_socket, char *cmd)
+{
+ int ret;
+ char rcv;
+ long timeout;
+ struct kvec vec;
+ struct msghdr msg;
+
+ memset(&vec, 0, sizeof(vec));
+ memset(&msg, 0, sizeof(msg));
+ vec.iov_base = &rcv;
+ vec.iov_len = 1;
+
+ timeout = cli_socket->sk->sk_rcvtimeo;
+ cli_socket->sk->sk_rcvtimeo = DB_INIT_TIMEOUT * HZ;
+ ret = kernel_recvmsg(cli_socket, &msg, &vec, 1, 1, 0);
+ cli_socket->sk->sk_rcvtimeo = timeout;
+ *cmd = rcv;
+
+ return ret;
+}
+
+/*
+ * The workqueue handler for vsock work_struct.
+ *
+ * Each worker-pool bound to an actual CPU implements concurrency management
+ * by hooking into the scheduler. The worker-pool is notified whenever an
+ * active worker wakes up or sleeps and keeps track of the number of the
+ * currently runnable workers. Generally, work items are not expected to hog
+ * a CPU and consume many cycles. That means maintaining just enough concurrency
+ * to prevent work processing from stalling should be optimal.
+ *
+ * So it's OK to sleep in a workqueue handler, it won't cause too many worker
+ * threads.
+ */
+static void db_conn_service(struct work_struct *work)
+{
+ struct db_conn_info *conn_info =
+ container_of(work, struct db_conn_info, work);
+ struct db_service_entry *service_entry;
+ int len, ret = -1;
+ char cmd;
+
+ len = db_vsock_recvcmd(conn_info->sock, &cmd);
+ if (len <= 0)
+ goto recv_failed;
+
+ read_lock(&db_service_lock);
+ list_for_each_entry(service_entry, &db_service_list, list) {
+ if (cmd == service_entry->cmd) {
+ ret = service_entry->handler(conn_info->sock);
+ break;
+ }
+ }
+ read_unlock(&db_service_lock);
+
+recv_failed:
+ if (ret) {
+ sock_release(conn_info->sock);
+ pr_info("Client connection closed, error code: %d\n", ret);
+ }
+ kfree(conn_info);
+}
+
+static int db_create_cli_conn(struct socket *sock)
+{
+ struct db_conn_info *conn;
+
+ conn = kmalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+ return -ENOMEM;
+
+ conn->sock = sock;
+ INIT_WORK(&conn->work, db_conn_service);
+ schedule_work(&conn->work);
+
+ return 0;
+}
+
+static int db_vsock_server(void *data)
+{
+ struct socket *sock;
+ int err;
+
+ sock = db_create_vsock_listener(db_server_port);
+ if (IS_ERR(sock)) {
+ err = PTR_ERR(sock);
+ pr_err("Init server err: %d\n", err);
+ return err;
+ }
+
+ while (!kthread_should_stop()) {
+ struct socket *conn;
+
+ conn = sock_alloc();
+ if (!conn)
+ return -ENOMEM;
+
+ conn->type = sock->type;
+ conn->ops = sock->ops;
+
+ /* flags = 0, kern = 1 (in-kernel socket) */
+ err = sock->ops->accept(sock, conn, 0, 1);
+ if (err < 0) {
+ pr_err("Server accept err: %d\n", err);
+ sock_release(conn);
+ continue;
+ }
+
+ err = db_create_cli_conn(conn);
+ if (err)
+ pr_err("Create client connetion err: %d\n", err);
+ }
+
+ return 0;
+}
+
+static int db_create_service(void)
+{
+ struct task_struct *service;
+ int rc = 0;
+
+ service = kthread_create(db_vsock_server, NULL, "db-vsock-srv");
+ if (IS_ERR(service)) {
+ rc = PTR_ERR(service);
+ pr_err("Server task create failed, err: %d\n", rc);
+ } else {
+ db_service_task = service;
+ wake_up_process(service);
+ }
+ return rc;
+}
+
+static int db_vsock_srv_cmdline_set(const char *device,
+ const struct kernel_param *kp)
+{
+ unsigned int port = 0;
+ int processed, consumed = 0;
+
+ /* Get "@<port>" */
+ processed = sscanf(device, "@%u%n", &port, &consumed);
+ if (processed < 1 || device[consumed] || port == 0 || port > 1024) {
+ pr_err("Using @<port> format and port range (0, 1024].\n");
+ return -EINVAL;
+ }
+
+ db_server_port = port;
+ return 0;
+}
+
+static const struct kernel_param_ops db_vsock_srv_cmdline_param_ops = {
+ .set = db_vsock_srv_cmdline_set,
+};
+
+device_param_cb(port, &db_vsock_srv_cmdline_param_ops, NULL, 0400);
+
+int register_db_vsock_service(const char cmd, db_vsock_svc_handler_t handler)
+{
+ int rc = -EEXIST;
+ struct db_service_entry *service_entry;
+
+ write_lock(&db_service_lock);
+ list_for_each_entry(service_entry, &db_service_list, list) {
+ if (cmd == service_entry->cmd) {
+ rc = -EEXIST;
+ goto out;
+ }
+ }
+
+ service_entry = kzalloc(sizeof(*service_entry), GFP_KERNEL);
+ if (!service_entry) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ service_entry->cmd = cmd;
+ service_entry->handler = handler;
+ list_add_tail(&service_entry->list, &db_service_list);
+ rc = 0;
+out:
+ write_unlock(&db_service_lock);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(register_db_vsock_service);
+
+int unregister_db_vsock_service(const char cmd)
+{
+ int rc = -EEXIST;
+ struct db_service_entry *service_entry, *n;
+
+ write_lock(&db_service_lock);
+ list_for_each_entry_safe(service_entry, n, &db_service_list, list) {
+ if (cmd == service_entry->cmd) {
+ list_del(&service_entry->list);
+ rc = 0;
+ break;
+ }
+ }
+ write_unlock(&db_service_lock);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(unregister_db_vsock_service);
+
+static int __init db_vsock_srv_init(void)
+{
+ return db_create_service();
+}
+
+late_initcall(db_vsock_srv_init);
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball vsock server");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dragonball/upcall_srv.h b/include/dragonball/upcall_srv.h
new file mode 100644
index 000000000000..1c733982cc30
--- /dev/null
+++ b/include/dragonball/upcall_srv.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * db_upcall_srv.h Virtual Sockets Server for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_UPCALL_SRV_H
+#define _DB_UPCALL_SRV_H
+
+#include <linux/workqueue.h>
+#include <linux/net.h>
+
+/* Vsock port to listen for incoming connections. */
+#define DB_SERVER_PORT 0xDB
+#define DB_RECVBUF_SIZE 0x400
+#define DB_INIT_TIMEOUT 10
+
+/*
+ * Vsock service handler to handle new incoming connections.
+ *
+ * Return:
+ * 0: on success and the callback takes ownership of the sock.
+ * !0: on failure and the callback should keep the sock as is.
+ */
+typedef int (*db_vsock_svc_handler_t) (struct socket *sock);
+
+extern int register_db_vsock_service(const char cmd,
+ db_vsock_svc_handler_t handler);
+extern int unregister_db_vsock_service(const char cmd);
+
+extern struct socket *db_create_vsock_listener(unsigned int port);
+extern int db_vsock_sendmsg(struct socket *sock, char *buf, size_t len);
+extern int db_vsock_recvmsg(struct socket *sock, char *buf, size_t len,
+ int flags);
+
+#endif /* _DB_UPCALL_SRV_H */
--
2.19.1.6.gb485710b

View File

@@ -0,0 +1,330 @@
From 66e67cc3ebbebc9d138fff084c487d6b9d21f60c Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Mon, 21 Nov 2022 19:19:26 +0800
Subject: [PATCH 2/4] upcall: introduce device manager upcall service
Different services are registered into upcall server to handle the
request from the client side. This commit introduces device manager
upcall service and when new message gets into upcall server, cmd `d` is
used for identifying the device manager service.
After a request is sent to device manager service, db_devmgr_handler
will start handle the request. A kthread `db_devmgr_server` will be
created and it will send CONNECT message to the client side to notify
the client start sending message for device management operations.
`db_devmgr_process` will be used for determining which device operations
will be triggered through msg_type. `get_action` will find out the
action for dealing with the operation and `action` fn will execute the
actual device management operation in the device manager service.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 12 +
drivers/misc/dragonball/upcall_srv/Makefile | 1 +
.../upcall_srv/dragonball_device_manager.c | 235 ++++++++++++++++++
include/dragonball/device_manager.h | 18 ++
4 files changed, 266 insertions(+)
create mode 100644 drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
create mode 100644 include/dragonball/device_manager.h
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index b00bf1f8637d..6554a9741c00 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -12,3 +12,15 @@ config DRAGONBALL_UPCALL_SRV
Sockets communication channels between guest and host/vmm.
If unsure, say N.
+
+config DRAGONBALL_DEVICE_MANAGER
+ bool "Vsock Service to Handle Dragonball Device Management Requests"
+ depends on DRAGONBALL_UPCALL_SRV
+ depends on VIRTIO_VSOCKETS
+ default y
+ help
+ This configure implements a vsock service to handle Dragonball device
+ management requests, such as getting device information, hot-plugging
+ devices etc.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/Makefile b/drivers/misc/dragonball/upcall_srv/Makefile
index 4102e6c7edef..409c0c11e2e6 100644
--- a/drivers/misc/dragonball/upcall_srv/Makefile
+++ b/drivers/misc/dragonball/upcall_srv/Makefile
@@ -11,3 +11,4 @@
#
obj-$(CONFIG_DRAGONBALL_UPCALL_SRV) += dragonball_upcall_srv.o
+obj-$(CONFIG_DRAGONBALL_DEVICE_MANAGER) += dragonball_device_manager.o
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
new file mode 100644
index 000000000000..ebcb6ef74285
--- /dev/null
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+ * vsock service for device management.
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) "db-dev-mgr: " fmt
+
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/virtio_mmio.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/cpuhotplug.h>
+#include <asm/cpu.h>
+#include <dragonball/upcall_srv.h>
+#include <dragonball/device_manager.h>
+#ifdef CONFIG_ARM64
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#endif
+#include <linux/percpu.h>
+#include <linux/device.h>
+#include <asm/numa.h>
+
+/*
+ * Following designs are adopted to simplify implementation:
+ * 1) fixed-size messages with padding to ease receiving logic.
+ * 2) binary encoding instead of string encoding because it's on the same host.
+ * 3) synchronous communication in ping-pong mode, one in-flight request at most.
+ * 4) do not support module unloading
+ */
+
+/* These definitions are synchronized with dragonball */
+#define DEV_MGR_MSG_SIZE 0x400
+#define DEVMGR_CMD_BYTE 'd'
+#define DEVMGR_MAGIC_VERSION 0x444D0100 /* 'DM' + Version 1.0 */
+#define SHARED_IRQ_NO 5
+
+/* Type of request and reply messages. */
+enum devmgr_msg_type {
+ CONNECT = 0x00000000,
+ ADD_CPU = 0x00000001,
+ DEL_CPU = 0x00000002,
+ ADD_MEM = 0x00000003,
+ DEL_MEM = 0x00000004,
+ ADD_MMIO = 0x00000005,
+ DEL_MMIO = 0x00000006,
+ ADD_PCI = 0x00000007,
+ DEL_PCI = 0x00000008,
+};
+
+struct devmgr_msg_header {
+ /* magic version for identifying upcall */
+ uint32_t magic_version;
+ /* size of the upcall message */
+ uint32_t msg_size;
+ /* type for the message to identify its usage */
+ uint32_t msg_type;
+ /* flag for extra information */
+ uint32_t msg_flags;
+};
+
+struct devmgr_req {
+ struct devmgr_msg_header msg_header;
+ union {
+ char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+ } msg_load;
+};
+
+struct devmgr_reply {
+ struct devmgr_msg_header msg_header;
+ /*
+ * if ret is 0, it means the operation is successful.
+ * if ret is not 0, return value will be error code.
+ */
+ int32_t ret;
+ union {
+ char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+ } msg_load;
+};
+
+struct task_res {
+ struct task_struct *task;
+ struct socket *sock;
+ struct devmgr_req req;
+ struct devmgr_reply reply;
+};
+
+typedef int (*action_route_t) (struct devmgr_req *req,
+ struct devmgr_reply *rep);
+
+static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
+ uint32_t msg_type, uint32_t msg_flags)
+{
+ msg->magic_version = DEVMGR_MAGIC_VERSION;
+ msg->msg_size = msg_size;
+ msg->msg_type = msg_type;
+ msg->msg_flags = msg_flags;
+}
+
+static struct {
+ enum devmgr_msg_type cmd;
+ action_route_t fn;
+} opt_map[] = {
+};
+
+static action_route_t get_action(struct devmgr_req *req)
+{
+ int i;
+ action_route_t action = NULL;
+ int size_opt = ARRAY_SIZE(opt_map);
+
+ for (i = 0; i < size_opt; i++) {
+ if (opt_map[i].cmd == req->msg_header.msg_type) {
+ action = opt_map[i].fn;
+ break;
+ }
+ }
+ return action;
+}
+
+static void db_devmgr_process(struct devmgr_req *req,
+ struct devmgr_reply *rep)
+{
+ int err;
+ action_route_t action;
+ struct devmgr_msg_header *req_mh = &req->msg_header;
+ struct devmgr_msg_header *rep_mh = &rep->msg_header;
+
+ if (req_mh->magic_version != DEVMGR_MAGIC_VERSION) {
+ _fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+ return;
+ }
+
+ action = get_action(req);
+ if (action == NULL) {
+ pr_err("db_devmgr_process : Not found valid command");
+ rep->ret = -1;
+ _fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+ return;
+ }
+
+ err = action(req, rep);
+ if (err) {
+ pr_err("db_devmgr_process : Command run failed, err: %d", err);
+ rep->ret = err;
+ _fill_msg_header(rep_mh, 0, req->msg_header.msg_type, 0);
+ return;
+ }
+}
+
+static int db_devmgr_server(void *data)
+{
+ struct task_res *res = (struct task_res *)data;
+ struct devmgr_msg_header *rep_mh = &res->reply.msg_header;
+ int len;
+
+ _fill_msg_header(rep_mh, 0, CONNECT, 0);
+ len = db_vsock_sendmsg(res->sock, (char *)&res->reply, DEV_MGR_MSG_SIZE);
+ if (len <= 0) {
+ pr_err("db_devmgr_server : Server send message failed, err: %d", len);
+ sock_release(res->sock);
+ kfree(res);
+ return len;
+ }
+
+ while (!kthread_should_stop()) {
+ len = db_vsock_recvmsg(res->sock, (char *)&res->req,
+ DEV_MGR_MSG_SIZE, 0);
+ if (len <= 0)
+ break;
+
+ /* The result(OK or Error) will fill into res->reply field */
+ db_devmgr_process(&res->req, &res->reply);
+
+ len = db_vsock_sendmsg(res->sock, (char *)&res->reply,
+ DEV_MGR_MSG_SIZE);
+ if (len <= 0)
+ break;
+ }
+
+ /* TODO: check who shutdown the socket, receiving or sending. */
+ sock_release(res->sock);
+ kfree(res);
+ return 0;
+}
+
+static int db_devmgr_handler(struct socket *sock)
+{
+ struct task_res *res;
+ struct task_struct *conn_task;
+
+ /* TODO: ensure singleton, only one server exists */
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ res->sock = sock;
+ conn_task = kthread_create(db_devmgr_server, res, "db_dev_mgr");
+ if (IS_ERR(conn_task)) {
+ pr_err("db_devmgr_handler : Client process thread create failed, err: %d",
+ (int)PTR_ERR(conn_task));
+ goto failed;
+ } else {
+ res->task = conn_task;
+ wake_up_process(conn_task);
+ }
+
+ return 0;
+failed:
+ kfree(res);
+ return PTR_ERR(conn_task);
+}
+
+static int __init db_device_manager_init(void)
+{
+ return register_db_vsock_service(DEVMGR_CMD_BYTE, db_devmgr_handler);
+}
+
+late_initcall(db_device_manager_init);
+
+MODULE_AUTHOR("Alibaba, Inc.");
+MODULE_DESCRIPTION("Dragonball Device Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
new file mode 100644
index 000000000000..a1713e9f026d
--- /dev/null
+++ b/include/dragonball/device_manager.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * device_manager.h Device Manager for Dragonball
+ *
+ * Copyright (C) 2022 Alibaba Cloud, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _DB_DEVICE_MANAGER_H
+#define _DB_DEVICE_MANAGER_H
+
+#include <linux/device.h>
+
+#endif /* _DB_DEVICE_MANAGER_H */
--
2.19.1.6.gb485710b

View File

@@ -0,0 +1,301 @@
From 901fb71c77144b170465611617f0f25099d8a780 Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Mon, 21 Nov 2022 19:44:50 +0800
Subject: [PATCH 3/4] upcall: add cpu hotplug/hot-unplug into device manager
service
Add cpu hotplug and hot-unplug support into device manager. In the
`devmgr_req` message, `msg_type` ADD_CPU in `msg_header` will trigger
`add_cpu_dev` action and DEL_CPU will trigger `del_cpu_dev` action, and
we use `apic_ids` and `count` delivered in `cpu_dev_info` to notify
which and how many cpus will be hotplugged / hot-unplugged.
`add_cpu_dev` and `del_cpu_dev` will eventually trigger `add_cpu_upcall`
and `del_cpu_upcall` to trigger the cpu hotplug / hot-unplug process in
the kernel. After the cpu hotplug / hot-unplug process,
`cpu_event_notification` will generate device manager reply to the
client side.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 11 +
.../upcall_srv/dragonball_device_manager.c | 219 ++++++++++++++++++
2 files changed, 230 insertions(+)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index 6554a9741c00..b237882a2928 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -24,3 +24,14 @@ config DRAGONBALL_DEVICE_MANAGER
devices etc.
If unsure, say N.
+
+config DRAGONBALL_HOTPLUG_CPU
+ bool "CPU hotplug/hotunplug support"
+ depends on DRAGONBALL_DEVICE_MANAGER
+ default y
+ help
+ This option enables vCPU hotplug/hot-unplug support. The VMM sends
+ hotplug requests over vsock, using a dedicated data structure that
+ carries the command and its parameters, to hot-plug or hot-unplug a vCPU.
+
+ If unsure, say N.
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index ebcb6ef74285..210ef5d6c9d5 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -75,9 +75,20 @@ struct devmgr_req {
struct devmgr_msg_header msg_header;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ struct {
+ uint8_t count;
+ uint8_t apic_ver;
+ uint8_t apic_ids[256];
+ } cpu_dev_info;
+#endif
} msg_load;
};
+/*
+ * Payload of the reply to an ADD_CPU / DEL_CPU request: the index into the
+ * request's apic_ids[] array that cpu_event_notification() reports back.
+ */
+struct cpu_dev_reply_info {
+ uint32_t apic_index;
+};
+
struct devmgr_reply {
struct devmgr_msg_header msg_header;
/*
@@ -87,6 +98,9 @@ struct devmgr_reply {
int32_t ret;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+ struct cpu_dev_reply_info cpu_dev_info;
+#endif
} msg_load;
};
@@ -109,10 +123,215 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
msg->msg_flags = msg_flags;
}
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+/*
+ * Translate an APIC id into a logical cpu number.
+ *
+ * Walks logical cpus 0 .. num_processors - 1 and returns the first one whose
+ * physical id equals @apic_id, or -1 when none of them matches.
+ */
+static int get_cpu_id(int apic_id)
+{
+	int cpu = 0;
+
+	while (cpu < num_processors) {
+		if (cpu_physical_id(cpu) == apic_id)
+			return cpu;
+		cpu++;
+	}
+
+	return -1;
+}
+
+/*
+ * Fill @rep with the outcome of a cpu hotplug / hot-unplug request.
+ *
+ * @apic_ids_index: index into the request's apic_ids[] array to report back.
+ *                  It is the index of the first entry that failed to be
+ *                  processed; when it differs from the number of requested
+ *                  vcpus, dragonball rolls back apic_ids[0] .. apic_ids[i-1].
+ * @ret:            result code stored in the reply.
+ * @action_type:    message type written into the reply header.
+ * @rep:            reply message to fill in.
+ */
+static void cpu_event_notification(uint8_t apic_ids_index, int ret,
+				   uint32_t action_type,
+				   struct devmgr_reply *rep)
+{
+	struct cpu_dev_reply_info *info = &rep->msg_load.cpu_dev_info;
+
+	pr_info("cpu event notification: apic ids index %d", apic_ids_index);
+
+	rep->ret = ret;
+	info->apic_index = apic_ids_index;
+	_fill_msg_header(&rep->msg_header, sizeof(*info), action_type, 0);
+}
+#endif
+
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+/*
+ * Hot-add the vcpu identified by @apic_id (local APIC version @apic_ver).
+ *
+ * The cpu is first made known to the kernel (generic_processor_info(), numa
+ * node binding, arch_register_cpu()) while holding the device-hotplug lock,
+ * the cpu maps lock and the cpu hotplug write lock. It is then brought up
+ * with add_cpu() once those locks have been released.
+ *
+ * Returns 0 on success or a negative errno. On failure every step that had
+ * already succeeded is undone, always under the same three locks.
+ */
+static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
+{
+	int cpu_id, node_id;
+	int ret;
+
+	pr_info("adding vcpu apic_id %d", apic_id);
+
+	/*
+	 * Get the mutex lock for hotplug and cpu update and cpu write lock.
+	 * So that other threads won't influence the hotplug process.
+	 */
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+
+	cpu_id = generic_processor_info(apic_id, apic_ver);
+	if (cpu_id < 0) {
+		pr_err("cpu (apic id %d) cannot be added, generic processor info failed", apic_id);
+		ret = -EINVAL;
+		goto rollback_generic_cpu;
+	}
+
+	/* update numa mapping for hot-plugged cpus. */
+	node_id = numa_cpu_node(cpu_id);
+	if (node_id != NUMA_NO_NODE)
+		numa_set_node(cpu_id, node_id);
+
+	ret = arch_register_cpu(cpu_id);
+	if (ret) {
+		pr_err("cpu %d cannot be added, register cpu failed %d", cpu_id, ret);
+		goto rollback_register_cpu;
+	}
+
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	/* add_cpu() is only called after the locks above have been released. */
+	ret = add_cpu(cpu_id);
+	if (!ret)
+		return 0;
+	pr_err("cpu %d cannot be added, cpu up failed: %d", cpu_id, ret);
+
+	/*
+	 * Bringing the cpu up failed: retake the locks before tearing the
+	 * registration down, exactly as del_cpu_upcall() does for the same
+	 * sequence, so the present mask and num_processors are never updated
+	 * concurrently with another hotplug operation.
+	 */
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+	arch_unregister_cpu(cpu_id);
+
+rollback_register_cpu:
+	/* undo what generic_processor_info() set up for this cpu */
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+	num_processors--;
+rollback_generic_cpu:
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+	return ret;
+}
+
+/*
+ * Hot-remove the vcpu identified by @apic_id.
+ *
+ * The cpu is taken offline with remove_cpu() first. Its registration is then
+ * torn down while holding the device-hotplug lock, the cpu maps lock and the
+ * cpu hotplug write lock.
+ *
+ * Returns 0 on success, -EINVAL when @apic_id does not belong to any known
+ * cpu or belongs to the bootstrap processor (cpu 0), or the error returned
+ * by remove_cpu().
+ */
+static int del_cpu_upcall(int apic_id)
+{
+	int cpu_id = get_cpu_id(apic_id);
+	int ret;
+
+	/* get_cpu_id() returns -1 for an unknown APIC id; never pass that on. */
+	if (cpu_id < 0) {
+		pr_err("cannot del vcpu: no cpu found for apic_id %d", apic_id);
+		return -EINVAL;
+	}
+	if (cpu_id == 0) {
+		pr_err("cannot del bootstrap processor.");
+		return -EINVAL;
+	}
+	pr_info("deleting vcpu %d", cpu_id);
+	ret = remove_cpu(cpu_id);
+	if (ret) {
+		pr_err("del vcpu failed, err: %d", ret);
+		return ret;
+	}
+
+	lock_device_hotplug();
+	cpu_maps_update_begin();
+	cpu_hotplug_begin();
+
+	/* mirror of the registration steps performed by add_cpu_upcall() */
+	arch_unregister_cpu(cpu_id);
+	set_cpu_present(cpu_id, false);
+	per_cpu(x86_cpu_to_apicid, cpu_id) = -1;
+	num_processors--;
+
+	cpu_hotplug_done();
+	cpu_maps_update_done();
+	unlock_device_hotplug();
+
+	return ret;
+}
+
+/*
+ * Handle an ADD_CPU request: hot-add the `count` vcpus whose APIC ids are
+ * listed in the request's apic_ids[] array.
+ *
+ * All APIC ids are checked first; if any of them is already in use nothing is
+ * added and -EINVAL is returned. The vcpus are then added one by one, stopping
+ * at the first failure. @rep is only filled in when every vcpu was added.
+ *
+ * Returns 0 on success (including an empty request) or a negative errno.
+ */
+static int add_cpu_dev(struct devmgr_req *req,
+		struct devmgr_reply *rep)
+{
+	/* stays 0 when count == 0 and neither loop below runs */
+	int ret = 0;
+	uint8_t i;
+	int apic_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t apic_ver = req->msg_load.cpu_dev_info.apic_ver;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("add vcpu number: %d", count);
+
+	/* validate the whole request before touching any cpu */
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		if (get_cpu_id(apic_id) != -1) {
+			pr_err("cpu cannot be added: apci_id %d is already been used.", apic_id);
+			return -EINVAL;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		apic_id = apic_ids[i];
+		ret = add_cpu_upcall(apic_id, apic_ver);
+		if (ret != 0)
+			break;
+	}
+
+	/* on success i == count, i.e. no entry of apic_ids[] failed */
+	if (!ret)
+		cpu_event_notification(i, ret, ADD_CPU, rep);
+	return ret;
+}
+
+/*
+ * Handle a DEL_CPU request: hot-remove the `count` vcpus whose APIC ids are
+ * listed in the request's apic_ids[] array.
+ *
+ * The request is rejected with -EINVAL (and an error reply with index 0) when
+ * it would remove every vcpu or names an APIC id that does not map to a
+ * possible cpu. The vcpus are then removed one by one, stopping at the first
+ * failure. A success reply is only filled in when every vcpu was removed.
+ *
+ * Returns 0 on success (including an empty request) or a negative errno.
+ */
+static int del_cpu_dev(struct devmgr_req *req,
+		struct devmgr_reply *rep)
+{
+	/* stays 0 when count == 0 and neither loop below runs */
+	int ret = 0;
+	uint8_t i;
+	int cpu_id;
+
+	uint8_t count = req->msg_load.cpu_dev_info.count;
+	uint8_t *apic_ids = req->msg_load.cpu_dev_info.apic_ids;
+
+	pr_info("del vcpu number : %d", count);
+
+	if (count >= num_processors) {
+		pr_err("cpu del parameter check error: cannot remove all vcpus");
+		ret = -EINVAL;
+		cpu_event_notification(0, ret, DEL_CPU, rep);
+		return ret;
+	}
+
+	for (i = 0; i < count; ++i) {
+		cpu_id = get_cpu_id(apic_ids[i]);
+		/*
+		 * get_cpu_id() returns -1 for an unknown APIC id; that must
+		 * not be used as a bit index into the possible mask.
+		 */
+		if (cpu_id < 0 || !cpu_possible(cpu_id)) {
+			pr_err("cpu %d cannot be deleted: cpu not possible", cpu_id);
+			ret = -EINVAL;
+			cpu_event_notification(0, ret, DEL_CPU, rep);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < count; ++i) {
+		ret = del_cpu_upcall(apic_ids[i]);
+		if (ret != 0)
+			break;
+	}
+
+	/* on success i == count, i.e. no entry of apic_ids[] failed */
+	if (!ret)
+		cpu_event_notification(i, ret, DEL_CPU, rep);
+	return ret;
+}
+#endif
+
static struct {
enum devmgr_msg_type cmd;
action_route_t fn;
} opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+ {ADD_CPU, add_cpu_dev},
+ {DEL_CPU, del_cpu_dev},
+#endif
};
static action_route_t get_action(struct devmgr_req *req)
--
2.19.1.6.gb485710b

View File

@@ -0,0 +1,415 @@
From 90ced3463137076b9df2200b5c6ad720660c6bfc Mon Sep 17 00:00:00 2001
From: Chao Wu <chaowu@linux.alibaba.com>
Date: Wed, 23 Nov 2022 19:23:47 +0800
Subject: [PATCH 4/4] upcall: add virtio-mmio hotplug/hot-unplug into device
manager service
Add virtio-mmio hotplug/hot-unplug support into device manager. In the
`devmgr_req` message, `msg_type` ADD_MMIO in `msg_header` will trigger
`add_mmio_dev` action and DEL_MMIO will trigger `del_mmio_dev` action,
and we use `mmio_base`, `mmio_size` and `mmio_irq` delivered in
`add_mmio_dev` to notify how to hotplug the virtio-mmio device.
Also `virtio_mmio_add_device` and `virtio_mmio_del_device` are
introduced under /drivers/virtio/virtio_mmio.c, and we extract
`vm_add_device` from `vm_cmdline_set` to help hotplug virtio-mmio
device.
Signed-off-by: Liu Jiang <gerry@linux.alibaba.com>
Signed-off-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Signed-off-by: Chao Wu <chaowu@linux.alibaba.com>
Signed-off-by: WangYu <WangYu@linux.alibaba.com>
Signed-off-by: Xingjun Liu <xingjun.liu@linux.alibaba.com>
---
drivers/misc/dragonball/upcall_srv/Kconfig | 12 ++
.../upcall_srv/dragonball_device_manager.c | 112 ++++++++++++++
drivers/virtio/Kconfig | 14 ++
drivers/virtio/virtio_mmio.c | 138 +++++++++++++++---
include/dragonball/device_manager.h | 5 +
5 files changed, 259 insertions(+), 22 deletions(-)
diff --git a/drivers/misc/dragonball/upcall_srv/Kconfig b/drivers/misc/dragonball/upcall_srv/Kconfig
index b237882a2928..fc83f03c2edd 100644
--- a/drivers/misc/dragonball/upcall_srv/Kconfig
+++ b/drivers/misc/dragonball/upcall_srv/Kconfig
@@ -25,6 +25,18 @@ config DRAGONBALL_DEVICE_MANAGER
If unsure, say N.
+config DRAGONBALL_HOTPLUG_VIRTIO_MMIO
+ bool "Virtio-MMIO device hotplug/hotunplug support"
+ depends on DRAGONBALL_DEVICE_MANAGER
+ default y
+ help
+ This option enables Virtio-MMIO device hotplug/hot-unplug support.
+ The VMM sends hotplug requests over vsock, using a dedicated data
+ structure that carries the command and its parameters, to hot-plug
+ or hot-unplug an MMIO device.
+
+ If unsure, say N.
+
config DRAGONBALL_HOTPLUG_CPU
bool "CPU hotplug/hotunplug support"
depends on DRAGONBALL_DEVICE_MANAGER
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 210ef5d6c9d5..5a95b2ba63e8 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -75,6 +75,13 @@ struct devmgr_req {
struct devmgr_msg_header msg_header;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ struct {
+ uint64_t mmio_base;
+ uint64_t mmio_size;
+ uint32_t mmio_irq;
+ } add_mmio_dev;
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct {
uint8_t count;
@@ -98,6 +105,10 @@ struct devmgr_reply {
int32_t ret;
union {
char pad[DEV_MGR_MSG_SIZE - sizeof(struct devmgr_msg_header) - sizeof(int32_t)];
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ struct {
+ } add_mmio_dev;
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct cpu_dev_reply_info cpu_dev_info;
#endif
@@ -114,6 +125,62 @@ struct task_res {
typedef int (*action_route_t) (struct devmgr_req *req,
struct devmgr_reply *rep);
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+#ifdef CONFIG_ARM64
+/*
+ * Map interrupt pin @pin to a Linux virq number.
+ *
+ * Looks up the device-tree node named "intc" and asks the irq core to create
+ * a mapping for the three-cell specifier {0, @pin, IRQ_TYPE_EDGE_RISING}.
+ * Returns 0 when the node cannot be found; otherwise returns whatever
+ * irq_create_fwspec_mapping() reports.
+ */
+static uint32_t get_device_virq(uint32_t pin)
+{
+	struct irq_fwspec fwspec = {
+		.param_count = 3,
+		.param = {0, pin, IRQ_TYPE_EDGE_RISING}
+	};
+	struct device_node *intc;
+	uint32_t virq;
+
+	intc = of_find_node_by_name(NULL, "intc");
+	if (!intc) {
+		pr_err("interrupt controller device node not found.");
+		return 0;
+	}
+
+	fwspec.fwnode = of_node_to_fwnode(intc);
+	virq = irq_create_fwspec_mapping(&fwspec);
+	/* drop the reference taken by of_find_node_by_name() */
+	of_node_put(intc);
+
+	return virq;
+}
+#elif defined(CONFIG_X86_64)
+/* On x86_64 the irq number from the request is used unchanged. */
+static inline uint32_t get_device_virq(uint32_t irq)
+{
+	return irq;
+}
+#endif
+
+/*
+ * Translate the add_mmio_dev payload of @req into two resources:
+ * @res[0] is the MMIO window [mmio_base, mmio_base + mmio_size - 1] and
+ * @res[1] is the interrupt line returned by get_device_virq().
+ *
+ * Returns 0 on success, or -EINVAL when the message size does not match the
+ * payload, the MMIO window is empty or wraps around the address space, or no
+ * virq could be obtained for the requested irq.
+ */
+static int get_dev_resource(struct devmgr_req *req, struct resource *res)
+{
+	uint64_t base = req->msg_load.add_mmio_dev.mmio_base;
+	uint64_t size = req->msg_load.add_mmio_dev.mmio_size;
+	uint32_t irq = req->msg_load.add_mmio_dev.mmio_irq;
+	uint32_t virq;
+
+	if (req->msg_header.msg_size != sizeof(req->msg_load.add_mmio_dev))
+		return -EINVAL;
+
+	/*
+	 * The end address below is inclusive (base + size - 1): a zero size
+	 * or a window that wraps would produce end < start.
+	 */
+	if (!size || base + size - 1 < base)
+		return -EINVAL;
+
+	res[0].flags = IORESOURCE_MEM;
+	res[0].start = base;
+	res[0].end = base + size - 1;
+	res[1].flags = IORESOURCE_IRQ;
+	virq = get_device_virq(irq);
+	if (!virq)
+		return -EINVAL;
+	res[1].start = res[1].end = virq;
+
+	/* detect the irq sharing mode */
+	if (irq == SHARED_IRQ_NO)
+		res[1].flags |= IORESOURCE_IRQ_SHAREABLE;
+
+	return 0;
+}
+#endif
+
static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
uint32_t msg_type, uint32_t msg_flags)
{
@@ -154,6 +221,47 @@ static void cpu_event_notification(
}
#endif
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+/*
+ * Handle an ADD_MMIO request: build the device resources from @req and
+ * register a new virtio-mmio device for them.
+ *
+ * @rep is only filled in (ret = 0, empty payload, type ADD_MMIO) when the
+ * device was added. On failure the error code is returned and @rep is left
+ * untouched.
+ */
+static int add_mmio_dev(struct devmgr_req *req,
+		struct devmgr_reply *rep)
+{
+	struct resource res[2] = {};
+	int ret;
+
+	ret = get_dev_resource(req, res);
+	if (ret)
+		return ret;
+
+	ret = virtio_mmio_add_device(res, ARRAY_SIZE(res));
+	if (ret)
+		return ret;
+
+	rep->ret = 0;
+	_fill_msg_header(&rep->msg_header, 0, ADD_MMIO, 0);
+	return 0;
+}
+
+/*
+ * Handle a DEL_MMIO request: build the device resources from @req and
+ * remove the virtio-mmio device that was registered with them.
+ *
+ * @rep is only filled in (ret = 0, empty payload, type DEL_MMIO) when the
+ * device was removed. On failure the error code is returned and @rep is left
+ * untouched.
+ */
+static int del_mmio_dev(struct devmgr_req *req,
+		struct devmgr_reply *rep)
+{
+	struct resource res[2] = {};
+	int ret;
+
+	ret = get_dev_resource(req, res);
+	if (ret)
+		return ret;
+
+	ret = virtio_mmio_del_device(res, ARRAY_SIZE(res));
+	if (ret)
+		return ret;
+
+	rep->ret = 0;
+	_fill_msg_header(&rep->msg_header, 0, DEL_MMIO, 0);
+	return 0;
+}
+#endif
+
+
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
{
@@ -328,6 +436,10 @@ static struct {
enum devmgr_msg_type cmd;
action_route_t fn;
} opt_map[] = {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
+ {ADD_MMIO, add_mmio_dev},
+ {DEL_MMIO, del_mmio_dev},
+#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 7b41130d3f35..084de9827b5d 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -139,4 +139,18 @@ config VIRTIO_DMA_SHARED_BUFFER
This option adds a flavor of dma buffers that are backed by
virtio resources.
+config VIRTIO_MMIO_DRAGONBALL
+ bool "Enable features for Dragonball virtio MMIO devices"
+ default n
+ depends on VIRTIO_MMIO
+ depends on X86_64 || ARM64
+ select X86_PLATFORM_MSI
+ select VIRTIO_MMIO_MSI
+ help
+ The Dragonball VMM implements several optimizations for MMIO virtio
+ devices. This option enables support of those optimization features:
+ - virtio-mmio hotplug through upcall
+
+ If unsure, say N
+
endif # VIRTIO_MENU
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 238383ff1064..c29d533bd4a8 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -652,16 +652,41 @@ static struct device vm_cmdline_parent = {
static int vm_cmdline_parent_registered;
static int vm_cmdline_id;
+/*
+ * Register a "virtio-mmio" platform device described by @resources
+ * (@res_size entries) as a child of vm_cmdline_parent, registering the
+ * parent itself on first use. Each call consumes the next vm_cmdline_id.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int vm_add_device(struct resource *resources, size_t res_size)
+{
+	int err;
+	struct platform_device *pdev;
+
+	if (!vm_cmdline_parent_registered) {
+		err = device_register(&vm_cmdline_parent);
+		if (err) {
+			/*
+			 * A device that failed device_register() must still
+			 * be released with put_device().
+			 */
+			put_device(&vm_cmdline_parent);
+			pr_err("Failed to register parent device!\n");
+			return err;
+		}
+		vm_cmdline_parent_registered = 1;
+	}
+
+	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
+		vm_cmdline_id,
+		(unsigned long long)resources[0].start,
+		(unsigned long long)resources[0].end,
+		(int)resources[1].start);
+
+	pdev = platform_device_register_resndata(&vm_cmdline_parent,
+			"virtio-mmio", vm_cmdline_id++,
+			resources, res_size, NULL, 0);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+
static int vm_cmdline_set(const char *device,
const struct kernel_param *kp)
{
- int err;
struct resource resources[2] = {};
char *str;
long long int base, size;
unsigned int irq;
int processed, consumed = 0;
- struct platform_device *pdev;
/* Consume "size" part of the command line parameter */
size = memparse(device, &str);
@@ -686,26 +711,7 @@ static int vm_cmdline_set(const char *device,
resources[1].flags = IORESOURCE_IRQ;
resources[1].start = resources[1].end = irq;
- if (!vm_cmdline_parent_registered) {
- err = device_register(&vm_cmdline_parent);
- if (err) {
- pr_err("Failed to register parent device!\n");
- return err;
- }
- vm_cmdline_parent_registered = 1;
- }
-
- pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
- vm_cmdline_id,
- (unsigned long long)resources[0].start,
- (unsigned long long)resources[0].end,
- (int)resources[1].start);
-
- pdev = platform_device_register_resndata(&vm_cmdline_parent,
- "virtio-mmio", vm_cmdline_id++,
- resources, ARRAY_SIZE(resources), NULL, 0);
-
- return PTR_ERR_OR_ZERO(pdev);
+ return vm_add_device(resources, ARRAY_SIZE(resources));
}
static int vm_cmdline_get_device(struct device *dev, void *data)
@@ -755,6 +761,94 @@ static void vm_unregister_cmdline_devices(void)
}
}
+#ifdef CONFIG_DRAGONBALL_DEVICE_MANAGER
+/*
+ * device_find_child() callback: @data is a two-entry resource array.
+ * Returns 1 when @dev has exactly the same MMIO window (start and end of
+ * resource 0) and the same interrupt (start of resource 1), 0 otherwise.
+ */
+static int vm_match_device(struct device *dev, void *data)
+{
+	const struct resource *want = data;
+	const struct resource *have = to_platform_device(dev)->resource;
+
+	return have[0].start == want[0].start &&
+	       have[0].end == want[0].end &&
+	       have[1].start == want[1].start;
+}
+
+/*
+ * Find the child of vm_cmdline_parent that vm_match_device() accepts for
+ * @res (same MMIO window and interrupt). Returns NULL when no child matches;
+ * a non-NULL result is released by the caller with put_device().
+ */
+static struct device *vm_find_device(struct resource *res)
+{
+ return device_find_child(&vm_cmdline_parent, res, vm_match_device);
+}
+
+/*
+ * device_find_child() callback: @data is the two-entry resource array of a
+ * device about to be added. Returns 1 when it conflicts with the already
+ * registered device @dev, 0 otherwise.
+ */
+static int vm_device_overlap(struct device *dev, void *data)
+{
+	struct resource *res = (struct resource *)data;
+	struct platform_device *pdev = to_platform_device(dev);
+
+	/* Detect IRQ number conflicts except shared IRQs. */
+	if (!(res[1].flags & IORESOURCE_IRQ_SHAREABLE) &&
+	    (pdev->resource[1].start == res[1].start)) {
+		return 1;
+	}
+
+	/*
+	 * Detect device MMIO addresses overlapping. Both ranges are
+	 * inclusive ([start, end]), so sharing even a single address, e.g.
+	 * one range starting exactly where the other ends, is an overlap.
+	 */
+	if ((pdev->resource[0].start <= res[0].end) &&
+	    (pdev->resource[0].end >= res[0].start)) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look for an already registered child of vm_cmdline_parent whose resources
+ * conflict with @res, as decided by vm_device_overlap(). A non-NULL result
+ * is released by the caller with put_device().
+ */
+static struct device *vm_detect_resource(struct resource *res)
+{
+ /* return NULL if no resource overlapped */
+ return device_find_child(&vm_cmdline_parent, res, vm_device_overlap);
+}
+
+/*
+ * Hot-add a virtio-mmio device.
+ *
+ * @resources: resources[0] is the MMIO window, resources[1] the interrupt.
+ * @res_size:  number of entries in @resources, at least 2.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, -EEXIST when the
+ * resources conflict with an already registered device, or the error
+ * reported while registering the device.
+ */
+int virtio_mmio_add_device(struct resource *resources, size_t res_size)
+{
+	int err;
+	struct device *dev = NULL;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+
+	/*
+	 * Check for conflicts and register under the same lock, so that two
+	 * concurrent requests for overlapping resources cannot both pass the
+	 * check before either device has been registered.
+	 */
+	lock_device_hotplug();
+
+	/* an unregistered parent has no children to conflict with */
+	if (vm_cmdline_parent_registered)
+		dev = vm_detect_resource(resources);
+
+	if (dev) {
+		put_device(dev);
+		err = -EEXIST;
+	} else {
+		err = vm_add_device(resources, res_size);
+	}
+
+	unlock_device_hotplug();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_add_device);
+
+/*
+ * Hot-remove the virtio-mmio device registered with exactly these resources.
+ *
+ * @resources: resources[0] is the MMIO window, resources[1] the interrupt.
+ * @res_size:  number of entries in @resources, at least 2.
+ *
+ * Returns -EINVAL for bad arguments, -ENODEV when no such device is
+ * registered, or the result of vm_unregister_cmdline_device().
+ */
+int virtio_mmio_del_device(struct resource *resources, size_t res_size)
+{
+	int ret;
+	struct device *dev;
+
+	if (res_size < 2 || !resources)
+		return -EINVAL;
+
+	/* nothing can have been registered below an unregistered parent */
+	if (!vm_cmdline_parent_registered)
+		return -ENODEV;
+
+	lock_device_hotplug();
+
+	dev = vm_find_device(resources);
+	if (!dev) {
+		ret = -ENODEV;
+	} else {
+		ret = vm_unregister_cmdline_device(dev, NULL);
+		/*
+		 * Only drop the lookup reference once the device is no longer
+		 * used, so it cannot go away underneath the unregister call.
+		 */
+		put_device(dev);
+	}
+
+	unlock_device_hotplug();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_mmio_del_device);
+#endif /* CONFIG_DRAGONBALL_DEVICE_MANAGER */
+
#else
static void vm_unregister_cmdline_devices(void)
diff --git a/include/dragonball/device_manager.h b/include/dragonball/device_manager.h
index a1713e9f026d..785761c47f97 100644
--- a/include/dragonball/device_manager.h
+++ b/include/dragonball/device_manager.h
@@ -15,4 +15,9 @@
#include <linux/device.h>
+#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
+int virtio_mmio_add_device(struct resource *resources, size_t res_size);
+int virtio_mmio_del_device(struct resource *resources, size_t res_size);
+#endif
+
#endif /* _DB_DEVICE_MANAGER_H */
--
2.19.1.6.gb485710b

View File

@@ -180,6 +180,11 @@ assets:
url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/"
version: "v5.15.7"
dragonball-kernel-experimental:
description: "Linux kernel with Dragonball VMM optimizations like upcall"
url: "https://cdn.kernel.org/pub/linux/kernel/v5.x/"
version: "v5.10.25"
externals:
description: "Third-party projects used by the system"