Merge pull request #6012 from GeorginaKin/CCv0

CCv0: Merge main into CCv0 branch
Authored by Fabiano Fidêncio on 2023-01-09 22:51:33 +01:00, committed by GitHub
91 changed files with 1984 additions and 2475 deletions

View File

@@ -43,6 +43,16 @@ function install_yq() {
"aarch64")
goarch=arm64
;;
"arm64")
# If we're on an Apple silicon machine, just assign amd64.
# The version of yq we use doesn't have a darwin arm build,
# but Rosetta can come to the rescue here.
if [ $goos == "Darwin" ]; then
goarch=amd64
else
goarch=arm64
fi
;;
"ppc64le")
goarch=ppc64le
;;
@@ -64,7 +74,7 @@ function install_yq() {
fi
## NOTE: ${var,,} => gives lowercase value of var
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos,,}_${goarch}"
local yq_url="https://${yq_pkg}/releases/download/${yq_version}/yq_${goos}_${goarch}"
curl -o "${yq_path}" -LSsf "${yq_url}"
[ $? -ne 0 ] && die "Download ${yq_url} failed"
chmod +x "${yq_path}"

View File

@@ -71,12 +71,6 @@ To use containerd, modify the `--container-runtime` argument:
> **Notes:**
> - Adjust the `--memory 6144` line to suit your environment and requirements. Kata Containers default to
> requesting 2048MB per container. We recommend you supply more than that to the Minikube node.
> - Prior to Minikube/Kubernetes v1.14, the beta `RuntimeClass` feature also needed enabling with
> the following.
>
> | what | why |
> | ---- | --- |
> | `--feature-gates=RuntimeClass=true` | Kata needs to use the `RuntimeClass` Kubernetes feature |
The full command is therefore:
@@ -138,17 +132,9 @@ $ kubectl -n kube-system exec ${podname} -- ps -ef | fgrep infinity
## Enabling Kata Containers
> **Note:** Only Minikube/Kubernetes versions <= 1.13 require this step. Since version
> v1.14, the `RuntimeClass` is enabled by default. Performing this step on Kubernetes > v1.14 is
> however benign.
Now you have installed the Kata Containers components in the Minikube node. Next, you need to configure
Kubernetes `RuntimeClass` to know when to use Kata Containers to run a pod.
```sh
$ kubectl apply -f https://raw.githubusercontent.com/kubernetes/node-api/master/manifests/runtimeclass_crd.yaml > runtimeclass_crd.yaml
```
### Register the runtime
Now register the `kata qemu` runtime with that class. This should result in no errors:

src/agent/Cargo.lock (generated, 1 change)
View File

@@ -4196,6 +4196,7 @@ dependencies = [
"cgroups-rs",
"futures",
"inotify",
"kata-sys-util",
"lazy_static",
"libc",
"libseccomp",

View File

@@ -11,6 +11,7 @@ serde_json = "1.0.39"
serde_derive = "1.0.91"
oci = { path = "../../libs/oci" }
protocols = { path ="../../libs/protocols" }
kata-sys-util = { path = "../../libs/kata-sys-util" }
caps = "0.5.0"
nix = "0.24.2"
scopeguard = "1.0.0"

View File

@@ -6,7 +6,7 @@
use anyhow::{anyhow, Context, Result};
use libc::pid_t;
use oci::{ContainerState, LinuxDevice, LinuxIdMapping};
use oci::{Hook, Linux, LinuxNamespace, LinuxResources, Spec};
use oci::{Linux, LinuxNamespace, LinuxResources, Spec};
use std::clone::Clone;
use std::ffi::CString;
use std::fmt::Display;
@@ -67,6 +67,9 @@ use rlimit::{setrlimit, Resource, Rlim};
use tokio::io::AsyncBufReadExt;
use tokio::sync::Mutex;
use kata_sys_util::hooks::HookStates;
use kata_sys_util::validate::valid_env;
pub const EXEC_FIFO_FILENAME: &str = "exec.fifo";
const INIT: &str = "INIT";
@@ -1098,12 +1101,14 @@ impl BaseContainer for LinuxContainer {
}
}
if spec.hooks.is_some() {
info!(self.logger, "poststop");
let hooks = spec.hooks.as_ref().unwrap();
for h in hooks.poststop.iter() {
execute_hook(&self.logger, h, &st).await?;
}
// guest Poststop hook
// * should be executed after the container is deleted but before the delete operation returns
// * the executable file is in agent namespace
// * should also be executed in agent namespace.
if let Some(hooks) = spec.hooks.as_ref() {
info!(self.logger, "guest Poststop hook");
let mut hook_states = HookStates::new();
hook_states.execute_hooks(&hooks.poststop, Some(st))?;
}
self.status.transition(ContainerState::Stopped);
@@ -1149,16 +1154,14 @@ impl BaseContainer for LinuxContainer {
.ok_or_else(|| anyhow!("OCI spec was not found"))?;
let st = self.oci_state()?;
// run poststart hook
if spec.hooks.is_some() {
info!(self.logger, "poststart hook");
let hooks = spec
.hooks
.as_ref()
.ok_or_else(|| anyhow!("OCI hooks were not found"))?;
for h in hooks.poststart.iter() {
execute_hook(&self.logger, h, &st).await?;
}
// guest Poststart hook
// * should be executed after the container is started but before the delete operation returns
// * the executable file is in agent namespace
// * should also be executed in agent namespace.
if let Some(hooks) = spec.hooks.as_ref() {
info!(self.logger, "guest Poststart hook");
let mut hook_states = HookStates::new();
hook_states.execute_hooks(&hooks.poststart, Some(st))?;
}
unistd::close(fd)?;
@@ -1379,13 +1382,14 @@ async fn join_namespaces(
info!(logger, "get ready to run prestart hook!");
// run prestart hook
if spec.hooks.is_some() {
info!(logger, "prestart hook");
let hooks = spec.hooks.as_ref().unwrap();
for h in hooks.prestart.iter() {
execute_hook(&logger, h, st).await?;
}
// guest Prestart hook
// * should be executed during the start operation, and before the container command is executed
// * the executable file is in agent namespace
// * should also be executed in agent namespace.
if let Some(hooks) = spec.hooks.as_ref() {
info!(logger, "guest Prestart hook");
let mut hook_states = HookStates::new();
hook_states.execute_hooks(&hooks.prestart, Some(st.clone()))?;
}
// notify child run prestart hooks completed
@@ -1565,143 +1569,6 @@ fn set_sysctls(sysctls: &HashMap<String, String>) -> Result<()> {
Ok(())
}
use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
pub async fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> {
let logger = logger.new(o!("action" => "execute-hook"));
let binary = PathBuf::from(h.path.as_str());
let path = binary.canonicalize()?;
if !path.exists() {
return Err(anyhow!("Path {:?} does not exist", path));
}
let mut args = h.args.clone();
// the hook.args[0] is the hook binary name which shouldn't be included
// in the Command.args
if args.len() > 1 {
args.remove(0);
}
// all invalid envs will be omitted, only valid envs will be passed to hook.
let env: HashMap<&str, &str> = h.env.iter().filter_map(|e| valid_env(e)).collect();
// Avoid the exit signal to be reaped by the global reaper.
let _wait_locker = WAIT_PID_LOCKER.lock().await;
let mut child = tokio::process::Command::new(path)
.args(args.iter())
.envs(env.iter())
.kill_on_drop(true)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
// default timeout 10s
let mut timeout: u64 = 10;
// if timeout is set if hook, then use the specified value
if let Some(t) = h.timeout {
if t > 0 {
timeout = t as u64;
}
}
let state = serde_json::to_string(st)?;
let path = h.path.clone();
let join_handle = tokio::spawn(async move {
if let Some(mut stdin) = child.stdin.take() {
match stdin.write_all(state.as_bytes()).await {
Ok(_) => {}
Err(e) => {
info!(logger, "write to child stdin failed: {:?}", e);
}
}
}
// read something from stdout and stderr for debug
if let Some(stdout) = child.stdout.as_mut() {
let mut out = String::new();
match stdout.read_to_string(&mut out).await {
Ok(_) => {
info!(logger, "child stdout: {}", out.as_str());
}
Err(e) => {
info!(logger, "read from child stdout failed: {:?}", e);
}
}
}
let mut err = String::new();
if let Some(stderr) = child.stderr.as_mut() {
match stderr.read_to_string(&mut err).await {
Ok(_) => {
info!(logger, "child stderr: {}", err.as_str());
}
Err(e) => {
info!(logger, "read from child stderr failed: {:?}", e);
}
}
}
match child.wait().await {
Ok(exit) => {
let code = exit
.code()
.ok_or_else(|| anyhow!("hook exit status has no status code"))?;
if code != 0 {
error!(
logger,
"hook {} exit status is {}, error message is {}", &path, code, err
);
return Err(anyhow!(nix::Error::UnknownErrno));
}
debug!(logger, "hook {} exit status is 0", &path);
Ok(())
}
Err(e) => Err(anyhow!(
"wait child error: {} {}",
e,
e.raw_os_error().unwrap()
)),
}
});
match tokio::time::timeout(Duration::new(timeout, 0), join_handle).await {
Ok(r) => r.unwrap(),
Err(_) => Err(anyhow!(nix::Error::ETIMEDOUT)),
}
}
// valid environment variables according to https://doc.rust-lang.org/std/env/fn.set_var.html#panics
fn valid_env(e: &str) -> Option<(&str, &str)> {
// wherther key or value will contain NULL char.
if e.as_bytes().contains(&b'\0') {
return None;
}
let v: Vec<&str> = e.splitn(2, '=').collect();
// key can't hold an `equal` sign, but value can
if v.len() != 2 {
return None;
}
let (key, value) = (v[0].trim(), v[1].trim());
// key can't be empty
if key.is_empty() {
return None;
}
Some((key, value))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1712,7 +1579,6 @@ mod tests {
use std::os::unix::io::AsRawFd;
use tempfile::tempdir;
use test_utils::skip_if_not_root;
use tokio::process::Command;
macro_rules! sl {
() => {
@@ -1720,113 +1586,6 @@ mod tests {
};
}
async fn which(cmd: &str) -> String {
let output: std::process::Output = Command::new("which")
.arg(cmd)
.output()
.await
.expect("which command failed to run");
match String::from_utf8(output.stdout) {
Ok(v) => v.trim_end_matches('\n').to_string(),
Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
}
}
#[tokio::test]
async fn test_execute_hook() {
let temp_file = "/tmp/test_execute_hook";
let touch = which("touch").await;
defer!(fs::remove_file(temp_file).unwrap(););
let invalid_str = vec![97, b'\0', 98];
let invalid_string = std::str::from_utf8(&invalid_str).unwrap();
let invalid_env = format!("{}=value", invalid_string);
execute_hook(
&slog_scope::logger(),
&Hook {
path: touch,
args: vec!["touch".to_string(), temp_file.to_string()],
env: vec![invalid_env],
timeout: Some(10),
},
&OCIState {
version: "1.2.3".to_string(),
id: "321".to_string(),
status: ContainerState::Running,
pid: 2,
bundle: "".to_string(),
annotations: Default::default(),
},
)
.await
.unwrap();
assert_eq!(Path::new(&temp_file).exists(), true);
}
#[tokio::test]
async fn test_execute_hook_with_error() {
let ls = which("ls").await;
let res = execute_hook(
&slog_scope::logger(),
&Hook {
path: ls,
args: vec!["ls".to_string(), "/tmp/not-exist".to_string()],
env: vec![],
timeout: None,
},
&OCIState {
version: "1.2.3".to_string(),
id: "321".to_string(),
status: ContainerState::Running,
pid: 2,
bundle: "".to_string(),
annotations: Default::default(),
},
)
.await;
let expected_err = nix::Error::UnknownErrno;
assert_eq!(
res.unwrap_err().downcast::<nix::Error>().unwrap(),
expected_err
);
}
#[tokio::test]
async fn test_execute_hook_with_timeout() {
let sleep = which("sleep").await;
let res = execute_hook(
&slog_scope::logger(),
&Hook {
path: sleep,
args: vec!["sleep".to_string(), "2".to_string()],
env: vec![],
timeout: Some(1),
},
&OCIState {
version: "1.2.3".to_string(),
id: "321".to_string(),
status: ContainerState::Running,
pid: 2,
bundle: "".to_string(),
annotations: Default::default(),
},
)
.await;
let expected_err = nix::Error::ETIMEDOUT;
assert_eq!(
res.unwrap_err().downcast::<nix::Error>().unwrap(),
expected_err
);
}
#[test]
fn test_status_transtition() {
let mut status = ContainerStatus::new();
@@ -2141,49 +1900,4 @@ mod tests {
let ret = do_init_child(std::io::stdin().as_raw_fd());
assert!(ret.is_err(), "Expecting Err, Got {:?}", ret);
}
#[test]
fn test_valid_env() {
let env = valid_env("a=b=c");
assert_eq!(Some(("a", "b=c")), env);
let env = valid_env("a=b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a =b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env(" a =b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a= b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a=b ");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a=b c ");
assert_eq!(Some(("a", "b c")), env);
let env = valid_env("=b");
assert_eq!(None, env);
let env = valid_env("a=");
assert_eq!(Some(("a", "")), env);
let env = valid_env("a==");
assert_eq!(Some(("a", "=")), env);
let env = valid_env("a");
assert_eq!(None, env);
let invalid_str = vec![97, b'\0', 98];
let invalid_string = std::str::from_utf8(&invalid_str).unwrap();
let invalid_env = format!("{}=value", invalid_string);
let env = valid_env(&invalid_env);
assert_eq!(None, env);
let invalid_env = format!("key={}", invalid_string);
let env = valid_env(&invalid_env);
assert_eq!(None, env);
}
}

View File

@@ -228,7 +228,7 @@ mod tests {
assert!(c.max_memslots >= 32);
let kvm = Kvm::new().unwrap();
let f = unsafe { File::from_raw_fd(kvm.as_raw_fd()) };
let f = std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(kvm.as_raw_fd()) });
let m1 = f.metadata().unwrap();
let m2 = File::open("/dev/kvm").unwrap().metadata().unwrap();
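For context, a standalone sketch (not part of the diff) of why `ManuallyDrop` is used in the hunk above: `File::from_raw_fd` takes ownership of the descriptor, so letting that temporary `File` drop would close an fd still owned by the `Kvm` handle; wrapping it in `ManuallyDrop` lets the code borrow the fd for `metadata()` without closing it. This sketch assumes only `std`:

```rust
use std::fs::File;
use std::mem::ManuallyDrop;
use std::os::unix::io::{AsRawFd, FromRawFd};

// Read metadata through a borrowed fd without closing it when the
// temporary File goes out of scope.
fn metadata_of_borrowed_fd(owner: &impl AsRawFd) -> std::io::Result<std::fs::Metadata> {
    // SAFETY: the fd stays valid for the duration of this call, and it is
    // never closed here because the File is wrapped in ManuallyDrop.
    let f = ManuallyDrop::new(unsafe { File::from_raw_fd(owner.as_raw_fd()) });
    f.metadata()
}

fn main() -> std::io::Result<()> {
    // The owner keeps the fd alive; the helper only borrows it.
    let owner = File::open("/dev/null")?;
    let md = metadata_of_borrowed_fd(&owner)?;
    println!("/dev/null metadata length: {}", md.len());
    Ok(())
}
```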

View File

@@ -758,13 +758,11 @@ impl Drop for Vcpu {
#[cfg(test)]
pub mod tests {
use std::os::unix::io::AsRawFd;
use std::sync::mpsc::{channel, Receiver};
use std::sync::Mutex;
use arc_swap::ArcSwap;
use dbs_device::device_manager::IoManager;
use kvm_ioctls::Kvm;
use lazy_static::lazy_static;
use test_utils::skip_if_not_root;
@@ -812,12 +810,8 @@ pub mod tests {
#[cfg(target_arch = "x86_64")]
fn create_vcpu() -> (Vcpu, Receiver<VcpuStateEvent>) {
// Call for kvm too frequently would cause error in some host kernel.
std::thread::sleep(std::time::Duration::from_millis(5));
let kvm = Kvm::new().unwrap();
let vm = Arc::new(kvm.create_vm().unwrap());
let kvm_context = KvmContext::new(Some(kvm.as_raw_fd())).unwrap();
let kvm_context = KvmContext::new(None).unwrap();
let vm = kvm_context.kvm().create_vm().unwrap();
let vcpu_fd = Arc::new(vm.create_vcpu(0).unwrap());
let io_manager = IoManagerCached::new(Arc::new(ArcSwap::new(Arc::new(IoManager::new()))));
let supported_cpuid = kvm_context

View File

@@ -1041,3 +1041,376 @@ impl MutEventSubscriber for VcpuEpollHandler {
ops.add(Events::new(&self.eventfd, EventSet::IN)).unwrap();
}
}
#[cfg(test)]
mod tests {
use std::os::unix::io::AsRawFd;
use std::sync::{Arc, RwLock};
use dbs_utils::epoll_manager::EpollManager;
#[cfg(feature = "hotplug")]
use dbs_virtio_devices::vsock::backend::VsockInnerBackend;
use seccompiler::BpfProgram;
use test_utils::skip_if_not_root;
use vmm_sys_util::eventfd::EventFd;
use super::*;
use crate::api::v1::InstanceInfo;
use crate::vcpu::vcpu_impl::tests::{EmulationCase, EMULATE_RES};
use crate::vm::{CpuTopology, Vm, VmConfigInfo};
fn get_vm() -> Vm {
let instance_info = Arc::new(RwLock::new(InstanceInfo::default()));
let epoll_manager = EpollManager::default();
let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap();
let vm_config = VmConfigInfo {
vcpu_count: 1,
max_vcpu_count: 3,
cpu_pm: "off".to_string(),
mem_type: "shmem".to_string(),
mem_file_path: "".to_string(),
mem_size_mib: 100,
serial_path: None,
cpu_topology: CpuTopology {
threads_per_core: 1,
cores_per_die: 3,
dies_per_socket: 1,
sockets: 1,
},
vpmu_feature: 0,
};
vm.set_vm_config(vm_config);
vm.init_guest_memory().unwrap();
vm.init_vcpu_manager(vm.vm_as().unwrap().clone(), BpfProgram::default())
.unwrap();
vm.vcpu_manager()
.unwrap()
.set_reset_event_fd(EventFd::new(libc::EFD_NONBLOCK).unwrap())
.unwrap();
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
vm.setup_interrupt_controller().unwrap();
}
vm
}
fn get_present_unstart_vcpus(vcpu_manager: &std::sync::MutexGuard<'_, VcpuManager>) -> u8 {
vcpu_manager
.vcpu_infos
.iter()
.fold(0, |sum, info| sum + info.vcpu.is_some() as u8)
}
#[test]
fn test_vcpu_manager_config() {
skip_if_not_root!();
let instance_info = Arc::new(RwLock::new(InstanceInfo::default()));
let epoll_manager = EpollManager::default();
let mut vm = Vm::new(None, instance_info, epoll_manager).unwrap();
let vm_config = VmConfigInfo {
vcpu_count: 1,
max_vcpu_count: 2,
cpu_pm: "off".to_string(),
mem_type: "shmem".to_string(),
mem_file_path: "".to_string(),
mem_size_mib: 1,
serial_path: None,
cpu_topology: CpuTopology {
threads_per_core: 1,
cores_per_die: 2,
dies_per_socket: 1,
sockets: 1,
},
vpmu_feature: 0,
};
vm.set_vm_config(vm_config.clone());
vm.init_guest_memory().unwrap();
vm.init_vcpu_manager(vm.vm_as().unwrap().clone(), BpfProgram::default())
.unwrap();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
// test the vcpu_config
assert_eq!(
vcpu_manager.vcpu_infos.len(),
vm_config.max_vcpu_count as usize
);
assert_eq!(
vcpu_manager.vcpu_config.boot_vcpu_count,
vm_config.vcpu_count
);
assert_eq!(
vcpu_manager.vcpu_config.max_vcpu_count,
vm_config.max_vcpu_count
);
let reset_event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
let reset_event_fd_raw = reset_event_fd.as_raw_fd();
vcpu_manager.set_reset_event_fd(reset_event_fd).unwrap();
// test the reset_event_fd
assert_eq!(
vcpu_manager.reset_event_fd.as_ref().unwrap().as_raw_fd(),
reset_event_fd_raw
);
}
#[test]
fn test_vcpu_manager_boot_vcpus() {
skip_if_not_root!();
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
// test create boot vcpu
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
// test start boot vcpus
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
}
#[test]
fn test_vcpu_manager_operate_vcpus() {
skip_if_not_root!();
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
// test create vcpu more than max
let res = vcpu_manager.create_vcpus(20, None, None);
assert!(matches!(res, Err(VcpuManagerError::ExpectedVcpuExceedMax)));
// test create vcpus
assert!(vcpu_manager.create_vcpus(2, None, None).is_ok());
assert_eq!(vcpu_manager.present_vcpus_count(), 0);
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
assert_eq!(vcpu_manager.vcpus().len(), 2);
assert_eq!(vcpu_manager.vcpus_mut().len(), 2);
// test start vcpus
assert!(vcpu_manager
.start_vcpus(1, BpfProgram::default(), false)
.is_ok());
assert_eq!(vcpu_manager.present_vcpus_count(), 1);
assert_eq!(vcpu_manager.present_vcpus(), vec![0]);
assert!(vcpu_manager
.start_vcpus(2, BpfProgram::default(), false)
.is_ok());
assert_eq!(vcpu_manager.present_vcpus_count(), 2);
assert_eq!(vcpu_manager.present_vcpus(), vec![0, 1]);
// test start vcpus more than created
let res = vcpu_manager.start_vcpus(3, BpfProgram::default(), false);
assert!(matches!(res, Err(VcpuManagerError::VcpuNotCreate)));
// test start vcpus less than started
assert!(vcpu_manager
.start_vcpus(1, BpfProgram::default(), false)
.is_ok());
}
#[test]
fn test_vcpu_manager_pause_resume_vcpus() {
skip_if_not_root!();
*(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR);
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// invalid cpuid for pause
let cpu_indexes = vec![2];
let res = vcpu_manager.pause_vcpus(&cpu_indexes);
assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_))));
// pause success
let cpu_indexes = vec![0];
assert!(vcpu_manager.pause_vcpus(&cpu_indexes).is_ok());
// invalid cpuid for resume
let cpu_indexes = vec![2];
let res = vcpu_manager.resume_vcpus(&cpu_indexes);
assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_))));
// success resume
let cpu_indexes = vec![0];
assert!(vcpu_manager.resume_vcpus(&cpu_indexes).is_ok());
// pause and resume all
assert!(vcpu_manager.pause_all_vcpus().is_ok());
assert!(vcpu_manager.resume_all_vcpus().is_ok());
}
#[test]
fn test_vcpu_manager_exit_vcpus() {
skip_if_not_root!();
*(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR);
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// invalid cpuid for exit
let cpu_indexes = vec![2];
let res = vcpu_manager.exit_vcpus(&cpu_indexes);
assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_))));
// exit success
let cpu_indexes = vec![0];
assert!(vcpu_manager.exit_vcpus(&cpu_indexes).is_ok());
}
#[test]
fn test_vcpu_manager_exit_all_vcpus() {
skip_if_not_root!();
*(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR);
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// exit all success
assert!(vcpu_manager.exit_all_vcpus().is_ok());
assert_eq!(vcpu_manager.vcpu_infos.len(), 0);
assert!(vcpu_manager.io_manager.is_none());
}
#[test]
fn test_vcpu_manager_revalidate_vcpus_cache() {
skip_if_not_root!();
*(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR);
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// invalid cpuid for exit
let cpu_indexes = vec![2];
let res = vcpu_manager.revalidate_vcpus_cache(&cpu_indexes);
assert!(matches!(res, Err(VcpuManagerError::VcpuNotFound(_))));
// revalidate success
let cpu_indexes = vec![0];
assert!(vcpu_manager.revalidate_vcpus_cache(&cpu_indexes).is_ok());
}
#[test]
fn test_vcpu_manager_revalidate_all_vcpus_cache() {
skip_if_not_root!();
*(EMULATE_RES.lock().unwrap()) = EmulationCase::Error(libc::EINTR);
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// revalidate all success
assert!(vcpu_manager.revalidate_all_vcpus_cache().is_ok());
}
#[test]
#[cfg(feature = "hotplug")]
fn test_vcpu_manager_resize_cpu() {
skip_if_not_root!();
let vm = get_vm();
let mut vcpu_manager = vm.vcpu_manager().unwrap();
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
// set vcpus in hotplug action
let cpu_ids = vec![0];
vcpu_manager.set_vcpus_action(VcpuAction::Hotplug, cpu_ids);
// vcpu is already in hotplug process
let res = vcpu_manager.resize_vcpu(1, None);
assert!(matches!(
res,
Err(VcpuManagerError::VcpuResize(
VcpuResizeError::VcpuIsHotplugging
))
));
// clear vcpus action
let cpu_ids = vec![0];
vcpu_manager.set_vcpus_action(VcpuAction::None, cpu_ids);
// no upcall channel
let res = vcpu_manager.resize_vcpu(1, None);
assert!(matches!(
res,
Err(VcpuManagerError::VcpuResize(
VcpuResizeError::UpdateNotAllowedPostBoot
))
));
// init upcall channel
let dev_mgr_service = DevMgrService {};
let vsock_backend = VsockInnerBackend::new().unwrap();
let connector = vsock_backend.get_connector();
let epoll_manager = EpollManager::default();
let mut upcall_client =
UpcallClient::new(connector, epoll_manager, dev_mgr_service).unwrap();
assert!(upcall_client.connect().is_ok());
vcpu_manager.set_upcall_channel(Some(Arc::new(upcall_client)));
// success: no need to resize
vcpu_manager.resize_vcpu(1, None).unwrap();
// exceeed max vcpu count
let res = vcpu_manager.resize_vcpu(4, None);
assert!(matches!(
res,
Err(VcpuManagerError::VcpuResize(
VcpuResizeError::ExpectedVcpuExceedMax
))
));
// remove vcpu 0
let res = vcpu_manager.resize_vcpu(0, None);
assert!(matches!(
res,
Err(VcpuManagerError::VcpuResize(
VcpuResizeError::Vcpu0CanNotBeRemoved
))
));
}
}

View File

@@ -492,6 +492,13 @@ impl Vm {
.map_err(StopMicrovmError::DeviceManager)
}
/// Remove upcall client when the VM is destroyed.
#[cfg(feature = "dbs-upcall")]
pub fn remove_upcall(&mut self) -> std::result::Result<(), StopMicrovmError> {
self.upcall_client = None;
Ok(())
}
/// Reset the console into canonical mode.
pub fn reset_console(&self) -> std::result::Result<(), DeviceMgrError> {
self.device_manager.reset_console()

View File

@@ -162,6 +162,11 @@ impl Vmm {
warn!("failed to remove devices: {:?}", e);
}
#[cfg(feature = "dbs-upcall")]
if let Err(e) = vm.remove_upcall() {
warn!("failed to remove upcall: {:?}", e);
}
if let Err(e) = vm.reset_console() {
warn!("Cannot set canonical mode for the terminal. {:?}", e);
}
@@ -174,6 +179,8 @@ impl Vmm {
if let Err(e) = mgr.exit_all_vcpus() {
warn!("Failed to exit vcpu thread. {:?}", e);
}
#[cfg(feature = "dbs-upcall")]
mgr.set_upcall_channel(None);
}
Err(e) => warn!("Failed to get vcpu manager {:?}", e),
}

View File

@@ -13,6 +13,7 @@ use std::time::Duration;
use subprocess::{ExitStatus, Popen, PopenConfig, PopenError, Redirection};
use crate::validate::valid_env;
use crate::{eother, sl};
const DEFAULT_HOOK_TIMEOUT_SEC: i32 = 10;
@@ -206,9 +207,8 @@ impl<'a> HookExecutor<'a> {
let mut envs: Vec<(OsString, OsString)> = Vec::new();
for e in hook.env.iter() {
match e.split_once('=') {
Some((key, value)) => envs.push((OsString::from(key), OsString::from(value))),
None => warn!(sl!(), "env {} of hook {:?} is invalid", e, hook),
if let Some((key, value)) = valid_env(e) {
envs.push((OsString::from(key), OsString::from(value)));
}
}
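To make the shared filtering behaviour concrete, here is a minimal, self-contained sketch (assumed names, not the crate's own code) of the pattern both the agent and the runtime now follow: hook environment strings go through `valid_env`, and anything malformed is dropped before the hook subprocess is spawned.

```rust
use std::ffi::OsString;

// Assumed re-implementation of kata-sys-util's validate::valid_env, matching
// the rules documented later in this diff: key must be non-empty and contain
// no '=' and no NUL; value must contain no NUL; both sides are trimmed.
fn valid_env(e: &str) -> Option<(&str, &str)> {
    let (key, value) = e.split_once('=')?;
    if key.is_empty() || key.as_bytes().contains(&b'\0') || value.as_bytes().contains(&b'\0') {
        return None;
    }
    Some((key.trim(), value.trim()))
}

// Collect only the well-formed KEY=VALUE pairs for the hook's child process.
fn hook_envs(raw: &[String]) -> Vec<(OsString, OsString)> {
    raw.iter()
        .filter_map(|e| valid_env(e))
        .map(|(k, v)| (OsString::from(k), OsString::from(v)))
        .collect()
}

fn main() {
    let raw = vec![
        "PATH=/usr/bin".to_string(),
        "no-equals-sign".to_string(), // dropped
        "=empty-key".to_string(),     // dropped
    ];
    assert_eq!(
        hook_envs(&raw),
        vec![(OsString::from("PATH"), OsString::from("/usr/bin"))]
    );
}
```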

View File

@@ -60,7 +60,7 @@ use crate::sl;
/// Default permission for directories created for mountpoint.
const MOUNT_PERM: u32 = 0o755;
const PROC_MOUNTS_FILE: &str = "/proc/mounts";
pub const PROC_MOUNTS_FILE: &str = "/proc/mounts";
const PROC_FIELDS_PER_LINE: usize = 6;
const PROC_DEVICE_INDEX: usize = 0;
const PROC_PATH_INDEX: usize = 1;

View File

@@ -27,6 +27,27 @@ pub fn verify_id(id: &str) -> Result<(), Error> {
}
}
// check and preserve only valid environment variables
// invalid env var may cause panic, refer to https://doc.rust-lang.org/std/env/fn.set_var.html#panics
// key should not:
// * contain NUL character '\0'
// * contain ASCII equal sign '='
// * be empty
// value should not:
// * contain NUL character '\0'
pub fn valid_env(e: &str) -> Option<(&str, &str)> {
// split the env string at the first '=' so that the key cannot contain '=',
// and to ensure the string contains at least one '='
if let Some((key, value)) = e.split_once('=') {
if !key.is_empty() && !key.as_bytes().contains(&b'\0') && !value.as_bytes().contains(&b'\0')
{
return Some((key.trim(), value.trim()));
}
}
None
}
#[cfg(test)]
mod tests {
use super::*;
@@ -257,4 +278,49 @@ mod tests {
}
}
}
#[test]
fn test_valid_env() {
let env = valid_env("a=b=c");
assert_eq!(Some(("a", "b=c")), env);
let env = valid_env("a=b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a =b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env(" a =b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a= b");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a=b ");
assert_eq!(Some(("a", "b")), env);
let env = valid_env("a=b c ");
assert_eq!(Some(("a", "b c")), env);
let env = valid_env("=b");
assert_eq!(None, env);
let env = valid_env("a=");
assert_eq!(Some(("a", "")), env);
let env = valid_env("a==");
assert_eq!(Some(("a", "=")), env);
let env = valid_env("a");
assert_eq!(None, env);
let invalid_str = vec![97, b'\0', 98];
let invalid_string = std::str::from_utf8(&invalid_str).unwrap();
let invalid_env = format!("{}=value", invalid_string);
let env = valid_env(&invalid_env);
assert_eq!(None, env);
let invalid_env = format!("key={}", invalid_string);
let env = valid_env(&invalid_env);
assert_eq!(None, env);
}
}

View File

@@ -5,7 +5,7 @@
//
use anyhow::{anyhow, Context, Result};
use std::path::PathBuf;
use std::{collections::HashMap, path::PathBuf};
/// Prefix to mark a volume as Kata special.
pub const KATA_VOLUME_TYPE_PREFIX: &str = "kata:";
@@ -19,6 +19,12 @@ pub const KATA_EPHEMERAL_VOLUME_TYPE: &str = "ephemeral";
/// KATA_HOST_DIR_TYPE use for host empty dir
pub const KATA_HOST_DIR_VOLUME_TYPE: &str = "kata:hostdir";
/// KATA_MOUNT_INFO_FILE_NAME is used for the file that holds direct-volume mount info
pub const KATA_MOUNT_INFO_FILE_NAME: &str = "mountInfo.json";
/// KATA_DIRECT_VOLUME_ROOT_PATH is the root path under which direct-volume mount info files are stored
pub const KATA_DIRECT_VOLUME_ROOT_PATH: &str = "/run/kata-containers/shared/direct-volumes";
/// Information about a mount.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct Mount {
@@ -49,6 +55,22 @@ impl Mount {
}
}
/// DirectVolumeMountInfo contains the information needed by Kata
/// to consume a host block device and mount it as a filesystem inside the guest VM.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct DirectVolumeMountInfo {
/// The type of the volume (ie. block)
pub volume_type: String,
/// The device backing the volume.
pub device: String,
/// The filesystem type to be mounted on the volume.
pub fs_type: String,
/// Additional metadata to pass to the agent regarding this volume.
pub metadata: HashMap<String, String>,
/// Additional mount options.
pub options: Vec<String>,
}
/// Check whether a mount type is a marker for Kata specific volume.
pub fn is_kata_special_volume(ty: &str) -> bool {
ty.len() > KATA_VOLUME_TYPE_PREFIX.len() && ty.starts_with(KATA_VOLUME_TYPE_PREFIX)
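As an illustration of how this struct is meant to be consumed, here is a small, self-contained sketch that parses a hypothetical mountInfo.json. The struct is re-declared locally, and the JSON field names simply follow the field names above, since no serde rename attributes are visible in this hunk (so the exact on-disk key casing is an assumption).

```rust
use std::collections::HashMap;

// Re-declared locally for the sketch; mirrors DirectVolumeMountInfo above.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct DirectVolumeMountInfo {
    pub volume_type: String,
    pub device: String,
    pub fs_type: String,
    pub metadata: HashMap<String, String>,
    pub options: Vec<String>,
}

fn main() -> anyhow::Result<()> {
    // Hypothetical contents of a mountInfo.json kept somewhere under
    // /run/kata-containers/shared/direct-volumes/.
    let json = r#"{
        "volume_type": "block",
        "device": "/dev/vdb",
        "fs_type": "ext4",
        "metadata": {},
        "options": ["rw"]
    }"#;
    let info: DirectVolumeMountInfo = serde_json::from_str(json)?;
    println!("mount {} as {} ({})", info.device, info.fs_type, info.volume_type);
    Ok(())
}
```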

View File

@@ -35,7 +35,7 @@ impl MgmtClient {
let unix_socket_path = mgmt_socket_addr(sid).context("Failed to get unix socket path")?;
let s_addr = unix_socket_path
.strip_prefix("unix:")
.context("failed to strix prefix")?;
.context("failed to strip prefix")?;
let sock_path = Path::new("/").join(s_addr).as_path().to_owned();
let client = Client::unix();
Ok(Self {
@@ -49,32 +49,52 @@ impl MgmtClient {
/// Parameter uri should be like "/agent-url" etc.
pub async fn get(&self, uri: &str) -> Result<Response<Body>> {
let url: hyper::Uri = Uri::new(&self.sock_path, uri).into();
let work = self.client.get(url);
match self.timeout {
Some(timeout) => match tokio::time::timeout(timeout, work).await {
Ok(result) => result.map_err(|e| anyhow!(e)),
Err(_) => Err(anyhow!("TIMEOUT")),
},
// if timeout not set, work executes directly
None => work.await.context("failed to GET"),
}
let req = Request::builder()
.method(Method::GET)
.uri(url)
.body(Body::empty())?;
return self.send_request(req).await;
}
/// The HTTP Post method for client
pub async fn post(
&self,
uri: &str,
content_type: &str,
content: &str,
) -> Result<Response<Body>> {
let url: hyper::Uri = Uri::new(&self.sock_path, uri).into();
// build body from content
let body = Body::from(content.to_string());
let req = Request::builder()
.method(Method::POST)
.uri(url)
.header("content-type", content_type)
.body(body)?;
return self.send_request(req).await;
}
/// The http PUT method for client
pub async fn put(&self, uri: &str, data: Vec<u8>) -> Result<Response<Body>> {
let url: hyper::Uri = Uri::new(&self.sock_path, uri).into();
let request = Request::builder()
let req = Request::builder()
.method(Method::PUT)
.uri(url)
.body(Body::from(data))
.unwrap();
let work = self.client.request(request);
.body(Body::from(data))?;
return self.send_request(req).await;
}
async fn send_request(&self, req: Request<Body>) -> Result<Response<Body>> {
let msg = format!("Request ({:?}) to uri {:?}", req.method(), req.uri());
let resp = self.client.request(req);
match self.timeout {
Some(timeout) => match tokio::time::timeout(timeout, work).await {
Some(timeout) => match tokio::time::timeout(timeout, resp).await {
Ok(result) => result.map_err(|e| anyhow!(e)),
Err(_) => Err(anyhow!("TIMEOUT")),
Err(_) => Err(anyhow!("{:?} timeout after {:?}", msg, self.timeout)),
},
None => work.await.context("failed to PUT"),
// if client timeout is not set, request waits with no deadline
None => resp.await.context(format!("{:?} failed", msg)),
}
}
}
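The refactor above routes GET, POST and PUT through a single `send_request`, so the optional-timeout handling is identical for every verb. Below is a minimal standalone sketch of that pattern, using a generic future in place of the hyper client (an assumption made purely for illustration; it assumes tokio with the `time`, `macros` and `rt` features):

```rust
use std::future::Future;
use std::time::Duration;

use anyhow::{anyhow, Context, Result};

// Apply an optional deadline to a request future: with a timeout configured,
// the future is raced against the clock; without one, it is awaited directly.
async fn with_optional_timeout<T, F>(work: F, timeout: Option<Duration>) -> Result<T>
where
    F: Future<Output = Result<T>>,
{
    match timeout {
        Some(t) => match tokio::time::timeout(t, work).await {
            Ok(result) => result,
            Err(_) => Err(anyhow!("request timed out after {:?}", t)),
        },
        // no client timeout set: the request waits with no deadline
        None => work.await.context("request failed"),
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    // A stand-in "request" that completes after 10 ms.
    let work = async {
        tokio::time::sleep(Duration::from_millis(10)).await;
        Ok::<_, anyhow::Error>("done")
    };
    let out = with_optional_timeout(work, Some(Duration::from_secs(1))).await?;
    println!("{}", out);
    Ok(())
}
```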

View File

@@ -329,6 +329,16 @@ version = "3.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8"
[[package]]
name = "byte-unit"
version = "4.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "581ad4b3d627b0c09a0ccb2912148f839acaca0b93cf54cbe42b6c674e86079c"
dependencies = [
"serde",
"utf8-width",
]
[[package]]
name = "byteorder"
version = "1.4.3"
@@ -651,6 +661,20 @@ dependencies = [
"mio",
]
[[package]]
name = "dbs-upcall"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2fa8b67657cd71779eaceea1b5fa989b62a1be629a07be8498417772e5a8d35"
dependencies = [
"anyhow",
"dbs-utils",
"dbs-virtio-devices",
"log",
"thiserror",
"timerfd",
]
[[package]]
name = "dbs-utils"
version = "0.2.1"
@@ -743,6 +767,7 @@ dependencies = [
"dbs-device",
"dbs-interrupt",
"dbs-legacy-devices",
"dbs-upcall",
"dbs-utils",
"dbs-virtio-devices",
"kvm-bindings",
@@ -1362,7 +1387,7 @@ dependencies = [
"anyhow",
"base64",
"bitmask-enum",
"byte-unit",
"byte-unit 3.1.4",
"glob",
"lazy_static",
"num_cpus",
@@ -2281,6 +2306,7 @@ dependencies = [
"anyhow",
"async-trait",
"bitflags",
"byte-unit 4.0.17",
"cgroups-rs",
"futures 0.3.21",
"hypervisor",
@@ -2301,6 +2327,7 @@ dependencies = [
"serde_json",
"slog",
"slog-scope",
"tempfile",
"test-utils",
"tokio",
"uuid",
@@ -3023,6 +3050,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "utf8-width"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "uuid"
version = "0.4.0"

View File

@@ -136,6 +136,14 @@ block_device_driver = "@DEFBLOCKSTORAGEDRIVER_DB@"
# of shim, does not need an external virtiofsd process.
shared_fs = "@DBSHAREDFS@"
# Enable huge pages for VM RAM, default false
# Enabling this will result in the VM memory
# being allocated using huge pages.
# This is useful when you want to use vhost-user network
# stacks within the container. This will automatically
# result in memory preallocation
#enable_hugepages = true
[agent.@PROJECT_TYPE@]
container_pipe_size=@PIPESIZE@
# If enabled, make the agent display debug-level messages.

View File

@@ -20,8 +20,8 @@ pub use types::{
GetIPTablesResponse, GuestDetailsResponse, HealthCheckResponse, IPAddress, IPFamily, Interface,
Interfaces, ListProcessesRequest, MemHotplugByProbeRequest, OnlineCPUMemRequest,
OomEventResponse, ReadStreamRequest, ReadStreamResponse, RemoveContainerRequest,
ReseedRandomDevRequest, Route, Routes, SetGuestDateTimeRequest, SetIPTablesRequest,
SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage,
ReseedRandomDevRequest, ResizeVolumeRequest, Route, Routes, SetGuestDateTimeRequest,
SetIPTablesRequest, SetIPTablesResponse, SignalProcessRequest, StatsContainerResponse, Storage,
TtyWinResizeRequest, UpdateContainerRequest, UpdateInterfaceRequest, UpdateRoutesRequest,
VersionCheckResponse, WaitProcessRequest, WaitProcessResponse, WriteStreamRequest,
WriteStreamResponse,

View File

@@ -7,7 +7,7 @@
use anyhow::{anyhow, Result};
use std::convert::TryFrom;
use serde::Deserialize;
use serde::{Deserialize, Serialize};
#[derive(PartialEq, Clone, Default)]
pub struct Empty {}
@@ -561,6 +561,14 @@ pub struct OomEventResponse {
pub container_id: String,
}
// ResizeVolumeRequest is also the common struct serialized to/from JSON
// for shim-client HTTP calls to the shim-mgmt-server
#[derive(Serialize, Deserialize, PartialEq, Clone, Default, Debug)]
pub struct ResizeVolumeRequest {
pub volume_guest_path: String,
pub size: u64,
}
#[cfg(test)]
mod test {
use std::convert::TryFrom;

View File

@@ -30,6 +30,6 @@ kata-types = { path = "../../../libs/kata-types" }
logging = { path = "../../../libs/logging" }
shim-interface = { path = "../../../libs/shim-interface" }
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs"] }
dragonball = { path = "../../../dragonball", features = ["atomic-guest-memory", "virtio-vsock", "hotplug", "virtio-blk", "virtio-net", "virtio-fs","dbs-upcall"] }
[features]

View File

@@ -7,7 +7,7 @@
use super::vmm_instance::VmmInstance;
use crate::{
device::Device, hypervisor_persist::HypervisorState, kernel_param::KernelParams, VmmState,
HYPERVISOR_DRAGONBALL, VM_ROOTFS_DRIVER_BLK,
DEV_HUGEPAGES, HUGETLBFS, HYPERVISOR_DRAGONBALL, SHMEM, VM_ROOTFS_DRIVER_BLK,
};
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
@@ -187,11 +187,18 @@ impl DragonballInner {
fn set_vm_base_config(&mut self) -> Result<()> {
let serial_path = [&self.run_dir, "console.sock"].join("/");
let (mem_type, mem_file_path) = if self.config.memory_info.enable_hugepages {
(String::from(HUGETLBFS), String::from(DEV_HUGEPAGES))
} else {
(String::from(SHMEM), String::from(""))
};
let vm_config = VmConfigInfo {
serial_path: Some(serial_path),
mem_size_mib: self.config.memory_info.default_memory as usize,
vcpu_count: self.config.cpu_info.default_vcpus as u8,
max_vcpu_count: self.config.cpu_info.default_maxvcpus as u8,
mem_type,
mem_file_path,
..Default::default()
};
info!(sl!(), "vm config: {:?}", vm_config);

View File

@@ -327,9 +327,9 @@ impl VmmInstance {
}
}
}
return Err(anyhow::anyhow!(
Err(anyhow::anyhow!(
"After {} attempts, it still doesn't work.",
REQUEST_RETRY
));
))
}
}

View File

@@ -27,6 +27,13 @@ use kata_types::config::hypervisor::Hypervisor as HypervisorConfig;
// Config which driver to use as vm root dev
const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk";
const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem";
// before using hugepages for VM, we need to mount hugetlbfs
// /dev/hugepages will be the mount point
// mkdir -p /dev/hugepages
// mount -t hugetlbfs none /dev/hugepages
const DEV_HUGEPAGES: &str = "/dev/hugepages";
pub const HUGETLBFS: &str = "hugetlbfs";
const SHMEM: &str = "shmem";
pub const HYPERVISOR_DRAGONBALL: &str = "dragonball";
pub const HYPERVISOR_QEMU: &str = "qemu";

View File

@@ -26,7 +26,7 @@ pub fn to_disk<T: serde::Serialize>(value: &T, sid: &str) -> Result<()> {
serde_json::to_writer_pretty(f, &j)?;
return Ok(());
}
return Err(anyhow!("invalid sid {}", sid));
Err(anyhow!("invalid sid {}", sid))
}
pub fn from_disk<T>(sid: &str) -> Result<T>
@@ -41,7 +41,7 @@ where
let reader = BufReader::new(file);
return serde_json::from_reader(reader).map_err(|e| anyhow!(e.to_string()));
}
return Err(anyhow!("invalid sid {}", sid));
Err(anyhow!("invalid sid {}", sid))
}
#[cfg(test)]

View File

@@ -7,11 +7,13 @@ license = "Apache-2.0"
[dev-dependencies]
test-utils = { path = "../../../libs/test-utils" }
tempfile = "3.2.0"
[dependencies]
anyhow = "^1.0"
async-trait = "0.1.48"
bitflags = "1.2.1"
byte-unit = "4.0.14"
cgroups-rs = "0.2.9"
futures = "0.3.11"
lazy_static = "1.4.0"

View File

@@ -81,10 +81,10 @@ impl ResourceManager {
pub async fn handler_volumes(
&self,
cid: &str,
oci_mounts: &[oci::Mount],
spec: &oci::Spec,
) -> Result<Vec<Arc<dyn Volume>>> {
let inner = self.inner.read().await;
inner.handler_volumes(cid, oci_mounts).await
inner.handler_volumes(cid, spec).await
}
pub async fn dump(&self) {

View File

@@ -216,10 +216,10 @@ impl ResourceManagerInner {
pub async fn handler_volumes(
&self,
cid: &str,
oci_mounts: &[oci::Mount],
spec: &oci::Spec,
) -> Result<Vec<Arc<dyn Volume>>> {
self.volume_resource
.handler_volumes(&self.share_fs, cid, oci_mounts)
.handler_volumes(&self.share_fs, cid, spec)
.await
}

View File

@@ -27,6 +27,7 @@ pub trait Rootfs: Send + Sync {
async fn get_guest_rootfs_path(&self) -> Result<String>;
async fn get_rootfs_mount(&self) -> Result<Vec<oci::Mount>>;
async fn get_storage(&self) -> Option<Storage>;
async fn cleanup(&self) -> Result<()>;
}
#[derive(Default)]
@@ -66,11 +67,10 @@ impl RootFsResource {
// if rootfs_mounts is empty
mounts_vec if mounts_vec.is_empty() => {
if let Some(share_fs) = share_fs {
let share_fs_mount = share_fs.get_share_fs_mount();
// share fs rootfs
Ok(Arc::new(
share_fs_rootfs::ShareFsRootfs::new(
&share_fs_mount,
share_fs,
cid,
root.path.as_str(),
None,
@@ -86,25 +86,18 @@ impl RootFsResource {
// Safe as single_layer_rootfs must have one layer
let layer = &mounts_vec[0];
let rootfs: Arc<dyn Rootfs> = if let Some(share_fs) = share_fs {
let share_fs_mount = share_fs.get_share_fs_mount();
// nydus rootfs
if layer.fs_type == NYDUS_ROOTFS_TYPE {
Arc::new(
nydus_rootfs::NydusRootfs::new(
&share_fs_mount,
hypervisor,
sid,
cid,
layer,
)
.await
.context("new nydus rootfs")?,
nydus_rootfs::NydusRootfs::new(share_fs, hypervisor, sid, cid, layer)
.await
.context("new nydus rootfs")?,
)
} else {
// share fs rootfs
Arc::new(
share_fs_rootfs::ShareFsRootfs::new(
&share_fs_mount,
share_fs,
cid,
bundle_path,
Some(layer),

View File

@@ -9,8 +9,8 @@ use super::{Rootfs, TYPE_OVERLAY_FS};
use crate::{
rootfs::{HYBRID_ROOTFS_LOWER_DIR, ROOTFS},
share_fs::{
do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path, rafs_mount,
ShareFsMount, ShareFsRootfsConfig, PASSTHROUGH_FS_DIR,
do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path, rafs_mount, ShareFs,
ShareFsRootfsConfig, PASSTHROUGH_FS_DIR,
},
};
use agent::Storage;
@@ -36,12 +36,13 @@ pub(crate) struct NydusRootfs {
impl NydusRootfs {
pub async fn new(
share_fs_mount: &Arc<dyn ShareFsMount>,
share_fs: &Arc<dyn ShareFs>,
h: &dyn Hypervisor,
sid: &str,
cid: &str,
rootfs: &Mount,
) -> Result<Self> {
let share_fs_mount = share_fs.get_share_fs_mount();
let extra_options =
NydusExtraOptions::new(rootfs).context("failed to parse nydus extra options")?;
info!(sl!(), "extra_option {:?}", &extra_options);
@@ -72,7 +73,7 @@ impl NydusRootfs {
let rootfs_guest_path = do_get_guest_path(ROOTFS, cid, false, false);
// bind mount the snapshot dir under the share directory
share_fs_mount
.share_rootfs(ShareFsRootfsConfig {
.share_rootfs(&ShareFsRootfsConfig {
cid: cid.to_string(),
source: extra_options.snapshot_dir.clone(),
target: SNAPSHOT_DIR.to_string(),
@@ -143,4 +144,10 @@ impl Rootfs for NydusRootfs {
async fn get_storage(&self) -> Option<Storage> {
Some(self.rootfs.clone())
}
async fn cleanup(&self) -> Result<()> {
// TODO: Clean up NydusRootfs after the container is killed
warn!(sl!(), "Cleaning up NydusRootfs is still unimplemented.");
Ok(())
}
}

View File

@@ -7,20 +7,22 @@
use agent::Storage;
use anyhow::{Context, Result};
use async_trait::async_trait;
use kata_sys_util::mount::Mounter;
use kata_sys_util::mount::{umount_timeout, Mounter};
use kata_types::mount::Mount;
use std::sync::Arc;
use super::{Rootfs, ROOTFS};
use crate::share_fs::{ShareFsMount, ShareFsRootfsConfig};
use crate::share_fs::{ShareFs, ShareFsRootfsConfig};
pub(crate) struct ShareFsRootfs {
guest_path: String,
share_fs: Arc<dyn ShareFs>,
config: ShareFsRootfsConfig,
}
impl ShareFsRootfs {
pub async fn new(
share_fs_mount: &Arc<dyn ShareFsMount>,
share_fs: &Arc<dyn ShareFs>,
cid: &str,
bundle_path: &str,
rootfs: Option<&Mount>,
@@ -35,19 +37,25 @@ impl ShareFsRootfs {
} else {
bundle_path.to_string()
};
let share_fs_mount = share_fs.get_share_fs_mount();
let config = ShareFsRootfsConfig {
cid: cid.to_string(),
source: bundle_rootfs.to_string(),
target: ROOTFS.to_string(),
readonly: false,
is_rafs: false,
};
let mount_result = share_fs_mount
.share_rootfs(ShareFsRootfsConfig {
cid: cid.to_string(),
source: bundle_rootfs.to_string(),
target: ROOTFS.to_string(),
readonly: false,
is_rafs: false,
})
.share_rootfs(&config)
.await
.context("share rootfs")?;
Ok(ShareFsRootfs {
guest_path: mount_result.guest_path,
share_fs: Arc::clone(share_fs),
config,
})
}
}
@@ -65,4 +73,17 @@ impl Rootfs for ShareFsRootfs {
async fn get_storage(&self) -> Option<Storage> {
None
}
async fn cleanup(&self) -> Result<()> {
// Umount the mount point shared to guest
let share_fs_mount = self.share_fs.get_share_fs_mount();
share_fs_mount
.umount_rootfs(&self.config)
.await
.context("umount shared rootfs")?;
// Umount the bundle rootfs
umount_timeout(&self.config.source, 0).context("umount bundle rootfs")?;
Ok(())
}
}

View File

@@ -15,6 +15,7 @@ use tokio::sync::Mutex;
pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path};
mod virtio_fs_share_mount;
use virtio_fs_share_mount::VirtiofsShareMount;
pub use virtio_fs_share_mount::EPHEMERAL_PATH;
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
@@ -47,7 +48,7 @@ pub trait ShareFs: Send + Sync {
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>>;
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ShareFsRootfsConfig {
// TODO: for nydus v5/v6 need to update ShareFsMount
pub cid: String,
@@ -120,14 +121,16 @@ impl MountedInfo {
#[async_trait]
pub trait ShareFsMount: Send + Sync {
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
async fn share_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
async fn share_volume(&self, config: &ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
/// Upgrade to readwrite permission
async fn upgrade_to_rw(&self, file_name: &str) -> Result<()>;
/// Downgrade to readonly permission
async fn downgrade_to_ro(&self, file_name: &str) -> Result<()>;
/// Umount the volume
async fn umount(&self, file_name: &str) -> Result<()>;
async fn umount_volume(&self, file_name: &str) -> Result<()>;
/// Umount the rootfs
async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()>;
}
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {

View File

@@ -17,7 +17,7 @@ use std::path::Path;
const WATCHABLE_PATH_NAME: &str = "watchable";
const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
pub const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
use super::{
utils::{self, do_get_host_path},
@@ -38,7 +38,7 @@ impl VirtiofsShareMount {
#[async_trait]
impl ShareFsMount for VirtiofsShareMount {
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult> {
async fn share_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<ShareFsMountResult> {
// TODO: select virtiofs or support nydus
let guest_path = utils::share_to_guest(
&config.source,
@@ -56,7 +56,7 @@ impl ShareFsMount for VirtiofsShareMount {
})
}
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult> {
async fn share_volume(&self, config: &ShareFsVolumeConfig) -> Result<ShareFsMountResult> {
let mut guest_path = utils::share_to_guest(
&config.source,
&config.target,
@@ -103,7 +103,7 @@ impl ShareFsMount for VirtiofsShareMount {
source: guest_path,
fs_type: String::from("bind"),
fs_group: None,
options: config.mount_options,
options: config.mount_options.clone(),
mount_point: watchable_guest_mount.clone(),
};
@@ -194,10 +194,34 @@ impl ShareFsMount for VirtiofsShareMount {
Ok(())
}
async fn umount(&self, file_name: &str) -> Result<()> {
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
umount_timeout(host_dest, 0).context("Umount readwrite host dest")?;
async fn umount_volume(&self, file_name: &str) -> Result<()> {
let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
umount_timeout(&host_dest, 0).context("umount volume")?;
// The umount event will be propagated to the read-only directory
// Remove the mountpoint directory
if let Ok(md) = fs::metadata(&host_dest) {
if md.is_file() {
fs::remove_file(&host_dest).context("remove the volume mount point as a file")?;
}
if md.is_dir() {
fs::remove_dir(&host_dest).context("remove the volume mount point as a dir")?;
}
}
Ok(())
}
async fn umount_rootfs(&self, config: &ShareFsRootfsConfig) -> Result<()> {
let host_dest = do_get_host_path(&config.target, &self.id, &config.cid, false, false);
umount_timeout(&host_dest, 0).context("umount rootfs")?;
// Remove the mountpoint directory
if let Ok(md) = fs::metadata(&host_dest) {
if md.is_dir() {
fs::remove_dir(&host_dest).context("remove the rootfs mount point as a dir")?;
}
}
Ok(())
}
}

View File

@@ -30,6 +30,7 @@ impl Volume for BlockVolume {
}
async fn cleanup(&self) -> Result<()> {
// TODO: Clean up BlockVolume
warn!(sl!(), "Cleaning up BlockVolume is still unimplemented.");
Ok(())
}

View File

@@ -34,6 +34,7 @@ impl Volume for DefaultVolume {
}
async fn cleanup(&self) -> Result<()> {
// TODO: Clean up DefaultVolume
warn!(sl!(), "Cleaning up DefaultVolume is still unimplemented.");
Ok(())
}

View File

@@ -0,0 +1,223 @@
// Copyright (c) 2019-2022 Alibaba Cloud
// Copyright (c) 2019-2022 Ant Group
//
// SPDX-License-Identifier: Apache-2.0
//
use std::{
collections::HashMap,
fs::File,
io::{BufRead, BufReader},
};
use crate::share_fs::EPHEMERAL_PATH;
use agent::Storage;
use anyhow::{anyhow, Context, Ok, Result};
use async_trait::async_trait;
use byte_unit::Byte;
use hypervisor::HUGETLBFS;
use kata_sys_util::{fs::get_base_name, mount::PROC_MOUNTS_FILE};
use kata_types::mount::KATA_EPHEMERAL_VOLUME_TYPE;
use super::{Volume, BIND};
type PageSize = Byte;
type Limit = u64;
const NODEV: &str = "nodev";
// container hugepage
pub(crate) struct Hugepage {
// storage info
storage: Option<Storage>,
// mount info
mount: oci::Mount,
}
// handle hugepage
impl Hugepage {
pub(crate) fn new(
mount: &oci::Mount,
hugepage_limits_map: HashMap<PageSize, Limit>,
fs_options: Vec<String>,
) -> Result<Self> {
// Create mount option string
let page_size = get_page_size(fs_options).context("failed to get page size")?;
let option = hugepage_limits_map
.get(&page_size)
.map(|limit| format!("pagesize={},size={}", page_size.get_bytes(), limit))
.context("failed to get hugepage option")?;
let base_name = get_base_name(mount.source.clone())?
.into_string()
.map_err(|e| anyhow!("failed to convert to string{:?}", e))?;
let mut mount = mount.clone();
// Set the mount source path to a path that resides inside the VM
mount.source = format!("{}{}{}", EPHEMERAL_PATH, "/", base_name);
// Set the mount type to "bind"
mount.r#type = BIND.to_string();
// Create a storage struct so that kata agent is able to create
// hugetlbfs backed volume inside the VM
let storage = Storage {
driver: KATA_EPHEMERAL_VOLUME_TYPE.to_string(),
source: NODEV.to_string(),
fs_type: HUGETLBFS.to_string(),
mount_point: mount.source.clone(),
options: vec![option],
..Default::default()
};
Ok(Self {
storage: Some(storage),
mount,
})
}
}
#[async_trait]
impl Volume for Hugepage {
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>> {
Ok(vec![self.mount.clone()])
}
fn get_storage(&self) -> Result<Vec<agent::Storage>> {
let s = if let Some(s) = self.storage.as_ref() {
vec![s.clone()]
} else {
vec![]
};
Ok(s)
}
async fn cleanup(&self) -> Result<()> {
Ok(())
}
}
pub(crate) fn get_huge_page_option(m: &oci::Mount) -> Result<Option<Vec<String>>> {
if m.source.is_empty() {
return Err(anyhow!("empty mount source"));
}
let file = File::open(PROC_MOUNTS_FILE).context("failed open file")?;
let reader = BufReader::new(file);
for line in reader.lines().flatten() {
let items: Vec<&str> = line.split(' ').collect();
if m.source == items[1] && items[2] == HUGETLBFS {
let fs_options: Vec<&str> = items[3].split(',').collect();
return Ok(Some(
fs_options
.iter()
.map(|&s| s.to_string())
.collect::<Vec<String>>(),
));
}
}
Ok(None)
}
// TODO add hugepage limit to sandbox memory once memory hotplug is enabled
// https://github.com/kata-containers/kata-containers/issues/5880
pub(crate) fn get_huge_page_limits_map(spec: &oci::Spec) -> Result<HashMap<PageSize, Limit>> {
let mut hugepage_limits_map: HashMap<PageSize, Limit> = HashMap::new();
if let Some(l) = &spec.linux {
if let Some(r) = &l.resources {
let hugepage_limits = r.hugepage_limits.clone();
for hugepage_limit in hugepage_limits {
// the pagesize sent from the OCI spec is in MB or GB; change it to Mi and Gi
let page_size = hugepage_limit.page_size.replace('B', "i");
let page_size = Byte::from_str(page_size)
.context("failed to create Byte object from String")?;
hugepage_limits_map.insert(page_size, hugepage_limit.limit);
}
return Ok(hugepage_limits_map);
}
return Ok(hugepage_limits_map);
}
Ok(hugepage_limits_map)
}
fn get_page_size(fs_options: Vec<String>) -> Result<Byte> {
for fs_option in fs_options {
if fs_option.starts_with("pagesize=") {
let page_size = fs_option
.strip_prefix("pagesize=")
// the parameters passed are in unit M or G, append i to be Mi and Gi
.map(|s| format!("{}i", s))
.context("failed to strip prefix pagesize")?;
return Byte::from_str(page_size)
.map_err(|_| anyhow!("failed to convert string to byte"));
}
}
Err(anyhow!("failed to get page size"))
}
#[cfg(test)]
mod tests {
use std::{collections::HashMap, fs};
use crate::volume::hugepage::{get_page_size, HUGETLBFS, NODEV};
use super::{get_huge_page_limits_map, get_huge_page_option};
use byte_unit::Byte;
use nix::mount::{mount, umount, MsFlags};
use oci::{Linux, LinuxHugepageLimit, LinuxResources};
use test_utils::skip_if_not_root;
#[test]
fn test_get_huge_page_option() {
let format_sizes = ["1GB", "2MB"];
let mut huge_page_limits: Vec<LinuxHugepageLimit> = vec![];
for format_size in format_sizes {
huge_page_limits.push(LinuxHugepageLimit {
page_size: format_size.to_string(),
limit: 100000,
});
}
let spec = oci::Spec {
linux: Some(Linux {
resources: Some(LinuxResources {
hugepage_limits: huge_page_limits,
..Default::default()
}),
..Default::default()
}),
..Default::default()
};
assert!(get_huge_page_limits_map(&spec).is_ok());
let mut expect_res = HashMap::new();
expect_res.insert(Byte::from_str("1Gi").ok().unwrap(), 100000);
expect_res.insert(Byte::from_str("2Mi").ok().unwrap(), 100000);
assert_eq!(get_huge_page_limits_map(&spec).unwrap(), expect_res);
}
#[test]
fn test_get_huge_page_size() {
skip_if_not_root!();
let format_sizes = ["1Gi", "2Mi"];
for format_size in format_sizes {
let dir = tempfile::tempdir().unwrap();
let dst = dir.path().join(format!("hugepages-{}", format_size));
fs::create_dir_all(&dst).unwrap();
mount(
Some(NODEV),
&dst,
Some(HUGETLBFS),
MsFlags::MS_NODEV,
Some(format!("pagesize={}", format_size).as_str()),
)
.unwrap();
let mount = oci::Mount {
source: dst.to_str().unwrap().to_string(),
..Default::default()
};
let option = get_huge_page_option(&mount).unwrap().unwrap();
let page_size = get_page_size(option).unwrap();
assert_eq!(page_size, Byte::from_str(format_size).unwrap());
umount(&dst).unwrap();
fs::remove_dir(&dst).unwrap();
}
}
}

View File

@@ -6,17 +6,20 @@
mod block_volume;
mod default_volume;
pub mod hugepage;
mod share_fs_volume;
mod shm_volume;
use async_trait::async_trait;
use std::{sync::Arc, vec::Vec};
use anyhow::{Context, Result};
use std::{sync::Arc, vec::Vec};
use tokio::sync::RwLock;
use crate::share_fs::ShareFs;
use self::hugepage::{get_huge_page_limits_map, get_huge_page_option};
const BIND: &str = "bind";
#[async_trait]
pub trait Volume: Send + Sync {
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
@@ -43,9 +46,11 @@ impl VolumeResource {
&self,
share_fs: &Option<Arc<dyn ShareFs>>,
cid: &str,
oci_mounts: &[oci::Mount],
spec: &oci::Spec,
) -> Result<Vec<Arc<dyn Volume>>> {
let mut volumes: Vec<Arc<dyn Volume>> = vec![];
let oci_mounts = &spec.mounts;
// handle mounts
for m in oci_mounts {
let volume: Arc<dyn Volume> = if shm_volume::is_shim_volume(m) {
let shm_size = shm_volume::DEFAULT_SHM_SIZE;
@@ -59,6 +64,17 @@ impl VolumeResource {
.await
.with_context(|| format!("new share fs volume {:?}", m))?,
)
} else if let Some(options) =
get_huge_page_option(m).context("failed to check huge page")?
{
// get hugepage limits from oci
let hugepage_limits =
get_huge_page_limits_map(spec).context("get huge page option")?;
// handle container hugepage
Arc::new(
hugepage::Hugepage::new(m, hugepage_limits, options)
.with_context(|| format!("handle hugepages {:?}", m))?,
)
} else if block_volume::is_block_volume(m) {
Arc::new(
block_volume::BlockVolume::new(m)

View File

@@ -7,7 +7,7 @@
use std::{
path::{Path, PathBuf},
str::FromStr,
sync::{Arc, Weak},
sync::Arc,
};
use anyhow::{anyhow, Context, Result};
@@ -24,7 +24,7 @@ use kata_types::mount;
// device nodes to the guest.
// skip the volumes whose source had already set to guest share dir.
pub(crate) struct ShareFsVolume {
share_fs: Option<Weak<dyn ShareFs>>,
share_fs: Option<Arc<dyn ShareFs>>,
mounts: Vec<oci::Mount>,
storages: Vec<agent::Storage>,
}
@@ -40,7 +40,7 @@ impl ShareFsVolume {
let file_name = generate_mount_path("sandbox", file_name);
let mut volume = Self {
share_fs: share_fs.as_ref().map(Arc::downgrade),
share_fs: share_fs.as_ref().map(Arc::clone),
mounts: vec![],
storages: vec![],
};
@@ -112,7 +112,7 @@ impl ShareFsVolume {
} else {
// Not mounted ever
let mount_result = share_fs_mount
.share_volume(ShareFsVolumeConfig {
.share_volume(&ShareFsVolumeConfig {
// The scope of shared volume is sandbox
cid: String::from(""),
source: m.source.clone(),
@@ -158,12 +158,9 @@ impl Volume for ShareFsVolume {
}
async fn cleanup(&self) -> Result<()> {
if self.share_fs.is_none() {
return Ok(());
}
let share_fs = match self.share_fs.as_ref().unwrap().upgrade() {
Some(share_fs) => share_fs,
None => return Err(anyhow!("The share_fs was released unexpectedly")),
let share_fs = match self.share_fs.as_ref() {
Some(fs) => fs,
None => return Ok(()),
};
let mounted_info_set = share_fs.mounted_info_set();
@@ -219,7 +216,7 @@ impl Volume for ShareFsVolume {
mounted_info_set.remove(&host_source);
// Umount the volume
share_fs_mount
.umount(&file_name)
.umount_volume(&file_name)
.await
.context("Umount volume")?
}

View File

@@ -100,6 +100,7 @@ impl Volume for ShmVolume {
}
async fn cleanup(&self) -> Result<()> {
// TODO: Clean up ShmVolume
warn!(sl!(), "Cleaning up ShmVolume is still unimplemented.");
Ok(())
}

View File

@@ -119,7 +119,7 @@ impl Container {
// handler volumes
let volumes = self
.resource_manager
.handler_volumes(&config.container_id, &spec.mounts)
.handler_volumes(&config.container_id, &spec)
.await
.context("handler volumes")?;
let mut oci_mounts = vec![];
@@ -400,7 +400,6 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
resource.devices = Vec::new();
resource.pids = None;
resource.block_io = None;
resource.hugepage_limits = Vec::new();
resource.network = None;
resource.rdma = HashMap::new();
}

View File

@@ -249,6 +249,7 @@ impl ContainerInner {
.await?;
self.clean_volumes().await.context("clean volumes")?;
self.clean_rootfs().await.context("clean rootfs")?;
Ok(())
}
@@ -279,7 +280,7 @@ impl ContainerInner {
unhandled.push(Arc::clone(v));
warn!(
sl!(),
"Failed to clean volume {:?}, error = {:?}",
"Failed to clean the volume = {:?}, error = {:?}",
v.get_volume_mount(),
err
);
@@ -290,4 +291,23 @@ impl ContainerInner {
}
Ok(())
}
async fn clean_rootfs(&mut self) -> Result<()> {
let mut unhandled = Vec::new();
for rootfs in self.rootfs.iter() {
if let Err(err) = rootfs.cleanup().await {
unhandled.push(Arc::clone(rootfs));
warn!(
sl!(),
"Failed to umount rootfs, cid = {:?}, error = {:?}",
self.container_id(),
err
);
}
}
if !unhandled.is_empty() {
self.rootfs = unhandled;
}
Ok(())
}
}

View File

@@ -248,6 +248,12 @@ impl Sandbox for VirtSandbox {
.await
.context("delete cgroups")?;
info!(sl!(), "delete hypervisor");
self.hypervisor
.cleanup()
.await
.context("delete hypervisor")?;
info!(sl!(), "stop monitor");
self.monitor.stop().await;

View File

@@ -898,15 +898,15 @@ endif
@printf "\tbinary installation path (BINDIR) : %s\n" $(abspath $(BINDIR))
@printf "\tbinaries to install :\n"
@printf \
"$(foreach b,$(sort $(BINLIST)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
"$(foreach b,$(sort $(BINLIST)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
@printf \
"$(foreach b,$(sort $(SHIMV2)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
"$(foreach b,$(sort $(SHIMV2)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
@printf \
"$(foreach b,$(sort $(MONITOR)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
"$(foreach b,$(sort $(MONITOR)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(b))\\\n"))"
@printf \
"$(foreach b,$(sort $(BINLIBEXECLIST)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(PKGLIBEXECDIR)/$(b))\\\n"))"
"$(foreach b,$(sort $(BINLIBEXECLIST)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(PKGLIBEXECDIR)/$(b))\\\n"))"
@printf \
"$(foreach s,$(sort $(SCRIPTS)),$(shell printf "\\t - $(shell readlink -m $(DESTDIR)/$(BINDIR)/$(s))\\\n"))"
"$(foreach s,$(sort $(SCRIPTS)),$(shell printf "\\t - $(abspath $(DESTDIR)/$(BINDIR)/$(s))\\\n"))"
@printf "\tconfigs to install (CONFIGS) :\n"
@printf \
"$(foreach c,$(sort $(CONFIGS)),$(shell printf "\\t - $(c)\\\n"))"

View File

@@ -41,7 +41,7 @@ func TestCreateSandboxSuccess(t *testing.T) {
},
}
testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) {
testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) {
return sandbox, nil
}

View File

@@ -14,6 +14,7 @@ import (
"net/http"
"net/http/pprof"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
@@ -306,8 +307,19 @@ func GetSandboxesStoragePath() string {
return "/run/vc/sbs"
}
// GetSandboxesStoragePathRust returns the storage path where sandbox info is stored by the Rust runtime (runtime-rs)
func GetSandboxesStoragePathRust() string {
return "/run/kata"
}
// SocketAddress returns the address of the unix domain socket for communicating with the
// shim management endpoint
func SocketAddress(id string) string {
return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock"))
socketAddress := fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePath(), id, "shim-monitor.sock"))
_, err := os.Stat(socketAddress)
// if the path does not exist, fall back to the Rust runtime path
if err != nil {
return fmt.Sprintf("unix://%s", filepath.Join(string(filepath.Separator), GetSandboxesStoragePathRust(), id, "shim-monitor.sock"))
}
return socketAddress
}

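For orientation, a small Go sketch (illustrative only, not part of this change) of how a caller might consume the address returned by SocketAddress: strip the unix:// scheme and dial the remaining socket path. The package and function names below are assumptions.

package example

import (
	"net"
	"strings"
)

// dialShimMonitor dials the shim management socket address produced above.
// It assumes the "unix://" prefix is present and removes it before dialing.
func dialShimMonitor(address string) (net.Conn, error) {
	path := strings.TrimPrefix(address, "unix://")
	return net.Dial("unix", path)
}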
View File

@@ -162,17 +162,19 @@ func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec specs.Spec, runtimeCo
ociSpec.Annotations["nerdctl/network-namespace"] = sandboxConfig.NetworkConfig.NetworkID
sandboxConfig.Annotations["nerdctl/network-namespace"] = ociSpec.Annotations["nerdctl/network-namespace"]
// Run pre-start OCI hooks, in the runtime namespace.
if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
return nil, vc.Process{}, err
}
sandbox, err := vci.CreateSandbox(ctx, sandboxConfig, func(ctx context.Context) error {
// Run pre-start OCI hooks, in the runtime namespace.
if err := PreStartHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
return err
}
// Run create runtime OCI hooks, in the runtime namespace.
if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
return nil, vc.Process{}, err
}
// Run create runtime OCI hooks, in the runtime namespace.
if err := CreateRuntimeHooks(ctx, ociSpec, containerID, bundlePath); err != nil {
return err
}
sandbox, err := vci.CreateSandbox(ctx, sandboxConfig)
return nil
})
if err != nil {
return nil, vc.Process{}, err
}
@@ -255,6 +257,12 @@ func CreateContainer(ctx context.Context, sandbox vc.VCSandbox, ociSpec specs.Sp
return vc.Process{}, err
}
hid, err := sandbox.GetHypervisorPid()
if err != nil {
return vc.Process{}, err
}
ctx = context.WithValue(ctx, vc.HypervisorPidKey{}, hid)
// Run pre-start OCI hooks.
err = EnterNetNS(sandbox.GetNetNs(), func() error {
return PreStartHooks(ctx, ociSpec, containerID, bundlePath)

View File

@@ -274,7 +274,7 @@ func TestCreateSandboxAnnotations(t *testing.T) {
rootFs := vc.RootFs{Mounted: true}
testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) {
testingImpl.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) {
return &vcmock.Sandbox{
MockID: testSandboxID,
MockContainers: []*vcmock.Container{

View File

@@ -17,6 +17,7 @@ import (
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
syscallWrapper "github.com/kata-containers/kata-containers/src/runtime/pkg/syscall"
vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@@ -38,8 +39,16 @@ func runHook(ctx context.Context, spec specs.Spec, hook specs.Hook, cid, bundleP
defer span.End()
katatrace.AddTags(span, "path", hook.Path, "args", hook.Args)
pid, ok := ctx.Value(vc.HypervisorPidKey{}).(int)
if !ok || pid == 0 {
hookLogger().Info("no hypervisor pid")
pid = syscallWrapper.Gettid()
}
hookLogger().Infof("hypervisor pid %v", pid)
state := specs.State{
Pid: syscallWrapper.Gettid(),
Pid: pid,
Bundle: bundlePath,
ID: cid,
Annotations: spec.Annotations,

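As background, a self-contained Go sketch (illustrative only, not part of this diff) of the typed context-key pattern used above to carry the hypervisor PID from the sandbox layer to the hook runner; the helper names are assumptions.

package main

import (
	"context"
	"fmt"
)

// hypervisorPidKey mirrors the empty-struct key style used above; a dedicated
// key type avoids collisions with other context values.
type hypervisorPidKey struct{}

func withHypervisorPid(ctx context.Context, pid int) context.Context {
	return context.WithValue(ctx, hypervisorPidKey{}, pid)
}

func hypervisorPid(ctx context.Context) (int, bool) {
	pid, ok := ctx.Value(hypervisorPidKey{}).(int)
	return pid, ok
}

func main() {
	ctx := withHypervisorPid(context.Background(), 4242)
	if pid, ok := hypervisorPid(ctx); ok {
		fmt.Println("hypervisor pid:", pid) // prints 4242
	}
}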
View File

@@ -23,21 +23,6 @@ var signalLog = logrus.WithField("default-signal-logger", true)
// or a fatal signal is received.
var CrashOnError = false
// List of handled signals.
//
// The value is true if receiving the signal should be fatal.
var handledSignalsMap = map[syscall.Signal]bool{
syscall.SIGABRT: true,
syscall.SIGBUS: true,
syscall.SIGILL: true,
syscall.SIGQUIT: true,
syscall.SIGSEGV: true,
syscall.SIGSTKFLT: true,
syscall.SIGSYS: true,
syscall.SIGTRAP: true,
syscall.SIGUSR1: false,
}
// DieCb is the callback function type that needs to be defined for every call
// into the Die() function. This callback will be run as the first function of
// the Die() implementation.

View File

@@ -0,0 +1,22 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package signals
import "syscall"
// List of handled signals.
//
// The value is true if receiving the signal should be fatal.
var handledSignalsMap = map[syscall.Signal]bool{
syscall.SIGABRT: true,
syscall.SIGBUS: true,
syscall.SIGILL: true,
syscall.SIGQUIT: true,
syscall.SIGSEGV: true,
syscall.SIGSYS: true,
syscall.SIGTRAP: true,
syscall.SIGUSR1: false,
}

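The Darwin map above omits SIGSTKFLT because that signal is Linux-only; since neither file carries an explicit //go:build tag, Go presumably selects the right one via the _linux/_darwin filename suffixes. A hedged sketch of the equivalent explicit build-constraint form (the file name, tag, and reduced map are illustrative):

// signals_bsd.go: illustrative only; the files above rely on _linux/_darwin
// filename suffixes rather than an explicit constraint like this one.
//go:build freebsd || netbsd || openbsd

package signals

import "syscall"

// SIGSTKFLT does not exist outside Linux, so a non-Linux map omits it.
var handledSignalsMap = map[syscall.Signal]bool{
	syscall.SIGABRT: true,
	syscall.SIGBUS:  true,
	syscall.SIGUSR1: false,
}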
View File

@@ -0,0 +1,23 @@
// Copyright 2018 Intel Corporation.
//
// SPDX-License-Identifier: Apache-2.0
//
package signals
import "syscall"
// List of handled signals.
//
// The value is true if receiving the signal should be fatal.
var handledSignalsMap = map[syscall.Signal]bool{
syscall.SIGABRT: true,
syscall.SIGBUS: true,
syscall.SIGILL: true,
syscall.SIGQUIT: true,
syscall.SIGSEGV: true,
syscall.SIGSTKFLT: true,
syscall.SIGSYS: true,
syscall.SIGTRAP: true,
syscall.SIGUSR1: false,
}

View File

@@ -5,10 +5,6 @@
package utils
import (
"golang.org/x/sys/unix"
)
// PidType is the type of provided pid value and how it should be treated
type PidType int
@@ -24,13 +20,3 @@ const (
// ProcessGroup affects all processes in the group
ProcessGroup PidType = pidTypeProcessGroupId
)
// Create a new sched core domain
func Create(t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0)
}
// ShareFrom shares the sched core domain from the provided pid
func ShareFrom(pid uint64, t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0)
}

View File

@@ -0,0 +1,20 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package utils
import (
"golang.org/x/sys/unix"
)
// Create a new sched core domain
func Create(t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_CREATE, 0, uintptr(t), 0)
}
// ShareFrom shares the sched core domain from the provided pid
func ShareFrom(pid uint64, t PidType) error {
return unix.Prctl(unix.PR_SCHED_CORE, unix.PR_SCHED_CORE_SHARE_FROM, uintptr(pid), uintptr(t), 0)
}

View File

@@ -0,0 +1,22 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
//go:build !linux
package utils
import (
"errors"
)
// Create a new sched core domain
func Create(t PidType) error {
return errors.New("schedcore not available on non-Linux platforms")
}
// ShareFrom shares the sched core domain from the provided pid
func ShareFrom(pid uint64, t PidType) error {
return errors.New("schedcore not available on non-Linux platforms")
}

View File

@@ -44,16 +44,16 @@ func SetLogger(ctx context.Context, logger *logrus.Entry) {
// CreateSandbox is the virtcontainers sandbox creation entry point.
// CreateSandbox creates a sandbox and its containers. It does not start them.
func CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (VCSandbox, error) {
func CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory, prestartHookFunc func(context.Context) error) (VCSandbox, error) {
span, ctx := katatrace.Trace(ctx, virtLog, "CreateSandbox", apiTracingTags)
defer span.End()
s, err := createSandboxFromConfig(ctx, sandboxConfig, factory)
s, err := createSandboxFromConfig(ctx, sandboxConfig, factory, prestartHookFunc)
return s, err
}
func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (_ *Sandbox, err error) {
func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, factory Factory, prestartHookFunc func(context.Context) error) (_ *Sandbox, err error) {
span, ctx := katatrace.Trace(ctx, virtLog, "createSandboxFromConfig", apiTracingTags)
defer span.End()
@@ -88,7 +88,7 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f
}
// Start the VM
if err = s.startVM(ctx); err != nil {
if err = s.startVM(ctx, prestartHookFunc); err != nil {
return nil, err
}

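For illustration, a hedged Go sketch (not part of this diff) of how a caller supplies the new prestart hook closure; the wrapper name and the nil factory are assumptions.

package example

import (
	"context"

	vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers"
)

// createWithPrestart passes a closure that the runtime invokes after the VM
// starts and before network endpoints are rescanned.
func createWithPrestart(ctx context.Context, cfg vc.SandboxConfig) (vc.VCSandbox, error) {
	return vc.CreateSandbox(ctx, cfg, nil, func(hookCtx context.Context) error {
		// A real caller (see katautils.CreateSandbox above) runs its pre-start
		// and create-runtime OCI hooks here, with the hypervisor PID available
		// via HypervisorPidKey in hookCtx.
		return nil
	})
}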
View File

@@ -145,7 +145,7 @@ func TestCreateSandboxNoopAgentSuccessful(t *testing.T) {
config := newTestSandboxConfigNoop()
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
p, err := CreateSandbox(ctx, config, nil)
p, err := CreateSandbox(ctx, config, nil, nil)
assert.NoError(err)
assert.NotNil(p)
@@ -178,7 +178,7 @@ func TestCreateSandboxKataAgentSuccessful(t *testing.T) {
defer hybridVSockTTRPCMock.Stop()
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
p, err := CreateSandbox(ctx, config, nil)
p, err := CreateSandbox(ctx, config, nil, nil)
assert.NoError(err)
assert.NotNil(p)
@@ -199,7 +199,7 @@ func TestCreateSandboxFailing(t *testing.T) {
config := SandboxConfig{}
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
p, err := CreateSandbox(ctx, config, nil)
p, err := CreateSandbox(ctx, config, nil, nil)
assert.Error(err)
assert.Nil(p.(*Sandbox))
}
@@ -227,7 +227,7 @@ func createAndStartSandbox(ctx context.Context, config SandboxConfig) (sandbox V
err error) {
// Create sandbox
sandbox, err = CreateSandbox(ctx, config, nil)
sandbox, err = CreateSandbox(ctx, config, nil, nil)
if sandbox == nil || err != nil {
return nil, "", err
}
@@ -260,7 +260,7 @@ func TestReleaseSandbox(t *testing.T) {
config := newTestSandboxConfigNoop()
ctx := WithNewAgentFunc(context.Background(), newMockAgent)
s, err := CreateSandbox(ctx, config, nil)
s, err := CreateSandbox(ctx, config, nil, nil)
assert.NoError(t, err)
assert.NotNil(t, s)

View File

@@ -220,7 +220,7 @@ var vmAddNetPutRequest = func(clh *cloudHypervisor) error {
resp.Body.Close()
resp.Body = io.NopCloser(bytes.NewBuffer(respBody))
if resp.StatusCode != 204 {
if resp.StatusCode != 200 && resp.StatusCode != 204 {
clh.Logger().Errorf("vmAddNetPut failed with error '%d'. Response: %+v", resp.StatusCode, resp)
return fmt.Errorf("Failed to add the network device '%+v' to Cloud Hypervisor: %v", netDevice, resp.StatusCode)
}
@@ -886,6 +886,15 @@ func (clh *cloudHypervisor) hotPlugVFIODevice(device *config.VFIODev) error {
return err
}
func (clh *cloudHypervisor) hotplugAddNetDevice(e Endpoint) error {
err := clh.addNet(e)
if err != nil {
return err
}
return clh.vmAddNetPut()
}
func (clh *cloudHypervisor) HotplugAddDevice(ctx context.Context, devInfo interface{}, devType DeviceType) (interface{}, error) {
span, _ := katatrace.Trace(ctx, clh.Logger(), "HotplugAddDevice", clhTracingTags, map[string]string{"sandbox_id": clh.id})
defer span.End()
@@ -897,6 +906,9 @@ func (clh *cloudHypervisor) HotplugAddDevice(ctx context.Context, devInfo interf
case VfioDev:
device := devInfo.(*config.VFIODev)
return nil, clh.hotPlugVFIODevice(device)
case NetDev:
device := devInfo.(Endpoint)
return nil, clh.hotplugAddNetDevice(device)
default:
return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType)
}

View File

@@ -64,7 +64,7 @@ func Example_createAndStartSandbox() {
}
// Create the sandbox
s, err := vc.CreateSandbox(context.Background(), sandboxConfig, nil)
s, err := vc.CreateSandbox(context.Background(), sandboxConfig, nil, nil)
if err != nil {
fmt.Printf("Could not create sandbox: %s", err)
return

View File

@@ -52,6 +52,9 @@ const (
// RemoteHypervisor is the Remote hypervisor.
RemoteHypervisor HypervisorType = "remote"
// VirtFrameworkHypervisor is the Darwin Virtualization.framework hypervisor
VirtframeworkHypervisor HypervisorType = "virtframework"
// MockHypervisor is a mock hypervisor for testing purposes
MockHypervisor HypervisorType = "mock"
@@ -87,6 +90,8 @@ var (
)
// In some architectures the maximum number of vCPUs depends on the number of physical cores.
// TODO (dcantah): Find a suitable value for darwin/vfw. Seems perf degrades if > number of host
// cores.
var defaultMaxVCPUs = govmm.MaxVCPUs()
// agnostic list of kernel root parameters for NVDIMM
@@ -183,6 +188,9 @@ func (hType *HypervisorType) Set(value string) error {
case "remote":
*hType = RemoteHypervisor
return nil
case "virtframework":
*hType = VirtframeworkHypervisor
return nil
case "mock":
*hType = MockHypervisor
return nil

View File

@@ -0,0 +1,26 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"fmt"
)
// NewHypervisor returns a hypervisor from a hypervisor type.
func NewHypervisor(hType HypervisorType) (Hypervisor, error) {
switch hType {
case VirtframeworkHypervisor:
return &virtFramework{}, nil
case MockHypervisor:
return &mockHypervisor{}, nil
default:
return nil, fmt.Errorf("Unknown hypervisor type %s", hType)
}
}
func availableGuestProtection() (guestProtection, error) {
return noneProtection, nil
}

View File

@@ -27,7 +27,6 @@ func generateVMSocket(id string, vmStogarePath string) (interface{}, error) {
// NewHypervisor returns a hypervisor from a hypervisor type.
func NewHypervisor(hType HypervisorType) (Hypervisor, error) {
switch hType {
case QemuHypervisor:
return &qemu{}, nil

View File

@@ -31,8 +31,8 @@ func (impl *VCImpl) SetFactory(ctx context.Context, factory Factory) {
}
// CreateSandbox implements the VC function of the same name.
func (impl *VCImpl) CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig) (VCSandbox, error) {
return CreateSandbox(ctx, sandboxConfig, impl.factory)
func (impl *VCImpl) CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig, hookFunc func(context.Context) error) (VCSandbox, error) {
return CreateSandbox(ctx, sandboxConfig, impl.factory, hookFunc)
}
// CleanupContainer is used by shimv2 to stop and delete a container exclusively, once there is no container

View File

@@ -24,7 +24,7 @@ type VC interface {
SetLogger(ctx context.Context, logger *logrus.Entry)
SetFactory(ctx context.Context, factory Factory)
CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig) (VCSandbox, error)
CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig, hookFunc func(context.Context) error) (VCSandbox, error)
CleanupContainer(ctx context.Context, sandboxID, containerID string, force bool) error
}

View File

@@ -252,6 +252,22 @@ func (n *LinuxNetwork) removeSingleEndpoint(ctx context.Context, s *Sandbox, idx
return nil
}
func (n *LinuxNetwork) endpointAlreadyAdded(netInfo *NetworkInfo) bool {
for _, ep := range n.eps {
// Existing endpoint
if ep.Name() == netInfo.Iface.Name {
return true
}
pair := ep.NetworkPair()
// Existing virtual endpoints
if pair != nil && (pair.TapInterface.Name == netInfo.Iface.Name || pair.TapInterface.TAPIface.Name == netInfo.Iface.Name || pair.VirtIface.Name == netInfo.Iface.Name) {
return true
}
}
return false
}
// Scan the networking namespace through netlink and then:
// 1. Create the endpoints for the relevant interfaces found there.
// 2. Attach them to the VM.
@@ -292,6 +308,12 @@ func (n *LinuxNetwork) addAllEndpoints(ctx context.Context, s *Sandbox, hotplug
continue
}
// Skip any interfaces that are already added
if n.endpointAlreadyAdded(&netInfo) {
networkLogger().WithField("endpoint", netInfo.Iface.Name).Info("already added")
continue
}
if err := doNetNS(n.netNSPath, func(_ ns.NetNS) error {
_, err = n.addSingleEndpoint(ctx, s, netInfo, hotplug)
return err

View File

@@ -23,7 +23,6 @@ import (
"syscall"
"time"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/kata-containers/kata-containers/src/runtime/pkg/katautils/katatrace"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils"
"github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils/retry"
@@ -54,8 +53,6 @@ const (
nydusPassthroughfs = "passthrough_fs"
sharedPathInGuest = "/containers"
shimNsPath = "/proc/self/ns/net"
)
var (
@@ -85,13 +82,6 @@ type nydusd struct {
debug bool
}
func startInShimNS(cmd *exec.Cmd) error {
// Create nydusd in shim netns as it needs to access host network
return doNetNS(shimNsPath, func(_ ns.NetNS) error {
return cmd.Start()
})
}
func (nd *nydusd) Start(ctx context.Context, onQuit onQuitFunc) (int, error) {
span, _ := katatrace.Trace(ctx, nd.Logger(), "Start", nydusdTracingTags)
defer span.End()

View File

@@ -0,0 +1,21 @@
// Copyright (c) 2017 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0
//
package virtcontainers
import (
"os/exec"
"github.com/containernetworking/plugins/pkg/ns"
)
const shimNsPath = "/proc/self/ns/net"
func startInShimNS(cmd *exec.Cmd) error {
// Create nydusd in shim netns as it needs to access host network
return doNetNS(shimNsPath, func(_ ns.NetNS) error {
return cmd.Start()
})
}

View File

@@ -0,0 +1,15 @@
// Copyright (c) 2023 Apple Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
//go:build !linux
package virtcontainers
import "os/exec"
// No-op on net namespace join on other platforms.
func startInShimNS(cmd *exec.Cmd) error {
return cmd.Start()
}

View File

@@ -42,9 +42,9 @@ func (m *VCMock) SetFactory(ctx context.Context, factory vc.Factory) {
}
// CreateSandbox implements the VC function of the same name.
func (m *VCMock) CreateSandbox(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) {
func (m *VCMock) CreateSandbox(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) {
if m.CreateSandboxFunc != nil {
return m.CreateSandboxFunc(ctx, sandboxConfig)
return m.CreateSandboxFunc(ctx, sandboxConfig, hookFunc)
}
return nil, fmt.Errorf("%s: %s (%+v): sandboxConfig: %v", mockErrorPrefix, getSelf(), m, sandboxConfig)

View File

@@ -120,22 +120,22 @@ func TestVCMockCreateSandbox(t *testing.T) {
assert.Nil(m.CreateSandboxFunc)
ctx := context.Background()
_, err := m.CreateSandbox(ctx, vc.SandboxConfig{})
_, err := m.CreateSandbox(ctx, vc.SandboxConfig{}, nil)
assert.Error(err)
assert.True(IsMockError(err))
m.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error) {
m.CreateSandboxFunc = func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error) {
return &Sandbox{}, nil
}
sandbox, err := m.CreateSandbox(ctx, vc.SandboxConfig{})
sandbox, err := m.CreateSandbox(ctx, vc.SandboxConfig{}, nil)
assert.NoError(err)
assert.Equal(sandbox, &Sandbox{})
// reset
m.CreateSandboxFunc = nil
_, err = m.CreateSandbox(ctx, vc.SandboxConfig{})
_, err = m.CreateSandbox(ctx, vc.SandboxConfig{}, nil)
assert.Error(err)
assert.True(IsMockError(err))
}

View File

@@ -88,6 +88,6 @@ type VCMock struct {
SetLoggerFunc func(ctx context.Context, logger *logrus.Entry)
SetFactoryFunc func(ctx context.Context, factory vc.Factory)
CreateSandboxFunc func(ctx context.Context, sandboxConfig vc.SandboxConfig) (vc.VCSandbox, error)
CreateSandboxFunc func(ctx context.Context, sandboxConfig vc.SandboxConfig, hookFunc func(context.Context) error) (vc.VCSandbox, error)
CleanupContainerFunc func(ctx context.Context, sandboxID, containerID string, force bool) error
}

View File

@@ -95,6 +95,9 @@ var (
errSandboxNotRunning = errors.New("Sandbox not running")
)
// HypervisorPidKey is the context key for hypervisor pid
type HypervisorPidKey struct{}
// SandboxStatus describes a sandbox status.
type SandboxStatus struct {
ContainersStatus []ContainerStatus
@@ -1197,7 +1200,7 @@ func (s *Sandbox) cleanSwap(ctx context.Context) {
}
// startVM starts the VM.
func (s *Sandbox) startVM(ctx context.Context) (err error) {
func (s *Sandbox) startVM(ctx context.Context, prestartHookFunc func(context.Context) error) (err error) {
span, ctx := katatrace.Trace(ctx, s.Logger(), "startVM", sandboxTracingTags, map[string]string{"sandbox_id": s.id})
defer span.End()
@@ -1244,9 +1247,24 @@ func (s *Sandbox) startVM(ctx context.Context) (err error) {
}
}
if prestartHookFunc != nil {
hid, err := s.GetHypervisorPid()
if err != nil {
return err
}
s.Logger().Infof("hypervisor pid is %v", hid)
ctx = context.WithValue(ctx, HypervisorPidKey{}, hid)
if err := prestartHookFunc(ctx); err != nil {
return err
}
}
// In case of vm factory, network interfaces are hotplugged
// after vm is started.
if s.factory != nil {
// In case of prestartHookFunc, network config might have been changed.
// We need to rescan and handle the change.
if s.factory != nil || prestartHookFunc != nil {
if _, err := s.network.AddEndpoints(ctx, s, nil, true); err != nil {
return err
}

View File

@@ -1348,7 +1348,7 @@ func TestSandboxCreationFromConfigRollbackFromCreateSandbox(t *testing.T) {
// Ensure hypervisor doesn't exist
assert.NoError(os.Remove(hConf.HypervisorPath))
_, err := createSandboxFromConfig(ctx, sConf, nil)
_, err := createSandboxFromConfig(ctx, sConf, nil, nil)
// Fail at createSandbox: QEMU path does not exist, it is expected. Then rollback is called
assert.Error(err)

View File

@@ -19,6 +19,7 @@ import (
"github.com/sirupsen/logrus"
)
// Mutable and not constant so we can mock in tests
var urandomDev = "/dev/urandom"
// VM is abstraction of a virtual machine.

File diff suppressed because it is too large

View File

@@ -21,9 +21,15 @@ privdrop = "0.5.2"
nix = "0.25.0"
strum = "0.24.1"
strum_macros = "0.24.3"
serde = { version = "1.0.149", features = ["derive"] }
url = "2.3.1"
futures = "0.3.24"
base64 = "0.13.0"
runtimes = { path = "../../runtime-rs/crates/runtimes" }
serde = "1.0.149"
shim-interface = { path = "../../libs/shim-interface"}
kata-types = { path = "../../libs/kata-types" }
safe-path = { path = "../../libs/safe-path" }
agent = { path = "../../runtime-rs/crates/agent"}
[target.'cfg(target_arch = "s390x")'.dependencies]
reqwest = { version = "0.11", default-features = false, features = ["json", "blocking", "native-tls"] }
@@ -34,3 +40,4 @@ reqwest = { version = "0.11", default-features = false, features = ["json", "blo
[dev-dependencies]
semver = "1.0.12"
tempfile = "3.1.0"
test-utils = { path = "../../libs/test-utils" }

View File

@@ -50,7 +50,7 @@ vendor:
cargo vendor
test:
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo test --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES)
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo test --target $(TRIPLE) $(if $(findstring release,$(BUILD_TYPE)),--release) $(EXTRA_RUSTFEATURES) -- --nocapture
install:
@RUSTFLAGS="$(EXTRA_RUSTFLAGS) --deny warnings" cargo install --target $(TRIPLE) --path .

View File

@@ -7,12 +7,21 @@
pub use arch_specific::*;
mod arch_specific {
use crate::types::*;
use anyhow::Result;
use std::path::Path;
const KVM_DEV: &str = "/dev/kvm";
pub fn check() -> Result<()> {
// List of check functions
static CHECK_LIST: &[CheckItem] = &[CheckItem {
name: CheckType::CheckCpu,
descr: "This parameter performs the host check",
fp: check,
perm: PermissionType::NonPrivileged,
}];
pub fn check(_args: &str) -> Result<()> {
println!("INFO: check: aarch64");
if Path::new(KVM_DEV).exists() {
println!("Kata Containers can run on this host\n");
@@ -22,4 +31,8 @@ mod arch_specific {
Ok(())
}
pub fn get_checks() -> Option<&'static [CheckItem<'static>]> {
Some(CHECK_LIST)
}
}

View File

@@ -3,6 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::types::*;
#[cfg(target_arch = "powerpc64le")]
pub use arch_specific::*;
@@ -12,4 +13,8 @@ mod arch_specific {
pub fn check() -> Result<()> {
unimplemented!("Check not implemented in powerpc64le");
}
pub fn get_checks() -> Option<&'static [CheckItem<'static>]> {
None
}
}

View File

@@ -9,6 +9,7 @@ pub use arch_specific::*;
mod arch_specific {
use crate::check;
use crate::types::*;
use anyhow::{anyhow, Result};
const PROC_CPUINFO: &str = "/proc/cpuinfo";
@@ -38,7 +39,7 @@ mod arch_specific {
Ok(())
}
pub fn check() -> Result<()> {
pub fn check(_args: &str) -> Result<()> {
println!("INFO: check: s390x");
let _cpu_result = check_cpu();
@@ -48,4 +49,16 @@ mod arch_specific {
Ok(())
}
// List of check functions
static CHECK_LIST: &[CheckItem] = &[CheckItem {
name: CheckType::CheckCpu,
descr: "This parameter performs the cpu check",
fp: check,
perm: PermissionType::NonPrivileged,
}];
pub fn get_checks() -> Option<&'static [CheckItem<'static>]> {
Some(CHECK_LIST)
}
}

View File

@@ -25,8 +25,8 @@ mod arch_specific {
perm: PermissionType::NonPrivileged,
}];
pub fn get_checks() -> &'static [CheckItem<'static>] {
CHECK_LIST
pub fn get_checks() -> Option<&'static [CheckItem<'static>]> {
Some(CHECK_LIST)
}
fn check_cpu(_args: &str) -> Result<()> {

View File

@@ -20,7 +20,7 @@ pub enum Commands {
Check(CheckArgument),
/// Directly assign a volume to Kata Containers to manage
DirectVolume,
DirectVolume(DirectVolumeCommand),
/// Display settings
Env,
@@ -93,3 +93,46 @@ pub enum IpTablesArguments {
/// Configure iptables
Metrics,
}
#[derive(Debug, Args)]
pub struct DirectVolumeCommand {
#[clap(subcommand)]
pub directvol_cmd: DirectVolSubcommand,
}
#[derive(Debug, Subcommand)]
pub enum DirectVolSubcommand {
/// Add a direct assigned block volume device to the Kata Containers runtime
Add(DirectVolAddArgs),
/// Remove a direct assigned block volume device from the Kata Containers runtime
Remove(DirectVolRemoveArgs),
/// Get the filesystem stat of a direct assigned volume
Stats(DirectVolStatsArgs),
/// Resize a direct assigned block volume
Resize(DirectVolResizeArgs),
}
#[derive(Debug, Args)]
pub struct DirectVolAddArgs {
pub volume_path: String,
pub mount_info: String,
}
#[derive(Debug, Args)]
pub struct DirectVolRemoveArgs {
pub volume_path: String,
}
#[derive(Debug, Args)]
pub struct DirectVolStatsArgs {
pub volume_path: String,
}
#[derive(Debug, Args)]
pub struct DirectVolResizeArgs {
pub volume_path: String,
pub resize_size: u64,
}

View File

@@ -16,12 +16,6 @@ struct Release {
tarball_url: String,
}
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
const KATA_GITHUB_RELEASE_URL: &str =
"https://api.github.com/repos/kata-containers/kata-containers/releases";
@@ -42,7 +36,7 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result<Strin
let contents = get_cpu_info(cpu_info_file)?;
if contents.is_empty() {
return Err(anyhow!("cpu_info string is empty"))?;
return Err(anyhow!("cpu_info string is empty"));
}
let subcontents: Vec<&str> = contents.split(substring).collect();
@@ -60,7 +54,7 @@ pub fn get_single_cpu_info(cpu_info_file: &str, substring: &str) -> Result<Strin
#[cfg(any(target_arch = "s390x", target_arch = "x86_64"))]
pub fn get_cpu_flags(cpu_info: &str, cpu_flags_tag: &str) -> Result<String> {
if cpu_info.is_empty() {
return Err(anyhow!("cpu_info string is empty"))?;
return Err(anyhow!("cpu_info string is empty"));
}
let subcontents: Vec<&str> = cpu_info.split('\n').collect();
@@ -120,12 +114,13 @@ pub fn run_network_checks() -> Result<()> {
Ok(())
}
fn get_kata_all_releases_by_url() -> std::result::Result<Vec<Release>, reqwest::Error> {
fn get_kata_all_releases_by_url(url: &str) -> std::result::Result<Vec<Release>, reqwest::Error> {
let releases: Vec<Release> = reqwest::blocking::Client::new()
.get(KATA_GITHUB_RELEASE_URL)
.get(url)
.header(CONTENT_TYPE, JSON_TYPE)
.header(USER_AGENT, USER_AGT)
.send()?
.error_for_status()?
.json()?;
Ok(releases)
}
@@ -151,7 +146,8 @@ fn handle_reqwest_error(e: reqwest::Error) -> anyhow::Error {
}
pub fn check_all_releases() -> Result<()> {
let releases: Vec<Release> = get_kata_all_releases_by_url().map_err(handle_reqwest_error)?;
let releases: Vec<Release> =
get_kata_all_releases_by_url(KATA_GITHUB_RELEASE_URL).map_err(handle_reqwest_error)?;
for release in releases {
if !release.prerelease {
@@ -170,7 +166,8 @@ pub fn check_all_releases() -> Result<()> {
}
pub fn check_official_releases() -> Result<()> {
let releases: Vec<Release> = get_kata_all_releases_by_url().map_err(handle_reqwest_error)?;
let releases: Vec<Release> =
get_kata_all_releases_by_url(KATA_GITHUB_RELEASE_URL).map_err(handle_reqwest_error)?;
println!("Official Releases...");
for release in releases {
@@ -190,23 +187,6 @@ pub fn check_official_releases() -> Result<()> {
mod tests {
use super::*;
use semver::Version;
use serde_json::Value;
use std::collections::HashMap;
const KATA_GITHUB_URL: &str =
"https://api.github.com/repos/kata-containers/kata-containers/releases/latest";
fn get_kata_version_by_url(url: &str) -> std::result::Result<String, reqwest::Error> {
let content = reqwest::blocking::Client::new()
.get(url)
.header(CONTENT_TYPE, JSON_TYPE)
.header(USER_AGENT, USER_AGT)
.send()?
.json::<HashMap<String, Value>>()?;
let version = content["tag_name"].as_str().unwrap();
Ok(version.to_string())
}
#[test]
fn test_get_cpu_info_empty_input() {
@@ -232,7 +212,10 @@ mod tests {
fn check_version_by_empty_url() {
const TEST_URL: &str = "http:";
let expected = "builder error: empty host";
let actual = get_kata_version_by_url(TEST_URL).err().unwrap().to_string();
let actual = get_kata_all_releases_by_url(TEST_URL)
.err()
.unwrap()
.to_string();
assert_eq!(expected, actual);
}
@@ -240,7 +223,10 @@ mod tests {
fn check_version_by_garbage_url() {
const TEST_URL: &str = "_localhost_";
let expected = "builder error: relative URL without a base";
let actual = get_kata_version_by_url(TEST_URL).err().unwrap().to_string();
let actual = get_kata_all_releases_by_url(TEST_URL)
.err()
.unwrap()
.to_string();
assert_eq!(expected, actual);
}
@@ -248,15 +234,31 @@ mod tests {
fn check_version_by_invalid_url() {
const TEST_URL: &str = "http://localhost :80";
let expected = "builder error: invalid domain character";
let actual = get_kata_version_by_url(TEST_URL).err().unwrap().to_string();
let actual = get_kata_all_releases_by_url(TEST_URL)
.err()
.unwrap()
.to_string();
assert_eq!(expected, actual);
}
#[test]
fn check_latest_version() {
let version = get_kata_version_by_url(KATA_GITHUB_URL).unwrap();
let releases = get_kata_all_releases_by_url(KATA_GITHUB_RELEASE_URL);
// Sometimes accessing the github.com API from a GitHub Action may fail;
// we can skip this test to prevent the whole test run from failing.
if releases.is_err() {
println!(
"WARNING!!!\nget kata version failed({:?}), this maybe a temporary error, just skip the test.",
releases.unwrap_err()
);
return;
}
let releases = releases.unwrap();
let v = Version::parse(&version).unwrap();
assert!(!releases.is_empty());
let release = &releases[0];
let v = Version::parse(&release.tag_name).unwrap();
assert!(!v.major.to_string().is_empty());
assert!(!v.minor.to_string().is_empty());
assert!(!v.patch.to_string().is_empty());

View File

@@ -17,16 +17,17 @@ use std::process::exit;
use args::{Commands, KataCtlCli};
use ops::check_ops::{
handle_check, handle_check_volume, handle_env, handle_exec, handle_factory, handle_iptables,
handle_metrics, handle_version,
handle_check, handle_env, handle_exec, handle_factory, handle_iptables, handle_metrics,
handle_version,
};
use ops::volume_ops::handle_direct_volume;
fn real_main() -> Result<()> {
let args = KataCtlCli::parse();
match args.command {
Commands::Check(args) => handle_check(args),
Commands::DirectVolume => handle_check_volume(),
Commands::DirectVolume(args) => handle_direct_volume(args),
Commands::Env => handle_env(),
Commands::Exec => handle_exec(),
Commands::Factory => handle_factory(),

View File

@@ -5,3 +5,4 @@
pub mod check_ops;
pub mod version;
pub mod volume_ops;

View File

@@ -3,7 +3,7 @@
// SPDX-License-Identifier: Apache-2.0
//
use crate::arch::x86_64::get_checks;
use crate::arch::arch_specific::get_checks;
use crate::args::{CheckArgument, CheckSubCommand, IptablesCommand, MetricsCommand};
@@ -19,11 +19,11 @@ const NAME: &str = "kata-ctl";
// This function retrieves the cmd function passed as an argument
fn get_builtin_check_func(name: CheckType) -> Result<BuiltinCmdFp> {
let check_list = get_checks();
for check in check_list {
if check.name.eq(&name) {
return Ok(check.fp);
if let Some(check_list) = get_checks() {
for check in check_list {
if check.name.eq(&name) {
return Ok(check.fp);
}
}
}
@@ -42,10 +42,10 @@ fn handle_builtin_check(check: CheckType, args: &str) -> Result<()> {
fn get_client_cmd_details() -> Vec<String> {
let mut cmds = Vec::new();
let check_list = get_checks();
for cmd in check_list {
cmds.push(format!("{} ({}. Mode: {})", cmd.name, cmd.descr, cmd.perm));
if let Some(check_list) = get_checks() {
for cmd in check_list {
cmds.push(format!("{} ({}. Mode: {})", cmd.name, cmd.descr, cmd.perm));
}
}
cmds
@@ -93,20 +93,10 @@ pub fn handle_check(checkcmd: CheckArgument) -> Result<()> {
}
CheckSubCommand::OnlyListReleases => {
// retrieve official release
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
check::check_official_releases()?;
}
CheckSubCommand::IncludeAllReleases => {
// retrieve ALL releases including prerelease
#[cfg(any(
target_arch = "aarch64",
target_arch = "powerpc64le",
target_arch = "x86_64"
))]
check::check_all_releases()?;
}
}
@@ -114,10 +104,6 @@ pub fn handle_check(checkcmd: CheckArgument) -> Result<()> {
Ok(())
}
pub fn handle_check_volume() -> Result<()> {
Ok(())
}
pub fn handle_env() -> Result<()> {
Ok(())
}

View File

@@ -0,0 +1,293 @@
// Copyright (c) 2022 Boston University
//
// SPDX-License-Identifier: Apache-2.0
//
use crate::args::{DirectVolSubcommand, DirectVolumeCommand};
use anyhow::{anyhow, Ok, Result};
use futures::executor;
use kata_types::mount::{
DirectVolumeMountInfo, KATA_DIRECT_VOLUME_ROOT_PATH, KATA_MOUNT_INFO_FILE_NAME,
};
use nix;
use reqwest::StatusCode;
use safe_path;
use std::{fs, path::PathBuf, time::Duration};
use url;
use agent::ResizeVolumeRequest;
use shim_interface::shim_mgmt::client::MgmtClient;
use shim_interface::shim_mgmt::{
DIRECT_VOLUME_PATH_KEY, DIRECT_VOLUME_RESIZE_URL, DIRECT_VOLUME_STATS_URL,
};
const TIMEOUT: Duration = Duration::from_millis(2000);
const CONTENT_TYPE_JSON: &str = "application/json";
pub fn handle_direct_volume(vol_cmd: DirectVolumeCommand) -> Result<()> {
if !nix::unistd::Uid::effective().is_root() {
return Err(anyhow!(
"super-user privileges are required for the direct-volume subcommand"
));
}
let command = vol_cmd.directvol_cmd;
let cmd_result: Option<String> = match command {
DirectVolSubcommand::Add(args) => add(&args.volume_path, &args.mount_info)?,
DirectVolSubcommand::Remove(args) => remove(&args.volume_path)?,
DirectVolSubcommand::Stats(args) => executor::block_on(stats(&args.volume_path))?,
DirectVolSubcommand::Resize(args) => {
executor::block_on(resize(&args.volume_path, args.resize_size))?
}
};
if let Some(cmd_result) = cmd_result {
println!("{:?}", cmd_result);
}
Ok(())
}
async fn resize(volume_path: &str, size: u64) -> Result<Option<String>> {
let sandbox_id = get_sandbox_id_for_volume(volume_path)?;
let mount_info = get_volume_mount_info(volume_path)?;
let resize_req = ResizeVolumeRequest {
size,
volume_guest_path: mount_info.device,
};
let encoded = serde_json::to_string(&resize_req)?;
let shim_client = MgmtClient::new(&sandbox_id, Some(TIMEOUT))?;
let url = DIRECT_VOLUME_RESIZE_URL;
let response = shim_client
.post(url, &String::from(CONTENT_TYPE_JSON), &encoded)
.await?;
let status = response.status();
if status != StatusCode::OK {
let body = format!("{:?}", response.into_body());
return Err(anyhow!(
"failed to resize volume ({:?}): {:?}",
status,
body
));
}
Ok(None)
}
async fn stats(volume_path: &str) -> Result<Option<String>> {
let sandbox_id = get_sandbox_id_for_volume(volume_path)?;
let mount_info = get_volume_mount_info(volume_path)?;
let req_url = url::form_urlencoded::Serializer::new(String::from(DIRECT_VOLUME_STATS_URL))
.append_pair(DIRECT_VOLUME_PATH_KEY, &mount_info.device)
.finish();
let shim_client = MgmtClient::new(&sandbox_id, Some(TIMEOUT))?;
let response = shim_client.get(&req_url).await?;
// turn body into string
let body = format!("{:?}", response.into_body());
Ok(Some(body))
}
// join_path joins a user-provided volume path with the kata direct-volume root path.
// The volume_path is base64-encoded and then safely joined to the end of the path prefix.
fn join_path(prefix: &str, volume_path: &str) -> Result<PathBuf> {
if volume_path.is_empty() {
return Err(anyhow!("volume path must not be empty"));
}
let b64_encoded_path = base64::encode(volume_path.as_bytes());
Ok(safe_path::scoped_join(prefix, b64_encoded_path)?)
}
// add writes the mount info (json string) of a direct volume into a filesystem path known to Kata Containers.
pub fn add(volume_path: &str, mount_info: &str) -> Result<Option<String>> {
let mount_info_dir_path = join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?;
// create directory if missing
fs::create_dir_all(&mount_info_dir_path)?;
// This behavior of deserializing and serializing comes from
// https://github.com/kata-containers/kata-containers/blob/cd27ad144e1a111cb606015c5c9671431535e644/src/runtime/pkg/direct-volume/utils.go#L57-L79
// Assuming that this is for the purpose of validating the json schema.
let unserialized_mount_info: DirectVolumeMountInfo = serde_json::from_str(mount_info)?;
let mount_info_file_path = mount_info_dir_path.join(KATA_MOUNT_INFO_FILE_NAME);
let serialized_mount_info = serde_json::to_string(&unserialized_mount_info)?;
fs::write(mount_info_file_path, serialized_mount_info)?;
Ok(None)
}
// remove deletes the direct volume path including all the files inside it.
pub fn remove(volume_path: &str) -> Result<Option<String>> {
let path = join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?;
// removes path and any children it contains.
fs::remove_dir_all(path)?;
Ok(None)
}
pub fn get_volume_mount_info(volume_path: &str) -> Result<DirectVolumeMountInfo> {
let mount_info_file_path =
join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?.join(KATA_MOUNT_INFO_FILE_NAME);
let mount_info_file = fs::read_to_string(mount_info_file_path)?;
let mount_info: DirectVolumeMountInfo = serde_json::from_str(&mount_info_file)?;
Ok(mount_info)
}
// get_sandbox_id_for_volume finds the id of the first sandbox found in the dir.
// We expect a direct-assigned volume to be associated with only one sandbox at a time.
pub fn get_sandbox_id_for_volume(volume_path: &str) -> Result<String> {
let dir_path = join_path(KATA_DIRECT_VOLUME_ROOT_PATH, volume_path)?;
let paths = fs::read_dir(dir_path)?;
for path in paths {
let path = path?;
// compare with MOUNT_INFO_FILE_NAME
if path.file_name() == KATA_MOUNT_INFO_FILE_NAME {
continue;
}
let file_name = path.file_name();
// turn file_name into String and return it
let file_name = file_name.to_str().ok_or_else(|| {
anyhow!(
"failed to convert file_name {:?} to string",
file_name.to_string_lossy()
)
})?;
return Ok(String::from(file_name));
}
return Err(anyhow!("no sandbox found for {}", volume_path));
}
#[cfg(test)]
mod tests {
use super::*;
use kata_types::mount::DirectVolumeMountInfo;
use std::{collections::HashMap, fs};
use tempfile::tempdir;
use test_utils::skip_if_not_root;
#[test]
fn test_get_sandbox_id_for_volume() {
// this test has to run as root, so it has to manually clean up afterwards
skip_if_not_root!();
// create KATA_DIRECT_VOLUME_ROOT_PATH first as safe_path::scoped_join
// requires prefix dir to exist
fs::create_dir_all(KATA_DIRECT_VOLUME_ROOT_PATH)
.expect("create kata direct volume root path failed");
let test_sandbox_id = "sandboxid_test_file";
let test_volume_path = String::from("a/b/c");
let joined_volume_path =
join_path(KATA_DIRECT_VOLUME_ROOT_PATH, &test_volume_path).unwrap();
let test_file_dir = joined_volume_path.join(test_sandbox_id);
fs::create_dir_all(&joined_volume_path).expect("failed to mkdir -p");
fs::write(&test_file_dir, "teststring").expect("failed to write");
// test that get_sandbox_id gets the correct sandboxid it sees
let got = get_sandbox_id_for_volume(&test_volume_path).unwrap();
assert!(got.eq(test_sandbox_id));
// test that get_sandbox_id returns error if no sandboxid found
fs::remove_file(&test_file_dir).expect("failed to remove");
get_sandbox_id_for_volume(&test_volume_path).expect_err("error expected");
// cleanup test directory
fs::remove_dir_all(&joined_volume_path).expect("failed to cleanup test")
}
#[test]
fn test_path_join() {
#[derive(Debug)]
struct TestData<'a> {
rootfs: &'a str,
volume_path: &'a str,
result: Result<PathBuf>,
}
// the safe_path::scoped_join requires the prefix path to exist on testing machine
let root_fs = tempdir().expect("failed to create tmpdir").into_path();
let root_fs_str = root_fs.to_str().unwrap();
let relative_secret_path = "../../etc/passwd";
let b64_relative_secret_path = base64::encode(relative_secret_path);
// this byte array b64encodes to "/abcdddd"
let b64_abs_path = vec![253, 166, 220, 117, 215, 93];
let converted_relative_path = "abcdddd";
let tests = &[
TestData {
rootfs: root_fs_str,
volume_path: "",
result: Err(anyhow!("volume path must not be empty")),
},
TestData {
rootfs: root_fs_str,
volume_path: relative_secret_path,
result: Ok(root_fs.join(b64_relative_secret_path)),
},
TestData {
rootfs: root_fs_str,
volume_path: unsafe { std::str::from_utf8_unchecked(&b64_abs_path) },
result: Ok(root_fs.join(converted_relative_path)),
},
];
for (i, d) in tests.iter().enumerate() {
let msg = format!("test[{}]: {:?}", i, d);
let result = join_path(d.rootfs, d.volume_path);
let msg = format!("{}, result: {:?}", msg, result);
if d.result.is_ok() {
assert!(
result.as_ref().unwrap() == d.result.as_ref().unwrap(),
"{}",
msg
);
continue;
}
let expected_error = format!("{}", d.result.as_ref().unwrap_err());
let actual_error = format!("{}", result.unwrap_err());
assert!(actual_error == expected_error, "{}", msg);
}
}
#[test]
fn test_add_remove() {
skip_if_not_root!();
// The example volume dir is a/b/c; note that join would treat "/a" as an absolute path.
// Testing against a temporary directory isn't really viable here since the path is then b64 encoded,
// so this test has to run as root and call `remove()` to manually clean up afterwards.
fs::create_dir_all(KATA_DIRECT_VOLUME_ROOT_PATH)
.expect("create kata direct volume root path failed");
let base_dir = tempdir().expect("failed to create tmpdir");
let dir_name = base_dir.path().join("a/b/c");
let volume_path = String::from(dir_name.to_str().unwrap());
let actual: DirectVolumeMountInfo = DirectVolumeMountInfo {
volume_type: String::from("block"),
device: String::from("/dev/sda"),
fs_type: String::from("ext4"),
metadata: HashMap::new(),
options: vec![String::from("journal_dev"), String::from("noload")],
};
// serialize volumemountinfo into json string
let mount_info = serde_json::to_string(&actual).unwrap();
add(&volume_path, &mount_info).expect("add failed");
let expected_file_path = volume_path;
let expected: DirectVolumeMountInfo = get_volume_mount_info(&expected_file_path).unwrap();
remove(&expected_file_path).expect("remove failed");
assert_eq!(actual.device, expected.device);
assert_eq!(actual.fs_type, expected.fs_type);
assert_eq!(actual.metadata, expected.metadata);
assert_eq!(actual.options, expected.options);
assert_eq!(actual.volume_type, expected.volume_type);
}
}

View File

@@ -139,6 +139,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base64"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
[[package]]
name = "bit-vec"
version = "0.6.3"
@@ -151,6 +157,22 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitmask-enum"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd9e32d7420c85055e8107e5b2463c4eeefeaac18b52359fe9f9c08a18f342b2"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "byte-unit"
version = "3.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "415301c9de11005d4b92193c0eb7ac7adc37e5a49e0ac9bed0a42343512744b8"
[[package]]
name = "byteorder"
version = "1.4.3"
@@ -274,6 +296,12 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "common-path"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2382f75942f4b3be3690fe4f86365e9c853c1587d6ee58212cebf6e2a9ccd101"
[[package]]
name = "concurrent-queue"
version = "1.2.4"
@@ -474,6 +502,17 @@ version = "2.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
[[package]]
name = "fail"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c"
dependencies = [
"log",
"once_cell",
"rand 0.8.5",
]
[[package]]
name = "fastrand"
version = "1.7.0"
@@ -609,6 +648,17 @@ dependencies = [
"slab",
]
[[package]]
name = "getrandom"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
dependencies = [
"cfg-if 1.0.0",
"libc",
"wasi 0.9.0+wasi-snapshot-preview1",
]
[[package]]
name = "getrandom"
version = "0.2.7"
@@ -620,6 +670,12 @@ dependencies = [
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "hashbrown"
version = "0.11.2"
@@ -724,6 +780,50 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "kata-sys-util"
version = "0.1.0"
dependencies = [
"byteorder",
"cgroups-rs",
"chrono",
"common-path",
"fail",
"kata-types",
"lazy_static",
"libc",
"nix 0.24.2",
"oci",
"once_cell",
"rand 0.7.3",
"serde_json",
"slog",
"slog-scope",
"subprocess",
"thiserror",
]
[[package]]
name = "kata-types"
version = "0.1.0"
dependencies = [
"anyhow",
"base64",
"bitmask-enum",
"byte-unit",
"glob",
"lazy_static",
"num_cpus",
"oci",
"regex",
"serde",
"serde_json",
"slog",
"slog-scope",
"thiserror",
"toml",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -744,6 +844,7 @@ dependencies = [
"cgroups-rs",
"chrono",
"derive_builder",
"kata-sys-util",
"libc",
"logging",
"nix 0.23.1",
@@ -1232,6 +1333,19 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
"getrandom 0.1.16",
"libc",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc",
]
[[package]]
name = "rand"
version = "0.8.5"
@@ -1239,8 +1353,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_chacha 0.3.1",
"rand_core 0.6.3",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
dependencies = [
"ppv-lite86",
"rand_core 0.5.1",
]
[[package]]
@@ -1250,7 +1374,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
"rand_core 0.6.3",
]
[[package]]
name = "rand_core"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
"getrandom 0.1.16",
]
[[package]]
@@ -1259,7 +1392,16 @@ version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
"getrandom 0.2.7",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
dependencies = [
"rand_core 0.5.1",
]
[[package]]
@@ -1277,7 +1419,7 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [
"getrandom",
"getrandom 0.2.7",
"redox_syscall",
"thiserror",
]
@@ -1368,6 +1510,7 @@ dependencies = [
"cgroups-rs",
"futures",
"inotify",
"kata-sys-util",
"lazy_static",
"libc",
"libseccomp",
@@ -1557,6 +1700,16 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "subprocess"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c2e86926081dda636c546d8c5e641661049d7562a68f5488be4a1f7f66f6086"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "syn"
version = "1.0.91"
@@ -1834,6 +1987,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
@@ -2010,7 +2169,7 @@ dependencies = [
"nix 0.23.1",
"once_cell",
"ordered-stream",
"rand",
"rand 0.8.5",
"serde",
"serde_repr",
"sha1",

View File

@@ -9,6 +9,7 @@ edition = "2018"
[dependencies]
rustjail = { path = "../../../agent/rustjail", features = ["standard-oci-runtime"] }
oci = { path = "../../../libs/oci" }
kata-sys-util = { path = "../../../libs/kata-sys-util" }
logging = { path = "../../../libs/logging" }
derive_builder = "0.10.2"
libc = "0.2.108"

View File

@@ -19,18 +19,20 @@ use oci::{ContainerState, State as OCIState};
use procfs;
use rustjail::cgroups::fs::Manager as CgroupManager;
use rustjail::{
container::{self, BaseContainer, LinuxContainer, EXEC_FIFO_FILENAME},
container::{BaseContainer, LinuxContainer, EXEC_FIFO_FILENAME},
process::{Process, ProcessOperations},
specconv::CreateOpts,
};
use scopeguard::defer;
use slog::{debug, Logger};
use slog::{debug, info, Logger};
use std::{
env::current_dir,
fs,
path::{Path, PathBuf},
};
use kata_sys_util::hooks::HookStates;
pub const CONFIG_FILE_NAME: &str = "config.json";
#[derive(Debug, Copy, Clone, PartialEq)]
@@ -139,14 +141,10 @@ impl Container {
annotations: spec.annotations.clone(),
};
if spec.hooks.is_some() {
let hooks = spec
.hooks
.as_ref()
.ok_or_else(|| anyhow!("hooks config was not present"))?;
for h in hooks.poststop.iter() {
container::execute_hook(logger, h, &oci_state).await?;
}
if let Some(hooks) = spec.hooks.as_ref() {
info!(&logger, "Poststop Hooks");
let mut poststop_hookstates = HookStates::new();
poststop_hookstates.execute_hooks(&hooks.poststop, Some(oci_state.clone()))?;
}
match oci_state.status {

View File

@@ -153,36 +153,36 @@ get_kernel() {
return
fi
#Remove extra 'v'
version=${version#v}
#Remove extra 'v'
version=${version#v}
major_version=$(echo "${version}" | cut -d. -f1)
kernel_tarball="linux-${version}.tar.xz"
major_version=$(echo "${version}" | cut -d. -f1)
kernel_tarball="linux-${version}.tar.xz"
if [ ! -f sha256sums.asc ] || ! grep -q "${kernel_tarball}" sha256sums.asc; then
shasum_url="https://cdn.kernel.org/pub/linux/kernel/v${major_version}.x/sha256sums.asc"
info "Download kernel checksum file: sha256sums.asc from ${shasum_url}"
curl --fail -OL "${shasum_url}"
fi
grep "${kernel_tarball}" sha256sums.asc >"${kernel_tarball}.sha256"
if [ ! -f sha256sums.asc ] || ! grep -q "${kernel_tarball}" sha256sums.asc; then
shasum_url="https://cdn.kernel.org/pub/linux/kernel/v${major_version}.x/sha256sums.asc"
info "Download kernel checksum file: sha256sums.asc from ${shasum_url}"
curl --fail -OL "${shasum_url}"
fi
grep "${kernel_tarball}" sha256sums.asc >"${kernel_tarball}.sha256"
if [ -f "${kernel_tarball}" ] && ! sha256sum -c "${kernel_tarball}.sha256"; then
info "invalid kernel tarball ${kernel_tarball} removing "
rm -f "${kernel_tarball}"
fi
if [ ! -f "${kernel_tarball}" ]; then
info "Download kernel version ${version}"
info "Download kernel"
curl --fail -OL "https://www.kernel.org/pub/linux/kernel/v${major_version}.x/${kernel_tarball}"
else
info "kernel tarball already downloaded"
fi
if [ -f "${kernel_tarball}" ] && ! sha256sum -c "${kernel_tarball}.sha256"; then
info "invalid kernel tarball ${kernel_tarball} removing "
rm -f "${kernel_tarball}"
fi
if [ ! -f "${kernel_tarball}" ]; then
info "Download kernel version ${version}"
info "Download kernel"
curl --fail -OL "https://www.kernel.org/pub/linux/kernel/v${major_version}.x/${kernel_tarball}"
else
info "kernel tarball already downloaded"
fi
sha256sum -c "${kernel_tarball}.sha256"
sha256sum -c "${kernel_tarball}.sha256"
tar xf "${kernel_tarball}"
tar xf "${kernel_tarball}"
mv "linux-${version}" "${kernel_path}"
mv "linux-${version}" "${kernel_path}"
}
get_major_kernel_version() {

View File

@@ -16,6 +16,7 @@ RUN apt-get update && \
flex \
git \
iptables \
kmod \
libelf-dev \
libssl-dev \
patch && \

View File

@@ -75,7 +75,7 @@ assets:
url: "https://github.com/cloud-hypervisor/cloud-hypervisor"
uscan-url: >-
https://github.com/cloud-hypervisor/cloud-hypervisor/tags.*/v?(\d\S+)\.tar\.gz
version: "v28.0"
version: "v28.1"
firecracker:
description: "Firecracker micro-VMM"