Merge pull request #5788 from openanolis/runtime-rs-ocihook

runtime-rs: add oci hook support
This commit is contained in:
Chao Wu
2023-03-03 01:06:21 +08:00
committed by GitHub
28 changed files with 500 additions and 35 deletions

View File

@@ -11,6 +11,7 @@ Kata Containers design documents:
- [Host cgroups](host-cgroups.md) - [Host cgroups](host-cgroups.md)
- [Agent systemd cgroup](agent-systemd-cgroup.md) - [Agent systemd cgroup](agent-systemd-cgroup.md)
- [`Inotify` support](inotify.md) - [`Inotify` support](inotify.md)
- [`Hooks` support](hooks-handling.md)
- [Metrics(Kata 2.0)](kata-2-0-metrics.md) - [Metrics(Kata 2.0)](kata-2-0-metrics.md)
- [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md) - [Design for Kata Containers `Lazyload` ability with `nydus`](kata-nydus-design.md)
- [Design for direct-assigned volume](direct-blk-device-assignment.md) - [Design for direct-assigned volume](direct-blk-device-assignment.md)

View File

@@ -0,0 +1,63 @@
# Kata Containers support for `Hooks`
## Introduction
During a container's lifecycle, different hooks can be executed to perform custom actions. Kata Containers supports two types of hooks: `OCI Hooks` and `Kata Hooks`.
### OCI Hooks
The OCI Spec stipulates six hooks that can be executed at different time points and in different namespaces: `Prestart Hooks`, `CreateRuntime Hooks`, `CreateContainer Hooks`, `StartContainer Hooks`, `Poststart Hooks` and `Poststop Hooks`. Kata Containers supports these hook types in a way that is as compatible with the OCI Spec as possible.
The path and arguments of these hooks will be passed to Kata for execution via `bundle/config.json`. For example:
```
...
"hooks": {
"prestart": [
{
"path": "/usr/bin/prestart-hook",
"args": ["prestart-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
],
"createRuntime": [
{
"path": "/usr/bin/createRuntime-hook",
"args": ["createRuntime-hook", "arg1", "arg2"],
"env": [ "key1=value1"]
}
]
}
...
```
### Kata Hooks
In Kata, we support three additional kinds of hooks that are executed in the guest VM: `Guest Prestart Hook`, `Guest Poststart Hook` and `Guest Poststop Hook`.
The executable files for Kata Hooks must be packaged in the *guest rootfs*. The file path to those guest hooks should be specified in the configuration file, and guest hooks must be stored in a subdirectory of `guest_hook_path` according to their hook type. For example:
+ In configuration file:
```
guest_hook_path="/usr/share/hooks"
```
+ In guest rootfs, prestart-hook is stored in `/usr/share/hooks/prestart/prestart-hook`.
## Execution
The table below summarizes when and where those different hooks are executed in Kata Containers:
| Hook Name | Hook Type | Hook Path | Exec Place | Exec Time |
|---|---|---|---|---|
| `Prestart(deprecated)` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created. |
| `CreateRuntime` | OCI hook | host runtime namespace | host runtime namespace | After VM is started, before container is created, after `Prestart` hooks. |
| `CreateContainer` | OCI hook | host runtime namespace | host vmm namespace* | After VM is started, before container is created, after `CreateRuntime` hooks. |
| `StartContainer` | OCI hook | guest container namespace | guest container namespace | After container is created, before container is started. |
| `Poststart` | OCI hook | host runtime namespace | host runtime namespace | After container is started, before start operation returns. |
| `Poststop` | OCI hook | host runtime namespace | host runtime namespace | After container is deleted, before delete operation returns. |
| `Guest Prestart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, before container command is executed. |
| `Guest Poststart` | Kata hook | guest agent namespace | guest agent namespace | During start operation, after container command is executed, before start operation returns. |
| `Guest Poststop` | Kata hook | guest agent namespace | guest agent namespace | During delete operation, after container is deleted, before delete operation returns. |
+ `Hook Path` specifies the namespace in which the hook's path is resolved.
+ `Exec Place` specifies in which namespace those hooks can be executed.
+ For `CreateContainer` Hooks, the OCI Spec requires them to run inside the container namespace while the hook executable path is resolved in the host runtime namespace, which is a non-starter for VM-based containers. Therefore, Kata Containers is designed to run them in the *host vmm namespace* instead.
+ `Exec Time` specifies at which time point those hooks can be executed.

View File

@@ -374,13 +374,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
let buf = read_sync(crfd)?; let buf = read_sync(crfd)?;
let spec_str = std::str::from_utf8(&buf)?; let spec_str = std::str::from_utf8(&buf)?;
let spec: oci::Spec = serde_json::from_str(spec_str)?; let spec: oci::Spec = serde_json::from_str(spec_str)?;
log_child!(cfd_log, "notify parent to send oci process"); log_child!(cfd_log, "notify parent to send oci process");
write_sync(cwfd, SYNC_SUCCESS, "")?; write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?; let buf = read_sync(crfd)?;
let process_str = std::str::from_utf8(&buf)?; let process_str = std::str::from_utf8(&buf)?;
let oci_process: oci::Process = serde_json::from_str(process_str)?; let oci_process: oci::Process = serde_json::from_str(process_str)?;
log_child!(cfd_log, "notify parent to send oci state");
write_sync(cwfd, SYNC_SUCCESS, "")?;
let buf = read_sync(crfd)?;
let state_str = std::str::from_utf8(&buf)?;
let mut state: oci::State = serde_json::from_str(state_str)?;
log_child!(cfd_log, "notify parent to send cgroup manager"); log_child!(cfd_log, "notify parent to send cgroup manager");
write_sync(cwfd, SYNC_SUCCESS, "")?; write_sync(cwfd, SYNC_SUCCESS, "")?;
@@ -743,6 +748,19 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
unistd::read(fd, buf)?; unistd::read(fd, buf)?;
} }
if init {
// StartContainer Hooks:
// * should be run in container namespace
// * should be run after container is created and before container is started (before user-specific command is executed)
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#startcontainer-hooks
state.pid = std::process::id() as i32;
state.status = oci::ContainerState::Created;
if let Some(hooks) = spec.hooks.as_ref() {
let mut start_container_states = HookStates::new();
start_container_states.execute_hooks(&hooks.start_container, Some(state))?;
}
}
// With NoNewPrivileges, we should set seccomp as close to // With NoNewPrivileges, we should set seccomp as close to
// do_exec as possible in order to reduce the amount of // do_exec as possible in order to reduce the amount of
// system calls in the seccomp profiles. // system calls in the seccomp profiles.
@@ -1323,7 +1341,6 @@ async fn join_namespaces(
write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?; write_async(pipe_w, SYNC_DATA, spec_str.as_str()).await?;
info!(logger, "wait child received oci spec"); info!(logger, "wait child received oci spec");
read_async(pipe_r).await?; read_async(pipe_r).await?;
info!(logger, "send oci process from parent to child"); info!(logger, "send oci process from parent to child");
@@ -1333,6 +1350,13 @@ async fn join_namespaces(
info!(logger, "wait child received oci process"); info!(logger, "wait child received oci process");
read_async(pipe_r).await?; read_async(pipe_r).await?;
info!(logger, "try to send state from parent to child");
let state_str = serde_json::to_string(st)?;
write_async(pipe_w, SYNC_DATA, state_str.as_str()).await?;
info!(logger, "wait child received oci state");
read_async(pipe_r).await?;
let cm_str = if use_systemd_cgroup { let cm_str = if use_systemd_cgroup {
serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap()) serde_json::to_string(cm.as_any()?.downcast_ref::<SystemdManager>().unwrap())
} else { } else {

View File

@@ -153,13 +153,17 @@ fn hook_grpc_to_oci(h: &[grpcHook]) -> Vec<oci::Hook> {
fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks { fn hooks_grpc_to_oci(h: &grpc::Hooks) -> oci::Hooks {
let prestart = hook_grpc_to_oci(h.Prestart.as_ref()); let prestart = hook_grpc_to_oci(h.Prestart.as_ref());
let create_runtime = hook_grpc_to_oci(h.CreateRuntime.as_ref());
let create_container = hook_grpc_to_oci(h.CreateContainer.as_ref());
let start_container = hook_grpc_to_oci(h.StartContainer.as_ref());
let poststart = hook_grpc_to_oci(h.Poststart.as_ref()); let poststart = hook_grpc_to_oci(h.Poststart.as_ref());
let poststop = hook_grpc_to_oci(h.Poststop.as_ref()); let poststop = hook_grpc_to_oci(h.Poststop.as_ref());
oci::Hooks { oci::Hooks {
prestart, prestart,
create_runtime,
create_container,
start_container,
poststart, poststart,
poststop, poststop,
} }
@@ -831,6 +835,45 @@ mod tests {
Timeout: 10, Timeout: 10,
..Default::default() ..Default::default()
}])), }])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default() ..Default::default()
}, },
result: oci::Hooks { result: oci::Hooks {
@@ -860,6 +903,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]), env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10), timeout: Some(10),
}]), }]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
}, },
}, },
TestData { TestData {
@@ -892,6 +953,45 @@ mod tests {
Timeout: 10, Timeout: 10,
..Default::default() ..Default::default()
}])), }])),
CreateRuntime: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createruntimepath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
CreateContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("createcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
StartContainer: protobuf::RepeatedField::from(Vec::from([grpc::Hook {
Path: String::from("startcontainerpath"),
Args: protobuf::RepeatedField::from(Vec::from([
String::from("arg1"),
String::from("arg2"),
])),
Env: protobuf::RepeatedField::from(Vec::from([
String::from("env1"),
String::from("env2"),
])),
Timeout: 10,
..Default::default()
}])),
..Default::default() ..Default::default()
}, },
result: oci::Hooks { result: oci::Hooks {
@@ -908,6 +1008,24 @@ mod tests {
env: Vec::from([String::from("env1"), String::from("env2")]), env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10), timeout: Some(10),
}]), }]),
create_runtime: Vec::from([oci::Hook {
path: String::from("createruntimepath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
create_container: Vec::from([oci::Hook {
path: String::from("createcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
start_container: Vec::from([oci::Hook {
path: String::from("startcontainerpath"),
args: Vec::from([String::from("arg1"), String::from("arg2")]),
env: Vec::from([String::from("env1"), String::from("env2")]),
timeout: Some(10),
}]),
}, },
}, },
]; ];

View File

@@ -50,6 +50,8 @@ pub struct InstanceInfo {
pub vmm_version: String, pub vmm_version: String,
/// The pid of the current VMM process. /// The pid of the current VMM process.
pub pid: u32, pub pid: u32,
/// The tid of the current VMM master thread.
pub master_tid: u32,
/// The state of async actions. /// The state of async actions.
pub async_state: AsyncState, pub async_state: AsyncState,
/// List of tids of vcpu threads (vcpu index, tid) /// List of tids of vcpu threads (vcpu index, tid)
@@ -66,6 +68,7 @@ impl InstanceInfo {
state: InstanceState::Uninitialized, state: InstanceState::Uninitialized,
vmm_version, vmm_version,
pid: std::process::id(), pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized, async_state: AsyncState::Uninitialized,
tids: Vec::new(), tids: Vec::new(),
last_instance_downtime: 0, last_instance_downtime: 0,
@@ -80,6 +83,7 @@ impl Default for InstanceInfo {
state: InstanceState::Uninitialized, state: InstanceState::Uninitialized,
vmm_version: env!("CARGO_PKG_VERSION").to_string(), vmm_version: env!("CARGO_PKG_VERSION").to_string(),
pid: std::process::id(), pid: std::process::id(),
master_tid: 0,
async_state: AsyncState::Uninitialized, async_state: AsyncState::Uninitialized,
tids: Vec::new(), tids: Vec::new(),
last_instance_downtime: 0, last_instance_downtime: 0,

View File

@@ -193,6 +193,12 @@ pub struct Hooks {
#[serde(default, skip_serializing_if = "Vec::is_empty")] #[serde(default, skip_serializing_if = "Vec::is_empty")]
pub prestart: Vec<Hook>, pub prestart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")] #[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_runtime: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub create_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub start_container: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststart: Vec<Hook>, pub poststart: Vec<Hook>,
#[serde(default, skip_serializing_if = "Vec::is_empty")] #[serde(default, skip_serializing_if = "Vec::is_empty")]
pub poststop: Vec<Hook>, pub poststop: Vec<Hook>,
@@ -1401,6 +1407,7 @@ mod tests {
env: vec![], env: vec![],
timeout: None, timeout: None,
}], }],
..Default::default()
}), }),
annotations: [ annotations: [
("com.example.key1".to_string(), "value1".to_string()), ("com.example.key1".to_string(), "value1".to_string()),

View File

@@ -166,6 +166,15 @@ message Hooks {
// Poststop is a list of hooks to be run after the container process exits. // Poststop is a list of hooks to be run after the container process exits.
repeated Hook Poststop = 3 [(gogoproto.nullable) = false]; repeated Hook Poststop = 3 [(gogoproto.nullable) = false];
// CreateRuntime is a list of hooks to be run during the creation of runtime(sandbox).
repeated Hook CreateRuntime = 4 [(gogoproto.nullable) = false];
// CreateContainer is a list of hooks to be run after VM is started, and before container is created.
repeated Hook CreateContainer = 5 [(gogoproto.nullable) = false];
// StartContainer is a list of hooks to be run after container is created, but before it is started.
repeated Hook StartContainer = 6 [(gogoproto.nullable) = false];
} }
message Hook { message Hook {

View File

@@ -294,6 +294,9 @@ impl From<oci::Hooks> for crate::oci::Hooks {
fn from(from: Hooks) -> Self { fn from(from: Hooks) -> Self {
crate::oci::Hooks { crate::oci::Hooks {
Prestart: from_vec(from.prestart), Prestart: from_vec(from.prestart),
CreateRuntime: from_vec(from.create_runtime),
CreateContainer: from_vec(from.create_container),
StartContainer: from_vec(from.start_container),
Poststart: from_vec(from.poststart), Poststart: from_vec(from.poststart),
Poststop: from_vec(from.poststop), Poststop: from_vec(from.poststop),
unknown_fields: Default::default(), unknown_fields: Default::default(),
@@ -970,20 +973,34 @@ impl From<crate::oci::Hook> for oci::Hook {
impl From<crate::oci::Hooks> for oci::Hooks { impl From<crate::oci::Hooks> for oci::Hooks {
fn from(mut from: crate::oci::Hooks) -> Self { fn from(mut from: crate::oci::Hooks) -> Self {
let mut prestart = Vec::new(); let prestart = from.take_Prestart().into_iter().map(|i| i.into()).collect();
for hook in from.take_Prestart().to_vec() { let create_runtime = from
prestart.push(hook.into()) .take_CreateRuntime()
} .into_iter()
let mut poststart = Vec::new(); .map(|i| i.into())
for hook in from.take_Poststart().to_vec() { .collect();
poststart.push(hook.into()); let create_container = from
} .take_CreateContainer()
let mut poststop = Vec::new(); .into_iter()
for hook in from.take_Poststop().to_vec() { .map(|i| i.into())
poststop.push(hook.into()); .collect();
} let start_container = from
.take_StartContainer()
.into_iter()
.map(|i| i.into())
.collect();
let poststart = from
.take_Poststart()
.into_iter()
.map(|i| i.into())
.collect();
let poststop = from.take_Poststop().into_iter().map(|i| i.into()).collect();
oci::Hooks { oci::Hooks {
prestart, prestart,
create_runtime,
create_container,
start_container,
poststart, poststart,
poststop, poststop,
} }

View File

@@ -1768,6 +1768,8 @@ dependencies = [
"bitflags", "bitflags",
"cfg-if 1.0.0", "cfg-if 1.0.0",
"libc", "libc",
"memoffset 0.6.5",
"pin-utils",
] ]
[[package]] [[package]]
@@ -2455,12 +2457,15 @@ dependencies = [
"hyper", "hyper",
"hyperlocal", "hyperlocal",
"hypervisor", "hypervisor",
"kata-sys-util",
"kata-types", "kata-types",
"lazy_static", "lazy_static",
"linux_container", "linux_container",
"logging", "logging",
"nix 0.25.1",
"oci", "oci",
"persist", "persist",
"serde_json",
"shim-interface", "shim-interface",
"slog", "slog",
"slog-scope", "slog-scope",

View File

@@ -124,7 +124,6 @@ pub struct CreateContainerRequest {
pub devices: Vec<Device>, pub devices: Vec<Device>,
pub storages: Vec<Storage>, pub storages: Vec<Storage>,
pub oci: Option<oci::Spec>, pub oci: Option<oci::Spec>,
pub guest_hooks: Option<oci::Hooks>,
pub sandbox_pidns: bool, pub sandbox_pidns: bool,
pub rootfs_mounts: Vec<oci::Mount>, pub rootfs_mounts: Vec<oci::Mount>,
} }

View File

@@ -472,6 +472,10 @@ impl CloudHypervisorInner {
Ok(Vec::<u32>::new()) Ok(Vec::<u32>::new())
} }
/// Returns the tid of the VMM master thread.
///
/// # Panics
///
/// Not implemented yet: the body is a bare `todo!()`, so awaiting this
/// function always panics instead of returning `Ok` or `Err`.
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
todo!()
}
pub(crate) async fn check(&self) -> Result<()> { pub(crate) async fn check(&self) -> Result<()> {
Ok(()) Ok(())
} }

View File

@@ -118,6 +118,11 @@ impl Hypervisor for CloudHypervisor {
inner.get_pids().await inner.get_pids().await
} }
/// Takes the read lock on `inner` and forwards the VMM master tid query to it.
///
/// The read guard is held until the inner call has completed.
async fn get_vmm_master_tid(&self) -> Result<u32> {
    self.inner.read().await.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> { async fn check(&self) -> Result<()> {
let inner = self.inner.read().await; let inner = self.inner.read().await;
inner.check().await inner.check().await

View File

@@ -127,6 +127,11 @@ impl DragonballInner {
Ok(Vec::from_iter(pids.into_iter())) Ok(Vec::from_iter(pids.into_iter()))
} }
/// Returns the master tid reported by the underlying `vmm_instance`.
///
/// This never produces an `Err` itself; the value is always wrapped in `Ok`.
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
    Ok(self.vmm_instance.get_vmm_master_tid())
}
pub(crate) async fn check(&self) -> Result<()> { pub(crate) async fn check(&self) -> Result<()> {
Ok(()) Ok(())
} }

View File

@@ -117,6 +117,11 @@ impl Hypervisor for Dragonball {
inner.get_pids().await inner.get_pids().await
} }
/// Takes the read lock on `inner` and forwards the VMM master tid query to it.
///
/// The read guard is held until the inner call has completed.
async fn get_vmm_master_tid(&self) -> Result<u32> {
    self.inner.read().await.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> { async fn check(&self) -> Result<()> {
let inner = self.inner.read().await; let inner = self.inner.read().await;
inner.check().await inner.check().await

View File

@@ -75,6 +75,12 @@ impl VmmInstance {
share_info_lock.write().unwrap().id = String::from(id); share_info_lock.write().unwrap().id = String::from(id);
} }
/// Reads the `master_tid` field out of the shared instance info.
///
/// # Panics
///
/// Panics if acquiring the read lock on the shared info fails, because the
/// lock result is unwrapped.
pub fn get_vmm_master_tid(&self) -> u32 {
    let shared = self.vmm_shared_info.clone();
    // Bind the guard to a local so it is released before `shared` is dropped.
    let guard = shared.read().unwrap();
    guard.master_tid
}
pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> { pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> {
let info = self.vmm_shared_info.clone(); let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().tids.clone(); let result = info.read().unwrap().tids.clone();
@@ -103,6 +109,7 @@ impl VmmInstance {
Some(kvm.into_raw_fd()), Some(kvm.into_raw_fd()),
) )
.expect("Failed to start vmm"); .expect("Failed to start vmm");
let vmm_shared_info = self.get_shared_info();
self.vmm_thread = Some( self.vmm_thread = Some(
thread::Builder::new() thread::Builder::new()
@@ -110,6 +117,9 @@ impl VmmInstance {
.spawn(move || { .spawn(move || {
|| -> Result<i32> { || -> Result<i32> {
debug!(sl!(), "run vmm thread start"); debug!(sl!(), "run vmm thread start");
let cur_tid = nix::unistd::gettid().as_raw() as u32;
vmm_shared_info.write().unwrap().master_tid = cur_tid;
if let Some(netns_path) = netns { if let Some(netns_path) = netns {
info!(sl!(), "set netns for vmm master {}", &netns_path); info!(sl!(), "set netns for vmm master {}", &netns_path);
let netns_fd = File::open(&netns_path) let netns_fd = File::open(&netns_path)

View File

@@ -87,6 +87,7 @@ pub trait Hypervisor: Send + Sync {
async fn hypervisor_config(&self) -> HypervisorConfig; async fn hypervisor_config(&self) -> HypervisorConfig;
async fn get_thread_ids(&self) -> Result<VcpuThreadIds>; async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
async fn get_pids(&self) -> Result<Vec<u32>>; async fn get_pids(&self) -> Result<Vec<u32>>;
async fn get_vmm_master_tid(&self) -> Result<u32>;
async fn cleanup(&self) -> Result<()>; async fn cleanup(&self) -> Result<()>;
async fn check(&self) -> Result<()>; async fn check(&self) -> Result<()>;
async fn get_jailer_root(&self) -> Result<String>; async fn get_jailer_root(&self) -> Result<String>;

View File

@@ -89,6 +89,11 @@ impl QemuInner {
todo!() todo!()
} }
/// Returns the tid of the VMM master thread.
///
/// # Panics
///
/// Not implemented yet: after logging the call, the body hits `todo!()`,
/// so awaiting this function always panics instead of returning a value.
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
info!(sl!(), "QemuInner::get_vmm_master_tid()");
todo!()
}
pub(crate) async fn cleanup(&self) -> Result<()> { pub(crate) async fn cleanup(&self) -> Result<()> {
info!(sl!(), "QemuInner::cleanup()"); info!(sl!(), "QemuInner::cleanup()");
todo!() todo!()

View File

@@ -103,6 +103,11 @@ impl Hypervisor for Qemu {
inner.get_thread_ids().await inner.get_thread_ids().await
} }
/// Takes the read lock on `inner` and forwards the VMM master tid query to it.
///
/// The read guard is held until the inner call has completed.
async fn get_vmm_master_tid(&self) -> Result<u32> {
    self.inner.read().await.get_vmm_master_tid().await
}
async fn cleanup(&self) -> Result<()> { async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await; let inner = self.inner.read().await;
inner.cleanup().await inner.cleanup().await

View File

@@ -5,6 +5,7 @@
// //
mod endpoint; mod endpoint;
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint; pub use endpoint::Endpoint;
mod network_entity; mod network_entity;
mod network_info; mod network_info;
@@ -17,7 +18,7 @@ use network_with_netns::NetworkWithNetns;
mod network_pair; mod network_pair;
use network_pair::NetworkPair; use network_pair::NetworkPair;
mod utils; mod utils;
pub use endpoint::endpoint_persist::EndpointState; pub use utils::netns::NetnsGuard;
use std::sync::Arc; use std::sync::Arc;

View File

@@ -10,12 +10,12 @@ use anyhow::{Context, Result};
use nix::sched::{setns, CloneFlags}; use nix::sched::{setns, CloneFlags};
use nix::unistd::{getpid, gettid}; use nix::unistd::{getpid, gettid};
pub(crate) struct NetnsGuard { pub struct NetnsGuard {
old_netns: Option<File>, old_netns: Option<File>,
} }
impl NetnsGuard { impl NetnsGuard {
pub(crate) fn new(new_netns_path: &str) -> Result<Self> { pub fn new(new_netns_path: &str) -> Result<Self> {
let old_netns = if !new_netns_path.is_empty() { let old_netns = if !new_netns_path.is_empty() {
let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net"); let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net");
let old_netns = File::open(&current_netns_path) let old_netns = File::open(&current_netns_path)

View File

@@ -13,9 +13,12 @@ slog-scope = "4.4.0"
tokio = { version = "1.8.0", features = ["rt-multi-thread"] } tokio = { version = "1.8.0", features = ["rt-multi-thread"] }
hyper = { version = "0.14.20", features = ["stream", "server", "http1"] } hyper = { version = "0.14.20", features = ["stream", "server", "http1"] }
hyperlocal = "0.8" hyperlocal = "0.8"
serde_json = "1.0.88"
nix = "0.25.0"
common = { path = "./common" } common = { path = "./common" }
kata-types = { path = "../../../libs/kata-types" } kata-types = { path = "../../../libs/kata-types" }
kata-sys-util = { path = "../../../libs/kata-sys-util" }
logging = { path = "../../../libs/logging"} logging = { path = "../../../libs/logging"}
oci = { path = "../../../libs/oci" } oci = { path = "../../../libs/oci" }
shim-interface = { path = "../../../libs/shim-interface" } shim-interface = { path = "../../../libs/shim-interface" }

View File

@@ -26,3 +26,4 @@ agent = { path = "../../agent" }
kata-sys-util = { path = "../../../../libs/kata-sys-util" } kata-sys-util = { path = "../../../../libs/kata-sys-util" }
kata-types = { path = "../../../../libs/kata-types" } kata-types = { path = "../../../../libs/kata-types" }
oci = { path = "../../../../libs/oci" } oci = { path = "../../../../libs/oci" }

View File

@@ -9,7 +9,13 @@ use async_trait::async_trait;
#[async_trait] #[async_trait]
pub trait Sandbox: Send + Sync { pub trait Sandbox: Send + Sync {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()>; async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()>;
async fn stop(&self) -> Result<()>; async fn stop(&self) -> Result<()>;
async fn cleanup(&self) -> Result<()>; async fn cleanup(&self) -> Result<()>;
async fn shutdown(&self) -> Result<()>; async fn shutdown(&self) -> Result<()>;

View File

@@ -18,6 +18,7 @@ use hypervisor::Param;
use kata_types::{ use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig, annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
}; };
#[cfg(feature = "linux")] #[cfg(feature = "linux")]
use linux_container::LinuxContainer; use linux_container::LinuxContainer;
use persist::sandbox_persist::Persist; use persist::sandbox_persist::Persist;
@@ -50,6 +51,8 @@ impl RuntimeHandlerManagerInner {
async fn init_runtime_handler( async fn init_runtime_handler(
&mut self, &mut self,
spec: &oci::Spec,
state: &oci::State,
netns: Option<String>, netns: Option<String>,
dns: Vec<String>, dns: Vec<String>,
config: Arc<TomlConfig>, config: Arc<TomlConfig>,
@@ -74,14 +77,19 @@ impl RuntimeHandlerManagerInner {
// start sandbox // start sandbox
runtime_instance runtime_instance
.sandbox .sandbox
.start(netns, dns) .start(netns, dns, spec, state)
.await .await
.context("start sandbox")?; .context("start sandbox")?;
self.runtime_instance = Some(Arc::new(runtime_instance)); self.runtime_instance = Some(Arc::new(runtime_instance));
Ok(()) Ok(())
} }
async fn try_init(&mut self, spec: &oci::Spec, options: &Option<Vec<u8>>) -> Result<()> { async fn try_init(
&mut self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
// return if runtime instance has init // return if runtime instance has init
if self.runtime_instance.is_some() { if self.runtime_instance.is_some() {
return Ok(()); return Ok(());
@@ -121,7 +129,7 @@ impl RuntimeHandlerManagerInner {
} }
let config = load_config(spec, options).context("load config")?; let config = load_config(spec, options).context("load config")?;
self.init_runtime_handler(netns, dns, Arc::new(config)) self.init_runtime_handler(spec, state, netns, dns, Arc::new(config))
.await .await
.context("init runtime handler")?; .context("init runtime handler")?;
@@ -207,10 +215,11 @@ impl RuntimeHandlerManager {
async fn try_init_runtime_instance( async fn try_init_runtime_instance(
&self, &self,
spec: &oci::Spec, spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>, options: &Option<Vec<u8>>,
) -> Result<()> { ) -> Result<()> {
let mut inner = self.inner.write().await; let mut inner = self.inner.write().await;
inner.try_init(spec, options).await inner.try_init(spec, state, options).await
} }
pub async fn handler_message(&self, req: Request) -> Result<Response> { pub async fn handler_message(&self, req: Request) -> Result<Response> {
@@ -222,8 +231,16 @@ impl RuntimeHandlerManager {
oci::OCI_SPEC_CONFIG_FILE_NAME oci::OCI_SPEC_CONFIG_FILE_NAME
); );
let spec = oci::Spec::load(&bundler_path).context("load spec")?; let spec = oci::Spec::load(&bundler_path).context("load spec")?;
let state = oci::State {
version: spec.version.clone(),
id: container_config.container_id.to_string(),
status: oci::ContainerState::Creating,
pid: 0,
bundle: bundler_path,
annotations: spec.annotations.clone(),
};
self.try_init_runtime_instance(&spec, &container_config.options) self.try_init_runtime_instance(&spec, &state, &container_config.options)
.await .await
.context("try init runtime instance")?; .context("try init runtime instance")?;
let instance = self let instance = self

View File

@@ -37,6 +37,7 @@ pub struct Container {
pid: u32, pid: u32,
pub container_id: ContainerID, pub container_id: ContainerID,
config: ContainerConfig, config: ContainerConfig,
spec: oci::Spec,
inner: Arc<RwLock<ContainerInner>>, inner: Arc<RwLock<ContainerInner>>,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>, resource_manager: Arc<ResourceManager>,
@@ -47,6 +48,7 @@ impl Container {
pub fn new( pub fn new(
pid: u32, pid: u32,
config: ContainerConfig, config: ContainerConfig,
spec: oci::Spec,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>, resource_manager: Arc<ResourceManager>,
) -> Result<Self> { ) -> Result<Self> {
@@ -67,6 +69,7 @@ impl Container {
pid, pid,
container_id, container_id,
config, config,
spec,
inner: Arc::new(RwLock::new(ContainerInner::new( inner: Arc::new(RwLock::new(ContainerInner::new(
agent.clone(), agent.clone(),
init_process, init_process,
@@ -382,11 +385,31 @@ impl Container {
.context("agent update container")?; .context("agent update container")?;
Ok(()) Ok(())
} }
/// Returns a clone of the `ContainerConfig` stored on this container.
///
/// Declared `async` although the body does not await anything.
pub async fn config(&self) -> ContainerConfig {
self.config.clone()
}
/// Returns a clone of the `oci::Spec` stored on this container.
///
/// Declared `async` although the body does not await anything.
pub async fn spec(&self) -> oci::Spec {
self.spec.clone()
}
} }
fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> { fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
// hook should be done on host // Only the StartContainer hook needs to be reserved for execution in the guest
spec.hooks = None; let start_container_hooks = match spec.hooks.as_ref() {
Some(hooks) => hooks.start_container.clone(),
None => Vec::new(),
};
spec.hooks = if start_container_hooks.is_empty() {
None
} else {
Some(oci::Hooks {
start_container: start_container_hooks,
..Default::default()
})
};
// special process K8s ephemeral volumes. // special process K8s ephemeral volumes.
update_ephemeral_storage_type(spec); update_ephemeral_storage_type(spec);

View File

@@ -5,11 +5,10 @@
// //
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use std::{collections::HashMap, sync::Arc}; use std::{collections::HashMap, sync::Arc};
use agent::Agent; use agent::Agent;
use async_trait::async_trait;
use common::{ use common::{
error::Error, error::Error,
types::{ types::{
@@ -19,10 +18,14 @@ use common::{
}, },
ContainerManager, ContainerManager,
}; };
use hypervisor::Hypervisor;
use oci::Process as OCIProcess; use oci::Process as OCIProcess;
use resource::network::NetnsGuard;
use resource::ResourceManager; use resource::ResourceManager;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use kata_sys_util::hooks::HookStates;
use super::{logger_with_process, Container}; use super::{logger_with_process, Container};
pub struct VirtContainerManager { pub struct VirtContainerManager {
@@ -31,6 +34,7 @@ pub struct VirtContainerManager {
containers: Arc<RwLock<HashMap<String, Container>>>, containers: Arc<RwLock<HashMap<String, Container>>>,
resource_manager: Arc<ResourceManager>, resource_manager: Arc<ResourceManager>,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
} }
impl VirtContainerManager { impl VirtContainerManager {
@@ -38,6 +42,7 @@ impl VirtContainerManager {
sid: &str, sid: &str,
pid: u32, pid: u32,
agent: Arc<dyn Agent>, agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>, resource_manager: Arc<ResourceManager>,
) -> Self { ) -> Self {
Self { Self {
@@ -46,6 +51,7 @@ impl VirtContainerManager {
containers: Default::default(), containers: Default::default(),
resource_manager, resource_manager,
agent, agent,
hypervisor,
} }
} }
} }
@@ -55,12 +61,37 @@ impl ContainerManager for VirtContainerManager {
async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result<PID> { async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result<PID> {
let container = Container::new( let container = Container::new(
self.pid, self.pid,
config, config.clone(),
spec.clone(),
self.agent.clone(), self.agent.clone(),
self.resource_manager.clone(), self.resource_manager.clone(),
) )
.context("new container")?; .context("new container")?;
// CreateContainer Hooks:
// * should be run in vmm namespace (hook path in runtime namespace)
// * should be run after the vm is started, before container is created, and after CreateRuntime Hooks
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createcontainer-hooks
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let vmm_netns_path = format!("/proc/{}/task/{}/ns/{}", self.pid, vmm_master_tid, "net");
let state = oci::State {
version: spec.version.clone(),
id: config.container_id.clone(),
status: oci::ContainerState::Creating,
pid: vmm_master_tid as i32,
bundle: config.bundle.clone(),
annotations: spec.annotations.clone(),
};
// new scope, CreateContainer hooks in which will execute in a new network namespace
{
let _netns_guard = NetnsGuard::new(&vmm_netns_path).context("vmm netns guard")?;
if let Some(hooks) = spec.hooks.as_ref() {
let mut create_container_hook_states = HookStates::new();
create_container_hook_states.execute_hooks(&hooks.create_container, Some(state))?;
}
}
let mut containers = self.containers.write().await; let mut containers = self.containers.write().await;
container.create(spec).await.context("create")?; container.create(spec).await.context("create")?;
containers.insert(container.container_id.to_string(), container); containers.insert(container.container_id.to_string(), container);
@@ -87,6 +118,26 @@ impl ContainerManager for VirtContainerManager {
let c = containers let c = containers
.remove(container_id) .remove(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?; .ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?;
// Poststop Hooks:
// * should be run in runtime namespace
// * should be run after the container is deleted but before delete operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststop
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Stopped,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststop_hook_states = HookStates::new();
poststop_hook_states.execute_hooks(&hooks.poststop, Some(state))?;
}
c.state_process(process).await.context("state process") c.state_process(process).await.context("state process")
} }
ProcessType::Exec => { ProcessType::Exec => {
@@ -190,6 +241,26 @@ impl ContainerManager for VirtContainerManager {
.get(container_id) .get(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?; .ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?;
c.start(process).await.context("start")?; c.start(process).await.context("start")?;
// Poststart Hooks:
// * should be run in runtime namespace
// * should be run after user-specific command is executed but before start operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststart
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Running,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststart_hook_states = HookStates::new();
poststart_hook_states.execute_hooks(&hooks.poststart, Some(state))?;
}
Ok(PID { pid: self.pid }) Ok(PID { pid: self.pid })
} }

View File

@@ -86,13 +86,18 @@ impl RuntimeHandler for VirtContainer {
sid, sid,
msg_sender, msg_sender,
agent.clone(), agent.clone(),
hypervisor, hypervisor.clone(),
resource_manager.clone(), resource_manager.clone(),
) )
.await .await
.context("new virt sandbox")?; .context("new virt sandbox")?;
let container_manager = let container_manager = container_manager::VirtContainerManager::new(
container_manager::VirtContainerManager::new(sid, pid, agent, resource_manager); sid,
pid,
agent,
hypervisor,
resource_manager,
);
Ok(RuntimeInstance { Ok(RuntimeInstance {
sandbox: Arc::new(sandbox), sandbox: Arc::new(sandbox),
container_manager: Arc::new(container_manager), container_manager: Arc::new(container_manager),

View File

@@ -17,6 +17,7 @@ use common::{
}; };
use containerd_shim_protos::events::task::TaskOOM; use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL}; use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::{ use kata_types::config::{
default::{DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT}, default::{DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT},
TomlConfig, TomlConfig,
@@ -117,11 +118,50 @@ impl VirtSandbox {
Ok(resource_configs) Ok(resource_configs)
} }
/// Executes the Prestart hooks followed by the CreateRuntime hooks.
///
/// Both hook batches receive a copy of `state` whose `pid` field is
/// overwritten with the hypervisor's VMM master thread id.
///
/// # Errors
/// Returns an error if the VMM master tid cannot be obtained, or if
/// executing either hook batch returns an error.
async fn execute_oci_hook_functions(
    &self,
    prestart_hooks: &[oci::Hook],
    create_runtime_hooks: &[oci::Hook],
    state: &oci::State,
) -> Result<()> {
    // for dragonball, we use vmm_master_tid
    let vmm_pid = self
        .hypervisor
        .get_vmm_master_tid()
        .await
        .context("get vmm master tid")?;

    // Work on a private copy so the caller's state is left untouched.
    let mut st = state.clone();
    st.pid = vmm_pid as i32;

    // Prestart Hooks [DEPRECATED in newest oci spec]:
    // * should be run in runtime namespace
    // * should be run after vm is started, but before container is created
    //   if Prestart Hook and CreateRuntime Hook are both supported
    // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#prestart
    let mut prestart_hook_states = HookStates::new();
    prestart_hook_states
        .execute_hooks(prestart_hooks, Some(st.clone()))
        .context("execute prestart hooks")?;

    // CreateRuntime Hooks:
    // * should be run in runtime namespace
    // * should be run when creating the runtime
    // * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createruntime-hooks
    let mut create_runtime_hook_states = HookStates::new();
    // `st` is not needed after this call, so hand it over instead of cloning again.
    create_runtime_hook_states
        .execute_hooks(create_runtime_hooks, Some(st))
        .context("execute create runtime hooks")?;

    Ok(())
}
} }
#[async_trait] #[async_trait]
impl Sandbox for VirtSandbox { impl Sandbox for VirtSandbox {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()> { async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()> {
let id = &self.sid; let id = &self.sid;
// if sandbox running, return // if sandbox running, return
@@ -149,6 +189,17 @@ impl Sandbox for VirtSandbox {
self.hypervisor.start_vm(10_000).await.context("start vm")?; self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm"); info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) = match spec.hooks.as_ref() {
Some(hooks) => (hooks.prestart.clone(), hooks.create_runtime.clone()),
None => (Vec::new(), Vec::new()),
};
self.execute_oci_hook_functions(&prestart_hooks, &create_runtime_hooks, state)
.await?;
// TODO: if prestart_hooks is not empty, rescan the network endpoints(rely on hotplug endpoints).
// see: https://github.com/kata-containers/kata-containers/issues/6378
// connect agent // connect agent
// set agent socket // set agent socket
let address = self let address = self