Merge pull request #5788 from openanolis/runtime-rs-ocihook

runtime-rs: add oci hook support
This commit is contained in:
Chao Wu
2023-03-03 01:06:21 +08:00
committed by GitHub
28 changed files with 500 additions and 35 deletions

View File

@@ -124,7 +124,6 @@ pub struct CreateContainerRequest {
pub devices: Vec<Device>,
pub storages: Vec<Storage>,
pub oci: Option<oci::Spec>,
pub guest_hooks: Option<oci::Hooks>,
pub sandbox_pidns: bool,
pub rootfs_mounts: Vec<oci::Mount>,
}

View File

@@ -472,6 +472,10 @@ impl CloudHypervisorInner {
Ok(Vec::<u32>::new())
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
todo!()
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -118,6 +118,11 @@ impl Hypervisor for CloudHypervisor {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -127,6 +127,11 @@ impl DragonballInner {
Ok(Vec::from_iter(pids.into_iter()))
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
let master_tid = self.vmm_instance.get_vmm_master_tid();
Ok(master_tid)
}
pub(crate) async fn check(&self) -> Result<()> {
Ok(())
}

View File

@@ -117,6 +117,11 @@ impl Hypervisor for Dragonball {
inner.get_pids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn check(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.check().await

View File

@@ -75,6 +75,12 @@ impl VmmInstance {
share_info_lock.write().unwrap().id = String::from(id);
}
pub fn get_vmm_master_tid(&self) -> u32 {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().master_tid;
result
}
pub fn get_vcpu_tids(&self) -> Vec<(u8, u32)> {
let info = self.vmm_shared_info.clone();
let result = info.read().unwrap().tids.clone();
@@ -103,6 +109,7 @@ impl VmmInstance {
Some(kvm.into_raw_fd()),
)
.expect("Failed to start vmm");
let vmm_shared_info = self.get_shared_info();
self.vmm_thread = Some(
thread::Builder::new()
@@ -110,6 +117,9 @@ impl VmmInstance {
.spawn(move || {
|| -> Result<i32> {
debug!(sl!(), "run vmm thread start");
let cur_tid = nix::unistd::gettid().as_raw() as u32;
vmm_shared_info.write().unwrap().master_tid = cur_tid;
if let Some(netns_path) = netns {
info!(sl!(), "set netns for vmm master {}", &netns_path);
let netns_fd = File::open(&netns_path)

View File

@@ -87,6 +87,7 @@ pub trait Hypervisor: Send + Sync {
async fn hypervisor_config(&self) -> HypervisorConfig;
async fn get_thread_ids(&self) -> Result<VcpuThreadIds>;
async fn get_pids(&self) -> Result<Vec<u32>>;
async fn get_vmm_master_tid(&self) -> Result<u32>;
async fn cleanup(&self) -> Result<()>;
async fn check(&self) -> Result<()>;
async fn get_jailer_root(&self) -> Result<String>;

View File

@@ -89,6 +89,11 @@ impl QemuInner {
todo!()
}
pub(crate) async fn get_vmm_master_tid(&self) -> Result<u32> {
info!(sl!(), "QemuInner::get_vmm_master_tid()");
todo!()
}
pub(crate) async fn cleanup(&self) -> Result<()> {
info!(sl!(), "QemuInner::cleanup()");
todo!()

View File

@@ -103,6 +103,11 @@ impl Hypervisor for Qemu {
inner.get_thread_ids().await
}
async fn get_vmm_master_tid(&self) -> Result<u32> {
let inner = self.inner.read().await;
inner.get_vmm_master_tid().await
}
async fn cleanup(&self) -> Result<()> {
let inner = self.inner.read().await;
inner.cleanup().await

View File

@@ -5,6 +5,7 @@
//
mod endpoint;
pub use endpoint::endpoint_persist::EndpointState;
pub use endpoint::Endpoint;
mod network_entity;
mod network_info;
@@ -17,7 +18,7 @@ use network_with_netns::NetworkWithNetns;
mod network_pair;
use network_pair::NetworkPair;
mod utils;
pub use endpoint::endpoint_persist::EndpointState;
pub use utils::netns::NetnsGuard;
use std::sync::Arc;

View File

@@ -10,12 +10,12 @@ use anyhow::{Context, Result};
use nix::sched::{setns, CloneFlags};
use nix::unistd::{getpid, gettid};
pub(crate) struct NetnsGuard {
pub struct NetnsGuard {
old_netns: Option<File>,
}
impl NetnsGuard {
pub(crate) fn new(new_netns_path: &str) -> Result<Self> {
pub fn new(new_netns_path: &str) -> Result<Self> {
let old_netns = if !new_netns_path.is_empty() {
let current_netns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), "net");
let old_netns = File::open(&current_netns_path)

View File

@@ -13,9 +13,12 @@ slog-scope = "4.4.0"
tokio = { version = "1.8.0", features = ["rt-multi-thread"] }
hyper = { version = "0.14.20", features = ["stream", "server", "http1"] }
hyperlocal = "0.8"
serde_json = "1.0.88"
nix = "0.25.0"
common = { path = "./common" }
kata-types = { path = "../../../libs/kata-types" }
kata-sys-util = { path = "../../../libs/kata-sys-util" }
logging = { path = "../../../libs/logging"}
oci = { path = "../../../libs/oci" }
shim-interface = { path = "../../../libs/shim-interface" }

View File

@@ -26,3 +26,4 @@ agent = { path = "../../agent" }
kata-sys-util = { path = "../../../../libs/kata-sys-util" }
kata-types = { path = "../../../../libs/kata-types" }
oci = { path = "../../../../libs/oci" }

View File

@@ -9,7 +9,13 @@ use async_trait::async_trait;
#[async_trait]
pub trait Sandbox: Send + Sync {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()>;
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()>;
async fn stop(&self) -> Result<()>;
async fn cleanup(&self) -> Result<()>;
async fn shutdown(&self) -> Result<()>;

View File

@@ -18,6 +18,7 @@ use hypervisor::Param;
use kata_types::{
annotations::Annotation, config::default::DEFAULT_GUEST_DNS_FILE, config::TomlConfig,
};
#[cfg(feature = "linux")]
use linux_container::LinuxContainer;
use persist::sandbox_persist::Persist;
@@ -50,6 +51,8 @@ impl RuntimeHandlerManagerInner {
async fn init_runtime_handler(
&mut self,
spec: &oci::Spec,
state: &oci::State,
netns: Option<String>,
dns: Vec<String>,
config: Arc<TomlConfig>,
@@ -74,14 +77,19 @@ impl RuntimeHandlerManagerInner {
// start sandbox
runtime_instance
.sandbox
.start(netns, dns)
.start(netns, dns, spec, state)
.await
.context("start sandbox")?;
self.runtime_instance = Some(Arc::new(runtime_instance));
Ok(())
}
async fn try_init(&mut self, spec: &oci::Spec, options: &Option<Vec<u8>>) -> Result<()> {
async fn try_init(
&mut self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
// return if runtime instance has init
if self.runtime_instance.is_some() {
return Ok(());
@@ -121,7 +129,7 @@ impl RuntimeHandlerManagerInner {
}
let config = load_config(spec, options).context("load config")?;
self.init_runtime_handler(netns, dns, Arc::new(config))
self.init_runtime_handler(spec, state, netns, dns, Arc::new(config))
.await
.context("init runtime handler")?;
@@ -207,10 +215,11 @@ impl RuntimeHandlerManager {
async fn try_init_runtime_instance(
&self,
spec: &oci::Spec,
state: &oci::State,
options: &Option<Vec<u8>>,
) -> Result<()> {
let mut inner = self.inner.write().await;
inner.try_init(spec, options).await
inner.try_init(spec, state, options).await
}
pub async fn handler_message(&self, req: Request) -> Result<Response> {
@@ -222,8 +231,16 @@ impl RuntimeHandlerManager {
oci::OCI_SPEC_CONFIG_FILE_NAME
);
let spec = oci::Spec::load(&bundler_path).context("load spec")?;
let state = oci::State {
version: spec.version.clone(),
id: container_config.container_id.to_string(),
status: oci::ContainerState::Creating,
pid: 0,
bundle: bundler_path,
annotations: spec.annotations.clone(),
};
self.try_init_runtime_instance(&spec, &container_config.options)
self.try_init_runtime_instance(&spec, &state, &container_config.options)
.await
.context("try init runtime instance")?;
let instance = self

View File

@@ -37,6 +37,7 @@ pub struct Container {
pid: u32,
pub container_id: ContainerID,
config: ContainerConfig,
spec: oci::Spec,
inner: Arc<RwLock<ContainerInner>>,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
@@ -47,6 +48,7 @@ impl Container {
pub fn new(
pid: u32,
config: ContainerConfig,
spec: oci::Spec,
agent: Arc<dyn Agent>,
resource_manager: Arc<ResourceManager>,
) -> Result<Self> {
@@ -67,6 +69,7 @@ impl Container {
pid,
container_id,
config,
spec,
inner: Arc::new(RwLock::new(ContainerInner::new(
agent.clone(),
init_process,
@@ -382,11 +385,31 @@ impl Container {
.context("agent update container")?;
Ok(())
}
pub async fn config(&self) -> ContainerConfig {
self.config.clone()
}
pub async fn spec(&self) -> oci::Spec {
self.spec.clone()
}
}
fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
// hook should be done on host
spec.hooks = None;
// Only the StartContainer hook needs to be reserved for execution in the guest
let start_container_hooks = match spec.hooks.as_ref() {
Some(hooks) => hooks.start_container.clone(),
None => Vec::new(),
};
spec.hooks = if start_container_hooks.is_empty() {
None
} else {
Some(oci::Hooks {
start_container: start_container_hooks,
..Default::default()
})
};
// special process K8s ephemeral volumes.
update_ephemeral_storage_type(spec);

View File

@@ -5,11 +5,10 @@
//
use anyhow::{anyhow, Context, Result};
use async_trait::async_trait;
use std::{collections::HashMap, sync::Arc};
use agent::Agent;
use async_trait::async_trait;
use common::{
error::Error,
types::{
@@ -19,10 +18,14 @@ use common::{
},
ContainerManager,
};
use hypervisor::Hypervisor;
use oci::Process as OCIProcess;
use resource::network::NetnsGuard;
use resource::ResourceManager;
use tokio::sync::RwLock;
use kata_sys_util::hooks::HookStates;
use super::{logger_with_process, Container};
pub struct VirtContainerManager {
@@ -31,6 +34,7 @@ pub struct VirtContainerManager {
containers: Arc<RwLock<HashMap<String, Container>>>,
resource_manager: Arc<ResourceManager>,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
}
impl VirtContainerManager {
@@ -38,6 +42,7 @@ impl VirtContainerManager {
sid: &str,
pid: u32,
agent: Arc<dyn Agent>,
hypervisor: Arc<dyn Hypervisor>,
resource_manager: Arc<ResourceManager>,
) -> Self {
Self {
@@ -46,6 +51,7 @@ impl VirtContainerManager {
containers: Default::default(),
resource_manager,
agent,
hypervisor,
}
}
}
@@ -55,12 +61,37 @@ impl ContainerManager for VirtContainerManager {
async fn create_container(&self, config: ContainerConfig, spec: oci::Spec) -> Result<PID> {
let container = Container::new(
self.pid,
config,
config.clone(),
spec.clone(),
self.agent.clone(),
self.resource_manager.clone(),
)
.context("new container")?;
// CreateContainer Hooks:
// * should be run in vmm namespace (hook path in runtime namespace)
// * should be run after the vm is started, before container is created, and after CreateRuntime Hooks
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createcontainer-hooks
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let vmm_netns_path = format!("/proc/{}/task/{}/ns/{}", self.pid, vmm_master_tid, "net");
let state = oci::State {
version: spec.version.clone(),
id: config.container_id.clone(),
status: oci::ContainerState::Creating,
pid: vmm_master_tid as i32,
bundle: config.bundle.clone(),
annotations: spec.annotations.clone(),
};
// new scope, CreateContainer hooks in which will execute in a new network namespace
{
let _netns_guard = NetnsGuard::new(&vmm_netns_path).context("vmm netns guard")?;
if let Some(hooks) = spec.hooks.as_ref() {
let mut create_container_hook_states = HookStates::new();
create_container_hook_states.execute_hooks(&hooks.create_container, Some(state))?;
}
}
let mut containers = self.containers.write().await;
container.create(spec).await.context("create")?;
containers.insert(container.container_id.to_string(), container);
@@ -87,6 +118,26 @@ impl ContainerManager for VirtContainerManager {
let c = containers
.remove(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.to_string()))?;
// Poststop Hooks:
// * should be run in runtime namespace
// * should be run after the container is deleted but before delete operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststop
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Stopped,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststop_hook_states = HookStates::new();
poststop_hook_states.execute_hooks(&hooks.poststop, Some(state))?;
}
c.state_process(process).await.context("state process")
}
ProcessType::Exec => {
@@ -190,6 +241,26 @@ impl ContainerManager for VirtContainerManager {
.get(container_id)
.ok_or_else(|| Error::ContainerNotFound(container_id.clone()))?;
c.start(process).await.context("start")?;
// Poststart Hooks:
// * should be run in runtime namespace
// * should be run after user-specific command is executed but before start operation returns
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#poststart
let c_spec = c.spec().await;
let vmm_master_tid = self.hypervisor.get_vmm_master_tid().await?;
let state = oci::State {
version: c_spec.version.clone(),
id: c.container_id.to_string(),
status: oci::ContainerState::Running,
pid: vmm_master_tid as i32,
bundle: c.config().await.bundle,
annotations: c_spec.annotations.clone(),
};
if let Some(hooks) = c_spec.hooks.as_ref() {
let mut poststart_hook_states = HookStates::new();
poststart_hook_states.execute_hooks(&hooks.poststart, Some(state))?;
}
Ok(PID { pid: self.pid })
}

View File

@@ -86,13 +86,18 @@ impl RuntimeHandler for VirtContainer {
sid,
msg_sender,
agent.clone(),
hypervisor,
hypervisor.clone(),
resource_manager.clone(),
)
.await
.context("new virt sandbox")?;
let container_manager =
container_manager::VirtContainerManager::new(sid, pid, agent, resource_manager);
let container_manager = container_manager::VirtContainerManager::new(
sid,
pid,
agent,
hypervisor,
resource_manager,
);
Ok(RuntimeInstance {
sandbox: Arc::new(sandbox),
container_manager: Arc::new(container_manager),

View File

@@ -17,6 +17,7 @@ use common::{
};
use containerd_shim_protos::events::task::TaskOOM;
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
use kata_sys_util::hooks::HookStates;
use kata_types::config::{
default::{DEFAULT_AGENT_LOG_PORT, DEFAULT_AGENT_VSOCK_PORT},
TomlConfig,
@@ -117,11 +118,50 @@ impl VirtSandbox {
Ok(resource_configs)
}
async fn execute_oci_hook_functions(
&self,
prestart_hooks: &[oci::Hook],
create_runtime_hooks: &[oci::Hook],
state: &oci::State,
) -> Result<()> {
let mut st = state.clone();
// for dragonball, we use vmm_master_tid
let vmm_pid = self
.hypervisor
.get_vmm_master_tid()
.await
.context("get vmm master tid")?;
st.pid = vmm_pid as i32;
// Prestart Hooks [DEPRECATED in newest oci spec]:
// * should be run in runtime namespace
// * should be run after vm is started, but before container is created
// if Prestart Hook and CreateRuntime Hook are both supported
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#prestart
let mut prestart_hook_states = HookStates::new();
prestart_hook_states.execute_hooks(prestart_hooks, Some(st.clone()))?;
// CreateRuntime Hooks:
// * should be run in runtime namespace
// * should be run when creating the runtime
// * spec details: https://github.com/opencontainers/runtime-spec/blob/c1662686cff159595277b79322d0272f5182941b/config.md#createruntime-hooks
let mut create_runtime_hook_states = HookStates::new();
create_runtime_hook_states.execute_hooks(create_runtime_hooks, Some(st.clone()))?;
Ok(())
}
}
#[async_trait]
impl Sandbox for VirtSandbox {
async fn start(&self, netns: Option<String>, dns: Vec<String>) -> Result<()> {
async fn start(
&self,
netns: Option<String>,
dns: Vec<String>,
spec: &oci::Spec,
state: &oci::State,
) -> Result<()> {
let id = &self.sid;
// if sandbox running, return
@@ -149,6 +189,17 @@ impl Sandbox for VirtSandbox {
self.hypervisor.start_vm(10_000).await.context("start vm")?;
info!(sl!(), "start vm");
// execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks
let (prestart_hooks, create_runtime_hooks) = match spec.hooks.as_ref() {
Some(hooks) => (hooks.prestart.clone(), hooks.create_runtime.clone()),
None => (Vec::new(), Vec::new()),
};
self.execute_oci_hook_functions(&prestart_hooks, &create_runtime_hooks, state)
.await?;
// TODO: if prestart_hooks is not empty, rescan the network endpoints(rely on hotplug endpoints).
// see: https://github.com/kata-containers/kata-containers/issues/6378
// connect agent
// set agent socket
let address = self