mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-17 14:24:27 +01:00
CCv0: Merge main into CCv0 branch
Merge remote-tracking branch 'upstream/main' into CCv0 Fixes: #5905 Signed-off-by: Georgina Kinge <georgina.kinge@ibm.com>
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@
|
||||
**/*.rej
|
||||
**/target
|
||||
**/.vscode
|
||||
**/.idea
|
||||
**/.fleet
|
||||
pkg/logging/Cargo.lock
|
||||
src/agent/src/version.rs
|
||||
src/agent/kata-agent.service
|
||||
|
||||
@@ -86,6 +86,27 @@ $ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/conf
|
||||
|
||||
This will pass container seccomp profiles to the kata agent.
|
||||
|
||||
## Enable SELinux on the guest
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> - To enable SELinux on the guest, SELinux MUST also be enabled on the host.
|
||||
> - You MUST create and build a rootfs image for SELinux in advance.
|
||||
> See [Create a rootfs image](#create-a-rootfs-image) and [Build a rootfs image](#build-a-rootfs-image).
|
||||
> - SELinux on the guest is currently supported only with a rootfs image, so
|
||||
> you cannot enable SELinux with the agent init (`AGENT_INIT=yes`) yet.
|
||||
|
||||
Enable guest SELinux in Enforcing mode as follows:
|
||||
|
||||
```
|
||||
$ sudo sed -i '/^disable_guest_selinux/ s/true/false/g' /etc/kata-containers/configuration.toml
|
||||
```
|
||||
|
||||
The runtime will automatically add `selinux=1` to the kernel parameters and the `xattr` option to
|
||||
`virtiofsd` when `disable_guest_selinux` is set to `false`.
|
||||
|
||||
If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kernel parameters.
|
||||
|
||||
## Enable full debug
|
||||
|
||||
Enable full debug as follows:
|
||||
@@ -256,6 +277,12 @@ If you want to build the agent without seccomp capability, you need to run the `
|
||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"'
|
||||
```
|
||||
|
||||
If you want to enable SELinux on the guest, you MUST choose `centos` and run the `rootfs.sh` script with `SELINUX=yes` as follows.
|
||||
|
||||
```
|
||||
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh centos'
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
>
|
||||
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
|
||||
@@ -283,6 +310,19 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh "${ROOTFS_DIR}"'
|
||||
$ popd
|
||||
```
|
||||
|
||||
If you want to enable SELinux on the guest, you MUST run the `image_builder.sh` script with `SELINUX=yes`
|
||||
to label the guest image as follows.
|
||||
To label the image on the host, you need to make sure that SELinux is enabled (`selinuxfs` is mounted) on the host
|
||||
and the rootfs MUST be created by running the `rootfs.sh` with `SELINUX=yes`.
|
||||
|
||||
```
|
||||
$ script -fec 'sudo -E USE_DOCKER=true SELINUX=yes ./image_builder.sh ${ROOTFS_DIR}'
|
||||
```
|
||||
|
||||
Currently, the `image_builder.sh` uses `chcon` as an interim solution in order to apply `container_runtime_exec_t`
|
||||
to the `kata-agent`. Hence, if you run `restorecon` on the guest image after running `image_builder.sh`,
|
||||
you need to label the `kata-agent` as `container_runtime_exec_t` again yourself.
|
||||
|
||||
> **Notes:**
|
||||
>
|
||||
> - You must ensure that the *default Docker runtime* is `runc` to make use of
|
||||
|
||||
2
src/agent/Cargo.lock
generated
2
src/agent/Cargo.lock
generated
@@ -2300,7 +2300,6 @@ dependencies = [
|
||||
"slog",
|
||||
"slog-scope",
|
||||
"slog-stdlog",
|
||||
"sysinfo",
|
||||
"tempfile",
|
||||
"test-utils",
|
||||
"thiserror",
|
||||
@@ -4076,6 +4075,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"test-utils",
|
||||
"tokio",
|
||||
"xattr",
|
||||
"zbus",
|
||||
]
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ inotify = "0.9.2"
|
||||
libseccomp = { version = "0.3.0", optional = true }
|
||||
zbus = "2.3.0"
|
||||
bit-vec= "0.6.3"
|
||||
xattr = "0.2.3"
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = "0.5.0"
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::log_child;
|
||||
use crate::process::Process;
|
||||
#[cfg(feature = "seccomp")]
|
||||
use crate::seccomp;
|
||||
use crate::selinux;
|
||||
use crate::specconv::CreateOpts;
|
||||
use crate::{mount, validator};
|
||||
|
||||
@@ -109,7 +110,6 @@ impl Default for ContainerStatus {
|
||||
}
|
||||
|
||||
// We might want to change this to thiserror in the future
|
||||
const MissingCGroupManager: &str = "failed to get container's cgroup Manager";
|
||||
const MissingLinux: &str = "no linux config";
|
||||
const InvalidNamespace: &str = "invalid namespace type";
|
||||
|
||||
@@ -243,7 +243,7 @@ pub struct LinuxContainer {
|
||||
pub id: String,
|
||||
pub root: String,
|
||||
pub config: Config,
|
||||
pub cgroup_manager: Option<Box<dyn Manager + Send + Sync>>,
|
||||
pub cgroup_manager: Box<dyn Manager + Send + Sync>,
|
||||
pub init_process_pid: pid_t,
|
||||
pub init_process_start_time: u64,
|
||||
pub uid_map_path: String,
|
||||
@@ -292,16 +292,11 @@ impl Container for LinuxContainer {
|
||||
));
|
||||
}
|
||||
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.freeze(FreezerState::Frozen)?;
|
||||
self.cgroup_manager.as_ref().freeze(FreezerState::Frozen)?;
|
||||
|
||||
self.status.transition(ContainerState::Paused);
|
||||
return Ok(());
|
||||
}
|
||||
Err(anyhow!(MissingCGroupManager))
|
||||
self.status.transition(ContainerState::Paused);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn resume(&mut self) -> Result<()> {
|
||||
@@ -310,16 +305,11 @@ impl Container for LinuxContainer {
|
||||
return Err(anyhow!("container status is: {:?}, not paused", status));
|
||||
}
|
||||
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.freeze(FreezerState::Thawed)?;
|
||||
self.cgroup_manager.as_ref().freeze(FreezerState::Thawed)?;
|
||||
|
||||
self.status.transition(ContainerState::Running);
|
||||
return Ok(());
|
||||
}
|
||||
Err(anyhow!(MissingCGroupManager))
|
||||
self.status.transition(ContainerState::Running);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -537,6 +527,8 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let selinux_enabled = selinux::is_enabled()?;
|
||||
|
||||
sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?;
|
||||
|
||||
if userns {
|
||||
@@ -638,6 +630,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
||||
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
|
||||
}
|
||||
|
||||
// Set SELinux label
|
||||
if !oci_process.selinux_label.is_empty() {
|
||||
if !selinux_enabled {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the process is provided but SELinux is not enabled on the running kernel"
|
||||
));
|
||||
}
|
||||
|
||||
log_child!(cfd_log, "Set SELinux label to the container process");
|
||||
selinux::set_exec_label(&oci_process.selinux_label)?;
|
||||
}
|
||||
|
||||
// Log unknown seccomp system calls in advance before the log file descriptor closes.
|
||||
#[cfg(feature = "seccomp")]
|
||||
if let Some(ref scmp) = linux.seccomp {
|
||||
@@ -847,22 +851,17 @@ impl BaseContainer for LinuxContainer {
|
||||
}
|
||||
|
||||
fn stats(&self) -> Result<StatsContainerResponse> {
|
||||
let mut r = StatsContainerResponse::default();
|
||||
|
||||
if self.cgroup_manager.is_some() {
|
||||
r.cgroup_stats =
|
||||
SingularPtrField::some(self.cgroup_manager.as_ref().unwrap().get_stats()?);
|
||||
}
|
||||
|
||||
// what about network interface stats?
|
||||
|
||||
Ok(r)
|
||||
Ok(StatsContainerResponse {
|
||||
cgroup_stats: SingularPtrField::some(self.cgroup_manager.as_ref().get_stats()?),
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
fn set(&mut self, r: LinuxResources) -> Result<()> {
|
||||
if self.cgroup_manager.is_some() {
|
||||
self.cgroup_manager.as_ref().unwrap().set(&r, true)?;
|
||||
}
|
||||
self.cgroup_manager.as_ref().set(&r, true)?;
|
||||
|
||||
self.config
|
||||
.spec
|
||||
.as_mut()
|
||||
@@ -1035,7 +1034,7 @@ impl BaseContainer for LinuxContainer {
|
||||
&logger,
|
||||
spec,
|
||||
&p,
|
||||
self.cgroup_manager.as_ref().unwrap().as_ref(),
|
||||
self.cgroup_manager.as_ref(),
|
||||
self.config.use_systemd_cgroup,
|
||||
&st,
|
||||
&mut pipe_w,
|
||||
@@ -1114,19 +1113,19 @@ impl BaseContainer for LinuxContainer {
|
||||
)?;
|
||||
fs::remove_dir_all(&self.root)?;
|
||||
|
||||
if let Some(cgm) = self.cgroup_manager.as_mut() {
|
||||
// Kill all of the processes created in this container to prevent
|
||||
// the leak of some daemon process when this container shared pidns
|
||||
// with the sandbox.
|
||||
let pids = cgm.get_pids().context("get cgroup pids")?;
|
||||
for i in pids {
|
||||
if let Err(e) = signal::kill(Pid::from_raw(i), Signal::SIGKILL) {
|
||||
warn!(self.logger, "kill the process {} error: {:?}", i, e);
|
||||
}
|
||||
let cgm = self.cgroup_manager.as_mut();
|
||||
// Kill all of the processes created in this container to prevent
|
||||
// the leak of some daemon process when this container shared pidns
|
||||
// with the sandbox.
|
||||
let pids = cgm.get_pids().context("get cgroup pids")?;
|
||||
for i in pids {
|
||||
if let Err(e) = signal::kill(Pid::from_raw(i), Signal::SIGKILL) {
|
||||
warn!(self.logger, "kill the process {} error: {:?}", i, e);
|
||||
}
|
||||
|
||||
cgm.destroy().context("destroy cgroups")?;
|
||||
}
|
||||
|
||||
cgm.destroy().context("destroy cgroups")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1514,7 +1513,7 @@ impl LinuxContainer {
|
||||
Ok(LinuxContainer {
|
||||
id: id.clone(),
|
||||
root,
|
||||
cgroup_manager: Some(cgroup_manager),
|
||||
cgroup_manager,
|
||||
status: ContainerStatus::new(),
|
||||
uid_map_path: String::from(""),
|
||||
gid_map_path: "".to_string(),
|
||||
@@ -1980,24 +1979,12 @@ mod tests {
|
||||
assert!(format!("{:?}", ret).contains("failed to pause container"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_pause_cgroupmgr_is_none() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.cgroup_manager = None;
|
||||
c.pause().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_pause() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
let cgroup_manager: Box<dyn Manager + Send + Sync> =
|
||||
Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
c.cgroup_manager = Some(cgroup_manager);
|
||||
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
c.pause().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
@@ -2016,25 +2003,12 @@ mod tests {
|
||||
assert!(format!("{:?}", ret).contains("not paused"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_resume_cgroupmgr_is_none() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
c.status.transition(ContainerState::Paused);
|
||||
c.cgroup_manager = None;
|
||||
c.resume().map_err(|e| anyhow!(e))
|
||||
});
|
||||
|
||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_linuxcontainer_resume() {
|
||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||
let cgroup_manager: Box<dyn Manager + Send + Sync> =
|
||||
Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
c.cgroup_manager = Some(cgroup_manager);
|
||||
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||
})?);
|
||||
// Change status to paused, this way we can resume it
|
||||
c.status.transition(ContainerState::Paused);
|
||||
c.resume().map_err(|e| anyhow!(e))
|
||||
|
||||
@@ -38,6 +38,7 @@ pub mod pipestream;
|
||||
pub mod process;
|
||||
#[cfg(feature = "seccomp")]
|
||||
pub mod seccomp;
|
||||
pub mod selinux;
|
||||
pub mod specconv;
|
||||
pub mod sync;
|
||||
pub mod sync_with_async;
|
||||
|
||||
@@ -25,6 +25,7 @@ use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
|
||||
use crate::container::DEFAULT_DEVICES;
|
||||
use crate::selinux;
|
||||
use crate::sync::write_count;
|
||||
use std::string::ToString;
|
||||
|
||||
@@ -181,6 +182,8 @@ pub fn init_rootfs(
|
||||
None => flags |= MsFlags::MS_SLAVE,
|
||||
}
|
||||
|
||||
let label = &linux.mount_label;
|
||||
|
||||
let root = spec
|
||||
.root
|
||||
.as_ref()
|
||||
@@ -244,7 +247,7 @@ pub fn init_rootfs(
|
||||
}
|
||||
}
|
||||
|
||||
mount_from(cfd_log, m, rootfs, flags, &data, "")?;
|
||||
mount_from(cfd_log, m, rootfs, flags, &data, label)?;
|
||||
// bind mount won't change mount options, we need remount to make mount options
|
||||
// effective.
|
||||
// first check that we have non-default options required before attempting a
|
||||
@@ -524,7 +527,6 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
|
||||
|
||||
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
|
||||
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
|
||||
|
||||
let mut max_len = 0;
|
||||
let mut mount_point = String::from("");
|
||||
let mut options = String::from("");
|
||||
@@ -767,9 +769,9 @@ fn mount_from(
|
||||
rootfs: &str,
|
||||
flags: MsFlags,
|
||||
data: &str,
|
||||
_label: &str,
|
||||
label: &str,
|
||||
) -> Result<()> {
|
||||
let d = String::from(data);
|
||||
let mut d = String::from(data);
|
||||
let dest = secure_join(rootfs, &m.destination);
|
||||
|
||||
let src = if m.r#type.as_str() == "bind" {
|
||||
@@ -822,6 +824,37 @@ fn mount_from(
|
||||
e
|
||||
})?;
|
||||
|
||||
// Set the SELinux context for the mounts
|
||||
let mut use_xattr = false;
|
||||
if !label.is_empty() {
|
||||
if selinux::is_enabled()? {
|
||||
let device = Path::new(&m.source)
|
||||
.file_name()
|
||||
.ok_or_else(|| anyhow!("invalid device source path: {}", &m.source))?
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("failed to convert device source path: {}", &m.source))?;
|
||||
|
||||
match device {
|
||||
// SELinux does not support labeling of /proc or /sys
|
||||
"proc" | "sysfs" => (),
|
||||
// SELinux does not support mount labeling against /dev/mqueue,
|
||||
// so we use setxattr instead
|
||||
"mqueue" => {
|
||||
use_xattr = true;
|
||||
}
|
||||
_ => {
|
||||
log_child!(cfd_log, "add SELinux mount label to {}", dest.as_str());
|
||||
selinux::add_mount_label(&mut d, label);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_child!(
|
||||
cfd_log,
|
||||
"SELinux label for the mount is provided but SELinux is not enabled on the running kernel"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
mount(
|
||||
Some(src.as_str()),
|
||||
dest.as_str(),
|
||||
@@ -834,6 +867,10 @@ fn mount_from(
|
||||
e
|
||||
})?;
|
||||
|
||||
if !label.is_empty() && selinux::is_enabled()? && use_xattr {
|
||||
xattr::set(dest.as_str(), "security.selinux", label.as_bytes())?;
|
||||
}
|
||||
|
||||
if flags.contains(MsFlags::MS_BIND)
|
||||
&& flags.intersects(
|
||||
!(MsFlags::MS_REC
|
||||
|
||||
80
src/agent/rustjail/src/selinux.rs
Normal file
80
src/agent/rustjail/src/selinux.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright 2022 Sony Group Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use nix::unistd::gettid;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::prelude::*;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn is_enabled() -> Result<bool> {
|
||||
let buf = fs::read_to_string("/proc/mounts")?;
|
||||
let enabled = buf.contains("selinuxfs");
|
||||
|
||||
Ok(enabled)
|
||||
}
|
||||
|
||||
/// Appends an SELinux `context="<label>"` option to the mount data string.
///
/// If `data` already holds options, the new option is separated from them by
/// a comma; if `data` is empty, the option is written without a separator.
pub fn add_mount_label(data: &mut String, label: &str) {
    // Only existing options need a separating comma.
    let separator = if data.is_empty() { "" } else { "," };
    data.push_str(&format!("{}context=\"{}\"", separator, label));
}
|
||||
|
||||
pub fn set_exec_label(label: &str) -> Result<()> {
|
||||
let mut attr_path = Path::new("/proc/thread-self/attr/exec").to_path_buf();
|
||||
if !attr_path.exists() {
|
||||
// Fall back to the old convention
|
||||
attr_path = Path::new("/proc/self/task")
|
||||
.join(gettid().to_string())
|
||||
.join("attr/exec")
|
||||
}
|
||||
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(attr_path)?;
|
||||
file.write_all(label.as_bytes())
|
||||
.with_context(|| "failed to apply SELinux label")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Label used by the tests below.
    const TEST_LABEL: &str = "system_u:system_r:unconfined_t:s0";

    // `is_enabled` must succeed whether or not SELinux is enabled.
    #[test]
    fn test_is_enabled() {
        let ret = is_enabled();
        assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret);
    }

    // The context option is appended with a comma only when options already exist.
    #[test]
    fn test_add_mount_label() {
        let mut data = String::new();
        add_mount_label(&mut data, TEST_LABEL);
        assert_eq!(data, format!("context=\"{}\"", TEST_LABEL));

        let mut data = String::from("defaults");
        add_mount_label(&mut data, TEST_LABEL);
        assert_eq!(data, format!("defaults,context=\"{}\"", TEST_LABEL));
    }

    // Setting the label is expected to succeed exactly when SELinux is enabled.
    #[test]
    fn test_set_exec_label() {
        let ret = set_exec_label(TEST_LABEL);
        if is_enabled().unwrap() {
            assert!(ret.is_ok(), "Expecting Ok, Got {:?}", ret);
        } else {
            assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
        }
    }
}
|
||||
@@ -6,6 +6,7 @@
|
||||
use crate::container::Config;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
|
||||
use regex::Regex;
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Component, PathBuf};
|
||||
|
||||
@@ -86,6 +87,23 @@ fn hostname(oci: &Spec) -> Result<()> {
|
||||
|
||||
fn security(oci: &Spec) -> Result<()> {
|
||||
let linux = get_linux(oci)?;
|
||||
let label_pattern = r".*_u:.*_r:.*_t:s[0-9]|1[0-5].*";
|
||||
let label_regex = Regex::new(label_pattern)?;
|
||||
|
||||
if let Some(ref process) = oci.process {
|
||||
if !process.selinux_label.is_empty() && !label_regex.is_match(&process.selinux_label) {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the process is invalid format: {}",
|
||||
&process.selinux_label
|
||||
));
|
||||
}
|
||||
}
|
||||
if !linux.mount_label.is_empty() && !label_regex.is_match(&linux.mount_label) {
|
||||
return Err(anyhow!(
|
||||
"SELinux label for the mount is invalid format: {}",
|
||||
&linux.mount_label
|
||||
));
|
||||
}
|
||||
|
||||
if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() {
|
||||
return Ok(());
|
||||
@@ -95,8 +113,6 @@ fn security(oci: &Spec) -> Result<()> {
|
||||
return Err(anyhow!("Linux namespace does not contain mount"));
|
||||
}
|
||||
|
||||
// don't care about selinux at present
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -285,7 +301,7 @@ pub fn validate(conf: &Config) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use oci::Mount;
|
||||
use oci::{Mount, Process};
|
||||
|
||||
#[test]
|
||||
fn test_namespace() {
|
||||
@@ -388,6 +404,29 @@ mod tests {
|
||||
];
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap();
|
||||
|
||||
// SELinux
|
||||
let valid_label = "system_u:system_r:container_t:s0:c123,c456";
|
||||
let mut process = Process::default();
|
||||
process.selinux_label = valid_label.to_string();
|
||||
spec.process = Some(process);
|
||||
security(&spec).unwrap();
|
||||
|
||||
let mut linux = Linux::default();
|
||||
linux.mount_label = valid_label.to_string();
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap();
|
||||
|
||||
let invalid_label = "system_u:system_r:container_t";
|
||||
let mut process = Process::default();
|
||||
process.selinux_label = invalid_label.to_string();
|
||||
spec.process = Some(process);
|
||||
security(&spec).unwrap_err();
|
||||
|
||||
let mut linux = Linux::default();
|
||||
linux.mount_label = invalid_label.to_string();
|
||||
spec.linux = Some(linux);
|
||||
security(&spec).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -348,14 +348,13 @@ impl AgentService {
|
||||
}
|
||||
|
||||
// start oom event loop
|
||||
if let Some(ref ctr) = ctr.cgroup_manager {
|
||||
let cg_path = ctr.get_cgroup_path("memory");
|
||||
|
||||
if let Ok(cg_path) = cg_path {
|
||||
let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
|
||||
let cg_path = ctr.cgroup_manager.as_ref().get_cgroup_path("memory");
|
||||
|
||||
s.run_oom_event_monitor(rx, cid.clone()).await;
|
||||
}
|
||||
if let Ok(cg_path) = cg_path {
|
||||
let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
|
||||
|
||||
s.run_oom_event_monitor(rx, cid.clone()).await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -542,11 +541,7 @@ impl AgentService {
|
||||
let ctr = sandbox
|
||||
.get_container(cid)
|
||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||
let cm = ctr
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
||||
cm.freeze(state)?;
|
||||
ctr.cgroup_manager.as_ref().freeze(state)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -556,11 +551,7 @@ impl AgentService {
|
||||
let ctr = sandbox
|
||||
.get_container(cid)
|
||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||
let cm = ctr
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
||||
let pids = cm.get_pids()?;
|
||||
let pids = ctr.cgroup_manager.as_ref().get_pids()?;
|
||||
Ok(pids)
|
||||
}
|
||||
|
||||
|
||||
@@ -298,7 +298,6 @@ impl Sandbox {
|
||||
info!(self.logger, "updating {}", ctr.id.as_str());
|
||||
ctr.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.update_cpuset_path(guest_cpuset.as_str(), container_cpust)?;
|
||||
}
|
||||
|
||||
|
||||
8
src/libs/Cargo.lock
generated
8
src/libs/Cargo.lock
generated
@@ -40,6 +40,12 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
@@ -420,6 +426,8 @@ dependencies = [
|
||||
name = "kata-types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
"bitmask-enum",
|
||||
"byte-unit",
|
||||
"glob",
|
||||
|
||||
@@ -43,8 +43,6 @@
|
||||
use std::fmt::Debug;
|
||||
use std::fs;
|
||||
use std::io::{self, BufRead};
|
||||
use std::os::raw::c_char;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::os::unix::fs::{DirBuilderExt, OpenOptionsExt};
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -213,11 +211,11 @@ pub fn create_mount_destination<S: AsRef<Path>, D: AsRef<Path>, R: AsRef<Path>>(
|
||||
}
|
||||
}
|
||||
|
||||
/// Remount a bind mount into readonly mode.
|
||||
/// Remount a bind mount
|
||||
///
|
||||
/// # Safety
|
||||
/// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks.
|
||||
pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
||||
pub fn bind_remount<P: AsRef<Path>>(dst: P, readonly: bool) -> Result<()> {
|
||||
let dst = dst.as_ref();
|
||||
if dst.is_empty() {
|
||||
return Err(Error::NullMountPointPath);
|
||||
@@ -226,7 +224,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
||||
.canonicalize()
|
||||
.map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?;
|
||||
|
||||
do_rebind_mount_read_only(dst, MsFlags::empty())
|
||||
do_rebind_mount(dst, readonly, MsFlags::empty())
|
||||
}
|
||||
|
||||
/// Bind mount `src` to `dst` in slave mode, optionally in readonly mode if `readonly` is true.
|
||||
@@ -239,7 +237,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
||||
pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
|
||||
src: S,
|
||||
dst: D,
|
||||
read_only: bool,
|
||||
readonly: bool,
|
||||
) -> Result<()> {
|
||||
fail::fail_point!("bind_mount", |_| {
|
||||
Err(Error::FailureInject(
|
||||
@@ -275,8 +273,8 @@ pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
|
||||
.map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?;
|
||||
|
||||
// Optionally rebind into readonly mode.
|
||||
if read_only {
|
||||
do_rebind_mount_read_only(dst, MsFlags::empty())?;
|
||||
if readonly {
|
||||
do_rebind_mount(dst, readonly, MsFlags::empty())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -356,7 +354,7 @@ impl Mounter for kata_types::mount::Mount {
|
||||
// Bind mount readonly.
|
||||
let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY;
|
||||
if (o_flag & bro_flag) == bro_flag {
|
||||
do_rebind_mount_read_only(target, o_flag)?;
|
||||
do_rebind_mount(target, true, o_flag)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -364,12 +362,16 @@ impl Mounter for kata_types::mount::Mount {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn do_rebind_mount_read_only<P: AsRef<Path>>(path: P, flags: MsFlags) -> Result<()> {
|
||||
fn do_rebind_mount<P: AsRef<Path>>(path: P, readonly: bool, flags: MsFlags) -> Result<()> {
|
||||
mount(
|
||||
Some(""),
|
||||
path.as_ref(),
|
||||
Some(""),
|
||||
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY,
|
||||
if readonly {
|
||||
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY
|
||||
} else {
|
||||
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT
|
||||
},
|
||||
Some(""),
|
||||
)
|
||||
.map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e))
|
||||
@@ -756,18 +758,11 @@ pub fn umount_all<P: AsRef<Path>>(mountpoint: P, lazy_umount: bool) -> Result<()
|
||||
|
||||
// Counterpart of nix::umount2, with support of `UMOUNT_NOFOLLOW`.
|
||||
fn umount2<P: AsRef<Path>>(path: P, lazy_umount: bool) -> std::io::Result<()> {
|
||||
let path_ptr = path.as_ref().as_os_str().as_bytes().as_ptr() as *const c_char;
|
||||
let mut flags = MntFlags::UMOUNT_NOFOLLOW.bits();
|
||||
let mut flags = MntFlags::UMOUNT_NOFOLLOW;
|
||||
if lazy_umount {
|
||||
flags |= MntFlags::MNT_DETACH.bits();
|
||||
}
|
||||
|
||||
// Safe because parameter is valid and we have checked the result.
|
||||
if unsafe { libc::umount2(path_ptr, flags) } < 0 {
|
||||
Err(io::Error::last_os_error())
|
||||
} else {
|
||||
Ok(())
|
||||
flags |= MntFlags::MNT_DETACH;
|
||||
}
|
||||
nix::mount::umount2(path.as_ref(), flags).map_err(io::Error::from)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -820,21 +815,21 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_bind_remount_read_only() {
|
||||
fn test_bind_remount() {
|
||||
let tmpdir = tempfile::tempdir().unwrap();
|
||||
let tmpdir2 = tempfile::tempdir().unwrap();
|
||||
|
||||
assert!(matches!(
|
||||
bind_remount_read_only(&PathBuf::from("")),
|
||||
bind_remount(&PathBuf::from(""), true),
|
||||
Err(Error::NullMountPointPath)
|
||||
));
|
||||
assert!(matches!(
|
||||
bind_remount_read_only(&PathBuf::from("../______doesn't____exist____nnn")),
|
||||
bind_remount(&PathBuf::from("../______doesn't____exist____nnn"), true),
|
||||
Err(Error::InvalidPath(_))
|
||||
));
|
||||
|
||||
bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true).unwrap();
|
||||
bind_remount_read_only(tmpdir.path()).unwrap();
|
||||
bind_remount(tmpdir.path(), true).unwrap();
|
||||
umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap();
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ pub const DEFAULT_VHOST_USER_STORE_PATH: &str = "/var/run/vhost-user";
|
||||
pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0;
|
||||
|
||||
pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs";
|
||||
pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "none";
|
||||
pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "never";
|
||||
pub const DEFAULT_VIRTIO_FS_DAX_SIZE_MB: u32 = 1024;
|
||||
pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 128 * 1024;
|
||||
pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024;
|
||||
|
||||
@@ -864,7 +864,7 @@ impl SharedFsInfo {
|
||||
)?;
|
||||
}
|
||||
|
||||
let l = ["none", "auto", "always"];
|
||||
let l = ["never", "auto", "always"];
|
||||
|
||||
if !l.contains(&self.virtio_fs_cache.as_str()) {
|
||||
return Err(eother!(
|
||||
|
||||
19
src/runtime-rs/Cargo.lock
generated
19
src/runtime-rs/Cargo.lock
generated
@@ -249,6 +249,12 @@ dependencies = [
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -1352,6 +1358,8 @@ dependencies = [
|
||||
name = "kata-types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64",
|
||||
"bitmask-enum",
|
||||
"byte-unit",
|
||||
"glob",
|
||||
@@ -2288,6 +2296,7 @@ dependencies = [
|
||||
"rtnetlink",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
"test-utils",
|
||||
@@ -2526,6 +2535,16 @@ dependencies = [
|
||||
"unix_socket2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shim-ctl"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"common",
|
||||
"logging",
|
||||
"runtimes",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.0"
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"crates/shim",
|
||||
"crates/shim-ctl",
|
||||
]
|
||||
|
||||
@@ -120,6 +120,8 @@ See the
|
||||
See the
|
||||
[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers).
|
||||
|
||||
An [experimental alternative binary](crates/shim-ctl/README.md) is available that removes containerd dependencies and makes it easier to run the shim proper outside of the runtime's usual deployment environment (i.e. on a developer machine).
|
||||
|
||||
## Limitations
|
||||
|
||||
For Kata Containers limitations, see the
|
||||
|
||||
@@ -14,6 +14,7 @@ pub mod hypervisor_persist;
|
||||
pub use device::*;
|
||||
pub mod dragonball;
|
||||
mod kernel_param;
|
||||
pub mod qemu;
|
||||
pub use kernel_param::Param;
|
||||
mod utils;
|
||||
use std::collections::HashMap;
|
||||
@@ -28,6 +29,8 @@ const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk";
|
||||
const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem";
|
||||
|
||||
pub const HYPERVISOR_DRAGONBALL: &str = "dragonball";
|
||||
pub const HYPERVISOR_QEMU: &str = "qemu";
|
||||
|
||||
#[derive(PartialEq)]
|
||||
pub(crate) enum VmmState {
|
||||
NotReady,
|
||||
|
||||
142
src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
Normal file
142
src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
// Copyright (c) 2022 Red Hat
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::{HypervisorConfig, VcpuThreadIds};
|
||||
use kata_types::capabilities::{Capabilities, CapabilityBits};
|
||||
|
||||
/// URI scheme used when formatting the agent socket address.
const VSOCK_SCHEME: &str = "vsock";
/// Hardcoded context ID used in the agent socket address.
const VSOCK_AGENT_CID: u32 = 3;
/// Hardcoded port used in the agent socket address.
const VSOCK_AGENT_PORT: u32 = 1024;
|
||||
|
||||
unsafe impl Send for QemuInner {}
|
||||
unsafe impl Sync for QemuInner {}
|
||||
|
||||
pub struct QemuInner {
|
||||
config: HypervisorConfig,
|
||||
}
|
||||
|
||||
impl QemuInner {
|
||||
pub fn new() -> QemuInner {
|
||||
QemuInner {
|
||||
config: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn prepare_vm(&mut self, _id: &str, _netns: Option<String>) -> Result<()> {
|
||||
info!(sl!(), "Preparing QEMU VM");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
||||
info!(sl!(), "Starting QEMU VM");
|
||||
|
||||
let mut command = std::process::Command::new(&self.config.path);
|
||||
|
||||
command
|
||||
.arg("-kernel")
|
||||
.arg(&self.config.boot_info.kernel)
|
||||
.arg("-m")
|
||||
.arg(format!("{}M", &self.config.memory_info.default_memory))
|
||||
.arg("-initrd")
|
||||
.arg(&self.config.boot_info.initrd)
|
||||
.arg("-vga")
|
||||
.arg("none")
|
||||
.arg("-nodefaults")
|
||||
.arg("-nographic");
|
||||
|
||||
command.spawn()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn stop_vm(&mut self) -> Result<()> {
|
||||
info!(sl!(), "Stopping QEMU VM");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) fn pause_vm(&self) -> Result<()> {
|
||||
info!(sl!(), "Pausing QEMU VM");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) fn resume_vm(&self) -> Result<()> {
|
||||
info!(sl!(), "Resuming QEMU VM");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn save_vm(&self) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// TODO: using a single hardcoded CID is clearly not adequate in the long
|
||||
/// run. Use the recently added VsockConfig infrastructure to fix this.
|
||||
pub(crate) async fn get_agent_socket(&self) -> Result<String> {
|
||||
info!(sl!(), "QemuInner::get_agent_socket()");
|
||||
Ok(format!(
|
||||
"{}://{}:{}",
|
||||
VSOCK_SCHEME, VSOCK_AGENT_CID, VSOCK_AGENT_PORT
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) async fn disconnect(&mut self) {
|
||||
info!(sl!(), "QemuInner::disconnect()");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
info!(sl!(), "QemuInner::get_thread_ids()");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn cleanup(&self) -> Result<()> {
|
||||
info!(sl!(), "QemuInner::cleanup()");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
info!(sl!(), "QemuInner::get_pids()");
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn check(&self) -> Result<()> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn get_jailer_root(&self) -> Result<String> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn capabilities(&self) -> Result<Capabilities> {
|
||||
let mut caps = Capabilities::default();
|
||||
caps.set(CapabilityBits::FsSharingSupport);
|
||||
Ok(caps)
|
||||
}
|
||||
|
||||
pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
|
||||
self.config = config;
|
||||
}
|
||||
|
||||
pub fn hypervisor_config(&self) -> HypervisorConfig {
|
||||
info!(sl!(), "QemuInner::hypervisor_config()");
|
||||
self.config.clone()
|
||||
}
|
||||
}
|
||||
|
||||
use crate::device::Device;
|
||||
|
||||
// device manager part of Hypervisor
|
||||
impl QemuInner {
|
||||
pub(crate) async fn add_device(&mut self, device: Device) -> Result<()> {
|
||||
info!(sl!(), "QemuInner::add_device() {}", device);
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_device(&mut self, device: Device) -> Result<()> {
|
||||
info!(sl!(), "QemuInner::remove_device() {} ", device);
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
134
src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
Normal file
134
src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
// Copyright (c) 2022 Red Hat
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
mod inner;
|
||||
|
||||
use crate::device::Device;
|
||||
use crate::hypervisor_persist::HypervisorState;
|
||||
use crate::Hypervisor;
|
||||
use crate::{HypervisorConfig, VcpuThreadIds};
|
||||
use inner::QemuInner;
|
||||
use kata_types::capabilities::Capabilities;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub struct Qemu {
|
||||
inner: Arc<RwLock<QemuInner>>,
|
||||
}
|
||||
|
||||
impl Default for Qemu {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Qemu {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(QemuInner::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.set_hypervisor_config(config)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Hypervisor for Qemu {
|
||||
async fn prepare_vm(&self, id: &str, netns: Option<String>) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.prepare_vm(id, netns).await
|
||||
}
|
||||
|
||||
async fn start_vm(&self, timeout: i32) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.start_vm(timeout).await
|
||||
}
|
||||
|
||||
async fn stop_vm(&self) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.stop_vm()
|
||||
}
|
||||
|
||||
async fn pause_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.pause_vm()
|
||||
}
|
||||
|
||||
async fn resume_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.resume_vm()
|
||||
}
|
||||
|
||||
async fn save_vm(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.save_vm().await
|
||||
}
|
||||
|
||||
async fn add_device(&self, device: Device) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.add_device(device).await
|
||||
}
|
||||
|
||||
async fn remove_device(&self, device: Device) -> Result<()> {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.remove_device(device).await
|
||||
}
|
||||
|
||||
async fn get_agent_socket(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_agent_socket().await
|
||||
}
|
||||
|
||||
async fn disconnect(&self) {
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.disconnect().await
|
||||
}
|
||||
|
||||
async fn hypervisor_config(&self) -> HypervisorConfig {
|
||||
let inner = self.inner.read().await;
|
||||
inner.hypervisor_config()
|
||||
}
|
||||
|
||||
async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_thread_ids().await
|
||||
}
|
||||
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.cleanup().await
|
||||
}
|
||||
|
||||
async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_pids().await
|
||||
}
|
||||
|
||||
async fn check(&self) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.check().await
|
||||
}
|
||||
|
||||
async fn get_jailer_root(&self) -> Result<String> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.get_jailer_root().await
|
||||
}
|
||||
|
||||
async fn save_state(&self) -> Result<HypervisorState> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
async fn capabilities(&self) -> Result<Capabilities> {
|
||||
let inner = self.inner.read().await;
|
||||
inner.capabilities().await
|
||||
}
|
||||
}
|
||||
@@ -11,14 +11,15 @@ use share_virtio_fs_inline::ShareVirtioFsInline;
|
||||
mod share_virtio_fs_standalone;
|
||||
use share_virtio_fs_standalone::ShareVirtioFsStandalone;
|
||||
mod utils;
|
||||
use tokio::sync::Mutex;
|
||||
pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path};
|
||||
mod virtio_fs_share_mount;
|
||||
use virtio_fs_share_mount::VirtiofsShareMount;
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
|
||||
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{anyhow, Context, Ok, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_types::config::hypervisor::SharedFsInfo;
|
||||
@@ -43,8 +44,10 @@ pub trait ShareFs: Send + Sync {
|
||||
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
||||
async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
||||
async fn get_storages(&self) -> Result<Vec<Storage>>;
|
||||
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ShareFsRootfsConfig {
|
||||
// TODO: for nydus v5/v6 need to update ShareFsMount
|
||||
pub cid: String,
|
||||
@@ -54,6 +57,7 @@ pub struct ShareFsRootfsConfig {
|
||||
pub is_rafs: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ShareFsVolumeConfig {
|
||||
pub cid: String,
|
||||
pub source: String,
|
||||
@@ -69,10 +73,61 @@ pub struct ShareFsMountResult {
|
||||
pub storages: Vec<agent::Storage>,
|
||||
}
|
||||
|
||||
/// Save mounted info for sandbox-level shared files.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MountedInfo {
|
||||
// Guest path
|
||||
pub guest_path: PathBuf,
|
||||
// Ref count of containers that uses this volume with read only permission
|
||||
pub ro_ref_count: usize,
|
||||
// Ref count of containers that uses this volume with read write permission
|
||||
pub rw_ref_count: usize,
|
||||
}
|
||||
|
||||
impl MountedInfo {
|
||||
pub fn new(guest_path: PathBuf, readonly: bool) -> Self {
|
||||
Self {
|
||||
guest_path,
|
||||
ro_ref_count: if readonly { 1 } else { 0 },
|
||||
rw_ref_count: if readonly { 0 } else { 1 },
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the mount has read only permission
|
||||
pub fn readonly(&self) -> bool {
|
||||
self.rw_ref_count == 0
|
||||
}
|
||||
|
||||
/// Ref count for all permissions
|
||||
pub fn ref_count(&self) -> usize {
|
||||
self.ro_ref_count + self.rw_ref_count
|
||||
}
|
||||
|
||||
// File/dir name in the form of "sandbox-<uuid>-<file/dir name>"
|
||||
pub fn file_name(&self) -> Result<String> {
|
||||
match self.guest_path.file_name() {
|
||||
Some(file_name) => match file_name.to_str() {
|
||||
Some(file_name) => Ok(file_name.to_owned()),
|
||||
None => Err(anyhow!("failed to get string from {:?}", file_name)),
|
||||
},
|
||||
None => Err(anyhow!(
|
||||
"failed to get file name from the guest_path {:?}",
|
||||
self.guest_path
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ShareFsMount: Send + Sync {
|
||||
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
|
||||
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
|
||||
/// Upgrade to readwrite permission
|
||||
async fn upgrade_to_rw(&self, file_name: &str) -> Result<()>;
|
||||
/// Downgrade to readonly permission
|
||||
async fn downgrade_to_ro(&self, file_name: &str) -> Result<()>;
|
||||
/// Umount the volume
|
||||
async fn umount(&self, file_name: &str) -> Result<()>;
|
||||
}
|
||||
|
||||
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {
|
||||
|
||||
@@ -4,11 +4,14 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use agent::Storage;
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::Hypervisor;
|
||||
use kata_types::config::hypervisor::SharedFsInfo;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use super::{
|
||||
share_virtio_fs::{
|
||||
@@ -30,6 +33,7 @@ pub struct ShareVirtioFsInlineConfig {
|
||||
pub struct ShareVirtioFsInline {
|
||||
config: ShareVirtioFsInlineConfig,
|
||||
share_fs_mount: Arc<dyn ShareFsMount>,
|
||||
mounted_info_set: Arc<Mutex<HashMap<String, MountedInfo>>>,
|
||||
}
|
||||
|
||||
impl ShareVirtioFsInline {
|
||||
@@ -37,6 +41,7 @@ impl ShareVirtioFsInline {
|
||||
Ok(Self {
|
||||
config: ShareVirtioFsInlineConfig { id: id.to_string() },
|
||||
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
||||
mounted_info_set: Arc::new(Mutex::new(HashMap::new())),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -77,4 +82,8 @@ impl ShareFs for ShareVirtioFsInline {
|
||||
storages.push(shared_volume);
|
||||
Ok(storages)
|
||||
}
|
||||
|
||||
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>> {
|
||||
self.mounted_info_set.clone()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,8 +4,12 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::{process::Stdio, sync::Arc};
|
||||
use std::{collections::HashMap, process::Stdio, sync::Arc};
|
||||
|
||||
use crate::share_fs::share_virtio_fs::{
|
||||
prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG,
|
||||
};
|
||||
use crate::share_fs::{KATA_GUEST_SHARE_DIR, VIRTIO_FS};
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
@@ -16,19 +20,18 @@ use tokio::{
|
||||
process::{Child, Command},
|
||||
sync::{
|
||||
mpsc::{channel, Receiver, Sender},
|
||||
RwLock,
|
||||
Mutex, RwLock,
|
||||
},
|
||||
};
|
||||
|
||||
use super::{
|
||||
share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path,
|
||||
virtio_fs_share_mount::VirtiofsShareMount, ShareFs, ShareFsMount,
|
||||
virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ShareVirtioFsStandaloneConfig {
|
||||
id: String,
|
||||
jail_root: String,
|
||||
|
||||
// virtio_fs_daemon is the virtio-fs vhost-user daemon path
|
||||
pub virtio_fs_daemon: String,
|
||||
@@ -38,14 +41,16 @@ pub struct ShareVirtioFsStandaloneConfig {
|
||||
pub virtio_fs_extra_args: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
#[derive(Default, Debug)]
|
||||
struct ShareVirtioFsStandaloneInner {
|
||||
pid: Option<u32>,
|
||||
}
|
||||
|
||||
pub(crate) struct ShareVirtioFsStandalone {
|
||||
inner: Arc<RwLock<ShareVirtioFsStandaloneInner>>,
|
||||
config: ShareVirtioFsStandaloneConfig,
|
||||
share_fs_mount: Arc<dyn ShareFsMount>,
|
||||
mounted_info_set: Arc<Mutex<HashMap<String, MountedInfo>>>,
|
||||
}
|
||||
|
||||
impl ShareVirtioFsStandalone {
|
||||
@@ -54,26 +59,33 @@ impl ShareVirtioFsStandalone {
|
||||
inner: Arc::new(RwLock::new(ShareVirtioFsStandaloneInner::default())),
|
||||
config: ShareVirtioFsStandaloneConfig {
|
||||
id: id.to_string(),
|
||||
jail_root: "".to_string(),
|
||||
virtio_fs_daemon: config.virtio_fs_daemon.clone(),
|
||||
virtio_fs_cache: config.virtio_fs_cache.clone(),
|
||||
virtio_fs_extra_args: config.virtio_fs_extra_args.clone(),
|
||||
},
|
||||
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
||||
mounted_info_set: Arc::new(Mutex::new(HashMap::new())),
|
||||
})
|
||||
}
|
||||
|
||||
fn virtiofsd_args(&self, sock_path: &str) -> Result<Vec<String>> {
|
||||
let source_path = get_host_ro_shared_path(&self.config.id);
|
||||
ensure_dir_exist(&source_path)?;
|
||||
let shared_dir = source_path
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("convert source path {:?} to str failed", source_path))?;
|
||||
|
||||
let mut args: Vec<String> = vec![
|
||||
String::from("-f"),
|
||||
format!("--socket-path={}", sock_path),
|
||||
String::from("-o"),
|
||||
format!("source={}", source_path.to_str().unwrap()),
|
||||
String::from("-o"),
|
||||
format!("cache={}", self.config.virtio_fs_cache),
|
||||
String::from("--socket-path"),
|
||||
String::from(sock_path),
|
||||
String::from("--shared-dir"),
|
||||
String::from(shared_dir),
|
||||
String::from("--cache"),
|
||||
self.config.virtio_fs_cache.clone(),
|
||||
String::from("--sandbox"),
|
||||
String::from("none"),
|
||||
String::from("--seccomp"),
|
||||
String::from("none"),
|
||||
];
|
||||
|
||||
if !self.config.virtio_fs_extra_args.is_empty() {
|
||||
@@ -84,8 +96,8 @@ impl ShareVirtioFsStandalone {
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
async fn setup_virtiofsd(&self) -> Result<()> {
|
||||
let sock_path = generate_sock_path(&self.config.jail_root);
|
||||
async fn setup_virtiofsd(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||
let sock_path = generate_sock_path(&h.get_jailer_root().await?);
|
||||
let args = self.virtiofsd_args(&sock_path).context("virtiofsd args")?;
|
||||
|
||||
let mut cmd = Command::new(&self.config.virtio_fs_daemon);
|
||||
@@ -160,8 +172,11 @@ impl ShareFs for ShareVirtioFsStandalone {
|
||||
self.share_fs_mount.clone()
|
||||
}
|
||||
|
||||
async fn setup_device_before_start_vm(&self, _h: &dyn Hypervisor) -> Result<()> {
|
||||
self.setup_virtiofsd().await.context("setup virtiofsd")?;
|
||||
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||
prepare_virtiofs(h, VIRTIO_FS, &self.config.id, &h.get_jailer_root().await?)
|
||||
.await
|
||||
.context("prepare virtiofs")?;
|
||||
self.setup_virtiofsd(h).await.context("setup virtiofsd")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -170,6 +185,23 @@ impl ShareFs for ShareVirtioFsStandalone {
|
||||
}
|
||||
|
||||
async fn get_storages(&self) -> Result<Vec<Storage>> {
|
||||
Ok(vec![])
|
||||
let mut storages: Vec<Storage> = Vec::new();
|
||||
|
||||
let shared_volume: Storage = Storage {
|
||||
driver: String::from(KATA_VIRTIO_FS_DEV_TYPE),
|
||||
driver_options: Vec::new(),
|
||||
source: String::from(MOUNT_GUEST_TAG),
|
||||
fs_type: String::from(FS_TYPE_VIRTIO_FS),
|
||||
fs_group: None,
|
||||
options: vec![String::from("nodev")],
|
||||
mount_point: String::from(KATA_GUEST_SHARE_DIR),
|
||||
};
|
||||
|
||||
storages.push(shared_volume);
|
||||
Ok(storages)
|
||||
}
|
||||
|
||||
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>> {
|
||||
self.mounted_info_set.clone()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Bind mount the original path to the runtime directory.
|
||||
pub(crate) fn share_to_guest(
|
||||
// absolute path for source
|
||||
source: &str,
|
||||
@@ -37,7 +38,7 @@ pub(crate) fn share_to_guest(
|
||||
// to remount the read only dir mount point directly.
|
||||
if readonly {
|
||||
let dst = do_get_host_path(target, sid, cid, is_volume, true);
|
||||
mount::bind_remount_read_only(&dst).context("bind remount readonly")?;
|
||||
mount::bind_remount(&dst, readonly).context("bind remount readonly")?;
|
||||
}
|
||||
|
||||
Ok(do_get_guest_path(target, cid, is_volume, is_rafs))
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
use agent::Storage;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use kata_sys_util::mount::{bind_remount, umount_timeout};
|
||||
use kata_types::k8s::is_watchable_mount;
|
||||
use kata_types::mount;
|
||||
use nix::sys::stat::stat;
|
||||
@@ -19,10 +20,12 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
|
||||
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
||||
|
||||
use super::{
|
||||
utils, ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
|
||||
utils::{self, do_get_host_path},
|
||||
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
|
||||
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct VirtiofsShareMount {
|
||||
id: String,
|
||||
}
|
||||
@@ -166,4 +169,35 @@ impl ShareFsMount for VirtiofsShareMount {
|
||||
storages: vec![],
|
||||
})
|
||||
}
|
||||
|
||||
async fn upgrade_to_rw(&self, file_name: &str) -> Result<()> {
|
||||
// Remount readonly directory with readwrite permission
|
||||
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||
bind_remount(&host_dest, false)
|
||||
.context("remount readonly directory with readwrite permission")?;
|
||||
// Remount readwrite directory with readwrite permission
|
||||
let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
|
||||
bind_remount(&host_dest, false)
|
||||
.context("remount readwrite directory with readwrite permission")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn downgrade_to_ro(&self, file_name: &str) -> Result<()> {
|
||||
// Remount readwrite directory with readonly permission
|
||||
let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
|
||||
bind_remount(&host_dest, true)
|
||||
.context("remount readwrite directory with readonly permission")?;
|
||||
// Remount readonly directory with readonly permission
|
||||
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||
bind_remount(&host_dest, true)
|
||||
.context("remount readonly directory with readonly permission")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn umount(&self, file_name: &str) -> Result<()> {
|
||||
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||
umount_timeout(&host_dest, 0).context("Umount readwrite host dest")?;
|
||||
// Umount event will be propagated to ro directory
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,9 +5,11 @@
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::Volume;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct BlockVolume {}
|
||||
|
||||
/// BlockVolume: block device volume
|
||||
@@ -17,6 +19,7 @@ impl BlockVolume {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for BlockVolume {
|
||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||
todo!()
|
||||
@@ -26,8 +29,9 @@ impl Volume for BlockVolume {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn cleanup(&self) -> Result<()> {
|
||||
todo!()
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
warn!(sl!(), "Cleaning up BlockVolume is still unimplemented.");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,9 +5,11 @@
|
||||
//
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::Volume;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct DefaultVolume {
|
||||
mount: oci::Mount,
|
||||
}
|
||||
@@ -21,6 +23,7 @@ impl DefaultVolume {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for DefaultVolume {
|
||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
@@ -30,7 +33,8 @@ impl Volume for DefaultVolume {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
fn cleanup(&self) -> Result<()> {
|
||||
todo!()
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
warn!(sl!(), "Cleaning up DefaultVolume is still unimplemented.");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ mod block_volume;
|
||||
mod default_volume;
|
||||
mod share_fs_volume;
|
||||
mod shm_volume;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use std::{sync::Arc, vec::Vec};
|
||||
|
||||
@@ -16,10 +17,11 @@ use tokio::sync::RwLock;
|
||||
|
||||
use crate::share_fs::ShareFs;
|
||||
|
||||
#[async_trait]
|
||||
pub trait Volume: Send + Sync {
|
||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
|
||||
fn get_storage(&self) -> Result<Vec<agent::Storage>>;
|
||||
fn cleanup(&self) -> Result<()>;
|
||||
async fn cleanup(&self) -> Result<()>;
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
@@ -4,12 +4,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::{path::Path, sync::Arc};
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
sync::{Arc, Weak},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::Volume;
|
||||
use crate::share_fs::{ShareFs, ShareFsVolumeConfig};
|
||||
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
|
||||
use kata_types::mount;
|
||||
|
||||
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
|
||||
@@ -19,6 +24,7 @@ use kata_types::mount;
|
||||
// device nodes to the guest.
|
||||
// skip the volumes whose source had already set to guest share dir.
|
||||
pub(crate) struct ShareFsVolume {
|
||||
share_fs: Option<Weak<dyn ShareFs>>,
|
||||
mounts: Vec<oci::Mount>,
|
||||
storages: Vec<agent::Storage>,
|
||||
}
|
||||
@@ -29,10 +35,12 @@ impl ShareFsVolume {
|
||||
m: &oci::Mount,
|
||||
cid: &str,
|
||||
) -> Result<Self> {
|
||||
// The file_name is in the format of "sandbox-{uuid}-{file_name}"
|
||||
let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
|
||||
let file_name = generate_mount_path(cid, file_name);
|
||||
let file_name = generate_mount_path("sandbox", file_name);
|
||||
|
||||
let mut volume = Self {
|
||||
share_fs: share_fs.as_ref().map(Arc::downgrade),
|
||||
mounts: vec![],
|
||||
storages: vec![],
|
||||
};
|
||||
@@ -59,36 +67,87 @@ impl ShareFsVolume {
|
||||
}
|
||||
}
|
||||
Some(share_fs) => {
|
||||
let readonly = m.options.iter().any(|opt| opt == "ro");
|
||||
|
||||
let share_fs_mount = share_fs.get_share_fs_mount();
|
||||
let mount_result = share_fs_mount
|
||||
.share_volume(ShareFsVolumeConfig {
|
||||
cid: cid.to_string(),
|
||||
source: m.source.clone(),
|
||||
target: file_name,
|
||||
readonly: m.options.iter().any(|o| *o == "ro"),
|
||||
mount_options: m.options.clone(),
|
||||
mount: m.clone(),
|
||||
is_rafs: false,
|
||||
let mounted_info_set = share_fs.mounted_info_set();
|
||||
let mut mounted_info_set = mounted_info_set.lock().await;
|
||||
if let Some(mut mounted_info) = mounted_info_set.get(&m.source).cloned() {
|
||||
// Mounted at least once
|
||||
let guest_path = mounted_info
|
||||
.guest_path
|
||||
.clone()
|
||||
.as_os_str()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.to_owned();
|
||||
if !readonly && mounted_info.readonly() {
|
||||
// The current mount should be upgraded to readwrite permission
|
||||
info!(
|
||||
sl!(),
|
||||
"The mount will be upgraded, mount = {:?}, cid = {}", m, cid
|
||||
);
|
||||
share_fs_mount
|
||||
.upgrade_to_rw(
|
||||
&mounted_info
|
||||
.file_name()
|
||||
.context("get name of mounted info")?,
|
||||
)
|
||||
.await
|
||||
.context("upgrade mount")?;
|
||||
}
|
||||
if readonly {
|
||||
mounted_info.ro_ref_count += 1;
|
||||
} else {
|
||||
mounted_info.rw_ref_count += 1;
|
||||
}
|
||||
mounted_info_set.insert(m.source.clone(), mounted_info);
|
||||
|
||||
volume.mounts.push(oci::Mount {
|
||||
destination: m.destination.clone(),
|
||||
r#type: "bind".to_string(),
|
||||
source: guest_path,
|
||||
options: m.options.clone(),
|
||||
})
|
||||
.await
|
||||
.context("share fs volume")?;
|
||||
} else {
|
||||
// Not mounted ever
|
||||
let mount_result = share_fs_mount
|
||||
.share_volume(ShareFsVolumeConfig {
|
||||
// The scope of shared volume is sandbox
|
||||
cid: String::from(""),
|
||||
source: m.source.clone(),
|
||||
target: file_name.clone(),
|
||||
readonly,
|
||||
mount_options: m.options.clone(),
|
||||
mount: m.clone(),
|
||||
is_rafs: false,
|
||||
})
|
||||
.await
|
||||
.context("mount shared volume")?;
|
||||
let mounted_info = MountedInfo::new(
|
||||
PathBuf::from_str(&mount_result.guest_path)
|
||||
.context("convert guest path")?,
|
||||
readonly,
|
||||
);
|
||||
mounted_info_set.insert(m.source.clone(), mounted_info);
|
||||
// set storages for the volume
|
||||
volume.storages = mount_result.storages;
|
||||
|
||||
// set storages for the volume
|
||||
volume.storages = mount_result.storages;
|
||||
|
||||
// set mount for the volume
|
||||
volume.mounts.push(oci::Mount {
|
||||
destination: m.destination.clone(),
|
||||
r#type: "bind".to_string(),
|
||||
source: mount_result.guest_path,
|
||||
options: m.options.clone(),
|
||||
});
|
||||
// set mount for the volume
|
||||
volume.mounts.push(oci::Mount {
|
||||
destination: m.destination.clone(),
|
||||
r#type: "bind".to_string(),
|
||||
source: mount_result.guest_path,
|
||||
options: m.options.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(volume)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for ShareFsVolume {
|
||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||
Ok(self.mounts.clone())
|
||||
@@ -98,8 +157,75 @@ impl Volume for ShareFsVolume {
|
||||
Ok(self.storages.clone())
|
||||
}
|
||||
|
||||
fn cleanup(&self) -> Result<()> {
|
||||
todo!()
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
if self.share_fs.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
let share_fs = match self.share_fs.as_ref().unwrap().upgrade() {
|
||||
Some(share_fs) => share_fs,
|
||||
None => return Err(anyhow!("The share_fs was released unexpectedly")),
|
||||
};
|
||||
|
||||
let mounted_info_set = share_fs.mounted_info_set();
|
||||
let mut mounted_info_set = mounted_info_set.lock().await;
|
||||
for m in self.mounts.iter() {
|
||||
let (host_source, mut mounted_info) = match mounted_info_set
|
||||
.iter()
|
||||
.find(|entry| entry.1.guest_path.as_os_str().to_str().unwrap() == m.source)
|
||||
.map(|entry| (entry.0.to_owned(), entry.1.clone()))
|
||||
{
|
||||
Some(entry) => entry,
|
||||
None => {
|
||||
warn!(
|
||||
sl!(),
|
||||
"The mounted info for guest path {} not found", m.source
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let old_readonly = mounted_info.readonly();
|
||||
|
||||
if m.options.iter().any(|opt| *opt == "ro") {
|
||||
mounted_info.ro_ref_count -= 1;
|
||||
} else {
|
||||
mounted_info.rw_ref_count -= 1;
|
||||
}
|
||||
|
||||
debug!(
|
||||
sl!(),
|
||||
"Ref count for {} was updated to {} due to volume cleanup",
|
||||
host_source,
|
||||
mounted_info.ref_count()
|
||||
);
|
||||
let share_fs_mount = share_fs.get_share_fs_mount();
|
||||
let file_name = mounted_info.file_name()?;
|
||||
|
||||
if mounted_info.ref_count() > 0 {
|
||||
// Downgrade to readonly if no container needs readwrite permission
|
||||
if !old_readonly && mounted_info.readonly() {
|
||||
info!(sl!(), "Downgrade {} to readonly due to no container that needs readwrite permission", host_source);
|
||||
share_fs_mount
|
||||
.downgrade_to_ro(&file_name)
|
||||
.await
|
||||
.context("Downgrade volume")?;
|
||||
}
|
||||
mounted_info_set.insert(host_source.clone(), mounted_info);
|
||||
} else {
|
||||
info!(
|
||||
sl!(),
|
||||
"The path will be umounted due to no references, host_source = {}", host_source
|
||||
);
|
||||
mounted_info_set.remove(&host_source);
|
||||
// Umount the volume
|
||||
share_fs_mount
|
||||
.umount(&file_name)
|
||||
.await
|
||||
.context("Umount volume")?
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::Volume;
|
||||
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
|
||||
@@ -19,6 +20,7 @@ pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
|
||||
// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
|
||||
pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral";
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ShmVolume {
|
||||
mount: oci::Mount,
|
||||
storage: Option<agent::Storage>,
|
||||
@@ -82,6 +84,7 @@ impl ShmVolume {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Volume for ShmVolume {
|
||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||
Ok(vec![self.mount.clone()])
|
||||
@@ -96,8 +99,9 @@ impl Volume for ShmVolume {
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
fn cleanup(&self) -> Result<()> {
|
||||
todo!()
|
||||
async fn cleanup(&self) -> Result<()> {
|
||||
warn!(sl!(), "Cleaning up ShmVolume is still unimplemented.");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use agent::Agent;
|
||||
@@ -81,8 +82,8 @@ impl Container {
|
||||
let mut inner = self.inner.write().await;
|
||||
let toml_config = self.resource_manager.config().await;
|
||||
let config = &self.config;
|
||||
amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?;
|
||||
let sandbox_pidns = is_pid_namespace_enabled(&spec);
|
||||
amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?;
|
||||
|
||||
// handler rootfs
|
||||
let rootfs = self
|
||||
@@ -143,13 +144,10 @@ impl Container {
|
||||
// create container
|
||||
let r = agent::CreateContainerRequest {
|
||||
process_id: agent::ContainerProcessID::new(&config.container_id, ""),
|
||||
string_user: None,
|
||||
devices: vec![],
|
||||
storages,
|
||||
oci: Some(spec),
|
||||
guest_hooks: None,
|
||||
sandbox_pidns,
|
||||
rootfs_mounts: vec![],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
self.agent
|
||||
@@ -258,7 +256,7 @@ impl Container {
|
||||
signal: u32,
|
||||
all: bool,
|
||||
) -> Result<()> {
|
||||
let inner = self.inner.read().await;
|
||||
let mut inner = self.inner.write().await;
|
||||
inner.signal_process(container_process, signal, all).await
|
||||
}
|
||||
|
||||
@@ -396,6 +394,7 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
|
||||
resource.block_io = None;
|
||||
resource.hugepage_limits = Vec::new();
|
||||
resource.network = None;
|
||||
resource.rdma = HashMap::new();
|
||||
}
|
||||
|
||||
// Host pidns path does not make sense in kata. Let's just align it with
|
||||
@@ -404,7 +403,10 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
|
||||
for n in linux.namespaces.iter() {
|
||||
match n.r#type.as_str() {
|
||||
oci::PIDNAMESPACE | oci::NETWORKNAMESPACE => continue,
|
||||
_ => ns.push(n.clone()),
|
||||
_ => ns.push(oci::LinuxNamespace {
|
||||
r#type: n.r#type.clone(),
|
||||
path: "".to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ impl ContainerInner {
|
||||
}
|
||||
|
||||
pub(crate) async fn signal_process(
|
||||
&self,
|
||||
&mut self,
|
||||
process: &ContainerProcess,
|
||||
signal: u32,
|
||||
all: bool,
|
||||
@@ -247,6 +247,9 @@ impl ContainerInner {
|
||||
self.agent
|
||||
.signal_process(agent::SignalProcessRequest { process_id, signal })
|
||||
.await?;
|
||||
|
||||
self.clean_volumes().await.context("clean volumes")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -268,4 +271,23 @@ impl ContainerInner {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn clean_volumes(&mut self) -> Result<()> {
|
||||
let mut unhandled = Vec::new();
|
||||
for v in self.volumes.iter() {
|
||||
if let Err(err) = v.cleanup().await {
|
||||
unhandled.push(Arc::clone(v));
|
||||
warn!(
|
||||
sl!(),
|
||||
"Failed to clean volume {:?}, error = {:?}",
|
||||
v.get_volume_mount(),
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
if !unhandled.is_empty() {
|
||||
self.volumes = unhandled;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,11 @@
|
||||
|
||||
use std::{
|
||||
io,
|
||||
os::unix::{io::FromRawFd, net::UnixStream as StdUnixStream},
|
||||
os::unix::{
|
||||
io::{FromRawFd, RawFd},
|
||||
net::UnixStream as StdUnixStream,
|
||||
prelude::AsRawFd,
|
||||
},
|
||||
pin::Pin,
|
||||
task::Context as TaskContext,
|
||||
task::Poll,
|
||||
@@ -52,8 +56,21 @@ impl ShimIo {
|
||||
"new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr
|
||||
);
|
||||
|
||||
// Clears O_NONBLOCK on `fd`, putting the descriptor back into blocking mode.
// Failures are logged and otherwise ignored.
let set_flag_with_blocking = |fd: RawFd| {
    // SAFETY: F_GETFL takes no pointer argument; it only queries the status flags of `fd`.
    let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
    if flag < 0 {
        // fcntl signals failure with -1. Feeding that value to F_SETFL would
        // request every status flag at once, so bail out instead.
        let err = std::io::Error::last_os_error();
        error!(sl!(), "failed to get fcntl for fd {} error {}", fd, err);
        return;
    }

    // SAFETY: F_SETFL takes a plain integer argument; no memory is shared with the kernel.
    let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
    if ret < 0 {
        // Report the OS error rather than the return value, which is always -1 here.
        let err = std::io::Error::last_os_error();
        error!(sl!(), "failed to set fcntl for fd {} error {}", fd, err);
    }
};
|
||||
|
||||
let stdin_fd: Option<Box<dyn AsyncRead + Send + Unpin>> = if let Some(stdin) = stdin {
|
||||
info!(sl!(), "open stdin {:?}", &stdin);
|
||||
|
||||
// Since the stdin peer point (which is held by containerd) could not be opened
|
||||
// immediately, which would block here's open with block mode, and we wouldn't want to
|
||||
// block here, thus here opened with nonblock and then reset it to block mode for
|
||||
// tokio async io.
|
||||
match OpenOptions::new()
|
||||
.read(true)
|
||||
.write(false)
|
||||
@@ -61,7 +78,11 @@ impl ShimIo {
|
||||
.open(&stdin)
|
||||
.await
|
||||
{
|
||||
Ok(file) => Some(Box::new(file)),
|
||||
Ok(file) => {
|
||||
// Set it to blocking to avoid infinitely handling EAGAIN when the reader is empty
|
||||
set_flag_with_blocking(file.as_raw_fd());
|
||||
Some(Box::new(file))
|
||||
}
|
||||
Err(err) => {
|
||||
error!(sl!(), "failed to open {} error {:?}", &stdin, err);
|
||||
None
|
||||
|
||||
@@ -133,22 +133,14 @@ impl Process {
|
||||
let io_name = io_name.to_string();
|
||||
let logger = self.logger.new(o!("io_name" => io_name));
|
||||
let _ = tokio::spawn(async move {
|
||||
loop {
|
||||
match tokio::io::copy(&mut reader, &mut writer).await {
|
||||
Err(e) => {
|
||||
if let Some(error_code) = e.raw_os_error() {
|
||||
if error_code == libc::EAGAIN {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
warn!(logger, "run_io_copy: failed to copy stream: {}", e);
|
||||
}
|
||||
Ok(length) => {
|
||||
warn!(logger, "run_io_copy: stop to copy stream length {}", length)
|
||||
}
|
||||
};
|
||||
break;
|
||||
}
|
||||
match tokio::io::copy(&mut reader, &mut writer).await {
|
||||
Err(e) => {
|
||||
warn!(logger, "run_io_copy: failed to copy stream: {}", e);
|
||||
}
|
||||
Ok(length) => {
|
||||
info!(logger, "run_io_copy: stop to copy stream length {}", length)
|
||||
}
|
||||
};
|
||||
|
||||
wgw.done();
|
||||
});
|
||||
|
||||
@@ -21,7 +21,10 @@ use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
||||
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
|
||||
use kata_types::config::{hypervisor::register_hypervisor_plugin, DragonballConfig, TomlConfig};
|
||||
use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU};
|
||||
use kata_types::config::{
|
||||
hypervisor::register_hypervisor_plugin, DragonballConfig, QemuConfig, TomlConfig,
|
||||
};
|
||||
use resource::ResourceManager;
|
||||
use sandbox::VIRTCONTAINER;
|
||||
use tokio::sync::mpsc::Sender;
|
||||
@@ -36,6 +39,8 @@ impl RuntimeHandler for VirtContainer {
|
||||
// register
|
||||
let dragonball_config = Arc::new(DragonballConfig::new());
|
||||
register_hypervisor_plugin("dragonball", dragonball_config);
|
||||
let qemu_config = Arc::new(QemuConfig::new());
|
||||
register_hypervisor_plugin("qemu", qemu_config);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -106,6 +111,13 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result<Arc<dyn Hypervisor>>
|
||||
.await;
|
||||
Ok(Arc::new(hypervisor))
|
||||
}
|
||||
HYPERVISOR_QEMU => {
|
||||
let mut hypervisor = Qemu::new();
|
||||
hypervisor
|
||||
.set_hypervisor_config(hypervisor_config.clone())
|
||||
.await;
|
||||
Ok(Arc::new(hypervisor))
|
||||
}
|
||||
_ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)),
|
||||
}
|
||||
}
|
||||
@@ -149,4 +161,27 @@ agent_name="kata"
|
||||
let res = new_agent(&toml_config);
|
||||
assert!(res.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_new_hypervisor() {
|
||||
VirtContainer::init().unwrap();
|
||||
|
||||
let toml_config = {
|
||||
let config_content = r#"
|
||||
[hypervisor.qemu]
|
||||
path = "/bin/echo"
|
||||
kernel = "/bin/echo"
|
||||
image = "/bin/echo"
|
||||
firmware = ""
|
||||
|
||||
[runtime]
|
||||
hypervisor_name="qemu"
|
||||
"#;
|
||||
TomlConfig::load(config_content).map_err(|e| anyhow!("can not load config toml: {}", e))
|
||||
}
|
||||
.unwrap();
|
||||
|
||||
let res = new_hypervisor(&toml_config).await;
|
||||
assert!(res.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
14
src/runtime-rs/crates/shim-ctl/Cargo.toml
Normal file
14
src/runtime-rs/crates/shim-ctl/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "shim-ctl"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "^1.0"
|
||||
common = { path = "../runtimes/common" }
|
||||
logging = { path = "../../../libs/logging"}
|
||||
runtimes = { path = "../runtimes" }
|
||||
tokio = { version = "1.8.0", features = [ "rt", "rt-multi-thread" ] }
|
||||
|
||||
51
src/runtime-rs/crates/shim-ctl/README.md
Normal file
51
src/runtime-rs/crates/shim-ctl/README.md
Normal file
@@ -0,0 +1,51 @@
|
||||
### Purpose
|
||||
`shim-ctl` is a binary to exercise the shim proper without containerd
|
||||
dependencies.
|
||||
|
||||
The actual Kata shim is hard to execute outside of deployment environments due
|
||||
to its dependency on containerd's shim v2 protocol. Among others, the
|
||||
dependency requires having a socket with a remote end that's capable of driving
|
||||
the shim using the shim v2 `ttrpc` protocol, and a binary for shim to publish
|
||||
events to.
|
||||
|
||||
Since at least some of the shim v2 protocol dependencies are fairly hard to
|
||||
mock up, this presents a significant obstacle to development.
|
||||
|
||||
`shim-ctl` takes advantage of the fact that due to the shim implementation
|
||||
architecture, only the outermost couple of shim layers are
|
||||
containerd-dependent and all of the inner layers that do the actual heavy
|
||||
lifting don't depend on containerd. This allows `shim-ctl` to replace the
|
||||
containerd-dependent layers with something that's easier to use on a
|
||||
developer's machine.
|
||||
|
||||
### Usage
|
||||
|
||||
After building the binary as usual with `cargo build`, run `shim-ctl` as follows.
|
||||
|
||||
Even though `shim-ctl` does away with containerd dependencies it still has
|
||||
some requirements of its execution environment. In particular, it needs a
|
||||
Kata `configuration.toml` file, some Kata distribution files to point a bunch
|
||||
of `configuration.toml` keys to (like hypervisor keys `path`, `kernel` or
|
||||
`initrd`) and a container bundle. These are however much easier to fulfill
|
||||
than the original containerd dependencies, and doing so is a one-off task -
|
||||
once done they can be reused for an unlimited number of modify-build-run
|
||||
development cycles.
|
||||
|
||||
`shim-ctl` also needs to be launched from an exported container bundle
|
||||
directory. One can be created by running
|
||||
|
||||
```
|
||||
mkdir rootfs
|
||||
podman export $(podman create busybox) | tar -C ./rootfs -xvf -
|
||||
runc spec -b .
|
||||
```
|
||||
|
||||
in a suitable directory.
|
||||
|
||||
The program can then be launched like this:
|
||||
|
||||
```
|
||||
cd /the/bundle/directory
|
||||
KATA_CONF_FILE=/path/to/configuration-qemu.toml /path/to/shim-ctl
|
||||
```
|
||||
|
||||
45
src/runtime-rs/crates/shim-ctl/src/main.rs
Normal file
45
src/runtime-rs/crates/shim-ctl/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
// Copyright (c) 2022 Red Hat
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use common::{
|
||||
message::Message,
|
||||
types::{ContainerConfig, Request},
|
||||
};
|
||||
use runtimes::RuntimeHandlerManager;
|
||||
use tokio::sync::mpsc::channel;
|
||||
|
||||
const MESSAGE_BUFFER_SIZE: usize = 8;
|
||||
const WORKER_THREADS: usize = 2;
|
||||
|
||||
async fn real_main() {
|
||||
let (sender, _receiver) = channel::<Message>(MESSAGE_BUFFER_SIZE);
|
||||
let manager = RuntimeHandlerManager::new("xxx", sender).await.unwrap();
|
||||
|
||||
let req = Request::CreateContainer(ContainerConfig {
|
||||
container_id: "xxx".to_owned(),
|
||||
bundle: ".".to_owned(),
|
||||
rootfs_mounts: Vec::new(),
|
||||
terminal: false,
|
||||
options: None,
|
||||
stdin: None,
|
||||
stdout: None,
|
||||
stderr: None,
|
||||
});
|
||||
|
||||
manager.handler_message(req).await.ok();
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(WORKER_THREADS)
|
||||
.enable_all()
|
||||
.build()
|
||||
.context("prepare tokio runtime")?;
|
||||
|
||||
runtime.block_on(real_main());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -197,6 +197,11 @@ DEFDISABLEGUESTEMPTYDIR := false
|
||||
DEFAULTEXPFEATURES := []
|
||||
|
||||
DEFDISABLESELINUX := false
|
||||
|
||||
# Default guest SELinux configuration
|
||||
DEFDISABLEGUESTSELINUX := true
|
||||
DEFGUESTSELINUXLABEL := system_u:system_r:container_t
|
||||
|
||||
#Default SeccompSandbox param
|
||||
#The same default policy is used by libvirt
|
||||
#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
|
||||
@@ -562,6 +567,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU
|
||||
USER_VARS += DEFDISABLEGUESTEMPTYDIR
|
||||
USER_VARS += DEFDISABLEGUESTSECCOMP
|
||||
USER_VARS += DEFDISABLESELINUX
|
||||
USER_VARS += DEFDISABLEGUESTSELINUX
|
||||
USER_VARS += DEFGUESTSELINUXLABEL
|
||||
USER_VARS += DEFAULTEXPFEATURES
|
||||
USER_VARS += DEFDISABLEBLOCK
|
||||
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
|
||||
|
||||
@@ -76,6 +76,7 @@ type RuntimeConfigInfo struct {
|
||||
type RuntimeInfo struct {
|
||||
Config RuntimeConfigInfo
|
||||
Path string
|
||||
GuestSeLinuxLabel string
|
||||
Experimental []exp.Feature
|
||||
Version RuntimeVersionInfo
|
||||
Debug bool
|
||||
@@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
|
||||
SandboxCgroupOnly: config.SandboxCgroupOnly,
|
||||
Experimental: config.Experimental,
|
||||
DisableGuestSeccomp: config.DisableGuestSeccomp,
|
||||
GuestSeLinuxLabel: config.GuestSeLinuxLabel,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,6 +38,13 @@ image = "@IMAGEPATH@"
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux=@DEFDISABLESELINUX@
|
||||
|
||||
# disable applying SELinux on the container process
|
||||
# If set to false, the type `container_t` is applied to the container process by default.
|
||||
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||
# with `SELINUX=yes`.
|
||||
# (default: true)
|
||||
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
|
||||
|
||||
# Path to the firmware.
|
||||
# If you want Cloud Hypervisor to use a specific firmware, set its path below.
|
||||
# This option is only used when confidential_guest is enabled.
|
||||
@@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@"
|
||||
# (default: true)
|
||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||
# This is used when you want to apply a type other than the default `container_t`,
|
||||
# so general users should not uncomment and apply it.
|
||||
# (format: "user:role:type")
|
||||
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
|
||||
@@ -463,6 +463,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
||||
# disable applying SELinux on the VMM process (default false)
|
||||
disable_selinux=@DEFDISABLESELINUX@
|
||||
|
||||
# disable applying SELinux on the container process
|
||||
# If set to false, the type `container_t` is applied to the container process by default.
|
||||
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||
# with `SELINUX=yes`.
|
||||
# (default: true)
|
||||
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
|
||||
|
||||
|
||||
[factory]
|
||||
# VM templating support. Once enabled, new VMs are created from template
|
||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||
@@ -580,6 +588,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
|
||||
# (default: true)
|
||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||
|
||||
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||
# This is used when you want to apply a type other than the default `container_t`,
|
||||
# so general users should not uncomment and apply it.
|
||||
# (format: "user:role:type")
|
||||
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
|
||||
|
||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||
# (default: disabled)
|
||||
|
||||
@@ -90,6 +90,7 @@ const defaultSevSnpGuest = false
|
||||
const defaultGuestSwap = false
|
||||
const defaultRootlessHypervisor = false
|
||||
const defaultDisableSeccomp = false
|
||||
const defaultDisableGuestSeLinux = true
|
||||
const defaultVfioMode = "guest-kernel"
|
||||
const defaultLegacySerial = false
|
||||
const defaultGuestPreAttestation = false
|
||||
|
||||
@@ -60,9 +60,9 @@ const (
|
||||
type tomlConfig struct {
|
||||
Hypervisor map[string]hypervisor
|
||||
Agent map[string]agent
|
||||
Runtime runtime
|
||||
Image image
|
||||
Factory factory
|
||||
Runtime runtime
|
||||
}
|
||||
|
||||
type image struct {
|
||||
@@ -164,6 +164,7 @@ type hypervisor struct {
|
||||
Rootless bool `toml:"rootless"`
|
||||
DisableSeccomp bool `toml:"disable_seccomp"`
|
||||
DisableSeLinux bool `toml:"disable_selinux"`
|
||||
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
|
||||
LegacySerial bool `toml:"use_legacy_serial"`
|
||||
GuestPreAttestation bool `toml:"guest_pre_attestation"`
|
||||
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
|
||||
@@ -175,12 +176,13 @@ type runtime struct {
|
||||
JaegerUser string `toml:"jaeger_user"`
|
||||
JaegerPassword string `toml:"jaeger_password"`
|
||||
VfioMode string `toml:"vfio_mode"`
|
||||
GuestSeLinuxLabel string `toml:"guest_selinux_label"`
|
||||
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
||||
Experimental []string `toml:"experimental"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
Tracing bool `toml:"enable_tracing"`
|
||||
DisableNewNetNs bool `toml:"disable_new_netns"`
|
||||
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
|
||||
Debug bool `toml:"enable_debug"`
|
||||
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
|
||||
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
|
||||
EnablePprof bool `toml:"enable_pprof"`
|
||||
@@ -701,6 +703,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
||||
EnableAnnotations: h.EnableAnnotations,
|
||||
DisableSeLinux: h.DisableSeLinux,
|
||||
DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -854,6 +857,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
SEVGuestPolicy: h.SEVGuestPolicy,
|
||||
SEVCertChainPath: h.SEVCertChainPath,
|
||||
EnableVCPUsPinning: h.EnableVCPUsPinning,
|
||||
DisableGuestSeLinux: h.DisableGuestSeLinux,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -920,6 +924,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
GuestHookPath: h.guestHookPath(),
|
||||
DisableSeLinux: h.DisableSeLinux,
|
||||
EnableAnnotations: h.EnableAnnotations,
|
||||
DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -1025,6 +1030,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
||||
DisableSeccomp: h.DisableSeccomp,
|
||||
ConfidentialGuest: h.ConfidentialGuest,
|
||||
DisableSeLinux: h.DisableSeLinux,
|
||||
DisableGuestSeLinux: h.DisableGuestSeLinux,
|
||||
NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(),
|
||||
NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(),
|
||||
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(),
|
||||
@@ -1262,6 +1268,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
||||
GuestSwap: defaultGuestSwap,
|
||||
Rootless: defaultRootlessHypervisor,
|
||||
DisableSeccomp: defaultDisableSeccomp,
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
LegacySerial: defaultLegacySerial,
|
||||
GuestPreAttestation: defaultGuestPreAttestation,
|
||||
GuestPreAttestationProxy: defaultGuestPreAttestationProxy,
|
||||
@@ -1356,7 +1363,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
|
||||
}
|
||||
|
||||
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
|
||||
|
||||
config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
|
||||
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
|
||||
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
|
||||
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
|
||||
|
||||
@@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
||||
VhostUserStorePath: defaultVhostUserStorePath,
|
||||
VirtioFSCache: defaultVirtioFSCacheMode,
|
||||
BlockDeviceAIO: defaultBlockDeviceAIO,
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
}
|
||||
|
||||
expectedAgentConfig := vc.KataAgentConfig{
|
||||
|
||||
@@ -128,6 +128,9 @@ type RuntimeConfig struct {
|
||||
//Determines if seccomp should be applied inside guest
|
||||
DisableGuestSeccomp bool
|
||||
|
||||
//SELinux security context applied to the container process inside guest.
|
||||
GuestSeLinuxLabel string
|
||||
|
||||
// Sandbox sizing information which, if provided, indicates the size of
|
||||
// the sandbox needed for the workload(s)
|
||||
SandboxCPUs uint32
|
||||
@@ -948,6 +951,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st
|
||||
|
||||
DisableGuestSeccomp: runtime.DisableGuestSeccomp,
|
||||
|
||||
GuestSeLinuxLabel: runtime.GuestSeLinuxLabel,
|
||||
|
||||
Experimental: runtime.Experimental,
|
||||
|
||||
ServiceOffload: runtime.ServiceOffload,
|
||||
|
||||
@@ -77,6 +77,8 @@ const (
|
||||
MinHypervisorMemory = 256
|
||||
|
||||
defaultMsize9p = 8192
|
||||
|
||||
defaultDisableGuestSeLinux = true
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -356,6 +358,7 @@ type HypervisorConfig struct {
|
||||
Rootless bool
|
||||
DisableSeccomp bool
|
||||
DisableSeLinux bool
|
||||
DisableGuestSeLinux bool
|
||||
LegacySerial bool
|
||||
EnableVCPUsPinning bool
|
||||
}
|
||||
|
||||
@@ -92,22 +92,24 @@ func TestHypervisorConfigValidTemplateConfig(t *testing.T) {
|
||||
func TestHypervisorConfigDefaults(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
hypervisorConfig := &HypervisorConfig{
|
||||
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
||||
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
||||
HypervisorPath: "",
|
||||
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
||||
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
||||
HypervisorPath: "",
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
}
|
||||
testHypervisorConfigValid(t, hypervisorConfig, true)
|
||||
|
||||
hypervisorConfigDefaultsExpected := &HypervisorConfig{
|
||||
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
||||
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
||||
HypervisorPath: "",
|
||||
NumVCPUs: defaultVCPUs,
|
||||
MemorySize: defaultMemSzMiB,
|
||||
DefaultBridges: defaultBridges,
|
||||
BlockDeviceDriver: defaultBlockDriver,
|
||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||
Msize9p: defaultMsize9p,
|
||||
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
||||
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
||||
HypervisorPath: "",
|
||||
NumVCPUs: defaultVCPUs,
|
||||
MemorySize: defaultMemSzMiB,
|
||||
DefaultBridges: defaultBridges,
|
||||
BlockDeviceDriver: defaultBlockDriver,
|
||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||
Msize9p: defaultMsize9p,
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
}
|
||||
|
||||
assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected)
|
||||
|
||||
@@ -37,6 +37,7 @@ import (
|
||||
"context"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/grpc/codes"
|
||||
@@ -70,6 +71,9 @@ const (
|
||||
kernelParamDebugConsole = "agent.debug_console"
|
||||
kernelParamDebugConsoleVPort = "agent.debug_console_vport"
|
||||
kernelParamDebugConsoleVPortValue = "1026"
|
||||
|
||||
// Default SELinux type applied to the container process inside guest
|
||||
defaultSeLinuxContainerType = "container_t"
|
||||
)
|
||||
|
||||
type customRequestTimeoutKeyType struct{}
|
||||
@@ -906,7 +910,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st
|
||||
return nil
|
||||
}
|
||||
|
||||
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) {
|
||||
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error {
|
||||
// Disable Hooks since they have been handled on the host and there is
|
||||
// no reason to send them to the agent. It would make no sense to try
|
||||
// to apply them on the guest.
|
||||
@@ -918,11 +922,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
|
||||
grpcSpec.Linux.Seccomp = nil
|
||||
}
|
||||
|
||||
// Disable SELinux inside of the virtual machine, the label will apply
|
||||
// to the KVM process
|
||||
// Pass SELinux label for the container process to the agent.
|
||||
if grpcSpec.Process.SelinuxLabel != "" {
|
||||
k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload")
|
||||
grpcSpec.Process.SelinuxLabel = ""
|
||||
if !disableGuestSeLinux {
|
||||
k.Logger().Info("SELinux label will be applied to the container process inside guest")
|
||||
|
||||
var label string
|
||||
if guestSeLinuxLabel != "" {
|
||||
label = guestSeLinuxLabel
|
||||
} else {
|
||||
label = grpcSpec.Process.SelinuxLabel
|
||||
}
|
||||
|
||||
processContext, err := selinux.NewContext(label)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Change the type from KVM to container because the type passed from the high-level
|
||||
// runtime is for KVM process.
|
||||
if guestSeLinuxLabel == "" {
|
||||
processContext["type"] = defaultSeLinuxContainerType
|
||||
}
|
||||
grpcSpec.Process.SelinuxLabel = processContext.Get()
|
||||
} else {
|
||||
k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled")
|
||||
grpcSpec.Process.SelinuxLabel = ""
|
||||
grpcSpec.Linux.MountLabel = ""
|
||||
}
|
||||
}
|
||||
|
||||
// By now only CPU constraints are supported
|
||||
@@ -984,6 +1011,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
|
||||
}
|
||||
grpcSpec.Linux.Devices = linuxDevices
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
|
||||
@@ -1267,9 +1296,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
|
||||
|
||||
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
|
||||
|
||||
// Currently, guest SELinux can be enabled only when SELinux is enabled on the host side.
|
||||
if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() {
|
||||
return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side")
|
||||
}
|
||||
if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" {
|
||||
return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled")
|
||||
}
|
||||
|
||||
// We need to constrain the spec to make sure we're not
|
||||
// passing irrelevant information to the agent.
|
||||
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
||||
err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req := &grpc.CreateContainerRequest{
|
||||
ContainerId: c.id,
|
||||
|
||||
@@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) {
|
||||
}
|
||||
|
||||
k := kataAgent{}
|
||||
k.constrainGRPCSpec(g, true, true)
|
||||
k.constrainGRPCSpec(g, true, true, "", true)
|
||||
|
||||
// Check nil fields
|
||||
assert.Nil(g.Hooks)
|
||||
|
||||
@@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
||||
SystemdCgroup: sconfig.SystemdCgroup,
|
||||
SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
|
||||
DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
|
||||
GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel,
|
||||
}
|
||||
|
||||
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
|
||||
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
||||
SystemdCgroup: savedConf.SystemdCgroup,
|
||||
SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
|
||||
DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
|
||||
GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel,
|
||||
}
|
||||
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)
|
||||
|
||||
|
||||
@@ -243,19 +243,6 @@ type ContainerConfig struct {
|
||||
// SandboxConfig is a sandbox configuration.
|
||||
// Refs: virtcontainers/sandbox.go:SandboxConfig
|
||||
type SandboxConfig struct {
|
||||
// Information for fields not saved:
|
||||
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
|
||||
// if you know this data needs to persist, please gives it
|
||||
// a specific field
|
||||
|
||||
ContainerConfigs []ContainerConfig
|
||||
|
||||
// SandboxBindMounts - list of paths to mount into guest
|
||||
SandboxBindMounts []string
|
||||
|
||||
// Experimental enables experimental features
|
||||
Experimental []string
|
||||
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||
// placed into to limit the resources the container has available
|
||||
Cgroups *configs.Cgroup `json:"cgroups"`
|
||||
@@ -265,8 +252,24 @@ type SandboxConfig struct {
|
||||
|
||||
KataShimConfig *ShimConfig
|
||||
|
||||
HypervisorType string
|
||||
NetworkConfig NetworkConfig
|
||||
// Custom SELinux security policy to the container process inside the VM
|
||||
GuestSeLinuxLabel string
|
||||
|
||||
HypervisorType string
|
||||
|
||||
// SandboxBindMounts - list of paths to mount into guest
|
||||
SandboxBindMounts []string
|
||||
|
||||
// Experimental enables experimental features
|
||||
Experimental []string
|
||||
|
||||
// Information for fields not saved:
|
||||
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
|
||||
// if you know this data needs to persist, please give it a specific field
|
||||
ContainerConfigs []ContainerConfig
|
||||
|
||||
NetworkConfig NetworkConfig
|
||||
|
||||
HypervisorConfig HypervisorConfig
|
||||
|
||||
ShmSize uint64
|
||||
|
||||
@@ -247,6 +247,9 @@ const (
|
||||
// DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest.
|
||||
DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp"
|
||||
|
||||
// GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest.
|
||||
GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label"
|
||||
|
||||
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
|
||||
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
|
||||
|
||||
|
||||
@@ -170,6 +170,15 @@ func (q *qemu) kernelParameters() string {
|
||||
// set the maximum number of vCPUs
|
||||
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
|
||||
|
||||
// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
|
||||
if q.config.DisableGuestSeLinux {
|
||||
q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled")
|
||||
params = append(params, Param{"selinux", "0"})
|
||||
} else {
|
||||
q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled")
|
||||
params = append(params, Param{"selinux", "1"})
|
||||
}
|
||||
|
||||
// add the params specified by the provided config. As the kernel
|
||||
// honours the last parameter value set and since the config-provided
|
||||
// params are added here, they will take priority over the defaults.
|
||||
@@ -465,6 +474,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
|
||||
return nd, nil
|
||||
}
|
||||
|
||||
// Set the xattr option for virtiofsd daemon to enable extended attributes
|
||||
// in virtiofs if SELinux on the guest side is enabled.
|
||||
if !q.config.DisableGuestSeLinux {
|
||||
q.Logger().Info("Set the xattr option for virtiofsd")
|
||||
q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr")
|
||||
}
|
||||
|
||||
// default use virtiofsd
|
||||
return &virtiofsd{
|
||||
path: q.config.VirtioFSDaemon,
|
||||
@@ -914,7 +930,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error {
|
||||
// the SELinux label. If these processes require privileged, we do
|
||||
// not want to run them under confinement.
|
||||
if !q.config.DisableSeLinux {
|
||||
|
||||
if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -27,15 +27,16 @@ import (
|
||||
|
||||
func newQemuConfig() HypervisorConfig {
|
||||
return HypervisorConfig{
|
||||
KernelPath: testQemuKernelPath,
|
||||
InitrdPath: testQemuInitrdPath,
|
||||
HypervisorPath: testQemuPath,
|
||||
NumVCPUs: defaultVCPUs,
|
||||
MemorySize: defaultMemSzMiB,
|
||||
DefaultBridges: defaultBridges,
|
||||
BlockDeviceDriver: defaultBlockDriver,
|
||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||
Msize9p: defaultMsize9p,
|
||||
KernelPath: testQemuKernelPath,
|
||||
InitrdPath: testQemuInitrdPath,
|
||||
HypervisorPath: testQemuPath,
|
||||
NumVCPUs: defaultVCPUs,
|
||||
MemorySize: defaultMemSzMiB,
|
||||
DefaultBridges: defaultBridges,
|
||||
BlockDeviceDriver: defaultBlockDriver,
|
||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||
Msize9p: defaultMsize9p,
|
||||
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,7 +59,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
|
||||
}
|
||||
|
||||
func TestQemuKernelParameters(t *testing.T) {
|
||||
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs())
|
||||
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
|
||||
params := []Param{
|
||||
{
|
||||
Key: "foo",
|
||||
|
||||
@@ -135,6 +135,8 @@ type SandboxConfig struct {
|
||||
Hostname string
|
||||
ID string
|
||||
HypervisorType HypervisorType
|
||||
// Custom SELinux security policy to the container process inside the VM
|
||||
GuestSeLinuxLabel string
|
||||
// Volumes is a list of shared volumes between the host and the Sandbox.
|
||||
Volumes []types.Volume
|
||||
// SandboxBindMounts - list of paths to mount into guest
|
||||
|
||||
@@ -25,6 +25,8 @@ const cpBinaryName = "cp"
|
||||
|
||||
const fileMode0755 = os.FileMode(0755)
|
||||
|
||||
const maxWaitDelay = 50 * time.Millisecond
|
||||
|
||||
// The DefaultRateLimiterRefillTime is used for calculating the rate at
|
||||
// which a TokenBucket is replenished, in cases where a RateLimiter is
|
||||
// applied to either network or disk I/O.
|
||||
@@ -306,6 +308,44 @@ func ConvertAddressFamily(family int32) pbTypes.IPFamily {
|
||||
}
|
||||
}
|
||||
|
||||
func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||
secs := time.Duration(timeoutSecs) * time.Second
|
||||
timeout := time.After(secs)
|
||||
delay := 1 * time.Millisecond
|
||||
|
||||
for {
|
||||
// Wait4 is used to reap and check that a child terminated.
|
||||
// Without the Wait4 call, Kill(0) for a child will always exit without
|
||||
// error because the process isn't reaped.
|
||||
// Wait4 return ECHLD error for non-child processes. Kill(0) is meant
|
||||
// to address this case, once the process is reaped by init process,
|
||||
// the call will return ESRCH error.
|
||||
|
||||
// "A watched pot never boils" and an unwaited-for process never appears to die!
|
||||
waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil)
|
||||
|
||||
if waitedPid == pid && err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
select {
|
||||
case <-time.After(delay):
|
||||
delay = delay * 5
|
||||
|
||||
if delay > maxWaitDelay {
|
||||
delay = maxWaitDelay
|
||||
}
|
||||
case <-timeout:
|
||||
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WaitLocalProcess waits for the specified process for up to timeoutSecs seconds.
|
||||
//
|
||||
// Notes:
|
||||
@@ -334,49 +374,24 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l
|
||||
}
|
||||
}
|
||||
|
||||
pidRunning := true
|
||||
|
||||
secs := time.Duration(timeoutSecs)
|
||||
timeout := time.After(secs * time.Second)
|
||||
|
||||
// Wait for the VM process to terminate
|
||||
outer:
|
||||
for {
|
||||
select {
|
||||
case <-time.After(50 * time.Millisecond):
|
||||
// Check if the process is running periodically to avoid a busy loop
|
||||
|
||||
var _status syscall.WaitStatus
|
||||
var _rusage syscall.Rusage
|
||||
var waitedPid int
|
||||
|
||||
// "A watched pot never boils" and an unwaited-for process never appears to die!
|
||||
waitedPid, err = syscall.Wait4(pid, &_status, syscall.WNOHANG, &_rusage)
|
||||
|
||||
if waitedPid == pid && err == nil {
|
||||
pidRunning = false
|
||||
break outer
|
||||
}
|
||||
|
||||
if err = syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
||||
pidRunning = false
|
||||
break outer
|
||||
}
|
||||
|
||||
break
|
||||
|
||||
case <-timeout:
|
||||
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
||||
|
||||
break outer
|
||||
}
|
||||
}
|
||||
pidRunning := waitForProcessCompletion(pid, timeoutSecs, logger)
|
||||
|
||||
if pidRunning {
|
||||
// Force process to die
|
||||
if err = syscall.Kill(pid, syscall.SIGKILL); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
logger.WithField("pid", pid).Warnf("process already finished")
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Failed to stop process %v: %s", pid, err)
|
||||
}
|
||||
|
||||
for {
|
||||
_, err := syscall.Wait4(pid, nil, 0, nil)
|
||||
if err != syscall.EINTR {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -5,6 +5,12 @@
|
||||
|
||||
package utils
|
||||
|
||||
import "github.com/sirupsen/logrus"
|
||||
|
||||
// GetDevicePathAndFsTypeOptions is a stub in this file: it ignores mountPoint
// and returns the zero value for every result (empty strings, nil options,
// nil error).
func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) {
	return "", "", nil, nil
}
|
||||
|
||||
func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||
return waitProcessUsingWaitLoop(pid, timeoutSecs, logger)
|
||||
}
|
||||
|
||||
@@ -14,8 +14,10 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -151,3 +153,60 @@ func IsAPVFIOMediatedDevice(sysfsdev string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) {
|
||||
pidfd, err := unix.PidfdOpen(pid, 0)
|
||||
|
||||
if err != nil {
|
||||
if err == unix.ESRCH {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, err
|
||||
}
|
||||
|
||||
defer unix.Close(pidfd)
|
||||
var n int
|
||||
|
||||
maxDelay := time.Duration(timeoutSecs) * time.Second
|
||||
end := time.Now().Add(maxDelay)
|
||||
|
||||
for {
|
||||
remaining := time.Until(end).Milliseconds()
|
||||
if remaining < 0 {
|
||||
remaining = 0
|
||||
}
|
||||
|
||||
n, err = unix.Poll([]unix.PollFd{{Fd: int32(pidfd), Events: unix.POLLIN}}, int(remaining))
|
||||
if err != unix.EINTR {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil || n != 1 {
|
||||
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
||||
return true, err
|
||||
}
|
||||
|
||||
for {
|
||||
err := unix.Waitid(unix.P_PIDFD, pidfd, nil, unix.WEXITED, nil)
|
||||
if err == unix.EINVAL {
|
||||
err = unix.Waitid(unix.P_PID, pid, nil, unix.WEXITED, nil)
|
||||
}
|
||||
|
||||
if err != unix.EINTR {
|
||||
break
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||
pidRunning, err := waitProcessUsingPidfd(pid, timeoutSecs, logger)
|
||||
|
||||
if err == unix.ENOSYS {
|
||||
pidRunning = waitProcessUsingWaitLoop(pid, timeoutSecs, logger)
|
||||
}
|
||||
|
||||
return pidRunning
|
||||
}
|
||||
|
||||
@@ -337,7 +337,6 @@ impl ContainerLauncher {
|
||||
self.runner
|
||||
.cgroup_manager
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.as_any()?
|
||||
.downcast_ref::<CgroupManager>()
|
||||
.unwrap()
|
||||
|
||||
@@ -65,6 +65,8 @@ readonly -a systemd_files=(
|
||||
|
||||
# Set a default value
|
||||
AGENT_INIT=${AGENT_INIT:-no}
|
||||
SELINUX=${SELINUX:-no}
|
||||
SELINUXFS="/sys/fs/selinux"
|
||||
|
||||
# Align image to 128M
|
||||
readonly mem_boundary_mb=128
|
||||
@@ -94,6 +96,10 @@ Extra environment variables:
|
||||
DEFAULT: not set
|
||||
USE_PODMAN: If set and USE_DOCKER not set, will build image in a Podman Container (requries podman)
|
||||
DEFAULT: not set
|
||||
SELINUX: If set to "yes", the rootfs is labeled for SELinux.
|
||||
Make sure that selinuxfs is mounted to /sys/fs/selinux on the host
|
||||
and the rootfs is built with SELINUX=yes.
|
||||
DEFAULT value: "no"
|
||||
|
||||
|
||||
Following diagram shows how the resulting image will look like
|
||||
@@ -135,6 +141,7 @@ build_with_container() {
|
||||
local nsdax_bin="$9"
|
||||
local container_image_name="image-builder-osbuilder"
|
||||
local shared_files=""
|
||||
local selinuxfs=""
|
||||
|
||||
image_dir=$(readlink -f "$(dirname "${image}")")
|
||||
image_name=$(basename "${image}")
|
||||
@@ -158,6 +165,14 @@ build_with_container() {
|
||||
shared_files+="-v ${mke2fs_conf}:${mke2fs_conf}:ro "
|
||||
fi
|
||||
|
||||
if [ "${SELINUX}" == "yes" ]; then
|
||||
if mountpoint $SELINUXFS > /dev/null; then
|
||||
selinuxfs="-v ${SELINUXFS}:${SELINUXFS}"
|
||||
else
|
||||
die "Make sure that SELinux is enabled on the host"
|
||||
fi
|
||||
fi
|
||||
|
||||
#Make sure we use a compatible runtime to build rootfs
|
||||
# In case Clear Containers Runtime is installed we dont want to hit issue:
|
||||
#https://github.com/clearcontainers/runtime/issues/828
|
||||
@@ -172,12 +187,14 @@ build_with_container() {
|
||||
--env ROOT_FREE_SPACE="${root_free_space}" \
|
||||
--env NSDAX_BIN="${nsdax_bin}" \
|
||||
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
||||
--env SELINUX="${SELINUX}" \
|
||||
--env DEBUG="${DEBUG}" \
|
||||
-v /dev:/dev \
|
||||
-v "${script_dir}":"/osbuilder" \
|
||||
-v "${script_dir}/../scripts":"/scripts" \
|
||||
-v "${rootfs}":"/rootfs" \
|
||||
-v "${image_dir}":"/image" \
|
||||
${selinuxfs} \
|
||||
${shared_files} \
|
||||
${container_image_name} \
|
||||
bash "/osbuilder/${script_name}" -o "/image/${image_name}" /rootfs
|
||||
@@ -398,6 +415,7 @@ create_rootfs_image() {
|
||||
local img_size="$3"
|
||||
local fs_type="$4"
|
||||
local block_size="$5"
|
||||
local agent_bin="$6"
|
||||
|
||||
create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}"
|
||||
|
||||
@@ -416,6 +434,31 @@ create_rootfs_image() {
|
||||
|
||||
info "Copying content from rootfs to root partition"
|
||||
cp -a "${rootfs}"/* "${mount_dir}"
|
||||
|
||||
if [ "${SELINUX}" == "yes" ]; then
|
||||
if [ "${AGENT_INIT}" == "yes" ]; then
|
||||
die "Guest SELinux with the agent init is not supported yet"
|
||||
fi
|
||||
|
||||
info "Labeling rootfs for SELinux"
|
||||
selinuxfs_path="${mount_dir}${SELINUXFS}"
|
||||
mkdir -p $selinuxfs_path
|
||||
if mountpoint $SELINUXFS > /dev/null && \
|
||||
chroot "${mount_dir}" command -v restorecon > /dev/null; then
|
||||
mount -t selinuxfs selinuxfs $selinuxfs_path
|
||||
chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} /
|
||||
# TODO: This operation will be removed after the updated container-selinux that
|
||||
# includes the following commit is released.
|
||||
# https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f
|
||||
# We use chcon as an interim solution until then.
|
||||
chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}"
|
||||
umount $selinuxfs_path
|
||||
else
|
||||
die "Could not label the rootfs. Make sure that SELinux is enabled on the host \
|
||||
and the rootfs is built with SELINUX=yes"
|
||||
fi
|
||||
fi
|
||||
|
||||
sync
|
||||
OK "rootfs copied"
|
||||
|
||||
@@ -549,7 +592,7 @@ main() {
|
||||
# consider in calculate_img_size
|
||||
rootfs_img_size=$((img_size - dax_header_sz))
|
||||
create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \
|
||||
"${fs_type}" "${block_size}"
|
||||
"${fs_type}" "${block_size}" "${agent_bin}"
|
||||
|
||||
# insert at the beginning of the image the MBR + DAX header
|
||||
set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
|
||||
|
||||
@@ -8,10 +8,15 @@ OS_VERSION=${OS_VERSION:-stream9}
|
||||
PACKAGES="chrony iptables"
|
||||
[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd"
|
||||
[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp"
|
||||
[ "$SELINUX" = yes ] && PACKAGES+=" container-selinux"
|
||||
|
||||
# Container registry tag is different from metalink repo, e.g. "stream9" => "9-stream"
|
||||
os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")"
|
||||
|
||||
METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch"
|
||||
if [ "$SELINUX" == yes ]; then
|
||||
# AppStream repository is required for the container-selinux package
|
||||
METALINK_APPSTREAM="https://mirrors.centos.org/metalink?repo=centos-appstream-$os_repo_version&arch=\$basearch"
|
||||
fi
|
||||
GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official
|
||||
GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE"
|
||||
|
||||
@@ -26,6 +26,7 @@ LIBC=${LIBC:-musl}
|
||||
# The kata agent enables seccomp feature.
|
||||
# However, it is not enforced by default: you need to enable that in the main configuration file.
|
||||
SECCOMP=${SECCOMP:-"yes"}
|
||||
SELINUX=${SELINUX:-"no"}
|
||||
|
||||
lib_file="${script_dir}/../scripts/lib.sh"
|
||||
source "$lib_file"
|
||||
@@ -143,6 +144,11 @@ ROOTFS_DIR Path to the directory that is populated with the rootfs.
|
||||
SECCOMP When set to "no", the kata-agent is built without seccomp capability.
|
||||
Default value: "yes"
|
||||
|
||||
SELINUX When set to "yes", build the rootfs with the required packages to
|
||||
enable SELinux in the VM.
|
||||
Make sure the guest kernel is compiled with SELinux enabled.
|
||||
Default value: "no"
|
||||
|
||||
USE_DOCKER If set, build the rootfs inside a container (requires
|
||||
Docker).
|
||||
Default value: <not set>
|
||||
@@ -369,6 +375,15 @@ build_rootfs_distro()
|
||||
|
||||
echo "Required rust version: $RUST_VERSION"
|
||||
|
||||
if [ "${SELINUX}" == "yes" ]; then
|
||||
if [ "${AGENT_INIT}" == "yes" ]; then
|
||||
die "Guest SELinux with the agent init is not supported yet"
|
||||
fi
|
||||
if [ "${distro}" != "centos" ]; then
|
||||
die "The guest rootfs must be CentOS to enable guest SELinux"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "${USE_DOCKER}" ] && [ -z "${USE_PODMAN}" ]; then
|
||||
info "build directly"
|
||||
build_rootfs ${ROOTFS_DIR}
|
||||
@@ -454,6 +469,7 @@ build_rootfs_distro()
|
||||
--env AA_KBC="${AA_KBC}" \
|
||||
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
||||
--env SECCOMP="${SECCOMP}" \
|
||||
--env SELINUX="${SELINUX}" \
|
||||
--env DEBUG="${DEBUG}" \
|
||||
--env HOME="/root" \
|
||||
-v "${repo_dir}":"/kata-containers" \
|
||||
|
||||
@@ -79,7 +79,23 @@ gpgcheck=1
|
||||
gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE}
|
||||
EOF
|
||||
fi
|
||||
|
||||
if [ "$SELINUX" == "yes" ]; then
|
||||
cat > "${DNF_CONF}" << EOF
|
||||
[appstream]
|
||||
name=${OS_NAME}-${OS_VERSION} upstream
|
||||
releasever=${OS_VERSION}
|
||||
EOF
|
||||
echo "metalink=$METALINK_APPSTREAM" >> "$DNF_CONF"
|
||||
if [ -n "$GPG_KEY_URL" ]; then
|
||||
if [ ! -f "${CONFIG_DIR}/${GPG_KEY_FILE}" ]; then
|
||||
curl -L "${GPG_KEY_URL}" -o "${CONFIG_DIR}/${GPG_KEY_FILE}"
|
||||
fi
|
||||
cat >> "${DNF_CONF}" << EOF
|
||||
gpgcheck=1
|
||||
gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE}
|
||||
EOF
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
build_rootfs()
|
||||
|
||||
@@ -56,7 +56,7 @@ function get_container_runtime() {
|
||||
|
||||
function install_artifacts() {
|
||||
echo "copying kata artifacts onto host"
|
||||
cp -a /opt/kata-artifacts/opt/kata/* /opt/kata/
|
||||
cp -au /opt/kata-artifacts/opt/kata/* /opt/kata/
|
||||
chmod +x /opt/kata/bin/*
|
||||
chmod +x /opt/kata/runtime-rs/bin/*
|
||||
}
|
||||
|
||||
@@ -159,12 +159,12 @@ get_kernel() {
|
||||
major_version=$(echo "${version}" | cut -d. -f1)
|
||||
kernel_tarball="linux-${version}.tar.xz"
|
||||
|
||||
if [ ! -f sha256sums.asc ] || ! grep -q "${kernel_tarball}" sha256sums.asc; then
|
||||
shasum_url="https://cdn.kernel.org/pub/linux/kernel/v${major_version}.x/sha256sums.asc"
|
||||
info "Download kernel checksum file: sha256sums.asc from ${shasum_url}"
|
||||
curl --fail -OL "${shasum_url}"
|
||||
fi
|
||||
grep "${kernel_tarball}" sha256sums.asc >"${kernel_tarball}.sha256"
|
||||
if [ ! -f sha256sums.asc ] || ! grep -q "${kernel_tarball}" sha256sums.asc; then
|
||||
shasum_url="https://cdn.kernel.org/pub/linux/kernel/v${major_version}.x/sha256sums.asc"
|
||||
info "Download kernel checksum file: sha256sums.asc from ${shasum_url}"
|
||||
curl --fail -OL "${shasum_url}"
|
||||
fi
|
||||
grep "${kernel_tarball}" sha256sums.asc >"${kernel_tarball}.sha256"
|
||||
|
||||
if [ -f "${kernel_tarball}" ] && ! sha256sum -c "${kernel_tarball}.sha256"; then
|
||||
info "invalid kernel tarball ${kernel_tarball} removing "
|
||||
|
||||
@@ -39,6 +39,12 @@ CONFIG_ARM64_PMEM=y
|
||||
CONFIG_ARM64_RAS_EXTN=y
|
||||
# end of ARMv8.2 architectural feature
|
||||
|
||||
#
|
||||
# ARMv8.5 architectural features
|
||||
#
|
||||
CONFIG_ARCH_RANDOM=y
|
||||
CONFIG_RANDOM_TRUST_CPU=y
|
||||
|
||||
CONFIG_NO_HZ_FULL=y
|
||||
CONFIG_GENERIC_MSI_IRQ_DOMAIN=y
|
||||
CONFIG_RANDOMIZE_BASE=y
|
||||
|
||||
12
tools/packaging/kernel/configs/fragments/common/lsm.conf
Normal file
12
tools/packaging/kernel/configs/fragments/common/lsm.conf
Normal file
@@ -0,0 +1,12 @@
|
||||
# SELinux support:
|
||||
CONFIG_AUDIT=y
|
||||
CONFIG_AUDITSYSCALL=y
|
||||
CONFIG_LSM_MMAP_MIN_ADDR=6553
|
||||
CONFIG_NETWORK_SECMARK=y
|
||||
CONFIG_SECURITY_NETWORK=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
|
||||
CONFIG_SECURITY_SELINUX_DEVELOP=y
|
||||
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0
|
||||
CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9
|
||||
CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256
|
||||
@@ -1 +1 @@
|
||||
96
|
||||
97
|
||||
|
||||
@@ -48,9 +48,9 @@ info "Calling edksetup script"
|
||||
source edksetup.sh
|
||||
|
||||
if [ "${ovmf_build}" == "sev" ]; then
|
||||
info "Creating dummy grub file"
|
||||
#required for building AmdSev package without grub
|
||||
touch OvmfPkg/AmdSev/Grub/grub.efi
|
||||
info "Creating dummy grub file"
|
||||
#required for building AmdSev package without grub
|
||||
touch OvmfPkg/AmdSev/Grub/grub.efi
|
||||
fi
|
||||
|
||||
info "Building ovmf"
|
||||
@@ -93,5 +93,5 @@ fi
|
||||
local_dir=${PWD}
|
||||
pushd $DESTDIR
|
||||
tar -czvf "${local_dir}/${ovmf_dir}-${ovmf_build}.tar.gz" "./$PREFIX"
|
||||
rm -rf $(dirname ./$PREFIX)
|
||||
rm -rf $(dirname ./$PREFIX)
|
||||
popd
|
||||
|
||||
Reference in New Issue
Block a user