mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-17 22:34:25 +01:00
CCv0: Merge main into CCv0 branch
Merge remote-tracking branch 'upstream/main' into CCv0 Fixes: #5905 Signed-off-by: Georgina Kinge <georgina.kinge@ibm.com>
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@
|
|||||||
**/*.rej
|
**/*.rej
|
||||||
**/target
|
**/target
|
||||||
**/.vscode
|
**/.vscode
|
||||||
|
**/.idea
|
||||||
|
**/.fleet
|
||||||
pkg/logging/Cargo.lock
|
pkg/logging/Cargo.lock
|
||||||
src/agent/src/version.rs
|
src/agent/src/version.rs
|
||||||
src/agent/kata-agent.service
|
src/agent/kata-agent.service
|
||||||
|
|||||||
@@ -86,6 +86,27 @@ $ sudo sed -i '/^disable_guest_seccomp/ s/true/false/' /etc/kata-containers/conf
|
|||||||
|
|
||||||
This will pass container seccomp profiles to the kata agent.
|
This will pass container seccomp profiles to the kata agent.
|
||||||
|
|
||||||
|
## Enable SELinux on the guest
|
||||||
|
|
||||||
|
> **Note:**
|
||||||
|
>
|
||||||
|
> - To enable SELinux on the guest, SELinux MUST also be enabled on the host.
|
||||||
|
> - You MUST create and build a rootfs image for SELinux in advance.
|
||||||
|
> See [Create a rootfs image](#create-a-rootfs-image) and [Build a rootfs image](#build-a-rootfs-image).
|
||||||
|
> - SELinux on the guest is currently supported only in a rootfs image, so
|
||||||
|
> you cannot enable SELinux with the agent init (`AGENT_INIT=yes`) yet.
|
||||||
|
|
||||||
|
Enable guest SELinux in Enforcing mode as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ sudo sed -i '/^disable_guest_selinux/ s/true/false/g' /etc/kata-containers/configuration.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
The runtime will automatically add `selinux=1` to the kernel parameters and the `xattr` option to
|
||||||
|
`virtiofsd` when `disable_guest_selinux` is set to `false`.
|
||||||
|
|
||||||
|
If you want to enable SELinux in Permissive mode, add `enforcing=0` to the kernel parameters.
|
||||||
|
|
||||||
## Enable full debug
|
## Enable full debug
|
||||||
|
|
||||||
Enable full debug as follows:
|
Enable full debug as follows:
|
||||||
@@ -256,6 +277,12 @@ If you want to build the agent without seccomp capability, you need to run the `
|
|||||||
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"'
|
$ script -fec 'sudo -E AGENT_INIT=yes USE_DOCKER=true SECCOMP=no ./rootfs.sh "${distro}"'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you want to enable SELinux on the guest, you MUST choose `centos` and run the `rootfs.sh` script with `SELINUX=yes` as follows.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ script -fec 'sudo -E GOPATH=$GOPATH USE_DOCKER=true SELINUX=yes ./rootfs.sh centos'
|
||||||
|
```
|
||||||
|
|
||||||
> **Note:**
|
> **Note:**
|
||||||
>
|
>
|
||||||
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
|
> - Check the [compatibility matrix](../tools/osbuilder/README.md#platform-distro-compatibility-matrix) before creating rootfs.
|
||||||
@@ -283,6 +310,19 @@ $ script -fec 'sudo -E USE_DOCKER=true ./image_builder.sh "${ROOTFS_DIR}"'
|
|||||||
$ popd
|
$ popd
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you want to enable SELinux on the guest, you MUST run the `image_builder.sh` script with `SELINUX=yes`
|
||||||
|
to label the guest image as follows.
|
||||||
|
To label the image on the host, you need to make sure that SELinux is enabled (`selinuxfs` is mounted) on the host
|
||||||
|
and the rootfs MUST be created by running the `rootfs.sh` with `SELINUX=yes`.
|
||||||
|
|
||||||
|
```
|
||||||
|
$ script -fec 'sudo -E USE_DOCKER=true SELINUX=yes ./image_builder.sh ${ROOTFS_DIR}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Currently, the `image_builder.sh` uses `chcon` as an interim solution in order to apply `container_runtime_exec_t`
|
||||||
|
to the `kata-agent`. Hence, if you run `restorecon` on the guest image after running the `image_builder.sh`,
|
||||||
|
you need to label the `kata-agent` as `container_runtime_exec_t` again yourself.
|
||||||
|
|
||||||
> **Notes:**
|
> **Notes:**
|
||||||
>
|
>
|
||||||
> - You must ensure that the *default Docker runtime* is `runc` to make use of
|
> - You must ensure that the *default Docker runtime* is `runc` to make use of
|
||||||
|
|||||||
2
src/agent/Cargo.lock
generated
2
src/agent/Cargo.lock
generated
@@ -2300,7 +2300,6 @@ dependencies = [
|
|||||||
"slog",
|
"slog",
|
||||||
"slog-scope",
|
"slog-scope",
|
||||||
"slog-stdlog",
|
"slog-stdlog",
|
||||||
"sysinfo",
|
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"test-utils",
|
"test-utils",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
@@ -4076,6 +4075,7 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
"test-utils",
|
"test-utils",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"xattr",
|
||||||
"zbus",
|
"zbus",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ inotify = "0.9.2"
|
|||||||
libseccomp = { version = "0.3.0", optional = true }
|
libseccomp = { version = "0.3.0", optional = true }
|
||||||
zbus = "2.3.0"
|
zbus = "2.3.0"
|
||||||
bit-vec= "0.6.3"
|
bit-vec= "0.6.3"
|
||||||
|
xattr = "0.2.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
serial_test = "0.5.0"
|
serial_test = "0.5.0"
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ use crate::log_child;
|
|||||||
use crate::process::Process;
|
use crate::process::Process;
|
||||||
#[cfg(feature = "seccomp")]
|
#[cfg(feature = "seccomp")]
|
||||||
use crate::seccomp;
|
use crate::seccomp;
|
||||||
|
use crate::selinux;
|
||||||
use crate::specconv::CreateOpts;
|
use crate::specconv::CreateOpts;
|
||||||
use crate::{mount, validator};
|
use crate::{mount, validator};
|
||||||
|
|
||||||
@@ -109,7 +110,6 @@ impl Default for ContainerStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We might want to change this to thiserror in the future
|
// We might want to change this to thiserror in the future
|
||||||
const MissingCGroupManager: &str = "failed to get container's cgroup Manager";
|
|
||||||
const MissingLinux: &str = "no linux config";
|
const MissingLinux: &str = "no linux config";
|
||||||
const InvalidNamespace: &str = "invalid namespace type";
|
const InvalidNamespace: &str = "invalid namespace type";
|
||||||
|
|
||||||
@@ -243,7 +243,7 @@ pub struct LinuxContainer {
|
|||||||
pub id: String,
|
pub id: String,
|
||||||
pub root: String,
|
pub root: String,
|
||||||
pub config: Config,
|
pub config: Config,
|
||||||
pub cgroup_manager: Option<Box<dyn Manager + Send + Sync>>,
|
pub cgroup_manager: Box<dyn Manager + Send + Sync>,
|
||||||
pub init_process_pid: pid_t,
|
pub init_process_pid: pid_t,
|
||||||
pub init_process_start_time: u64,
|
pub init_process_start_time: u64,
|
||||||
pub uid_map_path: String,
|
pub uid_map_path: String,
|
||||||
@@ -292,16 +292,11 @@ impl Container for LinuxContainer {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.cgroup_manager.is_some() {
|
self.cgroup_manager.as_ref().freeze(FreezerState::Frozen)?;
|
||||||
self.cgroup_manager
|
|
||||||
.as_ref()
|
|
||||||
.unwrap()
|
|
||||||
.freeze(FreezerState::Frozen)?;
|
|
||||||
|
|
||||||
self.status.transition(ContainerState::Paused);
|
self.status.transition(ContainerState::Paused);
|
||||||
return Ok(());
|
|
||||||
}
|
Ok(())
|
||||||
Err(anyhow!(MissingCGroupManager))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn resume(&mut self) -> Result<()> {
|
fn resume(&mut self) -> Result<()> {
|
||||||
@@ -310,16 +305,11 @@ impl Container for LinuxContainer {
|
|||||||
return Err(anyhow!("container status is: {:?}, not paused", status));
|
return Err(anyhow!("container status is: {:?}, not paused", status));
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.cgroup_manager.is_some() {
|
self.cgroup_manager.as_ref().freeze(FreezerState::Thawed)?;
|
||||||
self.cgroup_manager
|
|
||||||
.as_ref()
|
|
||||||
.unwrap()
|
|
||||||
.freeze(FreezerState::Thawed)?;
|
|
||||||
|
|
||||||
self.status.transition(ContainerState::Running);
|
self.status.transition(ContainerState::Running);
|
||||||
return Ok(());
|
|
||||||
}
|
Ok(())
|
||||||
Err(anyhow!(MissingCGroupManager))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -537,6 +527,8 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let selinux_enabled = selinux::is_enabled()?;
|
||||||
|
|
||||||
sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?;
|
sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?;
|
||||||
|
|
||||||
if userns {
|
if userns {
|
||||||
@@ -638,6 +630,18 @@ fn do_init_child(cwfd: RawFd) -> Result<()> {
|
|||||||
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
|
capctl::prctl::set_no_new_privs().map_err(|_| anyhow!("cannot set no new privileges"))?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set SELinux label
|
||||||
|
if !oci_process.selinux_label.is_empty() {
|
||||||
|
if !selinux_enabled {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"SELinux label for the process is provided but SELinux is not enabled on the running kernel"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
log_child!(cfd_log, "Set SELinux label to the container process");
|
||||||
|
selinux::set_exec_label(&oci_process.selinux_label)?;
|
||||||
|
}
|
||||||
|
|
||||||
// Log unknown seccomp system calls in advance before the log file descriptor closes.
|
// Log unknown seccomp system calls in advance before the log file descriptor closes.
|
||||||
#[cfg(feature = "seccomp")]
|
#[cfg(feature = "seccomp")]
|
||||||
if let Some(ref scmp) = linux.seccomp {
|
if let Some(ref scmp) = linux.seccomp {
|
||||||
@@ -847,22 +851,17 @@ impl BaseContainer for LinuxContainer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn stats(&self) -> Result<StatsContainerResponse> {
|
fn stats(&self) -> Result<StatsContainerResponse> {
|
||||||
let mut r = StatsContainerResponse::default();
|
|
||||||
|
|
||||||
if self.cgroup_manager.is_some() {
|
|
||||||
r.cgroup_stats =
|
|
||||||
SingularPtrField::some(self.cgroup_manager.as_ref().unwrap().get_stats()?);
|
|
||||||
}
|
|
||||||
|
|
||||||
// what about network interface stats?
|
// what about network interface stats?
|
||||||
|
|
||||||
Ok(r)
|
Ok(StatsContainerResponse {
|
||||||
|
cgroup_stats: SingularPtrField::some(self.cgroup_manager.as_ref().get_stats()?),
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set(&mut self, r: LinuxResources) -> Result<()> {
|
fn set(&mut self, r: LinuxResources) -> Result<()> {
|
||||||
if self.cgroup_manager.is_some() {
|
self.cgroup_manager.as_ref().set(&r, true)?;
|
||||||
self.cgroup_manager.as_ref().unwrap().set(&r, true)?;
|
|
||||||
}
|
|
||||||
self.config
|
self.config
|
||||||
.spec
|
.spec
|
||||||
.as_mut()
|
.as_mut()
|
||||||
@@ -1035,7 +1034,7 @@ impl BaseContainer for LinuxContainer {
|
|||||||
&logger,
|
&logger,
|
||||||
spec,
|
spec,
|
||||||
&p,
|
&p,
|
||||||
self.cgroup_manager.as_ref().unwrap().as_ref(),
|
self.cgroup_manager.as_ref(),
|
||||||
self.config.use_systemd_cgroup,
|
self.config.use_systemd_cgroup,
|
||||||
&st,
|
&st,
|
||||||
&mut pipe_w,
|
&mut pipe_w,
|
||||||
@@ -1114,7 +1113,7 @@ impl BaseContainer for LinuxContainer {
|
|||||||
)?;
|
)?;
|
||||||
fs::remove_dir_all(&self.root)?;
|
fs::remove_dir_all(&self.root)?;
|
||||||
|
|
||||||
if let Some(cgm) = self.cgroup_manager.as_mut() {
|
let cgm = self.cgroup_manager.as_mut();
|
||||||
// Kill all of the processes created in this container to prevent
|
// Kill all of the processes created in this container to prevent
|
||||||
// the leak of some daemon process when this container shared pidns
|
// the leak of some daemon process when this container shared pidns
|
||||||
// with the sandbox.
|
// with the sandbox.
|
||||||
@@ -1126,7 +1125,7 @@ impl BaseContainer for LinuxContainer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cgm.destroy().context("destroy cgroups")?;
|
cgm.destroy().context("destroy cgroups")?;
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1514,7 +1513,7 @@ impl LinuxContainer {
|
|||||||
Ok(LinuxContainer {
|
Ok(LinuxContainer {
|
||||||
id: id.clone(),
|
id: id.clone(),
|
||||||
root,
|
root,
|
||||||
cgroup_manager: Some(cgroup_manager),
|
cgroup_manager,
|
||||||
status: ContainerStatus::new(),
|
status: ContainerStatus::new(),
|
||||||
uid_map_path: String::from(""),
|
uid_map_path: String::from(""),
|
||||||
gid_map_path: "".to_string(),
|
gid_map_path: "".to_string(),
|
||||||
@@ -1980,24 +1979,12 @@ mod tests {
|
|||||||
assert!(format!("{:?}", ret).contains("failed to pause container"))
|
assert!(format!("{:?}", ret).contains("failed to pause container"))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_linuxcontainer_pause_cgroupmgr_is_none() {
|
|
||||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
|
||||||
c.cgroup_manager = None;
|
|
||||||
c.pause().map_err(|e| anyhow!(e))
|
|
||||||
});
|
|
||||||
|
|
||||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_linuxcontainer_pause() {
|
fn test_linuxcontainer_pause() {
|
||||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||||
let cgroup_manager: Box<dyn Manager + Send + Sync> =
|
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||||
Box::new(FsManager::new("").map_err(|e| {
|
|
||||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||||
})?);
|
})?);
|
||||||
c.cgroup_manager = Some(cgroup_manager);
|
|
||||||
c.pause().map_err(|e| anyhow!(e))
|
c.pause().map_err(|e| anyhow!(e))
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -2016,25 +2003,12 @@ mod tests {
|
|||||||
assert!(format!("{:?}", ret).contains("not paused"))
|
assert!(format!("{:?}", ret).contains("not paused"))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_linuxcontainer_resume_cgroupmgr_is_none() {
|
|
||||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
|
||||||
c.status.transition(ContainerState::Paused);
|
|
||||||
c.cgroup_manager = None;
|
|
||||||
c.resume().map_err(|e| anyhow!(e))
|
|
||||||
});
|
|
||||||
|
|
||||||
assert!(ret.is_err(), "Expecting error, Got {:?}", ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_linuxcontainer_resume() {
|
fn test_linuxcontainer_resume() {
|
||||||
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
let ret = new_linux_container_and_then(|mut c: LinuxContainer| {
|
||||||
let cgroup_manager: Box<dyn Manager + Send + Sync> =
|
c.cgroup_manager = Box::new(FsManager::new("").map_err(|e| {
|
||||||
Box::new(FsManager::new("").map_err(|e| {
|
|
||||||
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
anyhow!(format!("fail to create cgroup manager with path: {:}", e))
|
||||||
})?);
|
})?);
|
||||||
c.cgroup_manager = Some(cgroup_manager);
|
|
||||||
// Change status to paused, this way we can resume it
|
// Change status to paused, this way we can resume it
|
||||||
c.status.transition(ContainerState::Paused);
|
c.status.transition(ContainerState::Paused);
|
||||||
c.resume().map_err(|e| anyhow!(e))
|
c.resume().map_err(|e| anyhow!(e))
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ pub mod pipestream;
|
|||||||
pub mod process;
|
pub mod process;
|
||||||
#[cfg(feature = "seccomp")]
|
#[cfg(feature = "seccomp")]
|
||||||
pub mod seccomp;
|
pub mod seccomp;
|
||||||
|
pub mod selinux;
|
||||||
pub mod specconv;
|
pub mod specconv;
|
||||||
pub mod sync;
|
pub mod sync;
|
||||||
pub mod sync_with_async;
|
pub mod sync_with_async;
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ use std::fs::File;
|
|||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
|
|
||||||
use crate::container::DEFAULT_DEVICES;
|
use crate::container::DEFAULT_DEVICES;
|
||||||
|
use crate::selinux;
|
||||||
use crate::sync::write_count;
|
use crate::sync::write_count;
|
||||||
use std::string::ToString;
|
use std::string::ToString;
|
||||||
|
|
||||||
@@ -181,6 +182,8 @@ pub fn init_rootfs(
|
|||||||
None => flags |= MsFlags::MS_SLAVE,
|
None => flags |= MsFlags::MS_SLAVE,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let label = &linux.mount_label;
|
||||||
|
|
||||||
let root = spec
|
let root = spec
|
||||||
.root
|
.root
|
||||||
.as_ref()
|
.as_ref()
|
||||||
@@ -244,7 +247,7 @@ pub fn init_rootfs(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mount_from(cfd_log, m, rootfs, flags, &data, "")?;
|
mount_from(cfd_log, m, rootfs, flags, &data, label)?;
|
||||||
// bind mount won't change mount options, we need remount to make mount options
|
// bind mount won't change mount options, we need remount to make mount options
|
||||||
// effective.
|
// effective.
|
||||||
// first check that we have non-default options required before attempting a
|
// first check that we have non-default options required before attempting a
|
||||||
@@ -524,7 +527,6 @@ pub fn pivot_rootfs<P: ?Sized + NixPath + std::fmt::Debug>(path: &P) -> Result<(
|
|||||||
|
|
||||||
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
|
fn rootfs_parent_mount_private(path: &str) -> Result<()> {
|
||||||
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
|
let mount_infos = parse_mount_table(MOUNTINFO_PATH)?;
|
||||||
|
|
||||||
let mut max_len = 0;
|
let mut max_len = 0;
|
||||||
let mut mount_point = String::from("");
|
let mut mount_point = String::from("");
|
||||||
let mut options = String::from("");
|
let mut options = String::from("");
|
||||||
@@ -767,9 +769,9 @@ fn mount_from(
|
|||||||
rootfs: &str,
|
rootfs: &str,
|
||||||
flags: MsFlags,
|
flags: MsFlags,
|
||||||
data: &str,
|
data: &str,
|
||||||
_label: &str,
|
label: &str,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let d = String::from(data);
|
let mut d = String::from(data);
|
||||||
let dest = secure_join(rootfs, &m.destination);
|
let dest = secure_join(rootfs, &m.destination);
|
||||||
|
|
||||||
let src = if m.r#type.as_str() == "bind" {
|
let src = if m.r#type.as_str() == "bind" {
|
||||||
@@ -822,6 +824,37 @@ fn mount_from(
|
|||||||
e
|
e
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
// Set the SELinux context for the mounts
|
||||||
|
let mut use_xattr = false;
|
||||||
|
if !label.is_empty() {
|
||||||
|
if selinux::is_enabled()? {
|
||||||
|
let device = Path::new(&m.source)
|
||||||
|
.file_name()
|
||||||
|
.ok_or_else(|| anyhow!("invalid device source path: {}", &m.source))?
|
||||||
|
.to_str()
|
||||||
|
.ok_or_else(|| anyhow!("failed to convert device source path: {}", &m.source))?;
|
||||||
|
|
||||||
|
match device {
|
||||||
|
// SELinux does not support labeling of /proc or /sys
|
||||||
|
"proc" | "sysfs" => (),
|
||||||
|
// SELinux does not support mount labeling against /dev/mqueue,
|
||||||
|
// so we use setxattr instead
|
||||||
|
"mqueue" => {
|
||||||
|
use_xattr = true;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
log_child!(cfd_log, "add SELinux mount label to {}", dest.as_str());
|
||||||
|
selinux::add_mount_label(&mut d, label);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log_child!(
|
||||||
|
cfd_log,
|
||||||
|
"SELinux label for the mount is provided but SELinux is not enabled on the running kernel"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mount(
|
mount(
|
||||||
Some(src.as_str()),
|
Some(src.as_str()),
|
||||||
dest.as_str(),
|
dest.as_str(),
|
||||||
@@ -834,6 +867,10 @@ fn mount_from(
|
|||||||
e
|
e
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
if !label.is_empty() && selinux::is_enabled()? && use_xattr {
|
||||||
|
xattr::set(dest.as_str(), "security.selinux", label.as_bytes())?;
|
||||||
|
}
|
||||||
|
|
||||||
if flags.contains(MsFlags::MS_BIND)
|
if flags.contains(MsFlags::MS_BIND)
|
||||||
&& flags.intersects(
|
&& flags.intersects(
|
||||||
!(MsFlags::MS_REC
|
!(MsFlags::MS_REC
|
||||||
|
|||||||
80
src/agent/rustjail/src/selinux.rs
Normal file
80
src/agent/rustjail/src/selinux.rs
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
// Copyright 2022 Sony Group Corporation
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use nix::unistd::gettid;
|
||||||
|
use std::fs::{self, OpenOptions};
|
||||||
|
use std::io::prelude::*;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
pub fn is_enabled() -> Result<bool> {
|
||||||
|
let buf = fs::read_to_string("/proc/mounts")?;
|
||||||
|
let enabled = buf.contains("selinuxfs");
|
||||||
|
|
||||||
|
Ok(enabled)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends an SELinux `context="<label>"` option to the mount option string `data`.
///
/// A `,` separator is inserted first when `data` already holds other options, so the
/// result stays a comma-separated option list.
pub fn add_mount_label(data: &mut String, label: &str) {
    if !data.is_empty() {
        data.push(',');
    }

    // Append in place rather than building a temporary `format!` string per branch.
    data.push_str("context=\"");
    data.push_str(label);
    data.push('"');
}
|
||||||
|
|
||||||
|
pub fn set_exec_label(label: &str) -> Result<()> {
|
||||||
|
let mut attr_path = Path::new("/proc/thread-self/attr/exec").to_path_buf();
|
||||||
|
if !attr_path.exists() {
|
||||||
|
// Fall back to the old convention
|
||||||
|
attr_path = Path::new("/proc/self/task")
|
||||||
|
.join(gettid().to_string())
|
||||||
|
.join("attr/exec")
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut file = OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.truncate(true)
|
||||||
|
.open(attr_path)?;
|
||||||
|
file.write_all(label.as_bytes())
|
||||||
|
.with_context(|| "failed to apply SELinux label")?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    const TEST_LABEL: &str = "system_u:system_r:unconfined_t:s0";

    /// Probing for SELinux must succeed whether or not it is enabled.
    #[test]
    fn test_is_enabled() {
        let probe = is_enabled();
        assert!(probe.is_ok(), "Expecting Ok, Got {:?}", probe);
    }

    /// The context option is appended bare to an empty option string and
    /// comma-separated after existing options.
    #[test]
    fn test_add_mount_label() {
        let mut options = String::new();
        add_mount_label(&mut options, TEST_LABEL);
        assert_eq!(options, format!("context=\"{}\"", TEST_LABEL));

        let mut options = String::from("defaults");
        add_mount_label(&mut options, TEST_LABEL);
        assert_eq!(options, format!("defaults,context=\"{}\"", TEST_LABEL));
    }

    /// Setting the exec label is expected to succeed exactly when SELinux is enabled.
    #[test]
    fn test_set_exec_label() {
        let outcome = set_exec_label(TEST_LABEL);
        let selinux_on = is_enabled().unwrap();
        if selinux_on {
            assert!(outcome.is_ok(), "Expecting Ok, Got {:?}", outcome);
        } else {
            assert!(outcome.is_err(), "Expecting error, Got {:?}", outcome);
        }
    }
}
|
||||||
@@ -6,6 +6,7 @@
|
|||||||
use crate::container::Config;
|
use crate::container::Config;
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
|
use oci::{Linux, LinuxIdMapping, LinuxNamespace, Spec};
|
||||||
|
use regex::Regex;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::path::{Component, PathBuf};
|
use std::path::{Component, PathBuf};
|
||||||
|
|
||||||
@@ -86,6 +87,23 @@ fn hostname(oci: &Spec) -> Result<()> {
|
|||||||
|
|
||||||
fn security(oci: &Spec) -> Result<()> {
|
fn security(oci: &Spec) -> Result<()> {
|
||||||
let linux = get_linux(oci)?;
|
let linux = get_linux(oci)?;
|
||||||
|
// The sensitivity-level alternatives are grouped so that `|` applies only to the level
// digits; ungrouped, the alternation split the whole pattern and any string that merely
// contained "10" through "15" was accepted as a valid label.
let label_pattern = r".*_u:.*_r:.*_t:s([0-9]|1[0-5]).*";
|
||||||
|
let label_regex = Regex::new(label_pattern)?;
|
||||||
|
|
||||||
|
if let Some(ref process) = oci.process {
|
||||||
|
if !process.selinux_label.is_empty() && !label_regex.is_match(&process.selinux_label) {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"SELinux label for the process is invalid format: {}",
|
||||||
|
&process.selinux_label
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !linux.mount_label.is_empty() && !label_regex.is_match(&linux.mount_label) {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"SELinux label for the mount is invalid format: {}",
|
||||||
|
&linux.mount_label
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() {
|
if linux.masked_paths.is_empty() && linux.readonly_paths.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
@@ -95,8 +113,6 @@ fn security(oci: &Spec) -> Result<()> {
|
|||||||
return Err(anyhow!("Linux namespace does not contain mount"));
|
return Err(anyhow!("Linux namespace does not contain mount"));
|
||||||
}
|
}
|
||||||
|
|
||||||
// don't care about selinux at present
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,7 +301,7 @@ pub fn validate(conf: &Config) -> Result<()> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use oci::Mount;
|
use oci::{Mount, Process};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_namespace() {
|
fn test_namespace() {
|
||||||
@@ -388,6 +404,29 @@ mod tests {
|
|||||||
];
|
];
|
||||||
spec.linux = Some(linux);
|
spec.linux = Some(linux);
|
||||||
security(&spec).unwrap();
|
security(&spec).unwrap();
|
||||||
|
|
||||||
|
// SELinux
|
||||||
|
let valid_label = "system_u:system_r:container_t:s0:c123,c456";
|
||||||
|
let mut process = Process::default();
|
||||||
|
process.selinux_label = valid_label.to_string();
|
||||||
|
spec.process = Some(process);
|
||||||
|
security(&spec).unwrap();
|
||||||
|
|
||||||
|
let mut linux = Linux::default();
|
||||||
|
linux.mount_label = valid_label.to_string();
|
||||||
|
spec.linux = Some(linux);
|
||||||
|
security(&spec).unwrap();
|
||||||
|
|
||||||
|
let invalid_label = "system_u:system_r:container_t";
|
||||||
|
let mut process = Process::default();
|
||||||
|
process.selinux_label = invalid_label.to_string();
|
||||||
|
spec.process = Some(process);
|
||||||
|
security(&spec).unwrap_err();
|
||||||
|
|
||||||
|
let mut linux = Linux::default();
|
||||||
|
linux.mount_label = invalid_label.to_string();
|
||||||
|
spec.linux = Some(linux);
|
||||||
|
security(&spec).unwrap_err();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -348,15 +348,14 @@ impl AgentService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start oom event loop
|
// start oom event loop
|
||||||
if let Some(ref ctr) = ctr.cgroup_manager {
|
|
||||||
let cg_path = ctr.get_cgroup_path("memory");
|
let cg_path = ctr.cgroup_manager.as_ref().get_cgroup_path("memory");
|
||||||
|
|
||||||
if let Ok(cg_path) = cg_path {
|
if let Ok(cg_path) = cg_path {
|
||||||
let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
|
let rx = notifier::notify_oom(cid.as_str(), cg_path.to_string()).await?;
|
||||||
|
|
||||||
s.run_oom_event_monitor(rx, cid.clone()).await;
|
s.run_oom_event_monitor(rx, cid.clone()).await;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -542,11 +541,7 @@ impl AgentService {
|
|||||||
let ctr = sandbox
|
let ctr = sandbox
|
||||||
.get_container(cid)
|
.get_container(cid)
|
||||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||||
let cm = ctr
|
ctr.cgroup_manager.as_ref().freeze(state)?;
|
||||||
.cgroup_manager
|
|
||||||
.as_ref()
|
|
||||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
|
||||||
cm.freeze(state)?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -556,11 +551,7 @@ impl AgentService {
|
|||||||
let ctr = sandbox
|
let ctr = sandbox
|
||||||
.get_container(cid)
|
.get_container(cid)
|
||||||
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
.ok_or_else(|| anyhow!("Invalid container id {}", cid))?;
|
||||||
let cm = ctr
|
let pids = ctr.cgroup_manager.as_ref().get_pids()?;
|
||||||
.cgroup_manager
|
|
||||||
.as_ref()
|
|
||||||
.ok_or_else(|| anyhow!("cgroup manager not exist"))?;
|
|
||||||
let pids = cm.get_pids()?;
|
|
||||||
Ok(pids)
|
Ok(pids)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -298,7 +298,6 @@ impl Sandbox {
|
|||||||
info!(self.logger, "updating {}", ctr.id.as_str());
|
info!(self.logger, "updating {}", ctr.id.as_str());
|
||||||
ctr.cgroup_manager
|
ctr.cgroup_manager
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
|
||||||
.update_cpuset_path(guest_cpuset.as_str(), container_cpust)?;
|
.update_cpuset_path(guest_cpuset.as_str(), container_cpust)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
8
src/libs/Cargo.lock
generated
8
src/libs/Cargo.lock
generated
@@ -40,6 +40,12 @@ version = "1.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "base64"
|
||||||
|
version = "0.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@@ -420,6 +426,8 @@ dependencies = [
|
|||||||
name = "kata-types"
|
name = "kata-types"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"base64",
|
||||||
"bitmask-enum",
|
"bitmask-enum",
|
||||||
"byte-unit",
|
"byte-unit",
|
||||||
"glob",
|
"glob",
|
||||||
|
|||||||
@@ -43,8 +43,6 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{self, BufRead};
|
use std::io::{self, BufRead};
|
||||||
use std::os::raw::c_char;
|
|
||||||
use std::os::unix::ffi::OsStrExt;
|
|
||||||
use std::os::unix::fs::{DirBuilderExt, OpenOptionsExt};
|
use std::os::unix::fs::{DirBuilderExt, OpenOptionsExt};
|
||||||
use std::os::unix::io::AsRawFd;
|
use std::os::unix::io::AsRawFd;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
@@ -213,11 +211,11 @@ pub fn create_mount_destination<S: AsRef<Path>, D: AsRef<Path>, R: AsRef<Path>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remount a bind mount into readonly mode.
|
/// Remount a bind mount
|
||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
/// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks.
|
/// Caller needs to ensure safety of the `dst` to avoid possible file path based attacks.
|
||||||
pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
pub fn bind_remount<P: AsRef<Path>>(dst: P, readonly: bool) -> Result<()> {
|
||||||
let dst = dst.as_ref();
|
let dst = dst.as_ref();
|
||||||
if dst.is_empty() {
|
if dst.is_empty() {
|
||||||
return Err(Error::NullMountPointPath);
|
return Err(Error::NullMountPointPath);
|
||||||
@@ -226,7 +224,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
|||||||
.canonicalize()
|
.canonicalize()
|
||||||
.map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?;
|
.map_err(|_e| Error::InvalidPath(dst.to_path_buf()))?;
|
||||||
|
|
||||||
do_rebind_mount_read_only(dst, MsFlags::empty())
|
do_rebind_mount(dst, readonly, MsFlags::empty())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Bind mount `src` to `dst` in slave mode, optionally in readonly mode if `readonly` is true.
|
/// Bind mount `src` to `dst` in slave mode, optionally in readonly mode if `readonly` is true.
|
||||||
@@ -239,7 +237,7 @@ pub fn bind_remount_read_only<P: AsRef<Path>>(dst: P) -> Result<()> {
|
|||||||
pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
|
pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
|
||||||
src: S,
|
src: S,
|
||||||
dst: D,
|
dst: D,
|
||||||
read_only: bool,
|
readonly: bool,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
fail::fail_point!("bind_mount", |_| {
|
fail::fail_point!("bind_mount", |_| {
|
||||||
Err(Error::FailureInject(
|
Err(Error::FailureInject(
|
||||||
@@ -275,8 +273,8 @@ pub fn bind_mount_unchecked<S: AsRef<Path>, D: AsRef<Path>>(
|
|||||||
.map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?;
|
.map_err(|e| Error::Mount(PathBuf::new(), dst.to_path_buf(), e))?;
|
||||||
|
|
||||||
// Optionally rebind into readonly mode.
|
// Optionally rebind into readonly mode.
|
||||||
if read_only {
|
if readonly {
|
||||||
do_rebind_mount_read_only(dst, MsFlags::empty())?;
|
do_rebind_mount(dst, readonly, MsFlags::empty())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -356,7 +354,7 @@ impl Mounter for kata_types::mount::Mount {
|
|||||||
// Bind mount readonly.
|
// Bind mount readonly.
|
||||||
let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY;
|
let bro_flag = MsFlags::MS_BIND | MsFlags::MS_RDONLY;
|
||||||
if (o_flag & bro_flag) == bro_flag {
|
if (o_flag & bro_flag) == bro_flag {
|
||||||
do_rebind_mount_read_only(target, o_flag)?;
|
do_rebind_mount(target, true, o_flag)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -364,12 +362,16 @@ impl Mounter for kata_types::mount::Mount {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn do_rebind_mount_read_only<P: AsRef<Path>>(path: P, flags: MsFlags) -> Result<()> {
|
fn do_rebind_mount<P: AsRef<Path>>(path: P, readonly: bool, flags: MsFlags) -> Result<()> {
|
||||||
mount(
|
mount(
|
||||||
Some(""),
|
Some(""),
|
||||||
path.as_ref(),
|
path.as_ref(),
|
||||||
Some(""),
|
Some(""),
|
||||||
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY,
|
if readonly {
|
||||||
|
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT | MsFlags::MS_RDONLY
|
||||||
|
} else {
|
||||||
|
flags | MsFlags::MS_BIND | MsFlags::MS_REMOUNT
|
||||||
|
},
|
||||||
Some(""),
|
Some(""),
|
||||||
)
|
)
|
||||||
.map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e))
|
.map_err(|e| Error::Remount(path.as_ref().to_path_buf(), e))
|
||||||
@@ -756,18 +758,11 @@ pub fn umount_all<P: AsRef<Path>>(mountpoint: P, lazy_umount: bool) -> Result<()
|
|||||||
|
|
||||||
// Counterpart of nix::umount2, with support of `UMOUNT_FOLLOW`.
|
// Counterpart of nix::umount2, with support of `UMOUNT_FOLLOW`.
|
||||||
fn umount2<P: AsRef<Path>>(path: P, lazy_umount: bool) -> std::io::Result<()> {
|
fn umount2<P: AsRef<Path>>(path: P, lazy_umount: bool) -> std::io::Result<()> {
|
||||||
let path_ptr = path.as_ref().as_os_str().as_bytes().as_ptr() as *const c_char;
|
let mut flags = MntFlags::UMOUNT_NOFOLLOW;
|
||||||
let mut flags = MntFlags::UMOUNT_NOFOLLOW.bits();
|
|
||||||
if lazy_umount {
|
if lazy_umount {
|
||||||
flags |= MntFlags::MNT_DETACH.bits();
|
flags |= MntFlags::MNT_DETACH;
|
||||||
}
|
|
||||||
|
|
||||||
// Safe because parameter is valid and we have checked the reuslt.
|
|
||||||
if unsafe { libc::umount2(path_ptr, flags) } < 0 {
|
|
||||||
Err(io::Error::last_os_error())
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
nix::mount::umount2(path.as_ref(), flags).map_err(io::Error::from)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -820,21 +815,21 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[ignore]
|
#[ignore]
|
||||||
fn test_bind_remount_read_only() {
|
fn test_bind_remount() {
|
||||||
let tmpdir = tempfile::tempdir().unwrap();
|
let tmpdir = tempfile::tempdir().unwrap();
|
||||||
let tmpdir2 = tempfile::tempdir().unwrap();
|
let tmpdir2 = tempfile::tempdir().unwrap();
|
||||||
|
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
bind_remount_read_only(&PathBuf::from("")),
|
bind_remount(&PathBuf::from(""), true),
|
||||||
Err(Error::NullMountPointPath)
|
Err(Error::NullMountPointPath)
|
||||||
));
|
));
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
bind_remount_read_only(&PathBuf::from("../______doesn't____exist____nnn")),
|
bind_remount(&PathBuf::from("../______doesn't____exist____nnn"), true),
|
||||||
Err(Error::InvalidPath(_))
|
Err(Error::InvalidPath(_))
|
||||||
));
|
));
|
||||||
|
|
||||||
bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true).unwrap();
|
bind_mount_unchecked(tmpdir2.path(), tmpdir.path(), true).unwrap();
|
||||||
bind_remount_read_only(tmpdir.path()).unwrap();
|
bind_remount(tmpdir.path(), true).unwrap();
|
||||||
umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap();
|
umount_timeout(tmpdir.path().to_str().unwrap(), 0).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ pub const DEFAULT_VHOST_USER_STORE_PATH: &str = "/var/run/vhost-user";
|
|||||||
pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0;
|
pub const DEFAULT_BLOCK_NVDIMM_MEM_OFFSET: u64 = 0;
|
||||||
|
|
||||||
pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs";
|
pub const DEFAULT_SHARED_FS_TYPE: &str = "virtio-fs";
|
||||||
pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "none";
|
pub const DEFAULT_VIRTIO_FS_CACHE_MODE: &str = "never";
|
||||||
pub const DEFAULT_VIRTIO_FS_DAX_SIZE_MB: u32 = 1024;
|
pub const DEFAULT_VIRTIO_FS_DAX_SIZE_MB: u32 = 1024;
|
||||||
pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 128 * 1024;
|
pub const DEFAULT_SHARED_9PFS_SIZE_MB: u32 = 128 * 1024;
|
||||||
pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024;
|
pub const MIN_SHARED_9PFS_SIZE_MB: u32 = 4 * 1024;
|
||||||
|
|||||||
@@ -864,7 +864,7 @@ impl SharedFsInfo {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let l = ["none", "auto", "always"];
|
let l = ["never", "auto", "always"];
|
||||||
|
|
||||||
if !l.contains(&self.virtio_fs_cache.as_str()) {
|
if !l.contains(&self.virtio_fs_cache.as_str()) {
|
||||||
return Err(eother!(
|
return Err(eother!(
|
||||||
|
|||||||
19
src/runtime-rs/Cargo.lock
generated
19
src/runtime-rs/Cargo.lock
generated
@@ -249,6 +249,12 @@ dependencies = [
|
|||||||
"rustc-demangle",
|
"rustc-demangle",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "base64"
|
||||||
|
version = "0.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.3.2"
|
version = "1.3.2"
|
||||||
@@ -1352,6 +1358,8 @@ dependencies = [
|
|||||||
name = "kata-types"
|
name = "kata-types"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"base64",
|
||||||
"bitmask-enum",
|
"bitmask-enum",
|
||||||
"byte-unit",
|
"byte-unit",
|
||||||
"glob",
|
"glob",
|
||||||
@@ -2288,6 +2296,7 @@ dependencies = [
|
|||||||
"rtnetlink",
|
"rtnetlink",
|
||||||
"scopeguard",
|
"scopeguard",
|
||||||
"serde",
|
"serde",
|
||||||
|
"serde_json",
|
||||||
"slog",
|
"slog",
|
||||||
"slog-scope",
|
"slog-scope",
|
||||||
"test-utils",
|
"test-utils",
|
||||||
@@ -2526,6 +2535,16 @@ dependencies = [
|
|||||||
"unix_socket2",
|
"unix_socket2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shim-ctl"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"common",
|
||||||
|
"logging",
|
||||||
|
"runtimes",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-registry"
|
name = "signal-hook-registry"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"crates/shim",
|
"crates/shim",
|
||||||
|
"crates/shim-ctl",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -120,6 +120,8 @@ See the
|
|||||||
See the
|
See the
|
||||||
[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers).
|
[debugging section of the developer guide](../../docs/Developer-Guide.md#troubleshoot-kata-containers).
|
||||||
|
|
||||||
|
An [experimental alternative binary](crates/shim-ctl/README.md) is available that removes containerd dependencies and makes it easier to run the shim proper outside of the runtime's usual deployment environment (i.e. on a developer machine).
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
For Kata Containers limitations, see the
|
For Kata Containers limitations, see the
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ pub mod hypervisor_persist;
|
|||||||
pub use device::*;
|
pub use device::*;
|
||||||
pub mod dragonball;
|
pub mod dragonball;
|
||||||
mod kernel_param;
|
mod kernel_param;
|
||||||
|
pub mod qemu;
|
||||||
pub use kernel_param::Param;
|
pub use kernel_param::Param;
|
||||||
mod utils;
|
mod utils;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
@@ -28,6 +29,8 @@ const VM_ROOTFS_DRIVER_BLK: &str = "virtio-blk";
|
|||||||
const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem";
|
const VM_ROOTFS_DRIVER_PMEM: &str = "virtio-pmem";
|
||||||
|
|
||||||
pub const HYPERVISOR_DRAGONBALL: &str = "dragonball";
|
pub const HYPERVISOR_DRAGONBALL: &str = "dragonball";
|
||||||
|
pub const HYPERVISOR_QEMU: &str = "qemu";
|
||||||
|
|
||||||
#[derive(PartialEq)]
|
#[derive(PartialEq)]
|
||||||
pub(crate) enum VmmState {
|
pub(crate) enum VmmState {
|
||||||
NotReady,
|
NotReady,
|
||||||
|
|||||||
142
src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
Normal file
142
src/runtime-rs/crates/hypervisor/src/qemu/inner.rs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
// Copyright (c) 2022 Red Hat
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
|
||||||
|
use crate::{HypervisorConfig, VcpuThreadIds};
|
||||||
|
use kata_types::capabilities::{Capabilities, CapabilityBits};
|
||||||
|
|
||||||
|
const VSOCK_SCHEME: &str = "vsock";
|
||||||
|
const VSOCK_AGENT_CID: u32 = 3;
|
||||||
|
const VSOCK_AGENT_PORT: u32 = 1024;
|
||||||
|
|
||||||
|
unsafe impl Send for QemuInner {}
|
||||||
|
unsafe impl Sync for QemuInner {}
|
||||||
|
|
||||||
|
pub struct QemuInner {
|
||||||
|
config: HypervisorConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QemuInner {
|
||||||
|
pub fn new() -> QemuInner {
|
||||||
|
QemuInner {
|
||||||
|
config: Default::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn prepare_vm(&mut self, _id: &str, _netns: Option<String>) -> Result<()> {
|
||||||
|
info!(sl!(), "Preparing QEMU VM");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> {
|
||||||
|
info!(sl!(), "Starting QEMU VM");
|
||||||
|
|
||||||
|
let mut command = std::process::Command::new(&self.config.path);
|
||||||
|
|
||||||
|
command
|
||||||
|
.arg("-kernel")
|
||||||
|
.arg(&self.config.boot_info.kernel)
|
||||||
|
.arg("-m")
|
||||||
|
.arg(format!("{}M", &self.config.memory_info.default_memory))
|
||||||
|
.arg("-initrd")
|
||||||
|
.arg(&self.config.boot_info.initrd)
|
||||||
|
.arg("-vga")
|
||||||
|
.arg("none")
|
||||||
|
.arg("-nodefaults")
|
||||||
|
.arg("-nographic");
|
||||||
|
|
||||||
|
command.spawn()?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn stop_vm(&mut self) -> Result<()> {
|
||||||
|
info!(sl!(), "Stopping QEMU VM");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn pause_vm(&self) -> Result<()> {
|
||||||
|
info!(sl!(), "Pausing QEMU VM");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn resume_vm(&self) -> Result<()> {
|
||||||
|
info!(sl!(), "Resuming QEMU VM");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn save_vm(&self) -> Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// TODO: using a single hardcoded CID is clearly not adequate in the long
|
||||||
|
/// run. Use the recently added VsockConfig infrastructure to fix this.
|
||||||
|
pub(crate) async fn get_agent_socket(&self) -> Result<String> {
|
||||||
|
info!(sl!(), "QemuInner::get_agent_socket()");
|
||||||
|
Ok(format!(
|
||||||
|
"{}://{}:{}",
|
||||||
|
VSOCK_SCHEME, VSOCK_AGENT_CID, VSOCK_AGENT_PORT
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn disconnect(&mut self) {
|
||||||
|
info!(sl!(), "QemuInner::disconnect()");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||||
|
info!(sl!(), "QemuInner::get_thread_ids()");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn cleanup(&self) -> Result<()> {
|
||||||
|
info!(sl!(), "QemuInner::cleanup()");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||||
|
info!(sl!(), "QemuInner::get_pids()");
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn check(&self) -> Result<()> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn get_jailer_root(&self) -> Result<String> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn capabilities(&self) -> Result<Capabilities> {
|
||||||
|
let mut caps = Capabilities::default();
|
||||||
|
caps.set(CapabilityBits::FsSharingSupport);
|
||||||
|
Ok(caps)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
|
||||||
|
self.config = config;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hypervisor_config(&self) -> HypervisorConfig {
|
||||||
|
info!(sl!(), "QemuInner::hypervisor_config()");
|
||||||
|
self.config.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
use crate::device::Device;
|
||||||
|
|
||||||
|
// device manager part of Hypervisor
|
||||||
|
impl QemuInner {
|
||||||
|
pub(crate) async fn add_device(&mut self, device: Device) -> Result<()> {
|
||||||
|
info!(sl!(), "QemuInner::add_device() {}", device);
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn remove_device(&mut self, device: Device) -> Result<()> {
|
||||||
|
info!(sl!(), "QemuInner::remove_device() {} ", device);
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
134
src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
Normal file
134
src/runtime-rs/crates/hypervisor/src/qemu/mod.rs
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
// Copyright (c) 2022 Red Hat
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
mod inner;
|
||||||
|
|
||||||
|
use crate::device::Device;
|
||||||
|
use crate::hypervisor_persist::HypervisorState;
|
||||||
|
use crate::Hypervisor;
|
||||||
|
use crate::{HypervisorConfig, VcpuThreadIds};
|
||||||
|
use inner::QemuInner;
|
||||||
|
use kata_types::capabilities::Capabilities;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
pub struct Qemu {
|
||||||
|
inner: Arc<RwLock<QemuInner>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Qemu {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Qemu {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(RwLock::new(QemuInner::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_hypervisor_config(&mut self, config: HypervisorConfig) {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.set_hypervisor_config(config)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Hypervisor for Qemu {
|
||||||
|
async fn prepare_vm(&self, id: &str, netns: Option<String>) -> Result<()> {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.prepare_vm(id, netns).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start_vm(&self, timeout: i32) -> Result<()> {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.start_vm(timeout).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn stop_vm(&self) -> Result<()> {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.stop_vm()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn pause_vm(&self) -> Result<()> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.pause_vm()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn resume_vm(&self) -> Result<()> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.resume_vm()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save_vm(&self) -> Result<()> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.save_vm().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn add_device(&self, device: Device) -> Result<()> {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.add_device(device).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn remove_device(&self, device: Device) -> Result<()> {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.remove_device(device).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_agent_socket(&self) -> Result<String> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.get_agent_socket().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn disconnect(&self) {
|
||||||
|
let mut inner = self.inner.write().await;
|
||||||
|
inner.disconnect().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn hypervisor_config(&self) -> HypervisorConfig {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.hypervisor_config()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_thread_ids(&self) -> Result<VcpuThreadIds> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.get_thread_ids().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn cleanup(&self) -> Result<()> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.cleanup().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_pids(&self) -> Result<Vec<u32>> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.get_pids().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn check(&self) -> Result<()> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.check().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_jailer_root(&self) -> Result<String> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.get_jailer_root().await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save_state(&self) -> Result<HypervisorState> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn capabilities(&self) -> Result<Capabilities> {
|
||||||
|
let inner = self.inner.read().await;
|
||||||
|
inner.capabilities().await
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -11,14 +11,15 @@ use share_virtio_fs_inline::ShareVirtioFsInline;
|
|||||||
mod share_virtio_fs_standalone;
|
mod share_virtio_fs_standalone;
|
||||||
use share_virtio_fs_standalone::ShareVirtioFsStandalone;
|
use share_virtio_fs_standalone::ShareVirtioFsStandalone;
|
||||||
mod utils;
|
mod utils;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path};
|
pub use utils::{do_get_guest_path, do_get_guest_share_path, get_host_rw_shared_path};
|
||||||
mod virtio_fs_share_mount;
|
mod virtio_fs_share_mount;
|
||||||
use virtio_fs_share_mount::VirtiofsShareMount;
|
use virtio_fs_share_mount::VirtiofsShareMount;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::{collections::HashMap, fmt::Debug, path::PathBuf, sync::Arc};
|
||||||
|
|
||||||
use agent::Storage;
|
use agent::Storage;
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Ok, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use hypervisor::Hypervisor;
|
use hypervisor::Hypervisor;
|
||||||
use kata_types::config::hypervisor::SharedFsInfo;
|
use kata_types::config::hypervisor::SharedFsInfo;
|
||||||
@@ -43,8 +44,10 @@ pub trait ShareFs: Send + Sync {
|
|||||||
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
||||||
async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
async fn setup_device_after_start_vm(&self, h: &dyn Hypervisor) -> Result<()>;
|
||||||
async fn get_storages(&self) -> Result<Vec<Storage>>;
|
async fn get_storages(&self) -> Result<Vec<Storage>>;
|
||||||
|
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct ShareFsRootfsConfig {
|
pub struct ShareFsRootfsConfig {
|
||||||
// TODO: for nydus v5/v6 need to update ShareFsMount
|
// TODO: for nydus v5/v6 need to update ShareFsMount
|
||||||
pub cid: String,
|
pub cid: String,
|
||||||
@@ -54,6 +57,7 @@ pub struct ShareFsRootfsConfig {
|
|||||||
pub is_rafs: bool,
|
pub is_rafs: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct ShareFsVolumeConfig {
|
pub struct ShareFsVolumeConfig {
|
||||||
pub cid: String,
|
pub cid: String,
|
||||||
pub source: String,
|
pub source: String,
|
||||||
@@ -69,10 +73,61 @@ pub struct ShareFsMountResult {
|
|||||||
pub storages: Vec<agent::Storage>,
|
pub storages: Vec<agent::Storage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Save mounted info for sandbox-level shared files.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct MountedInfo {
|
||||||
|
// Guest path
|
||||||
|
pub guest_path: PathBuf,
|
||||||
|
// Ref count of containers that uses this volume with read only permission
|
||||||
|
pub ro_ref_count: usize,
|
||||||
|
// Ref count of containers that uses this volume with read write permission
|
||||||
|
pub rw_ref_count: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MountedInfo {
|
||||||
|
pub fn new(guest_path: PathBuf, readonly: bool) -> Self {
|
||||||
|
Self {
|
||||||
|
guest_path,
|
||||||
|
ro_ref_count: if readonly { 1 } else { 0 },
|
||||||
|
rw_ref_count: if readonly { 0 } else { 1 },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the mount has read only permission
|
||||||
|
pub fn readonly(&self) -> bool {
|
||||||
|
self.rw_ref_count == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ref count for all permissions
|
||||||
|
pub fn ref_count(&self) -> usize {
|
||||||
|
self.ro_ref_count + self.rw_ref_count
|
||||||
|
}
|
||||||
|
|
||||||
|
// File/dir name in the form of "sandbox-<uuid>-<file/dir name>"
|
||||||
|
pub fn file_name(&self) -> Result<String> {
|
||||||
|
match self.guest_path.file_name() {
|
||||||
|
Some(file_name) => match file_name.to_str() {
|
||||||
|
Some(file_name) => Ok(file_name.to_owned()),
|
||||||
|
None => Err(anyhow!("failed to get string from {:?}", file_name)),
|
||||||
|
},
|
||||||
|
None => Err(anyhow!(
|
||||||
|
"failed to get file name from the guest_path {:?}",
|
||||||
|
self.guest_path
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait ShareFsMount: Send + Sync {
|
pub trait ShareFsMount: Send + Sync {
|
||||||
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
|
async fn share_rootfs(&self, config: ShareFsRootfsConfig) -> Result<ShareFsMountResult>;
|
||||||
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
|
async fn share_volume(&self, config: ShareFsVolumeConfig) -> Result<ShareFsMountResult>;
|
||||||
|
/// Upgrade to readwrite permission
|
||||||
|
async fn upgrade_to_rw(&self, file_name: &str) -> Result<()>;
|
||||||
|
/// Downgrade to readonly permission
|
||||||
|
async fn downgrade_to_ro(&self, file_name: &str) -> Result<()>;
|
||||||
|
/// Umount the volume
|
||||||
|
async fn umount(&self, file_name: &str) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {
|
pub fn new(id: &str, config: &SharedFsInfo) -> Result<Arc<dyn ShareFs>> {
|
||||||
|
|||||||
@@ -4,11 +4,14 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use agent::Storage;
|
use agent::Storage;
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use hypervisor::Hypervisor;
|
use hypervisor::Hypervisor;
|
||||||
use kata_types::config::hypervisor::SharedFsInfo;
|
use kata_types::config::hypervisor::SharedFsInfo;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
share_virtio_fs::{
|
share_virtio_fs::{
|
||||||
@@ -30,6 +33,7 @@ pub struct ShareVirtioFsInlineConfig {
|
|||||||
pub struct ShareVirtioFsInline {
|
pub struct ShareVirtioFsInline {
|
||||||
config: ShareVirtioFsInlineConfig,
|
config: ShareVirtioFsInlineConfig,
|
||||||
share_fs_mount: Arc<dyn ShareFsMount>,
|
share_fs_mount: Arc<dyn ShareFsMount>,
|
||||||
|
mounted_info_set: Arc<Mutex<HashMap<String, MountedInfo>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ShareVirtioFsInline {
|
impl ShareVirtioFsInline {
|
||||||
@@ -37,6 +41,7 @@ impl ShareVirtioFsInline {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
config: ShareVirtioFsInlineConfig { id: id.to_string() },
|
config: ShareVirtioFsInlineConfig { id: id.to_string() },
|
||||||
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
||||||
|
mounted_info_set: Arc::new(Mutex::new(HashMap::new())),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -77,4 +82,8 @@ impl ShareFs for ShareVirtioFsInline {
|
|||||||
storages.push(shared_volume);
|
storages.push(shared_volume);
|
||||||
Ok(storages)
|
Ok(storages)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>> {
|
||||||
|
self.mounted_info_set.clone()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,8 +4,12 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
use std::{process::Stdio, sync::Arc};
|
use std::{collections::HashMap, process::Stdio, sync::Arc};
|
||||||
|
|
||||||
|
use crate::share_fs::share_virtio_fs::{
|
||||||
|
prepare_virtiofs, FS_TYPE_VIRTIO_FS, KATA_VIRTIO_FS_DEV_TYPE, MOUNT_GUEST_TAG,
|
||||||
|
};
|
||||||
|
use crate::share_fs::{KATA_GUEST_SHARE_DIR, VIRTIO_FS};
|
||||||
use agent::Storage;
|
use agent::Storage;
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
@@ -16,19 +20,18 @@ use tokio::{
|
|||||||
process::{Child, Command},
|
process::{Child, Command},
|
||||||
sync::{
|
sync::{
|
||||||
mpsc::{channel, Receiver, Sender},
|
mpsc::{channel, Receiver, Sender},
|
||||||
RwLock,
|
Mutex, RwLock,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path,
|
share_virtio_fs::generate_sock_path, utils::ensure_dir_exist, utils::get_host_ro_shared_path,
|
||||||
virtio_fs_share_mount::VirtiofsShareMount, ShareFs, ShareFsMount,
|
virtio_fs_share_mount::VirtiofsShareMount, MountedInfo, ShareFs, ShareFsMount,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ShareVirtioFsStandaloneConfig {
|
pub struct ShareVirtioFsStandaloneConfig {
|
||||||
id: String,
|
id: String,
|
||||||
jail_root: String,
|
|
||||||
|
|
||||||
// virtio_fs_daemon is the virtio-fs vhost-user daemon path
|
// virtio_fs_daemon is the virtio-fs vhost-user daemon path
|
||||||
pub virtio_fs_daemon: String,
|
pub virtio_fs_daemon: String,
|
||||||
@@ -38,14 +41,16 @@ pub struct ShareVirtioFsStandaloneConfig {
|
|||||||
pub virtio_fs_extra_args: Vec<String>,
|
pub virtio_fs_extra_args: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default, Debug)]
|
||||||
struct ShareVirtioFsStandaloneInner {
|
struct ShareVirtioFsStandaloneInner {
|
||||||
pid: Option<u32>,
|
pid: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct ShareVirtioFsStandalone {
|
pub(crate) struct ShareVirtioFsStandalone {
|
||||||
inner: Arc<RwLock<ShareVirtioFsStandaloneInner>>,
|
inner: Arc<RwLock<ShareVirtioFsStandaloneInner>>,
|
||||||
config: ShareVirtioFsStandaloneConfig,
|
config: ShareVirtioFsStandaloneConfig,
|
||||||
share_fs_mount: Arc<dyn ShareFsMount>,
|
share_fs_mount: Arc<dyn ShareFsMount>,
|
||||||
|
mounted_info_set: Arc<Mutex<HashMap<String, MountedInfo>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ShareVirtioFsStandalone {
|
impl ShareVirtioFsStandalone {
|
||||||
@@ -54,26 +59,33 @@ impl ShareVirtioFsStandalone {
|
|||||||
inner: Arc::new(RwLock::new(ShareVirtioFsStandaloneInner::default())),
|
inner: Arc::new(RwLock::new(ShareVirtioFsStandaloneInner::default())),
|
||||||
config: ShareVirtioFsStandaloneConfig {
|
config: ShareVirtioFsStandaloneConfig {
|
||||||
id: id.to_string(),
|
id: id.to_string(),
|
||||||
jail_root: "".to_string(),
|
|
||||||
virtio_fs_daemon: config.virtio_fs_daemon.clone(),
|
virtio_fs_daemon: config.virtio_fs_daemon.clone(),
|
||||||
virtio_fs_cache: config.virtio_fs_cache.clone(),
|
virtio_fs_cache: config.virtio_fs_cache.clone(),
|
||||||
virtio_fs_extra_args: config.virtio_fs_extra_args.clone(),
|
virtio_fs_extra_args: config.virtio_fs_extra_args.clone(),
|
||||||
},
|
},
|
||||||
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
share_fs_mount: Arc::new(VirtiofsShareMount::new(id)),
|
||||||
|
mounted_info_set: Arc::new(Mutex::new(HashMap::new())),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn virtiofsd_args(&self, sock_path: &str) -> Result<Vec<String>> {
|
fn virtiofsd_args(&self, sock_path: &str) -> Result<Vec<String>> {
|
||||||
let source_path = get_host_ro_shared_path(&self.config.id);
|
let source_path = get_host_ro_shared_path(&self.config.id);
|
||||||
ensure_dir_exist(&source_path)?;
|
ensure_dir_exist(&source_path)?;
|
||||||
|
let shared_dir = source_path
|
||||||
|
.to_str()
|
||||||
|
.ok_or_else(|| anyhow!("convert source path {:?} to str failed", source_path))?;
|
||||||
|
|
||||||
let mut args: Vec<String> = vec![
|
let mut args: Vec<String> = vec![
|
||||||
String::from("-f"),
|
String::from("--socket-path"),
|
||||||
format!("--socket-path={}", sock_path),
|
String::from(sock_path),
|
||||||
String::from("-o"),
|
String::from("--shared-dir"),
|
||||||
format!("source={}", source_path.to_str().unwrap()),
|
String::from(shared_dir),
|
||||||
String::from("-o"),
|
String::from("--cache"),
|
||||||
format!("cache={}", self.config.virtio_fs_cache),
|
self.config.virtio_fs_cache.clone(),
|
||||||
|
String::from("--sandbox"),
|
||||||
|
String::from("none"),
|
||||||
|
String::from("--seccomp"),
|
||||||
|
String::from("none"),
|
||||||
];
|
];
|
||||||
|
|
||||||
if !self.config.virtio_fs_extra_args.is_empty() {
|
if !self.config.virtio_fs_extra_args.is_empty() {
|
||||||
@@ -84,8 +96,8 @@ impl ShareVirtioFsStandalone {
|
|||||||
Ok(args)
|
Ok(args)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn setup_virtiofsd(&self) -> Result<()> {
|
async fn setup_virtiofsd(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||||
let sock_path = generate_sock_path(&self.config.jail_root);
|
let sock_path = generate_sock_path(&h.get_jailer_root().await?);
|
||||||
let args = self.virtiofsd_args(&sock_path).context("virtiofsd args")?;
|
let args = self.virtiofsd_args(&sock_path).context("virtiofsd args")?;
|
||||||
|
|
||||||
let mut cmd = Command::new(&self.config.virtio_fs_daemon);
|
let mut cmd = Command::new(&self.config.virtio_fs_daemon);
|
||||||
@@ -160,8 +172,11 @@ impl ShareFs for ShareVirtioFsStandalone {
|
|||||||
self.share_fs_mount.clone()
|
self.share_fs_mount.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn setup_device_before_start_vm(&self, _h: &dyn Hypervisor) -> Result<()> {
|
async fn setup_device_before_start_vm(&self, h: &dyn Hypervisor) -> Result<()> {
|
||||||
self.setup_virtiofsd().await.context("setup virtiofsd")?;
|
prepare_virtiofs(h, VIRTIO_FS, &self.config.id, &h.get_jailer_root().await?)
|
||||||
|
.await
|
||||||
|
.context("prepare virtiofs")?;
|
||||||
|
self.setup_virtiofsd(h).await.context("setup virtiofsd")?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,6 +185,23 @@ impl ShareFs for ShareVirtioFsStandalone {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn get_storages(&self) -> Result<Vec<Storage>> {
|
async fn get_storages(&self) -> Result<Vec<Storage>> {
|
||||||
Ok(vec![])
|
let mut storages: Vec<Storage> = Vec::new();
|
||||||
|
|
||||||
|
let shared_volume: Storage = Storage {
|
||||||
|
driver: String::from(KATA_VIRTIO_FS_DEV_TYPE),
|
||||||
|
driver_options: Vec::new(),
|
||||||
|
source: String::from(MOUNT_GUEST_TAG),
|
||||||
|
fs_type: String::from(FS_TYPE_VIRTIO_FS),
|
||||||
|
fs_group: None,
|
||||||
|
options: vec![String::from("nodev")],
|
||||||
|
mount_point: String::from(KATA_GUEST_SHARE_DIR),
|
||||||
|
};
|
||||||
|
|
||||||
|
storages.push(shared_volume);
|
||||||
|
Ok(storages)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mounted_info_set(&self) -> Arc<Mutex<HashMap<String, MountedInfo>>> {
|
||||||
|
self.mounted_info_set.clone()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ pub(crate) fn ensure_dir_exist(path: &Path) -> Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bind mount the original path to the runtime directory.
|
||||||
pub(crate) fn share_to_guest(
|
pub(crate) fn share_to_guest(
|
||||||
// absolute path for source
|
// absolute path for source
|
||||||
source: &str,
|
source: &str,
|
||||||
@@ -37,7 +38,7 @@ pub(crate) fn share_to_guest(
|
|||||||
// to remount the read only dir mount point directly.
|
// to remount the read only dir mount point directly.
|
||||||
if readonly {
|
if readonly {
|
||||||
let dst = do_get_host_path(target, sid, cid, is_volume, true);
|
let dst = do_get_host_path(target, sid, cid, is_volume, true);
|
||||||
mount::bind_remount_read_only(&dst).context("bind remount readonly")?;
|
mount::bind_remount(&dst, readonly).context("bind remount readonly")?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(do_get_guest_path(target, cid, is_volume, is_rafs))
|
Ok(do_get_guest_path(target, cid, is_volume, is_rafs))
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
use agent::Storage;
|
use agent::Storage;
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use kata_sys_util::mount::{bind_remount, umount_timeout};
|
||||||
use kata_types::k8s::is_watchable_mount;
|
use kata_types::k8s::is_watchable_mount;
|
||||||
use kata_types::mount;
|
use kata_types::mount;
|
||||||
use nix::sys::stat::stat;
|
use nix::sys::stat::stat;
|
||||||
@@ -19,10 +20,12 @@ const WATCHABLE_BIND_DEV_TYPE: &str = "watchable-bind";
|
|||||||
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
const EPHEMERAL_PATH: &str = "/run/kata-containers/sandbox/ephemeral";
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
utils, ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
|
utils::{self, do_get_host_path},
|
||||||
|
ShareFsMount, ShareFsMountResult, ShareFsRootfsConfig, ShareFsVolumeConfig,
|
||||||
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
|
KATA_GUEST_SHARE_DIR, PASSTHROUGH_FS_DIR,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct VirtiofsShareMount {
|
pub struct VirtiofsShareMount {
|
||||||
id: String,
|
id: String,
|
||||||
}
|
}
|
||||||
@@ -166,4 +169,35 @@ impl ShareFsMount for VirtiofsShareMount {
|
|||||||
storages: vec![],
|
storages: vec![],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn upgrade_to_rw(&self, file_name: &str) -> Result<()> {
|
||||||
|
// Remount readonly directory with readwrite permission
|
||||||
|
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||||
|
bind_remount(&host_dest, false)
|
||||||
|
.context("remount readonly directory with readwrite permission")?;
|
||||||
|
// Remount readwrite directory with readwrite permission
|
||||||
|
let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
|
||||||
|
bind_remount(&host_dest, false)
|
||||||
|
.context("remount readwrite directory with readwrite permission")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn downgrade_to_ro(&self, file_name: &str) -> Result<()> {
|
||||||
|
// Remount readwrite directory with readonly permission
|
||||||
|
let host_dest = do_get_host_path(file_name, &self.id, "", true, false);
|
||||||
|
bind_remount(&host_dest, true)
|
||||||
|
.context("remount readwrite directory with readonly permission")?;
|
||||||
|
// Remount readonly directory with readonly permission
|
||||||
|
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||||
|
bind_remount(&host_dest, true)
|
||||||
|
.context("remount readonly directory with readonly permission")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn umount(&self, file_name: &str) -> Result<()> {
|
||||||
|
let host_dest = do_get_host_path(file_name, &self.id, "", true, true);
|
||||||
|
umount_timeout(&host_dest, 0).context("Umount readwrite host dest")?;
|
||||||
|
// Umount event will be propagated to ro directory
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,9 +5,11 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use super::Volume;
|
use super::Volume;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub(crate) struct BlockVolume {}
|
pub(crate) struct BlockVolume {}
|
||||||
|
|
||||||
/// BlockVolume: block device volume
|
/// BlockVolume: block device volume
|
||||||
@@ -17,6 +19,7 @@ impl BlockVolume {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
impl Volume for BlockVolume {
|
impl Volume for BlockVolume {
|
||||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||||
todo!()
|
todo!()
|
||||||
@@ -26,8 +29,9 @@ impl Volume for BlockVolume {
|
|||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cleanup(&self) -> Result<()> {
|
async fn cleanup(&self) -> Result<()> {
|
||||||
todo!()
|
warn!(sl!(), "Cleaning up BlockVolume is still unimplemented.");
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,9 +5,11 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use super::Volume;
|
use super::Volume;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub(crate) struct DefaultVolume {
|
pub(crate) struct DefaultVolume {
|
||||||
mount: oci::Mount,
|
mount: oci::Mount,
|
||||||
}
|
}
|
||||||
@@ -21,6 +23,7 @@ impl DefaultVolume {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
impl Volume for DefaultVolume {
|
impl Volume for DefaultVolume {
|
||||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||||
Ok(vec![self.mount.clone()])
|
Ok(vec![self.mount.clone()])
|
||||||
@@ -30,7 +33,8 @@ impl Volume for DefaultVolume {
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cleanup(&self) -> Result<()> {
|
async fn cleanup(&self) -> Result<()> {
|
||||||
todo!()
|
warn!(sl!(), "Cleaning up DefaultVolume is still unimplemented.");
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ mod block_volume;
|
|||||||
mod default_volume;
|
mod default_volume;
|
||||||
mod share_fs_volume;
|
mod share_fs_volume;
|
||||||
mod shm_volume;
|
mod shm_volume;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use std::{sync::Arc, vec::Vec};
|
use std::{sync::Arc, vec::Vec};
|
||||||
|
|
||||||
@@ -16,10 +17,11 @@ use tokio::sync::RwLock;
|
|||||||
|
|
||||||
use crate::share_fs::ShareFs;
|
use crate::share_fs::ShareFs;
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
pub trait Volume: Send + Sync {
|
pub trait Volume: Send + Sync {
|
||||||
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
|
fn get_volume_mount(&self) -> Result<Vec<oci::Mount>>;
|
||||||
fn get_storage(&self) -> Result<Vec<agent::Storage>>;
|
fn get_storage(&self) -> Result<Vec<agent::Storage>>;
|
||||||
fn cleanup(&self) -> Result<()>;
|
async fn cleanup(&self) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
|||||||
@@ -4,12 +4,17 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
use std::{path::Path, sync::Arc};
|
use std::{
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
str::FromStr,
|
||||||
|
sync::{Arc, Weak},
|
||||||
|
};
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use super::Volume;
|
use super::Volume;
|
||||||
use crate::share_fs::{ShareFs, ShareFsVolumeConfig};
|
use crate::share_fs::{MountedInfo, ShareFs, ShareFsVolumeConfig};
|
||||||
use kata_types::mount;
|
use kata_types::mount;
|
||||||
|
|
||||||
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
|
// copy file to container's rootfs if filesystem sharing is not supported, otherwise
|
||||||
@@ -19,6 +24,7 @@ use kata_types::mount;
|
|||||||
// device nodes to the guest.
|
// device nodes to the guest.
|
||||||
// skip the volumes whose source has already been set to the guest share dir.
|
// skip the volumes whose source has already been set to the guest share dir.
|
||||||
pub(crate) struct ShareFsVolume {
|
pub(crate) struct ShareFsVolume {
|
||||||
|
share_fs: Option<Weak<dyn ShareFs>>,
|
||||||
mounts: Vec<oci::Mount>,
|
mounts: Vec<oci::Mount>,
|
||||||
storages: Vec<agent::Storage>,
|
storages: Vec<agent::Storage>,
|
||||||
}
|
}
|
||||||
@@ -29,10 +35,12 @@ impl ShareFsVolume {
|
|||||||
m: &oci::Mount,
|
m: &oci::Mount,
|
||||||
cid: &str,
|
cid: &str,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
// The file_name is in the format of "sandbox-{uuid}-{file_name}"
|
||||||
let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
|
let file_name = Path::new(&m.source).file_name().unwrap().to_str().unwrap();
|
||||||
let file_name = generate_mount_path(cid, file_name);
|
let file_name = generate_mount_path("sandbox", file_name);
|
||||||
|
|
||||||
let mut volume = Self {
|
let mut volume = Self {
|
||||||
|
share_fs: share_fs.as_ref().map(Arc::downgrade),
|
||||||
mounts: vec![],
|
mounts: vec![],
|
||||||
storages: vec![],
|
storages: vec![],
|
||||||
};
|
};
|
||||||
@@ -59,20 +67,69 @@ impl ShareFsVolume {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(share_fs) => {
|
Some(share_fs) => {
|
||||||
|
let readonly = m.options.iter().any(|opt| opt == "ro");
|
||||||
|
|
||||||
let share_fs_mount = share_fs.get_share_fs_mount();
|
let share_fs_mount = share_fs.get_share_fs_mount();
|
||||||
|
let mounted_info_set = share_fs.mounted_info_set();
|
||||||
|
let mut mounted_info_set = mounted_info_set.lock().await;
|
||||||
|
if let Some(mut mounted_info) = mounted_info_set.get(&m.source).cloned() {
|
||||||
|
// Mounted at least once
|
||||||
|
let guest_path = mounted_info
|
||||||
|
.guest_path
|
||||||
|
.clone()
|
||||||
|
.as_os_str()
|
||||||
|
.to_str()
|
||||||
|
.unwrap()
|
||||||
|
.to_owned();
|
||||||
|
if !readonly && mounted_info.readonly() {
|
||||||
|
// The current mount should be upgraded to readwrite permission
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"The mount will be upgraded, mount = {:?}, cid = {}", m, cid
|
||||||
|
);
|
||||||
|
share_fs_mount
|
||||||
|
.upgrade_to_rw(
|
||||||
|
&mounted_info
|
||||||
|
.file_name()
|
||||||
|
.context("get name of mounted info")?,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.context("upgrade mount")?;
|
||||||
|
}
|
||||||
|
if readonly {
|
||||||
|
mounted_info.ro_ref_count += 1;
|
||||||
|
} else {
|
||||||
|
mounted_info.rw_ref_count += 1;
|
||||||
|
}
|
||||||
|
mounted_info_set.insert(m.source.clone(), mounted_info);
|
||||||
|
|
||||||
|
volume.mounts.push(oci::Mount {
|
||||||
|
destination: m.destination.clone(),
|
||||||
|
r#type: "bind".to_string(),
|
||||||
|
source: guest_path,
|
||||||
|
options: m.options.clone(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
// Not mounted ever
|
||||||
let mount_result = share_fs_mount
|
let mount_result = share_fs_mount
|
||||||
.share_volume(ShareFsVolumeConfig {
|
.share_volume(ShareFsVolumeConfig {
|
||||||
cid: cid.to_string(),
|
// The scope of shared volume is sandbox
|
||||||
|
cid: String::from(""),
|
||||||
source: m.source.clone(),
|
source: m.source.clone(),
|
||||||
target: file_name,
|
target: file_name.clone(),
|
||||||
readonly: m.options.iter().any(|o| *o == "ro"),
|
readonly,
|
||||||
mount_options: m.options.clone(),
|
mount_options: m.options.clone(),
|
||||||
mount: m.clone(),
|
mount: m.clone(),
|
||||||
is_rafs: false,
|
is_rafs: false,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.context("share fs volume")?;
|
.context("mount shared volume")?;
|
||||||
|
let mounted_info = MountedInfo::new(
|
||||||
|
PathBuf::from_str(&mount_result.guest_path)
|
||||||
|
.context("convert guest path")?,
|
||||||
|
readonly,
|
||||||
|
);
|
||||||
|
mounted_info_set.insert(m.source.clone(), mounted_info);
|
||||||
// set storages for the volume
|
// set storages for the volume
|
||||||
volume.storages = mount_result.storages;
|
volume.storages = mount_result.storages;
|
||||||
|
|
||||||
@@ -85,10 +142,12 @@ impl ShareFsVolume {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Ok(volume)
|
Ok(volume)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
impl Volume for ShareFsVolume {
|
impl Volume for ShareFsVolume {
|
||||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||||
Ok(self.mounts.clone())
|
Ok(self.mounts.clone())
|
||||||
@@ -98,8 +157,75 @@ impl Volume for ShareFsVolume {
|
|||||||
Ok(self.storages.clone())
|
Ok(self.storages.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cleanup(&self) -> Result<()> {
|
async fn cleanup(&self) -> Result<()> {
|
||||||
todo!()
|
if self.share_fs.is_none() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let share_fs = match self.share_fs.as_ref().unwrap().upgrade() {
|
||||||
|
Some(share_fs) => share_fs,
|
||||||
|
None => return Err(anyhow!("The share_fs was released unexpectedly")),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mounted_info_set = share_fs.mounted_info_set();
|
||||||
|
let mut mounted_info_set = mounted_info_set.lock().await;
|
||||||
|
for m in self.mounts.iter() {
|
||||||
|
let (host_source, mut mounted_info) = match mounted_info_set
|
||||||
|
.iter()
|
||||||
|
.find(|entry| entry.1.guest_path.as_os_str().to_str().unwrap() == m.source)
|
||||||
|
.map(|entry| (entry.0.to_owned(), entry.1.clone()))
|
||||||
|
{
|
||||||
|
Some(entry) => entry,
|
||||||
|
None => {
|
||||||
|
warn!(
|
||||||
|
sl!(),
|
||||||
|
"The mounted info for guest path {} not found", m.source
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let old_readonly = mounted_info.readonly();
|
||||||
|
|
||||||
|
if m.options.iter().any(|opt| *opt == "ro") {
|
||||||
|
mounted_info.ro_ref_count -= 1;
|
||||||
|
} else {
|
||||||
|
mounted_info.rw_ref_count -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
sl!(),
|
||||||
|
"Ref count for {} was updated to {} due to volume cleanup",
|
||||||
|
host_source,
|
||||||
|
mounted_info.ref_count()
|
||||||
|
);
|
||||||
|
let share_fs_mount = share_fs.get_share_fs_mount();
|
||||||
|
let file_name = mounted_info.file_name()?;
|
||||||
|
|
||||||
|
if mounted_info.ref_count() > 0 {
|
||||||
|
// Downgrade to readonly if no container needs readwrite permission
|
||||||
|
if !old_readonly && mounted_info.readonly() {
|
||||||
|
info!(sl!(), "Downgrade {} to readonly due to no container that needs readwrite permission", host_source);
|
||||||
|
share_fs_mount
|
||||||
|
.downgrade_to_ro(&file_name)
|
||||||
|
.await
|
||||||
|
.context("Downgrade volume")?;
|
||||||
|
}
|
||||||
|
mounted_info_set.insert(host_source.clone(), mounted_info);
|
||||||
|
} else {
|
||||||
|
info!(
|
||||||
|
sl!(),
|
||||||
|
"The path will be umounted due to no references, host_source = {}", host_source
|
||||||
|
);
|
||||||
|
mounted_info_set.remove(&host_source);
|
||||||
|
// Umount the volume
|
||||||
|
share_fs_mount
|
||||||
|
.umount(&file_name)
|
||||||
|
.await
|
||||||
|
.context("Umount volume")?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
use super::Volume;
|
use super::Volume;
|
||||||
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
|
use crate::share_fs::DEFAULT_KATA_GUEST_SANDBOX_DIR;
|
||||||
@@ -19,6 +20,7 @@ pub const DEFAULT_SHM_SIZE: u64 = 65536 * 1024;
|
|||||||
// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
|
// KATA_EPHEMERAL_DEV_TYPE creates a tmpfs backed volume for sharing files between containers.
|
||||||
pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral";
|
pub const KATA_EPHEMERAL_DEV_TYPE: &str = "ephemeral";
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub(crate) struct ShmVolume {
|
pub(crate) struct ShmVolume {
|
||||||
mount: oci::Mount,
|
mount: oci::Mount,
|
||||||
storage: Option<agent::Storage>,
|
storage: Option<agent::Storage>,
|
||||||
@@ -82,6 +84,7 @@ impl ShmVolume {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
impl Volume for ShmVolume {
|
impl Volume for ShmVolume {
|
||||||
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
fn get_volume_mount(&self) -> anyhow::Result<Vec<oci::Mount>> {
|
||||||
Ok(vec![self.mount.clone()])
|
Ok(vec![self.mount.clone()])
|
||||||
@@ -96,8 +99,9 @@ impl Volume for ShmVolume {
|
|||||||
Ok(s)
|
Ok(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cleanup(&self) -> Result<()> {
|
async fn cleanup(&self) -> Result<()> {
|
||||||
todo!()
|
warn!(sl!(), "Cleaning up ShmVolume is still unimplemented.");
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use agent::Agent;
|
use agent::Agent;
|
||||||
@@ -81,8 +82,8 @@ impl Container {
|
|||||||
let mut inner = self.inner.write().await;
|
let mut inner = self.inner.write().await;
|
||||||
let toml_config = self.resource_manager.config().await;
|
let toml_config = self.resource_manager.config().await;
|
||||||
let config = &self.config;
|
let config = &self.config;
|
||||||
amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?;
|
|
||||||
let sandbox_pidns = is_pid_namespace_enabled(&spec);
|
let sandbox_pidns = is_pid_namespace_enabled(&spec);
|
||||||
|
amend_spec(&mut spec, toml_config.runtime.disable_guest_seccomp).context("amend spec")?;
|
||||||
|
|
||||||
// handler rootfs
|
// handler rootfs
|
||||||
let rootfs = self
|
let rootfs = self
|
||||||
@@ -143,13 +144,10 @@ impl Container {
|
|||||||
// create container
|
// create container
|
||||||
let r = agent::CreateContainerRequest {
|
let r = agent::CreateContainerRequest {
|
||||||
process_id: agent::ContainerProcessID::new(&config.container_id, ""),
|
process_id: agent::ContainerProcessID::new(&config.container_id, ""),
|
||||||
string_user: None,
|
|
||||||
devices: vec![],
|
|
||||||
storages,
|
storages,
|
||||||
oci: Some(spec),
|
oci: Some(spec),
|
||||||
guest_hooks: None,
|
|
||||||
sandbox_pidns,
|
sandbox_pidns,
|
||||||
rootfs_mounts: vec![],
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
self.agent
|
self.agent
|
||||||
@@ -258,7 +256,7 @@ impl Container {
|
|||||||
signal: u32,
|
signal: u32,
|
||||||
all: bool,
|
all: bool,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let inner = self.inner.read().await;
|
let mut inner = self.inner.write().await;
|
||||||
inner.signal_process(container_process, signal, all).await
|
inner.signal_process(container_process, signal, all).await
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,6 +394,7 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
|
|||||||
resource.block_io = None;
|
resource.block_io = None;
|
||||||
resource.hugepage_limits = Vec::new();
|
resource.hugepage_limits = Vec::new();
|
||||||
resource.network = None;
|
resource.network = None;
|
||||||
|
resource.rdma = HashMap::new();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Host pidns path does not make sense in kata. Let's just align it with
|
// Host pidns path does not make sense in kata. Let's just align it with
|
||||||
@@ -404,7 +403,10 @@ fn amend_spec(spec: &mut oci::Spec, disable_guest_seccomp: bool) -> Result<()> {
|
|||||||
for n in linux.namespaces.iter() {
|
for n in linux.namespaces.iter() {
|
||||||
match n.r#type.as_str() {
|
match n.r#type.as_str() {
|
||||||
oci::PIDNAMESPACE | oci::NETWORKNAMESPACE => continue,
|
oci::PIDNAMESPACE | oci::NETWORKNAMESPACE => continue,
|
||||||
_ => ns.push(n.clone()),
|
_ => ns.push(oci::LinuxNamespace {
|
||||||
|
r#type: n.r#type.clone(),
|
||||||
|
path: "".to_string(),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -233,7 +233,7 @@ impl ContainerInner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) async fn signal_process(
|
pub(crate) async fn signal_process(
|
||||||
&self,
|
&mut self,
|
||||||
process: &ContainerProcess,
|
process: &ContainerProcess,
|
||||||
signal: u32,
|
signal: u32,
|
||||||
all: bool,
|
all: bool,
|
||||||
@@ -247,6 +247,9 @@ impl ContainerInner {
|
|||||||
self.agent
|
self.agent
|
||||||
.signal_process(agent::SignalProcessRequest { process_id, signal })
|
.signal_process(agent::SignalProcessRequest { process_id, signal })
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
|
self.clean_volumes().await.context("clean volumes")?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -268,4 +271,23 @@ impl ContainerInner {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn clean_volumes(&mut self) -> Result<()> {
|
||||||
|
let mut unhandled = Vec::new();
|
||||||
|
for v in self.volumes.iter() {
|
||||||
|
if let Err(err) = v.cleanup().await {
|
||||||
|
unhandled.push(Arc::clone(v));
|
||||||
|
warn!(
|
||||||
|
sl!(),
|
||||||
|
"Failed to clean volume {:?}, error = {:?}",
|
||||||
|
v.get_volume_mount(),
|
||||||
|
err
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !unhandled.is_empty() {
|
||||||
|
self.volumes = unhandled;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,11 @@
|
|||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
io,
|
io,
|
||||||
os::unix::{io::FromRawFd, net::UnixStream as StdUnixStream},
|
os::unix::{
|
||||||
|
io::{FromRawFd, RawFd},
|
||||||
|
net::UnixStream as StdUnixStream,
|
||||||
|
prelude::AsRawFd,
|
||||||
|
},
|
||||||
pin::Pin,
|
pin::Pin,
|
||||||
task::Context as TaskContext,
|
task::Context as TaskContext,
|
||||||
task::Poll,
|
task::Poll,
|
||||||
@@ -52,8 +56,21 @@ impl ShimIo {
|
|||||||
"new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr
|
"new shim io stdin {:?} stdout {:?} stderr {:?}", stdin, stdout, stderr
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let set_flag_with_blocking = |fd: RawFd| {
|
||||||
|
let flag = unsafe { libc::fcntl(fd, libc::F_GETFL) };
|
||||||
|
let ret = unsafe { libc::fcntl(fd, libc::F_SETFL, flag & !libc::O_NONBLOCK) };
|
||||||
|
if ret < 0 {
|
||||||
|
error!(sl!(), "failed to set fcntl for fd {} error {}", fd, ret);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let stdin_fd: Option<Box<dyn AsyncRead + Send + Unpin>> = if let Some(stdin) = stdin {
|
let stdin_fd: Option<Box<dyn AsyncRead + Send + Unpin>> = if let Some(stdin) = stdin {
|
||||||
info!(sl!(), "open stdin {:?}", &stdin);
|
info!(sl!(), "open stdin {:?}", &stdin);
|
||||||
|
|
||||||
|
// Since the stdin peer end (which is held by containerd) may not be opened
|
||||||
|
// immediately, a blocking open here would block, and we don't want to block
|
||||||
|
// here; thus it is opened in non-blocking mode and then reset to blocking mode
|
||||||
|
// for tokio async io.
|
||||||
match OpenOptions::new()
|
match OpenOptions::new()
|
||||||
.read(true)
|
.read(true)
|
||||||
.write(false)
|
.write(false)
|
||||||
@@ -61,7 +78,11 @@ impl ShimIo {
|
|||||||
.open(&stdin)
|
.open(&stdin)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(file) => Some(Box::new(file)),
|
Ok(file) => {
|
||||||
|
// Set it to blocking to avoid infinitely handling EAGAIN when the reader is empty
|
||||||
|
set_flag_with_blocking(file.as_raw_fd());
|
||||||
|
Some(Box::new(file))
|
||||||
|
}
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
error!(sl!(), "failed to open {} error {:?}", &stdin, err);
|
error!(sl!(), "failed to open {} error {:?}", &stdin, err);
|
||||||
None
|
None
|
||||||
|
|||||||
@@ -133,22 +133,14 @@ impl Process {
|
|||||||
let io_name = io_name.to_string();
|
let io_name = io_name.to_string();
|
||||||
let logger = self.logger.new(o!("io_name" => io_name));
|
let logger = self.logger.new(o!("io_name" => io_name));
|
||||||
let _ = tokio::spawn(async move {
|
let _ = tokio::spawn(async move {
|
||||||
loop {
|
|
||||||
match tokio::io::copy(&mut reader, &mut writer).await {
|
match tokio::io::copy(&mut reader, &mut writer).await {
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if let Some(error_code) = e.raw_os_error() {
|
|
||||||
if error_code == libc::EAGAIN {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
warn!(logger, "run_io_copy: failed to copy stream: {}", e);
|
warn!(logger, "run_io_copy: failed to copy stream: {}", e);
|
||||||
}
|
}
|
||||||
Ok(length) => {
|
Ok(length) => {
|
||||||
warn!(logger, "run_io_copy: stop to copy stream length {}", length)
|
info!(logger, "run_io_copy: stop to copy stream length {}", length)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
wgw.done();
|
wgw.done();
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -21,7 +21,10 @@ use anyhow::{anyhow, Context, Result};
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
use common::{message::Message, RuntimeHandler, RuntimeInstance};
|
||||||
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
|
use hypervisor::{dragonball::Dragonball, Hypervisor, HYPERVISOR_DRAGONBALL};
|
||||||
use kata_types::config::{hypervisor::register_hypervisor_plugin, DragonballConfig, TomlConfig};
|
use hypervisor::{qemu::Qemu, HYPERVISOR_QEMU};
|
||||||
|
use kata_types::config::{
|
||||||
|
hypervisor::register_hypervisor_plugin, DragonballConfig, QemuConfig, TomlConfig,
|
||||||
|
};
|
||||||
use resource::ResourceManager;
|
use resource::ResourceManager;
|
||||||
use sandbox::VIRTCONTAINER;
|
use sandbox::VIRTCONTAINER;
|
||||||
use tokio::sync::mpsc::Sender;
|
use tokio::sync::mpsc::Sender;
|
||||||
@@ -36,6 +39,8 @@ impl RuntimeHandler for VirtContainer {
|
|||||||
// register
|
// register
|
||||||
let dragonball_config = Arc::new(DragonballConfig::new());
|
let dragonball_config = Arc::new(DragonballConfig::new());
|
||||||
register_hypervisor_plugin("dragonball", dragonball_config);
|
register_hypervisor_plugin("dragonball", dragonball_config);
|
||||||
|
let qemu_config = Arc::new(QemuConfig::new());
|
||||||
|
register_hypervisor_plugin("qemu", qemu_config);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -106,6 +111,13 @@ async fn new_hypervisor(toml_config: &TomlConfig) -> Result<Arc<dyn Hypervisor>>
|
|||||||
.await;
|
.await;
|
||||||
Ok(Arc::new(hypervisor))
|
Ok(Arc::new(hypervisor))
|
||||||
}
|
}
|
||||||
|
HYPERVISOR_QEMU => {
|
||||||
|
let mut hypervisor = Qemu::new();
|
||||||
|
hypervisor
|
||||||
|
.set_hypervisor_config(hypervisor_config.clone())
|
||||||
|
.await;
|
||||||
|
Ok(Arc::new(hypervisor))
|
||||||
|
}
|
||||||
_ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)),
|
_ => Err(anyhow!("Unsupported hypervisor {}", &hypervisor_name)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -149,4 +161,27 @@ agent_name="kata"
|
|||||||
let res = new_agent(&toml_config);
|
let res = new_agent(&toml_config);
|
||||||
assert!(res.is_ok());
|
assert!(res.is_ok());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_new_hypervisor() {
|
||||||
|
VirtContainer::init().unwrap();
|
||||||
|
|
||||||
|
let toml_config = {
|
||||||
|
let config_content = r#"
|
||||||
|
[hypervisor.qemu]
|
||||||
|
path = "/bin/echo"
|
||||||
|
kernel = "/bin/echo"
|
||||||
|
image = "/bin/echo"
|
||||||
|
firmware = ""
|
||||||
|
|
||||||
|
[runtime]
|
||||||
|
hypervisor_name="qemu"
|
||||||
|
"#;
|
||||||
|
TomlConfig::load(config_content).map_err(|e| anyhow!("can not load config toml: {}", e))
|
||||||
|
}
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let res = new_hypervisor(&toml_config).await;
|
||||||
|
assert!(res.is_ok());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
14
src/runtime-rs/crates/shim-ctl/Cargo.toml
Normal file
14
src/runtime-rs/crates/shim-ctl/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
[package]
|
||||||
|
name = "shim-ctl"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "^1.0"
|
||||||
|
common = { path = "../runtimes/common" }
|
||||||
|
logging = { path = "../../../libs/logging"}
|
||||||
|
runtimes = { path = "../runtimes" }
|
||||||
|
tokio = { version = "1.8.0", features = [ "rt", "rt-multi-thread" ] }
|
||||||
|
|
||||||
51
src/runtime-rs/crates/shim-ctl/README.md
Normal file
51
src/runtime-rs/crates/shim-ctl/README.md
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
### Purpose
|
||||||
|
`shim-ctl` is a binary to exercise the shim proper without containerd
|
||||||
|
dependencies.
|
||||||
|
|
||||||
|
The actual Kata shim is hard to execute outside of deployment environments due
|
||||||
|
to its dependency on containerd's shim v2 protocol. Among other things, the
|
||||||
|
dependency requires having a socket with a remote end that's capable of driving
|
||||||
|
the shim using the shim v2 `ttrpc` protocol, and a binary for shim to publish
|
||||||
|
events to.
|
||||||
|
|
||||||
|
Since at least some of the shim v2 protocol dependencies are fairly hard to
|
||||||
|
mock up, this presents a significant obstacle to development.
|
||||||
|
|
||||||
|
`shim-ctl` takes advantage of the fact that due to the shim implementation
|
||||||
|
architecture, only the outermost couple of shim layers are
|
||||||
|
containerd-dependent and all of the inner layers that do the actual heavy
|
||||||
|
lifting don't depend on containerd. This allows `shim-ctl` to replace the
|
||||||
|
containerd-dependent layers with something that's easier to use on a
|
||||||
|
developer's machine.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
After building the binary as usual with `cargo build`, run `shim-ctl` as follows.
|
||||||
|
|
||||||
|
Even though `shim-ctl` does away with containerd dependencies it still has
|
||||||
|
some requirements of its execution environment. In particular, it needs a
|
||||||
|
Kata `configuration.toml` file, some Kata distribution files to point a bunch
|
||||||
|
of `configuration.toml` keys to (like hypervisor keys `path`, `kernel` or
|
||||||
|
`initrd`) and a container bundle. These are however much easier to fulfill
|
||||||
|
than the original containerd dependencies, and doing so is a one-off task -
|
||||||
|
once done they can be reused for an unlimited number of modify-build-run
|
||||||
|
development cycles.
|
||||||
|
|
||||||
|
`shim-ctl` also needs to be launched from an exported container bundle
|
||||||
|
directory. One can be created by running
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir rootfs
|
||||||
|
podman export $(podman create busybox) | tar -C ./rootfs -xvf -
|
||||||
|
runc spec -b .
|
||||||
|
```
|
||||||
|
|
||||||
|
in a suitable directory.
|
||||||
|
|
||||||
|
The program can then be launched like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
cd /the/bundle/directory
|
||||||
|
KATA_CONF_FILE=/path/to/configuration-qemu.toml /path/to/shim-ctl
|
||||||
|
```
|
||||||
|
|
||||||
45
src/runtime-rs/crates/shim-ctl/src/main.rs
Normal file
45
src/runtime-rs/crates/shim-ctl/src/main.rs
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
// Copyright (c) 2022 Red Hat
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use common::{
|
||||||
|
message::Message,
|
||||||
|
types::{ContainerConfig, Request},
|
||||||
|
};
|
||||||
|
use runtimes::RuntimeHandlerManager;
|
||||||
|
use tokio::sync::mpsc::channel;
|
||||||
|
|
||||||
|
// Capacity passed to `channel::<Message>` when `real_main` creates the message channel.
const MESSAGE_BUFFER_SIZE: usize = 8;
|
||||||
|
// Value passed to `worker_threads` when `main` builds the tokio runtime.
const WORKER_THREADS: usize = 2;
|
||||||
|
|
||||||
|
async fn real_main() {
|
||||||
|
let (sender, _receiver) = channel::<Message>(MESSAGE_BUFFER_SIZE);
|
||||||
|
let manager = RuntimeHandlerManager::new("xxx", sender).await.unwrap();
|
||||||
|
|
||||||
|
let req = Request::CreateContainer(ContainerConfig {
|
||||||
|
container_id: "xxx".to_owned(),
|
||||||
|
bundle: ".".to_owned(),
|
||||||
|
rootfs_mounts: Vec::new(),
|
||||||
|
terminal: false,
|
||||||
|
options: None,
|
||||||
|
stdin: None,
|
||||||
|
stdout: None,
|
||||||
|
stderr: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
manager.handler_message(req).await.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||||
|
.worker_threads(WORKER_THREADS)
|
||||||
|
.enable_all()
|
||||||
|
.build()
|
||||||
|
.context("prepare tokio runtime")?;
|
||||||
|
|
||||||
|
runtime.block_on(real_main());
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
@@ -197,6 +197,11 @@ DEFDISABLEGUESTEMPTYDIR := false
|
|||||||
DEFAULTEXPFEATURES := []
|
DEFAULTEXPFEATURES := []
|
||||||
|
|
||||||
DEFDISABLESELINUX := false
|
DEFDISABLESELINUX := false
|
||||||
|
|
||||||
|
# Default guest SELinux configuration
|
||||||
|
DEFDISABLEGUESTSELINUX := true
|
||||||
|
DEFGUESTSELINUXLABEL := system_u:system_r:container_t
|
||||||
|
|
||||||
#Default SeccompSandbox param
|
#Default SeccompSandbox param
|
||||||
#The same default policy is used by libvirt
|
#The same default policy is used by libvirt
|
||||||
#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
|
#More explanation on https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg03348.html
|
||||||
@@ -562,6 +567,8 @@ USER_VARS += DEFNETWORKMODEL_QEMU
|
|||||||
USER_VARS += DEFDISABLEGUESTEMPTYDIR
|
USER_VARS += DEFDISABLEGUESTEMPTYDIR
|
||||||
USER_VARS += DEFDISABLEGUESTSECCOMP
|
USER_VARS += DEFDISABLEGUESTSECCOMP
|
||||||
USER_VARS += DEFDISABLESELINUX
|
USER_VARS += DEFDISABLESELINUX
|
||||||
|
USER_VARS += DEFDISABLEGUESTSELINUX
|
||||||
|
USER_VARS += DEFGUESTSELINUXLABEL
|
||||||
USER_VARS += DEFAULTEXPFEATURES
|
USER_VARS += DEFAULTEXPFEATURES
|
||||||
USER_VARS += DEFDISABLEBLOCK
|
USER_VARS += DEFDISABLEBLOCK
|
||||||
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
|
USER_VARS += DEFBLOCKSTORAGEDRIVER_ACRN
|
||||||
|
|||||||
@@ -76,6 +76,7 @@ type RuntimeConfigInfo struct {
|
|||||||
type RuntimeInfo struct {
|
type RuntimeInfo struct {
|
||||||
Config RuntimeConfigInfo
|
Config RuntimeConfigInfo
|
||||||
Path string
|
Path string
|
||||||
|
GuestSeLinuxLabel string
|
||||||
Experimental []exp.Feature
|
Experimental []exp.Feature
|
||||||
Version RuntimeVersionInfo
|
Version RuntimeVersionInfo
|
||||||
Debug bool
|
Debug bool
|
||||||
@@ -186,6 +187,7 @@ func getRuntimeInfo(configFile string, config oci.RuntimeConfig) RuntimeInfo {
|
|||||||
SandboxCgroupOnly: config.SandboxCgroupOnly,
|
SandboxCgroupOnly: config.SandboxCgroupOnly,
|
||||||
Experimental: config.Experimental,
|
Experimental: config.Experimental,
|
||||||
DisableGuestSeccomp: config.DisableGuestSeccomp,
|
DisableGuestSeccomp: config.DisableGuestSeccomp,
|
||||||
|
GuestSeLinuxLabel: config.GuestSeLinuxLabel,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,6 +38,13 @@ image = "@IMAGEPATH@"
|
|||||||
# disable applying SELinux on the VMM process (default false)
|
# disable applying SELinux on the VMM process (default false)
|
||||||
disable_selinux=@DEFDISABLESELINUX@
|
disable_selinux=@DEFDISABLESELINUX@
|
||||||
|
|
||||||
|
# disable applying SELinux on the container process
|
||||||
|
# If set to false, the type `container_t` is applied to the container process by default.
|
||||||
|
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||||
|
# with `SELINUX=yes`.
|
||||||
|
# (default: true)
|
||||||
|
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
|
||||||
|
|
||||||
# Path to the firmware.
|
# Path to the firmware.
|
||||||
# If you want Cloud Hypervisor to use a specific firmware, set its path below.
|
# If you want Cloud Hypervisor to use a specific firmware, set its path below.
|
||||||
# This option is only used when confidential_guest is enabled.
|
# This option is only used when confidential_guest is enabled.
|
||||||
@@ -321,6 +328,14 @@ internetworking_model="@DEFNETWORKMODEL_CLH@"
|
|||||||
# (default: true)
|
# (default: true)
|
||||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||||
|
|
||||||
|
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||||
|
# This is used when you want to apply a type other than the default `container_t`,
|
||||||
|
# so general users should not uncomment and apply it.
|
||||||
|
# (format: "user:role:type")
|
||||||
|
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||||
|
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||||
|
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
|
||||||
|
|
||||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||||
# (default: disabled)
|
# (default: disabled)
|
||||||
|
|||||||
@@ -463,6 +463,14 @@ valid_entropy_sources = @DEFVALIDENTROPYSOURCES@
|
|||||||
# disable applying SELinux on the VMM process (default false)
|
# disable applying SELinux on the VMM process (default false)
|
||||||
disable_selinux=@DEFDISABLESELINUX@
|
disable_selinux=@DEFDISABLESELINUX@
|
||||||
|
|
||||||
|
# disable applying SELinux on the container process
|
||||||
|
# If set to false, the type `container_t` is applied to the container process by default.
|
||||||
|
# Note: To enable guest SELinux, the guest rootfs must be CentOS that is created and built
|
||||||
|
# with `SELINUX=yes`.
|
||||||
|
# (default: true)
|
||||||
|
disable_guest_selinux=@DEFDISABLEGUESTSELINUX@
|
||||||
|
|
||||||
|
|
||||||
[factory]
|
[factory]
|
||||||
# VM templating support. Once enabled, new VMs are created from template
|
# VM templating support. Once enabled, new VMs are created from template
|
||||||
# using vm cloning. They will share the same initial kernel, initramfs and
|
# using vm cloning. They will share the same initial kernel, initramfs and
|
||||||
@@ -580,6 +588,14 @@ internetworking_model="@DEFNETWORKMODEL_QEMU@"
|
|||||||
# (default: true)
|
# (default: true)
|
||||||
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
disable_guest_seccomp=@DEFDISABLEGUESTSECCOMP@
|
||||||
|
|
||||||
|
# Apply a custom SELinux security policy to the container process inside the VM.
|
||||||
|
# This is used when you want to apply a type other than the default `container_t`,
|
||||||
|
# so general users should not uncomment and apply it.
|
||||||
|
# (format: "user:role:type")
|
||||||
|
# Note: You cannot specify MCS policy with the label because the sensitivity levels and
|
||||||
|
# categories are determined automatically by high-level container runtimes such as containerd.
|
||||||
|
#guest_selinux_label="@DEFGUESTSELINUXLABEL@"
|
||||||
|
|
||||||
# If enabled, the runtime will create opentracing.io traces and spans.
|
# If enabled, the runtime will create opentracing.io traces and spans.
|
||||||
# (See https://www.jaegertracing.io/docs/getting-started).
|
# (See https://www.jaegertracing.io/docs/getting-started).
|
||||||
# (default: disabled)
|
# (default: disabled)
|
||||||
|
|||||||
@@ -90,6 +90,7 @@ const defaultSevSnpGuest = false
|
|||||||
const defaultGuestSwap = false
|
const defaultGuestSwap = false
|
||||||
const defaultRootlessHypervisor = false
|
const defaultRootlessHypervisor = false
|
||||||
const defaultDisableSeccomp = false
|
const defaultDisableSeccomp = false
|
||||||
|
const defaultDisableGuestSeLinux = true
|
||||||
const defaultVfioMode = "guest-kernel"
|
const defaultVfioMode = "guest-kernel"
|
||||||
const defaultLegacySerial = false
|
const defaultLegacySerial = false
|
||||||
const defaultGuestPreAttestation = false
|
const defaultGuestPreAttestation = false
|
||||||
|
|||||||
@@ -60,9 +60,9 @@ const (
|
|||||||
type tomlConfig struct {
|
type tomlConfig struct {
|
||||||
Hypervisor map[string]hypervisor
|
Hypervisor map[string]hypervisor
|
||||||
Agent map[string]agent
|
Agent map[string]agent
|
||||||
Runtime runtime
|
|
||||||
Image image
|
Image image
|
||||||
Factory factory
|
Factory factory
|
||||||
|
Runtime runtime
|
||||||
}
|
}
|
||||||
|
|
||||||
type image struct {
|
type image struct {
|
||||||
@@ -164,6 +164,7 @@ type hypervisor struct {
|
|||||||
Rootless bool `toml:"rootless"`
|
Rootless bool `toml:"rootless"`
|
||||||
DisableSeccomp bool `toml:"disable_seccomp"`
|
DisableSeccomp bool `toml:"disable_seccomp"`
|
||||||
DisableSeLinux bool `toml:"disable_selinux"`
|
DisableSeLinux bool `toml:"disable_selinux"`
|
||||||
|
DisableGuestSeLinux bool `toml:"disable_guest_selinux"`
|
||||||
LegacySerial bool `toml:"use_legacy_serial"`
|
LegacySerial bool `toml:"use_legacy_serial"`
|
||||||
GuestPreAttestation bool `toml:"guest_pre_attestation"`
|
GuestPreAttestation bool `toml:"guest_pre_attestation"`
|
||||||
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
|
EnableVCPUsPinning bool `toml:"enable_vcpus_pinning"`
|
||||||
@@ -175,12 +176,13 @@ type runtime struct {
|
|||||||
JaegerUser string `toml:"jaeger_user"`
|
JaegerUser string `toml:"jaeger_user"`
|
||||||
JaegerPassword string `toml:"jaeger_password"`
|
JaegerPassword string `toml:"jaeger_password"`
|
||||||
VfioMode string `toml:"vfio_mode"`
|
VfioMode string `toml:"vfio_mode"`
|
||||||
|
GuestSeLinuxLabel string `toml:"guest_selinux_label"`
|
||||||
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
SandboxBindMounts []string `toml:"sandbox_bind_mounts"`
|
||||||
Experimental []string `toml:"experimental"`
|
Experimental []string `toml:"experimental"`
|
||||||
Debug bool `toml:"enable_debug"`
|
|
||||||
Tracing bool `toml:"enable_tracing"`
|
Tracing bool `toml:"enable_tracing"`
|
||||||
DisableNewNetNs bool `toml:"disable_new_netns"`
|
DisableNewNetNs bool `toml:"disable_new_netns"`
|
||||||
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
|
DisableGuestSeccomp bool `toml:"disable_guest_seccomp"`
|
||||||
|
Debug bool `toml:"enable_debug"`
|
||||||
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
|
SandboxCgroupOnly bool `toml:"sandbox_cgroup_only"`
|
||||||
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
|
StaticSandboxResourceMgmt bool `toml:"static_sandbox_resource_mgmt"`
|
||||||
EnablePprof bool `toml:"enable_pprof"`
|
EnablePprof bool `toml:"enable_pprof"`
|
||||||
@@ -701,6 +703,7 @@ func newFirecrackerHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
TxRateLimiterMaxRate: txRateLimiterMaxRate,
|
||||||
EnableAnnotations: h.EnableAnnotations,
|
EnableAnnotations: h.EnableAnnotations,
|
||||||
DisableSeLinux: h.DisableSeLinux,
|
DisableSeLinux: h.DisableSeLinux,
|
||||||
|
DisableGuestSeLinux: true, // Guest SELinux is not supported in Firecracker
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -854,6 +857,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
SEVGuestPolicy: h.SEVGuestPolicy,
|
SEVGuestPolicy: h.SEVGuestPolicy,
|
||||||
SEVCertChainPath: h.SEVCertChainPath,
|
SEVCertChainPath: h.SEVCertChainPath,
|
||||||
EnableVCPUsPinning: h.EnableVCPUsPinning,
|
EnableVCPUsPinning: h.EnableVCPUsPinning,
|
||||||
|
DisableGuestSeLinux: h.DisableGuestSeLinux,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -920,6 +924,7 @@ func newAcrnHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
GuestHookPath: h.guestHookPath(),
|
GuestHookPath: h.guestHookPath(),
|
||||||
DisableSeLinux: h.DisableSeLinux,
|
DisableSeLinux: h.DisableSeLinux,
|
||||||
EnableAnnotations: h.EnableAnnotations,
|
EnableAnnotations: h.EnableAnnotations,
|
||||||
|
DisableGuestSeLinux: true, // Guest SELinux is not supported in ACRN
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1025,6 +1030,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
|
|||||||
DisableSeccomp: h.DisableSeccomp,
|
DisableSeccomp: h.DisableSeccomp,
|
||||||
ConfidentialGuest: h.ConfidentialGuest,
|
ConfidentialGuest: h.ConfidentialGuest,
|
||||||
DisableSeLinux: h.DisableSeLinux,
|
DisableSeLinux: h.DisableSeLinux,
|
||||||
|
DisableGuestSeLinux: h.DisableGuestSeLinux,
|
||||||
NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(),
|
NetRateLimiterBwMaxRate: h.getNetRateLimiterBwMaxRate(),
|
||||||
NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(),
|
NetRateLimiterBwOneTimeBurst: h.getNetRateLimiterBwOneTimeBurst(),
|
||||||
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(),
|
NetRateLimiterOpsMaxRate: h.getNetRateLimiterOpsMaxRate(),
|
||||||
@@ -1262,6 +1268,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
|
|||||||
GuestSwap: defaultGuestSwap,
|
GuestSwap: defaultGuestSwap,
|
||||||
Rootless: defaultRootlessHypervisor,
|
Rootless: defaultRootlessHypervisor,
|
||||||
DisableSeccomp: defaultDisableSeccomp,
|
DisableSeccomp: defaultDisableSeccomp,
|
||||||
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
LegacySerial: defaultLegacySerial,
|
LegacySerial: defaultLegacySerial,
|
||||||
GuestPreAttestation: defaultGuestPreAttestation,
|
GuestPreAttestation: defaultGuestPreAttestation,
|
||||||
GuestPreAttestationProxy: defaultGuestPreAttestationProxy,
|
GuestPreAttestationProxy: defaultGuestPreAttestationProxy,
|
||||||
@@ -1356,7 +1363,7 @@ func LoadConfiguration(configPath string, ignoreLogging bool) (resolvedConfigPat
|
|||||||
}
|
}
|
||||||
|
|
||||||
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
|
config.DisableGuestSeccomp = tomlConf.Runtime.DisableGuestSeccomp
|
||||||
|
config.GuestSeLinuxLabel = tomlConf.Runtime.GuestSeLinuxLabel
|
||||||
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
|
config.StaticSandboxResourceMgmt = tomlConf.Runtime.StaticSandboxResourceMgmt
|
||||||
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
|
config.SandboxCgroupOnly = tomlConf.Runtime.SandboxCgroupOnly
|
||||||
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
|
config.DisableNewNetNs = tomlConf.Runtime.DisableNewNetNs
|
||||||
|
|||||||
@@ -554,6 +554,7 @@ func TestMinimalRuntimeConfig(t *testing.T) {
|
|||||||
VhostUserStorePath: defaultVhostUserStorePath,
|
VhostUserStorePath: defaultVhostUserStorePath,
|
||||||
VirtioFSCache: defaultVirtioFSCacheMode,
|
VirtioFSCache: defaultVirtioFSCacheMode,
|
||||||
BlockDeviceAIO: defaultBlockDeviceAIO,
|
BlockDeviceAIO: defaultBlockDeviceAIO,
|
||||||
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedAgentConfig := vc.KataAgentConfig{
|
expectedAgentConfig := vc.KataAgentConfig{
|
||||||
|
|||||||
@@ -128,6 +128,9 @@ type RuntimeConfig struct {
|
|||||||
//Determines if seccomp should be applied inside guest
|
//Determines if seccomp should be applied inside guest
|
||||||
DisableGuestSeccomp bool
|
DisableGuestSeccomp bool
|
||||||
|
|
||||||
|
//SELinux security context applied to the container process inside guest.
|
||||||
|
GuestSeLinuxLabel string
|
||||||
|
|
||||||
// Sandbox sizing information which, if provided, indicates the size of
|
// Sandbox sizing information which, if provided, indicates the size of
|
||||||
// the sandbox needed for the workload(s)
|
// the sandbox needed for the workload(s)
|
||||||
SandboxCPUs uint32
|
SandboxCPUs uint32
|
||||||
@@ -948,6 +951,8 @@ func SandboxConfig(ocispec specs.Spec, runtime RuntimeConfig, bundlePath, cid st
|
|||||||
|
|
||||||
DisableGuestSeccomp: runtime.DisableGuestSeccomp,
|
DisableGuestSeccomp: runtime.DisableGuestSeccomp,
|
||||||
|
|
||||||
|
GuestSeLinuxLabel: runtime.GuestSeLinuxLabel,
|
||||||
|
|
||||||
Experimental: runtime.Experimental,
|
Experimental: runtime.Experimental,
|
||||||
|
|
||||||
ServiceOffload: runtime.ServiceOffload,
|
ServiceOffload: runtime.ServiceOffload,
|
||||||
|
|||||||
@@ -77,6 +77,8 @@ const (
|
|||||||
MinHypervisorMemory = 256
|
MinHypervisorMemory = 256
|
||||||
|
|
||||||
defaultMsize9p = 8192
|
defaultMsize9p = 8192
|
||||||
|
|
||||||
|
defaultDisableGuestSeLinux = true
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -356,6 +358,7 @@ type HypervisorConfig struct {
|
|||||||
Rootless bool
|
Rootless bool
|
||||||
DisableSeccomp bool
|
DisableSeccomp bool
|
||||||
DisableSeLinux bool
|
DisableSeLinux bool
|
||||||
|
DisableGuestSeLinux bool
|
||||||
LegacySerial bool
|
LegacySerial bool
|
||||||
EnableVCPUsPinning bool
|
EnableVCPUsPinning bool
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ func TestHypervisorConfigDefaults(t *testing.T) {
|
|||||||
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
KernelPath: fmt.Sprintf("%s/%s", testDir, testKernel),
|
||||||
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
ImagePath: fmt.Sprintf("%s/%s", testDir, testImage),
|
||||||
HypervisorPath: "",
|
HypervisorPath: "",
|
||||||
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
}
|
}
|
||||||
testHypervisorConfigValid(t, hypervisorConfig, true)
|
testHypervisorConfigValid(t, hypervisorConfig, true)
|
||||||
|
|
||||||
@@ -108,6 +109,7 @@ func TestHypervisorConfigDefaults(t *testing.T) {
|
|||||||
BlockDeviceDriver: defaultBlockDriver,
|
BlockDeviceDriver: defaultBlockDriver,
|
||||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected)
|
assert.Exactly(hypervisorConfig, hypervisorConfigDefaultsExpected)
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"github.com/gogo/protobuf/proto"
|
"github.com/gogo/protobuf/proto"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
"github.com/opencontainers/selinux/go-selinux"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
"google.golang.org/grpc/codes"
|
"google.golang.org/grpc/codes"
|
||||||
@@ -70,6 +71,9 @@ const (
|
|||||||
kernelParamDebugConsole = "agent.debug_console"
|
kernelParamDebugConsole = "agent.debug_console"
|
||||||
kernelParamDebugConsoleVPort = "agent.debug_console_vport"
|
kernelParamDebugConsoleVPort = "agent.debug_console_vport"
|
||||||
kernelParamDebugConsoleVPortValue = "1026"
|
kernelParamDebugConsoleVPortValue = "1026"
|
||||||
|
|
||||||
|
// Default SELinux type applied to the container process inside guest
|
||||||
|
defaultSeLinuxContainerType = "container_t"
|
||||||
)
|
)
|
||||||
|
|
||||||
type customRequestTimeoutKeyType struct{}
|
type customRequestTimeoutKeyType struct{}
|
||||||
@@ -906,7 +910,7 @@ func (k *kataAgent) removeIgnoredOCIMount(spec *specs.Spec, ignoredMounts map[st
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, stripVfio bool) {
|
func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, disableGuestSeLinux bool, guestSeLinuxLabel string, stripVfio bool) error {
|
||||||
// Disable Hooks since they have been handled on the host and there is
|
// Disable Hooks since they have been handled on the host and there is
|
||||||
// no reason to send them to the agent. It would make no sense to try
|
// no reason to send them to the agent. It would make no sense to try
|
||||||
// to apply them on the guest.
|
// to apply them on the guest.
|
||||||
@@ -918,11 +922,34 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
|
|||||||
grpcSpec.Linux.Seccomp = nil
|
grpcSpec.Linux.Seccomp = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable SELinux inside of the virtual machine, the label will apply
|
// Pass SELinux label for the container process to the agent.
|
||||||
// to the KVM process
|
|
||||||
if grpcSpec.Process.SelinuxLabel != "" {
|
if grpcSpec.Process.SelinuxLabel != "" {
|
||||||
k.Logger().Info("SELinux label from config will be applied to the hypervisor process, not the VM workload")
|
if !disableGuestSeLinux {
|
||||||
|
k.Logger().Info("SELinux label will be applied to the container process inside guest")
|
||||||
|
|
||||||
|
var label string
|
||||||
|
if guestSeLinuxLabel != "" {
|
||||||
|
label = guestSeLinuxLabel
|
||||||
|
} else {
|
||||||
|
label = grpcSpec.Process.SelinuxLabel
|
||||||
|
}
|
||||||
|
|
||||||
|
processContext, err := selinux.NewContext(label)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change the type from KVM to container because the type passed from the high-level
|
||||||
|
// runtime is for KVM process.
|
||||||
|
if guestSeLinuxLabel == "" {
|
||||||
|
processContext["type"] = defaultSeLinuxContainerType
|
||||||
|
}
|
||||||
|
grpcSpec.Process.SelinuxLabel = processContext.Get()
|
||||||
|
} else {
|
||||||
|
k.Logger().Info("Empty SELinux label for the process and the mount because guest SELinux is disabled")
|
||||||
grpcSpec.Process.SelinuxLabel = ""
|
grpcSpec.Process.SelinuxLabel = ""
|
||||||
|
grpcSpec.Linux.MountLabel = ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// By now only CPU constraints are supported
|
// By now only CPU constraints are supported
|
||||||
@@ -984,6 +1011,8 @@ func (k *kataAgent) constrainGRPCSpec(grpcSpec *grpc.Spec, passSeccomp bool, str
|
|||||||
}
|
}
|
||||||
grpcSpec.Linux.Devices = linuxDevices
|
grpcSpec.Linux.Devices = linuxDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
|
func (k *kataAgent) handleShm(mounts []specs.Mount, sandbox *Sandbox) {
|
||||||
@@ -1267,9 +1296,20 @@ func (k *kataAgent) createContainer(ctx context.Context, sandbox *Sandbox, c *Co
|
|||||||
|
|
||||||
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
|
passSeccomp := !sandbox.config.DisableGuestSeccomp && sandbox.seccompSupported
|
||||||
|
|
||||||
|
// Currently, guest SELinux can be enabled only when SELinux is enabled on the host side.
|
||||||
|
if !sandbox.config.HypervisorConfig.DisableGuestSeLinux && !selinux.GetEnabled() {
|
||||||
|
return nil, fmt.Errorf("Guest SELinux is enabled, but SELinux is disabled on the host side")
|
||||||
|
}
|
||||||
|
if sandbox.config.HypervisorConfig.DisableGuestSeLinux && sandbox.config.GuestSeLinuxLabel != "" {
|
||||||
|
return nil, fmt.Errorf("Custom SELinux security policy is provided, but guest SELinux is disabled")
|
||||||
|
}
|
||||||
|
|
||||||
// We need to constrain the spec to make sure we're not
|
// We need to constrain the spec to make sure we're not
|
||||||
// passing irrelevant information to the agent.
|
// passing irrelevant information to the agent.
|
||||||
k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
err = k.constrainGRPCSpec(grpcSpec, passSeccomp, sandbox.config.HypervisorConfig.DisableGuestSeLinux, sandbox.config.GuestSeLinuxLabel, sandbox.config.VfioMode == config.VFIOModeGuestKernel)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
req := &grpc.CreateContainerRequest{
|
req := &grpc.CreateContainerRequest{
|
||||||
ContainerId: c.id,
|
ContainerId: c.id,
|
||||||
|
|||||||
@@ -619,7 +619,7 @@ func TestConstrainGRPCSpec(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
k := kataAgent{}
|
k := kataAgent{}
|
||||||
k.constrainGRPCSpec(g, true, true)
|
k.constrainGRPCSpec(g, true, true, "", true)
|
||||||
|
|
||||||
// Check nil fields
|
// Check nil fields
|
||||||
assert.Nil(g.Hooks)
|
assert.Nil(g.Hooks)
|
||||||
|
|||||||
@@ -189,6 +189,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
|
|||||||
SystemdCgroup: sconfig.SystemdCgroup,
|
SystemdCgroup: sconfig.SystemdCgroup,
|
||||||
SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
|
SandboxCgroupOnly: sconfig.SandboxCgroupOnly,
|
||||||
DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
|
DisableGuestSeccomp: sconfig.DisableGuestSeccomp,
|
||||||
|
GuestSeLinuxLabel: sconfig.GuestSeLinuxLabel,
|
||||||
}
|
}
|
||||||
|
|
||||||
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
|
ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...)
|
||||||
@@ -429,6 +430,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
|
|||||||
SystemdCgroup: savedConf.SystemdCgroup,
|
SystemdCgroup: savedConf.SystemdCgroup,
|
||||||
SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
|
SandboxCgroupOnly: savedConf.SandboxCgroupOnly,
|
||||||
DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
|
DisableGuestSeccomp: savedConf.DisableGuestSeccomp,
|
||||||
|
GuestSeLinuxLabel: savedConf.GuestSeLinuxLabel,
|
||||||
}
|
}
|
||||||
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)
|
sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...)
|
||||||
|
|
||||||
|
|||||||
@@ -243,19 +243,6 @@ type ContainerConfig struct {
|
|||||||
// SandboxConfig is a sandbox configuration.
|
// SandboxConfig is a sandbox configuration.
|
||||||
// Refs: virtcontainers/sandbox.go:SandboxConfig
|
// Refs: virtcontainers/sandbox.go:SandboxConfig
|
||||||
type SandboxConfig struct {
|
type SandboxConfig struct {
|
||||||
// Information for fields not saved:
|
|
||||||
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
|
|
||||||
// if you know this data needs to persist, please gives it
|
|
||||||
// a specific field
|
|
||||||
|
|
||||||
ContainerConfigs []ContainerConfig
|
|
||||||
|
|
||||||
// SandboxBindMounts - list of paths to mount into guest
|
|
||||||
SandboxBindMounts []string
|
|
||||||
|
|
||||||
// Experimental enables experimental features
|
|
||||||
Experimental []string
|
|
||||||
|
|
||||||
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||||
// placed into to limit the resources the container has available
|
// placed into to limit the resources the container has available
|
||||||
Cgroups *configs.Cgroup `json:"cgroups"`
|
Cgroups *configs.Cgroup `json:"cgroups"`
|
||||||
@@ -265,8 +252,24 @@ type SandboxConfig struct {
|
|||||||
|
|
||||||
KataShimConfig *ShimConfig
|
KataShimConfig *ShimConfig
|
||||||
|
|
||||||
|
// Custom SELinux security policy to the container process inside the VM
|
||||||
|
GuestSeLinuxLabel string
|
||||||
|
|
||||||
HypervisorType string
|
HypervisorType string
|
||||||
|
|
||||||
|
// SandboxBindMounts - list of paths to mount into guest
|
||||||
|
SandboxBindMounts []string
|
||||||
|
|
||||||
|
// Experimental enables experimental features
|
||||||
|
Experimental []string
|
||||||
|
|
||||||
|
// Information for fields not saved:
|
||||||
|
// * Annotation: this is kind of casual data, we don't need casual data in persist file,
|
||||||
|
// if you know this data needs to persist, please gives it a specific field
|
||||||
|
ContainerConfigs []ContainerConfig
|
||||||
|
|
||||||
NetworkConfig NetworkConfig
|
NetworkConfig NetworkConfig
|
||||||
|
|
||||||
HypervisorConfig HypervisorConfig
|
HypervisorConfig HypervisorConfig
|
||||||
|
|
||||||
ShmSize uint64
|
ShmSize uint64
|
||||||
|
|||||||
@@ -247,6 +247,9 @@ const (
|
|||||||
// DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest.
|
// DisableGuestSeccomp is a sandbox annotation that determines if seccomp should be applied inside guest.
|
||||||
DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp"
|
DisableGuestSeccomp = kataAnnotRuntimePrefix + "disable_guest_seccomp"
|
||||||
|
|
||||||
|
// GuestSeLinuxLabel is a SELinux security policy that is applied to a container process inside guest.
|
||||||
|
GuestSeLinuxLabel = kataAnnotRuntimePrefix + "guest_selinux_label"
|
||||||
|
|
||||||
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
|
// SandboxCgroupOnly is a sandbox annotation that determines if kata processes are managed only in sandbox cgroup.
|
||||||
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
|
SandboxCgroupOnly = kataAnnotRuntimePrefix + "sandbox_cgroup_only"
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,15 @@ func (q *qemu) kernelParameters() string {
|
|||||||
// set the maximum number of vCPUs
|
// set the maximum number of vCPUs
|
||||||
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
|
params = append(params, Param{"nr_cpus", fmt.Sprintf("%d", q.config.DefaultMaxVCPUs)})
|
||||||
|
|
||||||
|
// set the SELinux params in accordance with the runtime configuration, disable_guest_selinux.
|
||||||
|
if q.config.DisableGuestSeLinux {
|
||||||
|
q.Logger().Info("Set selinux=0 to kernel params because SELinux on the guest is disabled")
|
||||||
|
params = append(params, Param{"selinux", "0"})
|
||||||
|
} else {
|
||||||
|
q.Logger().Info("Set selinux=1 to kernel params because SELinux on the guest is enabled")
|
||||||
|
params = append(params, Param{"selinux", "1"})
|
||||||
|
}
|
||||||
|
|
||||||
// add the params specified by the provided config. As the kernel
|
// add the params specified by the provided config. As the kernel
|
||||||
// honours the last parameter value set and since the config-provided
|
// honours the last parameter value set and since the config-provided
|
||||||
// params are added here, they will take priority over the defaults.
|
// params are added here, they will take priority over the defaults.
|
||||||
@@ -465,6 +474,13 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
|
|||||||
return nd, nil
|
return nd, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the xattr option for virtiofsd daemon to enable extended attributes
|
||||||
|
// in virtiofs if SELinux on the guest side is enabled.
|
||||||
|
if !q.config.DisableGuestSeLinux {
|
||||||
|
q.Logger().Info("Set the xattr option for virtiofsd")
|
||||||
|
q.config.VirtioFSExtraArgs = append(q.config.VirtioFSExtraArgs, "-o", "xattr")
|
||||||
|
}
|
||||||
|
|
||||||
// default use virtiofsd
|
// default use virtiofsd
|
||||||
return &virtiofsd{
|
return &virtiofsd{
|
||||||
path: q.config.VirtioFSDaemon,
|
path: q.config.VirtioFSDaemon,
|
||||||
@@ -914,7 +930,6 @@ func (q *qemu) StartVM(ctx context.Context, timeout int) error {
|
|||||||
// the SELinux label. If these processes require privileged, we do
|
// the SELinux label. If these processes require privileged, we do
|
||||||
// not want to run them under confinement.
|
// not want to run them under confinement.
|
||||||
if !q.config.DisableSeLinux {
|
if !q.config.DisableSeLinux {
|
||||||
|
|
||||||
if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil {
|
if err := label.SetProcessLabel(q.config.SELinuxProcessLabel); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ func newQemuConfig() HypervisorConfig {
|
|||||||
BlockDeviceDriver: defaultBlockDriver,
|
BlockDeviceDriver: defaultBlockDriver,
|
||||||
DefaultMaxVCPUs: defaultMaxVCPUs,
|
DefaultMaxVCPUs: defaultMaxVCPUs,
|
||||||
Msize9p: defaultMsize9p,
|
Msize9p: defaultMsize9p,
|
||||||
|
DisableGuestSeLinux: defaultDisableGuestSeLinux,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,7 +59,7 @@ func testQemuKernelParameters(t *testing.T, kernelParams []Param, expected strin
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestQemuKernelParameters(t *testing.T) {
|
func TestQemuKernelParameters(t *testing.T) {
|
||||||
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d foo=foo bar=bar", govmm.MaxVCPUs())
|
expectedOut := fmt.Sprintf("panic=1 nr_cpus=%d selinux=0 foo=foo bar=bar", govmm.MaxVCPUs())
|
||||||
params := []Param{
|
params := []Param{
|
||||||
{
|
{
|
||||||
Key: "foo",
|
Key: "foo",
|
||||||
|
|||||||
@@ -135,6 +135,8 @@ type SandboxConfig struct {
|
|||||||
Hostname string
|
Hostname string
|
||||||
ID string
|
ID string
|
||||||
HypervisorType HypervisorType
|
HypervisorType HypervisorType
|
||||||
|
// Custom SELinux security policy to the container process inside the VM
|
||||||
|
GuestSeLinuxLabel string
|
||||||
// Volumes is a list of shared volumes between the host and the Sandbox.
|
// Volumes is a list of shared volumes between the host and the Sandbox.
|
||||||
Volumes []types.Volume
|
Volumes []types.Volume
|
||||||
// SandboxBindMounts - list of paths to mount into guest
|
// SandboxBindMounts - list of paths to mount into guest
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ const cpBinaryName = "cp"
|
|||||||
|
|
||||||
const fileMode0755 = os.FileMode(0755)
|
const fileMode0755 = os.FileMode(0755)
|
||||||
|
|
||||||
|
const maxWaitDelay = 50 * time.Millisecond
|
||||||
|
|
||||||
// The DefaultRateLimiterRefillTime is used for calculating the rate at
|
// The DefaultRateLimiterRefillTime is used for calculating the rate at
|
||||||
// which a TokenBucket is replenished, in cases where a RateLimiter is
|
// which a TokenBucket is replenished, in cases where a RateLimiter is
|
||||||
// applied to either network or disk I/O.
|
// applied to either network or disk I/O.
|
||||||
@@ -306,6 +308,44 @@ func ConvertAddressFamily(family int32) pbTypes.IPFamily {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func waitProcessUsingWaitLoop(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||||
|
secs := time.Duration(timeoutSecs) * time.Second
|
||||||
|
timeout := time.After(secs)
|
||||||
|
delay := 1 * time.Millisecond
|
||||||
|
|
||||||
|
for {
|
||||||
|
// Wait4 is used to reap and check that a child terminated.
|
||||||
|
// Without the Wait4 call, Kill(0) for a child will always exit without
|
||||||
|
// error because the process isn't reaped.
|
||||||
|
// Wait4 returns ECHILD error for non-child processes. Kill(0) is meant
|
||||||
|
// to address this case, once the process is reaped by init process,
|
||||||
|
// the call will return ESRCH error.
|
||||||
|
|
||||||
|
// "A watched pot never boils" and an unwaited-for process never appears to die!
|
||||||
|
waitedPid, err := syscall.Wait4(pid, nil, syscall.WNOHANG, nil)
|
||||||
|
|
||||||
|
if waitedPid == pid && err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-time.After(delay):
|
||||||
|
delay = delay * 5
|
||||||
|
|
||||||
|
if delay > maxWaitDelay {
|
||||||
|
delay = maxWaitDelay
|
||||||
|
}
|
||||||
|
case <-timeout:
|
||||||
|
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WaitLocalProcess waits for the specified process for up to timeoutSecs seconds.
|
// WaitLocalProcess waits for the specified process for up to timeoutSecs seconds.
|
||||||
//
|
//
|
||||||
// Notes:
|
// Notes:
|
||||||
@@ -334,49 +374,24 @@ func WaitLocalProcess(pid int, timeoutSecs uint, initialSignal syscall.Signal, l
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pidRunning := true
|
pidRunning := waitForProcessCompletion(pid, timeoutSecs, logger)
|
||||||
|
|
||||||
secs := time.Duration(timeoutSecs)
|
|
||||||
timeout := time.After(secs * time.Second)
|
|
||||||
|
|
||||||
// Wait for the VM process to terminate
|
|
||||||
outer:
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-time.After(50 * time.Millisecond):
|
|
||||||
// Check if the process is running periodically to avoid a busy loop
|
|
||||||
|
|
||||||
var _status syscall.WaitStatus
|
|
||||||
var _rusage syscall.Rusage
|
|
||||||
var waitedPid int
|
|
||||||
|
|
||||||
// "A watched pot never boils" and an unwaited-for process never appears to die!
|
|
||||||
waitedPid, err = syscall.Wait4(pid, &_status, syscall.WNOHANG, &_rusage)
|
|
||||||
|
|
||||||
if waitedPid == pid && err == nil {
|
|
||||||
pidRunning = false
|
|
||||||
break outer
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = syscall.Kill(pid, syscall.Signal(0)); err != nil {
|
|
||||||
pidRunning = false
|
|
||||||
break outer
|
|
||||||
}
|
|
||||||
|
|
||||||
break
|
|
||||||
|
|
||||||
case <-timeout:
|
|
||||||
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
|
||||||
|
|
||||||
break outer
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if pidRunning {
|
if pidRunning {
|
||||||
// Force process to die
|
// Force process to die
|
||||||
if err = syscall.Kill(pid, syscall.SIGKILL); err != nil {
|
if err = syscall.Kill(pid, syscall.SIGKILL); err != nil {
|
||||||
|
if err == syscall.ESRCH {
|
||||||
|
logger.WithField("pid", pid).Warnf("process already finished")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return fmt.Errorf("Failed to stop process %v: %s", pid, err)
|
return fmt.Errorf("Failed to stop process %v: %s", pid, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
_, err := syscall.Wait4(pid, nil, 0, nil)
|
||||||
|
if err != syscall.EINTR {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -5,6 +5,12 @@
|
|||||||
|
|
||||||
package utils
|
package utils
|
||||||
|
|
||||||
|
import "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) {
|
func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||||
|
return waitProcessUsingWaitLoop(pid, timeoutSecs, logger)
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,8 +14,10 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -151,3 +153,60 @@ func IsAPVFIOMediatedDevice(sysfsdev string) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func waitProcessUsingPidfd(pid int, timeoutSecs uint, logger *logrus.Entry) (bool, error) {
|
||||||
|
pidfd, err := unix.PidfdOpen(pid, 0)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
if err == unix.ESRCH {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer unix.Close(pidfd)
|
||||||
|
var n int
|
||||||
|
|
||||||
|
maxDelay := time.Duration(timeoutSecs) * time.Second
|
||||||
|
end := time.Now().Add(maxDelay)
|
||||||
|
|
||||||
|
for {
|
||||||
|
remaining := time.Until(end).Milliseconds()
|
||||||
|
if remaining < 0 {
|
||||||
|
remaining = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err = unix.Poll([]unix.PollFd{{Fd: int32(pidfd), Events: unix.POLLIN}}, int(remaining))
|
||||||
|
if err != unix.EINTR {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil || n != 1 {
|
||||||
|
logger.Warnf("process %v still running after waiting %ds", pid, timeoutSecs)
|
||||||
|
return true, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
err := unix.Waitid(unix.P_PIDFD, pidfd, nil, unix.WEXITED, nil)
|
||||||
|
if err == unix.EINVAL {
|
||||||
|
err = unix.Waitid(unix.P_PID, pid, nil, unix.WEXITED, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != unix.EINTR {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func waitForProcessCompletion(pid int, timeoutSecs uint, logger *logrus.Entry) bool {
|
||||||
|
pidRunning, err := waitProcessUsingPidfd(pid, timeoutSecs, logger)
|
||||||
|
|
||||||
|
if err == unix.ENOSYS {
|
||||||
|
pidRunning = waitProcessUsingWaitLoop(pid, timeoutSecs, logger)
|
||||||
|
}
|
||||||
|
|
||||||
|
return pidRunning
|
||||||
|
}
|
||||||
|
|||||||
@@ -337,7 +337,6 @@ impl ContainerLauncher {
|
|||||||
self.runner
|
self.runner
|
||||||
.cgroup_manager
|
.cgroup_manager
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.unwrap()
|
|
||||||
.as_any()?
|
.as_any()?
|
||||||
.downcast_ref::<CgroupManager>()
|
.downcast_ref::<CgroupManager>()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|||||||
@@ -65,6 +65,8 @@ readonly -a systemd_files=(
|
|||||||
|
|
||||||
# Set a default value
|
# Set a default value
|
||||||
AGENT_INIT=${AGENT_INIT:-no}
|
AGENT_INIT=${AGENT_INIT:-no}
|
||||||
|
SELINUX=${SELINUX:-no}
|
||||||
|
SELINUXFS="/sys/fs/selinux"
|
||||||
|
|
||||||
# Align image to 128M
|
# Align image to 128M
|
||||||
readonly mem_boundary_mb=128
|
readonly mem_boundary_mb=128
|
||||||
@@ -94,6 +96,10 @@ Extra environment variables:
|
|||||||
DEFAULT: not set
|
DEFAULT: not set
|
||||||
USE_PODMAN: If set and USE_DOCKER not set, will build image in a Podman Container (requries podman)
|
USE_PODMAN: If set and USE_DOCKER not set, will build image in a Podman Container (requries podman)
|
||||||
DEFAULT: not set
|
DEFAULT: not set
|
||||||
|
SELINUX: If set to "yes", the rootfs is labeled for SELinux.
|
||||||
|
Make sure that selinuxfs is mounted to /sys/fs/selinux on the host
|
||||||
|
and the rootfs is built with SELINUX=yes.
|
||||||
|
DEFAULT value: "no"
|
||||||
|
|
||||||
|
|
||||||
Following diagram shows how the resulting image will look like
|
Following diagram shows how the resulting image will look like
|
||||||
@@ -135,6 +141,7 @@ build_with_container() {
|
|||||||
local nsdax_bin="$9"
|
local nsdax_bin="$9"
|
||||||
local container_image_name="image-builder-osbuilder"
|
local container_image_name="image-builder-osbuilder"
|
||||||
local shared_files=""
|
local shared_files=""
|
||||||
|
local selinuxfs=""
|
||||||
|
|
||||||
image_dir=$(readlink -f "$(dirname "${image}")")
|
image_dir=$(readlink -f "$(dirname "${image}")")
|
||||||
image_name=$(basename "${image}")
|
image_name=$(basename "${image}")
|
||||||
@@ -158,6 +165,14 @@ build_with_container() {
|
|||||||
shared_files+="-v ${mke2fs_conf}:${mke2fs_conf}:ro "
|
shared_files+="-v ${mke2fs_conf}:${mke2fs_conf}:ro "
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "${SELINUX}" == "yes" ]; then
|
||||||
|
if mountpoint $SELINUXFS > /dev/null; then
|
||||||
|
selinuxfs="-v ${SELINUXFS}:${SELINUXFS}"
|
||||||
|
else
|
||||||
|
die "Make sure that SELinux is enabled on the host"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
#Make sure we use a compatible runtime to build rootfs
|
#Make sure we use a compatible runtime to build rootfs
|
||||||
# In case Clear Containers Runtime is installed we don't want to hit issue:
|
# In case Clear Containers Runtime is installed we don't want to hit issue:
|
||||||
#https://github.com/clearcontainers/runtime/issues/828
|
#https://github.com/clearcontainers/runtime/issues/828
|
||||||
@@ -172,12 +187,14 @@ build_with_container() {
|
|||||||
--env ROOT_FREE_SPACE="${root_free_space}" \
|
--env ROOT_FREE_SPACE="${root_free_space}" \
|
||||||
--env NSDAX_BIN="${nsdax_bin}" \
|
--env NSDAX_BIN="${nsdax_bin}" \
|
||||||
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
||||||
|
--env SELINUX="${SELINUX}" \
|
||||||
--env DEBUG="${DEBUG}" \
|
--env DEBUG="${DEBUG}" \
|
||||||
-v /dev:/dev \
|
-v /dev:/dev \
|
||||||
-v "${script_dir}":"/osbuilder" \
|
-v "${script_dir}":"/osbuilder" \
|
||||||
-v "${script_dir}/../scripts":"/scripts" \
|
-v "${script_dir}/../scripts":"/scripts" \
|
||||||
-v "${rootfs}":"/rootfs" \
|
-v "${rootfs}":"/rootfs" \
|
||||||
-v "${image_dir}":"/image" \
|
-v "${image_dir}":"/image" \
|
||||||
|
${selinuxfs} \
|
||||||
${shared_files} \
|
${shared_files} \
|
||||||
${container_image_name} \
|
${container_image_name} \
|
||||||
bash "/osbuilder/${script_name}" -o "/image/${image_name}" /rootfs
|
bash "/osbuilder/${script_name}" -o "/image/${image_name}" /rootfs
|
||||||
@@ -398,6 +415,7 @@ create_rootfs_image() {
|
|||||||
local img_size="$3"
|
local img_size="$3"
|
||||||
local fs_type="$4"
|
local fs_type="$4"
|
||||||
local block_size="$5"
|
local block_size="$5"
|
||||||
|
local agent_bin="$6"
|
||||||
|
|
||||||
create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}"
|
create_disk "${image}" "${img_size}" "${fs_type}" "${rootfs_start}"
|
||||||
|
|
||||||
@@ -416,6 +434,31 @@ create_rootfs_image() {
|
|||||||
|
|
||||||
info "Copying content from rootfs to root partition"
|
info "Copying content from rootfs to root partition"
|
||||||
cp -a "${rootfs}"/* "${mount_dir}"
|
cp -a "${rootfs}"/* "${mount_dir}"
|
||||||
|
|
||||||
|
if [ "${SELINUX}" == "yes" ]; then
|
||||||
|
if [ "${AGENT_INIT}" == "yes" ]; then
|
||||||
|
die "Guest SELinux with the agent init is not supported yet"
|
||||||
|
fi
|
||||||
|
|
||||||
|
info "Labeling rootfs for SELinux"
|
||||||
|
selinuxfs_path="${mount_dir}${SELINUXFS}"
|
||||||
|
mkdir -p $selinuxfs_path
|
||||||
|
if mountpoint $SELINUXFS > /dev/null && \
|
||||||
|
chroot "${mount_dir}" command -v restorecon > /dev/null; then
|
||||||
|
mount -t selinuxfs selinuxfs $selinuxfs_path
|
||||||
|
chroot "${mount_dir}" restorecon -RF -e ${SELINUXFS} /
|
||||||
|
# TODO: This operation will be removed after the updated container-selinux that
|
||||||
|
# includes the following commit is released.
|
||||||
|
# https://github.com/containers/container-selinux/commit/39f83cc74d50bd10ab6be4d0bdd98bc04857469f
|
||||||
|
# We use chcon as an interim solution until then.
|
||||||
|
chroot "${mount_dir}" chcon -t container_runtime_exec_t "/usr/bin/${agent_bin}"
|
||||||
|
umount $selinuxfs_path
|
||||||
|
else
|
||||||
|
die "Could not label the rootfs. Make sure that SELinux is enabled on the host \
|
||||||
|
and the rootfs is built with SELINUX=yes"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
sync
|
sync
|
||||||
OK "rootfs copied"
|
OK "rootfs copied"
|
||||||
|
|
||||||
@@ -549,7 +592,7 @@ main() {
|
|||||||
# consider in calculate_img_size
|
# consider in calculate_img_size
|
||||||
rootfs_img_size=$((img_size - dax_header_sz))
|
rootfs_img_size=$((img_size - dax_header_sz))
|
||||||
create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \
|
create_rootfs_image "${rootfs}" "${image}" "${rootfs_img_size}" \
|
||||||
"${fs_type}" "${block_size}"
|
"${fs_type}" "${block_size}" "${agent_bin}"
|
||||||
|
|
||||||
# insert at the beginning of the image the MBR + DAX header
|
# insert at the beginning of the image the MBR + DAX header
|
||||||
set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
|
set_dax_header "${image}" "${img_size}" "${fs_type}" "${nsdax_bin}"
|
||||||
|
|||||||
@@ -8,10 +8,15 @@ OS_VERSION=${OS_VERSION:-stream9}
|
|||||||
PACKAGES="chrony iptables"
|
PACKAGES="chrony iptables"
|
||||||
[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd"
|
[ "$AGENT_INIT" = no ] && PACKAGES+=" systemd"
|
||||||
[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp"
|
[ "$SECCOMP" = yes ] && PACKAGES+=" libseccomp"
|
||||||
|
[ "$SELINUX" = yes ] && PACKAGES+=" container-selinux"
|
||||||
|
|
||||||
# Container registry tag is different from metalink repo, e.g. "stream9" => "9-stream"
|
# Container registry tag is different from metalink repo, e.g. "stream9" => "9-stream"
|
||||||
os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")"
|
os_repo_version="$(sed -E "s/(stream)(.+)/\2-\1/" <<< "$OS_VERSION")"
|
||||||
|
|
||||||
METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch"
|
METALINK="https://mirrors.centos.org/metalink?repo=centos-baseos-$os_repo_version&arch=\$basearch"
|
||||||
|
if [ "$SELINUX" == yes ]; then
|
||||||
|
# AppStream repository is required for the container-selinux package
|
||||||
|
METALINK_APPSTREAM="https://mirrors.centos.org/metalink?repo=centos-appstream-$os_repo_version&arch=\$basearch"
|
||||||
|
fi
|
||||||
GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official
|
GPG_KEY_FILE=RPM-GPG-KEY-CentOS-Official
|
||||||
GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE"
|
GPG_KEY_URL="https://centos.org/keys/$GPG_KEY_FILE"
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ LIBC=${LIBC:-musl}
|
|||||||
# The kata agent enables seccomp feature.
|
# The kata agent enables seccomp feature.
|
||||||
# However, it is not enforced by default: you need to enable that in the main configuration file.
|
# However, it is not enforced by default: you need to enable that in the main configuration file.
|
||||||
SECCOMP=${SECCOMP:-"yes"}
|
SECCOMP=${SECCOMP:-"yes"}
|
||||||
|
SELINUX=${SELINUX:-"no"}
|
||||||
|
|
||||||
lib_file="${script_dir}/../scripts/lib.sh"
|
lib_file="${script_dir}/../scripts/lib.sh"
|
||||||
source "$lib_file"
|
source "$lib_file"
|
||||||
@@ -143,6 +144,11 @@ ROOTFS_DIR Path to the directory that is populated with the rootfs.
|
|||||||
SECCOMP When set to "no", the kata-agent is built without seccomp capability.
|
SECCOMP When set to "no", the kata-agent is built without seccomp capability.
|
||||||
Default value: "yes"
|
Default value: "yes"
|
||||||
|
|
||||||
|
SELINUX When set to "yes", build the rootfs with the required packages to
|
||||||
|
enable SELinux in the VM.
|
||||||
|
Make sure the guest kernel is compiled with SELinux enabled.
|
||||||
|
Default value: "no"
|
||||||
|
|
||||||
USE_DOCKER If set, build the rootfs inside a container (requires
|
USE_DOCKER If set, build the rootfs inside a container (requires
|
||||||
Docker).
|
Docker).
|
||||||
Default value: <not set>
|
Default value: <not set>
|
||||||
@@ -369,6 +375,15 @@ build_rootfs_distro()
|
|||||||
|
|
||||||
echo "Required rust version: $RUST_VERSION"
|
echo "Required rust version: $RUST_VERSION"
|
||||||
|
|
||||||
|
if [ "${SELINUX}" == "yes" ]; then
|
||||||
|
if [ "${AGENT_INIT}" == "yes" ]; then
|
||||||
|
die "Guest SELinux with the agent init is not supported yet"
|
||||||
|
fi
|
||||||
|
if [ "${distro}" != "centos" ]; then
|
||||||
|
die "The guest rootfs must be CentOS to enable guest SELinux"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -z "${USE_DOCKER}" ] && [ -z "${USE_PODMAN}" ]; then
|
if [ -z "${USE_DOCKER}" ] && [ -z "${USE_PODMAN}" ]; then
|
||||||
info "build directly"
|
info "build directly"
|
||||||
build_rootfs ${ROOTFS_DIR}
|
build_rootfs ${ROOTFS_DIR}
|
||||||
@@ -454,6 +469,7 @@ build_rootfs_distro()
|
|||||||
--env AA_KBC="${AA_KBC}" \
|
--env AA_KBC="${AA_KBC}" \
|
||||||
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
--env KATA_BUILD_CC="${KATA_BUILD_CC}" \
|
||||||
--env SECCOMP="${SECCOMP}" \
|
--env SECCOMP="${SECCOMP}" \
|
||||||
|
--env SELINUX="${SELINUX}" \
|
||||||
--env DEBUG="${DEBUG}" \
|
--env DEBUG="${DEBUG}" \
|
||||||
--env HOME="/root" \
|
--env HOME="/root" \
|
||||||
-v "${repo_dir}":"/kata-containers" \
|
-v "${repo_dir}":"/kata-containers" \
|
||||||
|
|||||||
@@ -79,7 +79,23 @@ gpgcheck=1
|
|||||||
gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE}
|
gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE}
|
||||||
EOF
|
EOF
|
||||||
fi
|
fi
|
||||||
|
if [ "$SELINUX" == "yes" ]; then
|
||||||
|
cat > "${DNF_CONF}" << EOF
|
||||||
|
[appstream]
|
||||||
|
name=${OS_NAME}-${OS_VERSION} upstream
|
||||||
|
releasever=${OS_VERSION}
|
||||||
|
EOF
|
||||||
|
echo "metalink=$METALINK_APPSTREAM" >> "$DNF_CONF"
|
||||||
|
if [ -n "$GPG_KEY_URL" ]; then
|
||||||
|
if [ ! -f "${CONFIG_DIR}/${GPG_KEY_FILE}" ]; then
|
||||||
|
curl -L "${GPG_KEY_URL}" -o "${CONFIG_DIR}/${GPG_KEY_FILE}"
|
||||||
|
fi
|
||||||
|
cat >> "${DNF_CONF}" << EOF
|
||||||
|
gpgcheck=1
|
||||||
|
gpgkey=file://${CONFIG_DIR}/${GPG_KEY_FILE}
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
build_rootfs()
|
build_rootfs()
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ function get_container_runtime() {
|
|||||||
|
|
||||||
function install_artifacts() {
|
function install_artifacts() {
|
||||||
echo "copying kata artifacts onto host"
|
echo "copying kata artifacts onto host"
|
||||||
cp -a /opt/kata-artifacts/opt/kata/* /opt/kata/
|
cp -au /opt/kata-artifacts/opt/kata/* /opt/kata/
|
||||||
chmod +x /opt/kata/bin/*
|
chmod +x /opt/kata/bin/*
|
||||||
chmod +x /opt/kata/runtime-rs/bin/*
|
chmod +x /opt/kata/runtime-rs/bin/*
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,12 @@ CONFIG_ARM64_PMEM=y
|
|||||||
CONFIG_ARM64_RAS_EXTN=y
|
CONFIG_ARM64_RAS_EXTN=y
|
||||||
# end of ARMv8.2 architectural feature
|
# end of ARMv8.2 architectural feature
|
||||||
|
|
||||||
|
#
|
||||||
|
# ARMv8.5 architectural features
|
||||||
|
#
|
||||||
|
CONFIG_ARCH_RANDOM=y
|
||||||
|
CONFIG_RANDOM_TRUST_CPU=y
|
||||||
|
|
||||||
CONFIG_NO_HZ_FULL=y
|
CONFIG_NO_HZ_FULL=y
|
||||||
CONFIG_GENERIC_MSI_IRQ_DOMAIN=y
|
CONFIG_GENERIC_MSI_IRQ_DOMAIN=y
|
||||||
CONFIG_RANDOMIZE_BASE=y
|
CONFIG_RANDOMIZE_BASE=y
|
||||||
|
|||||||
12
tools/packaging/kernel/configs/fragments/common/lsm.conf
Normal file
12
tools/packaging/kernel/configs/fragments/common/lsm.conf
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# SELinux support:
|
||||||
|
CONFIG_AUDIT=y
|
||||||
|
CONFIG_AUDITSYSCALL=y
|
||||||
|
CONFIG_LSM_MMAP_MIN_ADDR=6553
|
||||||
|
CONFIG_NETWORK_SECMARK=y
|
||||||
|
CONFIG_SECURITY_NETWORK=y
|
||||||
|
CONFIG_SECURITY_SELINUX=y
|
||||||
|
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
|
||||||
|
CONFIG_SECURITY_SELINUX_DEVELOP=y
|
||||||
|
CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=0
|
||||||
|
CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9
|
||||||
|
CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256
|
||||||
@@ -1 +1 @@
|
|||||||
96
|
97
|
||||||
|
|||||||
Reference in New Issue
Block a user