From 7df8edef1b117006fdc98ee09846a2d327ce2776 Mon Sep 17 00:00:00 2001 From: "fupan.lfp" Date: Tue, 10 Mar 2020 09:55:16 +0800 Subject: [PATCH 1/4] rustjail: replace protocol spec with oci spec transform the rpc protocol spec to oci spec. Signed-off-by: fupan.lfp --- src/agent/rustjail/src/capabilities.rs | 12 +- src/agent/rustjail/src/cgroups/fs/mod.rs | 214 +++++++++--------- src/agent/rustjail/src/cgroups/mod.rs | 2 +- src/agent/rustjail/src/container.rs | 274 +++++++++++------------ src/agent/rustjail/src/lib.rs | 5 +- src/agent/rustjail/src/mount.rs | 68 +++--- src/agent/rustjail/src/process.rs | 4 +- src/agent/rustjail/src/specconv.rs | 2 +- src/agent/rustjail/src/validator.rs | 80 +++---- src/agent/src/device.rs | 26 +-- src/agent/src/grpc.rs | 63 +++--- 11 files changed, 369 insertions(+), 381 deletions(-) diff --git a/src/agent/rustjail/src/capabilities.rs b/src/agent/rustjail/src/capabilities.rs index 7d7f066d8..5a156ff5e 100644 --- a/src/agent/rustjail/src/capabilities.rs +++ b/src/agent/rustjail/src/capabilities.rs @@ -10,7 +10,7 @@ use lazy_static; use crate::errors::*; use caps::{self, CapSet, Capability, CapsHashSet}; -use protocols::oci::LinuxCapabilities; +use oci::LinuxCapabilities; use slog::Logger; use std::collections::HashMap; @@ -103,30 +103,30 @@ pub fn drop_priviledges(logger: &Logger, caps: &LinuxCapabilities) -> Result<()> let all = caps::all(); - for c in all.difference(&to_capshashset(&logger, caps.Bounding.as_ref())) { + for c in all.difference(&to_capshashset(&logger, caps.bounding.as_ref())) { caps::drop(None, CapSet::Bounding, *c)?; } caps::set( None, CapSet::Effective, - to_capshashset(&logger, caps.Effective.as_ref()), + to_capshashset(&logger, caps.effective.as_ref()), )?; caps::set( None, CapSet::Permitted, - to_capshashset(&logger, caps.Permitted.as_ref()), + to_capshashset(&logger, caps.permitted.as_ref()), )?; caps::set( None, CapSet::Inheritable, - to_capshashset(&logger, caps.Inheritable.as_ref()), + to_capshashset(&logger, 
caps.inheritable.as_ref()), )?; if let Err(_) = caps::set( None, CapSet::Ambient, - to_capshashset(&logger, caps.Ambient.as_ref()), + to_capshashset(&logger, caps.ambient.as_ref()), ) { warn!(logger, "failed to set ambient capability"); } diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 3aa039a02..336374292 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -10,12 +10,12 @@ use crate::errors::*; use lazy_static; use libc::{self, pid_t}; use nix::errno::Errno; +use oci::{LinuxDeviceCgroup, LinuxResources, LinuxThrottleDevice, LinuxWeightDevice}; use protobuf::{CachedSize, RepeatedField, SingularPtrField, UnknownFields}; use protocols::agent::{ BlkioStats, BlkioStatsEntry, CgroupStats, CpuStats, CpuUsage, HugetlbStats, MemoryData, MemoryStats, PidsStats, ThrottlingData, }; -use protocols::oci::{LinuxDeviceCgroup, LinuxResources, LinuxThrottleDevice, LinuxWeightDevice}; use regex::Regex; use std::collections::HashMap; use std::fs; @@ -57,63 +57,51 @@ lazy_static! 
{ pub static ref DEFAULT_ALLOWED_DEVICES: Vec = { let mut v = Vec::new(); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "c".to_string(), - Major: WILDCARD, - Minor: WILDCARD, - Access: "m".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "c".to_string(), + major: Some(WILDCARD), + minor: Some(WILDCARD), + access: "m".to_string(), }); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "b".to_string(), - Major: WILDCARD, - Minor: WILDCARD, - Access: "m".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "b".to_string(), + major: Some(WILDCARD), + minor: Some(WILDCARD), + access: "m".to_string(), }); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "c".to_string(), - Major: 5, - Minor: 1, - Access: "rwm".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "c".to_string(), + major: Some(5), + minor: Some(1), + access: "rwm".to_string(), }); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "c".to_string(), - Major: 136, - Minor: WILDCARD, - Access: "rwm".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "c".to_string(), + major: Some(136), + minor: Some(WILDCARD), + access: "rwm".to_string(), }); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "c".to_string(), - Major: 5, - Minor: 2, - Access: "rwm".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "c".to_string(), + major: Some(5), + minor: Some(2), + access: "rwm".to_string(), }); v.push(LinuxDeviceCgroup { - Allow: true, - Type: "c".to_string(), - Major: 10, - Minor: 200, - Access: "rwm".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: "c".to_string(), + major: Some(10), + minor: Some(200), + 
access: "rwm".to_string(), }); v @@ -419,10 +407,10 @@ impl Subsystem for CpuSet { let mut cpus: &str = ""; let mut mems: &str = ""; - if r.CPU.is_some() { - let cpu = r.CPU.as_ref().unwrap(); - cpus = cpu.Cpus.as_str(); - mems = cpu.Mems.as_str(); + if r.cpu.is_some() { + let cpu = r.cpu.as_ref().unwrap(); + cpus = cpu.cpus.as_str(); + mems = cpu.mems.as_str(); } // For updatecontainer, just set the new value @@ -466,17 +454,25 @@ impl Subsystem for Cpu { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.CPU.is_none() { + if r.cpu.is_none() { return Ok(()); } - let cpu = r.CPU.as_ref().unwrap(); + let cpu = r.cpu.as_ref().unwrap(); - try_write_nonzero(dir, CPU_RT_PERIOD_US, cpu.RealtimePeriod as i128)?; - try_write_nonzero(dir, CPU_RT_RUNTIME_US, cpu.RealtimeRuntime as i128)?; - write_nonzero(dir, CPU_SHARES, cpu.Shares as i128)?; - write_nonzero(dir, CPU_CFS_QUOTA_US, cpu.Quota as i128)?; - write_nonzero(dir, CPU_CFS_PERIOD_US, cpu.Period as i128)?; + try_write_nonzero( + dir, + CPU_RT_PERIOD_US, + cpu.realtime_period.unwrap_or(0) as i128, + )?; + try_write_nonzero( + dir, + CPU_RT_RUNTIME_US, + cpu.realtime_runtime.unwrap_or(0) as i128, + )?; + write_nonzero(dir, CPU_SHARES, cpu.shares.unwrap_or(0) as i128)?; + write_nonzero(dir, CPU_CFS_QUOTA_US, cpu.quota.unwrap_or(0) as i128)?; + write_nonzero(dir, CPU_CFS_PERIOD_US, cpu.period.unwrap_or(0) as i128)?; Ok(()) } @@ -599,24 +595,24 @@ impl CpuAcct { } fn write_device(d: &LinuxDeviceCgroup, dir: &str) -> Result<()> { - let file = if d.Allow { DEVICES_ALLOW } else { DEVICES_DENY }; + let file = if d.allow { DEVICES_ALLOW } else { DEVICES_DENY }; - let major = if d.Major == WILDCARD { + let major = if d.major.unwrap_or(0) == WILDCARD { "*".to_string() } else { - d.Major.to_string() + d.major.unwrap_or(0).to_string() }; - let minor = if d.Minor == WILDCARD { + let minor = if d.minor.unwrap_or(0) == WILDCARD { "*".to_string() } else { - d.Minor.to_string() + 
d.minor.unwrap_or(0).to_string() }; - let t = if d.Type.is_empty() { + let t = if d.r#type.is_empty() { "a" } else { - d.Type.as_str() + d.r#type.as_str() }; let v = format!( @@ -624,7 +620,7 @@ fn write_device(d: &LinuxDeviceCgroup, dir: &str) -> Result<()> { t, major.as_str(), minor.as_str(), - d.Access.as_str() + d.access.as_str() ); info!(sl!(), "{}", v.as_str()); @@ -638,19 +634,17 @@ impl Subsystem for Devices { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - for d in r.Devices.iter() { + for d in r.devices.iter() { write_device(d, dir)?; } for d in DEFAULT_DEVICES.iter() { let td = LinuxDeviceCgroup { - Allow: true, - Type: d.Type.clone(), - Major: d.Major, - Minor: d.Minor, - Access: "rwm".to_string(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + allow: true, + r#type: d.r#type.clone(), + major: Some(d.major), + minor: Some(d.minor), + access: "rwm".to_string(), }; write_device(&td, dir)?; @@ -691,30 +685,34 @@ impl Subsystem for Memory { } fn set(&self, dir: &str, r: &LinuxResources, update: bool) -> Result<()> { - if r.Memory.is_none() { + if r.memory.is_none() { return Ok(()); } - let memory = r.Memory.as_ref().unwrap(); + let memory = r.memory.as_ref().unwrap(); // initialize kmem limits for accounting if !update { try_write(dir, KMEM_LIMIT, 1)?; try_write(dir, KMEM_LIMIT, -1)?; } - write_nonzero(dir, MEMORY_LIMIT, memory.Limit as i128)?; - write_nonzero(dir, MEMORY_SOFT_LIMIT, memory.Reservation as i128)?; + write_nonzero(dir, MEMORY_LIMIT, memory.limit.unwrap_or(0) as i128)?; + write_nonzero( + dir, + MEMORY_SOFT_LIMIT, + memory.reservation.unwrap_or(0) as i128, + )?; - try_write_nonzero(dir, MEMSW_LIMIT, memory.Swap as i128)?; - try_write_nonzero(dir, KMEM_LIMIT, memory.Kernel as i128)?; + try_write_nonzero(dir, MEMSW_LIMIT, memory.swap.unwrap_or(0) as i128)?; + try_write_nonzero(dir, KMEM_LIMIT, memory.kernel.unwrap_or(0) as i128)?; - write_nonzero(dir, KMEM_TCP_LIMIT, 
memory.KernelTCP as i128)?; + write_nonzero(dir, KMEM_TCP_LIMIT, memory.kernel_tcp.unwrap_or(0) as i128)?; - if memory.Swappiness <= 100 { - write_file(dir, SWAPPINESS, memory.Swappiness)?; + if memory.swapiness.unwrap_or(0) <= 100 { + write_file(dir, SWAPPINESS, memory.swapiness.unwrap_or(0))?; } - if memory.DisableOOMKiller { + if memory.disable_oom_killer.unwrap_or(false) { write_file(dir, OOM_CONTROL, 1)?; } @@ -808,14 +806,14 @@ impl Subsystem for Pids { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.Pids.is_none() { + if r.pids.is_none() { return Ok(()); } - let pids = r.Pids.as_ref().unwrap(); + let pids = r.pids.as_ref().unwrap(); - let v = if pids.Limit > 0 { - pids.Limit.to_string() + let v = if pids.limit > 0 { + pids.limit.to_string() } else { "max".to_string() }; @@ -857,14 +855,14 @@ impl Pids { #[inline] fn weight(d: &LinuxWeightDevice) -> (String, String) { ( - format!("{}:{} {}", d.Major, d.Minor, d.Weight), - format!("{}:{} {}", d.Major, d.Minor, d.LeafWeight), + format!("{:?} {:?}", d.blk, d.weight), + format!("{:?} {:?}", d.blk, d.leaf_weight), ) } #[inline] fn rate(d: &LinuxThrottleDevice) -> String { - format!("{}:{} {}", d.Major, d.Minor, d.Rate) + format!("{:?} {}", d.blk, d.rate) } fn write_blkio_device(dir: &str, file: &str, v: T) -> Result<()> { @@ -895,34 +893,38 @@ impl Subsystem for Blkio { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.BlockIO.is_none() { + if r.block_io.is_none() { return Ok(()); } - let blkio = r.BlockIO.as_ref().unwrap(); + let blkio = r.block_io.as_ref().unwrap(); - write_nonzero(dir, BLKIO_WEIGHT, blkio.Weight as i128)?; - write_nonzero(dir, BLKIO_LEAF_WEIGHT, blkio.LeafWeight as i128)?; + write_nonzero(dir, BLKIO_WEIGHT, blkio.weight.unwrap_or(0) as i128)?; + write_nonzero( + dir, + BLKIO_LEAF_WEIGHT, + blkio.leaf_weight.unwrap_or(0) as i128, + )?; - for d in blkio.WeightDevice.iter() { + for d in blkio.weight_device.iter() { let (w, lw) 
= weight(d); write_blkio_device(dir, BLKIO_WEIGHT_DEVICE, w)?; write_blkio_device(dir, BLKIO_LEAF_WEIGHT_DEVICE, lw)?; } - for d in blkio.ThrottleReadBpsDevice.iter() { + for d in blkio.throttle_read_bps_device.iter() { write_blkio_device(dir, BLKIO_READ_BPS_DEVICE, rate(d))?; } - for d in blkio.ThrottleWriteBpsDevice.iter() { + for d in blkio.throttle_write_bps_device.iter() { write_blkio_device(dir, BLKIO_WRITE_BPS_DEVICE, rate(d))?; } - for d in blkio.ThrottleReadIOPSDevice.iter() { + for d in blkio.throttle_read_iops_device.iter() { write_blkio_device(dir, BLKIO_READ_IOPS_DEVICE, rate(d))?; } - for d in blkio.ThrottleWriteIOPSDevice.iter() { + for d in blkio.throttle_write_iops_device.iter() { write_blkio_device(dir, BLKIO_WRITE_IOPS_DEVICE, rate(d))?; } @@ -1010,9 +1012,9 @@ impl Subsystem for HugeTLB { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - for l in r.HugepageLimits.iter() { - let file = format!("hugetlb.{}.limit_in_bytes", l.Pagesize); - write_file(dir, file.as_str(), l.Limit)?; + for l in r.hugepage_limits.iter() { + let file = format!("hugetlb.{}.limit_in_bytes", l.page_size); + write_file(dir, file.as_str(), l.limit)?; } Ok(()) } @@ -1052,13 +1054,13 @@ impl Subsystem for NetCls { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.Network.is_none() { + if r.network.is_none() { return Ok(()); } - let network = r.Network.as_ref().unwrap(); + let network = r.network.as_ref().unwrap(); - write_nonzero(dir, NET_CLS_CLASSID, network.ClassID as i128)?; + write_nonzero(dir, NET_CLS_CLASSID, network.class_id.unwrap_or(0) as i128)?; Ok(()) } @@ -1070,14 +1072,14 @@ impl Subsystem for NetPrio { } fn set(&self, dir: &str, r: &LinuxResources, _update: bool) -> Result<()> { - if r.Network.is_none() { + if r.network.is_none() { return Ok(()); } - let network = r.Network.as_ref().unwrap(); + let network = r.network.as_ref().unwrap(); - for p in network.Priorities.iter() { - let prio = format!("{} 
{}", p.Name, p.Priority); + for p in network.priorities.iter() { + let prio = format!("{} {}", p.name, p.priority); try_write_file(dir, NET_PRIO_IFPRIOMAP, prio)?; } diff --git a/src/agent/rustjail/src/cgroups/mod.rs b/src/agent/rustjail/src/cgroups/mod.rs index c55f86470..0dbec74eb 100644 --- a/src/agent/rustjail/src/cgroups/mod.rs +++ b/src/agent/rustjail/src/cgroups/mod.rs @@ -5,8 +5,8 @@ use crate::errors::*; // use crate::configs::{FreezerState, Config}; +use oci::LinuxResources; use protocols::agent::CgroupStats; -use protocols::oci::LinuxResources; use std::collections::HashMap; pub mod fs; diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index a6f1fccb1..188d7e680 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -4,7 +4,7 @@ // use lazy_static; -use protocols::oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec}; +use oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec}; use serde_json; use std::ffi::{CStr, CString}; use std::fs; @@ -14,7 +14,7 @@ use std::path::{Path, PathBuf}; use std::time::SystemTime; // use crate::sync::Cond; use libc::pid_t; -use protocols::oci::{LinuxDevice, LinuxIDMapping}; +use oci::{LinuxDevice, LinuxIDMapping}; use std::clone::Clone; use std::fmt::Display; use std::process::Command; @@ -99,70 +99,58 @@ lazy_static! 
{ pub static ref DEFAULT_DEVICES: Vec = { let mut v = Vec::new(); v.push(LinuxDevice { - Path: "/dev/null".to_string(), - Type: "c".to_string(), - Major: 1, - Minor: 3, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/null".to_string(), + r#type: "c".to_string(), + major: 1, + minor: 3, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v.push(LinuxDevice { - Path: "/dev/zero".to_string(), - Type: "c".to_string(), - Major: 1, - Minor: 5, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/zero".to_string(), + r#type: "c".to_string(), + major: 1, + minor: 5, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v.push(LinuxDevice { - Path: "/dev/full".to_string(), - Type: String::from("c"), - Major: 1, - Minor: 7, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/full".to_string(), + r#type: String::from("c"), + major: 1, + minor: 7, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v.push(LinuxDevice { - Path: "/dev/tty".to_string(), - Type: "c".to_string(), - Major: 5, - Minor: 0, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/tty".to_string(), + r#type: "c".to_string(), + major: 5, + minor: 0, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v.push(LinuxDevice { - Path: "/dev/urandom".to_string(), - Type: "c".to_string(), - Major: 1, - Minor: 9, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/urandom".to_string(), + r#type: "c".to_string(), 
+ major: 1, + minor: 9, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v.push(LinuxDevice { - Path: "/dev/random".to_string(), - Type: "c".to_string(), - Major: 1, - Minor: 8, - FileMode: 0o066, - UID: 0xffffffff, - GID: 0xffffffff, - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + path: "/dev/random".to_string(), + r#type: "c".to_string(), + major: 1, + minor: 8, + file_mode: Some(0o066), + uid: Some(0xffffffff), + gid: Some(0xffffffff), }); v }; @@ -274,16 +262,16 @@ impl BaseContainer for LinuxContainer { 0 }; - let root = oci.Root.as_ref().unwrap().Path.as_str(); + let root = oci.root.as_ref().unwrap().path.as_str(); let path = fs::canonicalize(root)?; let bundle = path.parent().unwrap().to_str().unwrap().to_string(); Ok(OCIState { - version: oci.Version.clone(), + version: oci.version.clone(), id: self.id(), status, pid, bundle, - annotations: oci.Annotations.clone(), + annotations: oci.annotations.clone(), }) } @@ -326,10 +314,10 @@ impl BaseContainer for LinuxContainer { .spec .as_mut() .unwrap() - .Linux + .linux .as_mut() .unwrap() - .Resources = SingularPtrField::some(r); + .resources = Some(r); Ok(()) } @@ -363,11 +351,11 @@ impl BaseContainer for LinuxContainer { } let spec = self.config.spec.as_ref().unwrap(); - if spec.Linux.is_none() { + if spec.linux.is_none() { return Err(ErrorKind::ErrorCode("no linux config".to_string()).into()); } - let linux = spec.Linux.as_ref().unwrap(); + let linux = spec.linux.as_ref().unwrap(); // get namespace vector to join/new let nses = get_namespaces(&linux, p.init, self.init_process_pid)?; info!(self.logger, "got namespaces {:?}!\n", nses); @@ -376,23 +364,23 @@ impl BaseContainer for LinuxContainer { let mut pidns = false; let mut userns = false; for ns in &nses { - let s = NAMESPACES.get(&ns.Type.as_str()); + let s = NAMESPACES.get(&ns.r#type.as_str()); if s.is_none() { return Err(ErrorKind::ErrorCode("invalid ns type".to_string()).into()); } let s 
= s.unwrap(); - if ns.Path.is_empty() { + if ns.path.is_empty() { to_new.set(*s, true); } else { - let fd = match fcntl::open(ns.Path.as_str(), OFlag::empty(), Mode::empty()) { + let fd = match fcntl::open(ns.path.as_str(), OFlag::empty(), Mode::empty()) { Ok(v) => v, Err(e) => { info!( self.logger, "cannot open type: {} path: {}", - ns.Type.clone(), - ns.Path.clone() + ns.r#type.clone(), + ns.path.clone() ); info!(self.logger, "error is : {}", e.as_errno().unwrap().desc()); return Err(e.into()); @@ -535,29 +523,27 @@ impl BaseContainer for LinuxContainer { mount::finish_rootfs(spec)?; } - if !p.oci.Cwd.is_empty() { - debug!(self.logger, "cwd: {}", p.oci.Cwd.as_str()); - unistd::chdir(p.oci.Cwd.as_str())?; + if !p.oci.cwd.is_empty() { + debug!(self.logger, "cwd: {}", p.oci.cwd.as_str()); + unistd::chdir(p.oci.cwd.as_str())?; } // setup uid/gid - info!(self.logger, "{:?}", p.oci.clone()); + info!(self.logger, "{:?}", &p.oci); - if p.oci.User.is_some() { - let guser = p.oci.User.as_ref().unwrap(); + let guser = &p.oci.user; - let uid = Uid::from_raw(guser.UID); - let gid = Gid::from_raw(guser.GID); + let uid = Uid::from_raw(guser.uid); + let gid = Gid::from_raw(guser.gid); - setid(uid, gid)?; + setid(uid, gid)?; - if !guser.AdditionalGids.is_empty() { - setgroups(guser.AdditionalGids.as_slice())?; - } + if guser.additional_gids.len() > 0 { + setgroups(guser.additional_gids.as_slice())?; } // NoNewPeiviledges, Drop capabilities - if p.oci.NoNewPrivileges { + if p.oci.no_new_privileges { if let Err(_) = prctl::set_no_new_privileges(true) { return Err( ErrorKind::ErrorCode("cannot set no new privileges".to_string()).into(), @@ -565,8 +551,8 @@ impl BaseContainer for LinuxContainer { } } - if p.oci.Capabilities.is_some() { - let c = p.oci.Capabilities.as_ref().unwrap(); + if p.oci.capabilities.is_some() { + let c = p.oci.capabilities.as_ref().unwrap(); info!(self.logger, "drop capabilities!"); capabilities::drop_priviledges(&self.logger, c)?; } @@ -597,8 +583,8 @@ impl 
BaseContainer for LinuxContainer { } // exec process - let args = p.oci.Args.to_vec(); - let env = p.oci.Env.to_vec(); + let args = p.oci.args.to_vec(); + let env = p.oci.env.to_vec(); do_exec(&self.logger, &args[0], &args, &env)?; Err(ErrorKind::ErrorCode("fail to create container".to_string()).into()) @@ -624,10 +610,10 @@ impl BaseContainer for LinuxContainer { signal::kill(Pid::from_raw(*pid), Some(Signal::SIGKILL))?; } - if spec.Hooks.is_some() { + if spec.hooks.is_some() { info!(self.logger, "poststop"); - let hooks = spec.Hooks.as_ref().unwrap(); - for h in hooks.Poststop.iter() { + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.poststop.iter() { execute_hook(&self.logger, h, &st)?; } } @@ -724,12 +710,10 @@ fn do_exec(logger: &Logger, path: &str, args: &[String], env: &[String]) -> Resu fn get_namespaces(linux: &Linux, init: bool, init_pid: pid_t) -> Result> { let mut ns: Vec = Vec::new(); if init { - for i in &linux.Namespaces { + for i in &linux.namespaces { ns.push(LinuxNamespace { - Type: i.Type.clone(), - Path: i.Path.clone(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + r#type: i.r#type.clone(), + path: i.path.clone(), }); } } else { @@ -748,10 +732,8 @@ fn get_namespaces(linux: &Linux, init: bool, init_pid: pid_t) -> Result { @@ -846,12 +828,12 @@ fn join_namespaces( write_mappings( &logger, &format!("/proc/{}/uid_map", child.as_raw()), - &linux.UIDMappings, + &linux.uid_mappings, )?; write_mappings( &logger, &format!("/proc/{}/gid_map", child.as_raw()), - &linux.GIDMappings, + &linux.gid_mappings, )?; } @@ -908,10 +890,10 @@ fn join_namespaces( let _ = read_sync(pfd)?; // run prestart hook - if spec.Hooks.is_some() { + if spec.hooks.is_some() { info!(logger, "prestart"); - let hooks = spec.Hooks.as_ref().unwrap(); - for h in hooks.Prestart.iter() { + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.prestart.iter() { execute_hook(&logger, h, st)?; } } @@ -922,10 +904,10 @@ fn 
join_namespaces( // wait to run poststart hook let _ = read_sync(pfd)?; //run poststart hook - if spec.Hooks.is_some() { + if spec.hooks.is_some() { info!(logger, "poststart"); - let hooks = spec.Hooks.as_ref().unwrap(); - for h in hooks.Poststart.iter() { + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.poststart.iter() { execute_hook(&logger, h, st)?; } } @@ -941,21 +923,21 @@ fn join_namespaces( unistd::close(pwfd)?; // set oom_score_adj - let p = if spec.Process.is_some() { - spec.Process.as_ref().unwrap() + let p = if spec.process.is_some() { + spec.process.as_ref().unwrap() } else { return Err(nix::Error::Sys(Errno::EINVAL).into()); }; - if p.OOMScoreAdj > 0 { + if p.oom_score_adj.is_some() { fs::write( "/proc/self/oom_score_adj", - p.OOMScoreAdj.to_string().as_bytes(), + p.oom_score_adj.unwrap().to_string().as_bytes(), )? } // set rlimit - for rl in p.Rlimits.iter() { + for rl in p.rlimits.iter() { setrlimit(rl)?; } @@ -1061,10 +1043,10 @@ fn join_namespaces( } if to_new.contains(CloneFlags::CLONE_NEWUTS) { - unistd::sethostname(&spec.Hostname)?; + unistd::sethostname(&spec.hostname)?; } - let rootfs = spec.Root.as_ref().unwrap().Path.as_str(); + let rootfs = spec.root.as_ref().unwrap().path.as_str(); let root = fs::canonicalize(rootfs)?; let rootfs = root.to_str().unwrap(); @@ -1104,7 +1086,7 @@ fn join_namespaces( } // setup sysctl - set_sysctls(&linux.Sysctl)?; + set_sysctls(&linux.sysctl)?; unistd::chdir("/")?; if let Err(_) = stat::stat("marker") { info!(logger, "not in expect root!!"); @@ -1193,11 +1175,11 @@ fn setup_stdio(p: &Process) -> Result<()> { fn write_mappings(logger: &Logger, path: &str, maps: &[LinuxIDMapping]) -> Result<()> { let mut data = String::new(); for m in maps { - if m.Size == 0 { + if m.size == 0 { continue; } - let val = format!("{} {} {}\n", m.ContainerID, m.HostID, m.Size); + let val = format!("{} {} {}\n", m.container_id, m.host_id, m.size); data = data + &val; } @@ -1273,16 +1255,16 @@ impl LinuxContainer { let 
spec = config.spec.as_ref().unwrap(); - if spec.Linux.is_none() { + if spec.linux.is_none() { return Err(nix::Error::Sys(Errno::EINVAL).into()); } - let linux = spec.Linux.as_ref().unwrap(); + let linux = spec.linux.as_ref().unwrap(); - let cpath = if linux.CgroupsPath.is_empty() { + let cpath = if linux.cgroups_path.is_empty() { format!("/{}", id.as_str()) } else { - linux.CgroupsPath.clone() + linux.cgroups_path.clone() }; let cgroup_manager = FsManager::new(cpath.as_str())?; @@ -1384,12 +1366,12 @@ lazy_static! { fn setrlimit(limit: &POSIXRlimit) -> Result<()> { let rl = libc::rlimit { - rlim_cur: limit.Soft, - rlim_max: limit.Hard, + rlim_cur: limit.soft, + rlim_max: limit.hard, }; - let res = if RLIMITMAPS.get(limit.Type.as_str()).is_some() { - *RLIMITMAPS.get(limit.Type.as_str()).unwrap() + let res = if RLIMITMAPS.get(limit.r#type.as_str()).is_some() { + *RLIMITMAPS.get(limit.r#type.as_str()).unwrap() } else { return Err(nix::Error::Sys(Errno::EINVAL).into()); }; @@ -1445,14 +1427,14 @@ use std::time::Duration; fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { let logger = logger.new(o!("action" => "execute-hook")); - let binary = PathBuf::from(h.Path.as_str()); + let binary = PathBuf::from(h.path.as_str()); let path = binary.canonicalize()?; if !path.exists() { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - let args = h.Args.clone(); - let envs = h.Env.clone(); + let args = h.args.clone(); + let envs = h.env.clone(); let state = serde_json::to_string(st)?; // state.push_str("\n"); @@ -1565,25 +1547,27 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { let pid = rx.recv().unwrap(); info!(logger, "hook grand: {}", pid); - let status: i32 = if h.Timeout > 0 { - match rx.recv_timeout(Duration::from_secs(h.Timeout as u64)) { - Ok(s) => s, - Err(e) => { - let error = if e == RecvTimeoutError::Timeout { - -libc::ETIMEDOUT - } else { - -libc::EPIPE - }; - let _ = 
signal::kill(Pid::from_raw(pid), Some(Signal::SIGKILL)); - error + let status = { + if let Some(timeout) = h.timeout { + match rx.recv_timeout(Duration::from_secs(timeout as u64)) { + Ok(s) => s, + Err(e) => { + let error = if e == RecvTimeoutError::Timeout { + -libc::ETIMEDOUT + } else { + -libc::EPIPE + }; + let _ = signal::kill(Pid::from_raw(pid), Some(Signal::SIGKILL)); + error + } } - } - } else { - if let Ok(s) = rx.recv() { - s } else { - let _ = signal::kill(Pid::from_raw(pid), Some(Signal::SIGKILL)); - -libc::EPIPE + if let Ok(s) = rx.recv() { + s + } else { + let _ = signal::kill(Pid::from_raw(pid), Some(Signal::SIGKILL)); + -libc::EPIPE + } } }; diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index 6bfc3d9b6..f429b371e 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -68,7 +68,6 @@ pub mod validator; use std::collections::HashMap; use std::mem::MaybeUninit; - use oci::{ Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities, Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot, @@ -79,7 +78,7 @@ use protocols::oci::{ Root as grpcRoot, Spec as grpcSpec, }; -fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess { +pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess { let console_size = if p.ConsoleSize.is_some() { let c = p.ConsoleSize.as_ref().unwrap(); Some(ociBox { @@ -296,7 +295,7 @@ fn blockio_grpc_to_oci(blk: &grpcLinuxBlockIO) -> ociLinuxBlockIO { } } -fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources { +pub fn resources_grpc_to_oci(res: &grpcLinuxResources) -> ociLinuxResources { let devices = { let mut d = Vec::new(); for dev in res.Devices.iter() { diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index d4dedc3a5..499d65b25 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -10,7 +10,7 @@ use nix::mount::{self, MntFlags, 
MsFlags}; use nix::sys::stat::{self, Mode, SFlag}; use nix::unistd::{self, Gid, Uid}; use nix::NixPath; -use protocols::oci::{LinuxDevice, Mount, Spec}; +use oci::{LinuxDevice, Mount, Spec}; use std::collections::{HashMap, HashSet}; use std::fs::{self, OpenOptions}; use std::os::unix; @@ -108,14 +108,14 @@ pub fn init_rootfs( lazy_static::initialize(&PROPAGATION); lazy_static::initialize(&LINUXDEVICETYPE); - let linux = spec.Linux.as_ref().unwrap(); + let linux = spec.linux.as_ref().unwrap(); let mut flags = MsFlags::MS_REC; - match PROPAGATION.get(&linux.RootfsPropagation.as_str()) { + match PROPAGATION.get(&linux.rootfs_propagation.as_str()) { Some(fl) => flags |= *fl, None => flags |= MsFlags::MS_SLAVE, } - let rootfs = spec.Root.as_ref().unwrap().Path.as_str(); + let rootfs = spec.root.as_ref().unwrap().path.as_str(); let root = fs::canonicalize(rootfs)?; let rootfs = root.to_str().unwrap(); @@ -128,13 +128,13 @@ pub fn init_rootfs( None::<&str>, )?; - for m in &spec.Mounts { + for m in &spec.mounts { let (mut flags, data) = parse_mount(&m); if !m.destination.starts_with("/") || m.destination.contains("..") { return Err(ErrorKind::Nix(nix::Error::Sys(Errno::EINVAL)).into()); } - if m.field_type == "cgroup" { - mount_cgroups(logger, m, rootfs, flags, &data, cpath, mounts)?; + if m.r#type == "cgroup" { + mount_cgroups(logger, &m, rootfs, flags, &data, cpath, mounts)?; } else { if m.destination == "/dev" { flags &= !MsFlags::MS_RDONLY; @@ -148,7 +148,7 @@ pub fn init_rootfs( unistd::chdir(rootfs)?; default_symlinks()?; - create_devices(&linux.Devices, bind_device)?; + create_devices(&linux.devices, bind_device)?; ensure_ptmx()?; unistd::chdir(&olddir)?; @@ -168,11 +168,9 @@ fn mount_cgroups( // mount tmpfs let ctm = Mount { source: "tmpfs".to_string(), - field_type: "tmpfs".to_string(), + r#type: "tmpfs".to_string(), destination: m.destination.clone(), - options: RepeatedField::default(), - unknown_fields: UnknownFields::default(), - cached_size: 
CachedSize::default(), + options: Vec::new(), }; let cflags = MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV; @@ -217,11 +215,9 @@ fn mount_cgroups( let bm = Mount { source: source.to_string(), - field_type: "bind".to_string(), + r#type: "bind".to_string(), destination: destination.clone(), - options: RepeatedField::default(), - unknown_fields: UnknownFields::default(), - cached_size: CachedSize::default(), + options: Vec::new(), }; mount_from( @@ -430,7 +426,7 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) let d = String::from(data); let dest = format!("{}{}", rootfs, &m.destination); - let src = if m.field_type.as_str() == "bind" { + let src = if m.r#type.as_str() == "bind" { let src = fs::canonicalize(m.source.as_str())?; let dir = if src.is_file() { Path::new(&dest).parent().unwrap() @@ -484,7 +480,7 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) match mount::mount( Some(src.to_str().unwrap()), dest.as_str(), - Some(m.field_type.as_str()), + Some(m.r#type.as_str()), flags, Some(d.as_str()), ) { @@ -550,8 +546,8 @@ fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> { op(dev)?; } for dev in devices { - if !dev.Path.starts_with("/dev") || dev.Path.contains("..") { - let msg = format!("{} is not a valid device path", dev.Path); + if !dev.path.starts_with("/dev") || dev.path.contains("..") { + let msg = format!("{} is not a valid device path", dev.path); bail!(ErrorKind::ErrorCode(msg)); } op(dev)?; @@ -581,22 +577,22 @@ lazy_static! 
{ } fn mknod_dev(dev: &LinuxDevice) -> Result<()> { - let f = match LINUXDEVICETYPE.get(dev.Type.as_str()) { + let f = match LINUXDEVICETYPE.get(dev.r#type.as_str()) { Some(v) => v, None => return Err(ErrorKind::ErrorCode("invalid spec".to_string()).into()), }; stat::mknod( - &dev.Path[1..], + &dev.path[1..], *f, - Mode::from_bits_truncate(dev.FileMode), - makedev(dev.Major as u64, dev.Minor as u64), + Mode::from_bits_truncate(dev.file_mode.unwrap_or(0)), + makedev(dev.major as u64, dev.minor as u64), )?; unistd::chown( - &dev.Path[1..], - Some(Uid::from_raw(dev.UID as uid_t)), - Some(Gid::from_raw(dev.GID as uid_t)), + &dev.path[1..], + Some(Uid::from_raw(dev.uid.unwrap_or(0) as uid_t)), + Some(Gid::from_raw(dev.gid.unwrap_or(0) as uid_t)), )?; Ok(()) @@ -604,7 +600,7 @@ fn mknod_dev(dev: &LinuxDevice) -> Result<()> { fn bind_dev(dev: &LinuxDevice) -> Result<()> { let fd = fcntl::open( - &dev.Path[1..], + &dev.path[1..], OFlag::O_RDWR | OFlag::O_CREAT, Mode::from_bits_truncate(0o644), )?; @@ -612,8 +608,8 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> { unistd::close(fd)?; mount::mount( - Some(&*dev.Path), - &dev.Path[1..], + Some(&*dev.path), + &dev.path[1..], None::<&str>, MsFlags::MS_BIND, None::<&str>, @@ -625,19 +621,19 @@ pub fn finish_rootfs(spec: &Spec) -> Result<()> { let olddir = unistd::getcwd()?; info!(sl!(), "{}", olddir.to_str().unwrap()); unistd::chdir("/")?; - if spec.Linux.is_some() { - let linux = spec.Linux.as_ref().unwrap(); + if spec.linux.is_some() { + let linux = spec.linux.as_ref().unwrap(); - for path in linux.MaskedPaths.iter() { + for path in linux.masked_paths.iter() { mask_path(path)?; } - for path in linux.ReadonlyPaths.iter() { + for path in linux.readonly_paths.iter() { readonly_path(path)?; } } - for m in spec.Mounts.iter() { + for m in spec.mounts.iter() { if m.destination == "/dev" { let (flags, _) = parse_mount(m); if flags.contains(MsFlags::MS_RDONLY) { @@ -652,7 +648,7 @@ pub fn finish_rootfs(spec: &Spec) -> Result<()> { } } 
- if spec.Root.as_ref().unwrap().Readonly { + if spec.root.as_ref().unwrap().readonly { let flags = MsFlags::MS_BIND | MsFlags::MS_RDONLY | MsFlags::MS_NODEV | MsFlags::MS_REMOUNT; mount::mount(Some("/"), "/", None::<&str>, flags, None::<&str>)?; diff --git a/src/agent/rustjail/src/process.rs b/src/agent/rustjail/src/process.rs index 8aac0c67d..2a525b48f 100644 --- a/src/agent/rustjail/src/process.rs +++ b/src/agent/rustjail/src/process.rs @@ -20,7 +20,7 @@ use nix::unistd::{self, Pid}; use nix::Result; use nix::Error; -use protocols::oci::Process as OCIProcess; +use oci::Process as OCIProcess; use slog::Logger; #[derive(Debug)] @@ -104,7 +104,7 @@ impl Process { info!(logger, "before create console socket!"); - if ocip.Terminal { + if ocip.terminal { let (psocket, csocket) = match socket::socketpair( AddressFamily::Unix, SockType::Stream, diff --git a/src/agent/rustjail/src/specconv.rs b/src/agent/rustjail/src/specconv.rs index 0803bfba9..ca67bee91 100644 --- a/src/agent/rustjail/src/specconv.rs +++ b/src/agent/rustjail/src/specconv.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -use protocols::oci::Spec; +use oci::Spec; // use crate::configs::namespaces; // use crate::configs::device::Device; diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index 372c07cc1..54a4a1c25 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -3,14 +3,14 @@ use crate::errors::*; use lazy_static; use nix::errno::Errno; use nix::Error; +use oci::{LinuxIDMapping, LinuxNamespace, Spec}; use protobuf::RepeatedField; -use protocols::oci::{LinuxIDMapping, LinuxNamespace, Spec}; use std::collections::HashMap; use std::path::{Component, PathBuf}; -fn contain_namespace(nses: &RepeatedField, key: &str) -> bool { +fn contain_namespace(nses: &Vec, key: &str) -> bool { for ns in nses { - if ns.Type.as_str() == key { + if ns.r#type.as_str() == key { return true; } } @@ -18,10 +18,10 @@ fn 
contain_namespace(nses: &RepeatedField, key: &str) -> bool { false } -fn get_namespace_path(nses: &RepeatedField, key: &str) -> Result { +fn get_namespace_path(nses: &Vec, key: &str) -> Result { for ns in nses { - if ns.Type.as_str() == key { - return Ok(ns.Path.clone()); + if ns.r#type.as_str() == key { + return Ok(ns.path.clone()); } } @@ -71,15 +71,15 @@ fn network(_oci: &Spec) -> Result<()> { } fn hostname(oci: &Spec) -> Result<()> { - if oci.Hostname.is_empty() || oci.Hostname == "".to_string() { + if oci.hostname.is_empty() || oci.hostname == "".to_string() { return Ok(()); } - if oci.Linux.is_none() { + if oci.linux.is_none() { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - let linux = oci.Linux.as_ref().unwrap(); - if !contain_namespace(&linux.Namespaces, "uts") { + let linux = oci.linux.as_ref().unwrap(); + if !contain_namespace(&linux.namespaces, "uts") { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } @@ -87,12 +87,12 @@ fn hostname(oci: &Spec) -> Result<()> { } fn security(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); - if linux.MaskedPaths.len() == 0 && linux.ReadonlyPaths.len() == 0 { + let linux = oci.linux.as_ref().unwrap(); + if linux.masked_paths.len() == 0 && linux.readonly_paths.len() == 0 { return Ok(()); } - if !contain_namespace(&linux.Namespaces, "mount") { + if !contain_namespace(&linux.namespaces, "mount") { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } @@ -101,9 +101,9 @@ fn security(oci: &Spec) -> Result<()> { Ok(()) } -fn idmapping(maps: &RepeatedField) -> Result<()> { +fn idmapping(maps: &Vec) -> Result<()> { for map in maps { - if map.Size > 0 { + if map.size > 0 { return Ok(()); } } @@ -112,19 +112,19 @@ fn idmapping(maps: &RepeatedField) -> Result<()> { } fn usernamespace(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); - if contain_namespace(&linux.Namespaces, "user") { + let linux = oci.linux.as_ref().unwrap(); 
+ if contain_namespace(&linux.namespaces, "user") { let user_ns = PathBuf::from("/proc/self/ns/user"); if !user_ns.exists() { return Err(ErrorKind::ErrorCode("user namespace not supported!".to_string()).into()); } // check if idmappings is correct, at least I saw idmaps // with zero size was passed to agent - idmapping(&linux.UIDMappings)?; - idmapping(&linux.GIDMappings)?; + idmapping(&linux.uid_mappings)?; + idmapping(&linux.gid_mappings)?; } else { // no user namespace but idmap - if linux.UIDMappings.len() != 0 || linux.GIDMappings.len() != 0 { + if linux.uid_mappings.len() != 0 || linux.gid_mappings.len() != 0 { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } } @@ -133,8 +133,8 @@ fn usernamespace(oci: &Spec) -> Result<()> { } fn cgroupnamespace(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); - if contain_namespace(&linux.Namespaces, "cgroup") { + let linux = oci.linux.as_ref().unwrap(); + if contain_namespace(&linux.namespaces, "cgroup") { let path = PathBuf::from("/proc/self/ns/cgroup"); if !path.exists() { return Err(ErrorKind::ErrorCode("cgroup unsupported!".to_string()).into()); @@ -178,10 +178,10 @@ fn check_host_ns(path: &str) -> Result<()> { } fn sysctl(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); - for (key, _) in linux.Sysctl.iter() { + let linux = oci.linux.as_ref().unwrap(); + for (key, _) in linux.sysctl.iter() { if SYSCTLS.contains_key(key.as_str()) || key.starts_with("fs.mqueue.") { - if contain_namespace(&linux.Namespaces, "ipc") { + if contain_namespace(&linux.namespaces, "ipc") { continue; } else { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); @@ -189,11 +189,11 @@ fn sysctl(oci: &Spec) -> Result<()> { } if key.starts_with("net.") { - if !contain_namespace(&linux.Namespaces, "network") { + if !contain_namespace(&linux.namespaces, "network") { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - let net = 
get_namespace_path(&linux.Namespaces, "network")?; + let net = get_namespace_path(&linux.namespaces, "network")?; if net.is_empty() || net == "".to_string() { continue; } @@ -201,7 +201,7 @@ fn sysctl(oci: &Spec) -> Result<()> { check_host_ns(net.as_str())?; } - if contain_namespace(&linux.Namespaces, "uts") { + if contain_namespace(&linux.namespaces, "uts") { if key == "kernel.domainname" { continue; } @@ -217,21 +217,21 @@ fn sysctl(oci: &Spec) -> Result<()> { } fn rootless_euid_mapping(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); - if !contain_namespace(&linux.Namespaces, "user") { + let linux = oci.linux.as_ref().unwrap(); + if !contain_namespace(&linux.namespaces, "user") { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - if linux.UIDMappings.len() == 0 || linux.GIDMappings.len() == 0 { + if linux.uid_mappings.len() == 0 || linux.gid_mappings.len() == 0 { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } Ok(()) } -fn has_idmapping(maps: &RepeatedField, id: u32) -> bool { +fn has_idmapping(maps: &Vec, id: u32) -> bool { for map in maps { - if id >= map.ContainerID && id < map.ContainerID + map.Size { + if id >= map.container_id && id < map.container_id + map.size { return true; } } @@ -239,9 +239,9 @@ fn has_idmapping(maps: &RepeatedField, id: u32) -> bool { } fn rootless_euid_mount(oci: &Spec) -> Result<()> { - let linux = oci.Linux.as_ref().unwrap(); + let linux = oci.linux.as_ref().unwrap(); - for mnt in oci.Mounts.iter() { + for mnt in oci.mounts.iter() { for opt in mnt.options.iter() { if opt.starts_with("uid=") || opt.starts_with("gid=") { let fields: Vec<&str> = opt.split('=').collect(); @@ -253,13 +253,13 @@ fn rootless_euid_mount(oci: &Spec) -> Result<()> { let id = fields[1].trim().parse::()?; if opt.starts_with("uid=") { - if !has_idmapping(&linux.UIDMappings, id) { + if !has_idmapping(&linux.uid_mappings, id) { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into());
} } if opt.starts_with("gid=") { - if !has_idmapping(&linux.GIDMappings, id) { + if !has_idmapping(&linux.gid_mappings, id) { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } } @@ -279,14 +279,14 @@ pub fn validate(conf: &Config) -> Result<()> { lazy_static::initialize(&SYSCTLS); let oci = conf.spec.as_ref().unwrap(); - if oci.Linux.is_none() { + if oci.linux.is_none() { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - if oci.Root.is_none() { + if oci.root.is_none() { return Err(ErrorKind::Nix(Error::from_errno(Errno::EINVAL)).into()); } - let root = oci.Root.get_ref().Path.as_str(); + let root = oci.root.as_ref().unwrap().path.as_str(); rootfs(root)?; network(oci)?; diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index bbd97cc6b..72823a5c4 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -15,7 +15,7 @@ use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCS use crate::sandbox::Sandbox; use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER}; use protocols::agent::Device; -use protocols::oci::Spec; +use oci::Spec; use rustjail::errors::*; // Convenience macro to obtain the scope logger @@ -207,7 +207,7 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { .into()); } - let linux = match spec.Linux.as_mut() { + let linux = match spec.linux.as_mut() { None => { return Err( ErrorKind::ErrorCode("Spec didn't container linux field".to_string()).into(), @@ -232,14 +232,14 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { "got the device: dev_path: {}, major: {}, minor: {}\n", &device.vm_path, major_id, minor_id ); - let devices = linux.Devices.as_mut_slice(); + let devices = linux.devices.as_mut_slice(); for dev in devices.iter_mut() { - if dev.Path == device.container_path { - let host_major = dev.Major; - let host_minor = dev.Minor; + if dev.path == device.container_path { + let host_major = dev.major; + let 
host_minor = dev.minor; - dev.Major = major_id as i64; - dev.Minor = minor_id as i64; + dev.major = major_id as i64; + dev.minor = minor_id as i64; info!( sl!(), @@ -252,12 +252,12 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { // Resources must be updated since they are used to identify the // device in the devices cgroup. - if let Some(res) = linux.Resources.as_mut() { - let ds = res.Devices.as_mut_slice(); + if let Some(res) = linux.resources.as_mut() { + let ds = res.devices.as_mut_slice(); for d in ds.iter_mut() { - if d.Major == host_major && d.Minor == host_minor { - d.Major = major_id as i64; - d.Minor = minor_id as i64; + if d.major == Some(host_major) && d.minor == Some(host_minor) { + d.major = Some(major_id as i64); + d.minor = Some(minor_id as i64); info!( sl!(), diff --git a/src/agent/src/grpc.rs b/src/agent/src/grpc.rs index b33615643..b010b449c 100644 --- a/src/agent/src/grpc.rs +++ b/src/agent/src/grpc.rs @@ -8,6 +8,7 @@ use grpcio::{EnvBuilder, Server, ServerBuilder}; use grpcio::{RpcStatus, RpcStatusCode}; use std::sync::{Arc, Mutex}; +use oci::{LinuxNamespace, Spec}; use protobuf::{RepeatedField, SingularPtrField}; use protocols::agent::CopyFileRequest; use protocols::agent::{ @@ -16,7 +17,6 @@ use protocols::agent::{ }; use protocols::empty::Empty; use protocols::health::{HealthCheckResponse, HealthCheckResponse_ServingStatus}; -use protocols::oci::{LinuxNamespace, Spec}; use rustjail; use rustjail::container::{BaseContainer, LinuxContainer}; use rustjail::errors::*; @@ -81,8 +81,8 @@ impl agentService { let sandbox; let mut s; - let oci = match oci_spec.as_mut() { - Some(spec) => spec, + let mut oci = match oci_spec.as_mut() { + Some(spec) => rustjail::grpc_to_oci(spec), None => { error!(sl!(), "no oci spec in the create container request!"); return Err( @@ -103,7 +103,7 @@ impl agentService { // updates the devices listed in the OCI spec, so that they actually // match real devices inside the VM. 
This step is necessary since we // cannot predict everything from the caller. - add_devices(&req.devices.to_vec(), oci, &self.sandbox)?; + add_devices(&req.devices.to_vec(), &mut oci, &self.sandbox)?; // Both rootfs and volumes (invoked with --volume for instance) will // be processed the same way. The idea is to always mount any provided @@ -119,11 +119,11 @@ impl agentService { s.container_mounts.insert(cid.clone(), m); } - update_container_namespaces(&s, oci)?; + update_container_namespaces(&s, &mut oci)?; // write spec to bundle path, hooks might // read ocispec - let olddir = setup_bundle(oci)?; + let olddir = setup_bundle(&oci)?; // restore the cwd for kata-agent process. defer!(unistd::chdir(&olddir).unwrap()); @@ -141,8 +141,14 @@ impl agentService { LinuxContainer::new(cid.as_str(), CONTAINER_BASE, opts, &sl!())?; let pipe_size = AGENT_CONFIG.read().unwrap().container_pipe_size; - let p = if oci.Process.is_some() { - let tp = Process::new(&sl!(), oci.get_Process(), eid.as_str(), true, pipe_size)?; + let p = if oci.process.is_some() { + let tp = Process::new( + &sl!(), + &oci.process.as_ref().unwrap(), + eid.as_str(), + true, + pipe_size, + )?; tp } else { info!(sl!(), "no process configurations!"); @@ -270,14 +276,15 @@ impl agentService { let mut sandbox = s.lock().unwrap(); // ignore string_user, not sure what it is - let ocip = if req.process.is_some() { + let process = if req.process.is_some() { req.process.as_ref().unwrap() } else { return Err(ErrorKind::Nix(nix::Error::from_errno(nix::errno::Errno::EINVAL)).into()); }; let pipe_size = AGENT_CONFIG.read().unwrap().container_pipe_size; - let p = Process::new(&sl!(), ocip, exec_id.as_str(), false, pipe_size)?; + let ocip = rustjail::process_grpc_to_oci(process); + let p = Process::new(&sl!(), &ocip, exec_id.as_str(), false, pipe_size)?; let ctr = match sandbox.get_container(cid.as_str()) { Some(v) => v, @@ -732,7 +739,7 @@ impl protocols::agent_grpc::AgentService for agentService { sink: 
::grpcio::UnarySink, ) { let cid = req.container_id.clone(); - let res = req.resources.clone(); + let res = req.resources; let s = Arc::clone(&self.sandbox); let mut sandbox = s.lock().unwrap(); @@ -742,7 +749,8 @@ impl protocols::agent_grpc::AgentService for agentService { let resp = Empty::new(); if res.is_some() { - match ctr.set(res.unwrap()) { + let ociRes = rustjail::resources_grpc_to_oci(&res.unwrap()); + match ctr.set(ociRes) { Err(_e) => { let f = sink .fail(RpcStatus::new( @@ -1605,7 +1613,7 @@ pub fn start>(sandbox: Arc>, host: S, port: u16) // sense to rely on the namespace path provided by the host since namespaces // are different inside the guest. fn update_container_namespaces(sandbox: &Sandbox, spec: &mut Spec) -> Result<()> { - let linux = match spec.Linux.as_mut() { + let linux = match spec.linux.as_mut() { None => { return Err( ErrorKind::ErrorCode("Spec didn't container linux field".to_string()).into(), @@ -1616,26 +1624,26 @@ fn update_container_namespaces(sandbox: &Sandbox, spec: &mut Spec) -> Result<()> let mut pidNs = false; - let namespaces = linux.Namespaces.as_mut_slice(); + let namespaces = linux.namespaces.as_mut_slice(); for namespace in namespaces.iter_mut() { - if namespace.Type == NSTYPEPID { + if namespace.r#type == NSTYPEPID { pidNs = true; continue; } - if namespace.Type == NSTYPEIPC { - namespace.Path = sandbox.shared_ipcns.path.clone(); + if namespace.r#type == NSTYPEIPC { + namespace.path = sandbox.shared_ipcns.path.clone(); continue; } - if namespace.Type == NSTYPEUTS { - namespace.Path = sandbox.shared_utsns.path.clone(); + if namespace.r#type == NSTYPEUTS { + namespace.path = sandbox.shared_utsns.path.clone(); continue; } } if !pidNs && !sandbox.sandbox_pid_ns { - let mut pid_ns = LinuxNamespace::new(); - pid_ns.set_Type(NSTYPEPID.to_string()); - linux.Namespaces.push(pid_ns); + let mut pid_ns = LinuxNamespace::default(); + pid_ns.r#type = NSTYPEPID.to_string(); + linux.namespaces.push(pid_ns); } Ok(()) @@ -1764,24 
+1772,23 @@ fn do_copy_file(req: &CopyFileRequest) -> Result<()> { Ok(()) } -fn setup_bundle(gspec: &Spec) -> Result { - if gspec.Root.is_none() { +fn setup_bundle(spec: &Spec) -> Result { + if spec.root.is_none() { return Err(nix::Error::Sys(Errno::EINVAL).into()); } - let root = gspec.Root.as_ref().unwrap().Path.as_str(); + let root = spec.root.as_ref().unwrap().path.as_str(); let rootfs = fs::canonicalize(root)?; let bundle_path = rootfs.parent().unwrap().to_str().unwrap(); let config = format!("{}/{}", bundle_path, "config.json"); - let oci = rustjail::grpc_to_oci(gspec); info!( sl!(), "{:?}", - oci.process.as_ref().unwrap().console_size.as_ref() + spec.process.as_ref().unwrap().console_size.as_ref() ); - let _ = oci.save(config.as_str()); + let _ = spec.save(config.as_str()); let olddir = unistd::getcwd().chain_err(|| "cannot getcwd")?; unistd::chdir(bundle_path)?; From ded27f48d5f8e70c1d05769857fac1ed0579ef4d Mon Sep 17 00:00:00 2001 From: "fupan.lfp" Date: Thu, 19 Mar 2020 20:12:58 +0800 Subject: [PATCH 2/4] oci: add Default and Clone to oci spec objects Add the clone and default feature to oci spec objects. 
Signed-off-by: fupan.lfp --- src/agent/oci/src/lib.rs | 94 ++++++++++++++++++++-------------------- src/agent/src/sandbox.rs | 14 +++--- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/src/agent/oci/src/lib.rs b/src/agent/oci/src/lib.rs index ea10f79ed..19870db51 100644 --- a/src/agent/oci/src/lib.rs +++ b/src/agent/oci/src/lib.rs @@ -27,7 +27,7 @@ where *d == T::default() } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Spec { #[serde( default, @@ -69,7 +69,7 @@ impl Spec { pub type LinuxRlimit = POSIXRlimit; -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Process { #[serde(default)] pub terminal: bool, @@ -112,7 +112,7 @@ pub struct Process { pub selinux_label: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxCapabilities { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub bounding: Vec, @@ -126,7 +126,7 @@ pub struct LinuxCapabilities { pub ambient: Vec, } -#[derive(Default, Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Box { #[serde(default)] pub height: u32, @@ -134,7 +134,7 @@ pub struct Box { pub width: u32, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct User { #[serde(default)] pub uid: u32, @@ -150,7 +150,7 @@ pub struct User { pub username: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Root { #[serde(default, skip_serializing_if = "String::is_empty")] pub path: String, @@ -158,7 +158,7 @@ pub struct Root { pub readonly: bool, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, 
Deserialize, Debug, Default, Clone, PartialEq)] pub struct Mount { #[serde(default)] pub destination: String, @@ -170,7 +170,7 @@ pub struct Mount { pub options: Vec, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Hook { #[serde(default, skip_serializing_if = "String::is_empty")] pub path: String, @@ -182,7 +182,7 @@ pub struct Hook { pub timeout: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Hooks { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub prestart: Vec, @@ -192,7 +192,7 @@ pub struct Hooks { pub poststop: Vec, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Linux { #[serde(default, rename = "uidMappings", skip_serializing_if = "Vec::is_empty")] pub uid_mappings: Vec, @@ -238,7 +238,7 @@ pub struct Linux { pub intel_rdt: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxNamespace { #[serde(default, skip_serializing_if = "String::is_empty")] pub r#type: LinuxNamespaceType, @@ -256,7 +256,7 @@ pub const USERNAMESPACE: &str = "user"; pub const UTSNAMESPACE: &str = "uts"; pub const CGROUPNAMESPACE: &str = "cgroup"; -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxIDMapping { #[serde(default, rename = "containerID")] pub container_id: u32, @@ -266,7 +266,7 @@ pub struct LinuxIDMapping { pub size: u32, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct POSIXRlimit { #[serde(default)] pub r#type: String, @@ -276,7 +276,7 @@ pub struct POSIXRlimit { pub soft: u64, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] 
+#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxHugepageLimit { #[serde(default, rename = "pageSize", skip_serializing_if = "String::is_empty")] pub page_size: String, @@ -284,7 +284,7 @@ pub struct LinuxHugepageLimit { pub limit: u64, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxInterfacePriority { #[serde(default, skip_serializing_if = "String::is_empty")] pub name: String, @@ -292,7 +292,7 @@ pub struct LinuxInterfacePriority { pub priority: u32, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxBlockIODevice { #[serde(default)] pub major: i64, @@ -300,7 +300,7 @@ pub struct LinuxBlockIODevice { pub minor: i64, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxWeightDevice { pub blk: LinuxBlockIODevice, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -313,14 +313,14 @@ pub struct LinuxWeightDevice { pub leaf_weight: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxThrottleDevice { pub blk: LinuxBlockIODevice, #[serde(default)] pub rate: u64, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxBlockIO { #[serde(default, skip_serializing_if = "Option::is_none")] pub weight: Option, @@ -362,7 +362,7 @@ pub struct LinuxBlockIO { pub throttle_write_iops_device: Vec, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxMemory { #[serde(default, skip_serializing_if = "Option::is_none")] pub limit: Option, @@ -384,7 +384,7 @@ pub struct LinuxMemory { pub disable_oom_killer: Option, 
} -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxCPU { #[serde(default, skip_serializing_if = "Option::is_none")] pub shares: Option, @@ -410,13 +410,13 @@ pub struct LinuxCPU { pub mems: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxPids { #[serde(default)] pub limit: i64, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxNetwork { #[serde(default, skip_serializing_if = "Option::is_none", rename = "classID")] pub class_id: Option, @@ -424,7 +424,7 @@ pub struct LinuxNetwork { pub priorities: Vec, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxRdma { #[serde( default, @@ -440,7 +440,7 @@ pub struct LinuxRdma { pub hca_objects: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxResources { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub devices: Vec, @@ -464,7 +464,7 @@ pub struct LinuxResources { pub rdma: HashMap, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxDevice { #[serde(default, skip_serializing_if = "String::is_empty")] pub path: String, @@ -482,7 +482,7 @@ pub struct LinuxDevice { pub gid: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxDeviceCgroup { #[serde(default)] pub allow: bool, @@ -496,7 +496,7 @@ pub struct LinuxDeviceCgroup { pub access: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct 
Solaris { #[serde(default, skip_serializing_if = "String::is_empty")] pub milestone: String, @@ -520,13 +520,13 @@ pub struct Solaris { pub capped_memory: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct SolarisCappedCPU { #[serde(default, skip_serializing_if = "String::is_empty")] pub ncpus: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct SolarisCappedMemory { #[serde(default, skip_serializing_if = "String::is_empty")] pub physical: String, @@ -534,7 +534,7 @@ pub struct SolarisCappedMemory { pub swap: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct SolarisAnet { #[serde(default, skip_serializing_if = "String::is_empty", rename = "linkname")] pub link_name: String, @@ -572,7 +572,7 @@ pub struct SolarisAnet { pub mac_address: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct Windows { #[serde( default, @@ -594,7 +594,7 @@ pub struct Windows { pub network: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsResources { #[serde(default, skip_serializing_if = "Option::is_none")] pub memory: Option, @@ -604,13 +604,13 @@ pub struct WindowsResources { pub storage: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsMemoryResources { #[serde(default, skip_serializing_if = "Option::is_none")] pub limit: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsCPUResources { #[serde(default, skip_serializing_if = 
"Option::is_none")] pub count: Option, @@ -620,7 +620,7 @@ pub struct WindowsCPUResources { pub maximum: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsStorageResources { #[serde(default, skip_serializing_if = "Option::is_none")] pub iops: Option, @@ -634,7 +634,7 @@ pub struct WindowsStorageResources { pub sandbox_size: Option, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsNetwork { #[serde( default, @@ -658,7 +658,7 @@ pub struct WindowsNetwork { pub network_shared_container_name: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct WindowsHyperV { #[serde( default, @@ -668,14 +668,14 @@ pub struct WindowsHyperV { pub utility_vm_path: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct VM { pub hypervisor: VMHypervisor, pub kernel: VMKernel, pub image: VMImage, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct VMHypervisor { #[serde(default)] pub path: String, @@ -683,7 +683,7 @@ pub struct VMHypervisor { pub parameters: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct VMKernel { #[serde(default)] pub path: String, @@ -693,7 +693,7 @@ pub struct VMKernel { pub initrd: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct VMImage { #[serde(default)] pub path: String, @@ -701,7 +701,7 @@ pub struct VMImage { pub format: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, 
Default, Clone, PartialEq)] pub struct LinuxSeccomp { #[serde(default, rename = "defaultAction")] pub default_action: LinuxSeccompAction, @@ -750,7 +750,7 @@ pub const OPGREATEREQUAL: &str = "SCMP_CMP_GE"; pub const OPGREATERTHAN: &str = "SCMP_CMP_GT"; pub const OPMASKEDEQUAL: &str = "SCMP_CMP_MASKED_EQ"; -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxSeccompArg { #[serde(default)] pub index: u32, @@ -762,7 +762,7 @@ pub struct LinuxSeccompArg { pub op: LinuxSeccompOperator, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxSyscall { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub names: Vec, @@ -772,7 +772,7 @@ pub struct LinuxSyscall { pub args: Vec, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct LinuxIntelRdt { #[serde( default, @@ -782,7 +782,7 @@ pub struct LinuxIntelRdt { pub l3_cache_schema: String, } -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq)] pub struct State { #[serde( default, diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index b7e85b29e..414c2a745 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -282,7 +282,7 @@ mod tests { use super::Sandbox; use crate::{mount::BareMount, skip_if_not_root}; use nix::mount::MsFlags; - use protocols::oci::{Linux, Root, Spec}; + use oci::{Linux, Root, Spec}; use rustjail::container::LinuxContainer; use rustjail::specconv::CreateOpts; use slog::Logger; @@ -489,13 +489,13 @@ mod tests { } fn create_dummy_opts() -> CreateOpts { - let mut root = Root::new(); - root.Path = String::from("/"); + let mut root = Root::default(); + root.path = String::from("/"); - let linux = Linux::new(); - let mut spec = Spec::new(); - spec.Root = 
Some(root).into(); - spec.Linux = Some(linux).into(); + let linux = Linux::default(); + let mut spec = Spec::default(); + spec.root = Some(root).into(); + spec.linux = Some(linux).into(); CreateOpts { cgroup_name: "".to_string(), From e56b10f835114a83403d0ecf3db162822bd97b30 Mon Sep 17 00:00:00 2001 From: "fupan.lfp" Date: Thu, 19 Mar 2020 20:22:15 +0800 Subject: [PATCH 3/4] rustjail: remove the unused imported crates remove the unused imported crates Signed-off-by: fupan.lfp --- src/agent/rustjail/src/container.rs | 2 +- src/agent/rustjail/src/mount.rs | 1 - src/agent/rustjail/src/validator.rs | 1 - src/agent/src/main.rs | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index 188d7e680..4629529c4 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -44,7 +44,7 @@ use nix::unistd::{self, ForkResult, Gid, Pid, Uid}; use nix::Error; use libc; -use protobuf::{CachedSize, SingularPtrField, UnknownFields}; +use protobuf::SingularPtrField; use oci::State as OCIState; use std::collections::HashMap; diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index 499d65b25..aff77b19a 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -26,7 +26,6 @@ use crate::errors::*; use lazy_static; use std::string::ToString; -use protobuf::{CachedSize, RepeatedField, UnknownFields}; use slog::Logger; // Info reveals information about a particular mounted filesystem. 
This diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index 54a4a1c25..76777eb34 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -4,7 +4,6 @@ use lazy_static; use nix::errno::Errno; use nix::Error; use oci::{LinuxIDMapping, LinuxNamespace, Spec}; -use protobuf::RepeatedField; use std::collections::HashMap; use std::path::{Component, PathBuf}; diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 6829c22b3..dab89daa0 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -16,7 +16,6 @@ extern crate regex; extern crate rustjail; extern crate serde_json; extern crate signal_hook; -#[macro_use] extern crate scan_fmt; extern crate oci; From c1b6838e254896213c1a3495ac1ee0245b56ee2b Mon Sep 17 00:00:00 2001 From: "fupan.lfp" Date: Fri, 20 Mar 2020 16:08:37 +0800 Subject: [PATCH 4/4] rustjail: refactoring the way of creating container process In the previous implementation, a container process was created by forking the parent process, and the forked child then did much more setup, such as mounting the rootfs, dropping capabilities and so on, before finally exec'ing the container entry cmd to switch into the container process. But the parent is a multi-threaded process, which could cause a deadlock in the forked child. For example, if one of the parent process's threads does a malloc operation, which takes a mutex lock, and at the same time the parent forks a child process, the mutex lock status is inherited by the child process, but there is no chance to release the lock in the child since the child process only has a single thread; the child would then deadlock if it does a malloc operation. Thus, the new implementation does exec directly after fork and then does the setup in the exec'ed process. Of course, this requires data communication between parent and child, since the child cannot depend on memory shared via fork. 
Fixes: #166 Fixes: #133 Signed-off-by: fupan.lfp --- src/agent/rustjail/src/capabilities.rs | 24 +- src/agent/rustjail/src/cgroups/fs/mod.rs | 23 +- src/agent/rustjail/src/container.rs | 1401 +++++++++++----------- src/agent/rustjail/src/errors.rs | 4 +- src/agent/rustjail/src/lib.rs | 8 +- src/agent/rustjail/src/mount.rs | 69 +- src/agent/rustjail/src/process.rs | 58 +- src/agent/rustjail/src/sync.rs | 177 +++ src/agent/rustjail/src/validator.rs | 6 + src/agent/src/device.rs | 2 +- src/agent/src/grpc.rs | 4 +- src/agent/src/main.rs | 8 +- 12 files changed, 1007 insertions(+), 777 deletions(-) create mode 100644 src/agent/rustjail/src/sync.rs diff --git a/src/agent/rustjail/src/capabilities.rs b/src/agent/rustjail/src/capabilities.rs index 5a156ff5e..6a2f1201a 100644 --- a/src/agent/rustjail/src/capabilities.rs +++ b/src/agent/rustjail/src/capabilities.rs @@ -9,10 +9,12 @@ use lazy_static; use crate::errors::*; +use crate::log_child; +use crate::sync::write_count; use caps::{self, CapSet, Capability, CapsHashSet}; use oci::LinuxCapabilities; -use slog::Logger; use std::collections::HashMap; +use std::os::unix::io::RawFd; lazy_static! { pub static ref CAPSMAP: HashMap = { @@ -76,14 +78,14 @@ lazy_static! 
{ }; } -fn to_capshashset(logger: &Logger, caps: &[String]) -> CapsHashSet { +fn to_capshashset(cfd_log: RawFd, caps: &[String]) -> CapsHashSet { let mut r = CapsHashSet::new(); for cap in caps.iter() { let c = CAPSMAP.get(cap); if c.is_none() { - warn!(logger, "{} is not a cap", cap); + log_child!(cfd_log, "{} is not a cap", cap); continue; } @@ -98,37 +100,35 @@ pub fn reset_effective() -> Result<()> { Ok(()) } -pub fn drop_priviledges(logger: &Logger, caps: &LinuxCapabilities) -> Result<()> { - let logger = logger.new(o!("subsystem" => "capabilities")); - +pub fn drop_priviledges(cfd_log: RawFd, caps: &LinuxCapabilities) -> Result<()> { let all = caps::all(); - for c in all.difference(&to_capshashset(&logger, caps.bounding.as_ref())) { + for c in all.difference(&to_capshashset(cfd_log, caps.bounding.as_ref())) { caps::drop(None, CapSet::Bounding, *c)?; } caps::set( None, CapSet::Effective, - to_capshashset(&logger, caps.effective.as_ref()), + to_capshashset(cfd_log, caps.effective.as_ref()), )?; caps::set( None, CapSet::Permitted, - to_capshashset(&logger, caps.permitted.as_ref()), + to_capshashset(cfd_log, caps.permitted.as_ref()), )?; caps::set( None, CapSet::Inheritable, - to_capshashset(&logger, caps.inheritable.as_ref()), + to_capshashset(cfd_log, caps.inheritable.as_ref()), )?; if let Err(_) = caps::set( None, CapSet::Ambient, - to_capshashset(&logger, caps.ambient.as_ref()), + to_capshashset(cfd_log, caps.ambient.as_ref()), ) { - warn!(logger, "failed to set ambient capability"); + log_child!(cfd_log, "failed to set ambient capability"); } Ok(()) diff --git a/src/agent/rustjail/src/cgroups/fs/mod.rs b/src/agent/rustjail/src/cgroups/fs/mod.rs index 336374292..aba62822f 100644 --- a/src/agent/rustjail/src/cgroups/fs/mod.rs +++ b/src/agent/rustjail/src/cgroups/fs/mod.rs @@ -2,7 +2,6 @@ // // SPDX-License-Identifier: Apache-2.0 // - use crate::cgroups::FreezerState; use crate::cgroups::Manager as CgroupManager; use crate::container::DEFAULT_DEVICES; @@ -19,6 
+18,7 @@ use protocols::agent::{ use regex::Regex; use std::collections::HashMap; use std::fs; +use std::path::Path; // Convenience macro to obtain the scope logger macro_rules! sl { @@ -207,7 +207,7 @@ fn parse_size(s: &str, m: &HashMap) -> Result { fn custom_size(mut size: f64, base: f64, m: &Vec) -> String { let mut i = 0; - while size > base { + while size >= base && i < m.len() - 1 { size /= base; i += 1; } @@ -307,7 +307,6 @@ where T: ToString, { let p = format!("{}/{}", dir, file); - info!(sl!(), "{}", p.as_str()); fs::write(p.as_str(), v.to_string().as_bytes())?; Ok(()) } @@ -936,6 +935,11 @@ fn get_blkio_stat(dir: &str, file: &str) -> Result = l.split(' ').collect(); @@ -1224,7 +1228,7 @@ fn get_all_procs(dir: &str) -> Result> { Ok(m) } -#[derive(Debug, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct Manager { pub paths: HashMap, pub mounts: HashMap, @@ -1238,7 +1242,6 @@ pub const FROZEN: &'static str = "FROZEN"; impl CgroupManager for Manager { fn apply(&self, pid: pid_t) -> Result<()> { for (key, value) in &self.paths { - info!(sl!(), "apply cgroup {}", key); apply(value, pid)?; } @@ -1249,7 +1252,6 @@ impl CgroupManager for Manager { for (key, value) in &self.paths { let _ = fs::create_dir_all(value); let sub = get_subsystem(key)?; - info!(sl!(), "setting cgroup {}", key); sub.set(value, spec, update)?; } @@ -1301,9 +1303,16 @@ impl CgroupManager for Manager { }; // BlkioStats + // note that virtiofs has no blkio stats info!(sl!(), "blkio_stats"); let blkio_stats = if self.paths.get("blkio").is_some() { - SingularPtrField::some(Blkio().get_stats(self.paths.get("blkio").unwrap())?) 
+ match Blkio().get_stats(self.paths.get("blkio").unwrap()) { + Ok(stat) => SingularPtrField::some(stat), + Err(e) => { + warn!(sl!(), "failed to get blkio stats"); + SingularPtrField::none() + } + } } else { SingularPtrField::none() }; diff --git a/src/agent/rustjail/src/container.rs b/src/agent/rustjail/src/container.rs index 4629529c4..17b9fefca 100644 --- a/src/agent/rustjail/src/container.rs +++ b/src/agent/rustjail/src/container.rs @@ -8,7 +8,6 @@ use oci::{Hook, Linux, LinuxNamespace, LinuxResources, POSIXRlimit, Spec}; use serde_json; use std::ffi::{CStr, CString}; use std::fs; -use std::mem; use std::os::unix::io::RawFd; use std::path::{Path, PathBuf}; use std::time::SystemTime; @@ -17,14 +16,16 @@ use libc::pid_t; use oci::{LinuxDevice, LinuxIDMapping}; use std::clone::Clone; use std::fmt::Display; -use std::process::Command; +use std::process::{Child, Command}; // use crate::configs::namespaces::{NamespaceType}; use crate::cgroups::Manager as CgroupManager; use crate::process::Process; // use crate::intelrdt::Manager as RdtManager; use crate::errors::*; +use crate::log_child; use crate::specconv::CreateOpts; +use crate::sync::*; // use crate::stats::Stats; use crate::capabilities::{self, CAPSMAP}; use crate::cgroups::fs::{self as fscgroup, Manager as FsManager}; @@ -34,12 +35,11 @@ use protocols::agent::StatsContainerResponse; use nix::errno::Errno; use nix::fcntl::{self, OFlag}; +use nix::fcntl::{FcntlArg, FdFlag}; use nix::pty; use nix::sched::{self, CloneFlags}; use nix::sys::signal::{self, Signal}; -use nix::sys::socket::{self, ControlMessage, ControlMessageOwned, MsgFlags}; use nix::sys::stat::{self, Mode}; -use nix::sys::uio::IoVec; use nix::unistd::{self, ForkResult, Gid, Pid, Uid}; use nix::Error; @@ -48,12 +48,23 @@ use protobuf::SingularPtrField; use oci::State as OCIState; use std::collections::HashMap; +use std::io::BufRead; +use std::io::BufReader; +use std::os::unix::io::FromRawFd; use slog::{debug, info, o, Logger}; const STATE_FILENAME: 
&'static str = "state.json"; const EXEC_FIFO_FILENAME: &'static str = "exec.fifo"; const VER_MARKER: &'static str = "1.2.5"; +const PID_NS_PATH: &str = "/proc/self/ns/pid"; + +const INIT: &str = "INIT"; +const NO_PIVOT: &str = "NO_PIVOT"; +const CRFD_FD: &str = "CRFD_FD"; +const CWFD_FD: &str = "CWFD_FD"; +const CLOG_FD: &str = "CLOG_FD"; +const FIFO_FD: &str = "FIFO_FD"; type Status = Option; pub type Config = CreateOpts; @@ -240,6 +251,321 @@ pub trait Container: BaseContainer { // fn notify_memory_pressure(&self, lvl: PressureLevel) -> Result<(Sender, Receiver)>; } +pub fn init_child() { + let cwfd = std::env::var(CWFD_FD).unwrap().parse::().unwrap(); + let cfd_log = std::env::var(CLOG_FD).unwrap().parse::().unwrap(); + match do_init_child(cwfd) { + Ok(_) => (), + Err(e) => { + log_child!(cfd_log, "child exit: {:?}", e); + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return; + } + } + + std::process::exit(-1); +} + +fn do_init_child(cwfd: RawFd) -> Result<()> { + lazy_static::initialize(&NAMESPACES); + lazy_static::initialize(&DEFAULT_DEVICES); + lazy_static::initialize(&RLIMITMAPS); + lazy_static::initialize(&CAPSMAP); + + let init = std::env::var(INIT)?.eq(format!("{}", true).as_str()); + let no_pivot = std::env::var(NO_PIVOT)?.eq(format!("{}", true).as_str()); + let crfd = std::env::var(CRFD_FD)?.parse::().unwrap(); + let cfd_log = std::env::var(CLOG_FD)?.parse::().unwrap(); + + log_child!(cfd_log, "child process start run"); + let buf = read_sync(crfd)?; + let spec_str = std::str::from_utf8(&buf)?; + let spec: oci::Spec = serde_json::from_str(spec_str)?; + + log_child!(cfd_log, "notify parent to send oci process"); + write_sync(cwfd, SYNC_SUCCESS, "")?; + + let buf = read_sync(crfd)?; + let process_str = std::str::from_utf8(&buf)?; + let mut oci_process: oci::Process = serde_json::from_str(process_str)?; + log_child!(cfd_log, "notify parent to send cgroup manager"); + write_sync(cwfd, SYNC_SUCCESS, "")?; + + let buf = read_sync(crfd)?; + let 
cm_str = std::str::from_utf8(&buf)?; + let cm: FsManager = serde_json::from_str(cm_str)?; + + let p = if spec.process.is_some() { + spec.process.as_ref().unwrap() + } else { + return Err(ErrorKind::ErrorCode("didn't find process in Spec".to_string()).into()); + }; + + if spec.linux.is_none() { + return Err(ErrorKind::ErrorCode("no linux config".to_string()).into()); + } + let linux = spec.linux.as_ref().unwrap(); + + // get namespace vector to join/new + let nses = get_namespaces(&linux)?; + + let mut userns = false; + let mut to_new = CloneFlags::empty(); + let mut to_join = Vec::new(); + + for ns in &nses { + let s = NAMESPACES.get(&ns.r#type.as_str()); + if s.is_none() { + return Err(ErrorKind::ErrorCode("invalid ns type".to_string()).into()); + } + let s = s.unwrap(); + + if ns.path.is_empty() { + // skip the pidns since it has been done in parent process. + if *s != CloneFlags::CLONE_NEWPID { + to_new.set(*s, true); + } + } else { + let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + log_child!( + cfd_log, + "cannot open type: {} path: {}", + ns.r#type.clone(), + ns.path.clone() + ); + log_child!(cfd_log, "error is : {}", e.as_errno().unwrap().desc()); + return Err(e.into()); + } + }; + + if *s != CloneFlags::CLONE_NEWPID { + to_join.push((*s, fd)); + } + } + } + + if to_new.contains(CloneFlags::CLONE_NEWUSER) { + userns = true; + } + + if p.oom_score_adj.is_some() { + log_child!(cfd_log, "write oom score {}", p.oom_score_adj.unwrap()); + fs::write( + "/proc/self/oom_score_adj", + p.oom_score_adj.unwrap().to_string().as_bytes(), + )?; + } + + // set rlimit + for rl in p.rlimits.iter() { + log_child!(cfd_log, "set resource limit: {:?}", rl); + setrlimit(rl)?; + } + + if userns { + log_child!(cfd_log, "enter new user namespace"); + sched::unshare(CloneFlags::CLONE_NEWUSER)?; + } + + log_child!(cfd_log, "notify parent unshare user ns completed"); + // notify parent unshare user ns completed. 
+ write_sync(cwfd, SYNC_SUCCESS, "")?; + // wait parent to setup user id mapping. + log_child!(cfd_log, "wait parent to setup user id mapping"); + read_sync(crfd)?; + + if userns { + log_child!(cfd_log, "setup user id"); + setid(Uid::from_raw(0), Gid::from_raw(0))?; + } + + let mut mount_fd = -1; + let mut bind_device = false; + for (s, fd) in to_join { + if s == CloneFlags::CLONE_NEWNS { + mount_fd = fd; + continue; + } + + log_child!(cfd_log, "join namespace {:?}", s); + if let Err(e) = sched::setns(fd, s) { + if s == CloneFlags::CLONE_NEWUSER { + if e.as_errno().unwrap() != Errno::EINVAL { + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return Err(e.into()); + } + } else { + write_sync(cwfd, SYNC_FAILED, format!("{:?}", e).as_str()); + return Err(e.into()); + } + } + unistd::close(fd)?; + + if s == CloneFlags::CLONE_NEWUSER { + setid(Uid::from_raw(0), Gid::from_raw(0))?; + bind_device = true; + } + } + + sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; + + if userns { + bind_device = true; + } + + if to_new.contains(CloneFlags::CLONE_NEWUTS) { + unistd::sethostname(&spec.hostname)?; + } + + let rootfs = spec.root.as_ref().unwrap().path.as_str(); + log_child!(cfd_log, "setup rootfs {}", rootfs); + let root = fs::canonicalize(rootfs)?; + let rootfs = root.to_str().unwrap(); + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + // setup rootfs + mount::init_rootfs(cfd_log, &spec, &cm.paths, &cm.mounts, bind_device)?; + } + + if init { + // notify parent to run prestart hooks + write_sync(cwfd, SYNC_SUCCESS, "")?; + // wait parent run prestart hooks + let _ = read_sync(crfd)?; + } + + if mount_fd != -1 { + sched::setns(mount_fd, CloneFlags::CLONE_NEWNS)?; + unistd::close(mount_fd)?; + } + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + // unistd::chroot(rootfs)?; + if no_pivot { + mount::ms_move_root(rootfs)?; + } else { + // pivot root + mount::pivot_rootfs(rootfs)?; + } + + // setup sysctl + set_sysctls(&linux.sysctl)?; + unistd::chdir("/")?; 
+ } + + if to_new.contains(CloneFlags::CLONE_NEWNS) { + mount::finish_rootfs(cfd_log, &spec)?; + } + + if !oci_process.cwd.is_empty() { + unistd::chdir(oci_process.cwd.as_str())?; + } + + let guser = &oci_process.user; + + let uid = Uid::from_raw(guser.uid); + let gid = Gid::from_raw(guser.gid); + + setid(uid, gid)?; + + if guser.additional_gids.len() > 0 { + setgroups(guser.additional_gids.as_slice()).map_err(|e| { + write_sync( + cwfd, + SYNC_FAILED, + format!("setgroups failed: {:?}", e).as_str(), + ); + e + })?; + } + + // NoNewPeiviledges, Drop capabilities + if oci_process.no_new_privileges { + if let Err(_) = prctl::set_no_new_privileges(true) { + return Err(ErrorKind::ErrorCode("cannot set no new privileges".to_string()).into()); + } + } + + if oci_process.capabilities.is_some() { + let c = oci_process.capabilities.as_ref().unwrap(); + capabilities::drop_priviledges(cfd_log, c)?; + } + + if init { + // notify parent to run poststart hooks + // cfd is closed when return from join_namespaces + // should retunr cfile instead of cfd? 
+ write_sync(cwfd, SYNC_SUCCESS, "")?; + } + + let args = oci_process.args.to_vec(); + let env = oci_process.env.to_vec(); + + let mut fifofd = -1; + if init { + fifofd = std::env::var(FIFO_FD)?.parse::().unwrap(); + } + + //cleanup the env inherited from parent + for (key, _) in env::vars() { + env::remove_var(key); + } + + // setup the envs + for e in env.iter() { + let v: Vec<&str> = e.splitn(2, "=").collect(); + if v.len() != 2 { + //info!(logger, "incorrect env config!"); + continue; + } + env::set_var(v[0], v[1]); + } + + let exec_file = Path::new(&args[0]); + log_child!(cfd_log, "process command: {:?}", &args); + if !exec_file.exists() { + match find_file(exec_file) { + Some(_) => (), + None => { + return Err( + ErrorKind::ErrorCode(format!("the file {} is not exist", &args[0])).into(), + ); + } + } + } + + // notify parent that the child's ready to start + write_sync(cwfd, SYNC_SUCCESS, "")?; + log_child!(cfd_log, "ready to run exec"); + unistd::close(cfd_log); + unistd::close(crfd); + unistd::close(cwfd); + + if oci_process.terminal { + unistd::setsid()?; + unsafe { + libc::ioctl(0, libc::TIOCSCTTY); + } + } + + if init { + let fd = fcntl::open( + format!("/proc/self/fd/{}", fifofd).as_str(), + OFlag::O_RDONLY | OFlag::O_CLOEXEC, + Mode::from_bits_truncate(0), + )?; + unistd::close(fifofd)?; + let mut buf: &mut [u8] = &mut [0]; + unistd::read(fd, &mut buf)?; + } + + do_exec(&args); + + Err(ErrorKind::ErrorCode("fail to create container".to_string()).into()) +} + impl BaseContainer for LinuxContainer { fn id(&self) -> String { self.id.clone() @@ -322,6 +648,7 @@ impl BaseContainer for LinuxContainer { } fn start(&mut self, mut p: Process) -> Result<()> { + let tty = p.tty; let fifo_file = format!("{}/{}", &self.root, EXEC_FIFO_FILENAME); info!(self.logger, "enter container.start!"); let mut fifofd: RawFd = -1; @@ -334,16 +661,12 @@ impl BaseContainer for LinuxContainer { fifofd = fcntl::open( fifo_file.as_str(), - OFlag::O_PATH | OFlag::O_CLOEXEC, + 
OFlag::O_PATH, Mode::from_bits(0).unwrap(), )?; } info!(self.logger, "exec fifo opened!"); - lazy_static::initialize(&NAMESPACES); - lazy_static::initialize(&DEFAULT_DEVICES); - lazy_static::initialize(&RLIMITMAPS); - lazy_static::initialize(&CAPSMAP); fscgroup::init_static(); if self.config.spec.is_none() { @@ -354,240 +677,197 @@ impl BaseContainer for LinuxContainer { if spec.linux.is_none() { return Err(ErrorKind::ErrorCode("no linux config".to_string()).into()); } - let linux = spec.linux.as_ref().unwrap(); - // get namespace vector to join/new - let nses = get_namespaces(&linux, p.init, self.init_process_pid)?; - info!(self.logger, "got namespaces {:?}!\n", nses); - let mut to_new = CloneFlags::empty(); - let mut to_join = Vec::new(); - let mut pidns = false; - let mut userns = false; - for ns in &nses { - let s = NAMESPACES.get(&ns.r#type.as_str()); - if s.is_none() { - return Err(ErrorKind::ErrorCode("invalid ns type".to_string()).into()); - } - let s = s.unwrap(); - if ns.path.is_empty() { - to_new.set(*s, true); - } else { - let fd = match fcntl::open(ns.path.as_str(), OFlag::empty(), Mode::empty()) { - Ok(v) => v, - Err(e) => { - info!( - self.logger, - "cannot open type: {} path: {}", - ns.r#type.clone(), - ns.path.clone() - ); - info!(self.logger, "error is : {}", e.as_errno().unwrap().desc()); - return Err(e.into()); - } - }; - // .chain_err(|| format!("fail to open ns {}", &ns.Type))?; - to_join.push((*s, fd)); - } - - if *s == CloneFlags::CLONE_NEWPID { - pidns = true; - } - } - - if to_new.contains(CloneFlags::CLONE_NEWUSER) { - userns = true; - } - - let mut parent: u32 = 0; let st = self.oci_state()?; - let (child, cfd) = match join_namespaces( - &self.logger, - &spec, - to_new, - &to_join, - pidns, - userns, - p.init, - self.config.no_pivot_root, - self.cgroup_manager.as_ref().unwrap(), - &st, - &mut parent, - ) { - Ok((u, v)) => (u, v), - Err(e) => { - if parent == 0 { - info!(self.logger, "parent process error out!"); - return Err(e); - } else 
if parent == 1 { - info!(self.logger, "child process 1 error out!"); - std::process::exit(-1); - } else { - info!(self.logger, "child process 2 error out!"); - std::process::exit(-2); + let execid = p.exec_id.clone(); + let (pfd_log, cfd_log) = unistd::pipe().chain_err(|| "failed to create pipe")?; + fcntl::fcntl(pfd_log, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + let logger = self.logger.new(o!("action" => "child process log")); + let log_handler = thread::spawn(move || { + let log_file = unsafe { std::fs::File::from_raw_fd(pfd_log) }; + let mut reader = BufReader::new(log_file); + let execid = execid; + + loop { + let mut line = String::new(); + match reader.read_line(&mut line) { + Err(e) => { + info!(logger, "read child process log error: {:?}", e); + break; + } + Ok(count) => { + if count == 0 { + info!( + logger, + "execid:{}, read child process log end", + execid.as_str() + ); + break; + } + + info!(logger, "execid:{},{}", execid.as_str(), line); + } } } + }); + + info!(self.logger, "exec fifo opened!"); + let (prfd, cwfd) = unistd::pipe().chain_err(|| "failed to create pipe")?; + let (crfd, pwfd) = unistd::pipe().chain_err(|| "failed to create pipe")?; + fcntl::fcntl(prfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + fcntl::fcntl(pwfd, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + defer!({ + unistd::close(prfd); + unistd::close(pwfd); + }); + + let mut child_stdin = std::process::Stdio::null(); + let mut child_stdout = std::process::Stdio::null(); + let mut child_stderr = std::process::Stdio::null(); + let mut stdin = -1; + let mut stdout = -1; + let mut stderr = -1; + + if tty { + let pseduo = pty::openpty(None, None)?; + p.term_master = Some(pseduo.master); + fcntl::fcntl(pseduo.master, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + fcntl::fcntl(pseduo.slave, FcntlArg::F_SETFD(FdFlag::FD_CLOEXEC)); + + child_stdin = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + child_stdout = unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + child_stderr 
= unsafe { std::process::Stdio::from_raw_fd(pseduo.slave) }; + } else { + stdin = p.stdin.unwrap(); + stdout = p.stdout.unwrap(); + stderr = p.stderr.unwrap(); + child_stdin = unsafe { std::process::Stdio::from_raw_fd(stdin) }; + child_stdout = unsafe { std::process::Stdio::from_raw_fd(stdout) }; + child_stderr = unsafe { std::process::Stdio::from_raw_fd(stderr) }; + } + + let old_pid_ns = match fcntl::open(PID_NS_PATH, OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + error!( + self.logger, + "cannot open pid ns path: {} with error: {:?}", PID_NS_PATH, e + ); + return Err(e.into()); + } }; - info!(self.logger, "entered namespaces!"); - if child != Pid::from_raw(-1) { - // parent - p.pid = child.as_raw(); - self.status = Some("created".to_string()); - if p.init { - self.init_process_pid = p.pid; - unistd::close(fifofd)?; - } - self.created = SystemTime::now(); - // defer!({ self.processes.insert(p.pid, p); () }); - // parent process need to receive ptmx masterfd - // and set it up in process struct - unistd::close(p.stdin.unwrap())?; - unistd::close(p.stderr.unwrap())?; - unistd::close(p.stdout.unwrap())?; + //restore the parent's process's pid namespace. 
+ defer!({ + sched::setns(old_pid_ns, CloneFlags::CLONE_NEWPID); + unistd::close(old_pid_ns); + }); - for &(_, fd) in &to_join { - let _ = unistd::close(fd); - } - - // create the pipes for notify process exited - let (exit_pipe_r, exit_pipe_w) = - unistd::pipe2(OFlag::O_CLOEXEC).chain_err(|| "failed to create pipe")?; - p.exit_pipe_w = Some(exit_pipe_w); - p.exit_pipe_r = Some(exit_pipe_r); - - let console_fd = if p.parent_console_socket.is_some() { - p.parent_console_socket.unwrap() - } else { - self.processes.insert(p.pid, p); - return Ok(()); - }; - - let mut v: Vec = vec![0; 40]; - let iov = IoVec::from_mut_slice(v.as_mut_slice()); - let mut c: Vec = vec![0; 40]; - - match socket::recvmsg(console_fd, &[iov], Some(&mut c), MsgFlags::empty()) { - Ok(rmsg) => { - let cmsg: Vec = rmsg.cmsgs().collect(); - // expect the vector lenght 1 - if cmsg.len() != 1 { - return Err( - ErrorKind::ErrorCode("error in semd/recvmsg!".to_string()).into() - ); - } - - match &cmsg[0] { - ControlMessageOwned::ScmRights(v) => { - if v.len() != 1 { - return Err(ErrorKind::ErrorCode( - "error in send/recvmsg!".to_string(), - ) - .into()); - } - - p.term_master = Some(v[0]); - } - // all other cases are error - _ => { - return Err( - ErrorKind::ErrorCode("error in send/recvmsg!".to_string()).into() - ); - } - } - } - Err(e) => return Err(ErrorKind::Nix(e).into()), - } - - unistd::close(p.parent_console_socket.unwrap())?; - unistd::close(p.console_socket.unwrap())?; - - // turn off echo - // let mut term = termios::tcgetattr(p.term_master.unwrap())?; - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // termios::tcsetattr(p.term_master.unwrap(), SetArg::TCSANOW, &term)?; - - self.processes.insert(p.pid, p); - - return Ok(()); - } // end parent - - // setup stdio in child process - // need fd to send master fd to parent... store the fd in - // process struct? 
- setup_stdio(&p)?; - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - info!(self.logger, "finish rootfs!"); - mount::finish_rootfs(spec)?; + let mut pidns = None; + if !p.init { + pidns = Some(get_pid_namespace(&self.logger, linux)?); } - if !p.oci.cwd.is_empty() { - debug!(self.logger, "cwd: {}", p.oci.cwd.as_str()); - unistd::chdir(p.oci.cwd.as_str())?; + if pidns.is_some() { + sched::setns(pidns.unwrap(), CloneFlags::CLONE_NEWPID) + .chain_err(|| "failed to join pidns")?; + unistd::close(pidns.unwrap())?; + } else { + sched::unshare(CloneFlags::CLONE_NEWPID)?; } - // setup uid/gid - info!(self.logger, "{:?}", &p.oci); + let exec_path = std::env::current_exe()?; + let mut child = std::process::Command::new(exec_path); + let mut child = child + .arg("init") + .stdin(child_stdin) + .stdout(child_stdout) + .stderr(child_stderr) + .env(INIT, format!("{}", p.init)) + .env(NO_PIVOT, format!("{}", self.config.no_pivot_root)) + .env(CRFD_FD, format!("{}", crfd)) + .env(CWFD_FD, format!("{}", cwfd)) + .env(CLOG_FD, format!("{}", cfd_log)); - let guser = &p.oci.user; - - let uid = Uid::from_raw(guser.uid); - let gid = Gid::from_raw(guser.gid); - - setid(uid, gid)?; - - if guser.additional_gids.len() > 0 { - setgroups(guser.additional_gids.as_slice())?; + if p.init { + child = child.env(FIFO_FD, format!("{}", fifofd)); } - // NoNewPeiviledges, Drop capabilities - if p.oci.no_new_privileges { - if let Err(_) = prctl::set_no_new_privileges(true) { - return Err( - ErrorKind::ErrorCode("cannot set no new privileges".to_string()).into(), + let mut child = child.spawn()?; + + unistd::close(crfd)?; + unistd::close(cwfd)?; + unistd::close(cfd_log)?; + + p.pid = child.id() as i32; + if p.init { + self.init_process_pid = p.pid; + } + + if p.init { + unistd::close(fifofd); + } + + info!(self.logger, "execid: {}, child pid: {}", p.exec_id, p.pid); + + match join_namespaces( + &self.logger, + &spec, + &p, + self.cgroup_manager.as_ref().unwrap(), + &st, + &mut child, + pwfd, + prfd, + 
) { + Ok(_) => (), + Err(e) => { + error!( + self.logger, + "execid:{}, create container process error {:?}", + p.exec_id.as_str(), + e ); + // kill the child process. + signal::kill(Pid::from_raw(p.pid), Some(Signal::SIGKILL)); + return Err(e); } - } + }; - if p.oci.capabilities.is_some() { - let c = p.oci.capabilities.as_ref().unwrap(); - info!(self.logger, "drop capabilities!"); - capabilities::drop_priviledges(&self.logger, c)?; - } + info!( + self.logger, + "execid: {}, entered namespaces!", + p.exec_id.as_str() + ); + + self.status = Some("created".to_string()); + self.created = SystemTime::now(); + + // create the pipes for notify process exited + let (exit_pipe_r, exit_pipe_w) = unistd::pipe2(OFlag::O_CLOEXEC) + .chain_err(|| "failed to create pipe") + .map_err(|e| { + signal::kill(Pid::from_raw(child.id() as i32), Some(Signal::SIGKILL)); + e + })?; + + p.exit_pipe_w = Some(exit_pipe_w); + p.exit_pipe_r = Some(exit_pipe_r); if p.init { - // notify parent to run poststart hooks - // cfd is closed when return from join_namespaces - // should retunr cfile instead of cfd? - write_sync(cfd, 0)?; + let spec = self.config.spec.as_mut().unwrap(); + update_namespaces(&self.logger, spec, p.pid)?; } + self.processes.insert(p.pid, p); - // new and the stat parent process - // For init process, we need to setup a lot of things - // For exec process, only need to join existing namespaces, - // the namespaces are got from init process or from - // saved spec. 
- debug!(self.logger, "before setup execfifo!"); - info!(self.logger, "{}", VER_MARKER); - if p.init { - let fd = fcntl::open( - format!("/proc/self/fd/{}", fifofd).as_str(), - OFlag::O_RDONLY | OFlag::O_CLOEXEC, - Mode::from_bits_truncate(0), - )?; - unistd::close(fifofd)?; - let mut buf: &mut [u8] = &mut [0]; - unistd::read(fd, &mut buf)?; - } - - // exec process - let args = p.oci.args.to_vec(); - let env = p.oci.env.to_vec(); - do_exec(&self.logger, &args[0], &args, &env)?; - - Err(ErrorKind::ErrorCode("fail to create container".to_string()).into()) + info!(self.logger, "wait on child log handler"); + log_handler.join(); + info!(self.logger, "create process completed"); + return Ok(()); } fn run(&mut self, p: Process) -> Result<()> { @@ -619,6 +899,7 @@ impl BaseContainer for LinuxContainer { } self.status = Some("stopped".to_string()); + fs::remove_dir_all(&self.root)?; Ok(()) } @@ -654,9 +935,26 @@ impl BaseContainer for LinuxContainer { use std::env; -fn do_exec(logger: &Logger, path: &str, args: &[String], env: &[String]) -> Result<()> { - let logger = logger.new(o!("command" => "exec")); +fn find_file

(exe_name: P) -> Option +where + P: AsRef, +{ + env::var_os("PATH").and_then(|paths| { + env::split_paths(&paths) + .filter_map(|dir| { + let full_path = dir.join(&exe_name); + if full_path.is_file() { + Some(full_path) + } else { + None + } + }) + .next() + }) +} +fn do_exec(args: &[String]) -> Result<()> { + let path = &args[0]; let p = CString::new(path.to_string()).unwrap(); let sa: Vec = args .iter() @@ -664,126 +962,41 @@ fn do_exec(logger: &Logger, path: &str, args: &[String], env: &[String]) -> Resu .collect(); let a: Vec<&CStr> = sa.iter().map(|s| s.as_c_str()).collect(); - for (key, _) in env::vars() { - env::remove_var(key); - } - - for e in env.iter() { - let v: Vec<&str> = e.splitn(2, "=").collect(); - if v.len() != 2 { - info!(logger, "incorrect env config!"); - continue; - } - env::set_var(v[0], v[1]); - } - /* - let env: Vec = env - .iter() - .map(|s| CString::new(s.to_string()).unwrap_or_default()) - .collect(); - */ - // execvp doesn't use env for the search path, so we set env manually - debug!(logger, "exec process right now!"); if let Err(e) = unistd::execvp(p.as_c_str(), a.as_slice()) { - info!(logger, "execve failed!!!"); - info!(logger, "binary: {:?}, args: {:?}, envs: {:?}", p, a, env); + // info!(logger, "execve failed!!!"); + // info!(logger, "binary: {:?}, args: {:?}, envs: {:?}", p, a, env); match e { nix::Error::Sys(errno) => { - info!(logger, "{}", errno.desc()); - } - Error::InvalidPath => { - info!(logger, "invalid path"); - } - Error::InvalidUtf8 => { - info!(logger, "invalid utf8"); - } - Error::UnsupportedOperation => { - info!(logger, "unsupported operation"); + std::process::exit(errno as i32); } + _ => std::process::exit(-2), } - std::process::exit(-2); } // should never reach here Ok(()) } -fn get_namespaces(linux: &Linux, init: bool, init_pid: pid_t) -> Result> { - let mut ns: Vec = Vec::new(); - if init { - for i in &linux.namespaces { - ns.push(LinuxNamespace { - r#type: i.r#type.clone(), - path: i.path.clone(), - }); +fn 
update_namespaces(logger: &Logger, spec: &mut Spec, init_pid: RawFd) -> Result<()> { + let linux = match spec.linux.as_mut() { + None => { + return Err( + ErrorKind::ErrorCode("Spec didn't container linux field".to_string()).into(), + ) } - } else { - for i in NAMESPACES.keys() { - let ns_path = format!("/proc/{}/ns/{}", init_pid, TYPETONAME.get(i).unwrap()); - let ns_path_buf = Path::new(&ns_path).read_link()?; + Some(l) => l, + }; - let init_ns_path = format!( + let namespaces = linux.namespaces.as_mut_slice(); + for namespace in namespaces.iter_mut() { + if TYPETONAME.contains_key(namespace.r#type.as_str()) { + let ns_path = format!( "/proc/{}/ns/{}", - unistd::getpid(), - TYPETONAME.get(i).unwrap() + init_pid, + TYPETONAME.get(namespace.r#type.as_str()).unwrap() ); - let init_ns_path_buf = Path::new(&init_ns_path).read_link()?; - // ignore the namespace which is the same with system init namespace, - // since it shouldn't be join. - if !ns_path_buf.eq(&init_ns_path_buf) { - ns.push(LinuxNamespace { - r#type: i.to_string(), - path: ns_path, - }); - } - } - } - Ok(ns) -} - -pub const PIDSIZE: usize = mem::size_of::(); - -fn read_sync(fd: RawFd) -> Result { - let mut v: [u8; PIDSIZE] = [0; PIDSIZE]; - let mut len = 0; - - loop { - match unistd::read(fd, &mut v[len..]) { - Ok(l) => { - len += l; - if len == PIDSIZE { - break; - } - } - - Err(e) => { - if e != Error::from_errno(Errno::EINTR) { - return Err(e.into()); - } - } - } - } - - Ok(pid_t::from_be_bytes(v)) -} - -fn write_sync(fd: RawFd, pid: pid_t) -> Result<()> { - let buf = pid.to_be_bytes(); - let mut len = 0; - - loop { - match unistd::write(fd, &buf[len..]) { - Ok(l) => { - len += l; - if len == PIDSIZE { - break; - } - } - - Err(e) => { - if e != Error::from_errno(Errno::EINTR) { - return Err(e.into()); - } + if namespace.path == "" { + namespace.path = ns_path; } } } @@ -791,383 +1004,215 @@ fn write_sync(fd: RawFd, pid: pid_t) -> Result<()> { Ok(()) } +fn get_pid_namespace(logger: &Logger, linux: 
&Linux) -> Result { + for ns in &linux.namespaces { + if ns.r#type == "pid" { + if ns.path == "" { + error!(logger, "pid ns path is empty"); + return Err(ErrorKind::ErrorCode("pid ns path is empty".to_string()).into()); + } + + let fd = match fcntl::open(ns.path.as_str(), OFlag::O_CLOEXEC, Mode::empty()) { + Ok(v) => v, + Err(e) => { + error!( + logger, + "cannot open type: {} path: {}", + ns.r#type.clone(), + ns.path.clone() + ); + error!(logger, "error is : {}", e.as_errno().unwrap().desc()); + return Err(e.into()); + } + }; + + return Ok(fd); + } + } + + Err(ErrorKind::ErrorCode("cannot find the pid ns".to_string()).into()) +} + +fn is_userns_enabled(linux: &Linux) -> bool { + for ns in &linux.namespaces { + if ns.r#type == "user" && ns.path == "" { + return true; + } + } + + false +} + +fn get_namespaces(linux: &Linux) -> Result> { + let mut ns: Vec = Vec::new(); + for i in &linux.namespaces { + ns.push(LinuxNamespace { + r#type: i.r#type.clone(), + path: i.path.clone(), + }); + } + Ok(ns) +} + fn join_namespaces( logger: &Logger, spec: &Spec, - to_new: CloneFlags, - to_join: &Vec<(CloneFlags, RawFd)>, - pidns: bool, - userns: bool, - init: bool, - no_pivot: bool, + p: &Process, cm: &FsManager, st: &OCIState, - parent: &mut u32, -) -> Result<(Pid, RawFd)> { + child: &mut Child, + pwfd: RawFd, + prfd: RawFd, +) -> Result<()> { let logger = logger.new(o!("action" => "join-namespaces")); - // let ccond = Cond::new().chain_err(|| "create cond failed")?; - // let pcond = Cond::new().chain_err(|| "create cond failed")?; - let (pfd, cfd) = unistd::pipe2(OFlag::O_CLOEXEC).chain_err(|| "failed to create pipe")?; - let (crfd, pwfd) = unistd::pipe2(OFlag::O_CLOEXEC)?; - let linux = spec.linux.as_ref().unwrap(); let res = linux.resources.as_ref(); - match unistd::fork()? 
{ - ForkResult::Parent { child } => { - // let mut pfile = unsafe { File::from_raw_fd(pfd) }; - unistd::close(cfd)?; - unistd::close(crfd)?; + let userns = is_userns_enabled(linux); - //wait child setup user namespace - let _ = read_sync(pfd)?; + info!( + logger, + "execid: {}, try to send spec from parent to child", + p.exec_id.as_str() + ); + let spec_str = serde_json::to_string(spec)?; + write_sync(pwfd, SYNC_DATA, spec_str.as_str())?; - if userns { - // setup uid/gid mappings - write_mappings( - &logger, - &format!("/proc/{}/uid_map", child.as_raw()), - &linux.uid_mappings, - )?; - write_mappings( - &logger, - &format!("/proc/{}/gid_map", child.as_raw()), - &linux.gid_mappings, - )?; - } + info!( + logger, + "execid: {}, wait child received oci spec", + p.exec_id.as_str() + ); - // apply cgroups - if init { - if res.is_some() { - info!(logger, "apply cgroups!"); - cm.set(res.unwrap(), false)?; - } - } + // child.try_wait()?; + read_sync(prfd)?; - if res.is_some() { - cm.apply(child.as_raw())?; - } + info!( + logger, + "execid: {}, send oci process from parent to child", + p.exec_id.as_str() + ); + let process_str = serde_json::to_string(&p.oci)?; + write_sync(pwfd, SYNC_DATA, process_str.as_str())?; - write_sync(pwfd, 0)?; + info!( + logger, + "execid: {}, wait child received oci process", + p.exec_id.as_str() + ); + read_sync(prfd)?; - let mut pid = child.as_raw(); - info!(logger, "first child! 
{}", pid); - info!(logger, "wait for final child!"); - if pidns { - pid = read_sync(pfd)?; - // pfile.read_to_string(&mut json)?; - /* - let msg: SyncPC = match serde_json::from_reader(&mut pfile) { - Ok(u) => u, - Err(e) => { - match e.classify() { - Category::Io => info!("Io error!"), - Category::Syntax => info!("syntax error!"), - Category::Data => info!("data error!"), - Category::Eof => info!("end of file!"), - } + let cm_str = serde_json::to_string(cm)?; + write_sync(pwfd, SYNC_DATA, cm_str.as_str())?; - return Err(ErrorKind::Serde(e).into()); - } - }; - */ - // notify child continue - info!(logger, "got final child pid! {}", pid); - write_sync(pwfd, 0)?; - info!(logger, "resume child!"); - // wait for child to exit - // Since the child would be reaped by our reaper, so - // there is no need reap the child here. - // wait::waitpid(Some(child), None); - } - // read out child pid here. we don't use - // cgroup to get it - // and the wait for child exit to get grandchild - - if init { - info!(logger, "wait for hook!"); - let _ = read_sync(pfd)?; - - // run prestart hook - if spec.hooks.is_some() { - info!(logger, "prestart"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.prestart.iter() { - execute_hook(&logger, h, st)?; - } - } - - // notify child run prestart hooks completed - write_sync(pwfd, 0)?; - - // wait to run poststart hook - let _ = read_sync(pfd)?; - //run poststart hook - if spec.hooks.is_some() { - info!(logger, "poststart"); - let hooks = spec.hooks.as_ref().unwrap(); - for h in hooks.poststart.iter() { - execute_hook(&logger, h, st)?; - } - } - } - unistd::close(pfd)?; - unistd::close(pwfd)?; - - return Ok((Pid::from_raw(pid), cfd)); - } - ForkResult::Child => { - *parent = 1; - unistd::close(pfd)?; - unistd::close(pwfd)?; - // set oom_score_adj - - let p = if spec.process.is_some() { - spec.process.as_ref().unwrap() - } else { - return Err(nix::Error::Sys(Errno::EINVAL).into()); - }; - - if p.oom_score_adj.is_some() { - fs::write( 
- "/proc/self/oom_score_adj", - p.oom_score_adj.unwrap().to_string().as_bytes(), - )? - } - - // set rlimit - for rl in p.rlimits.iter() { - setrlimit(rl)?; - } - - if userns { - sched::unshare(CloneFlags::CLONE_NEWUSER)?; - } - - write_sync(cfd, 0)?; - let _ = read_sync(crfd)?; - - if userns { - setid(Uid::from_raw(0), Gid::from_raw(0))?; - } - } - } - - // child process continues - // let mut cfile = unsafe { File::from_raw_fd(cfd) }; - let mut mount_fd = -1; - let mut bind_device = false; - for &(s, fd) in to_join { - if s == CloneFlags::CLONE_NEWNS { - mount_fd = fd; - continue; - } - - // just skip user namespace for now - // we cannot join user namespace in multithreaded - // program, which is us(kata-agent using grpc) - // To fix this - // 1. write kata-agent as singlethread program - // 2. use a binary to exec OR self exec to enter - // namespaces before multithreaded, the way - // rustjail works - /* - if s == CloneFlags::CLONE_NEWUSER { - unistd::close(fd)?; - continue; - } - */ - if let Err(e) = sched::setns(fd, s) { - info!(logger, "setns error: {}", e.as_errno().unwrap().desc()); - info!(logger, "setns: ns type: {:?}", s); - if s == CloneFlags::CLONE_NEWUSER { - if e.as_errno().unwrap() != Errno::EINVAL { - return Err(e.into()); - } - } else { - return Err(e.into()); - } - } - unistd::close(fd)?; - - if s == CloneFlags::CLONE_NEWUSER { - setid(Uid::from_raw(0), Gid::from_raw(0))?; - bind_device = true; - } - } - - info!(logger, "to_new: {:?}", to_new); - sched::unshare(to_new & !CloneFlags::CLONE_NEWUSER)?; + //wait child setup user namespace + info!( + logger, + "execid: {}, wait child setup user namespace", + p.exec_id.as_str() + ); + read_sync(prfd)?; if userns { - bind_device = true; + info!( + logger, + "execid: {}, setup uid/gid mappings", + p.exec_id.as_str() + ); + // setup uid/gid mappings + write_mappings( + &logger, + &format!("/proc/{}/uid_map", p.pid), + &linux.uid_mappings, + )?; + write_mappings( + &logger, + &format!("/proc/{}/gid_map", 
p.pid), + &linux.gid_mappings, + )?; } - // create a pipe for sync between parent and child. - // here we should make sure the parent return pid before - // the child notify grand parent to run hooks, otherwise - // both of the parent and his child would write cfd at the same - // time which would mesh the grand parent to read. - let (chfd, phfd) = unistd::pipe2(OFlag::O_CLOEXEC) - .chain_err(|| "failed to create pipe for syncing run hooks")?; + // apply cgroups + if p.init { + if res.is_some() { + info!(logger, "apply cgroups!"); + cm.set(res.unwrap(), false)?; + } + } - if pidns { - match unistd::fork()? { - ForkResult::Parent { child } => { - unistd::close(chfd)?; - // set child pid to topmost parent and the exit - write_sync(cfd, child.as_raw())?; + if res.is_some() { + cm.apply(p.pid)?; + } - info!( - logger, - "json: {}", - serde_json::to_string(&SyncPC { - pid: child.as_raw() - }) - .unwrap() - ); - // wait for parent read it and the continue - info!(logger, "after send out child pid!"); - let _ = read_sync(crfd)?; + info!( + logger, + "execid: {}, notify child to continue", + p.exec_id.as_str() + ); + // notify child to continue + write_sync(pwfd, SYNC_SUCCESS, "")?; - // notify child to continue. 
- write_sync(phfd, 0)?; - std::process::exit(0); + if p.init { + info!( + logger, + "execid:{}, notify child parent ready to run prestart hook!", + p.exec_id.as_str() + ); + let _ = read_sync(prfd)?; + + info!( + logger, + "execid:{}, get ready to run prestart hook!", + p.exec_id.as_str() + ); + + // run prestart hook + if spec.hooks.is_some() { + info!(logger, "execid:{}, prestart", p.exec_id.as_str()); + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.prestart.iter() { + execute_hook(&logger, h, st)?; } - ForkResult::Child => { - *parent = 2; - unistd::close(phfd)?; + } + + // notify child run prestart hooks completed + info!( + logger, + "execid:{}, notify child run prestart hook completed!", + p.exec_id.as_str() + ); + write_sync(pwfd, SYNC_SUCCESS, "")?; + + info!( + logger, + "execid:{}, notify child parent ready to run poststart hook!", + p.exec_id.as_str() + ); + // wait to run poststart hook + read_sync(prfd)?; + info!( + logger, + "execid:{}, get ready to run poststart hook!", + p.exec_id.as_str() + ); + + //run poststart hook + if spec.hooks.is_some() { + info!(logger, "execid:{}, poststart", p.exec_id.as_str()); + let hooks = spec.hooks.as_ref().unwrap(); + for h in hooks.poststart.iter() { + execute_hook(&logger, h, st)?; } } } - if to_new.contains(CloneFlags::CLONE_NEWUTS) { - unistd::sethostname(&spec.hostname)?; - } - - let rootfs = spec.root.as_ref().unwrap().path.as_str(); - let root = fs::canonicalize(rootfs)?; - let rootfs = root.to_str().unwrap(); - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - // setup rootfs - info!(logger, "setup rootfs!"); - mount::init_rootfs(&logger, &spec, &cm.paths, &cm.mounts, bind_device)?; - } - - // wait until parent notified - if pidns { - let _ = read_sync(chfd)?; - } - unistd::close(chfd)?; - - if init { - // notify parent to run prestart hooks - write_sync(cfd, 0)?; - // wait parent run prestart hooks - let _ = read_sync(crfd)?; - } - - unistd::close(crfd)?; - - if mount_fd != -1 { - 
sched::setns(mount_fd, CloneFlags::CLONE_NEWNS)?; - unistd::close(mount_fd)?; - } - - if to_new.contains(CloneFlags::CLONE_NEWNS) { - // unistd::chroot(rootfs)?; - if no_pivot { - mount::ms_move_root(rootfs)?; - } else { - // pivot root - mount::pivot_rootfs(rootfs)?; - } - - // setup sysctl - set_sysctls(&linux.sysctl)?; - unistd::chdir("/")?; - if let Err(_) = stat::stat("marker") { - info!(logger, "not in expect root!!"); - } - info!(logger, "in expect rootfs!"); - - if let Err(_) = stat::stat("/bin/sh") { - info!(logger, "no '/bin/sh'???"); - } - } - - // notify parent to continue before block on exec fifo - - info!(logger, "rootfs: {}", &rootfs); - - // block on exec fifo - - Ok((Pid::from_raw(-1), cfd)) -} - -fn setup_stdio(p: &Process) -> Result<()> { - if p.console_socket.is_some() { - // we can setup ptmx master for process - // turn off echo - // let mut term = termios::tcgetattr(0)?; - // termios::cfmakeraw(&mut term); - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // term.control_chars[VMIN] = 1; - // term.control_chars[VTIME] = 0; - - let pseduo = pty::openpty(None, None)?; - defer!(unistd::close(pseduo.master).unwrap()); - let data: &[u8] = b"/dev/ptmx"; - let iov = [IoVec::from_slice(&data)]; - let fds = [pseduo.master]; - let cmsg = ControlMessage::ScmRights(&fds); - let mut console_fd = p.console_socket.unwrap(); - - socket::sendmsg(console_fd, &iov, &[cmsg], MsgFlags::empty(), None)?; - - unistd::close(console_fd)?; - unistd::close(p.parent_console_socket.unwrap())?; - console_fd = pseduo.slave; - - unistd::setsid()?; - unsafe { - libc::ioctl(console_fd, libc::TIOCSCTTY); - } - unistd::dup2(console_fd, 0)?; - unistd::dup2(console_fd, 1)?; - unistd::dup2(console_fd, 2)?; - - // turn off echo - // let mut term = termios::tcgetattr(0)?; - // term.local_flags &= !(LocalFlags::ECHO | LocalFlags::ICANON); - // termios::tcsetattr(0, SetArg::TCSANOW, &term)?; - - if console_fd > 2 { - unistd::close(console_fd)?; - } - } else { - // 
dup stdin/stderr/stdout - unistd::dup2(p.stdin.unwrap(), 0)?; - unistd::dup2(p.stdout.unwrap(), 1)?; - unistd::dup2(p.stderr.unwrap(), 2)?; - - if p.stdin.unwrap() > 2 { - unistd::close(p.stdin.unwrap())?; - } - - if p.stdout.unwrap() > 2 { - unistd::close(p.stdout.unwrap())?; - } - if p.stderr.unwrap() > 2 { - unistd::close(p.stderr.unwrap())?; - } - } - - unistd::close(p.parent_stdin.unwrap())?; - unistd::close(p.parent_stdout.unwrap())?; - unistd::close(p.parent_stderr.unwrap())?; + info!( + logger, + "execid:{}, wait for child process ready to run exec", + p.exec_id.as_str() + ); + read_sync(prfd)?; Ok(()) } @@ -1441,7 +1486,9 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { let (rfd, wfd) = unistd::pipe2(OFlag::O_CLOEXEC)?; match unistd::fork()? { ForkResult::Parent { child: _ch } => { - let status = read_sync(rfd)?; + let buf = read_sync(rfd)?; + let buf_array: [u8; 4] = [buf[0], buf[1], buf[2], buf[3]]; + let status: i32 = i32::from_be_bytes(buf_array); info!(logger, "hook child: {}", _ch); @@ -1572,7 +1619,7 @@ fn execute_hook(logger: &Logger, h: &Hook, st: &OCIState) -> Result<()> { }; handle.join().unwrap(); - let _ = write_sync(wfd, status); + let _ = write_sync(wfd, status, ""); // let _ = wait::waitpid(Pid::from_raw(pid), // Some(WaitPidFlag::WEXITED | WaitPidFlag::__WALL)); std::process::exit(0); diff --git a/src/agent/rustjail/src/errors.rs b/src/agent/rustjail/src/errors.rs index 130f610e1..8478a50b6 100644 --- a/src/agent/rustjail/src/errors.rs +++ b/src/agent/rustjail/src/errors.rs @@ -16,11 +16,13 @@ error_chain! 
{ Ffi(std::ffi::NulError); Caps(caps::errors::Error); Serde(serde_json::Error); - UTF8(std::string::FromUtf8Error); + FromUTF8(std::string::FromUtf8Error); Parse(std::num::ParseIntError); Scanfmt(scan_fmt::parse::ScanError); Ip(std::net::AddrParseError); Regex(regex::Error); + EnvVar(std::env::VarError); + UTF8(std::str::Utf8Error); } // define new errors errors { diff --git a/src/agent/rustjail/src/lib.rs b/src/agent/rustjail/src/lib.rs index f429b371e..b3866d988 100644 --- a/src/agent/rustjail/src/lib.rs +++ b/src/agent/rustjail/src/lib.rs @@ -42,14 +42,14 @@ macro_rules! sl { }; } +pub mod capabilities; pub mod cgroups; pub mod container; pub mod errors; pub mod mount; pub mod process; pub mod specconv; -// pub mod sync; -pub mod capabilities; +pub mod sync; pub mod validator; // pub mod factory; @@ -66,8 +66,6 @@ pub mod validator; // construtc ociSpec from grpcSpec, which is needed for hook // execution. since hooks read config.json -use std::collections::HashMap; -use std::mem::MaybeUninit; use oci::{ Box as ociBox, Hooks as ociHooks, Linux as ociLinux, LinuxCapabilities as ociLinuxCapabilities, Mount as ociMount, POSIXRlimit as ociPOSIXRlimit, Process as ociProcess, Root as ociRoot, @@ -77,6 +75,8 @@ use protocols::oci::{ Hooks as grpcHooks, Linux as grpcLinux, Mount as grpcMount, Process as grpcProcess, Root as grpcRoot, Spec as grpcSpec, }; +use std::collections::HashMap; +use std::mem::MaybeUninit; pub fn process_grpc_to_oci(p: &grpcProcess) -> ociProcess { let console_size = if p.ConsoleSize.is_some() { diff --git a/src/agent/rustjail/src/mount.rs b/src/agent/rustjail/src/mount.rs index aff77b19a..52a1d0dde 100644 --- a/src/agent/rustjail/src/mount.rs +++ b/src/agent/rustjail/src/mount.rs @@ -14,6 +14,7 @@ use oci::{LinuxDevice, Mount, Spec}; use std::collections::{HashMap, HashSet}; use std::fs::{self, OpenOptions}; use std::os::unix; +use std::os::unix::io::RawFd; use std::path::{Path, PathBuf}; use path_absolutize::*; @@ -23,10 +24,11 @@ use 
std::io::{BufRead, BufReader}; use crate::container::DEFAULT_DEVICES; use crate::errors::*; +use crate::sync::write_count; use lazy_static; use std::string::ToString; -use slog::Logger; +use crate::log_child; // Info reveals information about a particular mounted filesystem. This // struct is populated from the content in the /proc//mountinfo file. @@ -97,7 +99,7 @@ lazy_static! { } pub fn init_rootfs( - logger: &Logger, + cfd_log: RawFd, spec: &Spec, cpath: &HashMap, mounts: &HashMap, @@ -133,13 +135,13 @@ pub fn init_rootfs( return Err(ErrorKind::Nix(nix::Error::Sys(Errno::EINVAL)).into()); } if m.r#type == "cgroup" { - mount_cgroups(logger, &m, rootfs, flags, &data, cpath, mounts)?; + mount_cgroups(cfd_log, &m, rootfs, flags, &data, cpath, mounts)?; } else { if m.destination == "/dev" { flags &= !MsFlags::MS_RDONLY; } - mount_from(&m, &rootfs, flags, &data, "")?; + mount_from(cfd_log, &m, &rootfs, flags, &data, "")?; } } @@ -156,7 +158,7 @@ pub fn init_rootfs( } fn mount_cgroups( - logger: &Logger, + cfd_log: RawFd, m: &Mount, rootfs: &str, flags: MsFlags, @@ -173,8 +175,8 @@ fn mount_cgroups( }; let cflags = MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV; - info!(logger, "tmpfs"); - mount_from(&ctm, rootfs, cflags, "", "")?; + // info!(logger, "tmpfs"); + mount_from(cfd_log, &ctm, rootfs, cflags, "", "")?; let olddir = unistd::getcwd()?; unistd::chdir(rootfs)?; @@ -183,7 +185,7 @@ fn mount_cgroups( // bind mount cgroups for (key, mount) in mounts.iter() { - info!(logger, "{}", key); + log_child!(cfd_log, "mount cgroup subsystem {}", key); let source = if cpath.get(key).is_some() { cpath.get(key).unwrap() } else { @@ -210,7 +212,7 @@ fn mount_cgroups( srcs.insert(source.to_string()); - info!(logger, "{}", destination.as_str()); + log_child!(cfd_log, "mount destination: {}", destination.as_str()); let bm = Mount { source: source.to_string(), @@ -219,25 +221,24 @@ fn mount_cgroups( options: Vec::new(), }; - mount_from( - &bm, - rootfs, - flags | 
MsFlags::MS_REC | MsFlags::MS_BIND, - "", - "", - )?; + let mut mount_flags: MsFlags = flags | MsFlags::MS_REC | MsFlags::MS_BIND; + if key.contains("systemd") { + mount_flags &= !MsFlags::MS_RDONLY; + } + mount_from(cfd_log, &bm, rootfs, mount_flags, "", "")?; if key != base { let src = format!("{}/{}", m.destination.as_str(), key); match unix::fs::symlink(destination.as_str(), &src[1..]) { Err(e) => { - info!( - logger, + log_child!( + cfd_log, "symlink: {} {} err: {}", key, destination.as_str(), e.to_string() ); + return Err(e.into()); } Ok(_) => {} @@ -421,7 +422,14 @@ fn parse_mount(m: &Mount) -> (MsFlags, String) { (flags, data.join(",")) } -fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) -> Result<()> { +fn mount_from( + cfd_log: RawFd, + m: &Mount, + rootfs: &str, + flags: MsFlags, + data: &str, + _label: &str, +) -> Result<()> { let d = String::from(data); let dest = format!("{}{}", rootfs, &m.destination); @@ -437,8 +445,8 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) match fs::create_dir_all(&dir) { Ok(_) => {} Err(e) => { - info!( - sl!(), + log_child!( + cfd_log, "creat dir {}: {}", dir.to_str().unwrap(), e.to_string() @@ -456,8 +464,6 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) PathBuf::from(&m.source) }; - info!(sl!(), "{}, {}", src.to_str().unwrap(), dest.as_str()); - // ignore this check since some mount's src didn't been a directory // such as tmpfs. 
/* @@ -472,7 +478,12 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) match stat::stat(dest.as_str()) { Ok(_) => {} Err(e) => { - info!(sl!(), "{}: {}", dest.as_str(), e.as_errno().unwrap().desc()); + log_child!( + cfd_log, + "{}: {}", + dest.as_str(), + e.as_errno().unwrap().desc() + ); } } @@ -485,7 +496,7 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) ) { Ok(_) => {} Err(e) => { - info!(sl!(), "mount error: {}", e.as_errno().unwrap().desc()); + log_child!(cfd_log, "mount error: {}", e.as_errno().unwrap().desc()); return Err(e.into()); } } @@ -508,8 +519,8 @@ fn mount_from(m: &Mount, rootfs: &str, flags: MsFlags, data: &str, _label: &str) None::<&str>, ) { Err(e) => { - info!( - sl!(), + log_child!( + cfd_log, "remout {}: {}", dest.as_str(), e.as_errno().unwrap().desc() @@ -616,9 +627,9 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> { Ok(()) } -pub fn finish_rootfs(spec: &Spec) -> Result<()> { +pub fn finish_rootfs(cfd_log: RawFd, spec: &Spec) -> Result<()> { let olddir = unistd::getcwd()?; - info!(sl!(), "{}", olddir.to_str().unwrap()); + log_child!(cfd_log, "old cwd: {}", olddir.to_str().unwrap()); unistd::chdir("/")?; if spec.linux.is_some() { let linux = spec.linux.as_ref().unwrap(); diff --git a/src/agent/rustjail/src/process.rs b/src/agent/rustjail/src/process.rs index 2a525b48f..d4deb0b84 100644 --- a/src/agent/rustjail/src/process.rs +++ b/src/agent/rustjail/src/process.rs @@ -34,10 +34,8 @@ pub struct Process { pub extra_files: Vec, // pub caps: Capabilities, // pub rlimits: Vec, - pub console_socket: Option, pub term_master: Option, - // parent end of fds - pub parent_console_socket: Option, + pub tty: bool, pub parent_stdin: Option, pub parent_stdout: Option, pub parent_stderr: Option, @@ -89,9 +87,8 @@ impl Process { exit_pipe_w: None, exit_pipe_r: None, extra_files: Vec::new(), - console_socket: None, + tty: ocip.terminal, term_master: None, - parent_console_socket: None, 
parent_stdin: None, parent_stdout: None, parent_stderr: None, @@ -104,44 +101,21 @@ impl Process { info!(logger, "before create console socket!"); - if ocip.terminal { - let (psocket, csocket) = match socket::socketpair( - AddressFamily::Unix, - SockType::Stream, - None, - SockFlag::SOCK_CLOEXEC, - ) { - Ok((u, v)) => (u, v), - Err(e) => { - match e { - Error::Sys(errno) => { - info!(logger, "socketpair: {}", errno.desc()); - } - _ => { - info!(logger, "socketpair: other error!"); - } - } - return Err(e); - } - }; - p.parent_console_socket = Some(psocket); - p.console_socket = Some(csocket); + if !p.tty { + info!(logger, "created console socket!"); + + let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?; + p.parent_stdin = Some(pstdin); + p.stdin = Some(stdin); + + let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; + p.parent_stdout = Some(pstdout); + p.stdout = Some(stdout); + + let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; + p.parent_stderr = Some(pstderr); + p.stderr = Some(stderr); } - - info!(logger, "created console socket!"); - - let (stdin, pstdin) = unistd::pipe2(OFlag::O_CLOEXEC)?; - p.parent_stdin = Some(pstdin); - p.stdin = Some(stdin); - - let (pstdout, stdout) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; - p.parent_stdout = Some(pstdout); - p.stdout = Some(stdout); - - let (pstderr, stderr) = create_extended_pipe(OFlag::O_CLOEXEC, pipe_size)?; - p.parent_stderr = Some(pstderr); - p.stderr = Some(stderr); - Ok(p) } } diff --git a/src/agent/rustjail/src/sync.rs b/src/agent/rustjail/src/sync.rs new file mode 100644 index 000000000..b17277f87 --- /dev/null +++ b/src/agent/rustjail/src/sync.rs @@ -0,0 +1,177 @@ +// Copyright (c) 2019 Ant Financial +// +// SPDX-License-Identifier: Apache-2.0 +// + +use crate::errors::*; +use nix::errno::Errno; +use nix::unistd; +use nix::Error; +use std::mem; +use std::os::unix::io::RawFd; + +pub const SYNC_SUCCESS: i32 = 1; +pub const SYNC_FAILED: i32 = 2; 
+pub const SYNC_DATA: i32 = 3; + +const DATA_SIZE: usize = 100; +const MSG_SIZE: usize = mem::size_of::(); + +#[macro_export] +macro_rules! log_child { + ($fd:expr, $($arg:tt)+) => ({ + let lfd = $fd; + let mut log_str = format_args!($($arg)+).to_string(); + log_str.push('\n'); + write_count(lfd, log_str.as_bytes(), log_str.len()); + }) +} + +pub fn write_count(fd: RawFd, buf: &[u8], count: usize) -> Result { + let mut len = 0; + + loop { + match unistd::write(fd, &buf[len..]) { + Ok(l) => { + len += l; + if len == count { + break; + } + } + + Err(e) => { + if e != Error::from_errno(Errno::EINTR) { + return Err(e.into()); + } + } + } + } + + Ok(len) +} + +fn read_count(fd: RawFd, count: usize) -> Result> { + let mut v: Vec = vec![0; count]; + let mut len = 0; + + loop { + match unistd::read(fd, &mut v[len..]) { + Ok(l) => { + len += l; + if len == count || l == 0 { + break; + } + } + + Err(e) => { + if e != Error::from_errno(Errno::EINTR) { + return Err(e.into()); + } + } + } + } + + Ok(v[0..len].to_vec()) +} + +pub fn read_sync(fd: RawFd) -> Result> { + let buf = read_count(fd, MSG_SIZE)?; + if buf.len() != MSG_SIZE { + return Err(ErrorKind::ErrorCode(format!( + "process: {} failed to receive sync message from peer: got msg length: {}, expected: {}", + std::process::id(), + buf.len(), + MSG_SIZE + )) + .into()); + } + let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]]; + let msg: i32 = i32::from_be_bytes(buf_array); + match msg { + SYNC_SUCCESS => return Ok(Vec::new()), + SYNC_DATA => { + let buf = read_count(fd, MSG_SIZE)?; + let buf_array: [u8; MSG_SIZE] = [buf[0], buf[1], buf[2], buf[3]]; + let msg_length: i32 = i32::from_be_bytes(buf_array); + let data_buf = read_count(fd, msg_length as usize)?; + + return Ok(data_buf); + } + SYNC_FAILED => { + let mut error_buf = vec![]; + loop { + let buf = read_count(fd, DATA_SIZE)?; + + error_buf.extend(&buf); + if DATA_SIZE == buf.len() { + continue; + } else { + break; + } + } + + let error_str = match 
std::str::from_utf8(&error_buf) { + Ok(v) => v, + Err(e) => { + return Err(ErrorKind::ErrorCode(format!( + "receive error message from child process failed: {:?}", + e + )) + .into()) + } + }; + + return Err(ErrorKind::ErrorCode(String::from(error_str)).into()); + } + _ => return Err(ErrorKind::ErrorCode("error in receive sync message".to_string()).into()), + } +} + +pub fn write_sync(fd: RawFd, msg_type: i32, data_str: &str) -> Result<()> { + let buf = msg_type.to_be_bytes(); + + let count = write_count(fd, &buf, MSG_SIZE)?; + if count != MSG_SIZE { + return Err(ErrorKind::ErrorCode("error in send sync message".to_string()).into()); + } + + match msg_type { + SYNC_FAILED => match write_count(fd, data_str.as_bytes(), data_str.len()) { + Ok(_count) => unistd::close(fd)?, + Err(e) => { + unistd::close(fd)?; + return Err( + ErrorKind::ErrorCode("error in send message to process".to_string()).into(), + ); + } + }, + SYNC_DATA => { + let length: i32 = data_str.len() as i32; + match write_count(fd, &length.to_be_bytes(), MSG_SIZE) { + Ok(_count) => (), + Err(e) => { + unistd::close(fd)?; + return Err(ErrorKind::ErrorCode( + "error in send message to process".to_string(), + ) + .into()); + } + } + + match write_count(fd, data_str.as_bytes(), data_str.len()) { + Ok(_count) => (), + Err(e) => { + unistd::close(fd)?; + return Err(ErrorKind::ErrorCode( + "error in send message to process".to_string(), + ) + .into()); + } + } + } + + _ => (), + }; + + Ok(()) +} diff --git a/src/agent/rustjail/src/validator.rs b/src/agent/rustjail/src/validator.rs index 76777eb34..3e06398d8 100644 --- a/src/agent/rustjail/src/validator.rs +++ b/src/agent/rustjail/src/validator.rs @@ -1,9 +1,15 @@ +// Copyright (c) 2019 Ant Financial +// +// SPDX-License-Identifier: Apache-2.0 +// + use crate::container::Config; use crate::errors::*; use lazy_static; use nix::errno::Errno; use nix::Error; use oci::{LinuxIDMapping, LinuxNamespace, Spec}; +use protobuf::RepeatedField; use 
std::collections::HashMap; use std::path::{Component, PathBuf}; diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 72823a5c4..65c6d0201 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -14,8 +14,8 @@ use crate::linux_abi::*; use crate::mount::{DRIVERBLKTYPE, DRIVERMMIOBLKTYPE, DRIVERNVDIMMTYPE, DRIVERSCSITYPE}; use crate::sandbox::Sandbox; use crate::{AGENT_CONFIG, GLOBAL_DEVICE_WATCHER}; -use protocols::agent::Device; use oci::Spec; +use protocols::agent::Device; use rustjail::errors::*; // Convenience macro to obtain the scope logger diff --git a/src/agent/src/grpc.rs b/src/agent/src/grpc.rs index b010b449c..1a8fc5886 100644 --- a/src/agent/src/grpc.rs +++ b/src/agent/src/grpc.rs @@ -578,11 +578,11 @@ impl protocols::agent_grpc::AgentService for agentService { req: protocols::agent::ExecProcessRequest, sink: ::grpcio::UnarySink, ) { - if let Err(_) = self.do_exec_process(req) { + if let Err(e) = self.do_exec_process(req) { let f = sink .fail(RpcStatus::new( RpcStatusCode::Internal, - Some(String::from("fail to exec process!")), + Some(format!("{}", e)), )) .map_err(|_e| error!(sl!(), "fail to exec process!")); ctx.spawn(f); diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index dab89daa0..41a29fc1a 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ -10,14 +10,14 @@ #![allow(non_snake_case)] #[macro_use] extern crate lazy_static; +extern crate oci; extern crate prctl; extern crate protocols; extern crate regex; extern crate rustjail; +extern crate scan_fmt; extern crate serde_json; extern crate signal_hook; -extern crate scan_fmt; -extern crate oci; #[macro_use] extern crate scopeguard; @@ -100,6 +100,10 @@ fn announce(logger: &Logger) { fn main() -> Result<()> { let args: Vec = env::args().collect(); + if args.len() == 2 && args[1] == "init" { + rustjail::container::init_child(); + exit(0); + } env::set_var("RUST_BACKTRACE", "full");