From 0b2bd64124425817d7b7b7ad36054070aa83e4f6 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 11 Feb 2022 11:53:00 +1100 Subject: [PATCH 1/3] device: Rework update_spec_pci() to update_env_pci() This function updates PCIDEVICE_ environment variables (such as those supplied by the Kubernetes SR-IOV plugin) in the OCI spec to be correct for the Kata VM, rather than for the host. We neglected to actually call this function, however, and it turns out that when we do, we need to do things slightly different. We actually need to adjust envionment variables both in the OCI spec when creating a container and also in the variables supplied for exec-ing a new process within an existing container. Adjust the function so that it can be used for both these cases. Signed-off-by: David Gibson --- src/agent/src/device.rs | 76 ++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 629cc3fc2..813c4d569 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -592,38 +592,35 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - Ok(()) } -// update_spec_pci PCI addresses in the OCI spec to be guest addresses -// instead of host addresses. It is given a map of (host address => -// guest address) +// update_env_pci alters PCI addresses in a set of environment +// variables to be correct for the VM instead of the host. It is +// given a map of (host address => guest address) #[instrument] -fn update_spec_pci(spec: &mut Spec, updates: HashMap) -> Result<()> { - // Correct PCI addresses in the environment - if let Some(process) = spec.process.as_mut() { - for envvar in process.env.iter_mut() { - let eqpos = envvar - .find('=') - .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; +fn update_env_pci(env: &mut [String], pcimap: &HashMap) -> Result<()> { + for envvar in env { + let eqpos = envvar + .find('=') + .ok_or_else(|| anyhow!("Malformed OCI env entry {:?}", envvar))?; - let (name, eqval) = envvar.split_at(eqpos); - let val = &eqval[1..]; + let (name, eqval) = envvar.split_at(eqpos); + let val = &eqval[1..]; - if !name.starts_with("PCIDEVICE_") { - continue; - } - - let mut guest_addrs = Vec::::new(); - - for host_addr in val.split(',') { - let host_addr = pci::Address::from_str(host_addr) - .with_context(|| format!("Can't parse {} environment variable", name))?; - let guest_addr = updates - .get(&host_addr) - .ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?; - guest_addrs.push(format!("{}", guest_addr)); - } - - envvar.replace_range(eqpos + 1.., guest_addrs.join(",").as_str()); + if !name.starts_with("PCIDEVICE_") { + continue; } + + let mut guest_addrs = Vec::::new(); + + for host_addr in val.split(',') { + let host_addr = pci::Address::from_str(host_addr) + .with_context(|| format!("Can't parse {} environment variable", name))?; + let guest_addr = pcimap + .get(&host_addr) + .ok_or_else(|| anyhow!("Unable to translate host PCI address {}", host_addr))?; + guest_addrs.push(format!("{}", guest_addr)); + } + + envvar.replace_range(eqpos + 1.., guest_addrs.join(",").as_str()); } Ok(()) @@ -860,7 +857,7 @@ pub fn update_device_cgroup(spec: &mut Spec) -> Result<()> { mod tests { use super::*; use crate::uevent::spawn_test_watcher; - use oci::{Linux, Process}; + use oci::Linux; use std::iter::FromIterator; use tempfile::tempdir; @@ -1199,7 +1196,7 @@ mod tests { } #[test] - fn test_update_spec_pci() { + fn test_update_env_pci() { let example_map = [ // Each is a host,guest pair of pci addresses ("0000:1a:01.0", "0000:01:01.0"), @@ -1209,17 +1206,11 @@ mod tests { ("0000:01:01.0", "ffff:02:1f.7"), ]; - let mut spec = Spec { - process: Some(Process { - env: vec![ - "PCIDEVICE_x=0000:1a:01.0,0000:1b:02.0".to_string(), - "PCIDEVICE_y=0000:01:01.0".to_string(), - "NOTAPCIDEVICE_blah=abcd:ef:01.0".to_string(), - ], - ..Process::default() - }), - ..Spec::default() - }; + let mut env = vec![ + "PCIDEVICE_x=0000:1a:01.0,0000:1b:02.0".to_string(), + "PCIDEVICE_y=0000:01:01.0".to_string(), + "NOTAPCIDEVICE_blah=abcd:ef:01.0".to_string(), + ]; let pci_fixups = example_map .iter() @@ -1231,10 +1222,9 @@ mod tests { }) .collect(); - let res = update_spec_pci(&mut spec, pci_fixups); + let res = update_env_pci(&mut env, &pci_fixups); assert!(res.is_ok()); - let env = &spec.process.as_ref().unwrap().env; assert_eq!(env[0], "PCIDEVICE_x=0000:01:01.0,0000:01:02.0"); assert_eq!(env[1], "PCIDEVICE_y=ffff:02:1f.7"); assert_eq!(env[2], "NOTAPCIDEVICE_blah=abcd:ef:01.0"); From 7b7f426a3f5d1838cedc3dc9520035edb2cc6995 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 11 Feb 2022 12:07:21 +1100 Subject: [PATCH 2/3] device: Keep host to VM PCI mapping persistently add_devices() generates a mapping of host to guest PCI addresses which is used to update some environment variables for the workload. Currently it just does this locally, but it turns out we're going to need the same map again in order to correct environment variables for processes exec-ed into the existing container. Move the map to the sandbox structure so we can keep it around for those later uses. Signed-off-by: David Gibson --- src/agent/src/device.rs | 4 ++-- src/agent/src/sandbox.rs | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 813c4d569..ad181141c 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -765,7 +765,6 @@ pub async fn add_devices( sandbox: &Arc>, ) -> Result<()> { let mut dev_updates = HashMap::<&str, DevUpdate>::with_capacity(devices.len()); - let mut pci_updates = HashMap::::new(); for device in devices.iter() { let update = add_device(device, sandbox).await?; @@ -780,8 +779,9 @@ pub async fn add_devices( )); } + let mut sb = sandbox.lock().await; for (host, guest) in update.pci { - if let Some(other_guest) = pci_updates.insert(host, guest) { + if let Some(other_guest) = sb.pcimap.insert(host, guest) { return Err(anyhow!( "Conflicting guest address for host device {} ({} versus {})", host, diff --git a/src/agent/src/sandbox.rs b/src/agent/src/sandbox.rs index 4dfb2eda5..3edbe7ea0 100644 --- a/src/agent/src/sandbox.rs +++ b/src/agent/src/sandbox.rs @@ -8,6 +8,7 @@ use crate::mount::{get_mount_fs_type, remove_mounts, TYPE_ROOTFS}; use crate::namespace::Namespace; use crate::netlink::Handle; use crate::network::Network; +use crate::pci; use crate::uevent::{Uevent, UeventMatcher}; use crate::watcher::BindWatcher; use anyhow::{anyhow, Context, Result}; @@ -56,6 +57,7 @@ pub struct Sandbox { pub event_rx: Arc>>, pub event_tx: Option>, pub bind_watcher: BindWatcher, + pub pcimap: HashMap, } impl Sandbox { @@ -88,6 +90,7 @@ impl Sandbox { event_rx, event_tx: Some(tx), bind_watcher: BindWatcher::new(), + pcimap: HashMap::new(), }) } From 9590874d9caac7537c5f3a13f797a2fbf5a7553b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 11 Feb 2022 12:29:43 +1100 Subject: [PATCH 3/3] device: Update PCIDEVICE_ environment variables for the guest In commit 78dff468bf1 we introduced logic to rewrite PCIDEVICE_ environment variables for the container so that they contain correct addresses for the Kata VM rather than for the host. Unfortunately, we never actually invoked the function to do this. It turns out we need to do this not only at container creation time, but also for environment variables supplied to processes exec-ed into the container after creation (e.g. with crictl exec). Add calls to make both those updates. fixes #3634 Signed-off-by: David Gibson --- src/agent/src/device.rs | 8 +++++++- src/agent/src/rpc.rs | 9 +++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index ad181141c..31c5c120b 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -596,7 +596,10 @@ fn update_spec_devices(spec: &mut Spec, mut updates: HashMap<&str, DevUpdate>) - // variables to be correct for the VM instead of the host. It is // given a map of (host address => guest address) #[instrument] -fn update_env_pci(env: &mut [String], pcimap: &HashMap) -> Result<()> { +pub fn update_env_pci( + env: &mut [String], + pcimap: &HashMap, +) -> Result<()> { for envvar in env { let eqpos = envvar .find('=') @@ -793,6 +796,9 @@ pub async fn add_devices( } } + if let Some(process) = spec.process.as_mut() { + update_env_pci(&mut process.env, &sandbox.lock().await.pcimap)? + } update_spec_devices(spec, dev_updates) } diff --git a/src/agent/src/rpc.rs b/src/agent/src/rpc.rs index 444be723c..276746b9f 100644 --- a/src/agent/src/rpc.rs +++ b/src/agent/src/rpc.rs @@ -43,7 +43,9 @@ use nix::sys::stat; use nix::unistd::{self, Pid}; use rustjail::process::ProcessOperations; -use crate::device::{add_devices, get_virtio_blk_pci_device_name, update_device_cgroup}; +use crate::device::{ + add_devices, get_virtio_blk_pci_device_name, update_device_cgroup, update_env_pci, +}; use crate::linux_abi::*; use crate::metrics::get_metrics; use crate::mount::{add_storages, baremount, remove_mounts, STORAGE_HANDLER_LIST}; @@ -359,11 +361,14 @@ impl AgentService { let s = self.sandbox.clone(); let mut sandbox = s.lock().await; - let process = req + let mut process = req .process .into_option() .ok_or_else(|| anyhow!(nix::Error::EINVAL))?; + // Apply any necessary corrections for PCI addresses + update_env_pci(&mut process.Env, &sandbox.pcimap)?; + let pipe_size = AGENT_CONFIG.read().await.container_pipe_size; let ocip = rustjail::process_grpc_to_oci(&process); let p = Process::new(&sl!(), &ocip, exec_id.as_str(), false, pipe_size)?;