From 08d80c1aaad735ba617f88be6fa6f7c1f786e08e Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 8 Oct 2020 16:13:14 +1100 Subject: [PATCH 1/4] agent/device: update_spec_device_list() should error if dev not found If update_spec_device_list() is given a device that can't be found in the OCI spec, it currently does nothing, and returns Ok(()). That doesn't seem like what we'd expect and is not what the Go agent in Kata 1 does. Change it to return an error in that case, like Kata 1. Signed-off-by: David Gibson --- src/agent/src/device.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 5dbb16615..8b7e60412 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -262,10 +262,14 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { } } } + return Ok(()); } } - Ok(()) + Err(anyhow!( + "Should have found a matching device {} in the spec", + device.vm_path + )) } // device.Id should be the predicted device name (vda, vdb, ...) From 2477c355bc8c76d15006ada5dc2ab657e29c9064 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 8 Oct 2020 16:23:36 +1100 Subject: [PATCH 2/4] agent/device: Forward port update_spec_device_list() unit test The Kata 1 Go agent included a unit test for updateSpecDeviceList, but no such unit test exists for the Rust agent's equivalent update_spec_device_list(). Port the Kata1 test to Rust. Signed-off-by: David Gibson --- src/agent/src/device.rs | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 8b7e60412..6e5d8815b 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -417,4 +417,70 @@ mod tests { assert_eq!(devices[0].major, Some(major)); assert_eq!(devices[0].minor, Some(minor)); } + + #[test] + fn test_update_spec_device_list() { + let (major, minor) = (7, 2); + let mut device = Device::default(); + let mut spec = Spec::default(); + + // container_path empty + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_err()); + + device.container_path = "/dev/null".to_string(); + + // linux is empty + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_err()); + + spec.linux = Some(Linux::default()); + + // linux.devices is empty + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_err()); + + spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice { + path: "/dev/null2".to_string(), + major, + minor, + ..oci::LinuxDevice::default() + }]; + + // vm_path empty + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_err()); + + device.vm_path = "/dev/null".to_string(); + + // guest and host path are not the same + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_err(), "device={:?} spec={:?}", device, spec); + + spec.linux.as_mut().unwrap().devices[0].path = device.container_path.clone(); + + // spec.linux.resources is empty + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_ok()); + + // update both devices and cgroup lists + spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice { + path: device.container_path.clone(), + major, + minor, + ..oci::LinuxDevice::default() + }]; + + spec.linux.as_mut().unwrap().resources = Some(oci::LinuxResources { + devices: vec![oci::LinuxDeviceCgroup { + major: Some(major), + minor: Some(minor), + ..oci::LinuxDeviceCgroup::default() + }], + ..oci::LinuxResources::default() + }); + + let res = update_spec_device_list(&device, &mut spec); + assert!(res.is_ok()); + } } From 859301b0097f50a0f325df1aa48210acfa73c630 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 8 Oct 2020 17:01:29 +1100 Subject: [PATCH 3/4] agent/device: Index all devices in spec before updating them The agent needs to update device entries in the OCI spec so that it has the correct major:minor numbers for the guest, which may differ from the host. Entries in the main device list are looked up by device path, but entries in the device resources list are looked up by (host) major:minor. This is done one device at a time, updating as we go in update_spec_device_list(). But since the host and guest have different namespaces, one device might have the same major:minor as a different device on the host. In that case we could update one resource entry to the correct guest values, then mistakenly update it again because it now matches a different host device. To avoid this, rather than looking up and updating one by one, we make all the lookups in advance, creating a map from (host) device path to the indices in the spec where the device and resource entries can be found. Port from the Go agent in Kata 1, https://github.com/kata-containers/agent/commit/d88d46849130 Fixes: #703 Signed-off-by: David Gibson --- src/agent/src/device.rs | 258 ++++++++++++++++++++++++++++++++-------- 1 file changed, 207 insertions(+), 51 deletions(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 6e5d8815b..89433d34e 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -28,8 +28,15 @@ macro_rules! sl { const VM_ROOTFS: &str = "/"; +struct DevIndexEntry { + idx: usize, + residx: Vec, +} + +struct DevIndex(HashMap); + // DeviceHandler is the type of callback to be defined to handle every type of device driver. -type DeviceHandler = fn(&Device, &mut Spec, &Arc>) -> Result<()>; +type DeviceHandler = fn(&Device, &mut Spec, &Arc>, &DevIndex) -> Result<()>; // DeviceHandlerList lists the supported drivers. #[cfg_attr(rustfmt, rustfmt_skip)] @@ -194,7 +201,7 @@ fn scan_scsi_bus(scsi_addr: &str) -> Result<()> { // the same device in the list of devices provided through the OCI spec. // This is needed to update information about minor/major numbers that cannot // be predicted from the caller. -fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { +fn update_spec_device_list(device: &Device, spec: &mut Spec, devidx: &DevIndex) -> Result<()> { let major_id: c_uint; let minor_id: c_uint; @@ -228,48 +235,44 @@ fn update_spec_device_list(device: &Device, spec: &mut Spec) -> Result<()> { "got the device: dev_path: {}, major: {}, minor: {}\n", &device.vm_path, major_id, minor_id ); - let devices = linux.devices.as_mut_slice(); - for dev in devices.iter_mut() { - if dev.path == device.container_path { - let host_major = dev.major; - let host_minor = dev.minor; + if let Some(idxdata) = devidx.0.get(device.container_path.as_str()) { + let dev = &mut linux.devices[idxdata.idx]; + let host_major = dev.major; + let host_minor = dev.minor; - dev.major = major_id as i64; - dev.minor = minor_id as i64; + dev.major = major_id as i64; + dev.minor = minor_id as i64; + + info!( + sl!(), + "change the device from major: {} minor: {} to vm device major: {} minor: {}", + host_major, + host_minor, + major_id, + minor_id + ); + + // Resources must be updated since they are used to identify + // the device in the devices cgroup. + for ridx in &idxdata.residx { + // unwrap is safe, because residx would be empty if there + // were no resources + let res = &mut linux.resources.as_mut().unwrap().devices[*ridx]; + res.major = Some(major_id as i64); + res.minor = Some(minor_id as i64); info!( sl!(), - "change the device from major: {} minor: {} to vm device major: {} minor: {}", - host_major, - host_minor, - major_id, - minor_id + "set resources for device major: {} minor: {}\n", major_id, minor_id ); - - // Resources must be updated since they are used to identify the - // device in the devices cgroup. - if let Some(res) = linux.resources.as_mut() { - let ds = res.devices.as_mut_slice(); - for d in ds.iter_mut() { - if d.major == Some(host_major) && d.minor == Some(host_minor) { - d.major = Some(major_id as i64); - d.minor = Some(minor_id as i64); - - info!( - sl!(), - "set resources for device major: {} minor: {}\n", major_id, minor_id - ); - } - } - } - return Ok(()); } + Ok(()) + } else { + Err(anyhow!( + "Should have found a matching device {} in the spec", + device.vm_path + )) } - - Err(anyhow!( - "Should have found a matching device {} in the spec", - device.vm_path - )) } // device.Id should be the predicted device name (vda, vdb, ...) @@ -278,12 +281,13 @@ fn virtiommio_blk_device_handler( device: &Device, spec: &mut Spec, _sandbox: &Arc>, + devidx: &DevIndex, ) -> Result<()> { if device.vm_path == "" { return Err(anyhow!("Invalid path for virtio mmio blk device")); } - update_spec_device_list(device, spec) + update_spec_device_list(device, spec, devidx) } // device.Id should be the PCI address in the format "bridgeAddr/deviceAddr". @@ -293,6 +297,7 @@ fn virtio_blk_device_handler( device: &Device, spec: &mut Spec, sandbox: &Arc>, + devidx: &DevIndex, ) -> Result<()> { let mut dev = device.clone(); @@ -302,7 +307,7 @@ fn virtio_blk_device_handler( dev.vm_path = get_pci_device_name(sandbox, &device.id)?; } - update_spec_device_list(&dev, spec) + update_spec_device_list(&dev, spec, devidx) } // device.Id should be the SCSI address of the disk in the format "scsiID:lunID" @@ -310,22 +315,46 @@ fn virtio_scsi_device_handler( device: &Device, spec: &mut Spec, sandbox: &Arc>, + devidx: &DevIndex, ) -> Result<()> { let mut dev = device.clone(); dev.vm_path = get_scsi_device_name(sandbox, &device.id)?; - update_spec_device_list(&dev, spec) + update_spec_device_list(&dev, spec, devidx) } fn virtio_nvdimm_device_handler( device: &Device, spec: &mut Spec, _sandbox: &Arc>, + devidx: &DevIndex, ) -> Result<()> { if device.vm_path == "" { return Err(anyhow!("Invalid path for nvdimm device")); } - update_spec_device_list(device, spec) + update_spec_device_list(device, spec, devidx) +} + +impl DevIndex { + fn new(spec: &Spec) -> DevIndex { + let mut map = HashMap::new(); + + for linux in spec.linux.as_ref() { + for (i, d) in linux.devices.iter().enumerate() { + let mut residx = Vec::new(); + + for linuxres in linux.resources.as_ref() { + for (j, r) in linuxres.devices.iter().enumerate() { + if r.major == Some(d.major) && r.minor == Some(d.minor) { + residx.push(j); + } + } + } + map.insert(d.path.clone(), DevIndexEntry { idx: i, residx }); + } + } + DevIndex(map) + } } pub fn add_devices( @@ -333,14 +362,21 @@ pub fn add_devices( spec: &mut Spec, sandbox: &Arc>, ) -> Result<()> { + let devidx = DevIndex::new(spec); + for device in devices.iter() { - add_device(device, spec, sandbox)?; + add_device(device, spec, sandbox, &devidx)?; } Ok(()) } -fn add_device(device: &Device, spec: &mut Spec, sandbox: &Arc>) -> Result<()> { +fn add_device( + device: &Device, + spec: &mut Spec, + sandbox: &Arc>, + devidx: &DevIndex, +) -> Result<()> { // log before validation to help with debugging gRPC protocol version differences. info!(sl!(), "device-id: {}, device-type: {}, device-vm-path: {}, device-container-path: {}, device-options: {:?}", device.id, device.field_type, device.vm_path, device.container_path, device.options); @@ -359,7 +395,7 @@ fn add_device(device: &Device, spec: &mut Spec, sandbox: &Arc>) - match DEVICEHANDLERLIST.get(device.field_type.as_str()) { None => Err(anyhow!("Unknown device type {}", device.field_type)), - Some(dev_handler) => dev_handler(device, spec, sandbox), + Some(dev_handler) => dev_handler(device, spec, sandbox, devidx), } } @@ -425,19 +461,22 @@ mod tests { let mut spec = Spec::default(); // container_path empty - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_err()); device.container_path = "/dev/null".to_string(); // linux is empty - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_err()); spec.linux = Some(Linux::default()); // linux.devices is empty - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_err()); spec.linux.as_mut().unwrap().devices = vec![oci::LinuxDevice { @@ -448,19 +487,22 @@ mod tests { }]; // vm_path empty - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_err()); device.vm_path = "/dev/null".to_string(); // guest and host path are not the same - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_err(), "device={:?} spec={:?}", device, spec); spec.linux.as_mut().unwrap().devices[0].path = device.container_path.clone(); // spec.linux.resources is empty - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_ok()); // update both devices and cgroup lists @@ -480,7 +522,121 @@ mod tests { ..oci::LinuxResources::default() }); - let res = update_spec_device_list(&device, &mut spec); + let devidx = DevIndex::new(&spec); + let res = update_spec_device_list(&device, &mut spec, &devidx); assert!(res.is_ok()); } + + #[test] + fn test_update_spec_device_list_guest_host_conflict() { + let null_rdev = fs::metadata("/dev/null").unwrap().rdev(); + let zero_rdev = fs::metadata("/dev/zero").unwrap().rdev(); + let full_rdev = fs::metadata("/dev/full").unwrap().rdev(); + + let host_major_a = stat::major(null_rdev) as i64; + let host_minor_a = stat::minor(null_rdev) as i64; + let host_major_b = stat::major(zero_rdev) as i64; + let host_minor_b = stat::minor(zero_rdev) as i64; + + let mut spec = Spec { + linux: Some(Linux { + devices: vec![ + oci::LinuxDevice { + path: "/dev/a".to_string(), + r#type: "c".to_string(), + major: host_major_a, + minor: host_minor_a, + ..oci::LinuxDevice::default() + }, + oci::LinuxDevice { + path: "/dev/b".to_string(), + r#type: "c".to_string(), + major: host_major_b, + minor: host_minor_b, + ..oci::LinuxDevice::default() + }, + ], + resources: Some(LinuxResources { + devices: vec![ + oci::LinuxDeviceCgroup { + r#type: "c".to_string(), + major: Some(host_major_a), + minor: Some(host_minor_a), + ..oci::LinuxDeviceCgroup::default() + }, + oci::LinuxDeviceCgroup { + r#type: "c".to_string(), + major: Some(host_major_b), + minor: Some(host_minor_b), + ..oci::LinuxDeviceCgroup::default() + }, + ], + ..LinuxResources::default() + }), + ..Linux::default() + }), + ..Spec::default() + }; + let devidx = DevIndex::new(&spec); + + let dev_a = Device { + container_path: "/dev/a".to_string(), + vm_path: "/dev/zero".to_string(), + ..Device::default() + }; + + let guest_major_a = stat::major(zero_rdev) as i64; + let guest_minor_a = stat::minor(zero_rdev) as i64; + + let dev_b = Device { + container_path: "/dev/b".to_string(), + vm_path: "/dev/full".to_string(), + ..Device::default() + }; + + let guest_major_b = stat::major(full_rdev) as i64; + let guest_minor_b = stat::minor(full_rdev) as i64; + + let specdevices = &spec.linux.as_ref().unwrap().devices; + assert_eq!(host_major_a, specdevices[0].major); + assert_eq!(host_minor_a, specdevices[0].minor); + assert_eq!(host_major_b, specdevices[1].major); + assert_eq!(host_minor_b, specdevices[1].minor); + + let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap(); + assert_eq!(Some(host_major_a), specresources.devices[0].major); + assert_eq!(Some(host_minor_a), specresources.devices[0].minor); + assert_eq!(Some(host_major_b), specresources.devices[1].major); + assert_eq!(Some(host_minor_b), specresources.devices[1].minor); + + let res = update_spec_device_list(&dev_a, &mut spec, &devidx); + assert!(res.is_ok()); + + let specdevices = &spec.linux.as_ref().unwrap().devices; + assert_eq!(guest_major_a, specdevices[0].major); + assert_eq!(guest_minor_a, specdevices[0].minor); + assert_eq!(host_major_b, specdevices[1].major); + assert_eq!(host_minor_b, specdevices[1].minor); + + let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap(); + assert_eq!(Some(guest_major_a), specresources.devices[0].major); + assert_eq!(Some(guest_minor_a), specresources.devices[0].minor); + assert_eq!(Some(host_major_b), specresources.devices[1].major); + assert_eq!(Some(host_minor_b), specresources.devices[1].minor); + + let res = update_spec_device_list(&dev_b, &mut spec, &devidx); + assert!(res.is_ok()); + + let specdevices = &spec.linux.as_ref().unwrap().devices; + assert_eq!(guest_major_a, specdevices[0].major); + assert_eq!(guest_minor_a, specdevices[0].minor); + assert_eq!(guest_major_b, specdevices[1].major); + assert_eq!(guest_minor_b, specdevices[1].minor); + + let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap(); + assert_eq!(Some(guest_major_a), specresources.devices[0].major); + assert_eq!(Some(guest_minor_a), specresources.devices[0].minor); + assert_eq!(Some(guest_major_b), specresources.devices[1].major); + assert_eq!(Some(guest_minor_b), specresources.devices[1].minor); + } } From ae6b8ec747000af468fbe71af5eb897a8dbd7fd2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 8 Oct 2020 17:15:14 +1100 Subject: [PATCH 4/4] agent/device: Check type as well as major:minor when looking up devices To update device resource entries from host to guest, we search for the right entry by host major:minor numbers, then later update it. However block and character devices exist in separate major:minor namespaces so we could have one block and one character device with matching major:minor and thus incorrectly update both with the details for whichever device is processed second. Add a check on device type to prevent this. Port from the Kata 1 Go agent https://github.com/kata-containers/agent/commit/27ebdc9d2761 Fixes: #703 Signed-off-by: David Gibson --- src/agent/src/device.rs | 78 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/src/agent/src/device.rs b/src/agent/src/device.rs index 89433d34e..18f53e0c9 100644 --- a/src/agent/src/device.rs +++ b/src/agent/src/device.rs @@ -345,7 +345,10 @@ impl DevIndex { for linuxres in linux.resources.as_ref() { for (j, r) in linuxres.devices.iter().enumerate() { - if r.major == Some(d.major) && r.minor == Some(d.minor) { + if r.r#type == d.r#type + && r.major == Some(d.major) + && r.minor == Some(d.minor) + { residx.push(j); } } @@ -639,4 +642,77 @@ mod tests { assert_eq!(Some(guest_major_b), specresources.devices[1].major); assert_eq!(Some(guest_minor_b), specresources.devices[1].minor); } + + #[test] + fn test_update_spec_device_list_char_block_conflict() { + let null_rdev = fs::metadata("/dev/null").unwrap().rdev(); + + let guest_major = stat::major(null_rdev) as i64; + let guest_minor = stat::minor(null_rdev) as i64; + let host_major: i64 = 99; + let host_minor: i64 = 99; + + let mut spec = Spec { + linux: Some(Linux { + devices: vec![ + oci::LinuxDevice { + path: "/dev/char".to_string(), + r#type: "c".to_string(), + major: host_major, + minor: host_minor, + ..oci::LinuxDevice::default() + }, + oci::LinuxDevice { + path: "/dev/block".to_string(), + r#type: "b".to_string(), + major: host_major, + minor: host_minor, + ..oci::LinuxDevice::default() + }, + ], + resources: Some(LinuxResources { + devices: vec![ + LinuxDeviceCgroup { + r#type: "c".to_string(), + major: Some(host_major), + minor: Some(host_minor), + ..LinuxDeviceCgroup::default() + }, + LinuxDeviceCgroup { + r#type: "b".to_string(), + major: Some(host_major), + minor: Some(host_minor), + ..LinuxDeviceCgroup::default() + }, + ], + ..LinuxResources::default() + }), + ..Linux::default() + }), + ..Spec::default() + }; + let devidx = DevIndex::new(&spec); + + let dev = Device { + container_path: "/dev/char".to_string(), + vm_path: "/dev/null".to_string(), + ..Device::default() + }; + + let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap(); + assert_eq!(Some(host_major), specresources.devices[0].major); + assert_eq!(Some(host_minor), specresources.devices[0].minor); + assert_eq!(Some(host_major), specresources.devices[1].major); + assert_eq!(Some(host_minor), specresources.devices[1].minor); + + let res = update_spec_device_list(&dev, &mut spec, &devidx); + assert!(res.is_ok()); + + // Only the char device, not the block device should be updated + let specresources = spec.linux.as_ref().unwrap().resources.as_ref().unwrap(); + assert_eq!(Some(guest_major), specresources.devices[0].major); + assert_eq!(Some(guest_minor), specresources.devices[0].minor); + assert_eq!(Some(host_major), specresources.devices[1].major); + assert_eq!(Some(host_minor), specresources.devices[1].minor); + } }