From 5b3a349db58458fd6227d1ac828b7ad2c5679bc7 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Mon, 11 Oct 2021 14:42:49 +0100 Subject: [PATCH 1/2] trace-forwarder: Support Hybrid VSOCK Add support for Hybrid VSOCK. Unlike standard vsock (`vsock(7)`), under hybrid VSOCK, the hypervisor creates a "master" *UNIX* socket on the host. For guest-initiated VSOCK connections (such as the Kata agent uses for agent tracing), the hypervisor will then attempt to open a VSOCK port-specific variant of the socket which it expects a server to be listening on. Running the trace forwarder with the new `--socket-path` option and passing it the Hypervisor specific master UNIX socket path, the trace forwarder will listen on the VSOCK port-specific socket path to handle Kata agent traces. For further details and examples, see the README or run the trace forwarder with `--help`. Fixes: #2786. Signed-off-by: James O. D. Hunt --- src/trace-forwarder/Makefile | 3 +- src/trace-forwarder/README.md | 126 +++++++++- src/trace-forwarder/src/handler.rs | 42 ++-- src/trace-forwarder/src/main.rs | 338 +++++++++++++++++++++------ src/trace-forwarder/src/server.rs | 119 +++++++--- src/trace-forwarder/src/utils.rs | 357 +++++++++++++++++++++++++++++ 6 files changed, 851 insertions(+), 134 deletions(-) create mode 100644 src/trace-forwarder/src/utils.rs diff --git a/src/trace-forwarder/Makefile b/src/trace-forwarder/Makefile index 2ae095a00..6cc28568e 100644 --- a/src/trace-forwarder/Makefile +++ b/src/trace-forwarder/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2020 Intel Corporation +# Copyright (c) 2020-2021 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 # @@ -17,6 +17,7 @@ vendor: cargo vendor test: + @cargo test --all -- --nocapture install: diff --git a/src/trace-forwarder/README.md b/src/trace-forwarder/README.md index 3cd36db44..8675a9644 100644 --- a/src/trace-forwarder/README.md +++ b/src/trace-forwarder/README.md @@ -8,13 +8,133 @@ which runs inside the virtual machine. The trace forwarder, which must be started before the agent, listens over VSOCK for trace data sent by the agent running inside the virtual machine. The -trace spans are exported to an OpenTelemetry collector (such as Jaeger) running by -default on the host. +trace spans are exported to an OpenTelemetry collector (such as Jaeger) +running by default on the host. + +## Quick start + +1. Start the OpenTelemetry collector (such as Jaeger). +1. [Start the trace forwarder](#run). +1. Ensure agent tracing is enabled in the Kata configuration file. +1. Create a Kata container as usual. + +## Run + +The way the trace forwarder is run depends on the configured hypervisor. + +### Determine configured hypervisor + +To identify which hypervisor Kata is configured to use, either look in the +configuration file, or run: + +```bash +$ kata-runtime env --json|jq '.Hypervisor.Path' +``` + +### QEMU + +Since QEMU supports VSOCK sockets in the standard way, if you are using QEMU +simply run the trace forwarder using the default options: + +#### Run the forwarder + +```bash +$ cargo run +``` + +You can now proceed to create a Kata container as normal. + +### Cloud Hypervisor and Firecracker + +Cloud Hypervisor and Firecracker both use "hybrid VSOCK" which uses a local +UNIX socket rather than the host kernel to handle communication with the +guest. As such, you need to specify the path to the UNIX socket. + +Since the trace forwarder needs to be run before the VM (sandbox) is started +and since the socket path is sandbox-specific, you need to run the `env` +command to determine the "template path". This path includes a `{ID}` tag that +represents the real sandbox ID or name. + +### Examples + +#### Configured hypervisor is Cloud Hypervisor + +```bash +$ socket_path=$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') +$ echo "$socket_path" +"/run/vc/vm/{ID}/clh.sock" +``` + +#### Configured hypervisor is Firecracker + +```bash +$ socket_path=$(sudo kata-runtime env --json | jq '.Hypervisor.SocketPath') +$ echo "$socket_path" +"/run/vc/firecracker/{ID}/root/kata.hvsock" +``` + +> **Note:** +> +> Do not rely on the paths shown above: you should run the command yourself +> as these paths _may_ change. + +Once you have determined the template path, build and install the forwarder, +create the sandbox directory and then run the trace forwarder. + +#### Build and install + +If you are using the [QEMU hypervisor](#qemu), this step is not necessary. + +If you are using Cloud Hypervisor of Firecracker, using the tool is simpler if +it has been installed. + +##### Build + +```bash +$ make +``` + +##### Install + +```bash +$ cargo install --path . +$ sudo install -o root -g root -m 0755 ~/.cargo/bin/kata-trace-forwarder /usr/local/bin +``` + +#### Create sandbox directory + +You will need to change the `sandbox_id` variable below to match the name of +the container (sandbox) you plan to create _after_ starting the trace +forwarder. + +The `socket_path` variable was set in the +[Cloud Hypervisor and Firecracker](#cloud-hypervisor-and-firecracker) section. + +```bash +$ sandbox_id="foo" +$ sudo mkdir -p $(dirname "$socket_path") +``` + +#### Run the forwarder specifying socket path + +```bash +$ sudo kata-trace-forwarder --socket-path "$socket_path" +``` + +You can now proceed as normal to create the "foo" Kata container. + +> **Note:** +> +> Since the trace forwarder needs to create the socket in the sandbox +> directory, and since that directory is owned by the `root` user, the trace +> forwarder must also be run as `root`. This requirement is unique to +> hypervisors that use hybrid VSOCK: QEMU does not require special privileges +> to run the trace forwarder. ## Full details Run: -``` +```bash $ cargo run -- --help ``` diff --git a/src/trace-forwarder/src/handler.rs b/src/trace-forwarder/src/handler.rs index e7b465248..1ac76ec2d 100644 --- a/src/trace-forwarder/src/handler.rs +++ b/src/trace-forwarder/src/handler.rs @@ -1,15 +1,16 @@ -// Copyright (c) 2020 Intel Corporation +// Copyright (c) 2020-2021 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 // use anyhow::{anyhow, Context, Result}; use byteorder::{ByteOrder, NetworkEndian}; +use futures::executor::block_on; use opentelemetry::sdk::export::trace::{SpanData, SpanExporter}; use slog::{debug, info, o, Logger}; +use std::fs::File; use std::io::{ErrorKind, Read}; -use std::net::Shutdown; -use vsock::VsockStream; +use std::os::unix::io::{FromRawFd, RawFd}; // The VSOCK "packet" protocol used comprises two elements: // @@ -28,14 +29,13 @@ fn mk_io_err(msg: &str) -> std::io::Error { std::io::Error::new(std::io::ErrorKind::Other, msg.to_string()) } -pub async fn handle_connection<'a>( +async fn handle_async_connection<'a>( logger: Logger, - mut conn: VsockStream, + mut conn: &'a mut dyn Read, exporter: &'a mut dyn SpanExporter, dump_only: bool, ) -> Result<()> { - let logger = logger.new(o!("subsystem" => "handler", - "connection" => format!("{:?}", conn))); + let logger = logger.new(o!("subsystem" => "handler")); debug!(logger, "handling connection"); @@ -43,12 +43,7 @@ pub async fn handle_connection<'a>( .await .map_err(|e| mk_io_err(&format!("failed to handle data: {:}", e)))?; - debug!(&logger, "handled data"); - - conn.shutdown(Shutdown::Read) - .map_err(|e| mk_io_err(&format!("shutdown failed: {:}", e)))?; - - debug!(&logger, "shutdown connection"); + debug!(&logger, "handled connection"); Ok(()) } @@ -83,7 +78,7 @@ async fn handle_trace_data<'a>( reader .read_exact(&mut encoded_payload) - .with_context(|| format!("failed to read payload"))?; + .with_context(|| "failed to read payload")?; debug!(logger, "read payload"); @@ -95,9 +90,7 @@ async fn handle_trace_data<'a>( if dump_only { debug!(logger, "dump-only: {:?}", span_data); } else { - let mut batch = Vec::::new(); - - batch.push(span_data); + let batch = vec![span_data]; // Call low-level Jaeger exporter to send the trace span immediately. let result = exporter.export(batch).await; @@ -112,3 +105,18 @@ async fn handle_trace_data<'a>( Ok(()) } + +pub fn handle_connection( + logger: Logger, + fd: RawFd, + exporter: &mut dyn SpanExporter, + dump_only: bool, +) -> Result<()> { + let mut file = unsafe { File::from_raw_fd(fd) }; + + let conn = handle_async_connection(logger, &mut file, exporter, dump_only); + + block_on(conn)?; + + Ok(()) +} diff --git a/src/trace-forwarder/src/main.rs b/src/trace-forwarder/src/main.rs index 68346c64a..1eeab1c74 100644 --- a/src/trace-forwarder/src/main.rs +++ b/src/trace-forwarder/src/main.rs @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Intel Corporation +// Copyright (c) 2020-2021 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 // @@ -14,13 +14,46 @@ use std::process::exit; // Traces will be created using this program name const DEFAULT_TRACE_NAME: &str = "kata-agent"; -const VSOCK_CID_ANY: &str = "any"; const ABOUT_TEXT: &str = "Kata Containers Trace Forwarder"; const DESCRIPTION_TEXT: &str = r#" DESCRIPTION: Kata Containers component that runs on the host and forwards - trace data from the container to a trace collector on the host."#; + trace data from the container to a trace collector on the host. + + This tool requires agent tracing to be enabled in the Kata + configuration file. It uses VSOCK to listen for trace data originating + from the Kata agent running inside the Kata Container. + + The variety of VSOCK used depends on the configuration hypervisor: + + |------------------------|--------------------|----------------| + | Hypervisor | Type of VSOCK | Run as user | + |------------------------|--------------------|----------------| + | Cloud Hypervisor (CLH) | Firecracker Hybrid | privileged | + |------------------------|--------------------|----------------| + | QEMU | Standard | non-privileged | + |------------------------|--------------------|----------------| + | Firecracker (FC) | Firecracker Hybrid | privileged | + |------------------------|--------------------|----------------| + + Key: + + - Firecracker Hybrid VSOCK: See the Firecracker + VSOCK documentation. + - Standard VSOCK: see vsock(7). + + The way this tool is run depends on the configured hypervisor. + See EXAMPLES for further information. + + Note that Hybrid VSOCK requries root privileges. Due to the way the + hybrid protocol works, the specified "master socket" itself is not used: to + communicate with the agent, this tool must generate a socket path using + the specified socket path as a prefix. Since the master socket will be + created in a root-owned directory when the Kata Containers VM (sandbox) is + created, this tool must be run as root to allow it to create the second + agent-specific socket. + "#; const DEFAULT_LOG_LEVEL: slog::Level = slog::Level::Info; @@ -35,6 +68,12 @@ const DEFAULT_JAEGER_PORT: &str = "6831"; mod handler; mod server; mod tracer; +mod utils; + +use crate::utils::{ + make_hybrid_socket_path, str_to_vsock_cid, str_to_vsock_port, VSOCK_CID_ANY_STR, +}; +use server::VsockType; fn announce(logger: &Logger, version: &str, dump_only: bool) { let commit = env::var("VERSION_COMMIT").map_or(String::new(), |s| s); @@ -49,17 +88,45 @@ fn make_examples_text(program_name: &str) -> String { format!( r#"EXAMPLES: -- Normally run on host specifying VSOCK port number - for Kata Containers agent to connect to: +- Example assuming QEMU is the Kata configured hypervisor: - $ {program} --trace-name {trace_name:?} -p 12345 + $ {program} --trace-name {trace_name:?} +- Example assuming cloud-hypervisor is the Kata configured hypervisor + and the sandbox _about_ to be created will be called {sandbox_id:?}: + + $ sandbox_id={sandbox_id:?} + $ sudo {program} --trace-name {trace_name:?} --socket-path /run/vc/vm/{sandbox_id}/clh.sock + +- Example assuming firecracker is the Kata configured hypervisor + and the sandbox _about_ to be created will be called {sandbox_id:?}: + + $ sandbox_id={sandbox_id:?} + $ sudo {program} --trace-name {trace_name:?} --socket-path /run/vc/firecracker/{sandbox_id}/root/kata.hvsock "#, program = program_name, trace_name = DEFAULT_TRACE_NAME, + sandbox_id = "foo" ) } +fn handle_hybrid_vsock(socket_path: &str, port: Option<&str>) -> Result { + let socket_path = make_hybrid_socket_path(socket_path, port, DEFAULT_KATA_VSOCK_TRACING_PORT)?; + + let vsock = VsockType::Hybrid { socket_path }; + + Ok(vsock) +} + +fn handle_standard_vsock(cid: Option<&str>, port: Option<&str>) -> Result { + let cid = str_to_vsock_cid(cid)?; + let port = str_to_vsock_port(port, DEFAULT_KATA_VSOCK_TRACING_PORT)?; + + let vsock = VsockType::Standard { port, cid }; + + Ok(vsock) +} + fn real_main() -> Result<()> { let version = crate_version!(); let name = crate_name!(); @@ -107,78 +174,33 @@ fn real_main() -> Result<()> { .takes_value(true) .required(false), ) + .arg( + Arg::with_name("socket-path") + .long("socket-path") + .help("Full path to hypervisor socket (needs root! cloud-hypervisor and firecracker hypervisors only)") + .takes_value(true) + .required(false), + ) .arg( Arg::with_name("vsock-cid") .long("vsock-cid") - .help(&format!("VSOCK CID number (or {:?})", VSOCK_CID_ANY)) + .help(&format!( + "VSOCK CID number (or {:?}) (QEMU hypervisor only)", + VSOCK_CID_ANY_STR + )) .takes_value(true) .required(false) - .default_value(VSOCK_CID_ANY), + .default_value(VSOCK_CID_ANY_STR), ) .arg( Arg::with_name("vsock-port") .long("vsock-port") - .help("VSOCK port number") + .help("VSOCK port number (QEMU hypervisor only)") .takes_value(true) .default_value(DEFAULT_KATA_VSOCK_TRACING_PORT), ) .get_matches(); - let vsock_port: u32 = args - .value_of("vsock-port") - .ok_or(anyhow!("Need VSOCK port number")) - .map_or_else( - |e| Err(anyhow!(e)), - |p| { - p.parse::() - .map_err(|e| anyhow!(format!("VSOCK port number must be an integer: {:?}", e))) - }, - )?; - - if vsock_port == 0 { - return Err(anyhow!("VSOCK port number cannot be zero")); - } - - let vsock_cid: u32 = args - .value_of("vsock-cid") - .ok_or(libc::VMADDR_CID_ANY as u32) - .map_or_else( - |e| Err(anyhow!(e)), - |c| { - if c == VSOCK_CID_ANY { - // Explicit request for "any CID" - Ok(libc::VMADDR_CID_ANY as u32) - } else { - c.parse::() - .map_err(|e| anyhow!(format!("CID number must be an integer: {:?}", e))) - } - }, - ) - .map_err(|e| anyhow!(e))?; - - if vsock_cid == 0 { - return Err(anyhow!("VSOCK CID cannot be zero")); - } - - let jaeger_port: u32 = args - .value_of("jaeger-port") - .ok_or("Need Jaeger port number") - .map(|p| p.parse::().unwrap()) - .map_err(|e| anyhow!("Jaeger port number must be an integer: {:?}", e))?; - - if jaeger_port == 0 { - return Err(anyhow!("Jaeger port number cannot be zero")); - } - - let jaeger_host = args - .value_of("jaeger-host") - .ok_or("Need Jaeger host") - .map_err(|e| anyhow!(e))?; - - if jaeger_host == "" { - return Err(anyhow!("Jaeger host cannot be blank")); - } - // Cannot fail as a default has been specified let log_level_name = args.value_of("log-level").unwrap(); @@ -198,7 +220,7 @@ fn real_main() -> Result<()> { .map_or_else( |e| Err(anyhow!(e)), |n| { - if n == "" { + if n.is_empty() { Err(anyhow!("Need non-blank trace name")) } else { Ok(n) @@ -206,10 +228,36 @@ fn real_main() -> Result<()> { }, )?; - let mut server = server::VsockTraceServer::new( + // Handle the Hybrid VSOCK option first (since it cannot be defaulted). + let vsock = if let Some(socket_path) = args.value_of("socket-path") { + handle_hybrid_vsock(socket_path, args.value_of("vsock-port")) + } else { + // The default is standard VSOCK + handle_standard_vsock(args.value_of("vsock-cid"), args.value_of("vsock-port")) + }?; + + let jaeger_port: u32 = args + .value_of("jaeger-port") + .ok_or("Need Jaeger port number") + .map(|p| p.parse::().unwrap()) + .map_err(|e| anyhow!("Jaeger port number must be an integer: {:?}", e))?; + + if jaeger_port == 0 { + return Err(anyhow!("Jaeger port number cannot be zero")); + } + + let jaeger_host = args + .value_of("jaeger-host") + .ok_or("Need Jaeger host") + .map_err(|e| anyhow!(e))?; + + if jaeger_host.is_empty() { + return Err(anyhow!("Jaeger host cannot be blank")); + } + + let server = server::VsockTraceServer::new( &logger, - vsock_port, - vsock_cid, + vsock, jaeger_host, jaeger_port, trace_name, @@ -228,13 +276,151 @@ fn real_main() -> Result<()> { } fn main() { - match real_main() { - Err(e) => { - eprintln!("ERROR: {}", e); - exit(1); - } - _ => (), - }; - + if let Err(e) = real_main() { + eprintln!("ERROR: {}", e); + exit(1); + } exit(0); } + +#[cfg(test)] +mod tests { + use super::*; + use crate::assert_result; + use utils::{ + ERR_HVSOCK_SOC_PATH_EMPTY, ERR_VSOCK_CID_EMPTY, ERR_VSOCK_CID_NOT_NUMERIC, + ERR_VSOCK_PORT_EMPTY, ERR_VSOCK_PORT_NOT_NUMERIC, ERR_VSOCK_PORT_ZERO, VSOCK_CID_ANY, + }; + + #[test] + fn test_handle_hybrid_vsock() { + #[derive(Debug)] + struct TestData<'a> { + socket_path: &'a str, + port: Option<&'a str>, + result: Result, + } + + let tests = &[ + TestData { + socket_path: "", + port: None, + result: Err(anyhow!(ERR_HVSOCK_SOC_PATH_EMPTY)), + }, + TestData { + socket_path: "/foo/bar", + port: None, + result: Ok(VsockType::Hybrid { + socket_path: format!("/foo/bar_{}", DEFAULT_KATA_VSOCK_TRACING_PORT), + }), + }, + TestData { + socket_path: "/foo/bar", + port: Some(""), + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + socket_path: "/foo/bar", + port: Some("foo bar"), + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + socket_path: "/foo/bar", + port: Some("9"), + result: Ok(VsockType::Hybrid { + socket_path: "/foo/bar_9".into(), + }), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = handle_hybrid_vsock(d.socket_path, d.port); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } + + #[test] + fn test_handle_standard_vsock() { + #[derive(Debug)] + struct TestData<'a> { + cid: Option<&'a str>, + port: Option<&'a str>, + result: Result, + } + + let tests = &[ + TestData { + cid: None, + port: None, + result: Ok(VsockType::Standard { + cid: VSOCK_CID_ANY, + port: DEFAULT_KATA_VSOCK_TRACING_PORT.parse::().unwrap(), + }), + }, + TestData { + cid: Some(""), + port: None, + result: Err(anyhow!(ERR_VSOCK_CID_EMPTY)), + }, + TestData { + cid: Some("1"), + port: Some(""), + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + cid: Some("1 foo"), + port: None, + result: Err(anyhow!(ERR_VSOCK_CID_NOT_NUMERIC)), + }, + TestData { + cid: None, + port: Some("1 foo"), + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + cid: Some("1"), + port: Some("0"), + result: Err(anyhow!(ERR_VSOCK_PORT_ZERO)), + }, + TestData { + cid: Some("1"), + port: None, + result: Ok(VsockType::Standard { + cid: 1, + port: DEFAULT_KATA_VSOCK_TRACING_PORT.parse::().unwrap(), + }), + }, + TestData { + cid: Some("123"), + port: Some("999"), + result: Ok(VsockType::Standard { + cid: 123, + port: 999, + }), + }, + TestData { + cid: Some(VSOCK_CID_ANY_STR), + port: Some("999"), + result: Ok(VsockType::Standard { + cid: VSOCK_CID_ANY, + port: 999, + }), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = handle_standard_vsock(d.cid, d.port); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } +} diff --git a/src/trace-forwarder/src/server.rs b/src/trace-forwarder/src/server.rs index cca1b31f2..393828bfb 100644 --- a/src/trace-forwarder/src/server.rs +++ b/src/trace-forwarder/src/server.rs @@ -4,17 +4,24 @@ // use crate::handler; -use anyhow::Result; -use futures::executor::block_on; -use slog::{debug, error, info, o, Logger}; +use anyhow::{anyhow, Result}; +use opentelemetry::sdk::export::trace::SpanExporter; +use slog::{debug, o, Logger}; +use std::os::unix::io::AsRawFd; +use std::os::unix::net::UnixListener; use vsock::{SockAddr, VsockListener}; use crate::tracer; +#[derive(Debug, Clone, PartialEq)] +pub enum VsockType { + Standard { port: u32, cid: u32 }, + Hybrid { socket_path: String }, +} + #[derive(Debug)] pub struct VsockTraceServer { - pub vsock_port: u32, - pub vsock_cid: u32, + pub vsock: VsockType, pub jaeger_host: String, pub jaeger_port: u32, @@ -27,8 +34,7 @@ pub struct VsockTraceServer { impl VsockTraceServer { pub fn new( logger: &Logger, - vsock_port: u32, - vsock_cid: u32, + vsock: VsockType, jaeger_host: &str, jaeger_port: u32, jaeger_service_name: &str, @@ -37,8 +43,7 @@ impl VsockTraceServer { let logger = logger.new(o!("subsystem" => "server")); VsockTraceServer { - vsock_port, - vsock_cid, + vsock, jaeger_host: jaeger_host.to_string(), jaeger_port, jaeger_service_name: jaeger_service_name.to_string(), @@ -47,13 +52,7 @@ impl VsockTraceServer { } } - pub fn start(&mut self) -> Result<()> { - let sock_addr = SockAddr::new_vsock(self.vsock_cid, self.vsock_port); - - let listener = VsockListener::bind(&sock_addr)?; - - info!(self.logger, "listening for client connections"; "vsock-port" => self.vsock_port, "vsock-cid" => self.vsock_cid); - + pub fn start(&self) -> Result<()> { let result = tracer::create_jaeger_trace_exporter( self.jaeger_service_name.clone(), self.jaeger_host.clone(), @@ -62,27 +61,73 @@ impl VsockTraceServer { let mut exporter = result?; - for conn in listener.incoming() { - debug!(self.logger, "got client connection"); - - match conn { - Err(e) => { - error!(self.logger, "client connection failed"; "error" => format!("{}", e)) - } - Ok(conn) => { - debug!(self.logger, "client connection successful"); - - let logger = self.logger.new(o!()); - - let f = handler::handle_connection(logger, conn, &mut exporter, self.dump_only); - - block_on(f)?; - } - } - - debug!(self.logger, "handled client connection"); + match &self.vsock { + VsockType::Standard { port, cid } => start_std_vsock( + self.logger.clone(), + &mut exporter, + *port, + *cid, + self.dump_only, + ), + VsockType::Hybrid { socket_path } => start_hybrid_vsock( + self.logger.clone(), + &mut exporter, + socket_path, + self.dump_only, + ), } - - Ok(()) } } + +fn start_hybrid_vsock( + logger: Logger, + exporter: &mut dyn SpanExporter, + socket_path: &str, + dump_only: bool, +) -> Result<()> { + // Remove the socket if it already exists + let _ = std::fs::remove_file(socket_path); + + let listener = + UnixListener::bind(socket_path).map_err(|e| anyhow!("You need to be root: {:?}", e))?; + + debug!(logger, "Waiting for connections"; + "vsock-type" => "hybrid", + "vsock-socket-path" => socket_path); + + for conn in listener.incoming() { + let conn = conn?; + + let fd = conn.as_raw_fd(); + + handler::handle_connection(logger.clone(), fd, exporter, dump_only)?; + } + + Ok(()) +} + +fn start_std_vsock( + logger: Logger, + exporter: &mut dyn SpanExporter, + port: u32, + cid: u32, + dump_only: bool, +) -> Result<()> { + let sock_addr = SockAddr::new_vsock(cid, port); + let listener = VsockListener::bind(&sock_addr)?; + + debug!(logger, "Waiting for connections"; + "vsock-type" => "standard", + "vsock-cid" => cid, + "vsock-port" => port); + + for conn in listener.incoming() { + let conn = conn?; + + let fd = conn.as_raw_fd(); + + handler::handle_connection(logger.clone(), fd, exporter, dump_only)?; + } + + Ok(()) +} diff --git a/src/trace-forwarder/src/utils.rs b/src/trace-forwarder/src/utils.rs new file mode 100644 index 000000000..e89e4e691 --- /dev/null +++ b/src/trace-forwarder/src/utils.rs @@ -0,0 +1,357 @@ +// Copyright (c) 2021 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +use anyhow::{anyhow, Result}; + +// Request for "any CID" +pub const VSOCK_CID_ANY_STR: &str = "any"; +// Numeric equivalent to VSOCK_CID_ANY_STR +pub const VSOCK_CID_ANY: u32 = libc::VMADDR_CID_ANY; + +pub const ERR_VSOCK_PORT_EMPTY: &str = "VSOCK port cannot be empty"; +pub const ERR_VSOCK_PORT_NOT_NUMERIC: &str = "VSOCK port number must be an integer"; +pub const ERR_VSOCK_PORT_ZERO: &str = "VSOCK port number cannot be zero"; + +pub const ERR_VSOCK_CID_EMPTY: &str = "VSOCK CID cannot be empty"; +pub const ERR_VSOCK_CID_NOT_NUMERIC: &str = "VSOCK CID must be an integer"; + +pub const ERR_HVSOCK_SOC_PATH_EMPTY: &str = "Hybrid VSOCK socket path cannot be empty"; + +// Parameters: +// +// 1: expected Result +// 2: actual Result +// 3: string used to identify the test on error +#[macro_export] +macro_rules! assert_result { + ($expected_result:expr, $actual_result:expr, $msg:expr) => { + if $expected_result.is_ok() { + let expected_level = $expected_result.as_ref().unwrap(); + let actual_level = $actual_result.unwrap(); + assert!(*expected_level == actual_level, "{}", $msg); + } else { + let expected_error = $expected_result.as_ref().unwrap_err(); + let expected_error_msg = format!("{:?}", expected_error); + + if let Err(actual_error) = $actual_result { + let actual_error_msg = format!("{:?}", actual_error); + + assert!(expected_error_msg == actual_error_msg, "{}", $msg); + } else { + assert!(expected_error_msg == "expected error, got OK", "{}", $msg); + } + } + }; +} + +// Create a Hybrid VSOCK path from the specified socket, appending either +// the user specified port or the default port. +pub fn make_hybrid_socket_path( + socket_path: &str, + user_port: Option<&str>, + default_port: &str, +) -> Result { + if socket_path.is_empty() { + return Err(anyhow!(ERR_HVSOCK_SOC_PATH_EMPTY)); + } + + let port_str = if let Some(user_port) = user_port { + user_port + } else { + default_port + }; + + let port = port_str_to_port(port_str)?; + + let full_path = format!("{}_{}", socket_path, port); + + Ok(full_path) +} + +// Convert a string to a VSOCK CID value. +pub fn str_to_vsock_cid(cid: Option<&str>) -> Result { + let cid_str = if let Some(cid) = cid { + cid + } else { + VSOCK_CID_ANY_STR + }; + + let cid: u32 = match cid_str { + VSOCK_CID_ANY_STR => Ok(VSOCK_CID_ANY), + "" => return Err(anyhow!(ERR_VSOCK_CID_EMPTY)), + _ => cid_str + .parse::() + .map_err(|_| anyhow!(ERR_VSOCK_CID_NOT_NUMERIC)), + }?; + + Ok(cid) +} + +// Convert a user specified VSOCK port number string into a VSOCK port number, +// or use the default value if not specified. +pub fn str_to_vsock_port(port: Option<&str>, default_port: &str) -> Result { + let port_str = if let Some(port) = port { + port + } else { + default_port + }; + + let port = port_str_to_port(port_str)?; + + Ok(port) +} + +// Convert a string port value into a numeric value. +fn port_str_to_port(port: &str) -> Result { + if port.is_empty() { + return Err(anyhow!(ERR_VSOCK_PORT_EMPTY)); + } + + let port: u32 = port + .parse::() + .map_err(|_| anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC))?; + + if port == 0 { + return Err(anyhow!(ERR_VSOCK_PORT_ZERO)); + } + + Ok(port) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_port_str_to_port() { + #[derive(Debug)] + struct TestData<'a> { + port: &'a str, + result: Result, + } + + let tests = &[ + TestData { + port: "", + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + port: "a", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: "foo bar", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: "1 bar", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: "0", + result: Err(anyhow!(ERR_VSOCK_PORT_ZERO)), + }, + TestData { + port: "2", + result: Ok(2), + }, + TestData { + port: "12345", + result: Ok(12345), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = port_str_to_port(d.port); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } + + #[test] + fn test_str_to_vsock_port() { + #[derive(Debug)] + struct TestData<'a> { + port: Option<&'a str>, + default_port: &'a str, + result: Result, + } + + let tests = &[ + TestData { + port: None, + default_port: "", + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + port: None, + default_port: "foo", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: None, + default_port: "1 foo", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: None, + default_port: "0", + result: Err(anyhow!(ERR_VSOCK_PORT_ZERO)), + }, + TestData { + port: None, + default_port: "1234", + result: Ok(1234), + }, + TestData { + port: Some(""), + default_port: "1234", + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + port: Some("1 foo"), + default_port: "1234", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + port: Some("0"), + default_port: "1234", + result: Err(anyhow!(ERR_VSOCK_PORT_ZERO)), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = str_to_vsock_port(d.port, d.default_port); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } + + #[test] + fn test_str_to_vsock_cid() { + #[derive(Debug)] + struct TestData<'a> { + cid: Option<&'a str>, + result: Result, + } + + let tests = &[ + TestData { + cid: None, + result: Ok(VSOCK_CID_ANY), + }, + TestData { + cid: Some(VSOCK_CID_ANY_STR), + result: Ok(VSOCK_CID_ANY), + }, + TestData { + cid: Some(""), + result: Err(anyhow!(ERR_VSOCK_CID_EMPTY)), + }, + TestData { + cid: Some("foo"), + result: Err(anyhow!(ERR_VSOCK_CID_NOT_NUMERIC)), + }, + TestData { + cid: Some("1 foo"), + result: Err(anyhow!(ERR_VSOCK_CID_NOT_NUMERIC)), + }, + TestData { + cid: Some("123"), + result: Ok(123), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = str_to_vsock_cid(d.cid); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } + + #[test] + fn test_make_hybrid_socket_path() { + #[derive(Debug)] + struct TestData<'a> { + socket_path: &'a str, + user_port: Option<&'a str>, + default_port: &'a str, + result: Result, + } + + let tests = &[ + TestData { + socket_path: "", + user_port: None, + default_port: "", + result: Err(anyhow!(ERR_HVSOCK_SOC_PATH_EMPTY)), + }, + TestData { + socket_path: "/foo", + user_port: None, + default_port: "", + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + socket_path: "/foo", + user_port: None, + default_port: "1 foo", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + socket_path: "/foo", + user_port: None, + default_port: "0", + result: Err(anyhow!(ERR_VSOCK_PORT_ZERO)), + }, + TestData { + socket_path: "/foo", + user_port: None, + default_port: "1", + result: Ok("/foo_1".into()), + }, + TestData { + socket_path: "/foo", + user_port: Some(""), + default_port: "1", + result: Err(anyhow!(ERR_VSOCK_PORT_EMPTY)), + }, + TestData { + socket_path: "/foo", + user_port: Some("1 foo"), + default_port: "1", + result: Err(anyhow!(ERR_VSOCK_PORT_NOT_NUMERIC)), + }, + TestData { + socket_path: "/foo", + user_port: Some("2"), + default_port: "1", + result: Ok("/foo_2".into()), + }, + ]; + + for (i, d) in tests.iter().enumerate() { + let msg = format!("test[{}]: {:?}", i, d); + + let result = make_hybrid_socket_path(d.socket_path, d.user_port, d.default_port); + + let msg = format!("{}: result: {:?}", msg, result); + + assert_result!(d.result, result, msg); + } + } +} From e61f5e29319edc018ce4a27dec830c4f891b3119 Mon Sep 17 00:00:00 2001 From: "James O. D. Hunt" Date: Mon, 11 Oct 2021 14:33:30 +0100 Subject: [PATCH 2/2] runtime: Show socket path in kata-env output Display a pseudo path to the sandbox socket in the output of `kata-runtime env` for those hypervisors that use Hybrid VSOCK. The path is not a real path since the command does not create a sandbox. The output includes a `{ID}` tag which would be replaced with the real sandbox ID (name) when the sandbox was created. This feature is only useful for agent tracing with the trace forwarder where the configured hypervisor uses Hybrid VSOCK. Note that the features required a new `setConfig()` method to be added to the `hypervisor` interface. This isn't normally needed as the specified hypervisor configuration passed to `setConfig()` is also passed to `createSandbox()`. However the new call is required by `kata-runtime env` to display the correct socket path for Firecracker. The new method isn't wholly redundant for the main code path though as it's now used by each hypervisor's `createSandbox()` call. Signed-off-by: James O. D. Hunt --- src/runtime/cmd/kata-runtime/kata-env.go | 26 ++++++- src/runtime/cmd/kata-runtime/kata-env_test.go | 62 +++++++++++++++- src/runtime/virtcontainers/acrn.go | 17 ++++- src/runtime/virtcontainers/acrn_test.go | 15 ++++ src/runtime/virtcontainers/clh.go | 15 +++- src/runtime/virtcontainers/clh_test.go | 44 +++++++++++ src/runtime/virtcontainers/fc.go | 74 +++++++++++++------ src/runtime/virtcontainers/fc_test.go | 26 +++++++ src/runtime/virtcontainers/hypervisor.go | 40 +++++++++- src/runtime/virtcontainers/hypervisor_test.go | 4 +- src/runtime/virtcontainers/mock_hypervisor.go | 11 ++- src/runtime/virtcontainers/qemu.go | 18 ++++- src/runtime/virtcontainers/qemu_test.go | 14 ++++ src/runtime/virtcontainers/sandbox.go | 4 +- src/runtime/virtcontainers/vm.go | 4 +- 15 files changed, 324 insertions(+), 50 deletions(-) diff --git a/src/runtime/cmd/kata-runtime/kata-env.go b/src/runtime/cmd/kata-runtime/kata-env.go index be22d7b32..4f275ad00 100644 --- a/src/runtime/cmd/kata-runtime/kata-env.go +++ b/src/runtime/cmd/kata-runtime/kata-env.go @@ -29,7 +29,7 @@ import ( // // XXX: Increment for every change to the output format // (meaning any change to the EnvInfo type). -const formatVersion = "1.0.25" +const formatVersion = "1.0.26" // MetaInfo stores information on the format of the output itself type MetaInfo struct { @@ -108,6 +108,7 @@ type HypervisorInfo struct { EntropySource string SharedFS string VirtioFSDaemon string + SocketPath string Msize9p uint32 MemorySlots uint32 PCIeRootPort uint32 @@ -305,7 +306,7 @@ func getAgentInfo(config oci.RuntimeConfig) (AgentInfo, error) { return agent, nil } -func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo { +func getHypervisorInfo(config oci.RuntimeConfig) (HypervisorInfo, error) { hypervisorPath := config.HypervisorConfig.HypervisorPath version, err := getCommandVersion(hypervisorPath) @@ -313,6 +314,19 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo { version = unknown } + hypervisorType := config.HypervisorType + + socketPath := unknown + + // It is only reliable to make this call as root since a + // non-privileged user may not have access to /dev/vhost-vsock. + if os.Geteuid() == 0 { + socketPath, err = vc.GetHypervisorSocketTemplate(hypervisorType, &config.HypervisorConfig) + if err != nil { + return HypervisorInfo{}, err + } + } + return HypervisorInfo{ Debug: config.HypervisorConfig.Debug, MachineType: config.HypervisorConfig.HypervisorMachineType, @@ -327,7 +341,8 @@ func getHypervisorInfo(config oci.RuntimeConfig) HypervisorInfo { HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, PCIeRootPort: config.HypervisorConfig.PCIeRootPort, - } + SocketPath: socketPath, + }, nil } func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err error) { @@ -352,7 +367,10 @@ func getEnvInfo(configFile string, config oci.RuntimeConfig) (env EnvInfo, err e return EnvInfo{}, err } - hypervisor := getHypervisorInfo(config) + hypervisor, err := getHypervisorInfo(config) + if err != nil { + return EnvInfo{}, err + } image := ImageInfo{ Path: config.HypervisorConfig.ImagePath, diff --git a/src/runtime/cmd/kata-runtime/kata-env_test.go b/src/runtime/cmd/kata-runtime/kata-env_test.go index 779e0335c..992d46e6d 100644 --- a/src/runtime/cmd/kata-runtime/kata-env_test.go +++ b/src/runtime/cmd/kata-runtime/kata-env_test.go @@ -277,7 +277,7 @@ VERSION_ID="%s" } func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo { - return HypervisorInfo{ + info := HypervisorInfo{ Version: testHypervisorVersion, Path: config.HypervisorConfig.HypervisorPath, MachineType: config.HypervisorConfig.HypervisorMachineType, @@ -292,6 +292,16 @@ func getExpectedHypervisor(config oci.RuntimeConfig) HypervisorInfo { HotplugVFIOOnRootBus: config.HypervisorConfig.HotplugVFIOOnRootBus, PCIeRootPort: config.HypervisorConfig.PCIeRootPort, } + + if os.Geteuid() == 0 { + // This assumes the test hypervisor is a non-hybrid-vsock + // one (such as QEMU). + info.SocketPath = "" + } else { + info.SocketPath = unknown + } + + return info } func getExpectedImage(config oci.RuntimeConfig) ImageInfo { @@ -1007,12 +1017,58 @@ func TestGetHypervisorInfo(t *testing.T) { _, config, err := makeRuntimeConfig(tmpdir) assert.NoError(err) - info := getHypervisorInfo(config) + info, err := getHypervisorInfo(config) + assert.NoError(err) assert.Equal(info.Version, testHypervisorVersion) err = os.Remove(config.HypervisorConfig.HypervisorPath) assert.NoError(err) - info = getHypervisorInfo(config) + info, err = getHypervisorInfo(config) + assert.NoError(err) assert.Equal(info.Version, unknown) } + +func TestGetHypervisorInfoSocket(t *testing.T) { + assert := assert.New(t) + + tmpdir, err := ioutil.TempDir("", "") + assert.NoError(err) + defer os.RemoveAll(tmpdir) + + _, config, err := makeRuntimeConfig(tmpdir) + assert.NoError(err) + + type TestHypervisorDetails struct { + hType vc.HypervisorType + hybridVsock bool + } + + hypervisors := []TestHypervisorDetails{ + {vc.AcrnHypervisor, false}, + {vc.ClhHypervisor, true}, + {vc.FirecrackerHypervisor, true}, + {vc.MockHypervisor, false}, + {vc.QemuHypervisor, false}, + } + + for i, details := range hypervisors { + msg := fmt.Sprintf("hypervisor[%d]: %+v", i, details) + + config.HypervisorType = details.hType + + info, err := getHypervisorInfo(config) + assert.NoError(err, msg) + + if os.Geteuid() == 0 { + if !details.hybridVsock { + assert.Equal(info.SocketPath, "", msg) + } else { + assert.NotEmpty(info.SocketPath, msg) + assert.True(strings.HasPrefix(info.SocketPath, "/"), msg) + } + } else { + assert.Equal(info.SocketPath, unknown, msg) + } + } +} diff --git a/src/runtime/virtcontainers/acrn.go b/src/runtime/virtcontainers/acrn.go index a8c5b530f..a2b7b559a 100644 --- a/src/runtime/virtcontainers/acrn.go +++ b/src/runtime/virtcontainers/acrn.go @@ -284,13 +284,11 @@ func (a *Acrn) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso span, _ := katatrace.Trace(ctx, a.Logger(), "setup", acrnTracingTags, map[string]string{"sandbox_id": a.id}) defer span.End() - err := hypervisorConfig.valid() - if err != nil { + if err := a.setConfig(hypervisorConfig); err != nil { return err } a.id = id - a.config = *hypervisorConfig a.arch = newAcrnArch(a.config) var create bool @@ -302,6 +300,9 @@ func (a *Acrn) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso if create { a.Logger().Debug("Setting UUID") + + var err error + if uuid, err = a.GetNextAvailableUUID(); err != nil { return err } @@ -342,6 +343,16 @@ func (a *Acrn) createDummyVirtioBlkDev(ctx context.Context, devices []Device) ([ return devices, nil } +func (a *Acrn) setConfig(config *HypervisorConfig) error { + if err := config.valid(); err != nil { + return err + } + + a.config = *config + + return nil +} + // createSandbox is the Hypervisor sandbox creation. func (a *Acrn) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error { // Save the tracing context diff --git a/src/runtime/virtcontainers/acrn_test.go b/src/runtime/virtcontainers/acrn_test.go index 55a3c63b5..8940c251a 100644 --- a/src/runtime/virtcontainers/acrn_test.go +++ b/src/runtime/virtcontainers/acrn_test.go @@ -259,3 +259,18 @@ func TestAcrnMemoryTopology(t *testing.T) { assert.NoError(err) assert.Exactly(memory, expectedOut) } + +func TestAcrnSetConfig(t *testing.T) { + assert := assert.New(t) + + config := newAcrnConfig() + + a := &Acrn{} + + assert.Equal(a.config, HypervisorConfig{}) + + err := a.setConfig(&config) + assert.NoError(err) + + assert.Equal(a.config, config) +} diff --git a/src/runtime/virtcontainers/clh.go b/src/runtime/virtcontainers/clh.go index 6788993e4..3afb06417 100644 --- a/src/runtime/virtcontainers/clh.go +++ b/src/runtime/virtcontainers/clh.go @@ -188,6 +188,17 @@ var clhDebugKernelParams = []Param{ // //########################################################### +func (clh *cloudHypervisor) setConfig(config *HypervisorConfig) error { + err := config.valid() + if err != nil { + return err + } + + clh.config = *config + + return nil +} + // For cloudHypervisor this call only sets the internal structure up. // The VM will be created and started through startSandbox(). func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error { @@ -197,13 +208,11 @@ func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networ clh.ctx = newCtx defer span.End() - err := hypervisorConfig.valid() - if err != nil { + if err := clh.setConfig(hypervisorConfig); err != nil { return err } clh.id = id - clh.config = *hypervisorConfig clh.state.state = clhNotReady clh.Logger().WithField("function", "createSandbox").Info("creating Sandbox") diff --git a/src/runtime/virtcontainers/clh_test.go b/src/runtime/virtcontainers/clh_test.go index 84f5b92e9..6cc682fe7 100644 --- a/src/runtime/virtcontainers/clh_test.go +++ b/src/runtime/virtcontainers/clh_test.go @@ -11,11 +11,13 @@ import ( "os" "path/filepath" "reflect" + "strings" "testing" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/config" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/persist" chclient "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cloud-hypervisor/client" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" "github.com/pkg/errors" "github.com/stretchr/testify/assert" @@ -366,3 +368,45 @@ func TestCloudHypervisorHotplugRemoveDevice(t *testing.T) { _, err = clh.hotplugRemoveDevice(context.Background(), nil, netDev) assert.Error(err, "Hotplug remove pmem block device expected error") } + +func TestClhGenerateSocket(t *testing.T) { + assert := assert.New(t) + + // Ensure the type is fully constructed + hypervisor, err := NewHypervisor("clh") + assert.NoError(err) + + clh, ok := hypervisor.(*cloudHypervisor) + assert.True(ok) + + clh.addVSock(1, "path") + + s, err := clh.generateSocket("c") + + assert.NoError(err) + assert.NotNil(s) + + hvsock, ok := s.(types.HybridVSock) + assert.True(ok) + assert.NotEmpty(hvsock.UdsPath) + + // Path must be absolute + assert.True(strings.HasPrefix(hvsock.UdsPath, "/")) + + assert.NotZero(hvsock.Port) +} + +func TestClhSetConfig(t *testing.T) { + assert := assert.New(t) + + config, err := newClhConfig() + assert.NoError(err) + + clh := &cloudHypervisor{} + assert.Equal(clh.config, HypervisorConfig{}) + + err = clh.setConfig(&config) + assert.NoError(err) + + assert.Equal(clh.config, config) +} diff --git a/src/runtime/virtcontainers/fc.go b/src/runtime/virtcontainers/fc.go index 5e8fd6cb3..be16b3777 100644 --- a/src/runtime/virtcontainers/fc.go +++ b/src/runtime/virtcontainers/fc.go @@ -146,15 +146,16 @@ type firecracker struct { fcConfig *types.FcConfig // Parameters configured before VM starts connection *client.Firecracker //Tracks the current active connection - id string //Unique ID per pod. Normally maps to the sandbox id - vmPath string //All jailed VM assets need to be under this - chrootBaseDir string //chroot base for the jailer - jailerRoot string - socketPath string - netNSPath string - uid string //UID and GID to be used for the VMM - gid string - fcConfigPath string + id string //Unique ID per pod. Normally maps to the sandbox id + vmPath string //All jailed VM assets need to be under this + chrootBaseDir string //chroot base for the jailer + jailerRoot string + socketPath string + hybridSocketPath string + netNSPath string + uid string //UID and GID to be used for the VMM + gid string + fcConfigPath string info FirecrackerInfo config HypervisorConfig @@ -186,6 +187,17 @@ func (fc *firecracker) truncateID(id string) string { return id } +func (fc *firecracker) setConfig(config *HypervisorConfig) error { + err := config.valid() + if err != nil { + return err + } + + fc.config = *config + + return nil +} + // For firecracker this call only sets the internal structure up. // The sandbox will be created and started through startSandbox(). func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error { @@ -198,8 +210,27 @@ func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS N //https://github.com/kata-containers/runtime/issues/1065 fc.id = fc.truncateID(id) fc.state.set(notReady) - fc.config = *hypervisorConfig + if err := fc.setConfig(hypervisorConfig); err != nil { + return err + } + + fc.setPaths(&fc.config) + + // So we need to repopulate this at startSandbox where it is valid + fc.netNSPath = networkNS.NetNsPath + + // Till we create lower privileged kata user run as root + // https://github.com/kata-containers/runtime/issues/1869 + fc.uid = "0" + fc.gid = "0" + + fc.fcConfig = &types.FcConfig{} + fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig) + return nil +} + +func (fc *firecracker) setPaths(hypervisorConfig *HypervisorConfig) { // When running with jailer all resources need to be under // a specific location and that location needs to have // exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run) @@ -220,17 +251,7 @@ func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS N // with the name of "firecracker.socket" fc.socketPath = filepath.Join(fc.jailerRoot, "run", fcSocket) - // So we need to repopulate this at startSandbox where it is valid - fc.netNSPath = networkNS.NetNsPath - - // Till we create lower privileged kata user run as root - // https://github.com/kata-containers/runtime/issues/1869 - fc.uid = "0" - fc.gid = "0" - - fc.fcConfig = &types.FcConfig{} - fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig) - return nil + fc.hybridSocketPath = filepath.Join(fc.jailerRoot, defaultHybridVSocketName) } func (fc *firecracker) newFireClient(ctx context.Context) *client.Firecracker { @@ -783,7 +804,7 @@ func (fc *firecracker) startSandbox(ctx context.Context, timeout int) error { } // make sure 'others' don't have access to this socket - err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640) + err = os.Chmod(fc.hybridSocketPath, 0640) if err != nil { return fmt.Errorf("Could not change socket permissions: %v", err) } @@ -1225,10 +1246,15 @@ func (fc *firecracker) check() error { func (fc *firecracker) generateSocket(id string) (interface{}, error) { fc.Logger().Debug("Using hybrid-vsock endpoint") - udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName) + + // Method is being run outside of the normal container workflow + if fc.jailerRoot == "" { + fc.id = id + fc.setPaths(&fc.config) + } return types.HybridVSock{ - UdsPath: udsPath, + UdsPath: fc.hybridSocketPath, Port: uint32(vSockPort), }, nil } diff --git a/src/runtime/virtcontainers/fc_test.go b/src/runtime/virtcontainers/fc_test.go index c1ac3da08..9e58ba0de 100644 --- a/src/runtime/virtcontainers/fc_test.go +++ b/src/runtime/virtcontainers/fc_test.go @@ -6,6 +6,7 @@ package virtcontainers import ( + "strings" "testing" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" @@ -23,6 +24,10 @@ func TestFCGenerateSocket(t *testing.T) { hvsock, ok := i.(types.HybridVSock) assert.True(ok) assert.NotEmpty(hvsock.UdsPath) + + // Path must be absolute + assert.True(strings.HasPrefix(hvsock.UdsPath, "/")) + assert.NotZero(hvsock.Port) } @@ -64,3 +69,24 @@ func TestFCParseVersion(t *testing.T) { assert.Equal(parsedVersion, v) } } + +func TestFcSetConfig(t *testing.T) { + assert := assert.New(t) + + config := HypervisorConfig{ + HypervisorPath: "/some/where/firecracker", + KernelPath: "/some/where/kernel", + ImagePath: "/some/where/image", + JailerPath: "/some/where/jailer", + Debug: true, + } + + fc := firecracker{} + + assert.Equal(fc.config, HypervisorConfig{}) + + err := fc.setConfig(&config) + assert.NoError(err) + + assert.Equal(fc.config, config) +} diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index c32c5d937..d3ced8564 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -185,8 +185,8 @@ func (hType *HypervisorType) String() string { } } -// newHypervisor returns an hypervisor from and hypervisor type. -func newHypervisor(hType HypervisorType) (hypervisor, error) { +// NewHypervisor returns an hypervisor from and hypervisor type. +func NewHypervisor(hType HypervisorType) (hypervisor, error) { store, err := persist.GetDriver() if err != nil { return nil, err @@ -214,6 +214,41 @@ func newHypervisor(hType HypervisorType) (hypervisor, error) { } } +// GetHypervisorSocketTemplate returns the full "template" path to the +// hypervisor socket. If the specified hypervisor doesn't use a socket, +// an empty string is returned. +// +// The returned value is not the actual socket path since this function +// does not create a sandbox. Instead a path is returned with a special +// template value "{ID}" which would be replaced with the real sandbox +// name sandbox creation time. +func GetHypervisorSocketTemplate(hType HypervisorType, config *HypervisorConfig) (string, error) { + hypervisor, err := NewHypervisor(hType) + if err != nil { + return "", err + } + + if err := hypervisor.setConfig(config); err != nil { + return "", err + } + + // Tag that is used to represent the name of a sandbox + const sandboxID = "{ID}" + + socket, err := hypervisor.generateSocket(sandboxID) + if err != nil { + return "", err + } + + var socketPath string + + if hybridVsock, ok := socket.(types.HybridVSock); ok { + socketPath = hybridVsock.UdsPath + } + + return socketPath, nil +} + // Param is a key/value representation for hypervisor and kernel parameters. type Param struct { Key string @@ -858,6 +893,7 @@ func generateVMSocket(id string, vmStogarePath string) (interface{}, error) { // hypervisor is the virtcontainers hypervisor interface. // The default hypervisor implementation is Qemu. type hypervisor interface { + setConfig(config *HypervisorConfig) error createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error startSandbox(ctx context.Context, timeout int) error // If wait is set, don't actively stop the sandbox: diff --git a/src/runtime/virtcontainers/hypervisor_test.go b/src/runtime/virtcontainers/hypervisor_test.go index 9d5675329..59dfbbede 100644 --- a/src/runtime/virtcontainers/hypervisor_test.go +++ b/src/runtime/virtcontainers/hypervisor_test.go @@ -67,7 +67,7 @@ func TestStringFromUnknownHypervisorType(t *testing.T) { func testNewHypervisorFromHypervisorType(t *testing.T, hypervisorType HypervisorType, expected hypervisor) { assert := assert.New(t) - hy, err := newHypervisor(hypervisorType) + hy, err := NewHypervisor(hypervisorType) assert.NoError(err) assert.Exactly(hy, expected) } @@ -82,7 +82,7 @@ func TestNewHypervisorFromUnknownHypervisorType(t *testing.T) { var hypervisorType HypervisorType assert := assert.New(t) - hy, err := newHypervisor(hypervisorType) + hy, err := NewHypervisor(hypervisorType) assert.Error(err) assert.Nil(hy) } diff --git a/src/runtime/virtcontainers/mock_hypervisor.go b/src/runtime/virtcontainers/mock_hypervisor.go index e9e6f5b03..33db505cc 100644 --- a/src/runtime/virtcontainers/mock_hypervisor.go +++ b/src/runtime/virtcontainers/mock_hypervisor.go @@ -30,9 +30,16 @@ func (m *mockHypervisor) hypervisorConfig() HypervisorConfig { return HypervisorConfig{} } +func (m *mockHypervisor) setConfig(config *HypervisorConfig) error { + if err := config.valid(); err != nil { + return err + } + + return nil +} + func (m *mockHypervisor) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error { - err := hypervisorConfig.valid() - if err != nil { + if err := m.setConfig(hypervisorConfig); err != nil { return err } diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index fd04bbba3..c73fe49d2 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -229,13 +229,14 @@ func (q *qemu) setup(ctx context.Context, id string, hypervisorConfig *Hyperviso span, _ := katatrace.Trace(ctx, q.Logger(), "setup", qemuTracingTags, map[string]string{"sandbox_id": q.id}) defer span.End() - err := hypervisorConfig.valid() - if err != nil { + if err := q.setConfig(hypervisorConfig); err != nil { return err } q.id = id - q.config = *hypervisorConfig + + var err error + q.arch, err = newQemuArch(q.config) if err != nil { return err @@ -464,6 +465,17 @@ func (q *qemu) setupFileBackedMem(knobs *govmmQemu.Knobs, memory *govmmQemu.Memo memory.Path = target } +func (q *qemu) setConfig(config *HypervisorConfig) error { + err := config.valid() + if err != nil { + return err + } + + q.config = *config + + return nil +} + // createSandbox is the Hypervisor sandbox creation implementation for govmmQemu. func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig) error { // Save the tracing context diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index b16fa6bda..7b9b52843 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -585,3 +585,17 @@ func TestQemuGetpids(t *testing.T) { assert.True(pids[0] == 100) assert.True(pids[1] == 200) } + +func TestQemuSetConfig(t *testing.T) { + assert := assert.New(t) + + config := newQemuConfig() + + q := &qemu{} + + assert.Equal(q.config, HypervisorConfig{}) + err := q.setConfig(&config) + assert.NoError(err) + + assert.Equal(q.config, config) +} diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 9f8af2684..8ce76497a 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -152,7 +152,7 @@ func (sandboxConfig *SandboxConfig) valid() bool { return false } - if _, err := newHypervisor(sandboxConfig.HypervisorType); err != nil { + if _, err := NewHypervisor(sandboxConfig.HypervisorType); err != nil { sandboxConfig.HypervisorType = QemuHypervisor } @@ -498,7 +498,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor // create agent instance agent := getNewAgentFunc(ctx)() - hypervisor, err := newHypervisor(sandboxConfig.HypervisorType) + hypervisor, err := NewHypervisor(sandboxConfig.HypervisorType) if err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/vm.go b/src/runtime/virtcontainers/vm.go index 8437fb920..667a64583 100644 --- a/src/runtime/virtcontainers/vm.go +++ b/src/runtime/virtcontainers/vm.go @@ -85,7 +85,7 @@ func GrpcToVMConfig(j *pb.GrpcVMConfig) (*VMConfig, error) { // NewVM creates a new VM based on provided VMConfig. func NewVM(ctx context.Context, config VMConfig) (*VM, error) { // 1. setup hypervisor - hypervisor, err := newHypervisor(config.HypervisorType) + hypervisor, err := NewHypervisor(config.HypervisorType) if err != nil { return nil, err } @@ -165,7 +165,7 @@ func NewVM(ctx context.Context, config VMConfig) (*VM, error) { func NewVMFromGrpc(ctx context.Context, v *pb.GrpcVM, config VMConfig) (*VM, error) { virtLog.WithField("GrpcVM", v).WithField("config", config).Info("create new vm from Grpc") - hypervisor, err := newHypervisor(config.HypervisorType) + hypervisor, err := NewHypervisor(config.HypervisorType) if err != nil { return nil, err }