From 15bcb8eb6d91e08e298c592cba9c392c918a91b0 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Sat, 5 Apr 2025 07:03:06 +1100 Subject: [PATCH] fix: allowlist path exception (#2022) --- .goosehints | 8 + Cargo.lock | 2 + crates/goose-server/ALLOWLIST.md | 46 ++ crates/goose-server/Cargo.toml | 1 + crates/goose-server/src/routes/extension.rs | 612 ++++++++++++++++++++ 5 files changed, 669 insertions(+) create mode 100644 .goosehints create mode 100644 crates/goose-server/ALLOWLIST.md diff --git a/.goosehints b/.goosehints new file mode 100644 index 00000000..d90b952f --- /dev/null +++ b/.goosehints @@ -0,0 +1,8 @@ +This is a rust project with crates in crate dir. + +ui/desktop has an electron app in typescript. + +tips: +- can look at unstaged changes for what is being worked on if starting +- always check rust compiles, cargo fmt etc and cargo clippy -- -D warnings (as well as run tests in files you are working on) +- in ui/desktop, look at how you can run lint checks and if other tests can run \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 652e6405..1f083fa8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2480,6 +2480,7 @@ dependencies = [ "mcp-core", "mcp-server", "once_cell", + "reqwest 0.12.12", "serde", "serde_json", "serde_yaml", @@ -4790,6 +4791,7 @@ dependencies = [ "cookie", "cookie_store", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2 0.4.8", diff --git a/crates/goose-server/ALLOWLIST.md b/crates/goose-server/ALLOWLIST.md new file mode 100644 index 00000000..80cda4f9 --- /dev/null +++ b/crates/goose-server/ALLOWLIST.md @@ -0,0 +1,46 @@ +# Goose Extension Allowlist + +The allowlist feature provides a security mechanism for controlling which MCP commands can be used by goose. +By default, goose will let you run any MCP via any command, which isn't always desired. + +## How It Works + +1. When enabled, Goose will only allow execution of commands that match entries in the allowlist +2. Commands not in the allowlist will be rejected with an error message +3. The allowlist is fetched from a URL specified by the `GOOSE_ALLOWLIST` environment variable and cached while running. + +## Setup + +Set the `GOOSE_ALLOWLIST` environment variable to the URL of your allowlist YAML file: + +```bash +export GOOSE_ALLOWLIST=https://example.com/goose-allowlist.yaml +``` + +If this environment variable is not set, no allowlist restrictions will be applied (all commands will be allowed). + +## Allowlist File Format + +The allowlist file should be a YAML file with the following structure: + +```yaml +extensions: + - id: extension-id-1 + command: command-name-1 + - id: extension-id-2 + command: command-name-2 +``` + +Example: + +```yaml +extensions: + - id: slack + command: uvx mcp_slack + - id: github + command: uvx mcp_github + - id: jira + command: uvx mcp_jira +``` + +Note that the command should be the full command to launch the MCP (environment variables are provided for context by goose). Additional arguments will be rejected (to avoid injection attacks) \ No newline at end of file diff --git a/crates/goose-server/Cargo.toml b/crates/goose-server/Cargo.toml index 534e97b0..89ab0b2e 100644 --- a/crates/goose-server/Cargo.toml +++ b/crates/goose-server/Cargo.toml @@ -35,6 +35,7 @@ serde_yaml = "0.9.34" axum-extra = "0.10.0" utoipa = { version = "4.1", features = ["axum_extras"] } dirs = "6.0.0" +reqwest = { version = "0.12.9", features = ["json", "rustls-tls", "blocking"], default-features = false } [[bin]] name = "goosed" diff --git a/crates/goose-server/src/routes/extension.rs b/crates/goose-server/src/routes/extension.rs index 6bff014f..07e1f355 100644 --- a/crates/goose-server/src/routes/extension.rs +++ b/crates/goose-server/src/routes/extension.rs @@ -1,4 +1,7 @@ use std::collections::HashMap; +use std::env; +use std::path::Path; +use std::sync::OnceLock; use crate::state::AppState; use axum::{extract::State, routing::post, Json, Router}; @@ -156,6 +159,18 @@ async fn add_extension( env_keys, timeout, } => { + // Check allowlist for Stdio extensions + if !is_command_allowed(&cmd, &args) { + return Ok(Json(ExtensionResponse { + error: true, + message: Some(format!( + "Extension '{}' is not in the allowed extensions list. Command: '{} {}'. If you require access please ask your administrator to update the allowlist.", + args.join(" "), + cmd, args.join(" ") + )), + })); + } + let mut env_map = HashMap::new(); for key in env_keys { match config.get_secret(&key) { @@ -265,3 +280,600 @@ pub fn routes(state: AppState) -> Router { .route("/extensions/remove", post(remove_extension)) .with_state(state) } + +/// Structure representing the allowed extensions from the YAML file +#[derive(Deserialize, Debug, Clone)] +struct AllowedExtensions { + extensions: Vec, +} + +/// Structure representing an individual extension entry in the allowlist +#[derive(Deserialize, Debug, Clone)] +struct ExtensionAllowlistEntry { + #[allow(dead_code)] + id: String, + command: String, +} + +// Global cache for the allowed extensions +static ALLOWED_EXTENSIONS: OnceLock> = OnceLock::new(); + +/// Fetches and parses the allowed extensions from the URL specified in GOOSE_ALLOWLIST env var +fn fetch_allowed_extensions() -> Option { + match env::var("GOOSE_ALLOWLIST") { + Err(_) => { + // Environment variable not set, no allowlist to enforce + None + } + Ok(url) => match reqwest::blocking::get(&url) { + Err(e) => { + eprintln!("Failed to fetch allowlist: {}", e); + None + } + Ok(response) if !response.status().is_success() => { + eprintln!("Failed to fetch allowlist, status: {}", response.status()); + None + } + Ok(response) => match response.text() { + Err(e) => { + eprintln!("Failed to read allowlist response: {}", e); + None + } + Ok(text) => match serde_yaml::from_str::(&text) { + Ok(allowed) => Some(allowed), + Err(e) => { + eprintln!("Failed to parse allowlist YAML: {}", e); + None + } + }, + }, + }, + } +} + +/// Gets the cached allowed extensions or fetches them if not yet cached +fn get_allowed_extensions() -> &'static Option { + ALLOWED_EXTENSIONS.get_or_init(fetch_allowed_extensions) +} + +/// Checks if a command is allowed based on the allowlist +fn is_command_allowed(cmd: &str, args: &[String]) -> bool { + is_command_allowed_with_allowlist(&make_full_cmd(cmd, args), get_allowed_extensions()) +} + +fn make_full_cmd(cmd: &str, args: &[String]) -> String { + // trim each arg string to remove any leading/trailing whitespace + let args_trimmed = args.iter().map(|arg| arg.trim()).collect::>(); + + format!("{} {}", cmd.trim(), args_trimmed.join(" ").trim()) +} + +/// Normalizes a command name by removing common executable extensions (.exe, .cmd, .bat) +/// This makes the allowlist more portable across different operating systems +fn normalize_command_name(cmd: &str) -> String { + cmd.replace(".exe", "") + .replace(".cmd", "") + .replace(".bat", "") + .replace(" -y ", " ") + .replace(" -y", "") + .replace("-y ", "") + .to_string() +} + +/// Implementation of command allowlist checking that takes an explicit allowlist parameter +/// This makes it easier to test without relying on global state +fn is_command_allowed_with_allowlist( + cmd: &str, + allowed_extensions: &Option, +) -> bool { + // Extract the first part of the command (before any spaces) + let first_part = cmd.split_whitespace().next().unwrap_or(cmd); + + // Extract the base command name (last part of the path) + let cmd_base_with_ext = Path::new(first_part) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or(first_part); + + // Normalize the command name by removing extensions like .exe or .cmd + let cmd_base = normalize_command_name(cmd_base_with_ext); + + // Special case: Always allow commands ending with "/goosed" or equal to "goosed" + // But still enforce that it's in the same directory as the current executable + if cmd_base == "goosed" { + // Only allow exact matches (no arguments) + if cmd == first_part { + // For absolute paths, check that it's in the same directory as the current executable + if (first_part.contains('/') || first_part.contains('\\')) + && !first_part.starts_with("./") + { + let current_exe = std::env::current_exe().unwrap(); + let current_exe_dir = current_exe.parent().unwrap(); + let expected_path = current_exe_dir.join("goosed").to_str().unwrap().to_string(); + + // Normalize both paths before comparing + let normalized_cmd_path = normalize_command_name(first_part); + let normalized_expected_path = normalize_command_name(&expected_path); + + if normalized_cmd_path == normalized_expected_path { + return true; + } + // If the path doesn't match, don't allow it + println!("Goosed not in expected directory: {}", cmd); + println!("Expected path: {}", expected_path); + return false; + } else { + // For non-path goosed or relative paths, allow it + return true; + } + } + return false; + } + + match allowed_extensions { + // No allowlist configured, allow all commands + None => true, + + // Empty allowlist, allow all commands + Some(extensions) if extensions.extensions.is_empty() => true, + + // Check against the allowlist + Some(extensions) => { + // Strip out the Goose.app/Contents/Resources/bin/ prefix if present + let mut cmd_to_check = cmd.to_string(); + + // Check if the command path contains Goose.app/Contents/Resources/bin/ + if cmd_to_check.contains("Goose.app/Contents/Resources/bin/") { + // Find the position of "Goose.app/Contents/Resources/bin/" + if let Some(idx) = cmd_to_check.find("Goose.app/Contents/Resources/bin/") { + // Extract only the part after "Goose.app/Contents/Resources/bin/" + cmd_to_check = cmd_to_check + [(idx + "Goose.app/Contents/Resources/bin/".len())..] + .to_string(); + } + } else { + // Only apply the path check if we're not dealing with a Goose.app path + // Check that the command exists as a peer command to current executable directory + // Only apply this check if the command includes a path separator + let current_exe = std::env::current_exe().unwrap(); + let current_exe_dir = current_exe.parent().unwrap(); + let expected_path = current_exe_dir + .join(&cmd_base) + .to_str() + .unwrap() + .to_string(); + + // Normalize both paths before comparing + let normalized_cmd_path = normalize_command_name(first_part); + + if (first_part.contains('/') || first_part.contains('\\')) + && normalized_cmd_path != expected_path + && !cmd_to_check.contains("Goose.app/Contents/Resources/bin/") + { + println!("Command not in expected directory: {}", cmd); + return false; + } + + // Remove current_exe_dir + "/" from the cmd to clean it up + let path_to_trim = format!("{}/", current_exe_dir.to_str().unwrap()); + cmd_to_check = cmd_to_check.replace(&path_to_trim, ""); + } + + println!("Command to check after path trimming: {}", cmd_to_check); + + // Remove @version suffix from command parts + let parts: Vec<&str> = cmd_to_check.split_whitespace().collect(); + let mut cleaned_parts: Vec = Vec::new(); + + for part in parts { + if part.contains('@') { + // Keep only the part before the @ symbol + if let Some(base_part) = part.split('@').next() { + cleaned_parts.push(base_part.to_string()); + } else { + cleaned_parts.push(part.to_string()); + } + } else { + cleaned_parts.push(part.to_string()); + } + } + + // Reconstruct the command without version suffixes + cmd_to_check = cleaned_parts.join(" "); + + println!("Command to check after @version removal: {}", cmd_to_check); + + // Normalize the command before comparing with allowlist entries + let normalized_cmd = normalize_command_name(&cmd_to_check); + + println!("Final normalized command: {}", normalized_cmd); + + extensions.extensions.iter().any(|entry| { + let normalized_entry = normalize_command_name(&entry.command); + normalized_cmd == normalized_entry + }) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env; + + #[test] + fn test_normalize_command_name() { + // Test removing .exe extension + assert_eq!(normalize_command_name("goosed.exe"), "goosed"); + assert_eq!( + normalize_command_name("/path/to/goosed.exe"), + "/path/to/goosed" + ); + + // Test removing .cmd extension + assert_eq!(normalize_command_name("script.cmd"), "script"); + assert_eq!( + normalize_command_name("/path/to/script.cmd"), + "/path/to/script" + ); + + assert_eq!(normalize_command_name("batch.bat"), "batch"); + + assert_eq!(normalize_command_name("npx -y thing"), "npx thing"); + assert_eq!( + normalize_command_name("/path/to/batch.bat thing"), + "/path/to/batch thing" + ); + + // Test with no extension + assert_eq!(normalize_command_name("goosed"), "goosed"); + assert_eq!(normalize_command_name("/path/to/goosed"), "/path/to/goosed"); + } + + // Create a test allowlist with the given commands + fn create_test_allowlist(commands: &[&str]) -> Option { + if commands.is_empty() { + return Some(AllowedExtensions { extensions: vec![] }); + } + + let entries = commands + .iter() + .enumerate() + .map(|(i, cmd)| ExtensionAllowlistEntry { + id: format!("test-{}", i), + command: cmd.to_string(), + }) + .collect(); + + Some(AllowedExtensions { + extensions: entries, + }) + } + + #[test] + fn test_make_full() { + assert_eq!( + make_full_cmd("uvx", &vec!["mcp_slack".to_string()]), + "uvx mcp_slack" + ); + assert_eq!( + make_full_cmd("uvx", &vec!["mcp_slack ".to_string()]), + "uvx mcp_slack" + ); + assert_eq!( + make_full_cmd( + "uvx", + &vec!["mcp_slack".to_string(), "--verbose".to_string()] + ), + "uvx mcp_slack --verbose" + ); + assert_eq!( + make_full_cmd( + "uvx", + &vec!["mcp_slack".to_string(), " --verbose".to_string()] + ), + "uvx mcp_slack --verbose" + ); + } + + #[test] + fn test_command_allowed_when_matching() { + let allowlist = create_test_allowlist(&[ + "uvx something", + "uvx mcp_slack", + "npx mcp_github", + "minecraft", + ]); + + // Test with exact command matches + assert!(is_command_allowed_with_allowlist( + "uvx something", + &allowlist + )); + + // Test with exact command matches + assert!(is_command_allowed_with_allowlist("minecraft", &allowlist)); + + assert!(is_command_allowed_with_allowlist( + "uvx mcp_slack", + &allowlist + )); + assert!(is_command_allowed_with_allowlist( + "npx mcp_github", + &allowlist + )); + + // Get the current executable directory for reference + let current_exe = std::env::current_exe().unwrap(); + let current_exe_dir = current_exe.parent().unwrap(); + + // Create a full path command that would be in the current executable directory + // For testing purposes, we'll use a direct path to the command in the allowlist + let full_path_cmd = current_exe_dir + .join("uvx my_mcp") + .to_str() + .unwrap() + .to_string(); + + // Create a test allowlist with the command name (without path) + let path_test_allowlist = create_test_allowlist(&["uvx my_mcp"]); + + // This should be allowed because the path is correct and the base command matches + println!( + "Current executable directory: {}", + current_exe_dir.to_str().unwrap() + ); + println!("Path test allowlist: {:?}", path_test_allowlist); + assert!(is_command_allowed_with_allowlist( + &full_path_cmd, + &path_test_allowlist + )); + + // Test with additional arguments - should NOT match because we require exact matches + assert!(!is_command_allowed_with_allowlist( + "uvx mcp_slack --verbose --flag=value", + &allowlist + )); + + // Test with a path that doesn't match the current directory - should fail + assert!(!is_command_allowed_with_allowlist( + "/Users/username/path/to/uvx mcp_slack", + &allowlist + )); + + // These should NOT match with exact matching + assert!(!is_command_allowed_with_allowlist( + "uvx other_command", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist( + "prefix_npx mcp_github", + &allowlist + )); + } + + #[test] + fn test_command_allowed_simple() { + let allowlist = create_test_allowlist(&[ + "uvx something", + "uvx mcp_slack", + "npx mcp_github", + "minecraft", + ]); + + // Test with version, anything @version can be stripped when matching + assert!(is_command_allowed_with_allowlist( + "npx -y mcp_github@latest", + &allowlist + )); + } + + #[test] + fn test_command_allowed_flexible() { + let allowlist = create_test_allowlist(&[ + "uvx something", + "uvx mcp_slack", + "npx -y mcp_github", + "npx -y mcp_hammer start", + "minecraft", + ]); + + // Test with version, anything @version can be stripped when matching + assert!(is_command_allowed_with_allowlist( + "uvx something@1.0.13", + &allowlist + )); + + // Test with shim path - 'Goose.app/Contents/Resources/bin/' and before can be stripped to get the command to match + assert!(is_command_allowed_with_allowlist( + "/private/var/folders/fq/rd_cb6/T/AppTranslocation/EA0195/d/Goose.app/Contents/Resources/bin/uvx something", + &allowlist + )); + + // Test with shim path & latest version + assert!(is_command_allowed_with_allowlist( + "/private/var/folders/fq/rd_cb6/T/AppTranslocation/EA0195/d/Goose.app/Contents/Resources/bin/uvx something@latest", + &allowlist + )); + + // Test with exact command matches + assert!(is_command_allowed_with_allowlist( + "uvx something", + &allowlist + )); + + // Test with -y added, it is allowed (ie doesn't matter if we see a -y in there) + assert!(is_command_allowed_with_allowlist( + "npx -y mcp_github@latest", + &allowlist + )); + + // Test with -y added, and a version and parameter, it is allowed (npx mcp_hammer start is allowed) + assert!(is_command_allowed_with_allowlist( + "npx -y mcp_hammer@latest start", + &allowlist + )); + + // Test with shim path & latest version + assert!(is_command_allowed_with_allowlist( + "/private/var/folders/fq/rd_cb6/T/AppTranslocation/EA0195/d/Goose.app/Contents/Resources/bin/npx -y mcp_hammer@latest start", + &allowlist + )); + } + + #[test] + fn test_command_not_allowed_when_not_matching() { + let allowlist = + create_test_allowlist(&["uvx something", "uvx mcp_slack", "npx mcp_github"]); + + // These should not be allowed + assert!(!is_command_allowed_with_allowlist( + "/Users/username/path/to/uvx_malicious", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist( + "unauthorized_command", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist("/bin/bash", &allowlist)); + assert!(!is_command_allowed_with_allowlist( + "uvx unauthorized", + &allowlist + )); + } + + #[test] + fn test_all_commands_allowed_when_no_allowlist() { + // Empty allowlist should allow all commands + let empty_allowlist = create_test_allowlist(&[]); + assert!(is_command_allowed_with_allowlist( + "any_command_should_be_allowed", + &empty_allowlist + )); + + // No allowlist should allow all commands + assert!(is_command_allowed_with_allowlist( + "any_command_should_be_allowed", + &None + )); + } + + #[test] + fn test_goosed_special_case() { + // Create a restrictive allowlist that doesn't include goosed + let allowlist = create_test_allowlist(&["uvx mcp_slack"]); + + // Get the current executable directory for goosed path testing + let current_exe = std::env::current_exe().unwrap(); + let current_exe_dir = current_exe.parent().unwrap(); + let goosed_path = current_exe_dir.join("goosed").to_str().unwrap().to_string(); + let goosed_exe_path = current_exe_dir + .join("goosed.exe") + .to_str() + .unwrap() + .to_string(); + + // This should be allowed because it's goosed in the correct directory + assert!(is_command_allowed_with_allowlist(&goosed_path, &allowlist)); + + // This should also be allowed because it's goosed.exe in the correct directory + assert!(is_command_allowed_with_allowlist( + &goosed_exe_path, + &allowlist + )); + + // These should NOT be allowed because they're in the wrong directory + assert!(!is_command_allowed_with_allowlist( + "/usr/local/bin/goosed", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist( + "/Users/username/path/to/goosed", + &allowlist + )); + + // Commands with arguments should NOT be allowed - we require exact matches + assert!(!is_command_allowed_with_allowlist( + "/Users/username/path/to/goosed --flag value", + &allowlist + )); + + // Simple goosed without path should be allowed + assert!(is_command_allowed_with_allowlist("./goosed", &allowlist)); + assert!(is_command_allowed_with_allowlist("goosed", &allowlist)); + + // These should NOT be allowed because they don't end with "/goosed" + assert!(!is_command_allowed_with_allowlist( + "/usr/local/bin/goosed-extra", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist( + "/usr/local/bin/not-goosed", + &allowlist + )); + assert!(!is_command_allowed_with_allowlist( + "goosed-extra", + &allowlist + )); + } + + #[test] + fn test_fetch_allowed_extensions_from_url() { + // Start a mock server - we need to use a blocking approach since fetch_allowed_extensions is blocking + let server = std::net::TcpListener::bind("127.0.0.1:0").unwrap(); + let port = server.local_addr().unwrap().port(); + let server_url = format!("http://127.0.0.1:{}", port); + let server_path = "/allowed_extensions.yaml"; + + // Define the mock response + let yaml_content = r#"extensions: + - id: slack + command: uvx mcp_slack + - id: github + command: uvx mcp_github +"#; + + // Spawn a thread to handle the request + let handle = std::thread::spawn(move || { + let (stream, _) = server.accept().unwrap(); + let mut buf_reader = std::io::BufReader::new(&stream); + let mut request_line = String::new(); + std::io::BufRead::read_line(&mut buf_reader, &mut request_line).unwrap(); + + // Very simple HTTP response + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: text/yaml\r\n\r\n{}", + yaml_content.len(), + yaml_content + ); + + let mut writer = std::io::BufWriter::new(&stream); + std::io::Write::write_all(&mut writer, response.as_bytes()).unwrap(); + std::io::Write::flush(&mut writer).unwrap(); + }); + + // Set the environment variable to point to our mock server + env::set_var("GOOSE_ALLOWLIST", format!("{}{}", server_url, server_path)); + + // Give the server a moment to start + std::thread::sleep(std::time::Duration::from_millis(100)); + + // Call the function that fetches from the URL + let allowed_extensions = fetch_allowed_extensions(); + + // Verify the result + assert!(allowed_extensions.is_some()); + let extensions = allowed_extensions.unwrap(); + assert_eq!(extensions.extensions.len(), 2); + assert_eq!(extensions.extensions[0].id, "slack"); + assert_eq!(extensions.extensions[0].command, "uvx mcp_slack"); + assert_eq!(extensions.extensions[1].id, "github"); + assert_eq!(extensions.extensions[1].command, "uvx mcp_github"); + + // Clean up + env::remove_var("GOOSE_ALLOWLIST"); + + // Wait for the server thread to complete + handle.join().unwrap(); + } +}