diff --git a/Cargo.lock b/Cargo.lock index 2f3c59dd..9982d5ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.11" @@ -896,6 +907,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "brotli" version = "7.0.0" @@ -951,6 +971,12 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +[[package]] +name = "bytecount" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + [[package]] name = "bytemuck" version = "1.22.0" @@ -997,6 +1023,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.2.16" @@ -1087,6 +1122,16 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -2205,6 +2250,7 @@ dependencies = [ "indoc", "kill_tree", "lazy_static", + "lopdf", "mcp-core", "mcp-server", "once_cell", @@ -2860,6 +2906,16 @@ version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "block-padding", + "generic-array", +] + [[package]] name = "interpolate_name" version = "0.2.4" @@ -3171,6 +3227,30 @@ dependencies = [ "imgref", ] +[[package]] +name = "lopdf" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b" +dependencies = [ + "aes", + "cbc", + "chrono", + "encoding_rs", + "flate2", + "indexmap 2.7.1", + "itoa", + "log", + "md-5", + "nom", + "nom_locate", + "rangemap", + "rayon", + "thiserror 2.0.11", + "time", + "weezl", +] + [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -3303,6 +3383,16 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.4" @@ -3461,6 +3551,17 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom_locate" +version = "4.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + [[package]] name = "noop_proc_macro" version = "0.3.0" @@ -4152,6 +4253,12 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rangemap" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684" + [[package]] name = "rav1e" version = "0.7.1" diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml index 864ff82b..7286774c 100644 --- a/crates/goose-mcp/Cargo.toml +++ b/crates/goose-mcp/Cargo.toml @@ -39,6 +39,7 @@ regex = "1.11.1" once_cell = "1.20.2" ignore = "0.4" temp-env = "0.3" +lopdf = "0.35.0" [dev-dependencies] serial_test = "3.0.0" diff --git a/crates/goose-mcp/src/computercontroller/mod.rs b/crates/goose-mcp/src/computercontroller/mod.rs index 832814b6..6a1688df 100644 --- a/crates/goose-mcp/src/computercontroller/mod.rs +++ b/crates/goose-mcp/src/computercontroller/mod.rs @@ -19,6 +19,8 @@ use mcp_core::{ use mcp_server::router::CapabilitiesBuilder; use mcp_server::Router; +mod pdf_tool; + mod platform; use platform::{create_system_automation, SystemAutomation}; @@ -232,6 +234,33 @@ impl ComputerControllerRouter { }), ); + let pdf_tool = Tool::new( + "pdf_tool", + indoc! {r#" + Process PDF files to extract text and images. + Supports operations: + - extract_text: Extract all text content from the PDF + - extract_images: Extract and save embedded images to PNG files + + Use this when there is a .pdf file or files that need to be processed. + "#}, + json!({ + "type": "object", + "required": ["path", "operation"], + "properties": { + "path": { + "type": "string", + "description": "Path to the PDF file" + }, + "operation": { + "type": "string", + "enum": ["extract_text", "extract_images"], + "description": "Operation to perform on the PDF" + } + } + }), + ); + // choose_app_strategy().cache_dir() // - macOS/Linux: ~/.cache/goose/computer_controller/ // - Windows: ~\AppData\Local\Block\goose\cache\computer_controller\ @@ -359,6 +388,7 @@ impl ComputerControllerRouter { quick_script_tool, computer_control_tool, cache_tool, + pdf_tool, ], cache_dir, active_resources: Arc::new(Mutex::new(HashMap::new())), @@ -653,6 +683,20 @@ impl ComputerControllerRouter { } // Implement cache tool functionality + async fn pdf_tool(&self, params: Value) -> Result, ToolError> { + let path = params + .get("path") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidParameters("Missing 'path' parameter".into()))?; + + let operation = params + .get("operation") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidParameters("Missing 'operation' parameter".into()))?; + + crate::computercontroller::pdf_tool::pdf_tool(path, operation, &self.cache_dir).await + } + async fn cache(&self, params: Value) -> Result, ToolError> { let command = params .get("command") @@ -764,6 +808,7 @@ impl Router for ComputerControllerRouter { "automation_script" => this.quick_script(arguments).await, "computer_control" => this.computer_control(arguments).await, "cache" => this.cache(arguments).await, + "pdf_tool" => this.pdf_tool(arguments).await, _ => Err(ToolError::NotFound(format!("Tool {} not found", tool_name))), } }) diff --git a/crates/goose-mcp/src/computercontroller/pdf_tool.rs b/crates/goose-mcp/src/computercontroller/pdf_tool.rs new file mode 100644 index 00000000..f25dde64 --- /dev/null +++ b/crates/goose-mcp/src/computercontroller/pdf_tool.rs @@ -0,0 +1,422 @@ +use lopdf::{content::Content as PdfContent, Document, Object}; +use mcp_core::{Content, ToolError}; +use std::{fs, path::Path}; + +pub async fn pdf_tool( + path: &str, + operation: &str, + cache_dir: &Path, +) -> Result, ToolError> { + // Open and parse the PDF file + let doc = Document::load(path) + .map_err(|e| ToolError::ExecutionError(format!("Failed to open PDF file: {}", e)))?; + + let result = match operation { + "extract_text" => { + let mut text = String::new(); + + // Iterate over each page in the document + for (page_num, page_id) in doc.get_pages() { + text.push_str(&format!("Page {}:\n", page_num)); + + // Try to get text from page contents + if let Ok(page_obj) = doc.get_object(page_id) { + if let Ok(page_dict) = page_obj.as_dict() { + // Try to get text from Contents stream + if let Ok(contents) = + page_dict.get(b"Contents").and_then(|c| c.as_reference()) + { + if let Ok(content_obj) = doc.get_object(contents) { + if let Ok(stream) = content_obj.as_stream() { + if let Ok(content_data) = stream.get_plain_content() { + if let Ok(content) = PdfContent::decode(&content_data) { + // Process each operation in the content stream + for operation in content.operations { + match operation.operator.as_ref() { + // "Tj" operator: show text + "Tj" => { + for operand in operation.operands { + if let Object::String(ref bytes, _) = + operand + { + if let Ok(s) = + std::str::from_utf8(bytes) + { + text.push_str(s); + } + } + } + text.push(' '); + } + // "TJ" operator: show text with positioning + "TJ" => { + if let Some(Object::Array(ref arr)) = + operation.operands.first() + { + let mut last_was_text = false; + for element in arr { + match element { + Object::String( + ref bytes, + _, + ) => { + if let Ok(s) = + std::str::from_utf8( + bytes, + ) + { + if last_was_text { + text.push(' '); + } + text.push_str(s); + last_was_text = true; + } + } + Object::Integer(offset) => { + // Large negative offsets often indicate word spacing + if *offset < -100 { + text.push(' '); + last_was_text = false; + } + } + Object::Real(offset) => { + if *offset < -100.0 { + text.push(' '); + last_was_text = false; + } + } + _ => {} + } + } + text.push(' '); + } + } + _ => (), // Ignore other operators + } + } + } + } + } + } + } + } + } + text.push('\n'); + } + + if text.trim().is_empty() { + "No text found in PDF".to_string() + } else { + format!("Extracted text from PDF:\n\n{}", text) + } + } + + "extract_images" => { + let cache_dir = cache_dir.join("pdf_images"); + fs::create_dir_all(&cache_dir).map_err(|e| { + ToolError::ExecutionError(format!("Failed to create image cache directory: {}", e)) + })?; + + let mut images = Vec::new(); + let mut image_count = 0; + + // Helper function to determine file extension based on stream dict + fn get_image_extension(dict: &lopdf::Dictionary) -> &'static str { + if let Ok(filter) = dict.get(b"Filter") { + match filter { + Object::Name(name) => { + match name.as_slice() { + b"DCTDecode" => ".jpg", + b"JBIG2Decode" => ".jbig2", + b"JPXDecode" => ".jp2", + b"CCITTFaxDecode" => ".tiff", + b"FlateDecode" => { + // PNG-like images often use FlateDecode + // Check color space to confirm + if let Ok(cs) = dict.get(b"ColorSpace") { + if let Ok(name) = cs.as_name() { + if name == b"DeviceRGB" || name == b"DeviceGray" { + return ".png"; + } + } + } + ".raw" + } + _ => ".raw", + } + } + Object::Array(filters) => { + // If multiple filters, check the last one + if let Some(Object::Name(name)) = filters.last() { + match name.as_slice() { + b"DCTDecode" => return ".jpg", + b"JPXDecode" => return ".jp2", + _ => {} + } + } + ".raw" + } + _ => ".raw", + } + } else { + ".raw" + } + } + + // Process each page + for (page_num, page_id) in doc.get_pages() { + let page = doc.get_object(page_id).map_err(|e| { + ToolError::ExecutionError(format!("Failed to get page {}: {}", page_num, e)) + })?; + + let page_dict = page.as_dict().map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to get page dict {}: {}", + page_num, e + )) + })?; + + // Get page resources - handle both direct dict and reference + let resources = match page_dict.get(b"Resources") { + Ok(res) => match res { + Object::Dictionary(dict) => Ok(dict), + Object::Reference(id) => doc + .get_object(*id) + .map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to get resource reference: {}", + e + )) + }) + .and_then(|obj| { + obj.as_dict().map_err(|e| { + ToolError::ExecutionError(format!( + "Resource reference is not a dictionary: {}", + e + )) + }) + }), + _ => Err(ToolError::ExecutionError( + "Resources is neither dictionary nor reference".to_string(), + )), + }, + Err(e) => Err(ToolError::ExecutionError(format!( + "Failed to get Resources: {}", + e + ))), + }?; + + // Look for XObject dictionary - handle both direct dict and reference + let xobjects = match resources.get(b"XObject") { + Ok(xobj) => match xobj { + Object::Dictionary(dict) => Ok(dict), + Object::Reference(id) => doc + .get_object(*id) + .map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to get XObject reference: {}", + e + )) + }) + .and_then(|obj| { + obj.as_dict().map_err(|e| { + ToolError::ExecutionError(format!( + "XObject reference is not a dictionary: {}", + e + )) + }) + }), + _ => Err(ToolError::ExecutionError( + "XObject is neither dictionary nor reference".to_string(), + )), + }, + Err(e) => Err(ToolError::ExecutionError(format!( + "Failed to get XObject: {}", + e + ))), + }; + + if let Ok(xobjects) = xobjects { + for (name, xobject) in xobjects.iter() { + let xobject_id = xobject.as_reference().map_err(|_| { + ToolError::ExecutionError("Failed to get XObject reference".to_string()) + })?; + + let xobject = doc.get_object(xobject_id).map_err(|e| { + ToolError::ExecutionError(format!("Failed to get XObject: {}", e)) + })?; + + if let Ok(stream) = xobject.as_stream() { + // Check if it's an image + if let Ok(subtype) = + stream.dict.get(b"Subtype").and_then(|s| s.as_name()) + { + if subtype == b"Image" { + let extension = get_image_extension(&stream.dict); + + // Get image metadata + let width = stream + .dict + .get(b"Width") + .and_then(|w| w.as_i64()) + .unwrap_or(0); + let height = stream + .dict + .get(b"Height") + .and_then(|h| h.as_i64()) + .unwrap_or(0); + let bpc = stream + .dict + .get(b"BitsPerComponent") + .and_then(|b| b.as_i64()) + .unwrap_or(0); + + // Get the image data + if let Ok(data) = stream.get_plain_content() { + let image_path = cache_dir.join(format!( + "page{}_obj{}_{}{}", + page_num, + xobject_id.0, + String::from_utf8_lossy(name), + extension + )); + + fs::write(&image_path, &data).map_err(|e| { + ToolError::ExecutionError(format!( + "Failed to write image: {}", + e + )) + })?; + + images.push(format!( + "Saved image to: {} ({}x{}, {} bits per component)", + image_path.display(), + width, + height, + bpc + )); + image_count += 1; + } + } + } + } + } + } + } + + if images.is_empty() { + "No images found in PDF".to_string() + } else { + format!("Found {} images:\n{}", image_count, images.join("\n")) + } + } + + _ => { + return Err(ToolError::InvalidParameters(format!( + "Invalid operation: {}. Valid operations are: 'extract_text', 'extract_images'", + operation + ))) + } + }; + + Ok(vec![Content::text(result)]) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[tokio::test] + async fn test_pdf_text_extraction() { + let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("src/computercontroller/tests/data/test.pdf"); + let cache_dir = tempfile::tempdir().unwrap().into_path(); + + println!("Testing text extraction from: {}", test_pdf_path.display()); + + let result = pdf_tool(test_pdf_path.to_str().unwrap(), "extract_text", &cache_dir).await; + + assert!(result.is_ok(), "PDF text extraction should succeed"); + let content = result.unwrap(); + assert!(!content.is_empty(), "Extracted text should not be empty"); + let text = content[0].as_text().unwrap(); + println!("Extracted text:\n{}", text); + assert!(text.contains("Page 1"), "Should contain page marker"); + assert!( + text.contains("This is a test PDF"), + "Should contain expected test content" + ); + } + + #[tokio::test] + async fn test_pdf_image_extraction() { + let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("src/computercontroller/tests/data/test_image.pdf"); + let cache_dir = tempfile::tempdir().unwrap().into_path(); + + println!("Testing image extraction from: {}", test_pdf_path.display()); + + // Now try image extraction + let result = pdf_tool( + test_pdf_path.to_str().unwrap(), + "extract_images", + &cache_dir, + ) + .await; + + println!("Image extraction result: {:?}", result); + assert!(result.is_ok(), "PDF image extraction should succeed"); + let content = result.unwrap(); + assert!( + !content.is_empty(), + "Image extraction result should not be empty" + ); + let text = content[0].as_text().unwrap(); + println!("Extracted content: {}", text); + + // Should either find images or explicitly state none were found + assert!( + text.contains("Saved image to:") || text.contains("No images found"), + "Should either save images or report none found" + ); + + // If we found images, verify they exist + if text.contains("Saved image to:") { + // Extract the file path from the output + let file_path = text + .lines() + .find(|line| line.contains("Saved image to:")) + .and_then(|line| line.split(": ").nth(1)) + .and_then(|path| path.split(" (").next()) + .expect("Should have a valid file path"); + + println!("Verifying image file exists: {}", file_path); + assert!(PathBuf::from(file_path).exists(), "Image file should exist"); + } + } + + #[tokio::test] + async fn test_pdf_invalid_path() { + let cache_dir = tempfile::tempdir().unwrap().into_path(); + let result = pdf_tool("nonexistent.pdf", "extract_text", &cache_dir).await; + + assert!(result.is_err(), "Should fail with invalid path"); + } + + #[tokio::test] + async fn test_pdf_invalid_operation() { + let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("src/computercontroller/tests/data/test.pdf"); + let cache_dir = tempfile::tempdir().unwrap().into_path(); + + let result = pdf_tool( + test_pdf_path.to_str().unwrap(), + "invalid_operation", + &cache_dir, + ) + .await; + + assert!(result.is_err(), "Should fail with invalid operation"); + } +} diff --git a/crates/goose-mcp/src/computercontroller/tests/data/test.pdf b/crates/goose-mcp/src/computercontroller/tests/data/test.pdf new file mode 100644 index 00000000..e52e7627 Binary files /dev/null and b/crates/goose-mcp/src/computercontroller/tests/data/test.pdf differ diff --git a/crates/goose-mcp/src/computercontroller/tests/data/test_image.pdf b/crates/goose-mcp/src/computercontroller/tests/data/test_image.pdf new file mode 100644 index 00000000..698f9745 Binary files /dev/null and b/crates/goose-mcp/src/computercontroller/tests/data/test_image.pdf differ diff --git a/crates/goose-mcp/src/lib.rs b/crates/goose-mcp/src/lib.rs index 991159f8..1345d2dd 100644 --- a/crates/goose-mcp/src/lib.rs +++ b/crates/goose-mcp/src/lib.rs @@ -7,7 +7,7 @@ pub static APP_STRATEGY: Lazy = Lazy::new(|| AppStrategyArgs { app_name: "goose".to_string(), }); -mod computercontroller; +pub mod computercontroller; mod developer; mod google_drive; mod jetbrains;