mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-18 14:44:21 +01:00
feat: goose to read PDFs (#1522)
This commit is contained in:
107
Cargo.lock
generated
107
Cargo.lock
generated
@@ -17,6 +17,17 @@ version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
|
||||
|
||||
[[package]]
|
||||
name = "aes"
|
||||
version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ahash"
|
||||
version = "0.8.11"
|
||||
@@ -896,6 +907,15 @@ dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-padding"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "7.0.0"
|
||||
@@ -951,6 +971,12 @@ version = "3.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.6.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.22.0"
|
||||
@@ -997,6 +1023,15 @@ version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cbc"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
|
||||
dependencies = [
|
||||
"cipher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.16"
|
||||
@@ -1087,6 +1122,16 @@ dependencies = [
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
|
||||
dependencies = [
|
||||
"crypto-common",
|
||||
"inout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.8.1"
|
||||
@@ -2205,6 +2250,7 @@ dependencies = [
|
||||
"indoc",
|
||||
"kill_tree",
|
||||
"lazy_static",
|
||||
"lopdf",
|
||||
"mcp-core",
|
||||
"mcp-server",
|
||||
"once_cell",
|
||||
@@ -2860,6 +2906,16 @@ version = "2.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
||||
|
||||
[[package]]
|
||||
name = "inout"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
|
||||
dependencies = [
|
||||
"block-padding",
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "interpolate_name"
|
||||
version = "0.2.4"
|
||||
@@ -3171,6 +3227,30 @@ dependencies = [
|
||||
"imgref",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lopdf"
|
||||
version = "0.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c7c1d3350d071cb86987a6bcb205c7019a0eb70dcad92b454fec722cca8d68b"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"cbc",
|
||||
"chrono",
|
||||
"encoding_rs",
|
||||
"flate2",
|
||||
"indexmap 2.7.1",
|
||||
"itoa",
|
||||
"log",
|
||||
"md-5",
|
||||
"nom",
|
||||
"nom_locate",
|
||||
"rangemap",
|
||||
"rayon",
|
||||
"thiserror 2.0.11",
|
||||
"time",
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "macro_rules_attribute"
|
||||
version = "0.2.0"
|
||||
@@ -3303,6 +3383,16 @@ dependencies = [
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.4"
|
||||
@@ -3461,6 +3551,17 @@ dependencies = [
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom_locate"
|
||||
version = "4.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3"
|
||||
dependencies = [
|
||||
"bytecount",
|
||||
"memchr",
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "noop_proc_macro"
|
||||
version = "0.3.0"
|
||||
@@ -4152,6 +4253,12 @@ dependencies = [
|
||||
"getrandom 0.2.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rangemap"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f60fcc7d6849342eff22c4350c8b9a989ee8ceabc4b481253e8946b9fe83d684"
|
||||
|
||||
[[package]]
|
||||
name = "rav1e"
|
||||
version = "0.7.1"
|
||||
|
||||
@@ -39,6 +39,7 @@ regex = "1.11.1"
|
||||
once_cell = "1.20.2"
|
||||
ignore = "0.4"
|
||||
temp-env = "0.3"
|
||||
lopdf = "0.35.0"
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = "3.0.0"
|
||||
|
||||
@@ -19,6 +19,8 @@ use mcp_core::{
|
||||
use mcp_server::router::CapabilitiesBuilder;
|
||||
use mcp_server::Router;
|
||||
|
||||
mod pdf_tool;
|
||||
|
||||
mod platform;
|
||||
use platform::{create_system_automation, SystemAutomation};
|
||||
|
||||
@@ -232,6 +234,33 @@ impl ComputerControllerRouter {
|
||||
}),
|
||||
);
|
||||
|
||||
let pdf_tool = Tool::new(
|
||||
"pdf_tool",
|
||||
indoc! {r#"
|
||||
Process PDF files to extract text and images.
|
||||
Supports operations:
|
||||
- extract_text: Extract all text content from the PDF
|
||||
- extract_images: Extract and save embedded images to PNG files
|
||||
|
||||
Use this when there is a .pdf file or files that need to be processed.
|
||||
"#},
|
||||
json!({
|
||||
"type": "object",
|
||||
"required": ["path", "operation"],
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "Path to the PDF file"
|
||||
},
|
||||
"operation": {
|
||||
"type": "string",
|
||||
"enum": ["extract_text", "extract_images"],
|
||||
"description": "Operation to perform on the PDF"
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
// choose_app_strategy().cache_dir()
|
||||
// - macOS/Linux: ~/.cache/goose/computer_controller/
|
||||
// - Windows: ~\AppData\Local\Block\goose\cache\computer_controller\
|
||||
@@ -359,6 +388,7 @@ impl ComputerControllerRouter {
|
||||
quick_script_tool,
|
||||
computer_control_tool,
|
||||
cache_tool,
|
||||
pdf_tool,
|
||||
],
|
||||
cache_dir,
|
||||
active_resources: Arc::new(Mutex::new(HashMap::new())),
|
||||
@@ -653,6 +683,20 @@ impl ComputerControllerRouter {
|
||||
}
|
||||
|
||||
// Implement PDF tool functionality: validate the JSON arguments and delegate
// to the standalone `pdf_tool` module (text/image extraction from a PDF).
async fn pdf_tool(&self, params: Value) -> Result<Vec<Content>, ToolError> {
    // Required string argument: filesystem path of the PDF to process.
    let path = params
        .get("path")
        .and_then(|v| v.as_str())
        .ok_or_else(|| ToolError::InvalidParameters("Missing 'path' parameter".into()))?;

    // Required string argument: "extract_text" or "extract_images".
    // Unknown values are rejected by the delegate, not here.
    let operation = params
        .get("operation")
        .and_then(|v| v.as_str())
        .ok_or_else(|| ToolError::InvalidParameters("Missing 'operation' parameter".into()))?;

    // Extracted images (if any) are written under this router's cache_dir.
    crate::computercontroller::pdf_tool::pdf_tool(path, operation, &self.cache_dir).await
}
|
||||
|
||||
async fn cache(&self, params: Value) -> Result<Vec<Content>, ToolError> {
|
||||
let command = params
|
||||
.get("command")
|
||||
@@ -764,6 +808,7 @@ impl Router for ComputerControllerRouter {
|
||||
"automation_script" => this.quick_script(arguments).await,
|
||||
"computer_control" => this.computer_control(arguments).await,
|
||||
"cache" => this.cache(arguments).await,
|
||||
"pdf_tool" => this.pdf_tool(arguments).await,
|
||||
_ => Err(ToolError::NotFound(format!("Tool {} not found", tool_name))),
|
||||
}
|
||||
})
|
||||
|
||||
422
crates/goose-mcp/src/computercontroller/pdf_tool.rs
Normal file
422
crates/goose-mcp/src/computercontroller/pdf_tool.rs
Normal file
@@ -0,0 +1,422 @@
|
||||
use lopdf::{content::Content as PdfContent, Document, Object};
|
||||
use mcp_core::{Content, ToolError};
|
||||
use std::{fs, path::Path};
|
||||
|
||||
/// Process a PDF file with the given operation.
///
/// Supported operations:
/// - `"extract_text"`: walks each page's content stream and concatenates the
///   string operands of the `Tj`/`TJ` text-showing operators, prefixing each
///   page with a `Page N:` marker.
/// - `"extract_images"`: saves every image XObject found in the page
///   resources into `<cache_dir>/pdf_images/` and reports the saved paths.
///
/// Returns a single text `Content` describing the result. Errors with
/// `ToolError::ExecutionError` when the file cannot be opened or a required
/// PDF structure is malformed, and `ToolError::InvalidParameters` for an
/// unknown operation.
///
/// NOTE(review): text extraction decodes string operands with
/// `std::str::from_utf8`, which only recovers text for fonts whose strings
/// happen to be ASCII/UTF-8; PDFs using CID/custom font encodings will yield
/// little or no text — confirm this is acceptable for the intended use.
pub async fn pdf_tool(
    path: &str,
    operation: &str,
    cache_dir: &Path,
) -> Result<Vec<Content>, ToolError> {
    // Open and parse the PDF file
    let doc = Document::load(path)
        .map_err(|e| ToolError::ExecutionError(format!("Failed to open PDF file: {}", e)))?;

    let result = match operation {
        "extract_text" => {
            let mut text = String::new();

            // Iterate over each page in the document
            for (page_num, page_id) in doc.get_pages() {
                text.push_str(&format!("Page {}:\n", page_num));

                // Try to get text from page contents. Every level below is a
                // best-effort `if let`: any page whose structure doesn't match
                // is silently skipped (only its "Page N:" header remains).
                if let Ok(page_obj) = doc.get_object(page_id) {
                    if let Ok(page_dict) = page_obj.as_dict() {
                        // Try to get text from the Contents stream.
                        // Note: only a single indirect reference is handled;
                        // pages whose /Contents is an array of streams (or a
                        // direct stream object) fail `as_reference()` and are
                        // skipped.
                        if let Ok(contents) =
                            page_dict.get(b"Contents").and_then(|c| c.as_reference())
                        {
                            if let Ok(content_obj) = doc.get_object(contents) {
                                if let Ok(stream) = content_obj.as_stream() {
                                    if let Ok(content_data) = stream.get_plain_content() {
                                        if let Ok(content) = PdfContent::decode(&content_data) {
                                            // Process each operation in the content stream
                                            for operation in content.operations {
                                                match operation.operator.as_ref() {
                                                    // "Tj" operator: show text
                                                    "Tj" => {
                                                        for operand in operation.operands {
                                                            if let Object::String(ref bytes, _) =
                                                                operand
                                                            {
                                                                // Non-UTF-8 strings are dropped.
                                                                if let Ok(s) =
                                                                    std::str::from_utf8(bytes)
                                                                {
                                                                    text.push_str(s);
                                                                }
                                                            }
                                                        }
                                                        text.push(' ');
                                                    }
                                                    // "TJ" operator: show text with positioning
                                                    "TJ" => {
                                                        if let Some(Object::Array(ref arr)) =
                                                            operation.operands.first()
                                                        {
                                                            let mut last_was_text = false;
                                                            for element in arr {
                                                                match element {
                                                                    Object::String(
                                                                        ref bytes,
                                                                        _,
                                                                    ) => {
                                                                        if let Ok(s) =
                                                                            std::str::from_utf8(
                                                                                bytes,
                                                                            )
                                                                        {
                                                                            // Separate adjacent
                                                                            // string runs with a
                                                                            // space.
                                                                            if last_was_text {
                                                                                text.push(' ');
                                                                            }
                                                                            text.push_str(s);
                                                                            last_was_text = true;
                                                                        }
                                                                    }
                                                                    Object::Integer(offset) => {
                                                                        // Large negative offsets often indicate word spacing
                                                                        // (threshold -100 is a heuristic, in text-space
                                                                        // units — TODO confirm against typical font metrics)
                                                                        if *offset < -100 {
                                                                            text.push(' ');
                                                                            last_was_text = false;
                                                                        }
                                                                    }
                                                                    Object::Real(offset) => {
                                                                        if *offset < -100.0 {
                                                                            text.push(' ');
                                                                            last_was_text = false;
                                                                        }
                                                                    }
                                                                    _ => {}
                                                                }
                                                            }
                                                            text.push(' ');
                                                        }
                                                    }
                                                    _ => (), // Ignore other operators
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
                text.push('\n');
            }

            if text.trim().is_empty() {
                "No text found in PDF".to_string()
            } else {
                format!("Extracted text from PDF:\n\n{}", text)
            }
        }

        "extract_images" => {
            // All extracted images go into a dedicated subdirectory of the
            // caller-supplied cache dir (shadowing the parameter on purpose).
            let cache_dir = cache_dir.join("pdf_images");
            fs::create_dir_all(&cache_dir).map_err(|e| {
                ToolError::ExecutionError(format!("Failed to create image cache directory: {}", e))
            })?;

            let mut images = Vec::new();
            let mut image_count = 0;

            // Helper function to determine file extension based on stream dict.
            // Maps the /Filter entry to a file extension; ".raw" when the
            // filter is unknown or absent. NOTE(review): the extension only
            // reflects the filter — the written bytes come from
            // `get_plain_content()` and may not be a valid file of that type
            // for every filter/color-space combination; verify per format.
            fn get_image_extension(dict: &lopdf::Dictionary) -> &'static str {
                if let Ok(filter) = dict.get(b"Filter") {
                    match filter {
                        Object::Name(name) => {
                            match name.as_slice() {
                                b"DCTDecode" => ".jpg",
                                b"JBIG2Decode" => ".jbig2",
                                b"JPXDecode" => ".jp2",
                                b"CCITTFaxDecode" => ".tiff",
                                b"FlateDecode" => {
                                    // PNG-like images often use FlateDecode
                                    // Check color space to confirm
                                    if let Ok(cs) = dict.get(b"ColorSpace") {
                                        if let Ok(name) = cs.as_name() {
                                            if name == b"DeviceRGB" || name == b"DeviceGray" {
                                                return ".png";
                                            }
                                        }
                                    }
                                    ".raw"
                                }
                                _ => ".raw",
                            }
                        }
                        Object::Array(filters) => {
                            // If multiple filters, check the last one
                            if let Some(Object::Name(name)) = filters.last() {
                                match name.as_slice() {
                                    b"DCTDecode" => return ".jpg",
                                    b"JPXDecode" => return ".jp2",
                                    _ => {}
                                }
                            }
                            ".raw"
                        }
                        _ => ".raw",
                    }
                } else {
                    ".raw"
                }
            }

            // Process each page
            for (page_num, page_id) in doc.get_pages() {
                let page = doc.get_object(page_id).map_err(|e| {
                    ToolError::ExecutionError(format!("Failed to get page {}: {}", page_num, e))
                })?;

                let page_dict = page.as_dict().map_err(|e| {
                    ToolError::ExecutionError(format!(
                        "Failed to get page dict {}: {}",
                        page_num, e
                    ))
                })?;

                // Get page resources - handle both direct dict and reference
                let resources = match page_dict.get(b"Resources") {
                    Ok(res) => match res {
                        Object::Dictionary(dict) => Ok(dict),
                        Object::Reference(id) => doc
                            .get_object(*id)
                            .map_err(|e| {
                                ToolError::ExecutionError(format!(
                                    "Failed to get resource reference: {}",
                                    e
                                ))
                            })
                            .and_then(|obj| {
                                obj.as_dict().map_err(|e| {
                                    ToolError::ExecutionError(format!(
                                        "Resource reference is not a dictionary: {}",
                                        e
                                    ))
                                })
                            }),
                        _ => Err(ToolError::ExecutionError(
                            "Resources is neither dictionary nor reference".to_string(),
                        )),
                    },
                    Err(e) => Err(ToolError::ExecutionError(format!(
                        "Failed to get Resources: {}",
                        e
                    ))),
                }?; // `?` here: a page without usable Resources aborts the whole operation

                // Look for XObject dictionary - handle both direct dict and reference
                let xobjects = match resources.get(b"XObject") {
                    Ok(xobj) => match xobj {
                        Object::Dictionary(dict) => Ok(dict),
                        Object::Reference(id) => doc
                            .get_object(*id)
                            .map_err(|e| {
                                ToolError::ExecutionError(format!(
                                    "Failed to get XObject reference: {}",
                                    e
                                ))
                            })
                            .and_then(|obj| {
                                obj.as_dict().map_err(|e| {
                                    ToolError::ExecutionError(format!(
                                        "XObject reference is not a dictionary: {}",
                                        e
                                    ))
                                })
                            }),
                        _ => Err(ToolError::ExecutionError(
                            "XObject is neither dictionary nor reference".to_string(),
                        )),
                    },
                    Err(e) => Err(ToolError::ExecutionError(format!(
                        "Failed to get XObject: {}",
                        e
                    ))),
                };

                // No `?` on xobjects: a page with no XObject dictionary simply
                // contributes no images instead of failing the operation.
                if let Ok(xobjects) = xobjects {
                    for (name, xobject) in xobjects.iter() {
                        let xobject_id = xobject.as_reference().map_err(|_| {
                            ToolError::ExecutionError("Failed to get XObject reference".to_string())
                        })?;

                        let xobject = doc.get_object(xobject_id).map_err(|e| {
                            ToolError::ExecutionError(format!("Failed to get XObject: {}", e))
                        })?;

                        if let Ok(stream) = xobject.as_stream() {
                            // Check if it's an image
                            if let Ok(subtype) =
                                stream.dict.get(b"Subtype").and_then(|s| s.as_name())
                            {
                                if subtype == b"Image" {
                                    let extension = get_image_extension(&stream.dict);

                                    // Get image metadata (0 when the entry is
                                    // missing or not an integer)
                                    let width = stream
                                        .dict
                                        .get(b"Width")
                                        .and_then(|w| w.as_i64())
                                        .unwrap_or(0);
                                    let height = stream
                                        .dict
                                        .get(b"Height")
                                        .and_then(|h| h.as_i64())
                                        .unwrap_or(0);
                                    let bpc = stream
                                        .dict
                                        .get(b"BitsPerComponent")
                                        .and_then(|b| b.as_i64())
                                        .unwrap_or(0);

                                    // Get the image data
                                    if let Ok(data) = stream.get_plain_content() {
                                        // File name encodes page, object id and
                                        // XObject name so repeated extractions
                                        // overwrite deterministically.
                                        let image_path = cache_dir.join(format!(
                                            "page{}_obj{}_{}{}",
                                            page_num,
                                            xobject_id.0,
                                            String::from_utf8_lossy(name),
                                            extension
                                        ));

                                        fs::write(&image_path, &data).map_err(|e| {
                                            ToolError::ExecutionError(format!(
                                                "Failed to write image: {}",
                                                e
                                            ))
                                        })?;

                                        images.push(format!(
                                            "Saved image to: {} ({}x{}, {} bits per component)",
                                            image_path.display(),
                                            width,
                                            height,
                                            bpc
                                        ));
                                        image_count += 1;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if images.is_empty() {
                "No images found in PDF".to_string()
            } else {
                format!("Found {} images:\n{}", image_count, images.join("\n"))
            }
        }

        _ => {
            return Err(ToolError::InvalidParameters(format!(
                "Invalid operation: {}. Valid operations are: 'extract_text', 'extract_images'",
                operation
            )))
        }
    };

    Ok(vec![Content::text(result)])
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // NOTE: each test binds the `TempDir` guard to a local so the directory is
    // deleted when the test ends. The previous `tempfile::tempdir().unwrap()
    // .into_path()` pattern consumed the guard and disabled cleanup, leaking a
    // temp directory on every test run.

    /// Text extraction from the checked-in sample PDF should find the page
    /// marker and the known sentence.
    #[tokio::test]
    async fn test_pdf_text_extraction() {
        let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("src/computercontroller/tests/data/test.pdf");
        let temp_dir = tempfile::tempdir().unwrap();
        let cache_dir = temp_dir.path().to_path_buf();

        println!("Testing text extraction from: {}", test_pdf_path.display());

        let result = pdf_tool(test_pdf_path.to_str().unwrap(), "extract_text", &cache_dir).await;

        assert!(result.is_ok(), "PDF text extraction should succeed");
        let content = result.unwrap();
        assert!(!content.is_empty(), "Extracted text should not be empty");
        let text = content[0].as_text().unwrap();
        println!("Extracted text:\n{}", text);
        assert!(text.contains("Page 1"), "Should contain page marker");
        assert!(
            text.contains("This is a test PDF"),
            "Should contain expected test content"
        );
    }

    /// Image extraction should either save image files into the cache dir or
    /// explicitly report that none were found; any reported file must exist.
    #[tokio::test]
    async fn test_pdf_image_extraction() {
        let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("src/computercontroller/tests/data/test_image.pdf");
        let temp_dir = tempfile::tempdir().unwrap();
        let cache_dir = temp_dir.path().to_path_buf();

        println!("Testing image extraction from: {}", test_pdf_path.display());

        // Now try image extraction
        let result = pdf_tool(
            test_pdf_path.to_str().unwrap(),
            "extract_images",
            &cache_dir,
        )
        .await;

        println!("Image extraction result: {:?}", result);
        assert!(result.is_ok(), "PDF image extraction should succeed");
        let content = result.unwrap();
        assert!(
            !content.is_empty(),
            "Image extraction result should not be empty"
        );
        let text = content[0].as_text().unwrap();
        println!("Extracted content: {}", text);

        // Should either find images or explicitly state none were found
        assert!(
            text.contains("Saved image to:") || text.contains("No images found"),
            "Should either save images or report none found"
        );

        // If we found images, verify they exist (must run before `temp_dir`
        // is dropped, which removes the directory)
        if text.contains("Saved image to:") {
            // Extract the file path from the output line, e.g.
            // "Saved image to: /tmp/.../page1_obj4_Im0.jpg (640x480, 8 bits per component)"
            let file_path = text
                .lines()
                .find(|line| line.contains("Saved image to:"))
                .and_then(|line| line.split(": ").nth(1))
                .and_then(|path| path.split(" (").next())
                .expect("Should have a valid file path");

            println!("Verifying image file exists: {}", file_path);
            assert!(PathBuf::from(file_path).exists(), "Image file should exist");
        }
    }

    /// A nonexistent file must surface as an error, not a panic.
    #[tokio::test]
    async fn test_pdf_invalid_path() {
        let temp_dir = tempfile::tempdir().unwrap();
        let cache_dir = temp_dir.path().to_path_buf();
        let result = pdf_tool("nonexistent.pdf", "extract_text", &cache_dir).await;

        assert!(result.is_err(), "Should fail with invalid path");
    }

    /// An unknown operation name must be rejected with an error.
    #[tokio::test]
    async fn test_pdf_invalid_operation() {
        let test_pdf_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("src/computercontroller/tests/data/test.pdf");
        let temp_dir = tempfile::tempdir().unwrap();
        let cache_dir = temp_dir.path().to_path_buf();

        let result = pdf_tool(
            test_pdf_path.to_str().unwrap(),
            "invalid_operation",
            &cache_dir,
        )
        .await;

        assert!(result.is_err(), "Should fail with invalid operation");
    }
}
|
||||
BIN
crates/goose-mcp/src/computercontroller/tests/data/test.pdf
Normal file
BIN
crates/goose-mcp/src/computercontroller/tests/data/test.pdf
Normal file
Binary file not shown.
Binary file not shown.
@@ -7,7 +7,7 @@ pub static APP_STRATEGY: Lazy<AppStrategyArgs> = Lazy::new(|| AppStrategyArgs {
|
||||
app_name: "goose".to_string(),
|
||||
});
|
||||
|
||||
mod computercontroller;
|
||||
pub mod computercontroller;
|
||||
mod developer;
|
||||
mod google_drive;
|
||||
mod jetbrains;
|
||||
|
||||
Reference in New Issue
Block a user