From e048f7d6f714258c9fa1bc109412522167c0f91b Mon Sep 17 00:00:00 2001 From: Alice Hau <110418948+ahau-square@users.noreply.github.com> Date: Tue, 15 Jul 2025 09:45:39 -0400 Subject: [PATCH] [feat]: improve file search tools to add globsearch / grep tools (#3368) Co-authored-by: Alice Hau --- Cargo.lock | 1 + crates/goose-mcp/Cargo.toml | 1 + crates/goose-mcp/src/developer/mod.rs | 156 +++++++++++++++++++++++++- 3 files changed, 154 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff99e271..409617db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3627,6 +3627,7 @@ dependencies = [ "chrono", "docx-rs", "etcetera", + "glob", "google-apis-common", "google-docs1", "google-drive3", diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml index b6b77d01..64db2513 100644 --- a/crates/goose-mcp/Cargo.toml +++ b/crates/goose-mcp/Cargo.toml @@ -62,6 +62,7 @@ utoipa = { version = "4.1", optional = true } hyper = "1" serde_with = "3" which = "6.0" +glob = "0.3" [dev-dependencies] diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 882dd1d8..ddc81831 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -158,10 +158,9 @@ impl DeveloperRouter { sourcing files do not persist between tool calls. So you may need to repeat them each time by stringing together commands, e.g. `cd example && ls` or `source env/bin/activate && pip install numpy` - **Important**: Use ripgrep - `rg` - when you need to locate a file or a code reference, other solutions - may show ignored or hidden files. For example *do not* use `find` or `ls -r` - - List files by name: `rg --files | rg ` - - List files that contain a regex: `rg '' -l` + - Restrictions: Avoid find, grep, cat, head, tail, ls - use dedicated tools instead (Grep, Glob, Read, LS) + - Multiple commands: Use ; or && to chain commands, avoid newlines + - Pathnames: Use absolute paths and avoid cd unless explicitly requested "#}, }; @@ -178,6 +177,88 @@ impl DeveloperRouter { None, ); + let glob_tool = Tool::new( + "glob".to_string(), + indoc! {r#" + Search for files using glob patterns. + + This tool provides fast file pattern matching using glob syntax. + Returns matching file paths sorted by modification time. + Examples: + - `*.rs` - Find all Rust files in current directory + - `src/**/*.py` - Find all Python files recursively in src directory + - `**/test*.js` - Find all JavaScript test files recursively + + **Important**: Use this tool instead of shell commands like `find` or `ls -r` for file searching, + as it properly handles ignored files and is more efficient. This tool respects .gooseignore patterns. + + Use this tool when you need to locate files by name patterns rather than content. + "#}.to_string(), + json!({ + "type": "object", + "required": ["pattern"], + "properties": { + "pattern": {"type": "string", "description": "The glob pattern to search for"}, + "path": {"type": "string", "description": "The directory to search in (defaults to current directory)"} + } + }), + Some(ToolAnnotations { + title: Some("Search files by pattern".to_string()), + read_only_hint: true, + destructive_hint: false, + idempotent_hint: true, + open_world_hint: false, + }), + ); + + let grep_tool = Tool::new( + "grep".to_string(), + indoc! {r#" + Execute file content search commands using ripgrep, grep, or find. + + Use this tool to run search commands that look for content within files. The tool + executes your command directly and filters results to respect .gooseignore patterns. + + **Recommended tools and usage:** + + **ripgrep (rg)** - Fast, recommended for most searches: + - List files containing pattern: `rg -l "pattern"` + - Case-insensitive search: `rg -i "pattern"` + - Search specific file types: `rg "pattern" --glob "*.js"` + - Show matches with context: `rg "pattern" -C 3` + - List files by name: `rg --files | rg ` + - List files that contain a regex: `rg '' -l` + - Sort by modification time: `rg -l "pattern" --sort modified` + + **grep** - Traditional Unix tool: + - Recursive search: `grep -r "pattern" .` + - List files only: `grep -rl "pattern" .` + - Include specific files: `grep -r "pattern" --include="*.py"` + + **find + grep** - When you need complex file filtering: + - `find . -name "*.py" -exec grep -l "pattern" {} \;` + - `find . -type f -newer file.txt -exec grep "pattern" {} \;` + + **Important**: Use this tool instead of the shell tool for search commands, as it + properly filters results to respect ignored files. + "#} + .to_string(), + json!({ + "type": "object", + "required": ["command"], + "properties": { + "command": {"type": "string", "description": "The search command to execute (rg, grep, find, etc.)"} + } + }), + Some(ToolAnnotations { + title: Some("Search file contents".to_string()), + read_only_hint: true, + destructive_hint: false, + idempotent_hint: true, + open_world_hint: false, + }), + ); + // Create text editor tool with different descriptions based on editor API configuration let (text_editor_desc, str_replace_command) = if let Some(ref editor) = editor_model { ( @@ -483,6 +564,8 @@ impl DeveloperRouter { Self { tools: vec![ bash_tool, + glob_tool, + grep_tool, text_editor_tool, list_windows_tool, screen_capture_tool, @@ -672,6 +755,69 @@ impl DeveloperRouter { ]) } + async fn glob(&self, params: Value) -> Result, ToolError> { + let pattern = + params + .get("pattern") + .and_then(|v| v.as_str()) + .ok_or(ToolError::InvalidParameters( + "The pattern string is required".to_string(), + ))?; + + let search_path = params.get("path").and_then(|v| v.as_str()).unwrap_or("."); + + let full_pattern = if search_path == "." { + pattern.to_string() + } else { + format!("{}/{}", search_path.trim_end_matches('/'), pattern) + }; + + let glob_result = glob::glob(&full_pattern) + .map_err(|e| ToolError::InvalidParameters(format!("Invalid glob pattern: {}", e)))?; + + let mut file_paths_with_metadata = Vec::new(); + + for entry in glob_result { + match entry { + Ok(path) => { + // Check if the path should be ignored + if !self.is_ignored(&path) { + // Get file metadata for sorting by modification time + if let Ok(metadata) = std::fs::metadata(&path) { + if metadata.is_file() { + let modified = metadata + .modified() + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + file_paths_with_metadata.push((path, modified)); + } + } + } + } + Err(e) => { + tracing::warn!("Error reading glob entry: {}", e); + } + } + } + + // Sort by modification time (newest first) + file_paths_with_metadata.sort_by(|a, b| b.1.cmp(&a.1)); + + // Extract just the file paths + let file_paths: Vec = file_paths_with_metadata + .into_iter() + .map(|(path, _)| path.to_string_lossy().to_string()) + .collect(); + + let result = file_paths.join("\n"); + + Ok(vec![ + Content::text(result.clone()).with_audience(vec![Role::Assistant]), + Content::text(result) + .with_audience(vec![Role::User]) + .with_priority(0.0), + ]) + } + async fn text_editor(&self, params: Value) -> Result, ToolError> { let command = params .get("command") @@ -1438,6 +1584,8 @@ impl Router for DeveloperRouter { Box::pin(async move { match tool_name.as_str() { "shell" => this.bash(arguments, notifier).await, + "glob" => this.glob(arguments).await, + "grep" => this.bash(arguments, notifier).await, "text_editor" => this.text_editor(arguments).await, "list_windows" => this.list_windows(arguments).await, "screen_capture" => this.screen_capture(arguments).await,