[feat]: improve file search tools to add globsearch / grep tools (#3368)

Co-authored-by: Alice Hau <ahau@squareup.com>
This commit is contained in:
Alice Hau
2025-07-15 09:45:39 -04:00
committed by GitHub
parent b444b5f0a3
commit e048f7d6f7
3 changed files with 154 additions and 4 deletions

1
Cargo.lock generated
View File

@@ -3627,6 +3627,7 @@ dependencies = [
"chrono",
"docx-rs",
"etcetera",
"glob",
"google-apis-common",
"google-docs1",
"google-drive3",

View File

@@ -62,6 +62,7 @@ utoipa = { version = "4.1", optional = true }
hyper = "1"
serde_with = "3"
which = "6.0"
glob = "0.3"
[dev-dependencies]

View File

@@ -158,10 +158,9 @@ impl DeveloperRouter {
sourcing files do not persist between tool calls. So you may need to repeat them each time by
stringing together commands, e.g. `cd example && ls` or `source env/bin/activate && pip install numpy`
**Important**: Use ripgrep - `rg` - when you need to locate a file or a code reference, other solutions
may show ignored or hidden files. For example *do not* use `find` or `ls -r`
- List files by name: `rg --files | rg <filename>`
- List files that contain a regex: `rg '<regex>' -l`
- Restrictions: Avoid find, grep, cat, head, tail, ls - use dedicated tools instead (Grep, Glob, Read, LS)
- Multiple commands: Use ; or && to chain commands, avoid newlines
- Pathnames: Use absolute paths and avoid cd unless explicitly requested
"#},
};
@@ -178,6 +177,88 @@ impl DeveloperRouter {
None,
);
let glob_tool = Tool::new(
"glob".to_string(),
indoc! {r#"
Search for files using glob patterns.
This tool provides fast file pattern matching using glob syntax.
Returns matching file paths sorted by modification time.
Examples:
- `*.rs` - Find all Rust files in current directory
- `src/**/*.py` - Find all Python files recursively in src directory
- `**/test*.js` - Find all JavaScript test files recursively
**Important**: Use this tool instead of shell commands like `find` or `ls -r` for file searching,
as it properly handles ignored files and is more efficient. This tool respects .gooseignore patterns.
Use this tool when you need to locate files by name patterns rather than content.
"#}.to_string(),
json!({
"type": "object",
"required": ["pattern"],
"properties": {
"pattern": {"type": "string", "description": "The glob pattern to search for"},
"path": {"type": "string", "description": "The directory to search in (defaults to current directory)"}
}
}),
Some(ToolAnnotations {
title: Some("Search files by pattern".to_string()),
read_only_hint: true,
destructive_hint: false,
idempotent_hint: true,
open_world_hint: false,
}),
);
let grep_tool = Tool::new(
"grep".to_string(),
indoc! {r#"
Execute file content search commands using ripgrep, grep, or find.
Use this tool to run search commands that look for content within files. The tool
executes your command directly and filters results to respect .gooseignore patterns.
**Recommended tools and usage:**
**ripgrep (rg)** - Fast, recommended for most searches:
- List files containing pattern: `rg -l "pattern"`
- Case-insensitive search: `rg -i "pattern"`
- Search specific file types: `rg "pattern" --glob "*.js"`
- Show matches with context: `rg "pattern" -C 3`
- List files by name: `rg --files | rg <filename>`
- List files that contain a regex: `rg '<regex>' -l`
- Sort by modification time: `rg -l "pattern" --sort modified`
**grep** - Traditional Unix tool:
- Recursive search: `grep -r "pattern" .`
- List files only: `grep -rl "pattern" .`
- Include specific files: `grep -r "pattern" --include="*.py"`
**find + grep** - When you need complex file filtering:
- `find . -name "*.py" -exec grep -l "pattern" {} \;`
- `find . -type f -newer file.txt -exec grep "pattern" {} \;`
**Important**: Use this tool instead of the shell tool for search commands, as it
properly filters results to respect ignored files.
"#}
.to_string(),
json!({
"type": "object",
"required": ["command"],
"properties": {
"command": {"type": "string", "description": "The search command to execute (rg, grep, find, etc.)"}
}
}),
Some(ToolAnnotations {
title: Some("Search file contents".to_string()),
read_only_hint: true,
destructive_hint: false,
idempotent_hint: true,
open_world_hint: false,
}),
);
// Create text editor tool with different descriptions based on editor API configuration
let (text_editor_desc, str_replace_command) = if let Some(ref editor) = editor_model {
(
@@ -483,6 +564,8 @@ impl DeveloperRouter {
Self {
tools: vec![
bash_tool,
glob_tool,
grep_tool,
text_editor_tool,
list_windows_tool,
screen_capture_tool,
@@ -672,6 +755,69 @@ impl DeveloperRouter {
])
}
async fn glob(&self, params: Value) -> Result<Vec<Content>, ToolError> {
let pattern =
params
.get("pattern")
.and_then(|v| v.as_str())
.ok_or(ToolError::InvalidParameters(
"The pattern string is required".to_string(),
))?;
let search_path = params.get("path").and_then(|v| v.as_str()).unwrap_or(".");
let full_pattern = if search_path == "." {
pattern.to_string()
} else {
format!("{}/{}", search_path.trim_end_matches('/'), pattern)
};
let glob_result = glob::glob(&full_pattern)
.map_err(|e| ToolError::InvalidParameters(format!("Invalid glob pattern: {}", e)))?;
let mut file_paths_with_metadata = Vec::new();
for entry in glob_result {
match entry {
Ok(path) => {
// Check if the path should be ignored
if !self.is_ignored(&path) {
// Get file metadata for sorting by modification time
if let Ok(metadata) = std::fs::metadata(&path) {
if metadata.is_file() {
let modified = metadata
.modified()
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
file_paths_with_metadata.push((path, modified));
}
}
}
}
Err(e) => {
tracing::warn!("Error reading glob entry: {}", e);
}
}
}
// Sort by modification time (newest first)
file_paths_with_metadata.sort_by(|a, b| b.1.cmp(&a.1));
// Extract just the file paths
let file_paths: Vec<String> = file_paths_with_metadata
.into_iter()
.map(|(path, _)| path.to_string_lossy().to_string())
.collect();
let result = file_paths.join("\n");
Ok(vec![
Content::text(result.clone()).with_audience(vec![Role::Assistant]),
Content::text(result)
.with_audience(vec![Role::User])
.with_priority(0.0),
])
}
async fn text_editor(&self, params: Value) -> Result<Vec<Content>, ToolError> {
let command = params
.get("command")
@@ -1438,6 +1584,8 @@ impl Router for DeveloperRouter {
Box::pin(async move {
match tool_name.as_str() {
"shell" => this.bash(arguments, notifier).await,
"glob" => this.glob(arguments).await,
"grep" => this.bash(arguments, notifier).await,
"text_editor" => this.text_editor(arguments).await,
"list_windows" => this.list_windows(arguments).await,
"screen_capture" => this.screen_capture(arguments).await,