mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-23 00:54:22 +01:00
feat: goose bench framework for functional and regression testing
Co-authored-by: Zaki Ali <zaki@squareup.com>
This commit is contained in:
@@ -2,12 +2,15 @@ use anyhow::Result;
|
||||
use clap::{Args, Parser, Subcommand};
|
||||
|
||||
use goose::config::Config;
|
||||
|
||||
use goose_cli::commands::agent_version::AgentCommand;
|
||||
use goose_cli::commands::bench::{list_suites, run_benchmark};
|
||||
use goose_cli::commands::configure::handle_configure;
|
||||
use goose_cli::commands::info::handle_info;
|
||||
use goose_cli::commands::mcp::run_server;
|
||||
use goose_cli::logging::setup_logging;
|
||||
use goose_cli::session;
|
||||
use goose_cli::session::build_session;
|
||||
use goose_cli::{commands::agent_version::AgentCommand, session};
|
||||
use std::io::{self, Read};
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -194,6 +197,66 @@ enum Command {
|
||||
#[arg(short, long, help = "Enforce to re-configure goose during update")]
|
||||
reconfigure: bool,
|
||||
},
|
||||
|
||||
Bench {
|
||||
#[arg(
|
||||
short = 's',
|
||||
long = "suites",
|
||||
value_name = "BENCH_SUITE_NAME",
|
||||
help = "Run this list of bench-suites.",
|
||||
long_help = "Specify a comma-separated list of evaluation-suite names to be run.",
|
||||
value_delimiter = ','
|
||||
)]
|
||||
suites: Vec<String>,
|
||||
|
||||
#[arg(
|
||||
short = 'i',
|
||||
long = "include-dir",
|
||||
value_name = "DIR_NAME",
|
||||
action = clap::ArgAction::Append,
|
||||
long_help = "Make one or more dirs available to all bench suites. Specify either a single dir-name, a comma-separated list of dir-names, or use this multiple instances of this flag to specify multiple dirs.",
|
||||
value_delimiter = ','
|
||||
)]
|
||||
include_dirs: Vec<PathBuf>,
|
||||
|
||||
#[arg(
|
||||
long = "repeat",
|
||||
value_name = "QUANTITY",
|
||||
long_help = "Number of times to repeat the benchmark run.",
|
||||
default_value = "1"
|
||||
)]
|
||||
repeat: usize,
|
||||
|
||||
#[arg(
|
||||
long = "list",
|
||||
value_name = "LIST",
|
||||
help = "List all available bench suites."
|
||||
)]
|
||||
list: bool,
|
||||
|
||||
#[arg(
|
||||
long = "output",
|
||||
short = 'o',
|
||||
value_name = "FILE",
|
||||
help = "Save benchmark results to a file"
|
||||
)]
|
||||
output: Option<PathBuf>,
|
||||
|
||||
#[arg(
|
||||
long = "format",
|
||||
value_name = "FORMAT",
|
||||
help = "Output format (text, json)",
|
||||
default_value = "text"
|
||||
)]
|
||||
format: String,
|
||||
|
||||
#[arg(
|
||||
long = "summary",
|
||||
help = "Show only summary results",
|
||||
action = clap::ArgAction::SetTrue
|
||||
)]
|
||||
summary: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(clap::ValueEnum, Clone, Debug)]
|
||||
@@ -232,6 +295,7 @@ async fn main() -> Result<()> {
|
||||
builtin,
|
||||
)
|
||||
.await;
|
||||
|
||||
setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?;
|
||||
let _ = session.interactive(None).await;
|
||||
return Ok(());
|
||||
@@ -290,6 +354,56 @@ async fn main() -> Result<()> {
|
||||
goose_cli::commands::update::update(canary, reconfigure)?;
|
||||
return Ok(());
|
||||
}
|
||||
Some(Command::Bench {
|
||||
suites,
|
||||
include_dirs,
|
||||
repeat,
|
||||
list,
|
||||
output,
|
||||
format,
|
||||
summary,
|
||||
}) => {
|
||||
if list {
|
||||
let suites = list_suites().await?;
|
||||
for suite in suites.keys() {
|
||||
println!("{}: {}", suite, suites.get(suite).unwrap());
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
let suites = if suites.is_empty() {
|
||||
vec!["core".to_string()]
|
||||
} else {
|
||||
suites
|
||||
};
|
||||
let current_dir = std::env::current_dir()?;
|
||||
|
||||
for i in 0..repeat {
|
||||
if repeat > 1 {
|
||||
println!("\nRun {} of {}:", i + 1, repeat);
|
||||
}
|
||||
let results = run_benchmark(suites.clone(), include_dirs.clone()).await?;
|
||||
|
||||
// Handle output based on format
|
||||
let output_str = match format.as_str() {
|
||||
"json" => serde_json::to_string_pretty(&results)?,
|
||||
_ => results.to_string(), // Uses Display impl
|
||||
};
|
||||
|
||||
// Save to file if specified
|
||||
if let Some(path) = &output {
|
||||
std::fs::write(current_dir.join(path), &output_str)?;
|
||||
println!("Results saved to: {}", path.display());
|
||||
} else {
|
||||
// Print to console
|
||||
if summary {
|
||||
println!("{}", results.summary());
|
||||
} else {
|
||||
println!("{}", output_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
None => {
|
||||
if !Config::global().exists() {
|
||||
let _ = handle_configure().await;
|
||||
|
||||
Reference in New Issue
Block a user