From c6d0bc671fae0825082b77f9987fb92477f386a7 Mon Sep 17 00:00:00 2001 From: Sam Osborn Date: Thu, 1 May 2025 11:00:54 -0500 Subject: [PATCH] feat(bench): add configurable output directory for benchmarks (#2290) --- crates/goose-bench/src/bench_config.rs | 2 ++ crates/goose-bench/src/bench_work_dir.rs | 31 ++++++++++++++++--- .../goose-bench/src/runners/bench_runner.rs | 25 +++++++++++++-- crates/goose-cli/src/cli.rs | 8 ++++- 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/crates/goose-bench/src/bench_config.rs b/crates/goose-bench/src/bench_config.rs index d3289605..fa582a76 100644 --- a/crates/goose-bench/src/bench_config.rs +++ b/crates/goose-bench/src/bench_config.rs @@ -30,6 +30,7 @@ pub struct BenchRunConfig { pub include_dirs: Vec, pub repeat: Option, pub run_id: Option, + pub output_dir: Option, pub eval_result_filename: String, pub run_summary_filename: String, pub env_file: Option, @@ -63,6 +64,7 @@ impl Default for BenchRunConfig { include_dirs: vec![], repeat: Some(2), run_id: None, + output_dir: None, eval_result_filename: "eval-results.json".to_string(), run_summary_filename: "run-results-summary.json".to_string(), env_file: None, diff --git a/crates/goose-bench/src/bench_work_dir.rs b/crates/goose-bench/src/bench_work_dir.rs index f832033e..a995e119 100644 --- a/crates/goose-bench/src/bench_work_dir.rs +++ b/crates/goose-bench/src/bench_work_dir.rs @@ -1,3 +1,4 @@ +use anyhow::Context; use chrono::Local; use include_dir::{include_dir, Dir}; use serde::{Deserialize, Serialize}; @@ -53,15 +54,35 @@ impl BenchmarkWorkDir { } } - pub fn init_experiment() { + pub fn init_experiment(output_dir: PathBuf) -> anyhow::Result<()> { + if !output_dir.is_absolute() { + anyhow::bail!( + "Internal Error: init_experiment received a non-absolute path: {}", + output_dir.display() + ); + } + // create experiment folder let current_time = Local::now().format("%H:%M:%S").to_string(); let current_date = Local::now().format("%Y-%m-%d").to_string(); - let 
exp_name = format!("{}-{}", &current_date, current_time); - let base_path = PathBuf::from(format!("./benchmark-{}", exp_name)); - fs::create_dir_all(&base_path).unwrap(); - std::env::set_current_dir(&base_path).unwrap(); + let exp_folder_name = format!("benchmark-{}-{}", &current_date, &current_time); + let base_path = output_dir.join(exp_folder_name); + + fs::create_dir_all(&base_path).with_context(|| { + format!( + "Failed to create benchmark directory: {}", + base_path.display() + ) + })?; + std::env::set_current_dir(&base_path).with_context(|| { + format!( + "Failed to change working directory to: {}", + base_path.display() + ) + })?; + Ok(()) } + pub fn canonical_dirs(include_dirs: Vec) -> Vec { include_dirs .iter() diff --git a/crates/goose-bench/src/runners/bench_runner.rs b/crates/goose-bench/src/runners/bench_runner.rs index b826afb4..a48620e6 100644 --- a/crates/goose-bench/src/runners/bench_runner.rs +++ b/crates/goose-bench/src/runners/bench_runner.rs @@ -3,6 +3,7 @@ use crate::bench_work_dir::BenchmarkWorkDir; use crate::eval_suites::EvaluationSuite; use crate::runners::model_runner::ModelRunner; use crate::utilities::{await_process_exits, parallel_bench_cmd}; +use anyhow::Context; use std::path::PathBuf; #[derive(Clone)] @@ -11,9 +12,27 @@ pub struct BenchRunner { } impl BenchRunner { - pub fn new(config: PathBuf) -> anyhow::Result { - let config = BenchRunConfig::from(config)?; - BenchmarkWorkDir::init_experiment(); + pub fn new(config_path: PathBuf) -> anyhow::Result { + let config = BenchRunConfig::from(config_path.clone())?; + + let resolved_output_dir = match &config.output_dir { + Some(path) => { + if !path.is_absolute() { + anyhow::bail!( + "Config Error in '{}': 'output_dir' must be an absolute path, but found relative path: {}", + config_path.display(), + path.display() + ); + } + path.clone() + } + None => std::env::current_dir().context( + "Failed to get current working directory to use as default output directory", + )?, + }; + + 
BenchmarkWorkDir::init_experiment(resolved_output_dir)?; + config.save("config.cfg".to_string()); Ok(BenchRunner { config }) } diff --git a/crates/goose-cli/src/cli.rs b/crates/goose-cli/src/cli.rs index e400dfbe..b0109bd0 100644 --- a/crates/goose-cli/src/cli.rs +++ b/crates/goose-cli/src/cli.rs @@ -533,7 +533,13 @@ pub async fn cli() -> Result<()> { Some(Command::Bench { cmd }) => { match cmd { BenchCommand::Selectors { config } => BenchRunner::list_selectors(config)?, - BenchCommand::InitConfig { name } => BenchRunConfig::default().save(name), + BenchCommand::InitConfig { name } => { + let mut config = BenchRunConfig::default(); + let cwd = + std::env::current_dir().expect("Failed to get current working directory"); + config.output_dir = Some(cwd); + config.save(name); + } BenchCommand::Run { config } => BenchRunner::new(config)?.run()?, BenchCommand::EvalModel { config } => ModelRunner::from(config)?.run()?, BenchCommand::ExecEval { config } => {