mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-19 07:04:21 +01:00
Feat: support auto-including dirs in binary/bench-work-dir (#1576)
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -2254,6 +2254,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"ctor",
|
"ctor",
|
||||||
"goose",
|
"goose",
|
||||||
|
"include_dir",
|
||||||
"mcp-core",
|
"mcp-core",
|
||||||
"paste",
|
"paste",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ serde = { version = "1.0", features = ["derive"] }
|
|||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-subscriber = { version = "0.3", features = ["registry"] }
|
tracing-subscriber = { version = "0.3", features = ["registry"] }
|
||||||
tokio = { version = "1.0", features = ["full"] }
|
tokio = { version = "1.0", features = ["full"] }
|
||||||
|
include_dir = "0.7.4"
|
||||||
|
|
||||||
[target.'cfg(target_os = "windows")'.dependencies]
|
[target.'cfg(target_os = "windows")'.dependencies]
|
||||||
winapi = { version = "0.3", features = ["wincred"] }
|
winapi = { version = "0.3", features = ["wincred"] }
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
use chrono::Local;
|
use chrono::Local;
|
||||||
|
use include_dir::{include_dir, Dir};
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::ErrorKind;
|
use std::io::ErrorKind;
|
||||||
@@ -6,6 +7,8 @@ use std::path::Path;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process::Command;
|
use std::process::Command;
|
||||||
|
|
||||||
|
pub static BUILTIN_EVAL_ASSETS: Dir = include_dir!("$CARGO_MANIFEST_DIR/src");
|
||||||
|
|
||||||
pub struct BenchmarkWorkDir {
|
pub struct BenchmarkWorkDir {
|
||||||
pub base_path: PathBuf,
|
pub base_path: PathBuf,
|
||||||
cwd: PathBuf,
|
cwd: PathBuf,
|
||||||
@@ -16,7 +19,7 @@ pub struct BenchmarkWorkDir {
|
|||||||
|
|
||||||
impl Default for BenchmarkWorkDir {
|
impl Default for BenchmarkWorkDir {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
BenchmarkWorkDir::new("work_dir".to_string(), Vec::new())
|
Self::new("work_dir".to_string(), Vec::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl BenchmarkWorkDir {
|
impl BenchmarkWorkDir {
|
||||||
@@ -42,9 +45,11 @@ impl BenchmarkWorkDir {
|
|||||||
// deep copy each dir
|
// deep copy each dir
|
||||||
let _: Vec<_> = dirs
|
let _: Vec<_> = dirs
|
||||||
.iter()
|
.iter()
|
||||||
.map(|d| BenchmarkWorkDir::deep_copy(d.as_path(), base_path.as_path(), true))
|
.map(|d| Self::deep_copy(d.as_path(), base_path.as_path(), true))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
Self::copy_auto_included_dirs(&base_path);
|
||||||
|
|
||||||
std::env::set_current_dir(&base_path).unwrap();
|
std::env::set_current_dir(&base_path).unwrap();
|
||||||
|
|
||||||
BenchmarkWorkDir {
|
BenchmarkWorkDir {
|
||||||
@@ -55,6 +60,13 @@ impl BenchmarkWorkDir {
|
|||||||
eval: None,
|
eval: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fn copy_auto_included_dirs(dest: &PathBuf) {
|
||||||
|
BUILTIN_EVAL_ASSETS
|
||||||
|
.get_dir("assets")
|
||||||
|
.unwrap()
|
||||||
|
.extract(dest)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
pub fn cd(&mut self, path: PathBuf) -> anyhow::Result<&mut Self> {
|
pub fn cd(&mut self, path: PathBuf) -> anyhow::Result<&mut Self> {
|
||||||
fs::create_dir_all(&path)?;
|
fs::create_dir_all(&path)?;
|
||||||
std::env::set_current_dir(&path)?;
|
std::env::set_current_dir(&path)?;
|
||||||
@@ -132,7 +144,7 @@ impl BenchmarkWorkDir {
|
|||||||
let here = PathBuf::from(".").canonicalize()?;
|
let here = PathBuf::from(".").canonicalize()?;
|
||||||
let artifact_at_root = self.base_path.clone().join(asset_rel_path);
|
let artifact_at_root = self.base_path.clone().join(asset_rel_path);
|
||||||
|
|
||||||
BenchmarkWorkDir::deep_copy(artifact_at_root.as_path(), here.as_path(), true)?;
|
Self::deep_copy(artifact_at_root.as_path(), here.as_path(), true)?;
|
||||||
Ok(PathBuf::from(path))
|
Ok(PathBuf::from(path))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,7 +115,7 @@ async fn run_eval(
|
|||||||
|
|
||||||
async fn run_suite(suite: &str, work_dir: &mut BenchmarkWorkDir) -> anyhow::Result<SuiteResult> {
|
async fn run_suite(suite: &str, work_dir: &mut BenchmarkWorkDir) -> anyhow::Result<SuiteResult> {
|
||||||
let mut suite_result = SuiteResult::new(suite.to_string());
|
let mut suite_result = SuiteResult::new(suite.to_string());
|
||||||
let eval_lock = Mutex::new(0);
|
let eval_lock = Mutex::new(());
|
||||||
|
|
||||||
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
||||||
for eval in evals {
|
for eval in evals {
|
||||||
@@ -152,7 +152,7 @@ pub async fn run_benchmark(
|
|||||||
format!("{}-{}", provider_name, goose_model),
|
format!("{}-{}", provider_name, goose_model),
|
||||||
include_dirs.clone(),
|
include_dirs.clone(),
|
||||||
);
|
);
|
||||||
let suite_lock = Mutex::new(0);
|
let suite_lock = Mutex::new(());
|
||||||
for suite in suites {
|
for suite in suites {
|
||||||
let _unused = suite_lock.lock().await;
|
let _unused = suite_lock.lock().await;
|
||||||
work_dir.set_suite(suite);
|
work_dir.set_suite(suite);
|
||||||
|
|||||||
Reference in New Issue
Block a user