mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-18 22:54:24 +01:00
bugfix: refactor workdirs to be async-safe, and simpler (#1558)
This commit is contained in:
84
Cargo.lock
generated
84
Cargo.lock
generated
@@ -287,9 +287,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-config"
|
name = "aws-config"
|
||||||
version = "1.5.17"
|
version = "1.5.18"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "490aa7465ee685b2ced076bb87ef654a47724a7844e2c7d3af4e749ce5b875dd"
|
checksum = "90aff65e86db5fe300752551c1b015ef72b708ac54bded8ef43d0d53cb7cb0b1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
@@ -297,7 +297,7 @@ dependencies = [
|
|||||||
"aws-sdk-ssooidc",
|
"aws-sdk-ssooidc",
|
||||||
"aws-sdk-sts",
|
"aws-sdk-sts",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.61.1",
|
||||||
"aws-smithy-json",
|
"aws-smithy-json",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
@@ -336,7 +336,7 @@ dependencies = [
|
|||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-sigv4",
|
"aws-sigv4",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.60.12",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
"aws-smithy-types",
|
"aws-smithy-types",
|
||||||
@@ -354,15 +354,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-bedrockruntime"
|
name = "aws-sdk-bedrockruntime"
|
||||||
version = "1.75.0"
|
version = "1.76.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2ddf7475b6f50a1a5be8edb1bcdf6e4ae00feed5b890d14a3f1f0e14d76f5a16"
|
checksum = "b538f72f5ab8d23de44aacd109788c37e268fe9f4d060168714a12514d73b434"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-eventstream",
|
"aws-smithy-eventstream",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.61.1",
|
||||||
"aws-smithy-json",
|
"aws-smithy-json",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
@@ -378,14 +378,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-sso"
|
name = "aws-sdk-sso"
|
||||||
version = "1.60.0"
|
version = "1.61.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "60186fab60b24376d3e33b9ff0a43485f99efd470e3b75a9160c849741d63d56"
|
checksum = "e65ff295979977039a25f5a0bf067a64bc5e6aa38f3cef4037cf42516265553c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.61.1",
|
||||||
"aws-smithy-json",
|
"aws-smithy-json",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
@@ -400,14 +400,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-ssooidc"
|
name = "aws-sdk-ssooidc"
|
||||||
version = "1.61.0"
|
version = "1.62.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7033130ce1ee13e6018905b7b976c915963755aef299c1521897679d6cd4f8ef"
|
checksum = "91430a60f754f235688387b75ee798ef00cfd09709a582be2b7525ebb5306d4f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.61.1",
|
||||||
"aws-smithy-json",
|
"aws-smithy-json",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
@@ -422,14 +422,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-sdk-sts"
|
name = "aws-sdk-sts"
|
||||||
version = "1.61.0"
|
version = "1.62.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c5c1cac7677179d622b4448b0d31bcb359185295dc6fca891920cfb17e2b5156"
|
checksum = "9276e139d39fff5a0b0c984fc2d30f970f9a202da67234f948fda02e5bea1dbe"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-runtime",
|
"aws-runtime",
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.61.1",
|
||||||
"aws-smithy-json",
|
"aws-smithy-json",
|
||||||
"aws-smithy-query",
|
"aws-smithy-query",
|
||||||
"aws-smithy-runtime",
|
"aws-smithy-runtime",
|
||||||
@@ -450,7 +450,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "9bfe75fad52793ce6dec0dc3d4b1f388f038b5eb866c8d4d7f3a8e21b5ea5051"
|
checksum = "9bfe75fad52793ce6dec0dc3d4b1f388f038b5eb866c8d4d7f3a8e21b5ea5051"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-credential-types",
|
"aws-credential-types",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.60.12",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
"aws-smithy-types",
|
"aws-smithy-types",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -479,9 +479,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aws-smithy-eventstream"
|
name = "aws-smithy-eventstream"
|
||||||
version = "0.60.6"
|
version = "0.60.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8b18559a41e0c909b77625adf2b8c50de480a8041e5e4a3f5f7d177db70abc5a"
|
checksum = "461e5e02f9864cba17cff30f007c2e37ade94d01e87cdb5204e44a84e6d38c17"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-smithy-types",
|
"aws-smithy-types",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -493,6 +493,26 @@ name = "aws-smithy-http"
|
|||||||
version = "0.60.12"
|
version = "0.60.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc"
|
checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc"
|
||||||
|
dependencies = [
|
||||||
|
"aws-smithy-runtime-api",
|
||||||
|
"aws-smithy-types",
|
||||||
|
"bytes",
|
||||||
|
"bytes-utils",
|
||||||
|
"futures-core",
|
||||||
|
"http 0.2.12",
|
||||||
|
"http-body 0.4.6",
|
||||||
|
"once_cell",
|
||||||
|
"percent-encoding",
|
||||||
|
"pin-project-lite",
|
||||||
|
"pin-utils",
|
||||||
|
"tracing",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aws-smithy-http"
|
||||||
|
version = "0.61.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6f276f21c7921fe902826618d1423ae5bf74cf8c1b8472aee8434f3dfd31824"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-smithy-eventstream",
|
"aws-smithy-eventstream",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
@@ -535,7 +555,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "d526a12d9ed61fadefda24abe2e682892ba288c2018bcb38b1b4c111d13f6d92"
|
checksum = "d526a12d9ed61fadefda24abe2e682892ba288c2018bcb38b1b4c111d13f6d92"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aws-smithy-async",
|
"aws-smithy-async",
|
||||||
"aws-smithy-http",
|
"aws-smithy-http 0.60.12",
|
||||||
"aws-smithy-runtime-api",
|
"aws-smithy-runtime-api",
|
||||||
"aws-smithy-types",
|
"aws-smithy-types",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -997,9 +1017,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.10.0"
|
version = "1.10.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
|
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes-utils"
|
name = "bytes-utils"
|
||||||
@@ -1730,9 +1750,9 @@ checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "either"
|
name = "either"
|
||||||
version = "1.14.0"
|
version = "1.15.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encode_unicode"
|
name = "encode_unicode"
|
||||||
@@ -4552,9 +4572,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ring"
|
name = "ring"
|
||||||
version = "0.17.11"
|
version = "0.17.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "da5349ae27d3887ca812fb375b45a4fbb36d8d12d2df394968cd86e35683fe73"
|
checksum = "ed9b823fa29b721a59671b41d6b06e66b29e0628e207e8b1c3ceeda701ec928d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
@@ -5475,9 +5495,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.3.37"
|
version = "0.3.38"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
|
checksum = "bb041120f25f8fbe8fd2dbe4671c7c2ed74d83be2e7a77529bf7e0790ae3f472"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deranged",
|
"deranged",
|
||||||
"itoa",
|
"itoa",
|
||||||
@@ -5492,15 +5512,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time-core"
|
name = "time-core"
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
|
checksum = "765c97a5b985b7c11d7bc27fa927dc4fe6af3a6dfb021d28deb60d3bf51e76ef"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time-macros"
|
name = "time-macros"
|
||||||
version = "0.2.19"
|
version = "0.2.20"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
|
checksum = "e8093bc3e81c3bc5f7879de09619d06c9a5a5e45ca44dfeeb7225bae38005c5c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"num-conv",
|
"num-conv",
|
||||||
"time-core",
|
"time-core",
|
||||||
|
|||||||
167
crates/goose-bench/src/bench_work_dir.rs
Normal file
167
crates/goose-bench/src/bench_work_dir.rs
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
use chrono::Local;
|
||||||
|
use std::fs;
|
||||||
|
use std::io;
|
||||||
|
use std::io::ErrorKind;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
/// Directory bookkeeping for a single benchmark run.
///
/// Tracks the benchmark root directory, the directory most recently entered
/// through `cd`, and the suite/eval names currently selected.
pub struct BenchmarkWorkDir {
    /// Canonicalized `./benchmark-<name>` root directory created by `new`.
    pub base_path: PathBuf,
    /// Directory most recently entered via `cd` (starts out as `base_path`).
    cwd: PathBuf,
    /// Name of this run's sub-directory below `base_path`: `<date>-<time>`.
    run_name: String,
    /// Suite selected by `set_suite`, if any.
    suite: Option<String>,
    /// Evaluation selected by `set_eval`, if any.
    eval: Option<String>,
}
|
||||||
|
|
||||||
|
impl Default for BenchmarkWorkDir {
|
||||||
|
fn default() -> Self {
|
||||||
|
BenchmarkWorkDir::new("work_dir".to_string(), Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl BenchmarkWorkDir {
|
||||||
|
pub fn new(work_dir_name: String, include_dirs: Vec<PathBuf>) -> Self {
|
||||||
|
let base_path = PathBuf::from(format!("./benchmark-{}", work_dir_name));
|
||||||
|
fs::create_dir_all(&base_path).unwrap();
|
||||||
|
|
||||||
|
let current_time = Local::now().format("%H:%M:%S").to_string();
|
||||||
|
let current_date = Local::now().format("%Y-%m-%d").to_string();
|
||||||
|
let run_name = format!("{}-{}", ¤t_date, current_time);
|
||||||
|
|
||||||
|
let mut base_path = PathBuf::from(&base_path).canonicalize().unwrap();
|
||||||
|
base_path.push(run_name.clone());
|
||||||
|
fs::create_dir_all(&base_path).unwrap();
|
||||||
|
base_path.pop();
|
||||||
|
|
||||||
|
// abs paths from dir-strings
|
||||||
|
let dirs = include_dirs
|
||||||
|
.iter()
|
||||||
|
.map(|d| d.canonicalize().unwrap())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// deep copy each dir
|
||||||
|
let _: Vec<_> = dirs
|
||||||
|
.iter()
|
||||||
|
.map(|d| BenchmarkWorkDir::deep_copy(d.as_path(), base_path.as_path(), true))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
std::env::set_current_dir(&base_path).unwrap();
|
||||||
|
|
||||||
|
BenchmarkWorkDir {
|
||||||
|
base_path: base_path.clone(),
|
||||||
|
cwd: base_path.clone(),
|
||||||
|
run_name,
|
||||||
|
suite: None,
|
||||||
|
eval: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn cd(&mut self, path: PathBuf) -> anyhow::Result<&mut Self> {
|
||||||
|
fs::create_dir_all(&path)?;
|
||||||
|
std::env::set_current_dir(&path)?;
|
||||||
|
self.cwd = path;
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
pub fn set_suite(&mut self, suite: &str) {
|
||||||
|
self.eval = None;
|
||||||
|
self.suite = Some(suite.to_string());
|
||||||
|
|
||||||
|
let mut suite_dir = self.base_path.clone();
|
||||||
|
suite_dir.push(self.run_name.clone());
|
||||||
|
suite_dir.push(suite);
|
||||||
|
|
||||||
|
self.cd(suite_dir.clone()).unwrap_or_else(|_| {
|
||||||
|
panic!("Failed to execute cd into {}", suite_dir.clone().display())
|
||||||
|
});
|
||||||
|
}
|
||||||
|
pub fn set_eval(&mut self, eval: &str) {
|
||||||
|
self.eval = Some(eval.to_string());
|
||||||
|
|
||||||
|
let mut eval_dir = self.base_path.clone();
|
||||||
|
eval_dir.push(self.run_name.clone());
|
||||||
|
eval_dir.push(self.suite.clone().unwrap());
|
||||||
|
eval_dir.push(eval);
|
||||||
|
|
||||||
|
self.cd(eval_dir.clone())
|
||||||
|
.unwrap_or_else(|_| panic!("Failed to execute cd into {}", eval_dir.clone().display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Strips a leading `.` component from `path`.
///
/// * A path starting with `..` is rejected with an error.
/// * A path starting with `.` is returned without that first component.
/// * Any other path is rebuilt from its components and returned.
/// * An empty path yields an empty `PathBuf`.
fn chop_relative_base<P: AsRef<Path>>(path: P) -> anyhow::Result<PathBuf> {
    use std::path::Component;

    let mut rest = path.as_ref().components();

    match rest.next() {
        // Nothing to chop on an empty path.
        None => Ok(PathBuf::new()),
        Some(Component::ParentDir) => Err(anyhow::anyhow!("RelativePathBaseError: Only paths relative to the current working directory are supported.")),
        // Drop the leading "." and keep whatever follows.
        Some(Component::CurDir) => Ok(rest.collect()),
        // Any other leading component is kept, followed by the remainder.
        Some(head) => Ok(std::iter::once(head).chain(rest).collect()),
    }
}
|
||||||
|
|
||||||
|
pub fn fs_get(&mut self, path: String) -> anyhow::Result<PathBuf> {
|
||||||
|
let p = PathBuf::from(&path);
|
||||||
|
if p.exists() {
|
||||||
|
return Ok(PathBuf::from(path));
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.is_absolute() {
|
||||||
|
return Err(anyhow::anyhow!("AbsolutePathError: Only paths relative to the current working directory are supported."));
|
||||||
|
}
|
||||||
|
|
||||||
|
let asset_rel_path = Self::chop_relative_base(p.clone())
|
||||||
|
.unwrap_or_else(|_| panic!("AbsolutePathError: Only paths relative to the current working directory are supported."));
|
||||||
|
|
||||||
|
let here = PathBuf::from(".").canonicalize()?;
|
||||||
|
let artifact_at_root = self.base_path.clone().join(asset_rel_path);
|
||||||
|
|
||||||
|
BenchmarkWorkDir::deep_copy(artifact_at_root.as_path(), here.as_path(), true)?;
|
||||||
|
Ok(PathBuf::from(path))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies `src` to `dst` by running the external `cp` command, passing `-r`
/// when `recursive` is set.
///
/// # Errors
///
/// Returns the spawn error if `cp` cannot be run, or an `ErrorKind::Other`
/// error carrying `cp`'s stderr output if it exits unsuccessfully.
fn deep_copy<P, Q>(src: P, dst: Q, recursive: bool) -> io::Result<()>
where
    P: AsRef<Path>,
    Q: AsRef<Path>,
{
    let mut cp = Command::new("cp");
    if recursive {
        cp.arg("-r");
    }

    // Source first, destination second, then run to completion.
    let outcome = cp.arg(src.as_ref()).arg(dst.as_ref()).output()?;

    if !outcome.status.success() {
        let stderr_text = String::from_utf8_lossy(&outcome.stderr).to_string();
        return Err(io::Error::new(ErrorKind::Other, stderr_text));
    }
    Ok(())
}
|
||||||
|
}
|
||||||
@@ -1,8 +1,8 @@
|
|||||||
// Create a new file called test.txt with the content 'Hello, World!
|
// Create a new file called test.txt with the content 'Hello, World!
|
||||||
|
|
||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::role::Role;
|
use mcp_core::role::Role;
|
||||||
@@ -22,7 +22,7 @@ impl Evaluation for DeveloperCreateFile {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
// use std::fs;
|
// use std::fs;
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ impl Evaluation for ExampleEval {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
println!("ExampleEval - run");
|
println!("ExampleEval - run");
|
||||||
// let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
|
// let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::content::Content;
|
use mcp_core::content::Content;
|
||||||
@@ -21,7 +21,7 @@ impl Evaluation for DeveloperImage {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::role::Role;
|
use mcp_core::role::Role;
|
||||||
@@ -20,7 +20,7 @@ impl Evaluation for DeveloperListFiles {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
// Create a new file called test.txt with the content 'Hello, World!
|
// Create a new file called test.txt with the content 'Hello, World!
|
||||||
|
|
||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::role::Role;
|
use mcp_core::role::Role;
|
||||||
@@ -22,7 +22,7 @@ impl Evaluation for MemoryRememberMemory {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
// Create a new file called test.txt with the content 'Hello, World!
|
// Create a new file called test.txt with the content 'Hello, World!
|
||||||
|
|
||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::role::Role;
|
use mcp_core::role::Role;
|
||||||
@@ -22,7 +22,7 @@ impl Evaluation for ComputerControllerScript {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
|
||||||
@@ -18,31 +18,20 @@ impl Evaluation for DeveloperSearchReplace {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
work_dir: &mut WorkDir,
|
work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
// Try to find the assets directory
|
let _target_file = match work_dir.fs_get("./assets/kubernetes_swagger.json".to_string()) {
|
||||||
let assets_dir_path = work_dir.path.join("assets");
|
Ok(file) => file,
|
||||||
let _assets_exists = assets_dir_path.exists();
|
Err(_) => {
|
||||||
|
return Err(anyhow::anyhow!(
|
||||||
// Get the kubernetes_swagger.json file from the assets directory and copy it to the working directory for eval
|
"Could not find kubernetes_swagger.json file"
|
||||||
// so the agent can modify it
|
))
|
||||||
let source_file = work_dir.path.join("assets").join("kubernetes_swagger.json");
|
}
|
||||||
let target_file = std::env::current_dir()
|
};
|
||||||
.unwrap_or_default()
|
let mut source_file = work_dir.base_path.clone();
|
||||||
.join("kubernetes_swagger.json");
|
source_file.push("assets/kubernetes_swagger.json");
|
||||||
|
|
||||||
// Copy the file to the root of the working directory if it doesn't exist there yet
|
|
||||||
if !target_file.exists() && source_file.exists() {
|
|
||||||
println!("Copying file from {:?} to {:?}", source_file, target_file);
|
|
||||||
fs::copy(&source_file, &target_file)?;
|
|
||||||
println!("File copied successfully");
|
|
||||||
} else {
|
|
||||||
return Err(anyhow::anyhow!(
|
|
||||||
"Could not find kubernetes_swagger.json file"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send the prompt to modify the file
|
// Send the prompt to modify the file
|
||||||
let _messages = agent.prompt("Remove the io.k8s.api.admissionregistration.v1.ServiceReference definition block and replace with a new definition for io.k8s.api.admissionregistration.v1.FakeServiceReference. Update the fields in the definition as well to be consistent. Don't change the property names. Don't update any references to the old definition. Only modify the definition and it's description to 'FakeServiceReference simulates a reference to a fake service for testing purposes.'.The file to modify is kubernetes_swagger.json.".to_string()).await?;
|
let _messages = agent.prompt("Remove the io.k8s.api.admissionregistration.v1.ServiceReference definition block and replace with a new definition for io.k8s.api.admissionregistration.v1.FakeServiceReference. Update the fields in the definition as well to be consistent. Don't change the property names. Don't update any references to the old definition. Only modify the definition and it's description to 'FakeServiceReference simulates a reference to a fake service for testing purposes.'.The file to modify is kubernetes_swagger.json.".to_string()).await?;
|
||||||
@@ -53,7 +42,7 @@ impl Evaluation for DeveloperSearchReplace {
|
|||||||
.join("kubernetes_swagger.json");
|
.join("kubernetes_swagger.json");
|
||||||
|
|
||||||
// Read the expected patch file from the assets directory
|
// Read the expected patch file from the assets directory
|
||||||
let patch_file_path = work_dir.path.join("assets").join("kubernetes.patch");
|
let patch_file_path = work_dir.base_path.join("assets").join("kubernetes.patch");
|
||||||
if !patch_file_path.exists() {
|
if !patch_file_path.exists() {
|
||||||
return Err(anyhow::anyhow!("Could not find patch file"));
|
return Err(anyhow::anyhow!("Could not find patch file"));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
// Create a new file called test.txt with the content 'Hello, World!
|
// Create a new file called test.txt with the content 'Hello, World!
|
||||||
|
|
||||||
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
use crate::eval_suites::{BenchAgent, Evaluation, EvaluationMetric, ExtensionRequirements};
|
||||||
use crate::register_evaluation;
|
use crate::register_evaluation;
|
||||||
use crate::work_dir::WorkDir;
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use goose::message::MessageContent;
|
use goose::message::MessageContent;
|
||||||
use mcp_core::role::Role;
|
use mcp_core::role::Role;
|
||||||
@@ -22,7 +22,7 @@ impl Evaluation for ComputerControllerWebScrape {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
mut agent: Box<dyn BenchAgent>,
|
mut agent: Box<dyn BenchAgent>,
|
||||||
_work_dir: &mut WorkDir,
|
_work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
|
||||||
let mut metrics = Vec::new();
|
let mut metrics = Vec::new();
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
use crate::work_dir::WorkDir;
|
use crate::bench_work_dir::BenchmarkWorkDir;
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
@@ -42,7 +42,7 @@ pub trait Evaluation: Send + Sync {
|
|||||||
async fn run(
|
async fn run(
|
||||||
&self,
|
&self,
|
||||||
agent: Box<dyn BenchAgent>,
|
agent: Box<dyn BenchAgent>,
|
||||||
run_loc: &mut WorkDir,
|
run_loc: &mut BenchmarkWorkDir,
|
||||||
) -> Result<Vec<(String, EvaluationMetric)>>;
|
) -> Result<Vec<(String, EvaluationMetric)>>;
|
||||||
|
|
||||||
fn name(&self) -> &str;
|
fn name(&self) -> &str;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
|
pub mod bench_work_dir;
|
||||||
pub mod error_capture;
|
pub mod error_capture;
|
||||||
pub mod eval_suites;
|
pub mod eval_suites;
|
||||||
pub mod reporting;
|
pub mod reporting;
|
||||||
pub mod work_dir;
|
|
||||||
|
|||||||
@@ -1,113 +0,0 @@
|
|||||||
use std::fs;
|
|
||||||
use std::io;
|
|
||||||
use std::path::Path;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
pub struct WorkDir {
|
|
||||||
pub path: PathBuf,
|
|
||||||
traversal: Vec<PathBuf>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for WorkDir {
|
|
||||||
fn default() -> Self {
|
|
||||||
let path = PathBuf::from(".").canonicalize().unwrap();
|
|
||||||
WorkDir {
|
|
||||||
path: path.clone(),
|
|
||||||
traversal: vec![path.clone()],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl WorkDir {
|
|
||||||
pub fn new(path: &str) -> Self {
|
|
||||||
let path = PathBuf::from(path);
|
|
||||||
WorkDir {
|
|
||||||
path: path.clone(),
|
|
||||||
traversal: vec![path.clone()],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn at(path: String, include_dirs: Vec<PathBuf>) -> anyhow::Result<WorkDir> {
|
|
||||||
fs::create_dir_all(&path)?;
|
|
||||||
|
|
||||||
let dirs = include_dirs
|
|
||||||
.iter()
|
|
||||||
.map(|d| d.canonicalize().unwrap())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
let p = PathBuf::from(&path).canonicalize()?;
|
|
||||||
let _: Vec<_> = dirs
|
|
||||||
.iter()
|
|
||||||
.map(|d| WorkDir::deep_copy(d.as_path(), p.as_path()))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
std::env::set_current_dir(&path)?;
|
|
||||||
|
|
||||||
Ok(WorkDir::new(p.to_string_lossy().to_string().as_str()))
|
|
||||||
}
|
|
||||||
pub fn move_to(&mut self, path: String) -> anyhow::Result<&mut Self> {
|
|
||||||
fs::create_dir_all(&path)?;
|
|
||||||
self.traversal.push(PathBuf::from(&path));
|
|
||||||
std::env::set_current_dir(&path)?;
|
|
||||||
Ok(self)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn fs_get(&mut self, path: String) -> anyhow::Result<PathBuf> {
|
|
||||||
let p = Path::new(&path);
|
|
||||||
if !p.exists() {
|
|
||||||
let artifact_at_root = if p.is_dir() {
|
|
||||||
self.traversal[0].clone().join(&path).canonicalize()?
|
|
||||||
} else {
|
|
||||||
self.traversal[0]
|
|
||||||
.clone()
|
|
||||||
.join(p.parent().unwrap_or(Path::new("")))
|
|
||||||
.canonicalize()?
|
|
||||||
};
|
|
||||||
|
|
||||||
let here = PathBuf::from(".").canonicalize()?;
|
|
||||||
|
|
||||||
WorkDir::deep_copy(artifact_at_root.as_path(), here.as_path())?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(PathBuf::from(path))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn deep_copy(src: &Path, dst: &Path) -> io::Result<()> {
|
|
||||||
// Create the destination directory with the source's name
|
|
||||||
let dst_dir = if let Some(src_name) = src.file_name() {
|
|
||||||
dst.join(src_name)
|
|
||||||
} else {
|
|
||||||
return Err(io::Error::new(
|
|
||||||
io::ErrorKind::InvalidInput,
|
|
||||||
"Source path must have a file name",
|
|
||||||
));
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create the destination directory if it doesn't exist
|
|
||||||
if !dst_dir.exists() {
|
|
||||||
fs::create_dir_all(&dst_dir)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy each entry in the source directory
|
|
||||||
for entry in fs::read_dir(src)? {
|
|
||||||
let entry = entry?;
|
|
||||||
let ty = entry.file_type()?;
|
|
||||||
let src_path = entry.path();
|
|
||||||
let dst_path = dst_dir.join(entry.file_name());
|
|
||||||
|
|
||||||
if ty.is_dir() {
|
|
||||||
WorkDir::deep_copy(&src_path, dst_path.parent().unwrap())?;
|
|
||||||
} else {
|
|
||||||
fs::copy(&src_path, &dst_path)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for WorkDir {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.traversal.pop();
|
|
||||||
std::env::set_current_dir("..").unwrap()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,13 +1,12 @@
|
|||||||
use crate::session::build_session;
|
use crate::session::build_session;
|
||||||
use crate::Session;
|
use crate::Session;
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use chrono::Local;
|
|
||||||
use goose::config::Config;
|
use goose::config::Config;
|
||||||
use goose::message::Message;
|
use goose::message::Message;
|
||||||
|
use goose_bench::bench_work_dir::BenchmarkWorkDir;
|
||||||
use goose_bench::error_capture::ErrorCaptureLayer;
|
use goose_bench::error_capture::ErrorCaptureLayer;
|
||||||
use goose_bench::eval_suites::{BenchAgent, BenchAgentError, Evaluation, EvaluationSuiteFactory};
|
use goose_bench::eval_suites::{BenchAgent, BenchAgentError, Evaluation, EvaluationSuiteFactory};
|
||||||
use goose_bench::reporting::{BenchmarkResults, EvaluationResult, SuiteResult};
|
use goose_bench::reporting::{BenchmarkResults, EvaluationResult, SuiteResult};
|
||||||
use goose_bench::work_dir::WorkDir;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -77,48 +76,47 @@ impl BenchAgent for BenchAgentWrapper {
|
|||||||
|
|
||||||
async fn run_eval(
|
async fn run_eval(
|
||||||
evaluation: Box<dyn Evaluation>,
|
evaluation: Box<dyn Evaluation>,
|
||||||
work_dir: &mut WorkDir,
|
work_dir: &mut BenchmarkWorkDir,
|
||||||
) -> anyhow::Result<EvaluationResult> {
|
) -> anyhow::Result<EvaluationResult> {
|
||||||
let mut result = EvaluationResult::new(evaluation.name().to_string());
|
let mut result = EvaluationResult::new(evaluation.name().to_string());
|
||||||
|
|
||||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &evaluation.name())) {
|
let requirements = evaluation.required_extensions();
|
||||||
let requirements = evaluation.required_extensions();
|
|
||||||
|
|
||||||
// Create session with error capture
|
// Create session with error capture
|
||||||
let base_session =
|
let base_session =
|
||||||
build_session(None, false, requirements.external, requirements.builtin).await;
|
build_session(None, false, requirements.external, requirements.builtin).await;
|
||||||
|
|
||||||
let bench_session = Arc::new(Mutex::new(BenchSession::new(base_session)));
|
let bench_session = Arc::new(Mutex::new(BenchSession::new(base_session)));
|
||||||
let bench_session_clone = bench_session.clone();
|
let bench_session_clone = bench_session.clone();
|
||||||
|
|
||||||
if let Ok(metrics) = evaluation
|
if let Ok(metrics) = evaluation
|
||||||
.run(Box::new(BenchAgentWrapper(bench_session)), work_dir)
|
.run(Box::new(BenchAgentWrapper(bench_session)), work_dir)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
for (name, metric) in metrics {
|
for (name, metric) in metrics {
|
||||||
result.add_metric(name, metric);
|
result.add_metric(name, metric);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add any errors that occurred
|
// Add any errors that occurred
|
||||||
let agent = BenchAgentWrapper(bench_session_clone);
|
let agent = BenchAgentWrapper(bench_session_clone);
|
||||||
for error in agent.get_errors().await {
|
for error in agent.get_errors().await {
|
||||||
result.add_error(error);
|
result.add_error(error);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_suite(suite: &str, work_dir: &mut WorkDir) -> anyhow::Result<SuiteResult> {
|
async fn run_suite(suite: &str, work_dir: &mut BenchmarkWorkDir) -> anyhow::Result<SuiteResult> {
|
||||||
let mut suite_result = SuiteResult::new(suite.to_string());
|
let mut suite_result = SuiteResult::new(suite.to_string());
|
||||||
|
let eval_lock = Mutex::new(0);
|
||||||
|
|
||||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}", &suite)) {
|
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
||||||
if let Some(evals) = EvaluationSuiteFactory::create(suite) {
|
for eval in evals {
|
||||||
for eval in evals {
|
let _unused = eval_lock.lock().await;
|
||||||
let eval_result = run_eval(eval, work_dir).await?;
|
work_dir.set_eval(eval.name());
|
||||||
suite_result.add_evaluation(eval_result);
|
let eval_result = run_eval(eval, work_dir).await?;
|
||||||
}
|
suite_result.add_evaluation(eval_result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,24 +133,25 @@ pub async fn run_benchmark(
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let config = Config::global();
|
let config = Config::global();
|
||||||
|
let goose_model: String = config
|
||||||
|
.get("GOOSE_MODEL")
|
||||||
|
.expect("No model configured. Run 'goose configure' first");
|
||||||
let provider_name: String = config
|
let provider_name: String = config
|
||||||
.get("GOOSE_PROVIDER")
|
.get("GOOSE_PROVIDER")
|
||||||
.expect("No provider configured. Run 'goose configure' first");
|
.expect("No provider configured. Run 'goose configure' first");
|
||||||
|
|
||||||
let mut results = BenchmarkResults::new(provider_name.clone());
|
let mut results = BenchmarkResults::new(provider_name.clone());
|
||||||
|
|
||||||
let current_time = Local::now().format("%H:%M:%S").to_string();
|
let mut work_dir = BenchmarkWorkDir::new(
|
||||||
let current_date = Local::now().format("%Y-%m-%d").to_string();
|
format!("{}-{}", provider_name, goose_model),
|
||||||
if let Ok(mut work_dir) = WorkDir::at(
|
|
||||||
format!("./benchmark-{}", &provider_name),
|
|
||||||
include_dirs.clone(),
|
include_dirs.clone(),
|
||||||
) {
|
);
|
||||||
if let Ok(work_dir) = work_dir.move_to(format!("./{}-{}", &current_date, current_time)) {
|
let suite_lock = Mutex::new(0);
|
||||||
for suite in suites {
|
for suite in suites {
|
||||||
let suite_result = run_suite(suite, work_dir).await?;
|
let _unused = suite_lock.lock().await;
|
||||||
results.add_suite(suite_result);
|
work_dir.set_suite(suite);
|
||||||
}
|
let suite_result = run_suite(suite, &mut work_dir).await?;
|
||||||
}
|
results.add_suite(suite_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
|
|||||||
Reference in New Issue
Block a user