chore: cleanup bench evals copy session dir code (#2131)

This commit is contained in:
Alice Hau
2025-04-10 14:05:37 -04:00
committed by GitHub
parent 15157c465d
commit 21971db722
6 changed files with 11 additions and 83 deletions

View File

@@ -1,8 +1,7 @@
use crate::bench_work_dir::BenchmarkWorkDir;
use anyhow::{Context, Result};
use goose::message::Message;
use goose::session::storage;
use std::fs::{self, File};
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
@@ -31,39 +30,3 @@ pub fn write_response_to_file(
Ok(text_content)
}
/// Copy the most recent session file to the current working directory
///
/// This function finds the most recent Goose session file (.jsonl) and copies it
/// to the current working directory. Session files are stored by the Goose framework
/// in a platform-specific data directory.
///
/// # Returns
/// - Ok(session_path) if successfully copied, where session_path is the path to the copied file
/// - Err if any errors occurred during the process
pub fn copy_session_to_cwd() -> Result<PathBuf> {
// Try to get the most recent session file
let src_path = storage::get_most_recent_session()
.with_context(|| "Failed to find any recent session files")?;
// Extract the filename from the path
let filename = src_path
.file_name()
.ok_or_else(|| anyhow::anyhow!("Invalid session filename"))?;
// Create the destination path in the current directory
let dest_path = PathBuf::from(".").join(filename);
// Copy the file
fs::copy(&src_path, &dest_path).with_context(|| {
format!(
"Failed to copy from '{}' to '{}'",
src_path.display(),
dest_path.display()
)
})?;
println!("Session file copied to: {}", dest_path.display());
Ok(dest_path)
}

View File

@@ -1,8 +1,8 @@
use crate::bench_session::BenchAgent;
use crate::bench_work_dir::BenchmarkWorkDir;
use crate::eval_suites::{
collect_baseline_metrics, copy_session_to_cwd, metrics_hashmap_to_vec, write_response_to_file,
EvalMetricValue, Evaluation, ExtensionRequirements,
collect_baseline_metrics, metrics_hashmap_to_vec, write_response_to_file, EvalMetricValue,
Evaluation, ExtensionRequirements,
};
use crate::register_evaluation;
use async_trait::async_trait;
@@ -65,13 +65,6 @@ impl Evaluation for BlogSummary {
EvalMetricValue::Boolean(used_fetch_tool),
));
// Copy the session file to the current working directory
if let Err(e) = copy_session_to_cwd() {
println!("Warning: Failed to copy session file: {}", e);
} else {
println!("Successfully copied session file to current directory");
}
Ok(metrics)
}

View File

@@ -1,8 +1,8 @@
use crate::bench_session::BenchAgent;
use crate::bench_work_dir::BenchmarkWorkDir;
use crate::eval_suites::{
collect_baseline_metrics, copy_session_to_cwd, metrics_hashmap_to_vec, EvalMetricValue,
Evaluation, ExtensionRequirements,
collect_baseline_metrics, metrics_hashmap_to_vec, EvalMetricValue, Evaluation,
ExtensionRequirements,
};
use crate::register_evaluation;
use async_trait::async_trait;
@@ -97,13 +97,6 @@ impl Evaluation for FlappyBird {
}
}
// Copy the session file to the current working directory
if let Err(e) = copy_session_to_cwd() {
println!("Warning: Failed to copy session file: {}", e);
} else {
println!("Successfully copied session file to current directory");
}
Ok(metrics)
}

View File

@@ -1,8 +1,8 @@
use crate::bench_session::BenchAgent;
use crate::bench_work_dir::BenchmarkWorkDir;
use crate::eval_suites::{
collect_baseline_metrics, copy_session_to_cwd, metrics_hashmap_to_vec, EvalMetricValue,
Evaluation, ExtensionRequirements,
collect_baseline_metrics, metrics_hashmap_to_vec, EvalMetricValue, Evaluation,
ExtensionRequirements,
};
use crate::register_evaluation;
use async_trait::async_trait;
@@ -75,13 +75,6 @@ impl Evaluation for GooseWiki {
EvalMetricValue::Boolean(valid_tool_call),
));
// Copy the session file to the current working directory
if let Err(e) = copy_session_to_cwd() {
println!("Warning: Failed to copy session file: {}", e);
} else {
println!("Successfully copied session file to current directory");
}
Ok(metrics)
}

View File

@@ -1,8 +1,8 @@
use crate::bench_session::BenchAgent;
use crate::bench_work_dir::BenchmarkWorkDir;
use crate::eval_suites::{
collect_baseline_metrics, copy_session_to_cwd, metrics_hashmap_to_vec, write_response_to_file,
EvalMetricValue, Evaluation, ExtensionRequirements,
collect_baseline_metrics, metrics_hashmap_to_vec, write_response_to_file, EvalMetricValue,
Evaluation, ExtensionRequirements,
};
use crate::register_evaluation;
use async_trait::async_trait;
@@ -85,13 +85,6 @@ Present the information in order of significance or quality. Focus specifically
EvalMetricValue::Boolean(used_fetch_tool),
));
// Copy the session file to the current working directory
if let Err(e) = copy_session_to_cwd() {
println!("Warning: Failed to copy session file: {}", e);
} else {
println!("Successfully copied session file to current directory");
}
Ok(metrics)
}

View File

@@ -1,8 +1,8 @@
use crate::bench_session::BenchAgent;
use crate::bench_work_dir::BenchmarkWorkDir;
use crate::eval_suites::{
collect_baseline_metrics, copy_session_to_cwd, metrics_hashmap_to_vec, EvalMetricValue,
Evaluation, ExtensionRequirements,
collect_baseline_metrics, metrics_hashmap_to_vec, EvalMetricValue, Evaluation,
ExtensionRequirements,
};
use crate::register_evaluation;
use async_trait::async_trait;
@@ -153,13 +153,6 @@ After writing the script, run it using python3 and show the results. Do not ask
EvalMetricValue::Boolean(correct_results),
));
// Copy the session file to the current working directory
if let Err(e) = copy_session_to_cwd() {
println!("Warning: Failed to copy session file: {}", e);
} else {
println!("Successfully copied session file to current directory");
}
Ok(metrics)
}