feat: add additional goosebench evals (#1571)

Co-authored-by: Alice Hau <alice.a.hau@gmail.com>
This commit is contained in:
Alice Hau
2025-03-10 15:11:44 -04:00
committed by GitHub
parent 8689d24407
commit bb4feacf03
14 changed files with 859 additions and 3 deletions

View File

@@ -56,6 +56,20 @@ impl BenchAgent for BenchSession {
let errors = self.errors.lock().await;
errors.clone()
}
/// Reports the total token count recorded for this session.
///
/// Asks the underlying session for its usage entries and adds up each
/// entry's `total_tokens`, counting entries that carry no value as zero.
///
/// Returns `None` when the usage lookup itself fails.
async fn get_token_usage(&self) -> Option<i32> {
    // A failed lookup is surfaced as "no data" rather than as an error.
    let entries = self.session.get_usage().await.ok()?;

    let mut total: i32 = 0;
    for entry in entries.iter() {
        total += entry.usage.total_tokens.unwrap_or(0);
    }
    Some(total)
}
}
// Wrapper struct to implement BenchAgent for Arc<Mutex<BenchSession>>
@@ -72,6 +86,11 @@ impl BenchAgent for BenchAgentWrapper {
let session = self.0.lock().await;
session.get_errors().await
}
/// Forwards the token-usage query to the wrapped value.
///
/// Awaits the lock on `self.0`, then returns whatever the guarded value's
/// own `get_token_usage` yields.
async fn get_token_usage(&self) -> Option<i32> {
    // The lock guard stays alive until the delegated call has completed.
    self.0.lock().await.get_token_usage().await
}
}
async fn run_eval(