mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-18 14:44:21 +01:00
feat: add additional goosebench evals (#1571)
Co-authored-by: Alice Hau <alice.a.hau@gmail.com>
This commit is contained in:
@@ -56,6 +56,20 @@ impl BenchAgent for BenchSession {
|
||||
let errors = self.errors.lock().await;
|
||||
errors.clone()
|
||||
}
|
||||
|
||||
async fn get_token_usage(&self) -> Option<i32> {
|
||||
// Get token usage from the provider
|
||||
if let Ok(usage) = self.session.get_usage().await {
|
||||
// Sum up total tokens across all usage entries
|
||||
let total_tokens = usage
|
||||
.iter()
|
||||
.map(|u| u.usage.total_tokens.unwrap_or(0))
|
||||
.sum();
|
||||
Some(total_tokens)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wrapper struct to implement BenchAgent for Arc<Mutex<BenchSession>>
|
||||
@@ -72,6 +86,11 @@ impl BenchAgent for BenchAgentWrapper {
|
||||
let session = self.0.lock().await;
|
||||
session.get_errors().await
|
||||
}
|
||||
|
||||
async fn get_token_usage(&self) -> Option<i32> {
|
||||
let session = self.0.lock().await;
|
||||
session.get_token_usage().await
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_eval(
|
||||
|
||||
Reference in New Issue
Block a user