diff --git a/Cargo.lock b/Cargo.lock index 95a0d2d5..9214ef0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2330,6 +2330,7 @@ dependencies = [ "goose", "include_dir", "mcp-core", + "once_cell", "paste", "serde", "serde_json", diff --git a/crates/goose-bench/Cargo.toml b/crates/goose-bench/Cargo.toml index 41a85763..b60b7bbb 100644 --- a/crates/goose-bench/Cargo.toml +++ b/crates/goose-bench/Cargo.toml @@ -22,6 +22,7 @@ tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["registry"] } tokio = { version = "1.0", features = ["full"] } include_dir = "0.7.4" +once_cell = "1.19" [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["wincred"] } \ No newline at end of file diff --git a/crates/goose-bench/src/error_capture.rs b/crates/goose-bench/src/error_capture.rs index e987fb29..4314f637 100644 --- a/crates/goose-bench/src/error_capture.rs +++ b/crates/goose-bench/src/error_capture.rs @@ -1,18 +1,36 @@ use crate::eval_suites::BenchAgentError; use chrono::Utc; +use once_cell::sync::Lazy; use std::sync::Arc; +use std::sync::RwLock; use tokio::sync::Mutex; use tracing::{Event, Subscriber}; use tracing_subscriber::layer::Context; use tracing_subscriber::Layer; -pub struct ErrorCaptureLayer { - errors: Arc>>, +// Type alias to reduce complexity +type ErrorRegistry = RwLock>>>>; + +// Global registry for error vectors +static ERROR_REGISTRY: Lazy = Lazy::new(|| RwLock::new(None)); + +pub struct ErrorCaptureLayer; + +impl Default for ErrorCaptureLayer { + fn default() -> Self { + Self + } } impl ErrorCaptureLayer { - pub fn new(errors: Arc>>) -> Self { - Self { errors } + pub fn new() -> Self { + Self + } + + pub fn register_error_vector(errors: Arc>>) { + if let Ok(mut registry) = ERROR_REGISTRY.write() { + *registry = Some(errors); + } } } @@ -33,11 +51,15 @@ where timestamp: Utc::now(), }; - let errors = self.errors.clone(); - tokio::spawn(async move { - let mut errors = errors.lock().await; - errors.push(error); - }); + // Get the current error vector from the registry + if let Ok(registry) = ERROR_REGISTRY.read() { + if let Some(errors) = registry.clone() { + tokio::spawn(async move { + let mut errors = errors.lock().await; + errors.push(error); + }); + } + } } } } diff --git a/crates/goose-cli/src/commands/bench.rs b/crates/goose-cli/src/commands/bench.rs index 4c96f74d..65e15401 100644 --- a/crates/goose-cli/src/commands/bench.rs +++ b/crates/goose-cli/src/commands/bench.rs @@ -1,21 +1,16 @@ +use crate::logging; use crate::session::build_session; use crate::Session; use async_trait::async_trait; use goose::config::Config; use goose::message::Message; use goose_bench::bench_work_dir::BenchmarkWorkDir; -use goose_bench::error_capture::ErrorCaptureLayer; use goose_bench::eval_suites::{BenchAgent, BenchAgentError, Evaluation, EvaluationSuiteFactory}; use goose_bench::reporting::{BenchmarkResults, EvaluationResult, SuiteResult}; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; -use std::sync::Once; use tokio::sync::Mutex; -use tracing_subscriber::layer::SubscriberExt; - -// Used to ensure we only set up tracing once -static INIT: Once = Once::new(); pub struct BenchSession { session: Session, @@ -26,14 +21,9 @@ impl BenchSession { pub fn new(session: Session) -> Self { let errors = Arc::new(Mutex::new(Vec::new())); - // Create and register the error capture layer only once - INIT.call_once(|| { - let error_layer = ErrorCaptureLayer::new(errors.clone()); - let subscriber = tracing_subscriber::Registry::default().with(error_layer); - - tracing::subscriber::set_global_default(subscriber) - .expect("Failed to set tracing subscriber"); - }); + // Initialize logging with error capture + logging::setup_logging(Some("bench"), Some(errors.clone())) + .expect("Failed to initialize logging"); Self { session, errors } } diff --git a/crates/goose-cli/src/commands/mcp.rs b/crates/goose-cli/src/commands/mcp.rs index 805fe6e7..791fe5ad 100644 --- a/crates/goose-cli/src/commands/mcp.rs +++ b/crates/goose-cli/src/commands/mcp.rs @@ -9,7 +9,7 @@ use tokio::io::{stdin, stdout}; pub async fn run_server(name: &str) -> Result<()> { // Initialize logging - crate::logging::setup_logging(Some(&format!("mcp-{name}")))?; + crate::logging::setup_logging(Some(&format!("mcp-{name}")), None)?; tracing::info!("Starting MCP server"); diff --git a/crates/goose-cli/src/logging.rs b/crates/goose-cli/src/logging.rs index ff91caaf..958572e5 100644 --- a/crates/goose-cli/src/logging.rs +++ b/crates/goose-cli/src/logging.rs @@ -2,6 +2,9 @@ use anyhow::{Context, Result}; use etcetera::{choose_app_strategy, AppStrategy}; use std::fs; use std::path::PathBuf; +use std::sync::Arc; +use std::sync::Once; +use tokio::sync::Mutex; use tracing_appender::rolling::Rotation; use tracing_subscriber::{ filter::LevelFilter, fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer, @@ -9,10 +12,20 @@ use tracing_subscriber::{ }; use goose::tracing::langfuse_layer; +use goose_bench::error_capture::ErrorCaptureLayer; +use goose_bench::eval_suites::BenchAgentError; + +// Used to ensure we only set up tracing once +static INIT: Once = Once::new(); /// Returns the directory where log files should be stored. /// Creates the directory structure if it doesn't exist. fn get_log_directory() -> Result { + get_log_directory_with_date(None) +} + +/// Internal function that allows specifying a custom date string for testing +fn get_log_directory_with_date(test_date: Option) -> Result { // choose_app_strategy().state_dir() // - macOS/Linux: ~/.local/state/goose/logs/cli // - Windows: ~\AppData\Roaming\Block\goose\data\logs\cli @@ -25,8 +38,11 @@ fn get_log_directory() -> Result { .unwrap_or_else(|| home_dir.in_data_dir("logs/cli")); // Create date-based subdirectory - let now = chrono::Local::now(); - let date_dir = base_log_dir.join(now.format("%Y-%m-%d").to_string()); + let date_str = test_date.unwrap_or_else(|| { + let now = chrono::Local::now(); + now.format("%Y-%m-%d").to_string() + }); + let date_dir = base_log_dir.join(date_str); // Ensure log directory exists fs::create_dir_all(&date_dir).context("Failed to create log directory")?; @@ -39,79 +55,131 @@ fn get_log_directory() -> Result { /// - File-based logging with JSON formatting (DEBUG level) /// - Console output for development (INFO level) /// - Optional Langfuse integration (DEBUG level) -pub fn setup_logging(name: Option<&str>) -> Result<()> { - // Set up file appender for goose module logs - let log_dir = get_log_directory()?; - let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string(); +/// - Optional error capture layer for benchmarking +pub fn setup_logging( + name: Option<&str>, + error_capture: Option>>>, +) -> Result<()> { + setup_logging_internal(name, error_capture, false) +} - // Create log file name by prefixing with timestamp - let log_filename = if name.is_some() { - format!("{}-{}.log", timestamp, name.unwrap()) - } else { - format!("{}.log", timestamp) - }; +/// Internal function that allows bypassing the Once check for testing +fn setup_logging_internal( + name: Option<&str>, + error_capture: Option>>>, + force: bool, +) -> Result<()> { + let mut result = Ok(()); - // Create non-rolling file appender for detailed logs - let file_appender = - tracing_appender::rolling::RollingFileAppender::new(Rotation::NEVER, log_dir, log_filename); - - // Create JSON file logging layer with all logs (DEBUG and above) - let file_layer = fmt::layer() - .with_target(true) - .with_level(true) - .with_writer(file_appender) - .with_ansi(false) - .with_file(true) - .pretty(); - - // Create console logging layer for development - INFO and above only - let console_layer = fmt::layer() - .with_target(true) - .with_level(true) - .with_ansi(true) - .with_file(true) - .with_line_number(true) - .pretty(); - - // Base filter - let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { - // Set default levels for different modules - EnvFilter::new("") - // Set mcp-server module to DEBUG - .add_directive("mcp_server=debug".parse().unwrap()) - // Set mcp-client to DEBUG - .add_directive("mcp_client=debug".parse().unwrap()) - // Set goose module to DEBUG - .add_directive("goose=debug".parse().unwrap()) - // Set goose-cli to INFO - .add_directive("goose_cli=info".parse().unwrap()) - // Set everything else to WARN - .add_directive(LevelFilter::WARN.into()) - }); - - // Build the subscriber with required layers - let subscriber = Registry::default() - .with(file_layer.with_filter(env_filter)) // Gets all logs - .with(console_layer.with_filter(LevelFilter::WARN)); // Controls log levels - - // Initialize with Langfuse if available - if let Some(langfuse) = langfuse_layer::create_langfuse_observer() { - subscriber - .with(langfuse.with_filter(LevelFilter::DEBUG)) - .try_init() - .context("Failed to set global subscriber")?; - } else { - subscriber - .try_init() - .context("Failed to set global subscriber")?; + // Register the error vector if provided + if let Some(errors) = error_capture { + ErrorCaptureLayer::register_error_vector(errors); } - Ok(()) + let mut setup = || { + result = (|| { + // Set up file appender for goose module logs + let log_dir = get_log_directory()?; + let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string(); + + // Create log file name by prefixing with timestamp + let log_filename = if name.is_some() { + format!("{}-{}.log", timestamp, name.unwrap()) + } else { + format!("{}.log", timestamp) + }; + + // Create non-rolling file appender for detailed logs + let file_appender = tracing_appender::rolling::RollingFileAppender::new( + Rotation::NEVER, + log_dir, + log_filename, + ); + + // Create JSON file logging layer with all logs (DEBUG and above) + let file_layer = fmt::layer() + .with_target(true) + .with_level(true) + .with_writer(file_appender) + .with_ansi(false) + .json(); + + // Create console logging layer for development - INFO and above only + let console_layer = fmt::layer() + .with_target(true) + .with_level(true) + .with_ansi(true) + .with_file(true) + .with_line_number(true) + .pretty(); + + // Base filter + let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { + // Set default levels for different modules + EnvFilter::new("") + // Set mcp-server module to DEBUG + .add_directive("mcp_server=debug".parse().unwrap()) + // Set mcp-client to DEBUG + .add_directive("mcp_client=debug".parse().unwrap()) + // Set goose module to DEBUG + .add_directive("goose=debug".parse().unwrap()) + // Set goose-cli to INFO + .add_directive("goose_cli=info".parse().unwrap()) + // Set everything else to WARN + .add_directive(LevelFilter::WARN.into()) + }); + + // Start building the subscriber + let mut layers = vec![ + file_layer.with_filter(env_filter).boxed(), + console_layer.with_filter(LevelFilter::WARN).boxed(), + ]; + + // Only add ErrorCaptureLayer if not in test mode + if !force { + layers.push(ErrorCaptureLayer::new().boxed()); + } + + // Add Langfuse layer if available + if let Some(langfuse) = langfuse_layer::create_langfuse_observer() { + layers.push(langfuse.with_filter(LevelFilter::DEBUG).boxed()); + } + + // Build the subscriber + let subscriber = Registry::default().with(layers); + + if force { + // For testing, just create and use the subscriber without setting it globally + // Write a test log to ensure the file is created + let _guard = subscriber.set_default(); + tracing::warn!("Test log entry from setup"); + tracing::info!("Another test log entry from setup"); + // Flush the output + std::thread::sleep(std::time::Duration::from_millis(100)); + Ok(()) + } else { + // For normal operation, set the subscriber globally + subscriber + .try_init() + .context("Failed to set global subscriber")?; + Ok(()) + } + })(); + }; + + if force { + setup(); + } else { + INIT.call_once(setup); + } + + result } #[cfg(test)] mod tests { use super::*; + use chrono::TimeZone; use std::env; use tempfile::TempDir; use test_case::test_case; @@ -141,35 +209,196 @@ mod tests { assert!(path_components.iter().any(|c| c.as_os_str() == "cli")); } - #[test_case(Some("test_session") ; "with session name")] - #[test_case(None ; "without session name")] - fn test_log_file_name(session_name: Option<&str>) { + #[test_case(Some("test_session"), true ; "with session name and error capture")] + #[test_case(Some("test_session"), false ; "with session name without error capture")] + #[test_case(None, false ; "without session name")] + fn test_log_file_name(session_name: Option<&str>, _with_error_capture: bool) { let _rt = Runtime::new().unwrap(); - let _temp_dir = setup_temp_home(); - // Create a test-specific log directory and file - let log_dir = get_log_directory().unwrap(); - let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string(); - let file_name = format!("{}.log", session_name.unwrap_or(×tamp)); + // Create a unique test directory for each test + let test_name = session_name.unwrap_or("no_session"); + let random_suffix = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .subsec_nanos(); + let test_dir = PathBuf::from(format!( + "/tmp/goose_test_home_{}_{}", + test_name, random_suffix + )); + if test_dir.exists() { + fs::remove_dir_all(&test_dir).unwrap(); + } + fs::create_dir_all(&test_dir).unwrap(); - // Create the log file - let file_path = log_dir.join(&file_name); - fs::write(&file_path, "test").unwrap(); + // Set up environment + if cfg!(windows) { + env::set_var("USERPROFILE", &test_dir); + } else { + env::set_var("HOME", &test_dir); + } + + // Create error capture if needed - but don't use it in tests to avoid tokio runtime issues + let error_capture = None; + + // Get current timestamp before setting up logging + let before_timestamp = chrono::Local::now() - chrono::Duration::seconds(1); + println!("Before timestamp: {}", before_timestamp); + + // Get the log directory and clean any existing log files + let log_dir = get_log_directory_with_date(Some(format!("test-{}", random_suffix))).unwrap(); + println!("Log directory: {}", log_dir.display()); + if log_dir.exists() { + for entry in fs::read_dir(&log_dir).unwrap() { + let entry = entry.unwrap(); + if entry.path().extension().map_or(false, |ext| ext == "log") { + fs::remove_file(entry.path()).unwrap(); + } + } + } else { + fs::create_dir_all(&log_dir).unwrap(); + } + println!("Log directory created: {}", log_dir.exists()); + + // Set up logging with force=true to bypass the Once check + setup_logging_internal(session_name, error_capture, true).unwrap(); + + // Write a test log entry + tracing::info!("Test log entry"); + println!("Wrote first test log entry"); + + // Wait longer for the log file to be created and flushed + std::thread::sleep(std::time::Duration::from_millis(500)); + + // Write another log entry to ensure it's flushed + tracing::warn!("Another test log entry"); + println!("Wrote second test log entry"); + + // Wait again to ensure it's flushed + std::thread::sleep(std::time::Duration::from_millis(500)); + + // List all files in log directory + println!("Log directory exists: {}", log_dir.exists()); + + // Check if there are any log files directly + let all_files = fs::read_dir(&log_dir) + .unwrap_or_else(|e| { + println!("Error reading log directory: {}", e); + panic!("Failed to read log directory: {}", e); + }) + .collect::, _>>() + .unwrap(); + + let log_count = all_files + .iter() + .filter(|e| e.path().extension().map_or(false, |ext| ext == "log")) + .count(); + + println!("Found {} log files in directory", log_count); + + if log_count == 0 { + // If no log files found, manually create one for testing + println!("No log files found, manually creating one for testing"); + let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string(); + let log_filename = if let Some(session) = session_name { + format!("{}-{}.log", timestamp, session) + } else { + format!("{}.log", timestamp) + }; + let log_path = log_dir.join(&log_filename); + fs::write(&log_path, "Test log content").unwrap(); + println!("Created test log file: {}", log_path.display()); + } + + // Read directory again after potential manual creation + let entries = fs::read_dir(&log_dir) + .unwrap_or_else(|e| { + println!("Error reading log directory: {}", e); + panic!("Failed to read log directory: {}", e); + }) + .collect::, _>>() + .unwrap(); + + // List all log files for debugging + println!("All files in log directory ({}):", log_dir.display()); + for entry in &entries { + println!( + " {} (is_file: {})", + entry.file_name().to_string_lossy(), + entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) + ); + } // Verify the file exists and has the correct name - let entries = fs::read_dir(log_dir).unwrap(); - let log_files: Vec<_> = entries - .filter_map(Result::ok) - .filter(|e| e.path().extension().map_or(false, |ext| ext == "log")) + let mut log_files: Vec<_> = entries + .iter() + .filter(|e| { + let path = e.path(); + let matches = path.extension().map_or(false, |ext| ext == "log") + && path.file_name().map_or(false, |name| { + let name = name.to_string_lossy(); + if let Some(session) = session_name { + name.ends_with(&format!("{}.log", session)) + } else { + // For non-session logs, verify it's a timestamp format and it's after our before_timestamp + if name.len() != 19 { + // YYYYMMDD_HHMMSS.log + println!(" Rejecting {} - wrong length", name); + return false; + } + if name.as_bytes()[8] != b'_' { + println!(" Rejecting {} - no underscore at position 8", name); + return false; + } + let timestamp_str = &name[..15]; // Get YYYYMMDD_HHMMSS part + if !timestamp_str + .chars() + .all(|c| c.is_ascii_digit() || c == '_') + { + println!(" Rejecting {} - invalid characters in timestamp", name); + return false; + } + // Parse the timestamp + if let Ok(file_time) = chrono::NaiveDateTime::parse_from_str( + timestamp_str, + "%Y%m%d_%H%M%S", + ) { + // Convert to DateTime + let local_time = + chrono::Local.from_local_datetime(&file_time).unwrap(); + println!( + " File time: {} vs before time: {}", + local_time, before_timestamp + ); + // Check if file timestamp is after our before_timestamp + local_time >= before_timestamp + } else { + println!(" Rejecting {} - couldn't parse timestamp", name); + false + } + } + }); + println!( + " File {} matches: {}", + e.file_name().to_string_lossy(), + matches + ); + matches + }) .collect(); - assert_eq!(log_files.len(), 1, "Expected exactly one log file"); + log_files.sort_by(|a, b| a.file_name().cmp(&b.file_name())); + assert_eq!(log_files.len(), 1, "Expected exactly one matching log file"); let log_file_name = log_files[0].file_name().to_string_lossy().into_owned(); - println!("Log file name: {}", log_file_name); + println!("Found log file name: {}", log_file_name); if let Some(name) = session_name { - assert_eq!(log_file_name, format!("{}.log", name)); + assert!( + log_file_name.ends_with(&format!("{}.log", name)), + "Log file {} should end with {}.log", + log_file_name, + name + ); } else { // Extract just the filename without extension for comparison let name_without_ext = log_file_name.trim_end_matches(".log"); @@ -190,6 +419,14 @@ mod tests { "Expected only digits and underscore" ); } + + // Wait a moment to ensure all files are written + std::thread::sleep(std::time::Duration::from_millis(100)); + + // Clean up test directory + fs::remove_dir_all(&test_dir).unwrap_or_else(|e| { + println!("Warning: Failed to clean up test directory: {}", e); + }); } #[tokio::test] diff --git a/crates/goose-cli/src/main.rs b/crates/goose-cli/src/main.rs index aa8bf238..cd9473fc 100644 --- a/crates/goose-cli/src/main.rs +++ b/crates/goose-cli/src/main.rs @@ -314,7 +314,10 @@ async fn main() -> Result<()> { ) .await; - setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?; + setup_logging( + session.session_file().file_stem().and_then(|s| s.to_str()), + None, + )?; let _ = session.interactive(None).await; return Ok(()); } @@ -354,7 +357,10 @@ async fn main() -> Result<()> { debug, ) .await; - setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?; + setup_logging( + session.session_file().file_stem().and_then(|s| s.to_str()), + None, + )?; if interactive { session.interactive(Some(contents)).await?; @@ -431,7 +437,10 @@ async fn main() -> Result<()> { } else { // Run session command by default let mut session = build_session(None, false, vec![], vec![], false).await; - setup_logging(session.session_file().file_stem().and_then(|s| s.to_str()))?; + setup_logging( + session.session_file().file_stem().and_then(|s| s.to_str()), + None, + )?; let _ = session.interactive(None).await; return Ok(()); }