Merge 'simulator: provide high level commands on top of a single runner' from Alperen Keleş

Previously, the simulator only generated and executed a single run with the given configuration; anyone was free to implement retries or looping on top of it, which is still possible. I still wanted to start adding some higher-level commands on top, especially now that we have access to the bug base, so this PR adds the following subcommands:

- loop: run the simulator in a loop `n` times, with the option to short-circuit if any run fails.
- list: list all the bugs in the bug base.
- test: run the simulator against a specific bug by providing a `filter` (matched against the error messages of previously recorded failing runs). This is useful when debugging a certain issue: you can first run a bunch of tests in a `loop`, and rerun them using `test` when you think you have a fix.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1317
2026-01-28 20:34:24 +01:00 · 2025-04-12 10:35:52 +03:00
parent cfe8aed40e e13b5bc698
commit edbeced312
4 changed files with 219 additions and 67 deletions
--- a/simulator/main.rs
+++ b/simulator/main.rs
@@ -6,7 +6,7 @@ use notify::event::{DataChange, ModifyKind};
 use notify::{EventKind, RecursiveMode, Watcher};
 use rand::prelude::*;
 use runner::bugbase::{Bug, BugBase, LoadedBug};
-use runner::cli::SimulatorCLI;
+use runner::cli::{SimulatorCLI, SimulatorCommand};
 use runner::env::SimulatorEnv;
 use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult};
 use runner::{differential, watch};
@@ -48,15 +48,87 @@ impl Paths {

 fn main() -> Result<(), String> {
    init_logger();
-
-    let cli_opts = SimulatorCLI::parse();
+    let mut cli_opts = SimulatorCLI::parse();
    cli_opts.validate()?;

+    match cli_opts.subcommand {
+        Some(SimulatorCommand::List) => {
+            let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
+            bugbase.list_bugs()
+        }
+        Some(SimulatorCommand::Loop { n, short_circuit }) => {
+            banner();
+            for i in 0..n {
+                println!("iteration {}", i);
+                let result = testing_main(&cli_opts);
+                if result.is_err() && short_circuit {
+                    println!("short circuiting after {} iterations", i);
+                    return result;
+                } else if result.is_err() {
+                    println!("iteration {} failed", i);
+                } else {
+                    println!("iteration {} succeeded", i);
+                }
+            }
+            Ok(())
+        }
+        Some(SimulatorCommand::Test { filter }) => {
+            let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
+            let bugs = bugbase.load_bugs()?;
+            let mut bugs = bugs
+                .into_iter()
+                .flat_map(|bug| {
+                    let runs = bug
+                        .runs
+                        .into_iter()
+                        .filter_map(|run| run.error.clone().map(|_| run))
+                        .filter(|run| run.error.as_ref().unwrap().contains(&filter))
+                        .map(|run| run.cli_options)
+                        .collect::<Vec<_>>();
+
+                    runs.into_iter()
+                        .map(|mut cli_opts| {
+                            cli_opts.seed = Some(bug.seed);
+                            cli_opts.load = None;
+                            cli_opts
+                        })
+                        .collect::<Vec<_>>()
+                })
+                .collect::<Vec<_>>();
+
+            bugs.sort();
+            bugs.dedup_by(|a, b| a == b);
+
+            println!(
+                "found {} previously triggered configurations with {}",
+                bugs.len(),
+                filter
+            );
+
+            let results = bugs
+                .into_iter()
+                .map(|cli_opts| testing_main(&cli_opts))
+                .collect::<Vec<_>>();
+
+            let (successes, failures): (Vec<_>, Vec<_>) =
+                results.into_iter().partition(|result| result.is_ok());
+            println!("the results of the change are:");
+            println!("\t{} successful runs", successes.len());
+            println!("\t{} failed runs", failures.len());
+            Ok(())
+        }
+        None => {
+            banner();
+            testing_main(&cli_opts)
+        }
+    }
+}
+
+fn testing_main(cli_opts: &SimulatorCLI) -> Result<(), String> {
    let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
-    banner();

    let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0)));
-    let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db);
+    let (seed, env, plans) = setup_simulation(&mut bugbase, cli_opts, |p| &p.plan, |p| &p.db);

    let paths = bugbase.paths(seed);

@@ -66,7 +138,7 @@ fn main() -> Result<(), String> {
    }

    if cli_opts.watch {
-        watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap();
+        watch_mode(seed, cli_opts, &paths, last_execution.clone()).unwrap();
        return Ok(());
    }

@@ -74,7 +146,7 @@ fn main() -> Result<(), String> {
        differential_testing(
            seed,
            &mut bugbase,
-            &cli_opts,
+            cli_opts,
            &paths,
            plans,
            last_execution.clone(),
@@ -83,7 +155,7 @@ fn main() -> Result<(), String> {
        run_simulator(
            seed,
            &mut bugbase,
-            &cli_opts,
+            cli_opts,
            &paths,
            env,
            plans,
@@ -190,9 +262,15 @@ fn run_simulator(
    );

    if cli_opts.doublecheck {
-        let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db);
-        let env = Arc::new(Mutex::new(env));
-        doublecheck(env, paths, &plans, last_execution.clone(), result)
+        doublecheck(
+            seed,
+            bugbase,
+            cli_opts,
+            paths,
+            &plans,
+            last_execution.clone(),
+            result,
+        )
    } else {
        // No doublecheck, run shrinking if panicking or found a bug.
        match &result {
@@ -303,12 +381,17 @@ fn run_simulator(
 }

 fn doublecheck(
-    env: Arc<Mutex<SimulatorEnv>>,
+    seed: u64,
+    bugbase: &mut BugBase,
+    cli_opts: &SimulatorCLI,
    paths: &Paths,
    plans: &[InteractionPlan],
    last_execution: Arc<Mutex<Execution>>,
    result: SandboxedResult,
 ) -> Result<(), String> {
+    let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db);
+    let env = Arc::new(Mutex::new(env));
+
    // Run the simulation again
    let result2 = SandboxedResult::from(
        std::panic::catch_unwind(|| {
@@ -317,50 +400,24 @@ fn doublecheck(
        last_execution.clone(),
    );

-    match (result, result2) {
+    let doublecheck_result = match (result, result2) {
        (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => {
-            log::error!("doublecheck failed! first run succeeded, but second run panicked.");
-            Err("doublecheck failed! first run succeeded, but second run panicked.".to_string())
+            Err("first run succeeded, but second run panicked.".to_string())
        }
        (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => {
-            log::error!(
-                "doublecheck failed! first run failed an assertion, but second run panicked."
-            );
-            Err(
-                "doublecheck failed! first run failed an assertion, but second run panicked."
-                    .to_string(),
-            )
+            Err("first run failed an assertion, but second run panicked.".to_string())
        }
        (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => {
-            log::error!("doublecheck failed! first run panicked, but second run succeeded.");
-            Err("doublecheck failed! first run panicked, but second run succeeded.".to_string())
+            Err("first run panicked, but second run succeeded.".to_string())
        }
        (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => {
-            log::error!(
-                "doublecheck failed! first run panicked, but second run failed an assertion."
-            );
-            Err(
-                "doublecheck failed! first run panicked, but second run failed an assertion."
-                    .to_string(),
-            )
+            Err("first run panicked, but second run failed an assertion.".to_string())
        }
        (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => {
-            log::error!(
-                "doublecheck failed! first run succeeded, but second run failed an assertion."
-            );
-            Err(
-                "doublecheck failed! first run succeeded, but second run failed an assertion."
-                    .to_string(),
-            )
+            Err("first run succeeded, but second run failed an assertion.".to_string())
        }
        (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => {
-            log::error!(
-                "doublecheck failed! first run failed an assertion, but second run succeeded."
-            );
-            Err(
-                "doublecheck failed! first run failed an assertion, but second run succeeded."
-                    .to_string(),
-            )
+            Err("first run failed an assertion, but second run succeeded.".to_string())
        }
        (SandboxedResult::Correct, SandboxedResult::Correct)
        | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. })
@@ -369,18 +426,30 @@ fn doublecheck(
            let db_bytes = std::fs::read(&paths.db).unwrap();
            let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap();
            if db_bytes != doublecheck_db_bytes {
-                log::error!("doublecheck failed! database files are different.");
-                log::error!("current: {}", paths.db.display());
-                log::error!("doublecheck: {}", paths.doublecheck_db.display());
                Err(
-                    "doublecheck failed! database files are different, check binary diffs for more details.".to_string()
+                    "database files are different, check binary diffs for more details."
+                        .to_string(),
                )
            } else {
-                log::info!("doublecheck succeeded! database files are the same.");
-                println!("doublecheck succeeded! database files are the same.");
                Ok(())
            }
        }
+    };
+
+    match doublecheck_result {
+        Ok(_) => {
+            log::info!("doublecheck succeeded");
+            println!("doublecheck succeeded");
+            bugbase.mark_successful_run(seed, cli_opts)?;
+            Ok(())
+        }
+        Err(e) => {
+            log::error!("doublecheck failed: '{}'", e);
+            bugbase
+                .add_bug(seed, plans[0].clone(), Some(e.clone()), cli_opts)
+                .unwrap();
+            Err(format!("doublecheck failed: '{}'", e))
+        }
    }
 }

@@ -417,6 +486,7 @@ fn differential_testing(
        SandboxedResult::Correct => {
            log::info!("simulation succeeded, output of Limbo conforms to SQLite");
            println!("simulation succeeded, output of Limbo conforms to SQLite");
+            bugbase.mark_successful_run(seed, cli_opts).unwrap();
            Ok(())
        }
        SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => {
--- a/simulator/runner/bugbase.rs
+++ b/simulator/runner/bugbase.rs
@@ -33,14 +33,14 @@ pub struct LoadedBug {
 #[derive(Clone, Serialize, Deserialize)]
 pub(crate) struct BugRun {
    /// Commit hash of the current version of Limbo.
-    hash: String,
+    pub(crate) hash: String,
    /// Timestamp of the run.
    #[serde(with = "chrono::serde::ts_seconds")]
-    timestamp: DateTime<Utc>,
+    pub(crate) timestamp: DateTime<Utc>,
    /// Error message of the run.
-    error: Option<String>,
+    pub(crate) error: Option<String>,
    /// Options
-    cli_options: SimulatorCLI,
+    pub(crate) cli_options: SimulatorCLI,
 }

 impl Bug {
@@ -270,7 +270,10 @@ impl BugBase {
            Some(Bug::Unloaded { .. }) => {
                let plan =
                    std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json"))
-                        .or(Err("should be able to read plan file".to_string()))?;
+                        .or(Err(format!(
+                            "should be able to read plan file at {}",
+                            self.path.join(seed.to_string()).join("plan.json").display()
+                        )))?;
                let plan: InteractionPlan = serde_json::from_str(&plan)
                    .or(Err("should be able to deserialize plan".to_string()))?;

@@ -330,6 +333,45 @@ impl BugBase {

        Ok(())
    }
+
+    pub(crate) fn load_bugs(&mut self) -> Result<Vec<LoadedBug>, String> {
+        let seeds = self.bugs.keys().map(|seed| *seed).collect::<Vec<_>>();
+
+        seeds
+            .iter()
+            .map(|seed| self.load_bug(*seed))
+            .collect::<Result<Vec<_>, _>>()
+    }
+
+    pub(crate) fn list_bugs(&mut self) -> Result<(), String> {
+        let bugs = self.load_bugs()?;
+        for bug in bugs {
+            println!("seed: {}", bug.seed);
+            println!("plan: {}", bug.plan.stats());
+            println!("runs:");
+            println!("  ------------------");
+            for run in &bug.runs {
+                println!("  - hash: {}", run.hash);
+                println!("    timestamp: {}", run.timestamp);
+                println!(
+                    "    type: {}",
+                    if run.cli_options.differential {
+                        "differential"
+                    } else if run.cli_options.doublecheck {
+                        "doublecheck"
+                    } else {
+                        "default"
+                    }
+                );
+                if let Some(error) = &run.error {
+                    println!("    error: {}", error);
+                }
+            }
+            println!("  ------------------");
+        }
+
+        Ok(())
+    }
 }

 impl BugBase {
--- a/simulator/runner/cli.rs
+++ b/simulator/runner/cli.rs
@@ -1,7 +1,7 @@
 use clap::{command, Parser};
 use serde::{Deserialize, Serialize};

-#[derive(Parser, Clone, Serialize, Deserialize)]
+#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)]
 #[command(name = "limbo-simulator")]
 #[command(author, version, about, long_about = None)]
 pub struct SimulatorCLI {
@@ -19,14 +19,14 @@ pub struct SimulatorCLI {
        help = "change the maximum size of the randomly generated sequence of interactions",
        default_value_t = 5000
    )]
-    pub maximum_size: usize,
+    pub maximum_tests: usize,
    #[clap(
        short = 'k',
        long,
        help = "change the minimum size of the randomly generated sequence of interactions",
        default_value_t = 1000
    )]
-    pub minimum_size: usize,
+    pub minimum_tests: usize,
    #[clap(
        short = 't',
        long,
@@ -44,19 +44,59 @@ pub struct SimulatorCLI {
    pub watch: bool,
    #[clap(long, help = "run differential testing between sqlite and Limbo")]
    pub differential: bool,
+    #[clap(subcommand)]
+    pub subcommand: Option<SimulatorCommand>,
+}
+
+#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)]
+pub enum SimulatorCommand {
+    #[clap(about = "run the simulator in a loop")]
+    Loop {
+        #[clap(
+            short = 'n',
+            long,
+            help = "number of iterations to run the simulator",
+            default_value_t = 5
+        )]
+        n: usize,
+        #[clap(
+            short = 's',
+            long,
+            help = "short circuit the simulator, stop on the first failure",
+            default_value_t = false
+        )]
+        short_circuit: bool,
+    },
+    #[clap(about = "list all the bugs in the base")]
+    List,
+    #[clap(about = "run the simulator against a specific bug")]
+    Test {
+        #[clap(
+            short = 'b',
+            long,
+            help = "run the simulator with previous buggy runs for the specific filter"
+        )]
+        filter: String,
+    },
 }

 impl SimulatorCLI {
-    pub fn validate(&self) -> Result<(), String> {
-        if self.minimum_size < 1 {
+    pub fn validate(&mut self) -> Result<(), String> {
+        if self.minimum_tests < 1 {
            return Err("minimum size must be at least 1".to_string());
        }
-        if self.maximum_size < 1 {
+        if self.maximum_tests < 1 {
            return Err("maximum size must be at least 1".to_string());
        }
-        // todo: fix an issue here where if minimum size is not defined, it prevents setting low maximum sizes.
-        if self.minimum_size > self.maximum_size {
-            return Err("Minimum size cannot be greater than maximum size".to_string());
+
+        if self.minimum_tests > self.maximum_tests {
+            log::warn!(
+                "minimum size '{}' is greater than '{}' maximum size, setting both to '{}'",
+                self.minimum_tests,
+                self.maximum_tests,
+                self.maximum_tests
+            );
+            self.minimum_tests = self.maximum_tests - 1;
        }

        if self.seed.is_some() && self.load.is_some() {
--- a/simulator/runner/env.rs
+++ b/simulator/runner/env.rs
@@ -67,7 +67,7 @@ impl SimulatorEnv {
        };

        let opts = SimulatorOpts {
-            ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
+            ticks: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests),
            max_connections: 1, // TODO: for now let's use one connection as we didn't implement
            // correct transactions processing
            max_tables: rng.gen_range(0..128),
@@ -77,7 +77,7 @@ impl SimulatorEnv {
            delete_percent,
            drop_percent,
            page_size: 4096, // TODO: randomize this too
-            max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
+            max_interactions: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests),
            max_time_simulation: cli_opts.maximum_time,
        };