Merge 'simulator: provide high level commands on top of a single runner' from Alperen Keleş

Previously, the simulator only generated and ran a single run with the
given configuration, and anyone was free to implement retries or
looping on top, which is still possible.
I still wanted to start adding some higher-level commands on top,
especially now that we have access to the bug base, so this PR adds the
following subcommands:
- loop: run the simulator in a loop `n` times, with the option to
short-circuit as soon as any run fails.
- list: list all the bugs in the bug base.
- test: run the simulator against a specific bug by providing a
`filter`. This is useful when debugging a certain issue: you can first
run a bunch of tests in a `loop`, then rerun the failing configurations
using `test` when you think you have a fix.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1317
This commit is contained in:
Jussi Saurio
2025-04-12 10:35:52 +03:00
4 changed files with 219 additions and 67 deletions

View File

@@ -6,7 +6,7 @@ use notify::event::{DataChange, ModifyKind};
use notify::{EventKind, RecursiveMode, Watcher};
use rand::prelude::*;
use runner::bugbase::{Bug, BugBase, LoadedBug};
use runner::cli::SimulatorCLI;
use runner::cli::{SimulatorCLI, SimulatorCommand};
use runner::env::SimulatorEnv;
use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult};
use runner::{differential, watch};
@@ -48,15 +48,87 @@ impl Paths {
/// Entry point of the simulator.
///
/// Initializes logging, parses and validates the CLI options, and then
/// dispatches on the optional subcommand:
/// - `List`: print every bug stored in the bug base.
/// - `Loop`: call `testing_main` `n` times, optionally stopping at the first failure.
/// - `Test`: re-run previously failing configurations whose error contains `filter`.
/// - no subcommand: print the banner and perform a single `testing_main` run.
fn main() -> Result<(), String> {
init_logger();
// NOTE(review): the next two lines look like the removed/added pair of a diff
// hunk (the second one shadows the first) — confirm which one belongs in the file.
let cli_opts = SimulatorCLI::parse();
let mut cli_opts = SimulatorCLI::parse();
cli_opts.validate()?;
match cli_opts.subcommand {
Some(SimulatorCommand::List) => {
// Load the bug base and print its contents; `list_bugs`' result is the exit result.
let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
bugbase.list_bugs()
}
Some(SimulatorCommand::Loop { n, short_circuit }) => {
banner();
for i in 0..n {
println!("iteration {}", i);
let result = testing_main(&cli_opts);
if result.is_err() && short_circuit {
// NOTE(review): `i` is zero-based, so this prints one less than the
// number of iterations that actually ran — confirm that is intended.
println!("short circuiting after {} iterations", i);
return result;
} else if result.is_err() {
println!("iteration {} failed", i);
} else {
println!("iteration {} succeeded", i);
}
}
// Without short-circuiting, failed iterations are only printed; the loop
// command itself still returns `Ok`.
Ok(())
}
Some(SimulatorCommand::Test { filter }) => {
let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
let bugs = bugbase.load_bugs()?;
// Collect the CLI options of every recorded run whose error message
// contains `filter`, across all bugs.
let mut bugs = bugs
.into_iter()
.flat_map(|bug| {
let runs = bug
.runs
.into_iter()
// Keep only runs that recorded an error...
.filter_map(|run| run.error.clone().map(|_| run))
// ...so the `unwrap` below cannot fail.
.filter(|run| run.error.as_ref().unwrap().contains(&filter))
.map(|run| run.cli_options)
.collect::<Vec<_>>();
runs.into_iter()
.map(|mut cli_opts| {
// Pin the run to the bug's seed and clear `load`; `validate`
// handles the case where both `seed` and `load` are set.
cli_opts.seed = Some(bug.seed);
cli_opts.load = None;
cli_opts
})
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();
// Sort so that identical configurations become adjacent, then drop duplicates.
bugs.sort();
bugs.dedup_by(|a, b| a == b);
println!(
"found {} previously triggered configurations with {}",
bugs.len(),
filter
);
// Run every matching configuration; all of them run regardless of failures.
let results = bugs
.into_iter()
.map(|cli_opts| testing_main(&cli_opts))
.collect::<Vec<_>>();
let (successes, failures): (Vec<_>, Vec<_>) =
results.into_iter().partition(|result| result.is_ok());
println!("the results of the change are:");
println!("\t{} successful runs", successes.len());
println!("\t{} failed runs", failures.len());
// The summary is informational only: this arm returns `Ok` even if runs failed.
Ok(())
}
None => {
// Default behavior: a single simulator run.
banner();
testing_main(&cli_opts)
}
}
}
fn testing_main(cli_opts: &SimulatorCLI) -> Result<(), String> {
let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
banner();
let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0)));
let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db);
let (seed, env, plans) = setup_simulation(&mut bugbase, cli_opts, |p| &p.plan, |p| &p.db);
let paths = bugbase.paths(seed);
@@ -66,7 +138,7 @@ fn main() -> Result<(), String> {
}
if cli_opts.watch {
watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap();
watch_mode(seed, cli_opts, &paths, last_execution.clone()).unwrap();
return Ok(());
}
@@ -74,7 +146,7 @@ fn main() -> Result<(), String> {
differential_testing(
seed,
&mut bugbase,
&cli_opts,
cli_opts,
&paths,
plans,
last_execution.clone(),
@@ -83,7 +155,7 @@ fn main() -> Result<(), String> {
run_simulator(
seed,
&mut bugbase,
&cli_opts,
cli_opts,
&paths,
env,
plans,
@@ -190,9 +262,15 @@ fn run_simulator(
);
if cli_opts.doublecheck {
let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db);
let env = Arc::new(Mutex::new(env));
doublecheck(env, paths, &plans, last_execution.clone(), result)
doublecheck(
seed,
bugbase,
cli_opts,
paths,
&plans,
last_execution.clone(),
result,
)
} else {
// No doublecheck, run shrinking if panicking or found a bug.
match &result {
@@ -303,12 +381,17 @@ fn run_simulator(
}
fn doublecheck(
env: Arc<Mutex<SimulatorEnv>>,
seed: u64,
bugbase: &mut BugBase,
cli_opts: &SimulatorCLI,
paths: &Paths,
plans: &[InteractionPlan],
last_execution: Arc<Mutex<Execution>>,
result: SandboxedResult,
) -> Result<(), String> {
let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db);
let env = Arc::new(Mutex::new(env));
// Run the simulation again
let result2 = SandboxedResult::from(
std::panic::catch_unwind(|| {
@@ -317,50 +400,24 @@ fn doublecheck(
last_execution.clone(),
);
match (result, result2) {
let doublecheck_result = match (result, result2) {
(SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => {
log::error!("doublecheck failed! first run succeeded, but second run panicked.");
Err("doublecheck failed! first run succeeded, but second run panicked.".to_string())
Err("first run succeeded, but second run panicked.".to_string())
}
(SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => {
log::error!(
"doublecheck failed! first run failed an assertion, but second run panicked."
);
Err(
"doublecheck failed! first run failed an assertion, but second run panicked."
.to_string(),
)
Err("first run failed an assertion, but second run panicked.".to_string())
}
(SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => {
log::error!("doublecheck failed! first run panicked, but second run succeeded.");
Err("doublecheck failed! first run panicked, but second run succeeded.".to_string())
Err("first run panicked, but second run succeeded.".to_string())
}
(SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => {
log::error!(
"doublecheck failed! first run panicked, but second run failed an assertion."
);
Err(
"doublecheck failed! first run panicked, but second run failed an assertion."
.to_string(),
)
Err("first run panicked, but second run failed an assertion.".to_string())
}
(SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => {
log::error!(
"doublecheck failed! first run succeeded, but second run failed an assertion."
);
Err(
"doublecheck failed! first run succeeded, but second run failed an assertion."
.to_string(),
)
Err("first run succeeded, but second run failed an assertion.".to_string())
}
(SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => {
log::error!(
"doublecheck failed! first run failed an assertion, but second run succeeded."
);
Err(
"doublecheck failed! first run failed an assertion, but second run succeeded."
.to_string(),
)
Err("first run failed an assertion, but second run succeeded.".to_string())
}
(SandboxedResult::Correct, SandboxedResult::Correct)
| (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. })
@@ -369,18 +426,30 @@ fn doublecheck(
let db_bytes = std::fs::read(&paths.db).unwrap();
let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap();
if db_bytes != doublecheck_db_bytes {
log::error!("doublecheck failed! database files are different.");
log::error!("current: {}", paths.db.display());
log::error!("doublecheck: {}", paths.doublecheck_db.display());
Err(
"doublecheck failed! database files are different, check binary diffs for more details.".to_string()
"database files are different, check binary diffs for more details."
.to_string(),
)
} else {
log::info!("doublecheck succeeded! database files are the same.");
println!("doublecheck succeeded! database files are the same.");
Ok(())
}
}
};
match doublecheck_result {
Ok(_) => {
log::info!("doublecheck succeeded");
println!("doublecheck succeeded");
bugbase.mark_successful_run(seed, cli_opts)?;
Ok(())
}
Err(e) => {
log::error!("doublecheck failed: '{}'", e);
bugbase
.add_bug(seed, plans[0].clone(), Some(e.clone()), cli_opts)
.unwrap();
Err(format!("doublecheck failed: '{}'", e))
}
}
}
@@ -417,6 +486,7 @@ fn differential_testing(
SandboxedResult::Correct => {
log::info!("simulation succeeded, output of Limbo conforms to SQLite");
println!("simulation succeeded, output of Limbo conforms to SQLite");
bugbase.mark_successful_run(seed, cli_opts).unwrap();
Ok(())
}
SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => {

View File

@@ -33,14 +33,14 @@ pub struct LoadedBug {
#[derive(Clone, Serialize, Deserialize)]
pub(crate) struct BugRun {
/// Commit hash of the current version of Limbo.
hash: String,
pub(crate) hash: String,
/// Timestamp of the run.
#[serde(with = "chrono::serde::ts_seconds")]
timestamp: DateTime<Utc>,
pub(crate) timestamp: DateTime<Utc>,
/// Error message of the run.
error: Option<String>,
pub(crate) error: Option<String>,
/// Options
cli_options: SimulatorCLI,
pub(crate) cli_options: SimulatorCLI,
}
impl Bug {
@@ -270,7 +270,10 @@ impl BugBase {
Some(Bug::Unloaded { .. }) => {
let plan =
std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json"))
.or(Err("should be able to read plan file".to_string()))?;
.or(Err(format!(
"should be able to read plan file at {}",
self.path.join(seed.to_string()).join("plan.json").display()
)))?;
let plan: InteractionPlan = serde_json::from_str(&plan)
.or(Err("should be able to deserialize plan".to_string()))?;
@@ -330,6 +333,45 @@ impl BugBase {
Ok(())
}
/// Loads every bug registered in this bug base.
///
/// The seeds are copied out of `self.bugs` up front so that the map is no
/// longer borrowed while `load_bug` is called on `self`. Loading stops at the
/// first bug that fails to load, and that error is returned.
pub(crate) fn load_bugs(&mut self) -> Result<Vec<LoadedBug>, String> {
    let seeds: Vec<_> = self.bugs.keys().copied().collect();
    let mut loaded = Vec::with_capacity(seeds.len());
    for seed in seeds {
        loaded.push(self.load_bug(seed)?);
    }
    Ok(loaded)
}
/// Prints a summary of every bug in the bug base to stdout.
///
/// For each bug this prints its seed, the plan statistics, and one entry per
/// recorded run (commit hash, timestamp, run type and, if present, the error).
/// Returns an error if the bugs cannot be loaded.
pub(crate) fn list_bugs(&mut self) -> Result<(), String> {
    for bug in self.load_bugs()? {
        println!("seed: {}", bug.seed);
        println!("plan: {}", bug.plan.stats());
        println!("runs:");
        println!(" ------------------");
        for run in &bug.runs {
            let opts = &run.cli_options;
            // `differential` takes precedence over `doublecheck` when both are set.
            let run_type = match (opts.differential, opts.doublecheck) {
                (true, _) => "differential",
                (false, true) => "doublecheck",
                (false, false) => "default",
            };
            println!(" - hash: {}", run.hash);
            println!(" timestamp: {}", run.timestamp);
            println!(" type: {}", run_type);
            if let Some(error) = &run.error {
                println!(" error: {}", error);
            }
        }
        println!(" ------------------");
    }
    Ok(())
}
}
impl BugBase {

View File

@@ -1,7 +1,7 @@
use clap::{command, Parser};
use serde::{Deserialize, Serialize};
#[derive(Parser, Clone, Serialize, Deserialize)]
#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)]
#[command(name = "limbo-simulator")]
#[command(author, version, about, long_about = None)]
pub struct SimulatorCLI {
@@ -19,14 +19,14 @@ pub struct SimulatorCLI {
help = "change the maximum size of the randomly generated sequence of interactions",
default_value_t = 5000
)]
pub maximum_size: usize,
pub maximum_tests: usize,
#[clap(
short = 'k',
long,
help = "change the minimum size of the randomly generated sequence of interactions",
default_value_t = 1000
)]
pub minimum_size: usize,
pub minimum_tests: usize,
#[clap(
short = 't',
long,
@@ -44,19 +44,59 @@ pub struct SimulatorCLI {
pub watch: bool,
#[clap(long, help = "run differential testing between sqlite and Limbo")]
pub differential: bool,
#[clap(subcommand)]
pub subcommand: Option<SimulatorCommand>,
}
// Optional subcommands of the simulator CLI; stored in `SimulatorCLI::subcommand`.
//
// Plain `//` comments are used on purpose: clap's derive macros turn `///` doc
// comments into help text, which would compete with the explicit
// `about`/`help` attributes below.
#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)]
pub enum SimulatorCommand {
// Run the simulator repeatedly.
#[clap(about = "run the simulator in a loop")]
Loop {
// Number of iterations (`-n` / `--n`), 5 by default.
#[clap(
short = 'n',
long,
help = "number of iterations to run the simulator",
default_value_t = 5
)]
n: usize,
// When set (`-s` / `--short-circuit`), stop at the first failing iteration.
#[clap(
short = 's',
long,
help = "short circuit the simulator, stop on the first failure",
default_value_t = false
)]
short_circuit: bool,
},
// Print all bugs recorded in the bug base.
#[clap(about = "list all the bugs in the base")]
List,
// Re-run previously recorded failing runs that match a filter.
#[clap(about = "run the simulator against a specific bug")]
Test {
// Required text filter (`-b` / `--filter`); there is no default value.
#[clap(
short = 'b',
long,
help = "run the simulator with previous buggy runs for the specific filter"
)]
filter: String,
},
}
impl SimulatorCLI {
pub fn validate(&self) -> Result<(), String> {
if self.minimum_size < 1 {
pub fn validate(&mut self) -> Result<(), String> {
if self.minimum_tests < 1 {
return Err("minimum size must be at least 1".to_string());
}
if self.maximum_size < 1 {
if self.maximum_tests < 1 {
return Err("maximum size must be at least 1".to_string());
}
// todo: fix an issue here where if minimum size is not defined, it prevents setting low maximum sizes.
if self.minimum_size > self.maximum_size {
return Err("Minimum size cannot be greater than maximum size".to_string());
if self.minimum_tests > self.maximum_tests {
log::warn!(
"minimum size '{}' is greater than '{}' maximum size, setting both to '{}'",
self.minimum_tests,
self.maximum_tests,
self.maximum_tests
);
self.minimum_tests = self.maximum_tests - 1;
}
if self.seed.is_some() && self.load.is_some() {

View File

@@ -67,7 +67,7 @@ impl SimulatorEnv {
};
let opts = SimulatorOpts {
ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
ticks: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests),
max_connections: 1, // TODO: for now let's use one connection as we didn't implement
// correct transactions processing
max_tables: rng.gen_range(0..128),
@@ -77,7 +77,7 @@ impl SimulatorEnv {
delete_percent,
drop_percent,
page_size: 4096, // TODO: randomize this too
max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
max_interactions: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests),
max_time_simulation: cli_opts.maximum_time,
};