diff --git a/.gitignore b/.gitignore index 369a9b7ef..726ec8796 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ dist/ testing/limbo_output.txt **/limbo_output.txt testing/test.log +.bugbase diff --git a/Cargo.lock b/Cargo.lock index 73c0fccc1..a85ec67df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -732,7 +732,16 @@ version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "dirs-sys", + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", ] [[package]] @@ -743,10 +752,22 @@ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.4.6", "windows-sys 0.48.0", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.0", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -1678,7 +1699,7 @@ dependencies = [ "comfy-table", "csv", "ctrlc", - "dirs", + "dirs 5.0.1", "env_logger 0.10.2", "limbo_core", "miette", @@ -1846,6 +1867,7 @@ version = "0.0.19-pre.4" dependencies = [ "anarchist-readable-name-generator-lib", "clap", + "dirs 6.0.0", "env_logger 0.10.2", "limbo_core", "log", @@ -1857,7 +1879,6 @@ dependencies = [ "rusqlite", "serde", "serde_json", - "tempfile", ] [[package]] @@ -2797,6 +2818,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" diff --git a/docs/testing.md b/docs/testing.md index 21823957f..399cc53fe 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -74,8 +74,63 @@ This will enable trace-level logs for the limbo_core crate and disable logs else ## Deterministic Simulation Testing (DST): -TODO! +The Limbo simulator uses randomized deterministic simulations to test the behavior of the Limbo database. +Each simulation begins with a random configuration: + +- the database workload distribution (percentages of reads, writes, deletes...), +- database parameters (page size), +- number of readers or writers, etc. + +Based on these parameters, we randomly generate **interaction plans**. Interaction plans consist of statements/queries, and assertions that will be executed in order. The building blocks of interaction plans are: + +- Randomly generated SQL queries satisfying the workload distribution, +- Properties, which contain multiple matching queries with assertions indicating the expected result. + +An example of a property is the following: + +```sql +-- begin testing 'Select-Select-Optimizer' +-- ASSUME table marvelous_ideal exists; +SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE; +SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486); +-- ASSERT select queries should return the same amount of results; +-- end testing 'Select-Select-Optimizer' +``` + +The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can +add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution.
+ +The simulator executes the interaction plans in a loop, and checks the assertions. It can add random queries unrelated to the properties without +breaking the property invariants to reach more diverse states and respect the configured workload distribution. + +## Usage + +To run the simulator, you can use the following command: + +```bash +RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim +``` + +The simulator CLI has a few configuration options that you can explore via the `--help` flag. + +```txt +The Limbo deterministic simulator + +Usage: limbo_sim [OPTIONS] + +Options: + -s, --seed set seed for reproducible runs + -d, --doublecheck enable doublechecking, run the simulator with the plan twice and check output equality + -n, --maximum-size change the maximum size of the randomly generated sequence of interactions [default: 5000] + -k, --minimum-size change the minimum size of the randomly generated sequence of interactions [default: 1000] + -t, --maximum-time change the maximum time of the simulation(in seconds) [default: 3600] + -l, --load load plan from the bug base + -w, --watch enable watch mode that reruns the simulation on file changes + --differential run differential testing between sqlite and Limbo + -h, --help Print help + -V, --version Print version +``` ## Fuzzing diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 991b72fc5..285604094 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -19,7 +19,6 @@ limbo_core = { path = "../core" } rand = "0.8.5" rand_chacha = "0.3.1" log = "0.4.20" -tempfile = "3.0.7" env_logger = "0.10.1" regex = "1.11.1" regex-syntax = { version = "0.8.5", default-features = false, features = [ @@ -31,3 +30,4 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0" } notify = "8.0.0" rusqlite = { version = "0.34", features = ["bundled"] } +dirs = "6.0.0" diff --git a/simulator/README.md b/simulator/README.md index 4e9081bd7..87d61479d 100644 --- a/simulator/README.md +++
b/simulator/README.md @@ -15,20 +15,18 @@ Based on these parameters, we randomly generate **interaction plans**. Interacti An example of a property is the following: -```json -{ - "name": "Read your own writes", - "queries": [ - "INSERT INTO t1 (id) VALUES (1)", - "SELECT * FROM t1 WHERE id = 1" - ], - "assertions": [ - "result.rows.length == 1", - "result.rows[0].id == 1" - ] -} +```sql +-- begin testing 'Select-Select-Optimizer' +-- ASSUME table marvelous_ideal exists; +SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE; +SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486); +-- ASSERT select queries should return the same amount of results; +-- end testing 'Select-Select-Optimizer' ``` +The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can +add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution. + The simulator executes the interaction plans in a loop, and checks the assertions. It can add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution. @@ -44,36 +42,72 @@ The simulator code is broken into 4 main parts: To run the simulator, you can use the following command: ```bash -cargo run +RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim ``` -This prompt (in the future) will invoke a clap command line interface to configure the simulator. For now, the simulator runs with the default configurations changing the `main.rs` file. If you want to see the logs, you can change the `RUST_LOG` environment variable. +The simulator CLI has a few configuration options that you can explore via `--help` flag. 
-```bash -RUST_LOG=info cargo run --bin limbo_sim +```txt +The Limbo deterministic simulator + +Usage: limbo_sim [OPTIONS] + +Options: + -s, --seed set seed for reproducible runs + -d, --doublecheck enable doublechecking, run the simulator with the plan twice and check output equality + -n, --maximum-size change the maximum size of the randomly generated sequence of interactions [default: 5000] + -k, --minimum-size change the minimum size of the randomly generated sequence of interactions [default: 1000] + -t, --maximum-time change the maximum time of the simulation(in seconds) [default: 3600] + -l, --load load plan from the bug base + -w, --watch enable watch mode that reruns the simulation on file changes + --differential run differential testing between sqlite and Limbo + -h, --help Print help + -V, --version Print version ``` ## Adding new properties -Todo +The properties are defined in `simulator/generation/property.rs` in the `Property` enum. Each property is documented with +inline doc comments, an example is given below: -## Adding new generation functions +```rust +/// Insert-Select is a property in which the inserted row +/// must be in the resulting rows of a select query that has a +/// where clause that matches the inserted row. +/// The execution of the property is as follows +/// INSERT INTO VALUES (...) +/// I_0 +/// I_1 +/// ... +/// I_n +/// SELECT * FROM WHERE +/// The interactions in the middle has the following constraints; +/// - There will be no errors in the middle interactions. +/// - The inserted row will not be deleted. +/// - The inserted row will not be updated. +/// - The table `t` will not be renamed, dropped, or altered. 
+InsertValuesSelect { + /// The insert query + insert: Insert, + /// Selected row index + row_index: usize, + /// Additional interactions in the middle of the property + queries: Vec, + /// The select query + select: Select, +}, +``` -Todo - -## Adding new models - -Todo - -## Coverage with Limbo - -Todo +If you would like to add a new property, you can add a new variant to the `Property` enum, and the corresponding +generation function in `simulator/generation/property.rs`. The generation function should return a `Property` instance, and +it should generate the necessary queries and assertions for the property. ## Automatic Compatibility Testing with SQLite -Todo +You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite, and compare the results. It will also check for any panics or errors in either database. ## Resources + - [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/) - [(reading) sled simulation guide (jepsen-proof engineering)](https://sled.rs/simulation.html) - [(video) "Testing Distributed Systems w/ Deterministic Simulation" by Will Wilson](https://www.youtube.com/watch?v=4fFDFbi3toc) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index ecad92344..4f4900b34 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -38,7 +38,7 @@ impl InteractionPlan { let interactions = interactions.lines().collect::>(); let plan: InteractionPlan = serde_json::from_str( - std::fs::read_to_string(plan_path.with_extension("plan.json")) + std::fs::read_to_string(plan_path.with_extension("json")) .unwrap() .as_str(), ) @@ -71,7 +71,6 @@ impl InteractionPlan { let _ = plan[j].split_off(k); break; } - if interactions[i].contains(plan[j][k].to_string().as_str()) { i += 1; k += 1; @@ -86,7 +85,7 @@ impl InteractionPlan { j += 1; } } - + let _ = plan.split_off(j); plan } } diff --git 
a/simulator/generation/property.rs b/simulator/generation/property.rs index cbcd2c479..d73f17f96 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -407,7 +407,7 @@ impl Property { match (select_predicate, select_star) { (Ok(rows1), Ok(rows2)) => { // If rows1 results have more than 1 column, there is a problem - if rows1.iter().find(|vs| vs.len() > 1).is_some() { + if rows1.iter().any(|vs| vs.len() > 1) { return Err(LimboError::InternalError( "Select query without the star should return only one column".to_string(), )); diff --git a/simulator/main.rs b/simulator/main.rs index d28c2b017..ef22853f4 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -2,10 +2,10 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, InteractionPlanState}; use generation::ArbitraryFrom; -use limbo_core::Database; use notify::event::{DataChange, ModifyKind}; use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; +use runner::bugbase::{Bug, BugBase}; use runner::cli::SimulatorCLI; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; @@ -15,13 +15,13 @@ use std::backtrace::Backtrace; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{mpsc, Arc, Mutex}; -use tempfile::TempDir; mod generation; mod model; mod runner; mod shrink; struct Paths { + base: PathBuf, db: PathBuf, plan: PathBuf, shrunk_plan: PathBuf, @@ -31,34 +31,16 @@ struct Paths { } impl Paths { - fn new(output_dir: &Path, shrink: bool, doublecheck: bool) -> Self { - let paths = Paths { - db: PathBuf::from(output_dir).join("simulator.db"), - plan: PathBuf::from(output_dir).join("simulator.plan"), - shrunk_plan: PathBuf::from(output_dir).join("simulator_shrunk.plan"), - history: PathBuf::from(output_dir).join("simulator.history"), - doublecheck_db: PathBuf::from(output_dir).join("simulator_double.db"), - shrunk_db: PathBuf::from(output_dir).join("simulator_shrunk.db"), - 
}; - - // Print the seed, the locations of the database and the plan file - log::info!("database path: {:?}", paths.db); - if doublecheck { - log::info!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if shrink { - log::info!("shrunk database path: {:?}", paths.shrunk_db); + fn new(output_dir: &Path) -> Self { + Paths { + base: output_dir.to_path_buf(), + db: PathBuf::from(output_dir).join("test.db"), + plan: PathBuf::from(output_dir).join("plan.sql"), + shrunk_plan: PathBuf::from(output_dir).join("shrunk.sql"), + history: PathBuf::from(output_dir).join("history.txt"), + doublecheck_db: PathBuf::from(output_dir).join("double.db"), + shrunk_db: PathBuf::from(output_dir).join("shrunk.db"), } - log::info!("simulator plan path: {:?}", paths.plan); - log::info!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if shrink { - log::info!("shrunk plan path: {:?}", paths.shrunk_plan); - } - log::info!("simulator history path: {:?}", paths.history); - - paths } } @@ -68,45 +50,37 @@ fn main() -> Result<(), String> { let cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; - let seed = cli_opts.seed.unwrap_or_else(|| thread_rng().next_u64()); - - let output_dir = match &cli_opts.output_dir { - Some(dir) => Path::new(dir).to_path_buf(), - None => TempDir::new().map_err(|e| format!("{:?}", e))?.into_path(), - }; - + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; banner(); - let paths = Paths::new(&output_dir, cli_opts.shrink, cli_opts.doublecheck); - - log::info!("seed: {}", seed); + // let paths = Paths::new(&output_dir, cli_opts.doublecheck); let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); - let (env, plans) = setup_simulation(seed, &cli_opts, &paths.db, &paths.plan); + let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db); + + let paths = bugbase.paths(seed); + + // Create the output directory if it doesn't exist + if 
!paths.base.exists() { + std::fs::create_dir_all(&paths.base).map_err(|e| format!("{:?}", e))?; + } if cli_opts.watch { watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); } else if cli_opts.differential { differential_testing(env, plans, last_execution.clone()) } else { - run_simulator(&cli_opts, &paths, env, plans, last_execution.clone()); + run_simulator( + seed, + &mut bugbase, + &cli_opts, + &paths, + env, + plans, + last_execution.clone(), + ); } // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. - println!("database path: {:?}", paths.db); - if cli_opts.doublecheck { - println!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if cli_opts.shrink { - println!("shrunk database path: {:?}", paths.shrunk_db); - } - println!("simulator plan path: {:?}", paths.plan); - println!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if cli_opts.shrink { - println!("shrunk plan path: {:?}", paths.shrunk_plan); - } - println!("simulator history path: {:?}", paths.history); println!("seed: {}", seed); Ok(()) @@ -140,7 +114,6 @@ fn watch_mode( std::panic::catch_unwind(|| { let plan: Vec> = InteractionPlan::compute_via_diff(&paths.plan); - let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); plan.iter().for_each(|is| { is.iter().for_each(|i| { @@ -173,6 +146,8 @@ fn watch_mode( } fn run_simulator( + seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, paths: &Paths, env: SimulatorEnv, @@ -204,13 +179,17 @@ fn run_simulator( ); if cli_opts.doublecheck { - doublecheck(env.clone(), paths, &plans, last_execution.clone(), result); + let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); + let env = Arc::new(Mutex::new(env)); + doublecheck(env, paths, &plans, last_execution.clone(), result); } else { // No doublecheck, run shrinking if panicking or found a bug. 
match &result { SandboxedResult::Correct => { log::info!("simulation succeeded"); println!("simulation succeeded"); + // remove the bugbase entry + bugbase.remove_bug(seed).unwrap(); } SandboxedResult::Panicked { error, @@ -240,59 +219,62 @@ fn run_simulator( log::error!("simulation failed: '{}'", error); println!("simulation failed: '{}'", error); - if cli_opts.shrink { - log::info!("Starting to shrink"); + log::info!("Starting to shrink"); - let shrunk_plans = plans - .iter() - .map(|plan| { - let shrunk = plan.shrink_interaction_plan(last_execution); - log::info!("{}", shrunk.stats()); - shrunk - }) - .collect::>(); + let shrunk_plans = plans + .iter() + .map(|plan| { + let shrunk = plan.shrink_interaction_plan(last_execution); + log::info!("{}", shrunk.stats()); + shrunk + }) + .collect::>(); - // Write the shrunk plan to a file - let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); - f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); + // Write the shrunk plan to a file + let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); + f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); - let last_execution = Arc::new(Mutex::new(*last_execution)); + let last_execution = Arc::new(Mutex::new(*last_execution)); + let env = SimulatorEnv::new(seed, cli_opts, &paths.shrunk_db); - let shrunk = SandboxedResult::from( - std::panic::catch_unwind(|| { - run_simulation( - env.clone(), - &mut shrunk_plans.clone(), - last_execution.clone(), - ) - }), - last_execution, - ); - - match (&shrunk, &result) { - ( - SandboxedResult::Panicked { error: e1, .. }, - SandboxedResult::Panicked { error: e2, .. }, + let env = Arc::new(Mutex::new(env)); + let shrunk = SandboxedResult::from( + std::panic::catch_unwind(|| { + run_simulation( + env.clone(), + &mut shrunk_plans.clone(), + last_execution.clone(), ) - | ( - SandboxedResult::FoundBug { error: e1, .. }, - SandboxedResult::FoundBug { error: e2, .. 
}, - ) => { - if e1 != e2 { - log::error!( - "shrinking failed, the error was not properly reproduced" - ); - } else { - log::info!("shrinking succeeded"); - } - } - (_, SandboxedResult::Correct) => { - unreachable!("shrinking should never be called on a correct simulation") - } - _ => { + }), + last_execution, + ); + + match (&shrunk, &result) { + ( + SandboxedResult::Panicked { error: e1, .. }, + SandboxedResult::Panicked { error: e2, .. }, + ) + | ( + SandboxedResult::FoundBug { error: e1, .. }, + SandboxedResult::FoundBug { error: e2, .. }, + ) => { + if e1 != e2 { log::error!("shrinking failed, the error was not properly reproduced"); + bugbase.add_bug(seed, plans[0].clone()).unwrap(); + } else { + log::info!("shrinking succeeded"); + println!("shrinking succeeded"); + // Save the shrunk database + bugbase.add_bug(seed, shrunk_plans[0].clone()).unwrap(); } } + (_, SandboxedResult::Correct) => { + unreachable!("shrinking should never be called on a correct simulation") + } + _ => { + log::error!("shrinking failed, the error was not properly reproduced"); + bugbase.add_bug(seed, plans[0].clone()).unwrap(); + } } } } @@ -306,16 +288,6 @@ fn doublecheck( last_execution: Arc>, result: SandboxedResult, ) { - { - let mut env_ = env.lock().unwrap(); - env_.db = Database::open_file( - env_.io.clone(), - paths.doublecheck_db.to_str().unwrap(), - false, - ) - .unwrap(); - } - // Run the simulation again let result2 = SandboxedResult::from( std::panic::catch_unwind(|| { @@ -443,54 +415,71 @@ impl SandboxedResult { } fn setup_simulation( - mut seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, - db_path: &Path, - plan_path: &Path, -) -> (SimulatorEnv, Vec) { - if let Some(load) = &cli_opts.load { - let seed_path = PathBuf::from(load).with_extension("seed"); - let seed_str = std::fs::read_to_string(&seed_path).unwrap(); - seed = seed_str.parse().unwrap(); - } + plan_path: fn(&Paths) -> &Path, + db_path: fn(&Paths) -> &Path, +) -> (u64, SimulatorEnv, Vec) { + if 
let Some(seed) = &cli_opts.load { + let seed = seed.parse::().expect("seed should be a number"); + let bug = bugbase + .get_bug(seed) + .unwrap_or_else(|| panic!("bug '{}' not found in bug base", seed)); - let mut env = SimulatorEnv::new(seed, cli_opts, db_path); + let paths = bugbase.paths(seed); + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).unwrap(); + } + let env = SimulatorEnv::new(bug.seed(), cli_opts, db_path(&paths)); - // todo: the loading works correctly because of a hacky decision - // Right now, the plan generation is the only point we use the rng, so the environment doesn't - // even need it. In the future, especially with multi-connections and multi-threading, we might - // use the RNG for more things such as scheduling, so this assumption will fail. When that happens, - // we'll need to reachitect this logic by saving and loading RNG state. - let plans = if let Some(load) = &cli_opts.load { - log::info!("Loading database interaction plan..."); - let plan = std::fs::read_to_string(load).unwrap(); - let plan: InteractionPlan = serde_json::from_str(&plan).unwrap(); - vec![plan] + let plan = match bug { + Bug::Loaded { plan, .. 
} => plan.clone(), + Bug::Unloaded { seed } => { + let seed = *seed; + bugbase + .load_bug(seed) + .unwrap_or_else(|_| panic!("could not load bug '{}' in bug base", seed)) + } + }; + + std::fs::write(plan_path(&paths), plan.to_string()).unwrap(); + std::fs::write( + plan_path(&paths).with_extension("json"), + serde_json::to_string_pretty(&plan).unwrap(), + ) + .unwrap(); + let plans = vec![plan]; + (seed, env, plans) } else { + let seed = cli_opts.seed.unwrap_or_else(|| { + let mut rng = rand::thread_rng(); + rng.next_u64() + }); + + let paths = bugbase.paths(seed); + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).unwrap(); + } + let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); + log::info!("Generating database interaction plan..."); - (1..=env.opts.max_connections) + + let plans = (1..=env.opts.max_connections) .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) - .collect::>() - }; + .collect::>(); - // todo: for now, we only use 1 connection, so it's safe to use the first plan. - let plan = plans[0].clone(); - - let mut f = std::fs::File::create(plan_path).unwrap(); - // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. - f.write_all(plan.to_string().as_bytes()).unwrap(); - - let serialized_plan_path = plan_path.with_extension("plan.json"); - let mut f = std::fs::File::create(&serialized_plan_path).unwrap(); - f.write_all(serde_json::to_string(&plan).unwrap().as_bytes()) + // todo: for now, we only use 1 connection, so it's safe to use the first plan. 
+ let plan = &plans[0]; + log::info!("{}", plan.stats()); + std::fs::write(plan_path(&paths), plan.to_string()).unwrap(); + std::fs::write( + plan_path(&paths).with_extension("json"), + serde_json::to_string_pretty(&plan).unwrap(), + ) .unwrap(); - let seed_path = plan_path.with_extension("seed"); - let mut f = std::fs::File::create(&seed_path).unwrap(); - f.write_all(seed.to_string().as_bytes()).unwrap(); - - log::info!("{}", plan.stats()); - (env, plans) + (seed, env, plans) + } } fn run_simulation( diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs new file mode 100644 index 000000000..83c4273b3 --- /dev/null +++ b/simulator/runner/bugbase.rs @@ -0,0 +1,241 @@ +use std::{ + collections::HashMap, + io::{self, Write}, + path::PathBuf, + process::Command, +}; + +use crate::{InteractionPlan, Paths}; + +/// A bug is a run that has been identified as buggy. +#[derive(Clone)] +pub(crate) enum Bug { + Unloaded { seed: u64 }, + Loaded { seed: u64, plan: InteractionPlan }, +} + +impl Bug { + /// Check if the bug is loaded. + pub(crate) fn is_loaded(&self) -> bool { + match self { + Bug::Unloaded { .. } => false, + Bug::Loaded { .. } => true, + } + } + + /// Get the seed of the bug. + pub(crate) fn seed(&self) -> u64 { + match self { + Bug::Unloaded { seed } => *seed, + Bug::Loaded { seed, .. } => *seed, + } + } +} + +/// Bug Base is a local database of buggy runs. +pub(crate) struct BugBase { + /// Path to the bug base directory. + path: PathBuf, + /// The list of buggy runs, uniquely identified by their seed + bugs: HashMap, +} + +impl BugBase { + /// Create a new bug base. 
+ fn new(path: PathBuf) -> Result { + let mut bugs = HashMap::new(); + // list all the bugs in the path as directories + if let Ok(entries) = std::fs::read_dir(&path) { + for entry in entries.flatten() { + if entry.file_type().is_ok_and(|ft| ft.is_dir()) { + let seed = entry + .file_name() + .to_string_lossy() + .to_string() + .parse::() + .or(Err(format!( + "failed to parse seed from directory name {}", + entry.file_name().to_string_lossy() + )))?; + bugs.insert(seed, Bug::Unloaded { seed }); + } + } + } + + Ok(Self { path, bugs }) + } + + /// Load the bug base from one of the potential paths. + pub(crate) fn load() -> Result { + let potential_paths = vec![ + // limbo project directory + BugBase::get_limbo_project_dir()?, + // home directory + dirs::home_dir().ok_or("should be able to get home directory".to_string())?, + // current directory + std::env::current_dir() + .or(Err("should be able to get current directory".to_string()))?, + ]; + + for path in potential_paths { + let path = path.join(".bugbase"); + if path.exists() { + return BugBase::new(path); + } + } + + println!("select bug base location:"); + println!("1. limbo project directory"); + println!("2. home directory"); + println!("3. 
current directory"); + print!("> "); + io::stdout().flush().unwrap(); + let mut choice = String::new(); + io::stdin() + .read_line(&mut choice) + .expect("failed to read line"); + + let choice = choice + .trim() + .parse::() + .or(Err(format!("invalid choice {choice}")))?; + let path = match choice { + 1 => BugBase::get_limbo_project_dir()?.join(".bugbase"), + 2 => { + let home = std::env::var("HOME").or(Err("failed to get home directory"))?; + PathBuf::from(home).join(".bugbase") + } + 3 => PathBuf::from(".bugbase"), + _ => return Err(format!("invalid choice {choice}")), + }; + + if path.exists() { + unreachable!("bug base already exists at {}", path.display()); + } else { + std::fs::create_dir_all(&path).or(Err("failed to create bug base"))?; + log::info!("bug base created at {}", path.display()); + BugBase::new(path) + } + } + + /// Add a new bug to the bug base. + pub(crate) fn add_bug(&mut self, seed: u64, plan: InteractionPlan) -> Result<(), String> { + log::debug!("adding bug with seed {}", seed); + if self.bugs.contains_key(&seed) { + return Err(format!("Bug with hash {} already exists", seed)); + } + self.save_bug(seed, &plan)?; + self.bugs.insert(seed, Bug::Loaded { seed, plan }); + Ok(()) + } + + /// Get a bug from the bug base. + pub(crate) fn get_bug(&self, seed: u64) -> Option<&Bug> { + self.bugs.get(&seed) + } + + /// Save a bug to the bug base. + pub(crate) fn save_bug(&self, seed: u64, plan: &InteractionPlan) -> Result<(), String> { + let bug_path = self.path.join(seed.to_string()); + std::fs::create_dir_all(&bug_path) + .or(Err("should be able to create bug directory".to_string()))?; + + let seed_path = bug_path.join("seed.txt"); + std::fs::write(&seed_path, seed.to_string()) + .or(Err("should be able to write seed file".to_string()))?; + + // At some point we might want to save the commit hash of the current + // version of Limbo. 
+ // let commit_hash = Self::get_current_commit_hash()?; + // let commit_hash_path = bug_path.join("commit_hash.txt"); + // std::fs::write(&commit_hash_path, commit_hash) + // .or(Err("should be able to write commit hash file".to_string()))?; + + let plan_path = bug_path.join("plan.json"); + std::fs::write( + &plan_path, + serde_json::to_string(plan).or(Err("should be able to serialize plan".to_string()))?, + ) + .or(Err("should be able to write plan file".to_string()))?; + + let readable_plan_path = bug_path.join("plan.sql"); + std::fs::write(&readable_plan_path, plan.to_string()) + .or(Err("should be able to write readable plan file".to_string()))?; + Ok(()) + } + + pub(crate) fn load_bug(&mut self, seed: u64) -> Result { + let seed_match = self.bugs.get(&seed); + + match seed_match { + None => Err(format!("No bugs found for seed {}", seed)), + Some(Bug::Unloaded { .. }) => { + let plan = + std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json")) + .or(Err("should be able to read plan file".to_string()))?; + let plan: InteractionPlan = serde_json::from_str(&plan) + .or(Err("should be able to deserialize plan".to_string()))?; + + let bug = Bug::Loaded { + seed, + plan: plan.clone(), + }; + self.bugs.insert(seed, bug); + log::debug!("Loaded bug with seed {}", seed); + Ok(plan) + } + Some(Bug::Loaded { plan, .. }) => { + log::warn!( + "Bug with seed {} is already loaded, returning the existing plan", + seed + ); + Ok(plan.clone()) + } + } + } + + pub(crate) fn remove_bug(&mut self, seed: u64) -> Result<(), String> { + self.bugs.remove(&seed); + std::fs::remove_dir_all(self.path.join(seed.to_string())) + .or(Err("should be able to remove bug directory".to_string()))?; + + log::debug!("Removed bug with seed {}", seed); + Ok(()) + } +} + +impl BugBase { + /// Get the path to the bug base directory. + pub(crate) fn path(&self) -> &PathBuf { + &self.path + } + + /// Get the path to the database file for a given seed. 
+ pub(crate) fn db_path(&self, seed: u64) -> PathBuf { + self.path.join(format!("{}/test.db", seed)) + } + + /// Get paths to all the files for a given seed. + pub(crate) fn paths(&self, seed: u64) -> Paths { + let base = self.path.join(format!("{}/", seed)); + Paths::new(&base) + } +} + +impl BugBase { + pub(crate) fn get_limbo_project_dir() -> Result { + Ok(PathBuf::from( + String::from_utf8( + Command::new("git") + .args(["rev-parse", "--git-dir"]) + .output() + .or(Err("should be able to get the git path".to_string()))? + .stdout, + ) + .or(Err("commit hash should be valid utf8".to_string()))? + .trim() + .strip_suffix(".git") + .ok_or("should be able to strip .git suffix".to_string())?, + )) + } +} diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index a18c47212..b07b89d47 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -6,8 +6,6 @@ use clap::{command, Parser}; pub struct SimulatorCLI { #[clap(short, long, help = "set seed for reproducible runs", default_value = None)] pub seed: Option, - #[clap(short, long, help = "set custom output directory for produced files", default_value = None)] - pub output_dir: Option, #[clap( short, long, @@ -35,13 +33,7 @@ pub struct SimulatorCLI { default_value_t = 60 * 60 // default to 1 hour )] pub maximum_time: usize, - #[clap( - short = 'm', - long, - help = "minimize(shrink) the failing counterexample" - )] - pub shrink: bool, - #[clap(short = 'l', long, help = "load plan from a file")] + #[clap(short = 'l', long, help = "load plan from the bug base")] pub load: Option, #[clap( short = 'w', @@ -66,14 +58,8 @@ impl SimulatorCLI { return Err("Minimum size cannot be greater than maximum size".to_string()); } - // Make sure incompatible options are not set - if self.shrink && self.doublecheck { - return Err("Cannot use shrink and doublecheck at the same time".to_string()); - } - - if let Some(plan_path) = &self.load { - std::fs::File::open(plan_path) - .map_err(|_| format!("Plan file '{}' could 
not be opened", plan_path))?; + if self.seed.is_some() && self.load.is_some() { + return Err("Cannot set seed and load plan at the same time".to_string()); } Ok(()) diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index a9409ad7e..19233fc4a 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -85,6 +85,7 @@ impl SimulatorEnv { // Remove existing database file if it exists if db_path.exists() { std::fs::remove_file(db_path).unwrap(); + std::fs::remove_file(db_path.with_extension("db-wal")).unwrap(); } let db = match Database::open_file(io.clone(), db_path.to_str().unwrap(), false) { diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index 8ae4b0cf6..757d9f3ce 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -68,7 +68,12 @@ pub(crate) fn execute_plans( // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); let state = &mut states[connection_index]; - + std::thread::sleep(std::time::Duration::from_millis( + std::env::var("TICK_SLEEP") + .unwrap_or("0".into()) + .parse() + .unwrap_or(0), + )); history.history.push(Execution::new( connection_index, state.interaction_pointer, @@ -121,6 +126,7 @@ fn execute_plan( } else { match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { + interaction.shadow(env); log::debug!("connection {} processed", connection_index); // Move to the next interaction or property match next_execution { diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs index 36a6fbb0a..792c4bddd 100644 --- a/simulator/runner/mod.rs +++ b/simulator/runner/mod.rs @@ -1,3 +1,4 @@ +pub mod bugbase; pub mod cli; pub mod differential; pub mod env;