Merge 'simulator: Add Bug Database(BugBase)' from Alperen Keleş

Previously, the simulator used `tempfile` for storing the resulting
interaction plans, database file, seeds, and all other relevant information.
This meant that the information was ephemeral, so we were not able to
properly use the results of previous runs for optimizing future runs.
This PR removes the CLI option `output_dir` and bases the storage
infrastructure on top of the `BugBase` interface.

Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #1276
This commit is contained in:
Pekka Enberg
2025-04-11 09:35:09 +03:00
13 changed files with 553 additions and 208 deletions

1
.gitignore vendored
View File

@@ -35,3 +35,4 @@ dist/
testing/limbo_output.txt
**/limbo_output.txt
testing/test.log
.bugbase

40
Cargo.lock generated
View File

@@ -732,7 +732,16 @@ version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225"
dependencies = [
"dirs-sys",
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e"
dependencies = [
"dirs-sys 0.5.0",
]
[[package]]
@@ -743,10 +752,22 @@ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c"
dependencies = [
"libc",
"option-ext",
"redox_users",
"redox_users 0.4.6",
"windows-sys 0.48.0",
]
[[package]]
name = "dirs-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab"
dependencies = [
"libc",
"option-ext",
"redox_users 0.5.0",
"windows-sys 0.59.0",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
@@ -1678,7 +1699,7 @@ dependencies = [
"comfy-table",
"csv",
"ctrlc",
"dirs",
"dirs 5.0.1",
"env_logger 0.10.2",
"limbo_core",
"miette",
@@ -1846,6 +1867,7 @@ version = "0.0.19-pre.4"
dependencies = [
"anarchist-readable-name-generator-lib",
"clap",
"dirs 6.0.0",
"env_logger 0.10.2",
"limbo_core",
"log",
@@ -1857,7 +1879,6 @@ dependencies = [
"rusqlite",
"serde",
"serde_json",
"tempfile",
]
[[package]]
@@ -2797,6 +2818,17 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "redox_users"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b"
dependencies = [
"getrandom 0.2.15",
"libredox",
"thiserror 2.0.12",
]
[[package]]
name = "regex"
version = "1.11.1"

View File

@@ -74,8 +74,63 @@ This will enable trace-level logs for the limbo_core crate and disable logs else
## Deterministic Simulation Testing (DST):
TODO!
Limbo simulator uses randomized deterministic simulations to test the Limbo database behaviors.
Each simulation begins with a random configuration:
- the database workload distribution(percentages of reads, writes, deletes...),
- database parameters(page size),
- number of reader or writers, etc.
Based on these parameters, we randomly generate **interaction plans**. Interaction plans consist of statements/queries, and assertions that will be executed in order. The building blocks of interaction plans are:
- Randomly generated SQL queries satisfying the workload distribution,
- Properties, which contain multiple matching queries with assertions indicating the expected result.
An example of a property is the following:
```sql
-- begin testing 'Select-Select-Optimizer'
-- ASSUME table marvelous_ideal exists;
SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE;
SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486);
-- ASSERT select queries should return the same amount of results;
-- end testing 'Select-Select-Optimizer'
```
The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can
add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution.
The simulator executes the interaction plans in a loop, and checks the assertions.
## Usage
To run the simulator, you can use the following command:
```bash
RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim
```
The simulator CLI has a few configuration options that you can explore via `--help` flag.
```txt
The Limbo deterministic simulator
Usage: limbo_sim [OPTIONS]
Options:
-s, --seed <SEED> set seed for reproducible runs
-d, --doublecheck enable doublechecking, run the simulator with the plan twice and check output equality
-n, --maximum-size <MAXIMUM_SIZE> change the maximum size of the randomly generated sequence of interactions [default: 5000]
-k, --minimum-size <MINIMUM_SIZE> change the minimum size of the randomly generated sequence of interactions [default: 1000]
-t, --maximum-time <MAXIMUM_TIME> change the maximum time of the simulation(in seconds) [default: 3600]
-l, --load <LOAD> load plan from the bug base
-w, --watch enable watch mode that reruns the simulation on file changes
--differential run differential testing between sqlite and Limbo
-h, --help Print help
-V, --version Print version
```
## Fuzzing

View File

@@ -19,7 +19,6 @@ limbo_core = { path = "../core" }
rand = "0.8.5"
rand_chacha = "0.3.1"
log = "0.4.20"
tempfile = "3.0.7"
env_logger = "0.10.1"
regex = "1.11.1"
regex-syntax = { version = "0.8.5", default-features = false, features = [
@@ -31,3 +30,4 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0" }
notify = "8.0.0"
rusqlite = { version = "0.34", features = ["bundled"] }
dirs = "6.0.0"

View File

@@ -15,20 +15,18 @@ Based on these parameters, we randomly generate **interaction plans**. Interacti
An example of a property is the following:
```json
{
"name": "Read your own writes",
"queries": [
"INSERT INTO t1 (id) VALUES (1)",
"SELECT * FROM t1 WHERE id = 1"
],
"assertions": [
"result.rows.length == 1",
"result.rows[0].id == 1"
]
}
```sql
-- begin testing 'Select-Select-Optimizer'
-- ASSUME table marvelous_ideal exists;
SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE;
SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486);
-- ASSERT select queries should return the same amount of results;
-- end testing 'Select-Select-Optimizer'
```
The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can
add random queries unrelated to the properties without breaking the property invariants to reach more diverse states and respect the configured workload distribution.
The simulator executes the interaction plans in a loop, and checks the assertions.
@@ -44,36 +42,72 @@ The simulator code is broken into 4 main parts:
To run the simulator, you can use the following command:
```bash
cargo run
RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim
```
This prompt (in the future) will invoke a clap command line interface to configure the simulator. For now, the simulator runs with the default configurations changing the `main.rs` file. If you want to see the logs, you can change the `RUST_LOG` environment variable.
The simulator CLI has a few configuration options that you can explore via `--help` flag.
```bash
RUST_LOG=info cargo run --bin limbo_sim
```txt
The Limbo deterministic simulator
Usage: limbo_sim [OPTIONS]
Options:
-s, --seed <SEED> set seed for reproducible runs
-d, --doublecheck enable doublechecking, run the simulator with the plan twice and check output equality
-n, --maximum-size <MAXIMUM_SIZE> change the maximum size of the randomly generated sequence of interactions [default: 5000]
-k, --minimum-size <MINIMUM_SIZE> change the minimum size of the randomly generated sequence of interactions [default: 1000]
-t, --maximum-time <MAXIMUM_TIME> change the maximum time of the simulation(in seconds) [default: 3600]
-l, --load <LOAD> load plan from the bug base
-w, --watch enable watch mode that reruns the simulation on file changes
--differential run differential testing between sqlite and Limbo
-h, --help Print help
-V, --version Print version
```
## Adding new properties
Todo
The properties are defined in `simulator/generation/property.rs` in the `Property` enum. Each property is documented with
inline doc comments, an example is given below:
## Adding new generation functions
```rust
/// Insert-Select is a property in which the inserted row
/// must be in the resulting rows of a select query that has a
/// where clause that matches the inserted row.
/// The execution of the property is as follows
/// INSERT INTO <t> VALUES (...)
/// I_0
/// I_1
/// ...
/// I_n
/// SELECT * FROM <t> WHERE <predicate>
/// The interactions in the middle has the following constraints;
/// - There will be no errors in the middle interactions.
/// - The inserted row will not be deleted.
/// - The inserted row will not be updated.
/// - The table `t` will not be renamed, dropped, or altered.
InsertValuesSelect {
/// The insert query
insert: Insert,
/// Selected row index
row_index: usize,
/// Additional interactions in the middle of the property
queries: Vec<Query>,
/// The select query
select: Select,
},
```
Todo
## Adding new models
Todo
## Coverage with Limbo
Todo
If you would like to add a new property, you can add a new variant to the `Property` enum, and the corresponding
generation function in `simulator/generation/property.rs`. The generation function should return a `Property` instance, and
it should generate the necessary queries and assertions for the property.
## Automatic Compatibility Testing with SQLite
Todo
You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite, and compare the results. It will also check for any panics or errors in either database.
## Resources
- [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/)
- [(reading) sled simulation guide (jepsen-proof engineering)](https://sled.rs/simulation.html)
- [(video) "Testing Distributed Systems w/ Deterministic Simulation" by Will Wilson](https://www.youtube.com/watch?v=4fFDFbi3toc)

View File

@@ -38,7 +38,7 @@ impl InteractionPlan {
let interactions = interactions.lines().collect::<Vec<_>>();
let plan: InteractionPlan = serde_json::from_str(
std::fs::read_to_string(plan_path.with_extension("plan.json"))
std::fs::read_to_string(plan_path.with_extension("json"))
.unwrap()
.as_str(),
)
@@ -71,7 +71,6 @@ impl InteractionPlan {
let _ = plan[j].split_off(k);
break;
}
if interactions[i].contains(plan[j][k].to_string().as_str()) {
i += 1;
k += 1;
@@ -86,7 +85,7 @@ impl InteractionPlan {
j += 1;
}
}
let _ = plan.split_off(j);
plan
}
}

View File

@@ -407,7 +407,7 @@ impl Property {
match (select_predicate, select_star) {
(Ok(rows1), Ok(rows2)) => {
// If rows1 results have more than 1 column, there is a problem
if rows1.iter().find(|vs| vs.len() > 1).is_some() {
if rows1.iter().any(|vs| vs.len() > 1) {
return Err(LimboError::InternalError(
"Select query without the star should return only one column".to_string(),
));

View File

@@ -2,10 +2,10 @@
use clap::Parser;
use generation::plan::{Interaction, InteractionPlan, InteractionPlanState};
use generation::ArbitraryFrom;
use limbo_core::Database;
use notify::event::{DataChange, ModifyKind};
use notify::{EventKind, RecursiveMode, Watcher};
use rand::prelude::*;
use runner::bugbase::{Bug, BugBase};
use runner::cli::SimulatorCLI;
use runner::env::SimulatorEnv;
use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult};
@@ -15,13 +15,13 @@ use std::backtrace::Backtrace;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::{mpsc, Arc, Mutex};
use tempfile::TempDir;
mod generation;
mod model;
mod runner;
mod shrink;
struct Paths {
base: PathBuf,
db: PathBuf,
plan: PathBuf,
shrunk_plan: PathBuf,
@@ -31,34 +31,16 @@ struct Paths {
}
impl Paths {
fn new(output_dir: &Path, shrink: bool, doublecheck: bool) -> Self {
let paths = Paths {
db: PathBuf::from(output_dir).join("simulator.db"),
plan: PathBuf::from(output_dir).join("simulator.plan"),
shrunk_plan: PathBuf::from(output_dir).join("simulator_shrunk.plan"),
history: PathBuf::from(output_dir).join("simulator.history"),
doublecheck_db: PathBuf::from(output_dir).join("simulator_double.db"),
shrunk_db: PathBuf::from(output_dir).join("simulator_shrunk.db"),
};
// Print the seed, the locations of the database and the plan file
log::info!("database path: {:?}", paths.db);
if doublecheck {
log::info!("doublecheck database path: {:?}", paths.doublecheck_db);
} else if shrink {
log::info!("shrunk database path: {:?}", paths.shrunk_db);
fn new(output_dir: &Path) -> Self {
Paths {
base: output_dir.to_path_buf(),
db: PathBuf::from(output_dir).join("test.db"),
plan: PathBuf::from(output_dir).join("plan.sql"),
shrunk_plan: PathBuf::from(output_dir).join("shrunk.sql"),
history: PathBuf::from(output_dir).join("history.txt"),
doublecheck_db: PathBuf::from(output_dir).join("double.db"),
shrunk_db: PathBuf::from(output_dir).join("shrunk.db"),
}
log::info!("simulator plan path: {:?}", paths.plan);
log::info!(
"simulator plan serialized path: {:?}",
paths.plan.with_extension("plan.json")
);
if shrink {
log::info!("shrunk plan path: {:?}", paths.shrunk_plan);
}
log::info!("simulator history path: {:?}", paths.history);
paths
}
}
@@ -68,45 +50,37 @@ fn main() -> Result<(), String> {
let cli_opts = SimulatorCLI::parse();
cli_opts.validate()?;
let seed = cli_opts.seed.unwrap_or_else(|| thread_rng().next_u64());
let output_dir = match &cli_opts.output_dir {
Some(dir) => Path::new(dir).to_path_buf(),
None => TempDir::new().map_err(|e| format!("{:?}", e))?.into_path(),
};
let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?;
banner();
let paths = Paths::new(&output_dir, cli_opts.shrink, cli_opts.doublecheck);
log::info!("seed: {}", seed);
// let paths = Paths::new(&output_dir, cli_opts.doublecheck);
let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0)));
let (env, plans) = setup_simulation(seed, &cli_opts, &paths.db, &paths.plan);
let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db);
let paths = bugbase.paths(seed);
// Create the output directory if it doesn't exist
if !paths.base.exists() {
std::fs::create_dir_all(&paths.base).map_err(|e| format!("{:?}", e))?;
}
if cli_opts.watch {
watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap();
} else if cli_opts.differential {
differential_testing(env, plans, last_execution.clone())
} else {
run_simulator(&cli_opts, &paths, env, plans, last_execution.clone());
run_simulator(
seed,
&mut bugbase,
&cli_opts,
&paths,
env,
plans,
last_execution.clone(),
);
}
// Print the seed, the locations of the database and the plan file at the end again for easily accessing them.
println!("database path: {:?}", paths.db);
if cli_opts.doublecheck {
println!("doublecheck database path: {:?}", paths.doublecheck_db);
} else if cli_opts.shrink {
println!("shrunk database path: {:?}", paths.shrunk_db);
}
println!("simulator plan path: {:?}", paths.plan);
println!(
"simulator plan serialized path: {:?}",
paths.plan.with_extension("plan.json")
);
if cli_opts.shrink {
println!("shrunk plan path: {:?}", paths.shrunk_plan);
}
println!("simulator history path: {:?}", paths.history);
println!("seed: {}", seed);
Ok(())
@@ -140,7 +114,6 @@ fn watch_mode(
std::panic::catch_unwind(|| {
let plan: Vec<Vec<Interaction>> =
InteractionPlan::compute_via_diff(&paths.plan);
let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db);
plan.iter().for_each(|is| {
is.iter().for_each(|i| {
@@ -173,6 +146,8 @@ fn watch_mode(
}
fn run_simulator(
seed: u64,
bugbase: &mut BugBase,
cli_opts: &SimulatorCLI,
paths: &Paths,
env: SimulatorEnv,
@@ -204,13 +179,17 @@ fn run_simulator(
);
if cli_opts.doublecheck {
doublecheck(env.clone(), paths, &plans, last_execution.clone(), result);
let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db);
let env = Arc::new(Mutex::new(env));
doublecheck(env, paths, &plans, last_execution.clone(), result);
} else {
// No doublecheck, run shrinking if panicking or found a bug.
match &result {
SandboxedResult::Correct => {
log::info!("simulation succeeded");
println!("simulation succeeded");
// remove the bugbase entry
bugbase.remove_bug(seed).unwrap();
}
SandboxedResult::Panicked {
error,
@@ -240,59 +219,62 @@ fn run_simulator(
log::error!("simulation failed: '{}'", error);
println!("simulation failed: '{}'", error);
if cli_opts.shrink {
log::info!("Starting to shrink");
log::info!("Starting to shrink");
let shrunk_plans = plans
.iter()
.map(|plan| {
let shrunk = plan.shrink_interaction_plan(last_execution);
log::info!("{}", shrunk.stats());
shrunk
})
.collect::<Vec<_>>();
let shrunk_plans = plans
.iter()
.map(|plan| {
let shrunk = plan.shrink_interaction_plan(last_execution);
log::info!("{}", shrunk.stats());
shrunk
})
.collect::<Vec<_>>();
// Write the shrunk plan to a file
let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap();
f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap();
// Write the shrunk plan to a file
let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap();
f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap();
let last_execution = Arc::new(Mutex::new(*last_execution));
let last_execution = Arc::new(Mutex::new(*last_execution));
let env = SimulatorEnv::new(seed, cli_opts, &paths.shrunk_db);
let shrunk = SandboxedResult::from(
std::panic::catch_unwind(|| {
run_simulation(
env.clone(),
&mut shrunk_plans.clone(),
last_execution.clone(),
)
}),
last_execution,
);
match (&shrunk, &result) {
(
SandboxedResult::Panicked { error: e1, .. },
SandboxedResult::Panicked { error: e2, .. },
let env = Arc::new(Mutex::new(env));
let shrunk = SandboxedResult::from(
std::panic::catch_unwind(|| {
run_simulation(
env.clone(),
&mut shrunk_plans.clone(),
last_execution.clone(),
)
| (
SandboxedResult::FoundBug { error: e1, .. },
SandboxedResult::FoundBug { error: e2, .. },
) => {
if e1 != e2 {
log::error!(
"shrinking failed, the error was not properly reproduced"
);
} else {
log::info!("shrinking succeeded");
}
}
(_, SandboxedResult::Correct) => {
unreachable!("shrinking should never be called on a correct simulation")
}
_ => {
}),
last_execution,
);
match (&shrunk, &result) {
(
SandboxedResult::Panicked { error: e1, .. },
SandboxedResult::Panicked { error: e2, .. },
)
| (
SandboxedResult::FoundBug { error: e1, .. },
SandboxedResult::FoundBug { error: e2, .. },
) => {
if e1 != e2 {
log::error!("shrinking failed, the error was not properly reproduced");
bugbase.add_bug(seed, plans[0].clone()).unwrap();
} else {
log::info!("shrinking succeeded");
println!("shrinking succeeded");
// Save the shrunk database
bugbase.add_bug(seed, shrunk_plans[0].clone()).unwrap();
}
}
(_, SandboxedResult::Correct) => {
unreachable!("shrinking should never be called on a correct simulation")
}
_ => {
log::error!("shrinking failed, the error was not properly reproduced");
bugbase.add_bug(seed, plans[0].clone()).unwrap();
}
}
}
}
@@ -306,16 +288,6 @@ fn doublecheck(
last_execution: Arc<Mutex<Execution>>,
result: SandboxedResult,
) {
{
let mut env_ = env.lock().unwrap();
env_.db = Database::open_file(
env_.io.clone(),
paths.doublecheck_db.to_str().unwrap(),
false,
)
.unwrap();
}
// Run the simulation again
let result2 = SandboxedResult::from(
std::panic::catch_unwind(|| {
@@ -443,54 +415,71 @@ impl SandboxedResult {
}
fn setup_simulation(
mut seed: u64,
bugbase: &mut BugBase,
cli_opts: &SimulatorCLI,
db_path: &Path,
plan_path: &Path,
) -> (SimulatorEnv, Vec<InteractionPlan>) {
if let Some(load) = &cli_opts.load {
let seed_path = PathBuf::from(load).with_extension("seed");
let seed_str = std::fs::read_to_string(&seed_path).unwrap();
seed = seed_str.parse().unwrap();
}
plan_path: fn(&Paths) -> &Path,
db_path: fn(&Paths) -> &Path,
) -> (u64, SimulatorEnv, Vec<InteractionPlan>) {
if let Some(seed) = &cli_opts.load {
let seed = seed.parse::<u64>().expect("seed should be a number");
let bug = bugbase
.get_bug(seed)
.unwrap_or_else(|| panic!("bug '{}' not found in bug base", seed));
let mut env = SimulatorEnv::new(seed, cli_opts, db_path);
let paths = bugbase.paths(seed);
if !paths.base.exists() {
std::fs::create_dir_all(&paths.base).unwrap();
}
let env = SimulatorEnv::new(bug.seed(), cli_opts, db_path(&paths));
// todo: the loading works correctly because of a hacky decision
// Right now, the plan generation is the only point we use the rng, so the environment doesn't
// even need it. In the future, especially with multi-connections and multi-threading, we might
// use the RNG for more things such as scheduling, so this assumption will fail. When that happens,
// we'll need to rearchitect this logic by saving and loading RNG state.
let plans = if let Some(load) = &cli_opts.load {
log::info!("Loading database interaction plan...");
let plan = std::fs::read_to_string(load).unwrap();
let plan: InteractionPlan = serde_json::from_str(&plan).unwrap();
vec![plan]
let plan = match bug {
Bug::Loaded { plan, .. } => plan.clone(),
Bug::Unloaded { seed } => {
let seed = *seed;
bugbase
.load_bug(seed)
.unwrap_or_else(|_| panic!("could not load bug '{}' in bug base", seed))
}
};
std::fs::write(plan_path(&paths), plan.to_string()).unwrap();
std::fs::write(
plan_path(&paths).with_extension("json"),
serde_json::to_string_pretty(&plan).unwrap(),
)
.unwrap();
let plans = vec![plan];
(seed, env, plans)
} else {
let seed = cli_opts.seed.unwrap_or_else(|| {
let mut rng = rand::thread_rng();
rng.next_u64()
});
let paths = bugbase.paths(seed);
if !paths.base.exists() {
std::fs::create_dir_all(&paths.base).unwrap();
}
let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db);
log::info!("Generating database interaction plan...");
(1..=env.opts.max_connections)
let plans = (1..=env.opts.max_connections)
.map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env))
.collect::<Vec<_>>()
};
.collect::<Vec<_>>();
// todo: for now, we only use 1 connection, so it's safe to use the first plan.
let plan = plans[0].clone();
let mut f = std::fs::File::create(plan_path).unwrap();
// todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan.
f.write_all(plan.to_string().as_bytes()).unwrap();
let serialized_plan_path = plan_path.with_extension("plan.json");
let mut f = std::fs::File::create(&serialized_plan_path).unwrap();
f.write_all(serde_json::to_string(&plan).unwrap().as_bytes())
// todo: for now, we only use 1 connection, so it's safe to use the first plan.
let plan = &plans[0];
log::info!("{}", plan.stats());
std::fs::write(plan_path(&paths), plan.to_string()).unwrap();
std::fs::write(
plan_path(&paths).with_extension("json"),
serde_json::to_string_pretty(&plan).unwrap(),
)
.unwrap();
let seed_path = plan_path.with_extension("seed");
let mut f = std::fs::File::create(&seed_path).unwrap();
f.write_all(seed.to_string().as_bytes()).unwrap();
log::info!("{}", plan.stats());
(env, plans)
(seed, env, plans)
}
}
fn run_simulation(

241
simulator/runner/bugbase.rs Normal file
View File

@@ -0,0 +1,241 @@
use std::{
collections::HashMap,
io::{self, Write},
path::PathBuf,
process::Command,
};
use crate::{InteractionPlan, Paths};
/// A bug is a run that has been identified as buggy.
///
/// A bug is always identified by the seed of the run. Its interaction plan is
/// either not held in memory (`Unloaded`) or held in memory (`Loaded`).
#[derive(Clone)]
pub(crate) enum Bug {
/// Only the seed is known; no plan is held in memory for this entry.
Unloaded { seed: u64 },
/// The seed together with the interaction plan recorded for it.
Loaded { seed: u64, plan: InteractionPlan },
}
impl Bug {
    /// Report whether the interaction plan of this bug is held in memory.
    pub(crate) fn is_loaded(&self) -> bool {
        matches!(self, Bug::Loaded { .. })
    }

    /// Return the seed identifying this bug, regardless of its state.
    pub(crate) fn seed(&self) -> u64 {
        match self {
            Bug::Unloaded { seed } | Bug::Loaded { seed, .. } => *seed,
        }
    }
}
/// Bug Base is a local database of buggy runs.
///
/// It is backed by a directory on disk in which every bug lives in a
/// sub-directory named after its seed.
pub(crate) struct BugBase {
/// Path to the bug base directory.
path: PathBuf,
/// The list of buggy runs, uniquely identified by their seed
bugs: HashMap<u64, Bug>,
}
impl BugBase {
/// Create a new bug base rooted at `path`.
///
/// Every sub-directory of `path` is expected to be named after the seed of a
/// recorded bug and is registered as a [`Bug::Unloaded`] entry; no plan is
/// read from disk here. If `path` cannot be listed, the bug base starts empty.
///
/// # Errors
/// Returns an error if a sub-directory name cannot be parsed as a `u64` seed.
fn new(path: PathBuf) -> Result<Self, String> {
    let mut bugs = HashMap::new();
    // list all the bugs in the path as directories
    if let Ok(entries) = std::fs::read_dir(&path) {
        // Entries that cannot be read are skipped rather than reported.
        for entry in entries.flatten() {
            if !entry.file_type().is_ok_and(|ft| ft.is_dir()) {
                continue;
            }
            let file_name = entry.file_name();
            let name = file_name.to_string_lossy();
            // Build the error message only when parsing actually fails.
            let seed = name.parse::<u64>().map_err(|_| {
                format!("failed to parse seed from directory name {}", name)
            })?;
            bugs.insert(seed, Bug::Unloaded { seed });
        }
    }
    Ok(Self { path, bugs })
}
/// Load the bug base from one of the potential paths.
///
/// Looks for an existing `.bugbase` directory under, in order, the limbo
/// project directory, the home directory and the current directory, and opens
/// the first one found. A location that cannot be resolved (for example the
/// project directory when not running inside a git checkout) is skipped during
/// this search instead of aborting it.
///
/// When no bug base exists yet, the user is asked on stdout/stdin where to
/// create one, and the chosen directory is created.
///
/// # Errors
/// Returns an error if the prompt cannot be written or answered, if the answer
/// is not `1`, `2` or `3`, if the chosen location cannot be resolved, or if the
/// bug base directory cannot be created or listed.
pub(crate) fn load() -> Result<Self, String> {
    // Resolve each candidate independently so that one failing lookup does not
    // prevent the others from being used.
    let project_dir = BugBase::get_limbo_project_dir();
    let home_dir =
        dirs::home_dir().ok_or_else(|| "should be able to get home directory".to_string());
    let current_dir = std::env::current_dir()
        .map_err(|_| "should be able to get current directory".to_string());

    for candidate in [&project_dir, &home_dir, &current_dir] {
        if let Ok(dir) = candidate {
            let path = dir.join(".bugbase");
            if path.exists() {
                return BugBase::new(path);
            }
        }
    }

    println!("select bug base location:");
    println!("1. limbo project directory");
    println!("2. home directory");
    println!("3. current directory");
    print!("> ");
    io::stdout()
        .flush()
        .map_err(|_| "failed to flush stdout".to_string())?;

    let mut choice = String::new();
    io::stdin()
        .read_line(&mut choice)
        .map_err(|_| "failed to read line".to_string())?;
    // Trim first so the error message does not carry the trailing newline.
    let choice = choice.trim();
    let selected = choice
        .parse::<u32>()
        .map_err(|_| format!("invalid choice {choice}"))?;

    let path = match selected {
        1 => project_dir?.join(".bugbase"),
        // Use the same home directory lookup as the search above.
        2 => home_dir?.join(".bugbase"),
        3 => PathBuf::from(".bugbase"),
        _ => return Err(format!("invalid choice {selected}")),
    };

    // `create_dir_all` succeeds when the directory already exists, so a bug
    // base that appeared since the search above is simply reused.
    std::fs::create_dir_all(&path).map_err(|_| "failed to create bug base".to_string())?;
    log::info!("bug base created at {}", path.display());
    BugBase::new(path)
}
/// Add a bug to the bug base, persisting its plan to disk.
///
/// If an entry for `seed` already exists it is replaced by the given plan.
/// Entries commonly exist already: `new` registers every seed directory found
/// on disk and `load_bug` keeps loaded plans in the map, so re-running a seed
/// that still fails must be able to record its (possibly updated) plan.
///
/// # Errors
/// Returns an error if the bug cannot be written to disk; in that case the
/// in-memory entry is left untouched.
pub(crate) fn add_bug(&mut self, seed: u64, plan: InteractionPlan) -> Result<(), String> {
    log::debug!("adding bug with seed {}", seed);
    if self.bugs.contains_key(&seed) {
        log::debug!("bug with seed {} already exists, updating its plan", seed);
    }
    // Persist first so the map never refers to a plan that is not on disk.
    self.save_bug(seed, &plan)?;
    self.bugs.insert(seed, Bug::Loaded { seed, plan });
    Ok(())
}
/// Look up the bug registered under `seed`, if there is one.
pub(crate) fn get_bug(&self, seed: u64) -> Option<&Bug> {
    let registered = &self.bugs;
    registered.get(&seed)
}
/// Write the bug identified by `seed` into its directory inside the bug base.
///
/// The directory `<bug base>/<seed>` is created if needed and three files are
/// written into it: `seed.txt` (the seed), `plan.json` (the plan serialized
/// with `serde_json`) and `plan.sql` (the plan rendered via `to_string`).
///
/// # Errors
/// Returns an error if the directory or any of the files cannot be written,
/// or if the plan cannot be serialized.
pub(crate) fn save_bug(&self, seed: u64, plan: &InteractionPlan) -> Result<(), String> {
    let seed_text = seed.to_string();
    let bug_dir = self.path.join(&seed_text);
    std::fs::create_dir_all(&bug_dir)
        .map_err(|_| "should be able to create bug directory".to_string())?;

    std::fs::write(bug_dir.join("seed.txt"), seed_text)
        .map_err(|_| "should be able to write seed file".to_string())?;

    // At some point we might want to also save the commit hash of the current
    // version of Limbo next to the seed (e.g. in a `commit_hash.txt` file).

    let serialized_plan = serde_json::to_string(plan)
        .map_err(|_| "should be able to serialize plan".to_string())?;
    std::fs::write(bug_dir.join("plan.json"), serialized_plan)
        .map_err(|_| "should be able to write plan file".to_string())?;

    let readable_plan = plan.to_string();
    std::fs::write(bug_dir.join("plan.sql"), readable_plan)
        .map_err(|_| "should be able to write readable plan file".to_string())?;

    Ok(())
}
/// Return the interaction plan recorded for `seed`, reading it from disk if
/// it is not in memory yet.
///
/// An unloaded bug is read from `<bug base>/<seed>/plan.json`, deserialized,
/// and its entry is replaced by a loaded one. A bug that is already loaded
/// yields a clone of the plan held in memory (and logs a warning).
///
/// # Errors
/// Returns an error if no bug is registered for `seed`, or if the plan file
/// cannot be read or deserialized.
pub(crate) fn load_bug(&mut self, seed: u64) -> Result<InteractionPlan, String> {
    // Handle the two cases that need no disk access first.
    match self.bugs.get(&seed) {
        None => return Err(format!("No bugs found for seed {}", seed)),
        Some(Bug::Loaded { plan, .. }) => {
            log::warn!(
                "Bug with seed {} is already loaded, returning the existing plan",
                seed
            );
            return Ok(plan.clone());
        }
        Some(Bug::Unloaded { .. }) => {}
    }

    let plan_file = self.path.join(seed.to_string()).join("plan.json");
    let serialized = std::fs::read_to_string(plan_file)
        .map_err(|_| "should be able to read plan file".to_string())?;
    let plan: InteractionPlan = serde_json::from_str(&serialized)
        .map_err(|_| "should be able to deserialize plan".to_string())?;

    // Keep a copy in the map so later lookups find the loaded plan.
    let loaded = Bug::Loaded {
        seed,
        plan: plan.clone(),
    };
    self.bugs.insert(seed, loaded);
    log::debug!("Loaded bug with seed {}", seed);
    Ok(plan)
}
/// Forget the bug registered under `seed` and delete its directory.
///
/// The in-memory entry is dropped first; the directory `<bug base>/<seed>`
/// is then removed recursively.
///
/// # Errors
/// Returns an error if the directory cannot be removed.
pub(crate) fn remove_bug(&mut self, seed: u64) -> Result<(), String> {
    self.bugs.remove(&seed);
    let bug_dir = self.path.join(seed.to_string());
    match std::fs::remove_dir_all(bug_dir) {
        Ok(()) => {
            log::debug!("Removed bug with seed {}", seed);
            Ok(())
        }
        Err(_) => Err("should be able to remove bug directory".to_string()),
    }
}
}
impl BugBase {
    /// Get the path to the bug base directory.
    pub(crate) fn path(&self) -> &PathBuf {
        &self.path
    }

    /// Get the path to the database file for a given seed.
    ///
    /// The path is `<bug base>/<seed>/test.db`, built component by component
    /// rather than by formatting a string with an embedded separator.
    pub(crate) fn db_path(&self, seed: u64) -> PathBuf {
        self.path.join(seed.to_string()).join("test.db")
    }

    /// Get paths to all the files for a given seed.
    ///
    /// The base directory is `<bug base>/<seed>`, the same directory the other
    /// `BugBase` methods derive for that seed.
    pub(crate) fn paths(&self, seed: u64) -> Paths {
        let base = self.path.join(seed.to_string());
        Paths::new(&base)
    }
}
impl BugBase {
    /// Locate the top-level directory of the git working tree that contains
    /// the current working directory.
    ///
    /// Runs `git rev-parse --show-toplevel` and returns its output as a path.
    /// Asking git for the top-level directory directly (instead of deriving it
    /// from the `.git` directory path) also works in checkouts whose git
    /// directory does not end in `.git`, such as linked worktrees.
    ///
    /// # Errors
    /// Returns an error if `git` cannot be run, exits unsuccessfully (for
    /// example outside of a repository), or prints a path that is empty or
    /// not valid UTF-8.
    pub(crate) fn get_limbo_project_dir() -> Result<PathBuf, String> {
        let output = Command::new("git")
            .args(["rev-parse", "--show-toplevel"])
            .output()
            .map_err(|_| "should be able to get the git path".to_string())?;

        // `output()` only fails when the process cannot be spawned; a failing
        // git invocation is reported through its exit status.
        if !output.status.success() {
            return Err("should be able to get the git path".to_string());
        }

        let stdout = String::from_utf8(output.stdout)
            .map_err(|_| "git path should be valid utf8".to_string())?;
        let top_level = stdout.trim();
        if top_level.is_empty() {
            return Err("should be able to get the git path".to_string());
        }

        Ok(PathBuf::from(top_level))
    }
}

View File

@@ -6,8 +6,6 @@ use clap::{command, Parser};
pub struct SimulatorCLI {
#[clap(short, long, help = "set seed for reproducible runs", default_value = None)]
pub seed: Option<u64>,
#[clap(short, long, help = "set custom output directory for produced files", default_value = None)]
pub output_dir: Option<String>,
#[clap(
short,
long,
@@ -35,13 +33,7 @@ pub struct SimulatorCLI {
default_value_t = 60 * 60 // default to 1 hour
)]
pub maximum_time: usize,
#[clap(
short = 'm',
long,
help = "minimize(shrink) the failing counterexample"
)]
pub shrink: bool,
#[clap(short = 'l', long, help = "load plan from a file")]
#[clap(short = 'l', long, help = "load plan from the bug base")]
pub load: Option<String>,
#[clap(
short = 'w',
@@ -66,14 +58,8 @@ impl SimulatorCLI {
return Err("Minimum size cannot be greater than maximum size".to_string());
}
// Make sure incompatible options are not set
if self.shrink && self.doublecheck {
return Err("Cannot use shrink and doublecheck at the same time".to_string());
}
if let Some(plan_path) = &self.load {
std::fs::File::open(plan_path)
.map_err(|_| format!("Plan file '{}' could not be opened", plan_path))?;
if self.seed.is_some() && self.load.is_some() {
return Err("Cannot set seed and load plan at the same time".to_string());
}
Ok(())

View File

@@ -85,6 +85,7 @@ impl SimulatorEnv {
// Remove existing database file if it exists
if db_path.exists() {
std::fs::remove_file(db_path).unwrap();
std::fs::remove_file(db_path.with_extension("db-wal")).unwrap();
}
let db = match Database::open_file(io.clone(), db_path.to_str().unwrap(), false) {

View File

@@ -68,7 +68,12 @@ pub(crate) fn execute_plans(
// Pick the connection to interact with
let connection_index = pick_index(env.connections.len(), &mut env.rng);
let state = &mut states[connection_index];
std::thread::sleep(std::time::Duration::from_millis(
std::env::var("TICK_SLEEP")
.unwrap_or("0".into())
.parse()
.unwrap_or(0),
));
history.history.push(Execution::new(
connection_index,
state.interaction_pointer,
@@ -121,6 +126,7 @@ fn execute_plan(
} else {
match execute_interaction(env, connection_index, interaction, &mut state.stack) {
Ok(next_execution) => {
interaction.shadow(env);
log::debug!("connection {} processed", connection_index);
// Move to the next interaction or property
match next_execution {

View File

@@ -1,3 +1,4 @@
pub mod bugbase;
pub mod cli;
pub mod differential;
pub mod env;