diff --git a/Cargo.lock b/Cargo.lock index e7365d1e9..0f19f5586 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -782,6 +782,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "filetime" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.59.0", +] + [[package]] name = "findshlibs" version = "0.10.2" @@ -800,6 +812,15 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" +[[package]] +name = "fsevent-sys" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2" +dependencies = [ + "libc", +] + [[package]] name = "futures" version = "0.3.31" @@ -1056,6 +1077,26 @@ dependencies = [ "str_stack", ] +[[package]] +name = "inotify" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" +dependencies = [ + "bitflags 2.8.0", + "inotify-sys", + "libc", +] + +[[package]] +name = "inotify-sys" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb" +dependencies = [ + "libc", +] + [[package]] name = "io-uring" version = "0.6.4" @@ -1170,6 +1211,26 @@ dependencies = [ "chrono", ] +[[package]] +name = "kqueue" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c" +dependencies = [ + "kqueue-sys", + "libc", +] + +[[package]] +name = "kqueue-sys" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b" +dependencies = [ + "bitflags 1.3.2", + "libc", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1210,6 +1271,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.8.0", "libc", + "redox_syscall", ] [[package]] @@ -1343,8 +1405,11 @@ dependencies = [ "env_logger 0.10.2", "limbo_core", "log", + "notify", "rand", "rand_chacha", + "serde", + "serde_json", "tempfile", ] @@ -1475,6 +1540,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", + "log", "wasi", "windows-sys 0.52.0", ] @@ -1558,6 +1624,31 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "notify" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943" +dependencies = [ + "bitflags 2.8.0", + "filetime", + "fsevent-sys", + "inotify", + "kqueue", + "libc", + "log", + "mio", + "notify-types", + "walkdir", + "windows-sys 0.59.0", +] + +[[package]] +name = "notify-types" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" + [[package]] name = "num-format" version = "0.4.4" diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 31a54f1e6..43956e5e2 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -23,3 +23,6 @@ tempfile = "3.0.7" env_logger = "0.10.1" anarchist-readable-name-generator-lib = "0.1.2" clap = { version = "4.5", features = ["derive"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = { version = "1.0" } +notify = "8.0.0" diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 8cd484cd2..3b124b8f8 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -1,13 +1,15 @@ -use std::{fmt::Display, rc::Rc, vec}; +use std::{fmt::Display, path::Path, rc::Rc, vec}; use limbo_core::{Connection, Result, StepResult}; +use serde::{Deserialize, Serialize}; use crate::{ model::{ query::{Create, Insert, Query, Select}, table::Value, }, - SimConnection, SimulatorEnv, + runner::env::SimConnection, + SimulatorEnv, }; use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; @@ -19,18 +21,81 @@ use super::{ pub(crate) type ResultSet = Result>>; -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] pub(crate) struct InteractionPlan { pub(crate) plan: Vec, } +impl InteractionPlan { + /// Compute via diff computes a a plan from a given `.plan` file without the need to parse + /// sql. This is possible because there are two versions of the plan file, one that is human + /// readable and one that is serialized as JSON. Under watch mode, the users will be able to + /// delete interactions from the human readable file, and this function uses the JSON file as + /// a baseline to detect with interactions were deleted and constructs the plan from the + /// remaining interactions. + pub(crate) fn compute_via_diff(plan_path: &Path) -> Vec> { + let interactions = std::fs::read_to_string(plan_path).unwrap(); + let interactions = interactions.lines().collect::>(); + + let plan: InteractionPlan = serde_json::from_str( + std::fs::read_to_string(plan_path.with_extension("plan.json")) + .unwrap() + .as_str(), + ) + .unwrap(); + + let mut plan = plan + .plan + .into_iter() + .map(|i| i.interactions()) + .collect::>(); + + let (mut i, mut j1, mut j2) = (0, 0, 0); + + while i < interactions.len() && j1 < plan.len() { + if interactions[i].starts_with("-- begin") + || interactions[i].starts_with("-- end") + || interactions[i].is_empty() + { + i += 1; + continue; + } + + if interactions[i].contains(plan[j1][j2].to_string().as_str()) { + i += 1; + if j2 + 1 < plan[j1].len() { + j2 += 1; + } else { + j1 += 1; + j2 = 0; + } + } else { + plan[j1].remove(j2); + + if plan[j1].is_empty() { + plan.remove(j1); + j2 = 0; + } + } + } + if j1 < plan.len() { + if j2 < plan[j1].len() { + let _ = plan[j1].split_off(j2); + } + let _ = plan.split_off(j1); + } + + plan + } +} + pub(crate) struct InteractionPlanState { pub(crate) stack: Vec, pub(crate) interaction_pointer: usize, pub(crate) secondary_pointer: usize, } -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] pub(crate) enum Interactions { Property(Property), Query(Query), @@ -108,12 +173,12 @@ impl Display for InteractionPlan { match interaction { Interaction::Query(query) => writeln!(f, "{};", query)?, Interaction::Assumption(assumption) => { - writeln!(f, "-- ASSUME: {};", assumption.message)? + writeln!(f, "-- ASSUME {};", assumption.message)? } Interaction::Assertion(assertion) => { - writeln!(f, "-- ASSERT: {};", assertion.message)? + writeln!(f, "-- ASSERT {};", assertion.message)? } - Interaction::Fault(fault) => writeln!(f, "-- FAULT: {};", fault)?, + Interaction::Fault(fault) => writeln!(f, "-- FAULT '{}';", fault)?, } } writeln!(f, "-- end testing '{}'", name)?; @@ -160,9 +225,9 @@ impl Display for Interaction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Query(query) => write!(f, "{}", query), - Self::Assumption(assumption) => write!(f, "ASSUME: {}", assumption.message), - Self::Assertion(assertion) => write!(f, "ASSERT: {}", assertion.message), - Self::Fault(fault) => write!(f, "FAULT: {}", fault), + Self::Assumption(assumption) => write!(f, "ASSUME {}", assumption.message), + Self::Assertion(assertion) => write!(f, "ASSERT {}", assertion.message), + Self::Fault(fault) => write!(f, "FAULT '{}'", fault), } } } @@ -178,7 +243,7 @@ pub(crate) struct Assertion { pub(crate) message: String, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) enum Fault { Disconnect, } @@ -195,6 +260,29 @@ impl Interactions { pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { match self { Interactions::Property(property) => { + match property { + Property::InsertSelect { + insert, + row_index: _, + queries, + select, + } => { + insert.shadow(env); + for query in queries { + query.shadow(env); + } + select.shadow(env); + } + Property::DoubleCreateFailure { create, queries } => { + if env.tables.iter().any(|t| t.name == create.table.name) { + return; + } + create.shadow(env); + for query in queries { + query.shadow(env); + } + } + } for interaction in property.interactions() { match interaction { Interaction::Query(query) => match query { @@ -220,23 +308,7 @@ impl Interactions { } } } - Interactions::Query(query) => match query { - Query::Create(create) => { - if !env.tables.iter().any(|t| t.name == create.table.name) { - env.tables.push(create.table.clone()); - } - } - Query::Insert(insert) => { - let table = env - .tables - .iter_mut() - .find(|t| t.name == insert.table) - .unwrap(); - table.rows.extend(insert.values.clone()); - } - Query::Delete(_) => todo!(), - Query::Select(_) => {} - }, + Interactions::Query(query) => query.shadow(env), Interactions::Fault(_) => {} } } @@ -317,6 +389,14 @@ impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { } impl Interaction { + pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { + match self { + Self::Query(query) => query.shadow(env), + Self::Assumption(_) => {} + Self::Assertion(_) => {} + Self::Fault(_) => {} + } + } pub(crate) fn execute_query(&self, conn: &mut Rc) -> ResultSet { if let Self::Query(query) = self { let query_str = query.to_string(); diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index c902aca9d..bfa1e1ed5 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1,4 +1,5 @@ use limbo_core::LimboError; +use serde::{Deserialize, Serialize}; use crate::{ model::{ @@ -16,7 +17,7 @@ use super::{ /// Properties are representations of executable specifications /// about the database behavior. -#[derive(Clone)] +#[derive(Clone, Serialize, Deserialize)] pub(crate) enum Property { /// Insert-Select is a property in which the inserted row /// must be in the resulting rows of a select query that has a @@ -103,7 +104,6 @@ impl Property { let assertion = Interaction::Assertion(Assertion { message: format!( - // todo: add the part inserting ({} = {})", "row [{:?}] not found in table {}", row.iter().map(|v| v.to_string()).collect::>(), insert.table, diff --git a/simulator/main.rs b/simulator/main.rs index cb182ce5d..82c39b809 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,20 +1,20 @@ #![allow(clippy::arc_with_non_send_sync, dead_code)] use clap::Parser; -use core::panic; -use generation::plan::{InteractionPlan, InteractionPlanState}; +use generation::plan::{Interaction, InteractionPlan, InteractionPlanState}; use generation::ArbitraryFrom; use limbo_core::Database; +use notify::event::{DataChange, ModifyKind}; +use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; -use rand_chacha::ChaCha8Rng; use runner::cli::SimulatorCLI; -use runner::env::{SimConnection, SimulatorEnv, SimulatorOpts}; +use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; -use runner::io::SimulatorIO; +use runner::watch; use std::any::Any; use std::backtrace::Backtrace; use std::io::Write; use std::path::{Path, PathBuf}; -use std::sync::{Arc, Mutex}; +use std::sync::{mpsc, Arc, Mutex}; use tempfile::TempDir; mod generation; @@ -49,6 +49,10 @@ impl Paths { log::info!("shrunk database path: {:?}", paths.shrunk_db); } log::info!("simulator plan path: {:?}", paths.plan); + log::info!( + "simulator plan serialized path: {:?}", + paths.plan.with_extension("plan.json") + ); if shrink { log::info!("shrunk plan path: {:?}", paths.shrunk_plan); } @@ -77,7 +81,85 @@ fn main() -> Result<(), String> { log::info!("seed: {}", seed); let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); + let (env, plans) = setup_simulation(seed, &cli_opts, &paths.db, &paths.plan); + if cli_opts.watch { + watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); + } else { + run_simulator(seed, &cli_opts, &paths, env, plans, last_execution.clone()); + } + + Ok(()) +} + +fn watch_mode( + seed: u64, + cli_opts: &SimulatorCLI, + paths: &Paths, + last_execution: Arc>, +) -> notify::Result<()> { + let (tx, rx) = mpsc::channel::>(); + println!("watching {:?}", paths.plan); + // Use recommended_watcher() to automatically select the best implementation + // for your platform. The `EventHandler` passed to this constructor can be a + // closure, a `std::sync::mpsc::Sender`, a `crossbeam_channel::Sender`, or + // another type the trait is implemented for. + let mut watcher = notify::recommended_watcher(tx)?; + + // Add a path to be watched. All files and directories at that path and + // below will be monitored for changes. + watcher.watch(&paths.plan, RecursiveMode::NonRecursive)?; + // Block forever, printing out events as they come in + for res in rx { + match res { + Ok(event) => { + if let EventKind::Modify(ModifyKind::Data(DataChange::Content)) = event.kind { + log::info!("plan file modified, rerunning simulation"); + + let result = SandboxedResult::from( + std::panic::catch_unwind(|| { + let plan: Vec> = + InteractionPlan::compute_via_diff(&paths.plan); + + let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); + plan.iter().for_each(|is| { + is.iter().for_each(|i| { + i.shadow(&mut env); + }); + }); + let env = Arc::new(Mutex::new(env.clone())); + watch::run_simulation(env, &mut [plan], last_execution.clone()) + }), + last_execution.clone(), + ); + match result { + SandboxedResult::Correct => { + log::info!("simulation succeeded"); + println!("simulation succeeded"); + } + SandboxedResult::Panicked { error, .. } + | SandboxedResult::FoundBug { error, .. } => { + log::error!("simulation failed: '{}'", error); + println!("simulation failed: '{}'", error); + } + } + } + } + Err(e) => println!("watch error: {:?}", e), + } + } + + Ok(()) +} + +fn run_simulator( + seed: u64, + cli_opts: &SimulatorCLI, + paths: &Paths, + env: SimulatorEnv, + plans: Vec, + last_execution: Arc>, +) { std::panic::set_hook(Box::new(move |info| { log::error!("panic occurred"); @@ -94,81 +176,22 @@ fn main() -> Result<(), String> { log::error!("captured backtrace:\n{}", bt); })); + let env = Arc::new(Mutex::new(env)); let result = SandboxedResult::from( std::panic::catch_unwind(|| { - run_simulation( - seed, - &cli_opts, - &paths.db, - &paths.plan, - last_execution.clone(), - None, - ) + run_simulation(env.clone(), &mut plans.clone(), last_execution.clone()) }), last_execution.clone(), ); if cli_opts.doublecheck { - // Run the simulation again - let result2 = SandboxedResult::from( - std::panic::catch_unwind(|| { - run_simulation( - seed, - &cli_opts, - &paths.doublecheck_db, - &paths.plan, - last_execution.clone(), - None, - ) - }), - last_execution.clone(), - ); - - match (result, result2) { - (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => { - log::error!("doublecheck failed! first run succeeded, but second run panicked."); - } - (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run panicked." - ); - } - (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => { - log::error!("doublecheck failed! first run panicked, but second run succeeded."); - } - (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run panicked, but second run failed an assertion." - ); - } - (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run succeeded, but second run failed an assertion." - ); - } - (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run succeeded." - ); - } - (SandboxedResult::Correct, SandboxedResult::Correct) - | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. }) - | (SandboxedResult::Panicked { .. }, SandboxedResult::Panicked { .. }) => { - // Compare the two database files byte by byte - let db_bytes = std::fs::read(&paths.db).unwrap(); - let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap(); - if db_bytes != doublecheck_db_bytes { - log::error!("doublecheck failed! database files are different."); - } else { - log::info!("doublecheck succeeded! database files are the same."); - } - } - } + doublecheck(env.clone(), paths, &plans, last_execution.clone(), result); } else { // No doublecheck, run shrinking if panicking or found a bug. match &result { SandboxedResult::Correct => { log::info!("simulation succeeded"); + println!("simulation succeeded"); } SandboxedResult::Panicked { error, @@ -196,21 +219,28 @@ fn main() -> Result<(), String> { } log::error!("simulation failed: '{}'", error); + println!("simulation failed: '{}'", error); if cli_opts.shrink { log::info!("Starting to shrink"); - let shrink = Some(last_execution); + + let shrunk_plans = plans + .iter() + .map(|plan| { + let shrunk = plan.shrink_interaction_plan(last_execution); + log::info!("{}", shrunk.stats()); + shrunk + }) + .collect::>(); + let last_execution = Arc::new(Mutex::new(*last_execution)); let shrunk = SandboxedResult::from( std::panic::catch_unwind(|| { run_simulation( - seed, - &cli_opts, - &paths.shrunk_db, - &paths.shrunk_plan, + env.clone(), + &mut shrunk_plans.clone(), last_execution.clone(), - shrink, ) }), last_execution, @@ -258,35 +288,78 @@ fn main() -> Result<(), String> { println!("shrunk database path: {:?}", paths.shrunk_db); } println!("simulator plan path: {:?}", paths.plan); + println!( + "simulator plan serialized path: {:?}", + paths.plan.with_extension("plan.json") + ); if cli_opts.shrink { println!("shrunk plan path: {:?}", paths.shrunk_plan); } println!("simulator history path: {:?}", paths.history); println!("seed: {}", seed); - - Ok(()) } -fn move_db_and_plan_files(output_dir: &Path) { - let old_db_path = output_dir.join("simulator.db"); - let old_plan_path = output_dir.join("simulator.plan"); +fn doublecheck( + env: Arc>, + paths: &Paths, + plans: &[InteractionPlan], + last_execution: Arc>, + result: SandboxedResult, +) { + { + let mut env_ = env.lock().unwrap(); + env_.db = + Database::open_file(env_.io.clone(), paths.doublecheck_db.to_str().unwrap()).unwrap(); + } - let new_db_path = output_dir.join("simulator_double.db"); - let new_plan_path = output_dir.join("simulator_double.plan"); + // Run the simulation again + let result2 = SandboxedResult::from( + std::panic::catch_unwind(|| { + run_simulation(env.clone(), &mut plans.to_owned(), last_execution.clone()) + }), + last_execution.clone(), + ); - std::fs::rename(&old_db_path, &new_db_path).unwrap(); - std::fs::rename(&old_plan_path, &new_plan_path).unwrap(); -} - -fn revert_db_and_plan_files(output_dir: &Path) { - let old_db_path = output_dir.join("simulator.db"); - let old_plan_path = output_dir.join("simulator.plan"); - - let new_db_path = output_dir.join("simulator_double.db"); - let new_plan_path = output_dir.join("simulator_double.plan"); - - std::fs::rename(&new_db_path, &old_db_path).unwrap(); - std::fs::rename(&new_plan_path, &old_plan_path).unwrap(); + match (result, result2) { + (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => { + log::error!("doublecheck failed! first run succeeded, but second run panicked."); + } + (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => { + log::error!( + "doublecheck failed! first run failed an assertion, but second run panicked." + ); + } + (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => { + log::error!("doublecheck failed! first run panicked, but second run succeeded."); + } + (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => { + log::error!( + "doublecheck failed! first run panicked, but second run failed an assertion." + ); + } + (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => { + log::error!( + "doublecheck failed! first run succeeded, but second run failed an assertion." + ); + } + (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => { + log::error!( + "doublecheck failed! first run failed an assertion, but second run succeeded." + ); + } + (SandboxedResult::Correct, SandboxedResult::Correct) + | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. }) + | (SandboxedResult::Panicked { .. }, SandboxedResult::Panicked { .. }) => { + // Compare the two database files byte by byte + let db_bytes = std::fs::read(&paths.db).unwrap(); + let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap(); + if db_bytes != doublecheck_db_bytes { + log::error!("doublecheck failed! database files are different."); + } else { + log::info!("doublecheck succeeded! database files are the same."); + } + } + } } #[derive(Debug)] @@ -342,67 +415,64 @@ impl SandboxedResult { } } -fn run_simulation( - seed: u64, +fn setup_simulation( + mut seed: u64, cli_opts: &SimulatorCLI, db_path: &Path, plan_path: &Path, +) -> (SimulatorEnv, Vec) { + if let Some(load) = &cli_opts.load { + let seed_path = PathBuf::from(load).with_extension("seed"); + let seed_str = std::fs::read_to_string(&seed_path).unwrap(); + seed = seed_str.parse().unwrap(); + } + + let mut env = SimulatorEnv::new(seed, cli_opts, db_path); + + // todo: the loading works correctly because of a hacky decision + // Rigth now, the plan generation is the only point we use the rng, so the environment doesn't + // even need it. In the future, especially with multi-connections and multi-threading, we might + // use the RNG for more things such as scheduling, so this assumption will fail. When that happens, + // we'll need to reachitect this logic by saving and loading RNG state. + let plans = if let Some(load) = &cli_opts.load { + log::info!("Loading database interaction plan..."); + let plan = std::fs::read_to_string(load).unwrap(); + let plan: InteractionPlan = serde_json::from_str(&plan).unwrap(); + vec![plan] + } else { + log::info!("Generating database interaction plan..."); + (1..=env.opts.max_connections) + .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) + .collect::>() + }; + + // todo: for now, we only use 1 connection, so it's safe to use the first plan. + let plan = plans[0].clone(); + + let mut f = std::fs::File::create(plan_path).unwrap(); + // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. + f.write_all(plan.to_string().as_bytes()).unwrap(); + + let serialized_plan_path = plan_path.with_extension("plan.json"); + let mut f = std::fs::File::create(&serialized_plan_path).unwrap(); + f.write_all(serde_json::to_string(&plan).unwrap().as_bytes()) + .unwrap(); + + let seed_path = plan_path.with_extension("seed"); + let mut f = std::fs::File::create(&seed_path).unwrap(); + f.write_all(seed.to_string().as_bytes()).unwrap(); + + log::info!("{}", plan.stats()); + (env, plans) +} + +fn run_simulation( + env: Arc>, + plans: &mut [InteractionPlan], last_execution: Arc>, - shrink: Option<&Execution>, ) -> ExecutionResult { - let mut rng = ChaCha8Rng::seed_from_u64(seed); + log::info!("Executing database interaction plan..."); - let (create_percent, read_percent, write_percent, delete_percent) = { - let mut remaining = 100.0; - let read_percent = rng.gen_range(0.0..=remaining); - remaining -= read_percent; - let write_percent = rng.gen_range(0.0..=remaining); - remaining -= write_percent; - let delete_percent = remaining; - - let create_percent = write_percent / 10.0; - let write_percent = write_percent - create_percent; - - (create_percent, read_percent, write_percent, delete_percent) - }; - - let opts = SimulatorOpts { - ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), - max_connections: 1, // TODO: for now let's use one connection as we didn't implement - // correct transactions procesing - max_tables: rng.gen_range(0..128), - create_percent, - read_percent, - write_percent, - delete_percent, - page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), - max_time_simulation: cli_opts.maximum_time, - }; - let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); - - let db = match Database::open_file(io.clone(), db_path.to_str().unwrap()) { - Ok(db) => db, - Err(e) => { - panic!("error opening simulator test file {:?}: {:?}", db_path, e); - } - }; - - let connections = vec![SimConnection::Disconnected; opts.max_connections]; - - let mut env = SimulatorEnv { - opts, - tables: Vec::new(), - connections, - rng, - io, - db, - }; - - log::info!("Generating database interaction plan..."); - let mut plans = (1..=env.opts.max_connections) - .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) - .collect::>(); let mut states = plans .iter() .map(|_| InteractionPlanState { @@ -411,27 +481,9 @@ fn run_simulation( secondary_pointer: 0, }) .collect::>(); + let result = execute_plans(env.clone(), plans, &mut states, last_execution); - let plan = if let Some(failing_execution) = shrink { - // todo: for now, we only use 1 connection, so it's safe to use the first plan. - println!("Interactions Before: {}", plans[0].plan.len()); - let shrunk = plans[0].shrink_interaction_plan(failing_execution); - println!("Interactions After: {}", shrunk.plan.len()); - shrunk - } else { - plans[0].clone() - }; - - let mut f = std::fs::File::create(plan_path).unwrap(); - // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan. - f.write_all(plan.to_string().as_bytes()).unwrap(); - - log::info!("{}", plan.stats()); - - log::info!("Executing database interaction plan..."); - - let result = execute_plans(&mut env, &mut plans, &mut states, last_execution); - + let env = env.lock().unwrap(); env.io.print_stats(); log::info!("Simulation completed"); diff --git a/simulator/model/query.rs b/simulator/model/query.rs index 9138b1988..f03bbde6f 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -1,8 +1,13 @@ use std::fmt::Display; -use crate::model::table::{Table, Value}; +use serde::{Deserialize, Serialize}; -#[derive(Clone, Debug, PartialEq)] +use crate::{ + model::table::{Table, Value}, + runner::env::SimulatorEnv, +}; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) enum Predicate { And(Vec), // p1 AND p2 AND p3... AND pn Or(Vec), // p1 OR p2 OR p3... OR pn @@ -83,7 +88,7 @@ impl Display for Predicate { } // This type represents the potential queries on the database. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) enum Query { Create(Create), Select(Select), @@ -108,30 +113,65 @@ impl Query { | Query::Delete(Delete { table, .. }) => vec![table.clone()], } } + + pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { + match self { + Query::Create(create) => create.shadow(env), + Query::Insert(insert) => insert.shadow(env), + Query::Delete(delete) => delete.shadow(env), + Query::Select(select) => select.shadow(env), + } + } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) struct Create { pub(crate) table: Table, } -#[derive(Clone, Debug, PartialEq)] +impl Create { + pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { + if !env.tables.iter().any(|t| t.name == self.table.name) { + env.tables.push(self.table.clone()); + } + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Select { pub(crate) table: String, pub(crate) predicate: Predicate, } -#[derive(Clone, Debug, PartialEq)] +impl Select { + pub(crate) fn shadow(&self, _env: &mut SimulatorEnv) {} +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Insert { pub(crate) table: String, pub(crate) values: Vec>, } -#[derive(Clone, Debug, PartialEq)] +impl Insert { + pub(crate) fn shadow(&self, env: &mut SimulatorEnv) { + if let Some(t) = env.tables.iter_mut().find(|t| t.name == self.table) { + t.rows.extend(self.values.clone()); + } + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Delete { pub(crate) table: String, pub(crate) predicate: Predicate, } +impl Delete { + pub(crate) fn shadow(&self, _env: &mut SimulatorEnv) { + todo!() + } +} + impl Display for Query { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/simulator/model/table.rs b/simulator/model/table.rs index ab3b003af..ff3e2e5bf 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -1,5 +1,7 @@ use std::{fmt::Display, ops::Deref}; +use serde::{Deserialize, Serialize}; + pub(crate) struct Name(pub(crate) String); impl Deref for Name { @@ -10,14 +12,14 @@ impl Deref for Name { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) struct Table { pub(crate) rows: Vec>, pub(crate) name: String, pub(crate) columns: Vec, } -#[allow(dead_code)] -#[derive(Debug, Clone)] + +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) struct Column { pub(crate) name: String, pub(crate) column_type: ColumnType, @@ -25,7 +27,7 @@ pub(crate) struct Column { pub(crate) unique: bool, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub(crate) enum ColumnType { Integer, Float, @@ -44,10 +46,30 @@ impl Display for ColumnType { } } -#[derive(Clone, Debug, PartialEq)] +fn float_to_string(float: &f64, serializer: S) -> Result +where + S: serde::Serializer, +{ + serializer.serialize_str(&format!("{}", float)) +} + +fn string_to_float<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) enum Value { Null, Integer(i64), + // we use custom serialization to preserve float precision + #[serde( + serialize_with = "float_to_string", + deserialize_with = "string_to_float" + )] Float(f64), Text(String), Blob(Vec), diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index b4a6d94f1..93a14849f 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -41,6 +41,14 @@ pub struct SimulatorCLI { help = "minimize(shrink) the failing counterexample" )] pub shrink: bool, + #[clap(short = 'l', long, help = "load plan from a file")] + pub load: Option, + #[clap( + short = 'w', + long, + help = "enable watch mode that reruns the simulation on file changes" + )] + pub watch: bool, } impl SimulatorCLI { @@ -51,9 +59,21 @@ impl SimulatorCLI { if self.maximum_size < 1 { return Err("maximum size must be at least 1".to_string()); } + // todo: fix an issue here where if minimum size is not defined, it prevents setting low maximum sizes. if self.minimum_size > self.maximum_size { return Err("Minimum size cannot be greater than maximum size".to_string()); } + + // Make sure uncompatible options are not set + if self.shrink && self.doublecheck { + return Err("Cannot use shrink and doublecheck at the same time".to_string()); + } + + if let Some(plan_path) = &self.load { + std::fs::File::open(plan_path) + .map_err(|_| format!("Plan file '{}' could not be opened", plan_path))?; + } + Ok(()) } } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 7edad025f..2813b80e8 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -1,13 +1,18 @@ +use std::path::Path; use std::rc::Rc; use std::sync::Arc; use limbo_core::{Connection, Database}; +use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use crate::model::table::Table; use crate::runner::io::SimulatorIO; +use super::cli::SimulatorCLI; + +#[derive(Clone)] pub(crate) struct SimulatorEnv { pub(crate) opts: SimulatorOpts, pub(crate) tables: Vec, @@ -17,6 +22,65 @@ pub(crate) struct SimulatorEnv { pub(crate) rng: ChaCha8Rng, } +impl SimulatorEnv { + pub(crate) fn new(seed: u64, cli_opts: &SimulatorCLI, db_path: &Path) -> Self { + let mut rng = ChaCha8Rng::seed_from_u64(seed); + + let (create_percent, read_percent, write_percent, delete_percent) = { + let mut remaining = 100.0; + let read_percent = rng.gen_range(0.0..=remaining); + remaining -= read_percent; + let write_percent = rng.gen_range(0.0..=remaining); + remaining -= write_percent; + let delete_percent = remaining; + + let create_percent = write_percent / 10.0; + let write_percent = write_percent - create_percent; + + (create_percent, read_percent, write_percent, delete_percent) + }; + + let opts = SimulatorOpts { + ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + max_connections: 1, // TODO: for now let's use one connection as we didn't implement + // correct transactions procesing + max_tables: rng.gen_range(0..128), + create_percent, + read_percent, + write_percent, + delete_percent, + page_size: 4096, // TODO: randomize this too + max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + max_time_simulation: cli_opts.maximum_time, + }; + + let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); + + // Remove existing database file if it exists + if db_path.exists() { + std::fs::remove_file(db_path).unwrap(); + } + + let db = match Database::open_file(io.clone(), db_path.to_str().unwrap()) { + Ok(db) => db, + Err(e) => { + panic!("error opening simulator test file {:?}: {:?}", db_path, e); + } + }; + + let connections = vec![SimConnection::Disconnected; opts.max_connections]; + + SimulatorEnv { + opts, + tables: Vec::new(), + connections, + rng, + io, + db, + } + } +} + #[derive(Clone)] pub(crate) enum SimConnection { Connected(Rc), diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index e39fed01f..6544928a1 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -36,7 +36,7 @@ pub(crate) struct ExecutionHistory { } impl ExecutionHistory { - fn new() -> Self { + pub(crate) fn new() -> Self { Self { history: Vec::new(), } @@ -49,19 +49,20 @@ pub(crate) struct ExecutionResult { } impl ExecutionResult { - fn new(history: ExecutionHistory, error: Option) -> Self { + pub(crate) fn new(history: ExecutionHistory, error: Option) -> Self { Self { history, error } } } pub(crate) fn execute_plans( - env: &mut SimulatorEnv, + env: Arc>, plans: &mut [InteractionPlan], states: &mut [InteractionPlanState], last_execution: Arc>, ) -> ExecutionResult { let mut history = ExecutionHistory::new(); let now = std::time::Instant::now(); + let mut env = env.lock().unwrap(); for _tick in 0..env.opts.ticks { // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); @@ -77,7 +78,7 @@ pub(crate) fn execute_plans( last_execution.interaction_index = state.interaction_pointer; last_execution.secondary_index = state.secondary_pointer; // Execute the interaction for the selected connection - match execute_plan(env, connection_index, plans, states) { + match execute_plan(&mut env, connection_index, plans, states) { Ok(_) => {} Err(err) => { return ExecutionResult::new(history, Some(err)); @@ -155,14 +156,14 @@ fn execute_plan( /// `execute_interaction` uses this type in conjunction with a result, where /// the `Err` case indicates a full-stop due to a bug, and the `Ok` case /// indicates the next step in the plan. -enum ExecutionContinuation { +pub(crate) enum ExecutionContinuation { /// Default continuation, execute the next interaction. NextInteraction, /// Typically used in the case of preconditions failures, skip to the next property. NextProperty, } -fn execute_interaction( +pub(crate) fn execute_interaction( env: &mut SimulatorEnv, connection_index: usize, interaction: &Interaction, diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs index 3f014bef0..2eabaef8b 100644 --- a/simulator/runner/mod.rs +++ b/simulator/runner/mod.rs @@ -4,3 +4,4 @@ pub mod execution; #[allow(dead_code)] pub mod file; pub mod io; +pub mod watch; diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs new file mode 100644 index 000000000..75ecb1801 --- /dev/null +++ b/simulator/runner/watch.rs @@ -0,0 +1,133 @@ +use std::sync::{Arc, Mutex}; + +use crate::{ + generation::{ + pick_index, + plan::{Interaction, InteractionPlanState}, + }, + runner::execution::ExecutionContinuation, +}; + +use super::{ + env::{SimConnection, SimulatorEnv}, + execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, +}; + +pub(crate) fn run_simulation( + env: Arc>, + plans: &mut [Vec>], + last_execution: Arc>, +) -> ExecutionResult { + let mut states = plans + .iter() + .map(|_| InteractionPlanState { + stack: vec![], + interaction_pointer: 0, + secondary_pointer: 0, + }) + .collect::>(); + let result = execute_plans(env.clone(), plans, &mut states, last_execution); + + let env = env.lock().unwrap(); + env.io.print_stats(); + + log::info!("Simulation completed"); + + result +} + +pub(crate) fn execute_plans( + env: Arc>, + plans: &mut [Vec>], + states: &mut [InteractionPlanState], + last_execution: Arc>, +) -> ExecutionResult { + let mut history = ExecutionHistory::new(); + let now = std::time::Instant::now(); + let mut env = env.lock().unwrap(); + for _tick in 0..env.opts.ticks { + // Pick the connection to interact with + let connection_index = pick_index(env.connections.len(), &mut env.rng); + let state = &mut states[connection_index]; + + history.history.push(Execution::new( + connection_index, + state.interaction_pointer, + state.secondary_pointer, + )); + let mut last_execution = last_execution.lock().unwrap(); + last_execution.connection_index = connection_index; + last_execution.interaction_index = state.interaction_pointer; + last_execution.secondary_index = state.secondary_pointer; + // Execute the interaction for the selected connection + match execute_plan(&mut env, connection_index, plans, states) { + Ok(_) => {} + Err(err) => { + return ExecutionResult::new(history, Some(err)); + } + } + // Check if the maximum time for the simulation has been reached + if now.elapsed().as_secs() >= env.opts.max_time_simulation as u64 { + return ExecutionResult::new( + history, + Some(limbo_core::LimboError::InternalError( + "maximum time for simulation reached".into(), + )), + ); + } + } + + ExecutionResult::new(history, None) +} + +fn execute_plan( + env: &mut SimulatorEnv, + connection_index: usize, + plans: &mut [Vec>], + states: &mut [InteractionPlanState], +) -> limbo_core::Result<()> { + let connection = &env.connections[connection_index]; + let plan = &mut plans[connection_index]; + let state = &mut states[connection_index]; + + if state.interaction_pointer >= plan.len() { + return Ok(()); + } + + let interaction = &plan[state.interaction_pointer][state.secondary_pointer]; + + if let SimConnection::Disconnected = connection { + log::info!("connecting {}", connection_index); + env.connections[connection_index] = SimConnection::Connected(env.db.connect()); + } else { + match execute_interaction(env, connection_index, interaction, &mut state.stack) { + Ok(next_execution) => { + log::debug!("connection {} processed", connection_index); + // Move to the next interaction or property + match next_execution { + ExecutionContinuation::NextInteraction => { + if state.secondary_pointer + 1 >= plan[state.interaction_pointer].len() { + // If we have reached the end of the interactions for this property, move to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } else { + // Otherwise, move to the next interaction + state.secondary_pointer += 1; + } + } + ExecutionContinuation::NextProperty => { + // Skip to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } + } + } + Err(err) => { + log::error!("error {}", err); + return Err(err); + } + } + } + + Ok(()) +}