diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b354dd8e3..0eef76c70 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -64,8 +64,12 @@ jobs: - uses: useblacksmith/rust-cache@v3 with: prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project) - - name: Install the project + - name: Simulator default run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 loop -n 10 -s + - name: Simulator InsertHeavy + run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 --profile write_heavy loop -n 10 -s + - name: Simulator Faultless + run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 --profile faultless loop -n 10 -s test-limbo: runs-on: blacksmith-4vcpu-ubuntu-2404 diff --git a/Cargo.lock b/Cargo.lock index e888186cc..cf9df0f9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -426,6 +426,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.17" @@ -601,6 +610,21 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "compact_str" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1402,6 +1426,29 @@ dependencies = [ "slab", ] +[[package]] +name = "garde" +version = "0.22.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a989bd2fd12136080f7825ff410d9239ce84a2a639487fc9d924ee42e2fb84f" +dependencies = [ + "compact_str", + "garde_derive", + "serde", + "smallvec", +] + +[[package]] +name = "garde_derive" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f7f0545bbbba0a37d4d445890fa5759814e0716f02417b39f6fab292193df68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "genawaiter" version = "0.99.1" @@ -1953,6 +2000,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + [[package]] name = "julian_day_converter" version = "0.4.5" @@ -2132,8 +2190,10 @@ dependencies = [ "clap", "dirs 6.0.0", "env_logger 0.10.2", + "garde", "hex", "itertools 0.14.0", + "json5", "log", "notify", "rand 0.9.2", @@ -2141,9 +2201,11 @@ dependencies = [ "regex", "regex-syntax 0.8.5", "rusqlite", + "schemars 1.0.4", "serde", "serde_json", "sql_generation", + "strum", "tracing", "tracing-subscriber", "turso_core", @@ -2641,6 +2703,50 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" +dependencies = [ + "memchr", + "thiserror 2.0.12", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb056d9e8ea77922845ec74a1c4e8fb17e7c218cc4fc11a15c5d25e189aa40bc" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = 
"pest_generator" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e404e638f781eb3202dc82db6760c8ae8a1eeef7fb3fa8264b2ef280504966" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "pest_meta" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd1101f170f5903fde0914f899bb503d9ff5271d7ba76bbb70bea63690cc0d5" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -3109,6 +3215,26 @@ dependencies = [ "thiserror 2.0.12", ] +[[package]] +name = "ref-cast" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "regex" version = "1.11.1" @@ -3355,7 +3481,20 @@ checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", "indexmap 1.9.3", - "schemars_derive", + "schemars_derive 0.8.22", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive 1.0.4", "serde", "serde_json", ] @@ -3372,6 +3511,18 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + 
"proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.100", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3436,6 +3587,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -3474,6 +3636,9 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "socket2" @@ -3503,10 +3668,12 @@ version = "0.1.5-pre.1" dependencies = [ "anarchist-readable-name-generator-lib 0.2.0", "anyhow", + "garde", "hex", "itertools 0.14.0", "rand 0.9.2", "rand_chacha 0.9.0", + "schemars 1.0.4", "serde", "tracing", "turso_core", @@ -3528,6 +3695,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "str_stack" version = "0.1.0" @@ -4038,7 +4211,7 @@ dependencies = [ "mimalloc", "nu-ansi-term 0.50.1", "rustyline", - "schemars", + "schemars 0.8.22", "serde", "serde_json", "shlex", @@ -4269,6 +4442,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = 
"uncased" version = "0.9.10" diff --git a/Cargo.toml b/Cargo.toml index 3d32c6177..da36927d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,8 @@ rusqlite = { version = "0.37.0", features = ["bundled"] } itertools = "0.14.0" rand = "0.9.2" tracing = "0.1.41" +schemars = "1.0.4" +garde = "0.22" [profile.release] debug = "line-tables-only" diff --git a/Makefile b/Makefile index d2abf3376..6070bb70e 100644 --- a/Makefile +++ b/Makefile @@ -201,3 +201,7 @@ endif fi .PHONY: merge-pr + +sim-schema: + cargo run -p limbo_sim -- print-schema > simulator/configs/custom/profile-schema.json + diff --git a/simulator/.gitignore b/simulator/.gitignore new file mode 100644 index 000000000..edec4e579 --- /dev/null +++ b/simulator/.gitignore @@ -0,0 +1 @@ +configs/custom \ No newline at end of file diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index f01896716..a8f85ca58 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -4,7 +4,7 @@ name = "limbo_sim" version.workspace = true authors.workspace = true -edition.workspace = true +edition = "2024" license.workspace = true repository.workspace = true description = "The Limbo deterministic simulator" @@ -38,3 +38,7 @@ hex = "0.4.3" itertools = "0.14.0" sql_generation = { workspace = true } turso_parser = { workspace = true } +schemars = { workspace = true } +garde = { workspace = true, features = ["derive", "serde"] } +json5 = { version = "0.4.1" } +strum = { workspace = true } diff --git a/simulator/README.md b/simulator/README.md index 87d61479d..3de0afb99 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -106,6 +106,18 @@ it should generate the necessary queries and assertions for the property. You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite, and compare the results. It will also check for any panics or errors in either database. 
+## Simulator Profiles +A Simulator Profile allows you to influence query generation and I/O fault injection. You can run predefined profiles or you can create your own custom profile in a separate JSON file. You can select the profile you want by passing the `--profile` flag to the CLI. It will accept a predefined Profile name or a file path. + +For development purposes, you can run `make sim-schema` to generate a JsonSchema of the `Profile` struct. Then you can create profiles to test locally in a `configs/custom` folder that is gitignored and have editor integration by adding a `$schema` tag to reference the generated JsonSchema: + +```json +{ + "$schema": "./profile-schema.json", + ... +} +``` + ## Resources - [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/) diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index 79bdf506f..88a40d708 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -25,9 +25,17 @@ impl GenerationContext for SimulatorEnv { &self.tables.tables } - fn opts(&self) -> sql_generation::generation::Opts { - sql_generation::generation::Opts { - indexes: self.opts.experimental_indexes, - } + fn opts(&self) -> &sql_generation::generation::Opts { + &self.profile.query.gen_opts + } +} + +impl GenerationContext for &mut SimulatorEnv { + fn tables(&self) -> &Vec { + &self.tables.tables + } + + fn opts(&self) -> &sql_generation::generation::Opts { + &self.profile.query.gen_opts } } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 47763657a..365b4cd3d 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -9,25 +9,25 @@ use std::{ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, query::SelectFree, Arbitrary, ArbitraryFrom}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency, query::SelectFree}, model::{ - query::{update::Update, Create, CreateIndex, Delete, 
Drop, Insert, Select}, + query::{Create, CreateIndex, Delete, Drop, Insert, Select, update::Update}, table::SimValue, }, }; use turso_core::{Connection, Result, StepResult}; use crate::{ + SimulatorEnv, generation::Shadow, model::Query, runner::{ env::{SimConnection, SimulationType, SimulatorTables}, io::SimulatorIO, }, - SimulatorEnv, }; -use super::property::{remaining, Property}; +use super::property::{Property, remaining}; pub(crate) type ResultSet = Result>>; @@ -254,16 +254,27 @@ impl Display for InteractionPlan { #[derive(Debug, Clone, Copy)] pub(crate) struct InteractionStats { - pub(crate) read_count: usize, - pub(crate) write_count: usize, - pub(crate) delete_count: usize, - pub(crate) update_count: usize, - pub(crate) create_count: usize, - pub(crate) create_index_count: usize, - pub(crate) drop_count: usize, - pub(crate) begin_count: usize, - pub(crate) commit_count: usize, - pub(crate) rollback_count: usize, + pub(crate) select_count: u32, + pub(crate) insert_count: u32, + pub(crate) delete_count: u32, + pub(crate) update_count: u32, + pub(crate) create_count: u32, + pub(crate) create_index_count: u32, + pub(crate) drop_count: u32, + pub(crate) begin_count: u32, + pub(crate) commit_count: u32, + pub(crate) rollback_count: u32, +} + +impl InteractionStats { + pub fn total_writes(&self) -> u32 { + self.insert_count + + self.delete_count + + self.update_count + + self.create_count + + self.create_index_count + + self.drop_count + } } impl Display for InteractionStats { @@ -271,8 +282,8 @@ impl Display for InteractionStats { write!( f, "Read: {}, Write: {}, Delete: {}, Update: {}, Create: {}, CreateIndex: {}, Drop: {}, Begin: {}, Commit: {}, Rollback: {}", - self.read_count, - self.write_count, + self.select_count, + self.insert_count, self.delete_count, self.update_count, self.create_count, @@ -351,8 +362,8 @@ impl InteractionPlan { pub(crate) fn stats(&self) -> InteractionStats { let mut stats = InteractionStats { - read_count: 0, - write_count: 0, + 
select_count: 0, + insert_count: 0, delete_count: 0, update_count: 0, create_count: 0, @@ -365,8 +376,8 @@ impl InteractionPlan { fn query_stat(q: &Query, stats: &mut InteractionStats) { match q { - Query::Select(_) => stats.read_count += 1, - Query::Insert(_) => stats.write_count += 1, + Query::Select(_) => stats.select_count += 1, + Query::Insert(_) => stats.insert_count += 1, Query::Delete(_) => stats.delete_count += 1, Query::Create(_) => stats.create_count += 1, Query::Drop(_) => stats.drop_count += 1, @@ -395,16 +406,14 @@ impl InteractionPlan { stats } -} -impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { - fn arbitrary_from(rng: &mut R, env: &mut SimulatorEnv) -> Self { + pub fn generate_plan(rng: &mut R, env: &mut SimulatorEnv) -> Self { let mut plan = InteractionPlan::new(); - let num_interactions = env.opts.max_interactions; + let num_interactions = env.opts.max_interactions as usize; // First create at least one table - let create_query = Create::arbitrary(rng); + let create_query = Create::arbitrary(rng, env); env.tables.push(create_query.table.clone()); plan.plan @@ -416,7 +425,7 @@ impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { plan.plan.len(), num_interactions ); - let interactions = Interactions::arbitrary_from(rng, (env, plan.stats())); + let interactions = Interactions::arbitrary_from(rng, env, (env, plan.stats())); interactions.shadow(&mut env.tables); plan.plan.push(interactions); } @@ -756,42 +765,42 @@ fn reopen_database(env: &mut SimulatorEnv) { } fn random_create(rng: &mut R, env: &SimulatorEnv) -> Interactions { - let mut create = Create::arbitrary(rng); + let mut create = Create::arbitrary(rng, env); while env.tables.iter().any(|t| t.name == create.table.name) { - create = Create::arbitrary(rng); + create = Create::arbitrary(rng, env); } Interactions::Query(Query::Create(create)) } fn random_read(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Select(Select::arbitrary_from(rng, env))) 
+ Interactions::Query(Query::Select(Select::arbitrary(rng, env))) } fn random_expr(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Select(SelectFree::arbitrary_from(rng, env).0)) + Interactions::Query(Query::Select(SelectFree::arbitrary(rng, env).0)) } fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Insert(Insert::arbitrary_from(rng, env))) + Interactions::Query(Query::Insert(Insert::arbitrary(rng, env))) } fn random_delete(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Delete(Delete::arbitrary_from(rng, env))) + Interactions::Query(Query::Delete(Delete::arbitrary(rng, env))) } fn random_update(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Update(Update::arbitrary_from(rng, env))) + Interactions::Query(Query::Update(Update::arbitrary(rng, env))) } fn random_drop(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Drop(Drop::arbitrary_from(rng, env))) + Interactions::Query(Query::Drop(Drop::arbitrary(rng, env))) } fn random_create_index(rng: &mut R, env: &SimulatorEnv) -> Option { if env.tables.is_empty() { return None; } - let mut create_index = CreateIndex::arbitrary_from(rng, env); + let mut create_index = CreateIndex::arbitrary(rng, env); while env .tables .iter() @@ -801,7 +810,7 @@ fn random_create_index(rng: &mut R, env: &SimulatorEnv) -> Option< .iter() .any(|i| i == &create_index.index_name) { - create_index = CreateIndex::arbitrary_from(rng, env); + create_index = CreateIndex::arbitrary(rng, env); } Some(Interactions::Query(Query::CreateIndex(create_index))) @@ -818,29 +827,30 @@ fn random_fault(rng: &mut R, env: &SimulatorEnv) -> Interactions { } impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { - fn arbitrary_from( + fn arbitrary_from( rng: &mut R, + _context: &C, (env, stats): (&SimulatorEnv, InteractionStats), ) -> Self { - let remaining_ = remaining(env, 
&stats); + let remaining_ = remaining(env.opts.max_interactions, &env.profile.query, &stats); frequency( vec![ ( - f64::min(remaining_.read, remaining_.write) + remaining_.create, + u32::min(remaining_.select, remaining_.insert) + remaining_.create, Box::new(|rng: &mut R| { - Interactions::Property(Property::arbitrary_from(rng, (env, &stats))) + Interactions::Property(Property::arbitrary_from(rng, env, (env, &stats))) }), ), ( - remaining_.read, + remaining_.select, Box::new(|rng: &mut R| random_read(rng, env)), ), ( - remaining_.read / 3.0, + remaining_.select / 3, Box::new(|rng: &mut R| random_expr(rng, env)), ), ( - remaining_.write, + remaining_.insert, Box::new(|rng: &mut R| random_write(rng, env)), ), ( @@ -868,15 +878,15 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { ), ( // remaining_.drop, - 0.0, + 0, Box::new(|rng: &mut R| random_drop(rng, env)), ), ( remaining_ - .read - .min(remaining_.write) + .select + .min(remaining_.insert) .min(remaining_.create) - .max(1.0), + .max(1), Box::new(|rng: &mut R| random_fault(rng, env)), ), ], diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 288c4e75d..ca75477ac 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1,21 +1,23 @@ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, pick, pick_index, ArbitraryFrom}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency, pick, pick_index}, model::{ query::{ + Create, Delete, Drop, Insert, Select, predicate::Predicate, select::{CompoundOperator, CompoundSelect, ResultColumn, SelectBody, SelectInner}, transaction::{Begin, Commit, Rollback}, update::Update, - Create, Delete, Drop, Insert, Select, }, table::SimValue, }, }; -use turso_core::{types, LimboError}; +use turso_core::{LimboError, types}; use turso_parser::ast::{self, Distinctness}; -use crate::{generation::Shadow as _, model::Query, runner::env::SimulatorEnv}; +use 
crate::{ + generation::Shadow as _, model::Query, profiles::query::QueryProfile, runner::env::SimulatorEnv, +}; use super::plan::{Assertion, Interaction, InteractionStats, ResultSet}; @@ -301,7 +303,10 @@ impl Property { for row in rows { for (i, (col, val)) in update.set_values.iter().enumerate() { if &row[i] != val { - return Ok(Err(format!("updated row {} has incorrect value for column {col}: expected {val}, got {}", i, row[i]))); + return Ok(Err(format!( + "updated row {} has incorrect value for column {col}: expected {val}, got {}", + i, row[i] + ))); } } } @@ -380,7 +385,10 @@ impl Property { if found { Ok(Ok(())) } else { - Ok(Err(format!("row [{:?}] not found in table", row.iter().map(|v| v.to_string()).collect::>()))) + Ok(Err(format!( + "row [{:?}] not found in table", + row.iter().map(|v| v.to_string()).collect::>() + ))) } } Err(err) => Err(LimboError::InternalError(err.to_string())), @@ -854,15 +862,22 @@ impl Property { match (select_result_set, select_tlp_result_set) { (Ok(select_rows), Ok(select_tlp_rows)) => { if select_rows.len() != select_tlp_rows.len() { - return Ok(Err(format!("row count mismatch: select returned {} rows, select_tlp returned {} rows", select_rows.len(), select_tlp_rows.len()))); + return Ok(Err(format!( + "row count mismatch: select returned {} rows, select_tlp returned {} rows", + select_rows.len(), + select_tlp_rows.len() + ))); } // Check if any row in select_rows is not in select_tlp_rows for row in select_rows.iter() { if !select_tlp_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", + row.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ); return Ok(Err(format!( "row mismatch: row [{}] exists in select results but not in select_tlp results", print_row(row) @@ 
-873,9 +888,12 @@ impl Property { for row in select_tlp_rows.iter() { if !select_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", + row.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ); return Ok(Err(format!( "row mismatch: row [{}] exists in select_tlp but not in select", @@ -935,7 +953,9 @@ impl Property { if union_count == count1 + count2 { Ok(Ok(())) } else { - Ok(Err(format!("UNION ALL should preserve cardinality but it didn't: {count1} + {count2} != {union_count}"))) + Ok(Err(format!( + "UNION ALL should preserve cardinality but it didn't: {count1} + {count2} != {union_count}" + ))) } } (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { @@ -952,7 +972,7 @@ impl Property { } fn assert_all_table_values(tables: &[String]) -> impl Iterator + use<'_> { - let checks = tables.iter().flat_map(|table| { + tables.iter().flat_map(|table| { let select = Interaction::Query(Query::Select(Select::simple( table.clone(), Predicate::true_(), @@ -1006,50 +1026,64 @@ fn assert_all_table_values(tables: &[String]) -> impl Iterator Remaining { - let remaining_read = ((env.opts.max_interactions as f64 * env.opts.read_percent / 100.0) - - (stats.read_count as f64)) - .max(0.0); - let remaining_write = ((env.opts.max_interactions as f64 * env.opts.write_percent / 100.0) - - (stats.write_count as f64)) - .max(0.0); - let remaining_create = ((env.opts.max_interactions as f64 * env.opts.create_percent / 100.0) - - (stats.create_count as f64)) - .max(0.0); +pub(crate) fn remaining( + max_interactions: u32, + opts: &QueryProfile, + stats: &InteractionStats, +) -> Remaining { + let total_weight = opts.select_weight + + opts.create_table_weight + + opts.create_index_weight + + opts.insert_weight + + opts.update_weight + + 
opts.delete_weight + + opts.drop_table_weight; - let remaining_create_index = - ((env.opts.max_interactions as f64 * env.opts.create_index_percent / 100.0) - - (stats.create_index_count as f64)) - .max(0.0); + let total_select = (max_interactions * opts.select_weight) / total_weight; + let total_insert = (max_interactions * opts.insert_weight) / total_weight; + let total_create = (max_interactions * opts.create_table_weight) / total_weight; + let total_create_index = (max_interactions * opts.create_index_weight) / total_weight; + let total_delete = (max_interactions * opts.delete_weight) / total_weight; + let total_update = (max_interactions * opts.update_weight) / total_weight; + let total_drop = (max_interactions * opts.drop_table_weight) / total_weight; - let remaining_delete = ((env.opts.max_interactions as f64 * env.opts.delete_percent / 100.0) - - (stats.delete_count as f64)) - .max(0.0); - let remaining_update = ((env.opts.max_interactions as f64 * env.opts.update_percent / 100.0) - - (stats.update_count as f64)) - .max(0.0); - let remaining_drop = ((env.opts.max_interactions as f64 * env.opts.drop_percent / 100.0) - - (stats.drop_count as f64)) - .max(0.0); + let remaining_select = total_select + .checked_sub(stats.select_count) + .unwrap_or_default(); + let remaining_insert = total_insert + .checked_sub(stats.insert_count) + .unwrap_or_default(); + let remaining_create = total_create + .checked_sub(stats.create_count) + .unwrap_or_default(); + let remaining_create_index = total_create_index + .checked_sub(stats.create_index_count) + .unwrap_or_default(); + let remaining_delete = total_delete + .checked_sub(stats.delete_count) + .unwrap_or_default(); + let remaining_update = total_update + .checked_sub(stats.update_count) + .unwrap_or_default(); + let remaining_drop = total_drop.checked_sub(stats.drop_count).unwrap_or_default(); Remaining { - read: remaining_read, - write: remaining_write, + select: remaining_select, + insert: remaining_insert, create: 
remaining_create, create_index: remaining_create_index, delete: remaining_delete, @@ -1067,7 +1101,7 @@ fn property_insert_values_select( let table = pick(&env.tables, rng); // Generate rows to insert let rows = (0..rng.random_range(1..=5)) - .map(|_| Vec::::arbitrary_from(rng, table)) + .map(|_| Vec::::arbitrary_from(rng, env, table)) .collect::>(); // Pick a random row to select @@ -1101,7 +1135,7 @@ fn property_insert_values_select( })); } for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); match &query { Query::Delete(Delete { table: t, @@ -1144,7 +1178,7 @@ fn property_insert_values_select( // Select the row let select_query = Select::simple( table.name.clone(), - Predicate::arbitrary_from(rng, (table, &row)), + Predicate::arbitrary_from(rng, env, (table, &row)), ); Property::InsertValuesSelect { @@ -1158,7 +1192,7 @@ fn property_insert_values_select( fn property_read_your_updates_back(rng: &mut R, env: &SimulatorEnv) -> Property { // e.g. UPDATE t SET a=1, b=2 WHERE c=1; - let update = Update::arbitrary_from(rng, env); + let update = Update::arbitrary(rng, env); // e.g. 
SELECT a, b FROM t WHERE c=1; let select = Select::single( update.table().to_string(), @@ -1190,7 +1224,7 @@ fn property_select_limit(rng: &mut R, env: &SimulatorEnv) -> Prope let select = Select::single( table.name.clone(), vec![ResultColumn::Star], - Predicate::arbitrary_from(rng, table), + Predicate::arbitrary_from(rng, env, table), Some(rng.random_range(1..=5)), Distinctness::All, ); @@ -1215,7 +1249,7 @@ fn property_double_create_failure( // - [x] There will be no errors in the middle interactions.(best effort) // - [ ] Table `t` will not be renamed or dropped.(todo: add this constraint once ALTER or DROP is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); if let Query::Create(Create { table: t }) = &query { // There will be no errors in the middle interactions. // - Creating the same table is an error @@ -1240,7 +1274,7 @@ fn property_delete_select( // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let predicate = Predicate::arbitrary_from(rng, table); + let predicate = Predicate::arbitrary_from(rng, env, table); // Create random queries respecting the constraints let mut queries = Vec::new(); @@ -1248,7 +1282,7 @@ fn property_delete_select( // - [x] A row that holds for the predicate will not be inserted. // - [ ] The table `t` will not be renamed, dropped, or altered. (todo: add this constraint once ALTER or DROP is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); match &query { Query::Insert(Insert::Values { table: t, values }) => { // A row that holds for the predicate will not be inserted. @@ -1303,7 +1337,7 @@ fn property_drop_select( // - [x] There will be no errors in the middle interactions. 
(this constraint is impossible to check, so this is just best effort) // - [-] The table `t` will not be created, no table will be renamed to `t`. (todo: update this constraint once ALTER is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); if let Query::Create(Create { table: t }) = &query { // - The table `t` will not be created if t.name == table.name { @@ -1313,7 +1347,10 @@ fn property_drop_select( queries.push(query); } - let select = Select::simple(table.name.clone(), Predicate::arbitrary_from(rng, table)); + let select = Select::simple( + table.name.clone(), + Predicate::arbitrary_from(rng, env, table), + ); Property::DropSelect { table: table.name.clone(), @@ -1326,7 +1363,7 @@ fn property_select_select_optimizer(rng: &mut R, env: &SimulatorEn // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let predicate = Predicate::arbitrary_from(rng, table); + let predicate = Predicate::arbitrary_from(rng, env, table); // Transform into a Binary predicate to force values to be casted to a bool let expr = ast::Expr::Binary( Box::new(predicate.0), @@ -1344,8 +1381,8 @@ fn property_where_true_false_null(rng: &mut R, env: &SimulatorEnv) // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let p1 = Predicate::arbitrary_from(rng, table); - let p2 = Predicate::arbitrary_from(rng, table); + let p1 = Predicate::arbitrary_from(rng, env, table); + let p2 = Predicate::arbitrary_from(rng, env, table); // Create the select query let select = Select::simple(table.name.clone(), p1); @@ -1363,8 +1400,8 @@ fn property_union_all_preserves_cardinality( // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let p1 = Predicate::arbitrary_from(rng, table); - let p2 = Predicate::arbitrary_from(rng, table); + let p1 = Predicate::arbitrary_from(rng, env, table); + 
let p2 = Predicate::arbitrary_from(rng, env, table); // Create the select query let select = Select::single( @@ -1387,7 +1424,7 @@ fn property_fsync_no_wait( remaining: &Remaining, ) -> Property { Property::FsyncNoWait { - query: Query::arbitrary_from(rng, (env, remaining)), + query: Query::arbitrary_from(rng, env, remaining), tables: env.tables.iter().map(|t| t.name.clone()).collect(), } } @@ -1398,108 +1435,111 @@ fn property_faulty_query( remaining: &Remaining, ) -> Property { Property::FaultyQuery { - query: Query::arbitrary_from(rng, (env, remaining)), + query: Query::arbitrary_from(rng, env, remaining), tables: env.tables.iter().map(|t| t.name.clone()).collect(), } } impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { - fn arbitrary_from( + fn arbitrary_from( rng: &mut R, + context: &C, (env, stats): (&SimulatorEnv, &InteractionStats), ) -> Self { - let remaining_ = remaining(env, stats); + let opts = context.opts(); + let remaining_ = remaining(env.opts.max_interactions, &env.profile.query, stats); frequency( vec![ ( if !env.opts.disable_insert_values_select { - f64::min(remaining_.read, remaining_.write) + u32::min(remaining_.select, remaining_.insert) } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_insert_values_select(rng, env, &remaining_)), ), ( - remaining_.read, + remaining_.select, Box::new(|rng: &mut R| property_table_has_expected_content(rng, env)), ), ( - f64::min(remaining_.read, remaining_.write), + u32::min(remaining_.select, remaining_.insert), Box::new(|rng: &mut R| property_read_your_updates_back(rng, env)), ), ( if !env.opts.disable_double_create_failure { - remaining_.create / 2.0 + remaining_.create / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_double_create_failure(rng, env, &remaining_)), ), ( if !env.opts.disable_select_limit { - remaining_.read + remaining_.select } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_select_limit(rng, env)), ), ( if !env.opts.disable_delete_select { - 
f64::min(remaining_.read, remaining_.write).min(remaining_.delete) + u32::min(remaining_.select, remaining_.insert).min(remaining_.delete) } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_delete_select(rng, env, &remaining_)), ), ( if !env.opts.disable_drop_select { // remaining_.drop - 0.0 + 0 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_drop_select(rng, env, &remaining_)), ), ( if !env.opts.disable_select_optimizer { - remaining_.read / 2.0 + remaining_.select / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_select_select_optimizer(rng, env)), ), ( - if env.opts.experimental_indexes && !env.opts.disable_where_true_false_null { - remaining_.read / 2.0 + if opts.indexes && !env.opts.disable_where_true_false_null { + remaining_.select / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), ), ( - if env.opts.experimental_indexes - && !env.opts.disable_union_all_preserves_cardinality - { - remaining_.read / 3.0 + if opts.indexes && !env.opts.disable_union_all_preserves_cardinality { + remaining_.select / 3 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_union_all_preserves_cardinality(rng, env)), ), ( - if !env.opts.disable_fsync_no_wait { - 50.0 // Freestyle number + if env.profile.io.enable && !env.opts.disable_fsync_no_wait { + 50 // Freestyle number } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_fsync_no_wait(rng, env, &remaining_)), ), ( - if !env.opts.disable_faulty_query { - 20.0 + if env.profile.io.enable + && env.profile.io.fault.enable + && !env.opts.disable_faulty_query + { + 20 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_faulty_query(rng, env, &remaining_)), ), diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index bb1344c2a..72541c4d7 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,35 +1,39 @@ -use crate::{model::Query, SimulatorEnv}; +use crate::model::Query; use rand::Rng; use sql_generation::{ - 
generation::{frequency, Arbitrary, ArbitraryFrom}, - model::query::{update::Update, Create, Delete, Insert, Select}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency}, + model::query::{Create, Delete, Insert, Select, update::Update}, }; use super::property::Remaining; -impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query { - fn arbitrary_from(rng: &mut R, (env, remaining): (&SimulatorEnv, &Remaining)) -> Self { +impl ArbitraryFrom<&Remaining> for Query { + fn arbitrary_from( + rng: &mut R, + context: &C, + remaining: &Remaining, + ) -> Self { frequency( vec![ ( remaining.create, - Box::new(|rng| Self::Create(Create::arbitrary(rng))), + Box::new(|rng| Self::Create(Create::arbitrary(rng, context))), ), ( - remaining.read, - Box::new(|rng| Self::Select(Select::arbitrary_from(rng, env))), + remaining.select, + Box::new(|rng| Self::Select(Select::arbitrary(rng, context))), ), ( - remaining.write, - Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, env))), + remaining.insert, + Box::new(|rng| Self::Insert(Insert::arbitrary(rng, context))), ), ( remaining.update, - Box::new(|rng| Self::Update(Update::arbitrary_from(rng, env))), + Box::new(|rng| Self::Update(Update::arbitrary(rng, context))), ), ( - f64::min(remaining.write, remaining.delete), - Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, env))), + remaining.insert.min(remaining.delete), + Box::new(|rng| Self::Delete(Delete::arbitrary(rng, context))), ), ], rng, diff --git a/simulator/main.rs b/simulator/main.rs index ccf8977ae..4cd9557b0 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -8,25 +8,26 @@ use rand::prelude::*; use runner::bugbase::{Bug, BugBase, LoadedBug}; use runner::cli::{SimulatorCLI, SimulatorCommand}; use runner::env::SimulatorEnv; -use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; +use runner::execution::{Execution, ExecutionHistory, ExecutionResult, execute_plans}; use runner::{differential, watch}; -use 
sql_generation::generation::ArbitraryFrom; use std::any::Any; use std::backtrace::Backtrace; use std::fs::OpenOptions; use std::io::{IsTerminal, Write}; use std::path::Path; -use std::sync::{mpsc, Arc, Mutex}; +use std::sync::{Arc, Mutex, mpsc}; +use tracing_subscriber::EnvFilter; use tracing_subscriber::field::MakeExt; use tracing_subscriber::fmt::format; -use tracing_subscriber::EnvFilter; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use crate::profiles::Profile; use crate::runner::doublecheck; use crate::runner::env::{Paths, SimulationPhase, SimulationType}; mod generation; mod model; +mod profiles; mod runner; mod shrink; @@ -35,80 +36,89 @@ fn main() -> anyhow::Result<()> { let mut cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; - match cli_opts.subcommand { - Some(SimulatorCommand::List) => { - let mut bugbase = BugBase::load()?; - bugbase.list_bugs() - } - Some(SimulatorCommand::Loop { n, short_circuit }) => { - banner(); - for i in 0..n { - println!("iteration {i}"); - let result = testing_main(&cli_opts); - if result.is_err() && short_circuit { - println!("short circuiting after {i} iterations"); - return result; - } else if result.is_err() { - println!("iteration {i} failed"); - } else { - println!("iteration {i} succeeded"); - } + let profile = Profile::parse_from_type(cli_opts.profile.clone())?; + tracing::debug!(sim_profile = ?profile); + + if let Some(ref command) = cli_opts.subcommand { + match command { + SimulatorCommand::List => { + let mut bugbase = BugBase::load()?; + bugbase.list_bugs() + } + SimulatorCommand::Loop { n, short_circuit } => { + banner(); + for i in 0..*n { + println!("iteration {i}"); + let result = testing_main(&cli_opts, &profile); + if result.is_err() && *short_circuit { + println!("short circuiting after {i} iterations"); + return result; + } else if result.is_err() { + println!("iteration {i} failed"); + } else { + println!("iteration {i} succeeded"); + } + } + Ok(()) + } + 
SimulatorCommand::Test { filter } => { + let mut bugbase = BugBase::load()?; + let bugs = bugbase.load_bugs()?; + let mut bugs = bugs + .into_iter() + .flat_map(|bug| { + let runs = bug + .runs + .into_iter() + .filter_map(|run| run.error.clone().map(|_| run)) + .filter(|run| run.error.as_ref().unwrap().contains(filter)) + .map(|run| run.cli_options) + .collect::>(); + + runs.into_iter() + .map(|mut cli_opts| { + cli_opts.seed = Some(bug.seed); + cli_opts.load = None; + cli_opts + }) + .collect::>() + }) + .collect::>(); + + bugs.sort(); + bugs.dedup_by(|a, b| a == b); + + println!( + "found {} previously triggered configurations with {}", + bugs.len(), + filter + ); + + let results = bugs + .into_iter() + .map(|cli_opts| testing_main(&cli_opts, &profile)) + .collect::>(); + + let (successes, failures): (Vec<_>, Vec<_>) = + results.into_iter().partition(|result| result.is_ok()); + println!("the results of the change are:"); + println!("\t{} successful runs", successes.len()); + println!("\t{} failed runs", failures.len()); + Ok(()) + } + SimulatorCommand::PrintSchema => { + let schema = schemars::schema_for!(crate::Profile); + println!("{}", serde_json::to_string_pretty(&schema).unwrap()); + Ok(()) } - Ok(()) - } - Some(SimulatorCommand::Test { filter }) => { - let mut bugbase = BugBase::load()?; - let bugs = bugbase.load_bugs()?; - let mut bugs = bugs - .into_iter() - .flat_map(|bug| { - let runs = bug - .runs - .into_iter() - .filter_map(|run| run.error.clone().map(|_| run)) - .filter(|run| run.error.as_ref().unwrap().contains(&filter)) - .map(|run| run.cli_options) - .collect::>(); - - runs.into_iter() - .map(|mut cli_opts| { - cli_opts.seed = Some(bug.seed); - cli_opts.load = None; - cli_opts - }) - .collect::>() - }) - .collect::>(); - - bugs.sort(); - bugs.dedup_by(|a, b| a == b); - - println!( - "found {} previously triggered configurations with {}", - bugs.len(), - filter - ); - - let results = bugs - .into_iter() - .map(|cli_opts| testing_main(&cli_opts)) 
- .collect::>(); - - let (successes, failures): (Vec<_>, Vec<_>) = - results.into_iter().partition(|result| result.is_ok()); - println!("the results of the change are:"); - println!("\t{} successful runs", successes.len()); - println!("\t{} failed runs", failures.len()); - Ok(()) - } - None => { - banner(); - testing_main(&cli_opts) } + } else { + banner(); + testing_main(&cli_opts, &profile) } } -fn testing_main(cli_opts: &SimulatorCLI) -> anyhow::Result<()> { +fn testing_main(cli_opts: &SimulatorCLI, profile: &Profile) -> anyhow::Result<()> { let mut bugbase = if cli_opts.disable_bugbase { None } else { @@ -116,7 +126,7 @@ fn testing_main(cli_opts: &SimulatorCLI) -> anyhow::Result<()> { Some(BugBase::load()?) }; - let (seed, mut env, plans) = setup_simulation(bugbase.as_mut(), cli_opts); + let (seed, mut env, plans) = setup_simulation(bugbase.as_mut(), cli_opts, profile); if cli_opts.watch { watch_mode(env).unwrap(); @@ -471,6 +481,7 @@ impl SandboxedResult { fn setup_simulation( bugbase: Option<&mut BugBase>, cli_opts: &SimulatorCLI, + profile: &Profile, ) -> (u64, SimulatorEnv, Vec) { if let Some(seed) = &cli_opts.load { let seed = seed.parse::().expect("seed should be a number"); @@ -484,7 +495,13 @@ fn setup_simulation( if !paths.base.exists() { std::fs::create_dir_all(&paths.base).unwrap(); } - let env = SimulatorEnv::new(bug.seed(), cli_opts, paths, SimulationType::Default); + let env = SimulatorEnv::new( + bug.seed(), + cli_opts, + paths, + SimulationType::Default, + profile, + ); let plan = match bug { Bug::Loaded(LoadedBug { plan, .. 
}) => plan.clone(), @@ -528,12 +545,12 @@ fn setup_simulation( Paths::new(&dir) }; - let mut env = SimulatorEnv::new(seed, cli_opts, paths, SimulationType::Default); + let mut env = SimulatorEnv::new(seed, cli_opts, paths, SimulationType::Default, profile); tracing::info!("Generating database interaction plan..."); let plans = (1..=env.opts.max_connections) - .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) + .map(|_| InteractionPlan::generate_plan(&mut env.rng.clone(), &mut env)) .collect::>(); // todo: for now, we only use 1 connection, so it's safe to use the first plan. diff --git a/simulator/model/mod.rs b/simulator/model/mod.rs index ce249baf5..73863f725 100644 --- a/simulator/model/mod.rs +++ b/simulator/model/mod.rs @@ -5,14 +5,14 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use sql_generation::model::{ query::{ + Create, CreateIndex, Delete, Drop, EmptyContext, Insert, Select, select::{CompoundOperator, FromClause, ResultColumn, SelectInner}, transaction::{Begin, Commit, Rollback}, update::Update, - Create, CreateIndex, Delete, Drop, EmptyContext, Insert, Select, }, table::{JoinTable, JoinType, SimValue, Table, TableContext}, }; -use turso_parser::ast::{fmt::ToTokens, Distinctness}; +use turso_parser::ast::{Distinctness, fmt::ToTokens}; use crate::{generation::Shadow, runner::env::SimulatorTables}; @@ -282,10 +282,11 @@ impl Shadow for SelectInner { Ok(join_table) } else { - assert!(self - .columns - .iter() - .all(|col| matches!(col, ResultColumn::Expr(_)))); + assert!( + self.columns + .iter() + .all(|col| matches!(col, ResultColumn::Expr(_))) + ); // If `WHERE` is false, just return an empty table if !self.where_clause.test(&[], &Table::anonymous(vec![])) { diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs new file mode 100644 index 000000000..4bf44683a --- /dev/null +++ b/simulator/profiles/io.rs @@ -0,0 +1,79 @@ +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, 
Serialize}; + +use super::{max_dependent, min_dependent}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct IOProfile { + #[garde(skip)] + pub enable: bool, + #[garde(dive)] + pub latency: LatencyProfile, + #[garde(dive)] + pub fault: FaultProfile, + // TODO: expand here with header corruption options and faults on specific IO operations +} + +impl Default for IOProfile { + fn default() -> Self { + Self { + enable: true, + latency: Default::default(), + fault: Default::default(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct LatencyProfile { + #[garde(skip)] + pub enable: bool, + #[garde(range(min = 0, max = 100))] + /// Added IO latency probability + pub latency_probability: usize, + #[garde(custom(max_dependent(&self.max_tick)))] + /// Minimum tick time in microseconds for simulated time + pub min_tick: u64, + #[garde(custom(min_dependent(&self.min_tick)))] + /// Maximum tick time in microseconds for simulated time + pub max_tick: u64, +} + +impl Default for LatencyProfile { + fn default() -> Self { + Self { + enable: true, + latency_probability: 1, + min_tick: 1, + max_tick: 30, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct FaultProfile { + #[garde(skip)] + pub enable: bool, + // TODO: modify SimIo impls to have a FaultProfile inside so they can skip faults depending on the profile + #[garde(skip)] + pub read: bool, + #[garde(skip)] + pub write: bool, + #[garde(skip)] + pub sync: bool, +} + +impl Default for FaultProfile { + fn default() -> Self { + Self { + enable: true, + read: true, + write: true, + sync: true, + } + } +} diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs new file mode 100644 index 000000000..633bb3750 --- /dev/null +++ b/simulator/profiles/mod.rs @@ -0,0 +1,192 @@ +use std::{ 
+ fmt::Display, + fs, + num::NonZeroU32, + path::{Path, PathBuf}, + str::FromStr, +}; + +use anyhow::Context; +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sql_generation::generation::{InsertOpts, LargeTableOpts, Opts, QueryOpts, TableOpts}; +use strum::EnumString; + +use crate::profiles::{ + io::{FaultProfile, IOProfile}, + query::QueryProfile, +}; + +pub mod io; +pub mod query; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(default)] +#[schemars(deny_unknown_fields)] +pub struct Profile { + #[garde(skip)] + /// Experimental MVCC feature + pub experimental_mvcc: bool, + #[garde(dive)] + pub io: IOProfile, + #[garde(dive)] + pub query: QueryProfile, +} + +#[allow(clippy::derivable_impls)] +impl Default for Profile { + fn default() -> Self { + Self { + experimental_mvcc: false, + io: Default::default(), + query: Default::default(), + } + } +} + +impl Profile { + pub fn write_heavy() -> Self { + let profile = Profile { + query: QueryProfile { + gen_opts: Opts { + // TODO: in the future tweak blob size for bigger inserts + // TODO: increase number of rows as well + table: TableOpts { + large_table: LargeTableOpts { + large_table_prob: 0.4, + ..Default::default() + }, + ..Default::default() + }, + query: QueryOpts { + insert: InsertOpts { + min_rows: NonZeroU32::new(5).unwrap(), + max_rows: NonZeroU32::new(11).unwrap(), + }, + ..Default::default() + }, + ..Default::default() + }, + select_weight: 30, + insert_weight: 70, + delete_weight: 0, + update_weight: 0, + ..Default::default() + }, + ..Default::default() + }; + + // Validate that we as the developer are not creating an incorrect default profile + profile.validate().unwrap(); + profile + } + + pub fn faultless() -> Self { + let profile = Profile { + io: IOProfile { + fault: FaultProfile { + enable: false, + ..Default::default() + }, + ..Default::default() + }, + query: QueryProfile { + create_table_weight: 0, + create_index_weight: 0, 
+ ..Default::default() + }, + ..Default::default() + }; + + // Validate that we as the developer are not creating an incorrect default profile + profile.validate().unwrap(); + profile + } + + pub fn parse_from_type(profile_type: ProfileType) -> anyhow::Result { + let profile = match profile_type { + ProfileType::Default => Self::default(), + ProfileType::WriteHeavy => Self::write_heavy(), + ProfileType::Faultless => Self::faultless(), + ProfileType::Custom(path) => { + Self::parse(path).with_context(|| "failed to parse JSON profile")? + } + }; + Ok(profile) + } + + // TODO: in the future handle extension and composability of profiles here + pub fn parse(path: impl AsRef) -> anyhow::Result { + let contents = fs::read_to_string(path)?; + // use json5 so we can support comments and trailing commas + let profile: Profile = json5::from_str(&contents)?; + profile.validate()?; + Ok(profile) + } +} + +#[derive( + Debug, + Default, + Clone, + Serialize, + Deserialize, + EnumString, + PartialEq, + Eq, + PartialOrd, + Ord, + strum::Display, + strum::VariantNames, +)] +#[serde(rename_all = "snake_case")] +#[strum(ascii_case_insensitive, serialize_all = "snake_case")] +pub enum ProfileType { + #[default] + Default, + WriteHeavy, + Faultless, + #[strum(disabled)] + Custom(PathBuf), +} + +impl ProfileType { + pub fn parse(s: &str) -> anyhow::Result { + if let Ok(prof) = ProfileType::from_str(s) { + Ok(prof) + } else if let path = PathBuf::from(s) + && path.exists() + { + Ok(ProfileType::Custom(path)) + } else { + Err(anyhow::anyhow!( + "failed identifying predifined profile or custom profile path" + )) + } + } +} + +/// Minimum value of field is dependent on another field in the struct +fn min_dependent(min: &T) -> impl FnOnce(&T, &()) -> garde::Result + '_ { + move |value, _| { + if value < min { + return Err(garde::Error::new(format!( + "`{value}` is smaller than `{min}`" + ))); + } + Ok(()) + } +} + +/// Maximum value of field is dependent on another field in the struct +fn 
max_dependent(max: &T) -> impl FnOnce(&T, &()) -> garde::Result + '_ { + move |value, _| { + if value > max { + return Err(garde::Error::new(format!( + "`{value}` is bigger than `{max}`" + ))); + } + Ok(()) + } +} diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs new file mode 100644 index 000000000..a58c983e0 --- /dev/null +++ b/simulator/profiles/query.rs @@ -0,0 +1,50 @@ +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sql_generation::generation::Opts; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct QueryProfile { + #[garde(dive)] + pub gen_opts: Opts, + #[garde(skip)] + pub select_weight: u32, + #[garde(skip)] + pub create_table_weight: u32, + #[garde(skip)] + pub create_index_weight: u32, + #[garde(skip)] + pub insert_weight: u32, + #[garde(skip)] + pub update_weight: u32, + #[garde(skip)] + pub delete_weight: u32, + #[garde(skip)] + pub drop_table_weight: u32, +} + +impl Default for QueryProfile { + fn default() -> Self { + Self { + gen_opts: Opts::default(), + select_weight: 60, + create_table_weight: 15, + create_index_weight: 5, + insert_weight: 30, + update_weight: 20, + delete_weight: 20, + drop_table_weight: 2, + } + } +} + +#[derive(Debug, Clone, strum::VariantArray)] +pub enum QueryTypes { + CreateTable, + CreateIndex, + Insert, + Update, + Delete, + DropTable, +} diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs index 1a3be5889..179c292f1 100644 --- a/simulator/runner/bugbase.rs +++ b/simulator/runner/bugbase.rs @@ -6,7 +6,7 @@ use std::{ time::SystemTime, }; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 1a79a121f..daa00be38 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -1,6 +1,13 @@ -use clap::{command, Parser}; +use 
clap::{ + Arg, Command, Error, Parser, + builder::{PossibleValue, TypedValueParser, ValueParserFactory}, + command, + error::{ContextKind, ContextValue, ErrorKind}, +}; use serde::{Deserialize, Serialize}; +use crate::profiles::ProfileType; + #[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] #[command(name = "limbo-simulator")] #[command(author, version, about, long_about = None)] @@ -107,34 +114,25 @@ pub struct SimulatorCLI { pub disable_faulty_query: bool, #[clap(long, help = "disable Reopen-Database fault", default_value_t = false)] pub disable_reopen_database: bool, - #[clap( - long = "latency-prob", - help = "added IO latency probability", - default_value_t = 1 - )] - pub latency_probability: usize, - #[clap( - long, - help = "Minimum tick time in microseconds for simulated time", - default_value_t = 1 - )] - pub min_tick: u64, - #[clap( - long, - help = "Maximum tick time in microseconds for simulated time", - default_value_t = 30 - )] - pub max_tick: u64, + #[clap(long = "latency-prob", help = "added IO latency probability")] + pub latency_probability: Option, + #[clap(long, help = "Minimum tick time in microseconds for simulated time")] + pub min_tick: Option, + #[clap(long, help = "Maximum tick time in microseconds for simulated time")] + pub max_tick: Option, #[clap(long, help = "Enable experimental MVCC feature")] - pub experimental_mvcc: bool, + pub experimental_mvcc: Option, #[clap(long, help = "Disable experimental indexing feature")] - pub disable_experimental_indexes: bool, + pub disable_experimental_indexes: Option, #[clap( long, help = "Keep all database and plan files", default_value_t = false )] pub keep_files: bool, + #[clap(long, default_value_t = ProfileType::Default)] + /// Profile selector for Simulation run + pub profile: ProfileType, } #[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] @@ -167,6 +165,8 @@ pub enum SimulatorCommand { )] filter: String, }, + /// 
Print profile Json Schema + PrintSchema, } impl SimulatorCLI { @@ -192,10 +192,10 @@ impl SimulatorCLI { anyhow::bail!("Cannot set seed and load plan at the same time"); } - if self.latency_probability > 100 { + if self.latency_probability.is_some_and(|prob| prob > 100) { anyhow::bail!( "latency probability must be a number between 0 and 100. Got `{}`", - self.latency_probability + self.latency_probability.unwrap() ); } @@ -206,3 +206,70 @@ impl SimulatorCLI { Ok(()) } } + +#[derive(Clone)] +pub struct ProfileTypeParser; + +impl TypedValueParser for ProfileTypeParser { + type Value = ProfileType; + + fn parse_ref( + &self, + cmd: &Command, + arg: Option<&Arg>, + value: &std::ffi::OsStr, + ) -> Result { + let s = value + .to_str() + .ok_or_else(|| Error::new(ErrorKind::InvalidUtf8).with_cmd(cmd))?; + + ProfileType::parse(s).map_err(|_| { + let mut err = Error::new(ErrorKind::InvalidValue).with_cmd(cmd); + if let Some(arg) = arg { + err.insert( + ContextKind::InvalidArg, + ContextValue::String(arg.to_string()), + ); + } + err.insert( + ContextKind::InvalidValue, + ContextValue::String(s.to_string()), + ); + err.insert( + ContextKind::ValidValue, + ContextValue::Strings( + self.possible_values() + .unwrap() + .map(|s| s.get_name().to_string()) + .collect(), + ), + ); + err + }) + } + + fn possible_values(&self) -> Option + '_>> { + use strum::VariantNames; + Some(Box::new( + Self::Value::VARIANTS + .iter() + .map(|variant| { + // Custom variant should be listed as a Custom path + if variant.eq_ignore_ascii_case("custom") { + "CUSTOM_PATH" + } else { + variant + } + }) + .map(PossibleValue::new), + )) + } +} + +impl ValueParserFactory for ProfileType { + type Parser = ProfileTypeParser; + + fn value_parser() -> Self::Parser { + ProfileTypeParser + } +} diff --git a/simulator/runner/differential.rs b/simulator/runner/differential.rs index 5723418c1..a91e02fa8 100644 --- a/simulator/runner/differential.rs +++ b/simulator/runner/differential.rs @@ -4,18 +4,18 @@ use 
sql_generation::{generation::pick_index, model::table::SimValue}; use turso_core::Value; use crate::{ + InteractionPlan, generation::{ - plan::{Interaction, InteractionPlanState, ResultSet}, Shadow as _, + plan::{Interaction, InteractionPlanState, ResultSet}, }, model::Query, runner::execution::ExecutionContinuation, - InteractionPlan, }; use super::{ env::{SimConnection, SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( @@ -249,7 +249,9 @@ fn execute_plan( match (limbo_values, rusqlite_values) { (Ok(limbo_values), Ok(rusqlite_values)) => { if limbo_values != rusqlite_values { - tracing::error!("returned values from limbo and rusqlite results do not match"); + tracing::error!( + "returned values from limbo and rusqlite results do not match" + ); let diff = limbo_values .iter() .zip(rusqlite_values.iter()) @@ -303,7 +305,9 @@ fn execute_plan( tracing::warn!("rusqlite error {}", rusqlite_err); } (Ok(limbo_result), Err(rusqlite_err)) => { - tracing::error!("limbo and rusqlite results do not match, limbo returned values but rusqlite failed"); + tracing::error!( + "limbo and rusqlite results do not match, limbo returned values but rusqlite failed" + ); tracing::error!("limbo values {:?}", limbo_result); tracing::error!("rusqlite error {}", rusqlite_err); return Err(turso_core::LimboError::InternalError( @@ -311,7 +315,9 @@ fn execute_plan( )); } (Err(limbo_err), Ok(_)) => { - tracing::error!("limbo and rusqlite results do not match, limbo failed but rusqlite returned values"); + tracing::error!( + "limbo and rusqlite results do not match, limbo failed but rusqlite returned values" + ); tracing::error!("limbo error {}", limbo_err); return Err(turso_core::LimboError::InternalError( "limbo and rusqlite results do not match".into(), diff --git a/simulator/runner/doublecheck.rs b/simulator/runner/doublecheck.rs 
index 7c9d33b4e..2b1b5fccb 100644 --- a/simulator/runner/doublecheck.rs +++ b/simulator/runner/doublecheck.rs @@ -6,13 +6,13 @@ use std::{ use sql_generation::generation::pick_index; use crate::{ - generation::plan::InteractionPlanState, runner::execution::ExecutionContinuation, - InteractionPlan, + InteractionPlan, generation::plan::InteractionPlanState, + runner::execution::ExecutionContinuation, }; use super::{ env::{SimConnection, SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( @@ -207,7 +207,9 @@ fn execute_plan( match (limbo_values, doublecheck_values) { (Ok(limbo_values), Ok(doublecheck_values)) => { if limbo_values != doublecheck_values { - tracing::error!("returned values from limbo and doublecheck results do not match"); + tracing::error!( + "returned values from limbo and doublecheck results do not match" + ); tracing::debug!("limbo values {:?}", limbo_values); tracing::debug!( "doublecheck values {:?}", @@ -231,7 +233,9 @@ fn execute_plan( } } (Ok(limbo_result), Err(doublecheck_err)) => { - tracing::error!("limbo and doublecheck results do not match, limbo returned values but doublecheck failed"); + tracing::error!( + "limbo and doublecheck results do not match, limbo returned values but doublecheck failed" + ); tracing::error!("limbo values {:?}", limbo_result); tracing::error!("doublecheck error {}", doublecheck_err); return Err(turso_core::LimboError::InternalError( @@ -239,7 +243,9 @@ fn execute_plan( )); } (Err(limbo_err), Ok(_)) => { - tracing::error!("limbo and doublecheck results do not match, limbo failed but doublecheck returned values"); + tracing::error!( + "limbo and doublecheck results do not match, limbo failed but doublecheck returned values" + ); tracing::error!("limbo error {}", limbo_err); return Err(turso_core::LimboError::InternalError( "limbo and doublecheck results do 
not match".into(), diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index a29adc591..567f2bad9 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -5,11 +5,13 @@ use std::panic::UnwindSafe; use std::path::{Path, PathBuf}; use std::sync::Arc; +use garde::Validate; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use sql_generation::model::table::Table; use turso_core::Database; +use crate::profiles::Profile; use crate::runner::io::SimulatorIO; use super::cli::SimulatorCLI; @@ -59,6 +61,7 @@ impl Deref for SimulatorTables { pub(crate) struct SimulatorEnv { pub(crate) opts: SimulatorOpts, + pub profile: Profile, pub(crate) connections: Vec, pub(crate) io: Arc, pub(crate) db: Option>, @@ -85,6 +88,7 @@ impl SimulatorEnv { paths: self.paths.clone(), type_: self.type_, phase: self.phase, + profile: self.profile.clone(), } } @@ -93,13 +97,15 @@ impl SimulatorEnv { self.connections.iter_mut().for_each(|c| c.disconnect()); self.rng = ChaCha8Rng::seed_from_u64(self.opts.seed); + let latency_prof = &self.profile.io.latency; + let io = Arc::new( SimulatorIO::new( self.opts.seed, self.opts.page_size, - self.opts.latency_probability, - self.opts.min_tick, - self.opts.max_tick, + latency_prof.latency_probability, + latency_prof.min_tick, + latency_prof.max_tick, ) .unwrap(), ); @@ -119,8 +125,8 @@ impl SimulatorEnv { let db = match Database::open_file( io.clone(), db_path.to_str().unwrap(), - self.opts.experimental_mvcc, - self.opts.experimental_indexes, + self.profile.experimental_mvcc, + self.profile.query.gen_opts.indexes, ) { Ok(db) => db, Err(e) => { @@ -161,6 +167,7 @@ impl SimulatorEnv { cli_opts: &SimulatorCLI, paths: Paths, simulation_type: SimulationType, + profile: &Profile, ) -> Self { let mut rng = ChaCha8Rng::seed_from_u64(seed); @@ -223,13 +230,6 @@ impl SimulatorEnv { max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions processing max_tables: rng.random_range(0..128), 
- create_percent, - create_index_percent, - read_percent, - write_percent, - delete_percent, - drop_percent, - update_percent, disable_select_optimizer: cli_opts.disable_select_optimizer, disable_insert_values_select: cli_opts.disable_insert_values_select, disable_double_create_failure: cli_opts.disable_double_create_failure, @@ -242,27 +242,12 @@ impl SimulatorEnv { disable_fsync_no_wait: cli_opts.disable_fsync_no_wait, disable_faulty_query: cli_opts.disable_faulty_query, page_size: 4096, // TODO: randomize this too - max_interactions: rng.random_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), + max_interactions: rng.random_range(cli_opts.minimum_tests..=cli_opts.maximum_tests) + as u32, max_time_simulation: cli_opts.maximum_time, disable_reopen_database: cli_opts.disable_reopen_database, - latency_probability: cli_opts.latency_probability, - experimental_mvcc: cli_opts.experimental_mvcc, - experimental_indexes: !cli_opts.disable_experimental_indexes, - min_tick: cli_opts.min_tick, - max_tick: cli_opts.max_tick, }; - let io = Arc::new( - SimulatorIO::new( - seed, - opts.page_size, - cli_opts.latency_probability, - cli_opts.min_tick, - cli_opts.max_tick, - ) - .unwrap(), - ); - // Remove existing database file if it exists let db_path = paths.db(&simulation_type, &SimulationPhase::Test); @@ -275,11 +260,44 @@ impl SimulatorEnv { std::fs::remove_file(&wal_path).unwrap(); } + let mut profile = profile.clone(); + // Conditionals here so that we can override some profile options from the CLI + if let Some(mvcc) = cli_opts.experimental_mvcc { + profile.experimental_mvcc = mvcc; + } + if let Some(indexes) = cli_opts.disable_experimental_indexes { + profile.query.gen_opts.indexes = indexes; + } + if let Some(latency_prob) = cli_opts.latency_probability { + profile.io.latency.latency_probability = latency_prob; + } + if let Some(max_tick) = cli_opts.max_tick { + profile.io.latency.max_tick = max_tick; + } + if let Some(min_tick) = cli_opts.min_tick { + 
profile.io.latency.min_tick = min_tick; + } + + profile.validate().unwrap(); + + let latency_prof = &profile.io.latency; + + let io = Arc::new( + SimulatorIO::new( + seed, + opts.page_size, + latency_prof.latency_probability, + latency_prof.min_tick, + latency_prof.max_tick, + ) + .unwrap(), + ); + let db = match Database::open_file( io.clone(), db_path.to_str().unwrap(), - opts.experimental_mvcc, - opts.experimental_indexes, + profile.experimental_mvcc, + profile.query.gen_opts.indexes, ) { Ok(db) => db, Err(e) => { @@ -301,6 +319,7 @@ impl SimulatorEnv { db: Some(db), type_: simulation_type, phase: SimulationPhase::Test, + profile: profile.clone(), } } @@ -394,15 +413,6 @@ pub(crate) struct SimulatorOpts { pub(crate) ticks: usize, pub(crate) max_connections: usize, pub(crate) max_tables: usize, - // this next options are the distribution of workload where read_percent + write_percent + - // delete_percent == 100% - pub(crate) create_percent: f64, - pub(crate) create_index_percent: f64, - pub(crate) read_percent: f64, - pub(crate) write_percent: f64, - pub(crate) delete_percent: f64, - pub(crate) update_percent: f64, - pub(crate) drop_percent: f64, pub(crate) disable_select_optimizer: bool, pub(crate) disable_insert_values_select: bool, @@ -416,14 +426,9 @@ pub(crate) struct SimulatorOpts { pub(crate) disable_faulty_query: bool, pub(crate) disable_reopen_database: bool, - pub(crate) max_interactions: usize, + pub(crate) max_interactions: u32, pub(crate) page_size: usize, pub(crate) max_time_simulation: usize, - pub(crate) latency_probability: usize, - pub(crate) experimental_mvcc: bool, - pub(crate) experimental_indexes: bool, - pub min_tick: u64, - pub max_tick: u64, } #[derive(Debug, Clone)] diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index fa3dcbff9..a7d7aa3d6 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -5,8 +5,8 @@ use tracing::instrument; use turso_core::{Connection, LimboError, Result, 
StepResult}; use crate::generation::{ - plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, Shadow as _, + plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, }; use super::env::{SimConnection, SimulatorEnv}; diff --git a/simulator/runner/file.rs b/simulator/runner/file.rs index bbda05b1d..0d644dc28 100644 --- a/simulator/runner/file.rs +++ b/simulator/runner/file.rs @@ -6,10 +6,10 @@ use std::{ use rand::Rng as _; use rand_chacha::ChaCha8Rng; -use tracing::{instrument, Level}; +use tracing::{Level, instrument}; use turso_core::{File, Result}; -use crate::runner::{clock::SimulatorClock, FAULT_ERROR_MSG}; +use crate::runner::{FAULT_ERROR_MSG, clock::SimulatorClock}; pub(crate) struct SimulatorFile { pub path: String, pub(crate) inner: Arc, @@ -201,7 +201,9 @@ impl File for SimulatorFile { self.nr_sync_calls.set(self.nr_sync_calls.get() + 1); if self.fault.get() { // TODO: Enable this when https://github.com/tursodatabase/turso/issues/2091 is fixed. - tracing::debug!("ignoring sync fault because it causes false positives with current simulator design"); + tracing::debug!( + "ignoring sync fault because it causes false positives with current simulator design" + ); self.fault.set(false); } let c = if let Some(latency) = self.generate_latency_duration() { diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index c6b6bdbc3..fcc23be75 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -5,7 +5,7 @@ use std::{ use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; -use turso_core::{Clock, Instant, OpenFlags, PlatformIO, Result, IO}; +use turso_core::{Clock, IO, Instant, OpenFlags, PlatformIO, Result}; use crate::runner::{clock::SimulatorClock, file::SimulatorFile}; diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs index 95d65ad64..3f2615543 100644 --- a/simulator/runner/watch.rs +++ b/simulator/runner/watch.rs @@ -10,7 +10,7 @@ use crate::{ use super::{ env::{SimConnection, 
SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( diff --git a/simulator/shrink/plan.rs b/simulator/shrink/plan.rs index bccd07afd..7def800ce 100644 --- a/simulator/shrink/plan.rs +++ b/simulator/shrink/plan.rs @@ -1,4 +1,5 @@ use crate::{ + SandboxedResult, SimulatorEnv, generation::{ plan::{Interaction, InteractionPlan, Interactions}, property::Property, @@ -6,7 +7,6 @@ use crate::{ model::Query, run_simulation, runner::execution::Execution, - SandboxedResult, SimulatorEnv, }; use std::sync::{Arc, Mutex}; diff --git a/sql_generation/Cargo.toml b/sql_generation/Cargo.toml index d84d08380..cf82bb036 100644 --- a/sql_generation/Cargo.toml +++ b/sql_generation/Cargo.toml @@ -19,6 +19,8 @@ anarchist-readable-name-generator-lib = "0.2.0" itertools = { workspace = true } anyhow = { workspace = true } tracing = { workspace = true } +schemars = { workspace = true } +garde = { workspace = true, features = ["derive", "serde"] } [dev-dependencies] rand_chacha = "0.9.0" diff --git a/sql_generation/generation/expr.rs b/sql_generation/generation/expr.rs index c07d81414..244bf6469 100644 --- a/sql_generation/generation/expr.rs +++ b/sql_generation/generation/expr.rs @@ -5,7 +5,7 @@ use turso_parser::ast::{ use crate::{ generation::{ frequency, gen_random_text, one_of, pick, pick_index, Arbitrary, ArbitraryFrom, - ArbitrarySizedFrom, GenerationContext, + ArbitrarySized, ArbitrarySizedFrom, GenerationContext, }, model::table::SimValue, }; @@ -14,8 +14,21 @@ impl Arbitrary for Box where T: Arbitrary, { - fn arbitrary(rng: &mut R) -> Self { - Box::from(T::arbitrary(rng)) + fn arbitrary(rng: &mut R, context: &C) -> Self { + Box::from(T::arbitrary(rng, context)) + } +} + +impl ArbitrarySized for Box +where + T: ArbitrarySized, +{ + fn arbitrary_sized( + rng: &mut R, + context: &C, + size: usize, + ) -> Self { + 
Box::from(T::arbitrary_sized(rng, context, size)) } } @@ -23,8 +36,13 @@ impl ArbitrarySizedFrom for Box where T: ArbitrarySizedFrom, { - fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { - Box::from(T::arbitrary_sized_from(rng, t, size)) + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: A, + size: usize, + ) -> Self { + Box::from(T::arbitrary_sized_from(rng, context, t, size)) } } @@ -32,8 +50,8 @@ impl Arbitrary for Option where T: Arbitrary, { - fn arbitrary(rng: &mut R) -> Self { - rng.random_bool(0.5).then_some(T::arbitrary(rng)) + fn arbitrary(rng: &mut R, context: &C) -> Self { + rng.random_bool(0.5).then_some(T::arbitrary(rng, context)) } } @@ -41,9 +59,14 @@ impl ArbitrarySizedFrom for Option where T: ArbitrarySizedFrom, { - fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: A, + size: usize, + ) -> Self { rng.random_bool(0.5) - .then_some(T::arbitrary_sized_from(rng, t, size)) + .then_some(T::arbitrary_sized_from(rng, context, t, size)) } } @@ -51,20 +74,26 @@ impl ArbitraryFrom for Vec where T: ArbitraryFrom, { - fn arbitrary_from(rng: &mut R, t: A) -> Self { + fn arbitrary_from(rng: &mut R, context: &C, t: A) -> Self { let size = rng.random_range(0..5); - (0..size).map(|_| T::arbitrary_from(rng, t)).collect() + (0..size) + .map(|_| T::arbitrary_from(rng, context, t)) + .collect() } } // Freestyling generation -impl ArbitrarySizedFrom<&C> for Expr { - fn arbitrary_sized_from(rng: &mut R, t: &C, size: usize) -> Self { +impl ArbitrarySized for Expr { + fn arbitrary_sized( + rng: &mut R, + context: &C, + size: usize, + ) -> Self { frequency( vec![ ( 1, - Box::new(|rng| Expr::Literal(ast::Literal::arbitrary_from(rng, t))), + Box::new(|rng| Expr::Literal(ast::Literal::arbitrary(rng, context))), ), ( size, @@ -79,9 +108,9 @@ impl ArbitrarySizedFrom<&C> for Expr { // }), Box::new(|rng: &mut R| { Expr::Binary( - Box::arbitrary_sized_from(rng, t, size - 1), - 
Operator::arbitrary(rng), - Box::arbitrary_sized_from(rng, t, size - 1), + Box::arbitrary_sized(rng, context, size - 1), + Operator::arbitrary(rng, context), + Box::arbitrary_sized(rng, context, size - 1), ) }), // Box::new(|rng| Expr::Case { @@ -133,8 +162,8 @@ impl ArbitrarySizedFrom<&C> for Expr { // }) Box::new(|rng| { Expr::Unary( - UnaryOperator::arbitrary_from(rng, t), - Box::arbitrary_sized_from(rng, t, size - 1), + UnaryOperator::arbitrary(rng, context), + Box::arbitrary_sized(rng, context, size - 1), ) }), // TODO: skip Exists for now @@ -159,7 +188,7 @@ impl ArbitrarySizedFrom<&C> for Expr { } impl Arbitrary for Operator { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { let choices = [ Operator::Add, Operator::And, @@ -190,7 +219,7 @@ impl Arbitrary for Operator { } impl Arbitrary for Type { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { let name = pick(&["INT", "INTEGER", "REAL", "TEXT", "BLOB", "ANY"], rng).to_string(); Self { name, @@ -199,11 +228,11 @@ impl Arbitrary for Type { } } -impl ArbitraryFrom<&C> for QualifiedName { - fn arbitrary_from(rng: &mut R, t: &C) -> Self { +impl Arbitrary for QualifiedName { + fn arbitrary(rng: &mut R, context: &C) -> Self { // TODO: for now just generate table name - let table_idx = pick_index(t.tables().len(), rng); - let table = &t.tables()[table_idx]; + let table_idx = pick_index(context.tables().len(), rng); + let table = &context.tables()[table_idx]; // TODO: for now forego alias Self { db_name: None, @@ -213,8 +242,8 @@ impl ArbitraryFrom<&C> for QualifiedName { } } -impl ArbitraryFrom<&C> for LikeOperator { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for LikeOperator { + fn arbitrary(rng: &mut R, _t: &C) -> Self { let choice = rng.random_range(0..4); match choice { 0 => LikeOperator::Glob, @@ -227,8 +256,8 @@ impl ArbitraryFrom<&C> for LikeOperator { } // Current implementation does not take into 
account the columns affinity nor if table is Strict -impl ArbitraryFrom<&C> for ast::Literal { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for ast::Literal { + fn arbitrary(rng: &mut R, _t: &C) -> Self { loop { let choice = rng.random_range(0..5); let lit = match choice { @@ -255,7 +284,11 @@ impl ArbitraryFrom<&C> for ast::Literal { // Creates a litreal value impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self::Literal(ast::Literal::Null); } @@ -265,8 +298,8 @@ impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr { } } -impl ArbitraryFrom<&C> for UnaryOperator { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for UnaryOperator { + fn arbitrary(rng: &mut R, _t: &C) -> Self { let choice = rng.random_range(0..4); match choice { 0 => Self::BitwiseNot, diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 25bd7ec09..25f353673 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -3,24 +3,13 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distr::uniform::SampleUniform, Rng}; -use crate::model::table::Table; - pub mod expr; +pub mod opts; pub mod predicate; pub mod query; pub mod table; -#[derive(Debug, Clone, Copy)] -pub struct Opts { - /// Indexes enabled - pub indexes: bool, -} - -/// Trait used to provide context to generation functions -pub trait GenerationContext { - fn tables(&self) -> &Vec; - fn opts(&self) -> Opts; -} +pub use opts::*; type ArbitraryFromFunc<'a, R, T> = Box T + 'a>; type Choice<'a, R, T> = (usize, Box Option + 'a>); @@ -30,7 +19,7 @@ type Choice<'a, R, T> = (usize, Box Option + 'a>); /// the possible values of the type, with a bias towards smaller values for /// practicality. 
pub trait Arbitrary { - fn arbitrary(rng: &mut R) -> Self; + fn arbitrary(rng: &mut R, context: &C) -> Self; } /// ArbitrarySized trait for generating random values of a specific size @@ -40,7 +29,8 @@ pub trait Arbitrary { /// must fit in the given size. This is useful for generating values that are /// constrained by a specific size, such as integers or strings. pub trait ArbitrarySized { - fn arbitrary_sized(rng: &mut R, size: usize) -> Self; + fn arbitrary_sized(rng: &mut R, context: &C, size: usize) + -> Self; } /// ArbitraryFrom trait for generating random values from a given value @@ -49,7 +39,7 @@ pub trait ArbitrarySized { /// such as generating an integer within an interval, or a value that fits in a table, /// or a predicate satisfying a given table row. pub trait ArbitraryFrom { - fn arbitrary_from(rng: &mut R, t: T) -> Self; + fn arbitrary_from(rng: &mut R, context: &C, t: T) -> Self; } /// ArbitrarySizedFrom trait for generating random values from a given value @@ -61,12 +51,21 @@ pub trait ArbitraryFrom { /// This is useful for generating values that are constrained by a specific size, /// such as integers or strings, while still being dependent on the given value. pub trait ArbitrarySizedFrom { - fn arbitrary_sized_from(rng: &mut R, t: T, size: usize) -> Self; + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: T, + size: usize, + ) -> Self; } /// ArbitraryFromMaybe trait for fallibally generating random values from a given value pub trait ArbitraryFromMaybe { - fn arbitrary_from_maybe(rng: &mut R, t: T) -> Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + t: T, + ) -> Option where Self: Sized; } @@ -143,11 +142,15 @@ pub fn pick_index(choices: usize, rng: &mut R) -> usize { /// pick_n_unique is a helper function for uniformly picking N unique elements from a range. /// The elements themselves are usize, typically representing indices. 
-pub fn pick_n_unique(range: std::ops::Range, n: usize, rng: &mut R) -> Vec { +pub fn pick_n_unique( + range: std::ops::Range, + n: usize, + rng: &mut R, +) -> impl Iterator { use rand::seq::SliceRandom; let mut items: Vec = range.collect(); items.shuffle(rng); - items.into_iter().take(n).collect() + items.into_iter().take(n) } /// gen_random_text uses `anarchist_readable_name_generator_lib` to generate random @@ -169,20 +172,41 @@ pub fn gen_random_text(rng: &mut T) -> String { } } -pub fn pick_unique( - items: &[T], +pub fn pick_unique<'a, T: PartialEq>( + items: &'a [T], count: usize, rng: &mut impl rand::Rng, -) -> Vec -where - ::Owned: PartialEq, -{ - let mut picked: Vec = Vec::new(); +) -> impl Iterator { + let mut picked: Vec<&T> = Vec::new(); while picked.len() < count { let item = pick(items, rng); - if !picked.contains(&item.to_owned()) { - picked.push(item.to_owned()); + if !picked.contains(&item) { + picked.push(item); + } + } + picked.into_iter() +} + +#[cfg(test)] +mod tests { + use crate::{ + generation::{GenerationContext, Opts}, + model::table::Table, + }; + + #[derive(Debug, Default, Clone)] + pub struct TestContext { + pub opts: Opts, + pub tables: Vec
, + } + + impl GenerationContext for TestContext { + fn tables(&self) -> &Vec
{ + &self.tables + } + + fn opts(&self) -> &Opts { + &self.opts } } - picked } diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs new file mode 100644 index 000000000..190033748 --- /dev/null +++ b/sql_generation/generation/opts.rs @@ -0,0 +1,238 @@ +use std::{ + fmt::Display, + num::{NonZero, NonZeroU32}, + ops::Range, +}; + +use garde::Validate; +use rand::distr::weighted::WeightedIndex; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::model::table::Table; + +/// Trait used to provide context to generation functions +pub trait GenerationContext { + fn tables(&self) -> &Vec
; + fn opts(&self) -> &Opts; +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct Opts { + #[garde(skip)] + /// Indexes enabled + pub indexes: bool, + #[garde(dive)] + pub table: TableOpts, + #[garde(dive)] + pub query: QueryOpts, +} + +impl Default for Opts { + fn default() -> Self { + Self { + indexes: true, + table: Default::default(), + query: Default::default(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct TableOpts { + #[garde(dive)] + pub large_table: LargeTableOpts, + /// Range of numbers of columns to generate + #[garde(custom(range_struct_min(1)))] + pub column_range: Range, +} + +impl Default for TableOpts { + fn default() -> Self { + Self { + large_table: Default::default(), + // Up to 10 columns + column_range: 1..11, + } + } +} + +/// Options for generating large tables +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct LargeTableOpts { + #[garde(skip)] + pub enable: bool, + #[garde(range(min = 0.0, max = 1.0))] + pub large_table_prob: f64, + + /// Range of numbers of columns to generate + #[garde(custom(range_struct_min(1)))] + pub column_range: Range, +} + +impl Default for LargeTableOpts { + fn default() -> Self { + Self { + enable: true, + large_table_prob: 0.1, + // todo: make this higher (128+) + column_range: 64..125, + } + } +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct QueryOpts { + #[garde(dive)] + pub select: SelectOpts, + #[garde(dive)] + pub from_clause: FromClauseOpts, + #[garde(dive)] + pub insert: InsertOpts, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct SelectOpts { + #[garde(range(min = 0.0, max = 1.0))] + pub order_by_prob: f64, 
+ #[garde(length(min = 1))] + pub compound_selects: Vec, +} + +impl Default for SelectOpts { + fn default() -> Self { + Self { + order_by_prob: 0.3, + compound_selects: vec![ + CompoundSelectWeight { + num_compound_selects: 0, + weight: 95, + }, + CompoundSelectWeight { + num_compound_selects: 1, + weight: 4, + }, + CompoundSelectWeight { + num_compound_selects: 2, + weight: 1, + }, + ], + } + } +} + +impl SelectOpts { + pub fn compound_select_weighted_index(&self) -> WeightedIndex { + WeightedIndex::new(self.compound_selects.iter().map(|weight| weight.weight)).unwrap() + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct CompoundSelectWeight { + pub num_compound_selects: u32, + pub weight: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields)] +pub struct FromClauseOpts { + #[garde(length(min = 1))] + pub joins: Vec, +} + +impl Default for FromClauseOpts { + fn default() -> Self { + Self { + joins: vec![ + JoinWeight { + num_joins: 0, + weight: 90, + }, + JoinWeight { + num_joins: 1, + weight: 7, + }, + JoinWeight { + num_joins: 2, + weight: 3, + }, + ], + } + } +} + +impl FromClauseOpts { + pub fn as_weighted_index(&self) -> WeightedIndex { + WeightedIndex::new(self.joins.iter().map(|weight| weight.weight)).unwrap() + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] +pub struct JoinWeight { + pub num_joins: u32, + pub weight: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields)] +pub struct InsertOpts { + #[garde(skip)] + pub min_rows: NonZeroU32, + #[garde(skip)] + pub max_rows: NonZeroU32, +} + +impl Default for InsertOpts { + fn default() -> Self { + Self { + min_rows: NonZero::new(1).unwrap(), + max_rows: NonZero::new(10).unwrap(), + } + } +} + +fn range_struct_min( + min: T, +) -> impl 
FnOnce(&Range, &()) -> garde::Result { + move |value, _| { + if value.start < min { + return Err(garde::Error::new(format!( + "range start `{}` is smaller than {min}", + value.start + ))); + } else if value.end < min { + return Err(garde::Error::new(format!( + "range end `{}` is smaller than {min}", + value.end + ))); + } + Ok(()) + } +} + +#[allow(dead_code)] +fn range_struct_max( + max: T, +) -> impl FnOnce(&Range, &()) -> garde::Result { + move |value, _| { + if value.start > max { + return Err(garde::Error::new(format!( + "range start `{}` is smaller than {max}", + value.start + ))); + } else if value.end > max { + return Err(garde::Error::new(format!( + "range end `{}` is smaller than {max}", + value.end + ))); + } + Ok(()) + } +} diff --git a/sql_generation/generation/predicate/binary.rs b/sql_generation/generation/predicate/binary.rs index 29c1727a9..a5901a9f8 100644 --- a/sql_generation/generation/predicate/binary.rs +++ b/sql_generation/generation/predicate/binary.rs @@ -7,7 +7,7 @@ use crate::{ backtrack, one_of, pick, predicate::{CompoundPredicate, SimplePredicate}, table::{GTValue, LTValue, LikeValue}, - ArbitraryFrom, ArbitraryFromMaybe as _, + ArbitraryFrom, ArbitraryFromMaybe as _, GenerationContext, }, model::{ query::predicate::Predicate, @@ -17,8 +17,9 @@ use crate::{ impl Predicate { /// Generate an [ast::Expr::Binary] [Predicate] from a column and [SimValue] - pub fn from_column_binary( + pub fn from_column_binary( rng: &mut R, + context: &C, column_name: &str, value: &SimValue, ) -> Predicate { @@ -32,7 +33,7 @@ impl Predicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), ast::Operator::Greater, @@ -40,7 +41,7 @@ impl Predicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; 
Expr::Binary( Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), ast::Operator::Less, @@ -54,7 +55,12 @@ impl Predicate { } /// Produces a true [ast::Expr::Binary] [Predicate] that is true for the provided row in the given table - pub fn true_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + pub fn true_binary( + rng: &mut R, + context: &C, + t: &Table, + row: &[SimValue], + ) -> Predicate { // Pick a column let column_index = rng.random_range(0..t.columns.len()); let mut column = t.columns[column_index].clone(); @@ -93,7 +99,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let v = SimValue::arbitrary_from(rng, &column.column_type); + let v = SimValue::arbitrary_from(rng, context, &column.column_type); if &v == value { None } else { @@ -111,7 +117,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; Some(Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -125,7 +131,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Some(Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -140,7 +146,7 @@ impl Predicate { 1, Box::new(|rng| { // TODO: generation for Like and Glob expressions should be extracted to different module - LikeValue::arbitrary_from_maybe(rng, value).map(|like| { + LikeValue::arbitrary_from_maybe(rng, context, value).map(|like| { Expr::Like { lhs: Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -162,7 +168,12 @@ impl Predicate { } /// Produces an [ast::Expr::Binary] [Predicate] that is false for the provided row in the given table - pub fn false_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + pub fn false_binary( + rng: &mut R, + context: &C, + t: &Table, + row: &[SimValue], + ) -> Predicate { // Pick a column let column_index = 
rng.random_range(0..t.columns.len()); let mut column = t.columns[column_index].clone(); @@ -197,7 +208,7 @@ impl Predicate { }), Box::new(|rng| { let v = loop { - let v = SimValue::arbitrary_from(rng, &column.column_type); + let v = SimValue::arbitrary_from(rng, context, &column.column_type); if &v != value { break v; } @@ -212,7 +223,7 @@ impl Predicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -223,7 +234,7 @@ impl Predicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -242,8 +253,9 @@ impl Predicate { impl SimplePredicate { /// Generates a true [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn true_binary( + pub fn true_binary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -271,7 +283,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, column_value).0; + let lt_value = LTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(Expr::Qualified( ast::Name::new(table_name), @@ -282,7 +294,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, column_value).0; + let gt_value = GTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(Expr::Qualified( ast::Name::new(table_name), @@ -299,8 +311,9 @@ impl SimplePredicate { } /// Generates a false [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn false_binary( + pub fn false_binary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -328,7 +341,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let gt_value = 
GTValue::arbitrary_from(rng, column_value).0; + let gt_value = GTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(table_name), @@ -339,7 +352,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, column_value).0; + let lt_value = LTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(table_name), @@ -360,8 +373,9 @@ impl CompoundPredicate { /// Decide if you want to create an AND or an OR /// /// Creates a Compound Predicate that is TRUE or FALSE for at least a single row - pub fn from_table_binary( + pub fn from_table_binary( rng: &mut R, + context: &C, table: &T, predicate_value: bool, ) -> Self { @@ -381,7 +395,7 @@ impl CompoundPredicate { // An AND for false requires at least one of its children to be false if predicate_value { (0..rng.random_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, true)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, context, (table, row, true)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -405,7 +419,7 @@ impl CompoundPredicate { booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, context, (table, row, *b)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -431,7 +445,7 @@ impl CompoundPredicate { booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, context, (table, row, *b)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -442,7 +456,7 @@ impl CompoundPredicate { .unwrap_or(Predicate::true_()) } else { (0..rng.random_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, false)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, context, (table, row, false)).0) .reduce(|accum, curr| { 
Predicate(Expr::Binary( Box::new(accum.0), @@ -463,7 +477,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::{ query::predicate::{expr_to_value, Predicate}, table::{SimValue, Table}, @@ -481,20 +497,22 @@ mod tests { fn fuzz_true_binary_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::true_binary(&mut rng, &table, row); + let predicate = Predicate::true_binary(&mut rng, context, &table, row); let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -507,20 +525,22 @@ mod tests { fn fuzz_false_binary_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::false_binary(&mut rng, &table, row); + let predicate = Predicate::false_binary(&mut rng, context, &table, row); let value = 
expr_to_value(&predicate.0, row, &table); assert!( !value.as_ref().is_some_and(|value| value.as_bool()), @@ -533,21 +553,23 @@ mod tests { fn fuzz_true_binary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::true_binary(&mut rng, &table, row); + let predicate = SimplePredicate::true_binary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) @@ -561,21 +583,23 @@ mod tests { fn fuzz_false_binary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::false_binary(&mut rng, &table, row); + let predicate = SimplePredicate::false_binary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) diff --git a/sql_generation/generation/predicate/mod.rs b/sql_generation/generation/predicate/mod.rs index b919ad0bd..78fa30ae4 100644 --- 
a/sql_generation/generation/predicate/mod.rs +++ b/sql_generation/generation/predicate/mod.rs @@ -1,9 +1,12 @@ use rand::{seq::SliceRandom as _, Rng}; use turso_parser::ast::{self, Expr}; -use crate::model::{ - query::predicate::Predicate, - table::{SimValue, Table, TableContext}, +use crate::{ + generation::GenerationContext, + model::{ + query::predicate::Predicate, + table::{SimValue, Table, TableContext}, + }, }; use super::{one_of, ArbitraryFrom}; @@ -18,20 +21,24 @@ struct CompoundPredicate(Predicate); struct SimplePredicate(Predicate); impl, T: TableContext> ArbitraryFrom<(&T, A, bool)> for SimplePredicate { - fn arbitrary_from(rng: &mut R, (table, row, predicate_value): (&T, A, bool)) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, row, predicate_value): (&T, A, bool), + ) -> Self { let row = row.as_ref(); // Pick an operator let choice = rng.random_range(0..2); // Pick an operator match predicate_value { true => match choice { - 0 => SimplePredicate::true_binary(rng, table, row), - 1 => SimplePredicate::true_unary(rng, table, row), + 0 => SimplePredicate::true_binary(rng, context, table, row), + 1 => SimplePredicate::true_unary(rng, context, table, row), _ => unreachable!(), }, false => match choice { - 0 => SimplePredicate::false_binary(rng, table, row), - 1 => SimplePredicate::false_unary(rng, table, row), + 0 => SimplePredicate::false_binary(rng, context, table, row), + 1 => SimplePredicate::false_unary(rng, context, table, row), _ => unreachable!(), }, } @@ -39,43 +46,59 @@ impl, T: TableContext> ArbitraryFrom<(&T, A, bool)> for Sim } impl ArbitraryFrom<(&T, bool)> for CompoundPredicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { - CompoundPredicate::from_table_binary(rng, table, predicate_value) + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, predicate_value): (&T, bool), + ) -> Self { + CompoundPredicate::from_table_binary(rng, context, table, predicate_value) } } impl 
ArbitraryFrom<&T> for Predicate { - fn arbitrary_from(rng: &mut R, table: &T) -> Self { + fn arbitrary_from(rng: &mut R, context: &C, table: &T) -> Self { let predicate_value = rng.random_bool(0.5); - Predicate::arbitrary_from(rng, (table, predicate_value)).parens() + Predicate::arbitrary_from(rng, context, (table, predicate_value)).parens() } } impl ArbitraryFrom<(&T, bool)> for Predicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { - CompoundPredicate::arbitrary_from(rng, (table, predicate_value)).0 + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, predicate_value): (&T, bool), + ) -> Self { + CompoundPredicate::arbitrary_from(rng, context, (table, predicate_value)).0 } } impl ArbitraryFrom<(&str, &SimValue)> for Predicate { - fn arbitrary_from(rng: &mut R, (column_name, value): (&str, &SimValue)) -> Self { - Predicate::from_column_binary(rng, column_name, value) + fn arbitrary_from( + rng: &mut R, + context: &C, + (column_name, value): (&str, &SimValue), + ) -> Self { + Predicate::from_column_binary(rng, context, column_name, value) } } impl ArbitraryFrom<(&Table, &Vec)> for Predicate { - fn arbitrary_from(rng: &mut R, (t, row): (&Table, &Vec)) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + (t, row): (&Table, &Vec), + ) -> Self { // We want to produce a predicate that is true for the row // We can do this by creating several predicates that // are true, some that are false, combiend them in ways that correspond to the creation of a true predicate // Produce some true and false predicates let mut true_predicates = (1..=rng.random_range(1..=4)) - .map(|_| Predicate::true_binary(rng, t, row)) + .map(|_| Predicate::true_binary(rng, context, t, row)) .collect::>(); let false_predicates = (0..=rng.random_range(0..=3)) - .map(|_| Predicate::false_binary(rng, t, row)) + .map(|_| Predicate::false_binary(rng, context, t, row)) .collect::>(); // Start building a top level predicate from a true predicate @@ 
-231,7 +254,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::{ query::predicate::{expr_to_value, Predicate}, table::{SimValue, Table}, @@ -249,20 +274,23 @@ mod tests { fn fuzz_arbitrary_table_true_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, true)).0; + let predicate = + SimplePredicate::arbitrary_from(&mut rng, context, (&table, row, true)).0; let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -275,20 +303,23 @@ mod tests { fn fuzz_arbitrary_table_false_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, false)).0; + let predicate = + 
SimplePredicate::arbitrary_from(&mut rng, context, (&table, row, false)).0; let value = expr_to_value(&predicate.0, row, &table); assert!( !value.as_ref().is_some_and(|value| value.as_bool()), @@ -301,20 +332,22 @@ mod tests { fn fuzz_arbitrary_row_table_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, row)); + let predicate = Predicate::arbitrary_from(&mut rng, context, (&table, row)); let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -327,20 +360,22 @@ mod tests { fn fuzz_arbitrary_true_table_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, true)); + let predicate = Predicate::arbitrary_from(&mut rng, context, (&table, true)); let result = values .iter() .map(|row| predicate.test(row, &table)) @@ -354,20 +389,22 @@ mod tests { fn fuzz_arbitrary_false_table_predicate() { let seed = get_seed(); 
let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, false)); + let predicate = Predicate::arbitrary_from(&mut rng, context, (&table, false)); let result = values .iter() .map(|row| predicate.test(row, &table)) diff --git a/sql_generation/generation/predicate/unary.rs b/sql_generation/generation/predicate/unary.rs index 62c6d7d65..bfcd1cff0 100644 --- a/sql_generation/generation/predicate/unary.rs +++ b/sql_generation/generation/predicate/unary.rs @@ -5,7 +5,9 @@ use turso_parser::ast::{self, Expr}; use crate::{ - generation::{backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe}, + generation::{ + backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe, GenerationContext, + }, model::{ query::predicate::Predicate, table::{SimValue, TableContext}, @@ -15,7 +17,11 @@ use crate::{ pub struct TrueValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for TrueValue { - fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + fn arbitrary_from_maybe( + _rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option where Self: Sized, { @@ -25,7 +31,11 @@ impl ArbitraryFromMaybe<&SimValue> for TrueValue { } impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue { - fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Option where Self: Sized, { @@ -34,14 +44,18 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue 
{ } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, *value) + Self::arbitrary_from_maybe(rng, context, *value) } } pub struct FalseValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for FalseValue { - fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + fn arbitrary_from_maybe( + _rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option where Self: Sized, { @@ -51,7 +65,11 @@ impl ArbitraryFromMaybe<&SimValue> for FalseValue { } impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { - fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Option where Self: Sized, { @@ -60,7 +78,7 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, *value) + Self::arbitrary_from_maybe(rng, context, *value) } } @@ -68,8 +86,9 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { pub struct BitNotValue(pub SimValue); impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue { - fn arbitrary_from_maybe( + fn arbitrary_from_maybe( _rng: &mut R, + _context: &C, (value, predicate): (&SimValue, bool), ) -> Option where @@ -82,8 +101,9 @@ impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue { } impl ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue { - fn arbitrary_from_maybe( + fn arbitrary_from_maybe( rng: &mut R, + context: &C, (values, predicate): (&Vec<&SimValue>, bool), ) -> Option where @@ -94,15 +114,16 @@ impl ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue { } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, (*value, predicate)) + Self::arbitrary_from_maybe(rng, context, (*value, predicate)) } } // TODO: have some more complex generation with columns names here as well impl SimplePredicate { /// Generates a true [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for some values in the table - 
pub fn true_unary( + pub fn true_unary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -120,7 +141,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + TrueValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(value.0.as_bool()); // Positive is a no-op in Sqlite Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) @@ -151,7 +172,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + FalseValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(!value.0.as_bool()); Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into())) }) @@ -167,8 +188,9 @@ impl SimplePredicate { } /// Generates a false [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn false_unary( + pub fn false_unary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -217,7 +239,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + TrueValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(value.0.as_bool()); Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into())) }) @@ -239,7 +261,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::table::{SimValue, Table}, }; @@ -254,21 +278,23 @@ mod tests { fn fuzz_true_unary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = 
rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::true_unary(&mut rng, &table, row); + let predicate = SimplePredicate::true_unary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) @@ -282,21 +308,23 @@ mod tests { fn fuzz_false_unary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::false_unary(&mut rng, &table, row); + let predicate = SimplePredicate::false_unary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index d7840a001..e2a36ddb9 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -1,5 +1,5 @@ use crate::generation::{ - gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom, + gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySized, GenerationContext, }; use crate::model::query::predicate::Predicate; @@ -17,23 +17,20 @@ use turso_parser::ast::{Expr, SortOrder}; use super::{backtrack, 
pick}; impl Arbitrary for Create { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, context: &C) -> Self { Create { - table: Table::arbitrary(rng), + table: Table::arbitrary(rng, context), } } } -impl ArbitraryFrom<&Vec
> for FromClause { - fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { - let num_joins = match rng.random_range(0..=100) { - 0..=90 => 0, - 91..=97 => 1, - 98..=100 => 2, - _ => unreachable!(), - }; +impl Arbitrary for FromClause { + fn arbitrary(rng: &mut R, context: &C) -> Self { + let opts = &context.opts().query.from_clause; + let weights = opts.as_weighted_index(); + let num_joins = opts.joins[rng.sample(weights)].num_joins; - let mut tables = tables.clone(); + let mut tables = context.tables().clone(); let mut table = pick(&tables, rng).clone(); tables.retain(|t| t.name != table.name); @@ -74,7 +71,7 @@ impl ArbitraryFrom<&Vec
> for FromClause { ); } - let predicate = Predicate::arbitrary_from(rng, &table); + let predicate = Predicate::arbitrary_from(rng, context, &table); Some(JoinedTable { table: joined_table_name, join_type: JoinType::Inner, @@ -86,31 +83,32 @@ impl ArbitraryFrom<&Vec
> for FromClause { } } -impl ArbitraryFrom<&C> for SelectInner { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { - let from = FromClause::arbitrary_from(rng, env.tables()); +impl Arbitrary for SelectInner { + fn arbitrary(rng: &mut R, env: &C) -> Self { + let from = FromClause::arbitrary(rng, env); let tables = env.tables().clone(); let join_table = from.into_join_table(&tables); let cuml_col_count = join_table.columns().count(); - let order_by = 'order_by: { - if rng.random_bool(0.3) { + let order_by = rng + .random_bool(env.opts().query.select.order_by_prob) + .then(|| { let order_by_table_candidates = from .joins .iter() - .map(|j| j.table.clone()) - .chain(std::iter::once(from.table.clone())) + .map(|j| &j.table) + .chain(std::iter::once(&from.table)) .collect::>(); let order_by_col_count = (rng.random::() * rng.random::() * (cuml_col_count as f64)) as usize; // skew towards 0 if order_by_col_count == 0 { - break 'order_by None; + return None; } let mut col_names = std::collections::HashSet::new(); let mut order_by_cols = Vec::new(); while order_by_cols.len() < order_by_col_count { let table = pick(&order_by_table_candidates, rng); - let table = tables.iter().find(|t| t.name == *table).unwrap(); + let table = tables.iter().find(|t| t.name == table.as_str()).unwrap(); let col = pick(&table.columns, rng); let col_name = format!("{}.{}", table.name, col.name); if col_names.insert(col_name.clone()) { @@ -127,38 +125,38 @@ impl ArbitraryFrom<&C> for SelectInner { Some(OrderBy { columns: order_by_cols, }) - } else { - None - } - }; + }) + .flatten(); SelectInner { distinctness: if env.opts().indexes { - Distinctness::arbitrary(rng) + Distinctness::arbitrary(rng, env) } else { Distinctness::All }, columns: vec![ResultColumn::Star], from: Some(from), - where_clause: Predicate::arbitrary_from(rng, &join_table), + where_clause: Predicate::arbitrary_from(rng, env, &join_table), order_by, } } } -impl ArbitrarySizedFrom<&C> for SelectInner { - fn 
arbitrary_sized_from(rng: &mut R, env: &C, num_result_columns: usize) -> Self { - let mut select_inner = SelectInner::arbitrary_from(rng, env); +impl ArbitrarySized for SelectInner { + fn arbitrary_sized( + rng: &mut R, + env: &C, + num_result_columns: usize, + ) -> Self { + let mut select_inner = SelectInner::arbitrary(rng, env); let select_from = &select_inner.from.as_ref().unwrap(); let table_names = select_from .joins .iter() - .map(|j| j.table.clone()) - .chain(std::iter::once(select_from.table.clone())) - .collect::>(); + .map(|j| &j.table) + .chain(std::iter::once(&select_from.table)); let flat_columns_names = table_names - .iter() .flat_map(|t| { env.tables() .iter() @@ -166,29 +164,30 @@ impl ArbitrarySizedFrom<&C> for SelectInner { .unwrap() .columns .iter() - .map(|c| format!("{}.{}", t.clone(), c.name)) + .map(move |c| format!("{}.{}", t, c.name)) }) .collect::>(); let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng); - let mut columns = Vec::new(); - for column_name in selected_columns { - columns.push(ResultColumn::Column(column_name.clone())); - } + let columns = selected_columns + .map(|col_name| ResultColumn::Column(col_name.clone())) + .collect(); + select_inner.columns = columns; select_inner } } impl Arbitrary for Distinctness { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { match rng.random_range(0..=5) { 0..4 => Distinctness::All, _ => Distinctness::Distinct, } } } + impl Arbitrary for CompoundOperator { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { match rng.random_range(0..=1) { 0 => CompoundOperator::Union, 1 => CompoundOperator::UnionAll, @@ -202,26 +201,23 @@ impl Arbitrary for CompoundOperator { /// arbitrary expressions without referring to the tables. 
pub struct SelectFree(pub Select); -impl ArbitraryFrom<&C> for SelectFree { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { - let expr = Predicate(Expr::arbitrary_sized_from(rng, env, 8)); +impl Arbitrary for SelectFree { + fn arbitrary(rng: &mut R, env: &C) -> Self { + let expr = Predicate(Expr::arbitrary_sized(rng, env, 8)); let select = Select::expr(expr); Self(select) } } -impl ArbitraryFrom<&C> for Select { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Select { + fn arbitrary(rng: &mut R, env: &C) -> Self { // Generate a number of selects based on the query size // If experimental indexes are enabled, we can have selects with compounds // Otherwise, we just have a single select with no compounds + let opts = &env.opts().query.select; let num_compound_selects = if env.opts().indexes { - match rng.random_range(0..=100) { - 0..=95 => 0, - 96..=99 => 1, - 100 => 2, - _ => unreachable!(), - } + opts.compound_selects[rng.sample(opts.compound_select_weighted_index())] + .num_compound_selects } else { 0 }; @@ -231,10 +227,10 @@ impl ArbitraryFrom<&C> for Select { let num_result_columns = rng.random_range(1..=min_column_count_across_tables); - let mut first = SelectInner::arbitrary_sized_from(rng, env, num_result_columns); + let mut first = SelectInner::arbitrary_sized(rng, env, num_result_columns); let mut rest: Vec = (0..num_compound_selects) - .map(|_| SelectInner::arbitrary_sized_from(rng, env, num_result_columns)) + .map(|_| SelectInner::arbitrary_sized(rng, env, num_result_columns)) .collect(); if !rest.is_empty() { @@ -251,7 +247,7 @@ impl ArbitraryFrom<&C> for Select { compounds: rest .into_iter() .map(|s| CompoundSelect { - operator: CompoundOperator::arbitrary(rng), + operator: CompoundOperator::arbitrary(rng, env), select: Box::new(s), }) .collect(), @@ -261,17 +257,18 @@ impl ArbitraryFrom<&C> for Select { } } -impl ArbitraryFrom<&C> for Insert { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Insert { + 
fn arbitrary(rng: &mut R, env: &C) -> Self { + let opts = &env.opts().query.insert; let gen_values = |rng: &mut R| { let table = pick(env.tables(), rng); - let num_rows = rng.random_range(1..10); + let num_rows = rng.random_range(opts.min_rows.get()..opts.max_rows.get()); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(rng, env, &c.column_type)) .collect() }) .collect(); @@ -285,7 +282,7 @@ impl ArbitraryFrom<&C> for Insert { // Find a non-empty table let select_table = env.tables().iter().find(|t| !t.rows.is_empty())?; let row = pick(&select_table.rows, rng); - let predicate = Predicate::arbitrary_from(rng, (select_table, row)); + let predicate = Predicate::arbitrary_from(rng, env, (select_table, row)); // Pick another table to insert into let select = Select::simple(select_table.name.clone(), predicate); let table = pick(env.tables(), rng); @@ -301,18 +298,18 @@ impl ArbitraryFrom<&C> for Insert { } } -impl ArbitraryFrom<&C> for Delete { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Delete { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); Self { table: table.name.clone(), - predicate: Predicate::arbitrary_from(rng, table), + predicate: Predicate::arbitrary_from(rng, env, table), } } } -impl ArbitraryFrom<&C> for Drop { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Drop { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); Self { table: table.name.clone(), @@ -320,8 +317,8 @@ impl ArbitraryFrom<&C> for Drop { } } -impl ArbitraryFrom<&C> for CreateIndex { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for CreateIndex { + fn arbitrary(rng: &mut R, env: &C) -> Self { assert!( !env.tables().is_empty(), "Cannot create an index when no tables exist in the environment." 
@@ -340,7 +337,6 @@ impl ArbitraryFrom<&C> for CreateIndex { let picked_column_indices = pick_n_unique(0..table.columns.len(), num_columns_to_pick, rng); let columns = picked_column_indices - .into_iter() .map(|i| { let column = &table.columns[i]; ( @@ -368,24 +364,23 @@ impl ArbitraryFrom<&C> for CreateIndex { } } -impl ArbitraryFrom<&C> for Update { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Update { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); let num_cols = rng.random_range(1..=table.columns.len()); let columns = pick_unique(&table.columns, num_cols, rng); let set_values: Vec<(String, SimValue)> = columns - .iter() .map(|column| { ( column.name.clone(), - SimValue::arbitrary_from(rng, &column.column_type), + SimValue::arbitrary_from(rng, env, &column.column_type), ) }) .collect(); Update { table: table.name.clone(), set_values, - predicate: Predicate::arbitrary_from(rng, table), + predicate: Predicate::arbitrary_from(rng, env, table), } } } diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index d21397cbe..0ea821d9b 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -3,54 +3,52 @@ use std::collections::HashSet; use rand::Rng; use turso_core::Value; -use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::generation::{ + gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom, GenerationContext, +}; use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; use super::ArbitraryFromMaybe; impl Arbitrary for Name { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _c: &C) -> Self { let name = readable_name_custom("_", rng); Name(name.replace("-", "_")) } } impl Arbitrary for Table { - fn arbitrary(rng: &mut R) -> Self { - let name = Name::arbitrary(rng).0; - let columns = loop { - let large_table = rng.random_bool(0.1); - let 
column_size = if large_table { - rng.random_range(64..125) // todo: make this higher (128+) - } else { - rng.random_range(1..=10) - }; - let columns = (1..=column_size) - .map(|_| Column::arbitrary(rng)) - .collect::>(); - // TODO: see if there is a better way to detect duplicates here - let mut set = HashSet::with_capacity(columns.len()); - set.extend(columns.iter()); - // Has repeated column name inside so generate again - if set.len() != columns.len() { - continue; + fn arbitrary(rng: &mut R, context: &C) -> Self { + let opts = context.opts().table.clone(); + let name = Name::arbitrary(rng, context).0; + let large_table = + opts.large_table.enable && rng.random_bool(opts.large_table.large_table_prob); + let column_size = if large_table { + rng.random_range(opts.large_table.column_range) + } else { + rng.random_range(opts.column_range) + } as usize; + let mut column_set = HashSet::with_capacity(column_size); + for col in std::iter::repeat_with(|| Column::arbitrary(rng, context)) { + column_set.insert(col); + if column_set.len() == column_size { + break; } - break columns; - }; + } Table { rows: Vec::new(), name, - columns, + columns: Vec::from_iter(column_set), indexes: vec![], } } } impl Arbitrary for Column { - fn arbitrary(rng: &mut R) -> Self { - let name = Name::arbitrary(rng).0; - let column_type = ColumnType::arbitrary(rng); + fn arbitrary(rng: &mut R, context: &C) -> Self { + let name = Name::arbitrary(rng, context).0; + let column_type = ColumnType::arbitrary(rng, context); Self { name, column_type, @@ -61,16 +59,20 @@ impl Arbitrary for Column { } impl Arbitrary for ColumnType { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() } } impl ArbitraryFrom<&Table> for Vec { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + table: &Table, + ) -> Self { let mut row = Vec::new(); for 
column in table.columns.iter() { - let value = SimValue::arbitrary_from(rng, &column.column_type); + let value = SimValue::arbitrary_from(rng, context, &column.column_type); row.push(value); } row @@ -78,7 +80,11 @@ impl ArbitraryFrom<&Table> for Vec { } impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&Self>) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + values: &Vec<&Self>, + ) -> Self { if values.is_empty() { return Self(Value::Null); } @@ -88,7 +94,11 @@ impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { } impl ArbitraryFrom<&ColumnType> for SimValue { - fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + column_type: &ColumnType, + ) -> Self { let value = match column_type { ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), @@ -102,19 +112,27 @@ impl ArbitraryFrom<&ColumnType> for SimValue { pub struct LTValue(pub SimValue); impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self(SimValue(Value::Null)); } // Get value less than all values let value = Value::exec_min(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, &SimValue(value)) + Self::arbitrary_from(rng, context, &SimValue(value)) } } impl ArbitraryFrom<&SimValue> for LTValue { - fn arbitrary_from(rng: &mut R, value: &SimValue) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { let new_value = match &value.0 { Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), @@ -164,19 +182,27 @@ impl ArbitraryFrom<&SimValue> for LTValue { pub struct GTValue(pub 
SimValue); impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self(SimValue(Value::Null)); } // Get value greater than all values let value = Value::exec_max(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, &SimValue(value)) + Self::arbitrary_from(rng, context, &SimValue(value)) } } impl ArbitraryFrom<&SimValue> for GTValue { - fn arbitrary_from(rng: &mut R, value: &SimValue) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { let new_value = match &value.0 { Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), @@ -226,7 +252,11 @@ impl ArbitraryFrom<&SimValue> for GTValue { pub struct LikeValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for LikeValue { - fn arbitrary_from_maybe(rng: &mut R, value: &SimValue) -> Option { + fn arbitrary_from_maybe( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option { match &value.0 { value @ Value::Text(..) => { let t = value.to_string();