From 19d9003cd79b1272392f0cec23fca824b7926701 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 26 Aug 2025 14:05:36 -0300 Subject: [PATCH 01/20] create profiles folder --- simulator/main.rs | 1 + simulator/profiles/io.rs | 9 +++++++++ simulator/profiles/mod.rs | 5 +++++ simulator/profiles/query.rs | 29 +++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 simulator/profiles/io.rs create mode 100644 simulator/profiles/mod.rs create mode 100644 simulator/profiles/query.rs diff --git a/simulator/main.rs b/simulator/main.rs index ccf8977ae..7db63dbb4 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -27,6 +27,7 @@ use crate::runner::env::{Paths, SimulationPhase, SimulationType}; mod generation; mod model; +mod profiles; mod runner; mod shrink; diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs new file mode 100644 index 000000000..fd91d7033 --- /dev/null +++ b/simulator/profiles/io.rs @@ -0,0 +1,9 @@ +#[derive(Debug, Default, Clone)] +pub struct IOProfile { + enable: bool, +} + +#[derive(Debug, Default, Clone)] +pub struct LatencyProfile { + enable: bool, +} diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs new file mode 100644 index 000000000..a4192602d --- /dev/null +++ b/simulator/profiles/mod.rs @@ -0,0 +1,5 @@ +mod io; +mod query; + +#[derive(Debug, Default, Clone)] +pub struct Profile {} diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs new file mode 100644 index 000000000..725513709 --- /dev/null +++ b/simulator/profiles/query.rs @@ -0,0 +1,29 @@ +#[derive(Debug, Default, Clone)] +pub struct CreateTableProfile { + enable: bool, +} + +#[derive(Debug, Default, Clone)] +pub struct CreateIndexProfile { + enable: bool, +} + +#[derive(Debug, Default, Clone)] +pub struct InsertProfile { + enable: bool, +} + +#[derive(Debug, Default, Clone)] +pub struct UpdateProfile { + enable: bool, +} + +#[derive(Debug, Default, Clone)] +pub struct DeleteProfile { + enable: bool, +} + 
+#[derive(Debug, Default, Clone)] +pub struct DropTableProfile { + enable: bool, +} From 918c2a3f690a6a3a3236f3f542324ae46b26c1f3 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 26 Aug 2025 16:00:11 -0300 Subject: [PATCH 02/20] extend latency profile + impl Default manually --- simulator/profiles/io.rs | 31 ++++++++++++++++++++++-- simulator/profiles/query.rs | 48 ++++++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs index fd91d7033..b24987916 100644 --- a/simulator/profiles/io.rs +++ b/simulator/profiles/io.rs @@ -1,9 +1,36 @@ -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct IOProfile { enable: bool, + latency: LatencyProfile, } -#[derive(Debug, Default, Clone)] +impl Default for IOProfile { + fn default() -> Self { + Self { + enable: true, + latency: Default::default(), + } + } +} + +#[derive(Debug, Clone)] pub struct LatencyProfile { enable: bool, + /// Added IO latency probability + latency_probability: usize, + /// Minimum tick time in microseconds for simulated time + min_tick: u64, + /// Maximum tick time in microseconds for simulated time + max_tick: u64, +} + +impl Default for LatencyProfile { + fn default() -> Self { + Self { + enable: true, + latency_probability: 1, + min_tick: 1, + max_tick: 30, + } + } } diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs index 725513709..919b8ecd5 100644 --- a/simulator/profiles/query.rs +++ b/simulator/profiles/query.rs @@ -1,29 +1,65 @@ -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub struct CreateTableProfile { enable: bool, } -#[derive(Debug, Default, Clone)] +impl Default for CreateTableProfile { + fn default() -> Self { + Self { enable: true } + } +} + +#[derive(Debug, Clone)] pub struct CreateIndexProfile { enable: bool, } -#[derive(Debug, Default, Clone)] +impl Default for CreateIndexProfile { + fn default() -> Self { + Self { enable: true } + } +} + 
+#[derive(Debug, Clone)] pub struct InsertProfile { enable: bool, } -#[derive(Debug, Default, Clone)] +impl Default for InsertProfile { + fn default() -> Self { + Self { enable: true } + } +} + +#[derive(Debug, Clone)] pub struct UpdateProfile { enable: bool, } -#[derive(Debug, Default, Clone)] +impl Default for UpdateProfile { + fn default() -> Self { + Self { enable: true } + } +} + +#[derive(Debug, Clone)] pub struct DeleteProfile { enable: bool, } -#[derive(Debug, Default, Clone)] +impl Default for DeleteProfile { + fn default() -> Self { + Self { enable: true } + } +} + +#[derive(Debug, Clone)] pub struct DropTableProfile { enable: bool, } + +impl Default for DropTableProfile { + fn default() -> Self { + Self { enable: true } + } +} From ef16bc4cfb3bd8e1386f143c7cd7aff4c409bffe Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 26 Aug 2025 16:13:52 -0300 Subject: [PATCH 03/20] add profiles together --- simulator/profiles/io.rs | 12 ++++++------ simulator/profiles/mod.rs | 21 +++++++++++++++++++-- simulator/profiles/query.rs | 22 ++++++++++++++++------ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs index b24987916..4bca40b8f 100644 --- a/simulator/profiles/io.rs +++ b/simulator/profiles/io.rs @@ -1,7 +1,7 @@ #[derive(Debug, Clone)] pub struct IOProfile { - enable: bool, - latency: LatencyProfile, + pub enable: bool, + pub latency: LatencyProfile, } impl Default for IOProfile { @@ -15,13 +15,13 @@ impl Default for IOProfile { #[derive(Debug, Clone)] pub struct LatencyProfile { - enable: bool, + pub enable: bool, /// Added IO latency probability - latency_probability: usize, + pub latency_probability: usize, /// Minimum tick time in microseconds for simulated time - min_tick: u64, + pub min_tick: u64, /// Maximum tick time in microseconds for simulated time - max_tick: u64, + pub max_tick: u64, } impl Default for LatencyProfile { diff --git a/simulator/profiles/mod.rs 
b/simulator/profiles/mod.rs index a4192602d..6e5187836 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -1,5 +1,22 @@ +use crate::profiles::{io::IOProfile, query::QueryProfile}; + mod io; mod query; -#[derive(Debug, Default, Clone)] -pub struct Profile {} +#[derive(Debug, Clone)] +pub struct Profile { + /// Experimental MVCC feature + pub experimental_mvcc: bool, + pub io: IOProfile, + pub query: QueryProfile, +} + +impl Default for Profile { + fn default() -> Self { + Self { + experimental_mvcc: false, + io: Default::default(), + query: Default::default(), + } + } +} diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs index 919b8ecd5..0b1c663c3 100644 --- a/simulator/profiles/query.rs +++ b/simulator/profiles/query.rs @@ -1,6 +1,16 @@ +#[derive(Debug, Default, Clone)] +pub struct QueryProfile { + pub create_table: CreateTableProfile, + pub create_index: CreateIndexProfile, + pub insert: InsertProfile, + pub update: UpdateProfile, + pub delete: DeleteProfile, + pub drop_table: DropTableProfile, +} + #[derive(Debug, Clone)] pub struct CreateTableProfile { - enable: bool, + pub enable: bool, } impl Default for CreateTableProfile { @@ -11,7 +21,7 @@ impl Default for CreateTableProfile { #[derive(Debug, Clone)] pub struct CreateIndexProfile { - enable: bool, + pub enable: bool, } impl Default for CreateIndexProfile { @@ -22,7 +32,7 @@ impl Default for CreateIndexProfile { #[derive(Debug, Clone)] pub struct InsertProfile { - enable: bool, + pub enable: bool, } impl Default for InsertProfile { @@ -33,7 +43,7 @@ impl Default for InsertProfile { #[derive(Debug, Clone)] pub struct UpdateProfile { - enable: bool, + pub enable: bool, } impl Default for UpdateProfile { @@ -44,7 +54,7 @@ impl Default for UpdateProfile { #[derive(Debug, Clone)] pub struct DeleteProfile { - enable: bool, + pub enable: bool, } impl Default for DeleteProfile { @@ -55,7 +65,7 @@ impl Default for DeleteProfile { #[derive(Debug, Clone)] pub struct 
DropTableProfile { - enable: bool, + pub enable: bool, } impl Default for DropTableProfile { From e0552629e32b0bc35271ac4fb567ddfd3e7b8bc0 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 27 Aug 2025 01:11:25 -0300 Subject: [PATCH 04/20] create Generation Options structs --- simulator/generation/mod.rs | 6 +-- simulator/runner/env.rs | 9 +++++ sql_generation/generation/mod.rs | 15 +------- sql_generation/generation/opts.rs | 62 +++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 17 deletions(-) create mode 100644 sql_generation/generation/opts.rs diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index 79bdf506f..a63936ab5 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -25,9 +25,7 @@ impl GenerationContext for SimulatorEnv { &self.tables.tables } - fn opts(&self) -> sql_generation::generation::Opts { - sql_generation::generation::Opts { - indexes: self.opts.experimental_indexes, - } + fn opts(&self) -> &sql_generation::generation::Opts { + &self.gen_opts } } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index a29adc591..50b21c61a 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -7,6 +7,7 @@ use std::sync::Arc; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; +use sql_generation::generation::Opts; use sql_generation::model::table::Table; use turso_core::Database; @@ -59,6 +60,7 @@ impl Deref for SimulatorTables { pub(crate) struct SimulatorEnv { pub(crate) opts: SimulatorOpts, + pub gen_opts: Opts, pub(crate) connections: Vec, pub(crate) io: Arc, pub(crate) db: Option>, @@ -85,6 +87,7 @@ impl SimulatorEnv { paths: self.paths.clone(), type_: self.type_, phase: self.phase, + gen_opts: self.gen_opts.clone(), } } @@ -291,6 +294,11 @@ impl SimulatorEnv { .map(|_| SimConnection::Disconnected) .collect::>(); + let gen_opts = Opts { + indexes: opts.experimental_indexes, + ..Default::default() + }; + SimulatorEnv { opts, tables: SimulatorTables::new(), @@ 
-301,6 +309,7 @@ impl SimulatorEnv { db: Some(db), type_: simulation_type, phase: SimulationPhase::Test, + gen_opts, } } diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 25bd7ec09..6d475590a 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -3,24 +3,13 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distr::uniform::SampleUniform, Rng}; -use crate::model::table::Table; - +pub mod opts; pub mod expr; pub mod predicate; pub mod query; pub mod table; -#[derive(Debug, Clone, Copy)] -pub struct Opts { - /// Indexes enabled - pub indexes: bool, -} - -/// Trait used to provide context to generation functions -pub trait GenerationContext { - fn tables(&self) -> &Vec; - fn opts(&self) -> Opts; -} +pub use opts::*; type ArbitraryFromFunc<'a, R, T> = Box T + 'a>; type Choice<'a, R, T> = (usize, Box Option + 'a>); diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs new file mode 100644 index 000000000..d9caf81d7 --- /dev/null +++ b/sql_generation/generation/opts.rs @@ -0,0 +1,62 @@ +use std::ops::Range; + +use crate::model::table::Table; + +#[derive(Debug, Clone)] +pub struct Opts { + /// Indexes enabled + pub indexes: bool, + pub table: TableOpts, +} + +impl Default for Opts { + fn default() -> Self { + Self { + indexes: true, + table: Default::default(), + } + } +} + +/// Trait used to provide context to generation functions +pub trait GenerationContext { + fn tables(&self) -> &Vec
; + fn opts(&self) -> &Opts; +} + +#[derive(Debug, Clone)] +pub struct TableOpts { + pub large_table: LargeTableOpts, + /// Range of numbers of columns to generate + pub column_range: Range, +} + +impl Default for TableOpts { + fn default() -> Self { + Self { + large_table: Default::default(), + // Up to 10 columns + column_range: 1..11, + } + } +} + +/// Options for generating large tables +#[derive(Debug, Clone)] +pub struct LargeTableOpts { + pub enable: bool, + pub large_table_prob: f32, + /// Range of numbers of columns to generate + pub column_range: Range, +} + +impl Default for LargeTableOpts { + fn default() -> Self { + Self { + enable: true, + large_table_prob: 0.1, + // todo: make this higher (128+) + column_range: 64..125, + } + } +} From 1a8b78afd80009ee74cdc7fcc4293f173527ac8b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 27 Aug 2025 01:48:06 -0300 Subject: [PATCH 05/20] create `ArbitraryContext` and `ArbitraryContextFrom` traits to pass generation context + start implementing them in `Table` + `FromClause` --- sql_generation/generation/mod.rs | 14 +++++- sql_generation/generation/opts.rs | 49 ++++++++++++++++++- sql_generation/generation/query.rs | 75 +++++++++++++++++++++++++++++- sql_generation/generation/table.rs | 55 +++++++++++++--------- 4 files changed, 167 insertions(+), 26 deletions(-) diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 6d475590a..331dc65f4 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -3,8 +3,8 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distr::uniform::SampleUniform, Rng}; -pub mod opts; pub mod expr; +pub mod opts; pub mod predicate; pub mod query; pub mod table; @@ -41,6 +41,18 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: T) -> Self; } +pub trait ArbitraryContext { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self; +} + +pub trait 
ArbitraryContextFrom { + fn arbitrary_with_context_from( + rng: &mut R, + context: &C, + t: T, + ) -> Self; +} + /// ArbitrarySizedFrom trait for generating random values from a given value /// ArbitrarySizedFrom allows for constructing relations, where the generated /// value is dependent on the given value and a size constraint. These relations diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs index d9caf81d7..b61da6cdc 100644 --- a/sql_generation/generation/opts.rs +++ b/sql_generation/generation/opts.rs @@ -1,5 +1,7 @@ use std::ops::Range; +use rand::distr::weighted::WeightedIndex; + use crate::model::table::Table; #[derive(Debug, Clone)] @@ -7,6 +9,7 @@ pub struct Opts { /// Indexes enabled pub indexes: bool, pub table: TableOpts, + pub query: QueryOpts, } impl Default for Opts { @@ -14,6 +17,7 @@ impl Default for Opts { Self { indexes: true, table: Default::default(), + query: Default::default(), } } } @@ -45,7 +49,7 @@ impl Default for TableOpts { #[derive(Debug, Clone)] pub struct LargeTableOpts { pub enable: bool, - pub large_table_prob: f32, + pub large_table_prob: f64, /// Range of numbers of columns to generate pub column_range: Range, } @@ -60,3 +64,46 @@ impl Default for LargeTableOpts { } } } + +#[derive(Debug, Default, Clone)] +pub struct QueryOpts { + pub from_clause: FromClauseOpts, +} + +#[derive(Debug, Clone)] +pub struct FromClauseOpts { + pub joins: Vec, +} + +impl Default for FromClauseOpts { + fn default() -> Self { + Self { + joins: vec![ + JoinWeight { + num_joins: 0, + weight: 90, + }, + JoinWeight { + num_joins: 1, + weight: 7, + }, + JoinWeight { + num_joins: 2, + weight: 3, + }, + ], + } + } +} + +impl FromClauseOpts { + pub fn as_weighted_index(&self) -> WeightedIndex { + WeightedIndex::new(self.joins.iter().map(|weight| weight.weight)).unwrap() + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd)] +pub struct JoinWeight { + pub num_joins: u32, + pub weight: u32, +} diff --git 
a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index d7840a001..f3729bf9e 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -1,6 +1,6 @@ use crate::generation::{ - gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom, - GenerationContext, + gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryContext, ArbitraryContextFrom, + ArbitraryFrom, ArbitrarySizedFrom, GenerationContext, }; use crate::model::query::predicate::Predicate; use crate::model::query::select::{ @@ -24,6 +24,77 @@ impl Arbitrary for Create { } } +impl ArbitraryContext for Create { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { + Create { + table: Table::arbitrary_with_context(rng, context), + } + } +} + +impl ArbitraryContextFrom<&Vec
> for FromClause { + fn arbitrary_with_context_from( + rng: &mut R, + context: &C, + tables: &Vec
, + ) -> Self { + let opts = &context.opts().query.from_clause; + let weights = opts.as_weighted_index(); + let num_joins = opts.joins[rng.sample(weights)].num_joins; + + let mut tables = tables.clone(); + let mut table = pick(&tables, rng).clone(); + + tables.retain(|t| t.name != table.name); + + let name = table.name.clone(); + + let mut table_context = JoinTable { + tables: Vec::new(), + rows: Vec::new(), + }; + + let joins: Vec<_> = (0..num_joins) + .filter_map(|_| { + if tables.is_empty() { + return None; + } + let join_table = pick(&tables, rng).clone(); + let joined_table_name = join_table.name.clone(); + + tables.retain(|t| t.name != join_table.name); + table_context.rows = table_context + .rows + .iter() + .cartesian_product(join_table.rows.iter()) + .map(|(t_row, j_row)| { + let mut row = t_row.clone(); + row.extend(j_row.clone()); + row + }) + .collect(); + // TODO: inneficient. use a Deque to push_front? + table_context.tables.insert(0, join_table); + for row in &mut table.rows { + assert_eq!( + row.len(), + table.columns.len(), + "Row length does not match column length after join" + ); + } + + let predicate = Predicate::arbitrary_from(rng, &table); + Some(JoinedTable { + table: joined_table_name, + join_type: JoinType::Inner, + on: predicate, + }) + }) + .collect(); + FromClause { table: name, joins } + } +} + impl ArbitraryFrom<&Vec
> for FromClause { fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { let num_joins = match rng.random_range(0..=100) { diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index d21397cbe..32bcb8117 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -3,7 +3,10 @@ use std::collections::HashSet; use rand::Rng; use turso_core::Value; -use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::generation::{ + gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryContext, ArbitraryFrom, + GenerationContext, Opts, +}; use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; use super::ArbitraryFromMaybe; @@ -15,38 +18,46 @@ impl Arbitrary for Name { } } -impl Arbitrary for Table { - fn arbitrary(rng: &mut R) -> Self { +impl Table { + fn gen_table(rng: &mut R, opts: &Opts) -> Self { + let opts = opts.table.clone(); let name = Name::arbitrary(rng).0; - let columns = loop { - let large_table = rng.random_bool(0.1); - let column_size = if large_table { - rng.random_range(64..125) // todo: make this higher (128+) - } else { - rng.random_range(1..=10) - }; - let columns = (1..=column_size) - .map(|_| Column::arbitrary(rng)) - .collect::>(); - // TODO: see if there is a better way to detect duplicates here - let mut set = HashSet::with_capacity(columns.len()); - set.extend(columns.iter()); - // Has repeated column name inside so generate again - if set.len() != columns.len() { - continue; + let large_table = + opts.large_table.enable && rng.random_bool(opts.large_table.large_table_prob); + let column_size = if large_table { + rng.random_range(opts.large_table.column_range) + } else { + rng.random_range(opts.column_range) + } as usize; + let mut column_set = HashSet::with_capacity(column_size); + for col in std::iter::repeat_with(|| Column::arbitrary(rng)) { + column_set.insert(col); + if column_set.len() == column_size { + break; } - break columns; - }; + } Table { rows: Vec::new(), name, - 
columns, + columns: Vec::from_iter(column_set.into_iter()), indexes: vec![], } } } +impl Arbitrary for Table { + fn arbitrary(rng: &mut R) -> Self { + Table::gen_table(rng, &Opts::default()) + } +} + +impl ArbitraryContext for Table { + fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { + Table::gen_table(rng, context.opts()) + } +} + impl Arbitrary for Column { fn arbitrary(rng: &mut R) -> Self { let name = Name::arbitrary(rng).0; From 9bc8bdb279bf70e73def09a61f4060052889718d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 27 Aug 2025 13:03:18 -0300 Subject: [PATCH 06/20] all Arbitrary traits need to pass a GenerationContext --- sql_generation/generation/expr.rs | 97 +++++++---- sql_generation/generation/mod.rs | 56 ++++-- sql_generation/generation/opts.rs | 12 +- sql_generation/generation/predicate/binary.rs | 98 +++++++---- sql_generation/generation/predicate/mod.rs | 107 ++++++++---- sql_generation/generation/predicate/unary.rs | 72 +++++--- sql_generation/generation/query.rs | 160 +++++------------- sql_generation/generation/table.rs | 89 ++++++---- 8 files changed, 392 insertions(+), 299 deletions(-) diff --git a/sql_generation/generation/expr.rs b/sql_generation/generation/expr.rs index c07d81414..244bf6469 100644 --- a/sql_generation/generation/expr.rs +++ b/sql_generation/generation/expr.rs @@ -5,7 +5,7 @@ use turso_parser::ast::{ use crate::{ generation::{ frequency, gen_random_text, one_of, pick, pick_index, Arbitrary, ArbitraryFrom, - ArbitrarySizedFrom, GenerationContext, + ArbitrarySized, ArbitrarySizedFrom, GenerationContext, }, model::table::SimValue, }; @@ -14,8 +14,21 @@ impl Arbitrary for Box where T: Arbitrary, { - fn arbitrary(rng: &mut R) -> Self { - Box::from(T::arbitrary(rng)) + fn arbitrary(rng: &mut R, context: &C) -> Self { + Box::from(T::arbitrary(rng, context)) + } +} + +impl ArbitrarySized for Box +where + T: ArbitrarySized, +{ + fn arbitrary_sized( + rng: &mut R, + context: &C, + size: usize, + ) -> Self { + 
Box::from(T::arbitrary_sized(rng, context, size)) } } @@ -23,8 +36,13 @@ impl ArbitrarySizedFrom for Box where T: ArbitrarySizedFrom, { - fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { - Box::from(T::arbitrary_sized_from(rng, t, size)) + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: A, + size: usize, + ) -> Self { + Box::from(T::arbitrary_sized_from(rng, context, t, size)) } } @@ -32,8 +50,8 @@ impl Arbitrary for Option where T: Arbitrary, { - fn arbitrary(rng: &mut R) -> Self { - rng.random_bool(0.5).then_some(T::arbitrary(rng)) + fn arbitrary(rng: &mut R, context: &C) -> Self { + rng.random_bool(0.5).then_some(T::arbitrary(rng, context)) } } @@ -41,9 +59,14 @@ impl ArbitrarySizedFrom for Option where T: ArbitrarySizedFrom, { - fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: A, + size: usize, + ) -> Self { rng.random_bool(0.5) - .then_some(T::arbitrary_sized_from(rng, t, size)) + .then_some(T::arbitrary_sized_from(rng, context, t, size)) } } @@ -51,20 +74,26 @@ impl ArbitraryFrom for Vec where T: ArbitraryFrom, { - fn arbitrary_from(rng: &mut R, t: A) -> Self { + fn arbitrary_from(rng: &mut R, context: &C, t: A) -> Self { let size = rng.random_range(0..5); - (0..size).map(|_| T::arbitrary_from(rng, t)).collect() + (0..size) + .map(|_| T::arbitrary_from(rng, context, t)) + .collect() } } // Freestyling generation -impl ArbitrarySizedFrom<&C> for Expr { - fn arbitrary_sized_from(rng: &mut R, t: &C, size: usize) -> Self { +impl ArbitrarySized for Expr { + fn arbitrary_sized( + rng: &mut R, + context: &C, + size: usize, + ) -> Self { frequency( vec![ ( 1, - Box::new(|rng| Expr::Literal(ast::Literal::arbitrary_from(rng, t))), + Box::new(|rng| Expr::Literal(ast::Literal::arbitrary(rng, context))), ), ( size, @@ -79,9 +108,9 @@ impl ArbitrarySizedFrom<&C> for Expr { // }), Box::new(|rng: &mut R| { Expr::Binary( - Box::arbitrary_sized_from(rng, t, size - 1), - 
Operator::arbitrary(rng), - Box::arbitrary_sized_from(rng, t, size - 1), + Box::arbitrary_sized(rng, context, size - 1), + Operator::arbitrary(rng, context), + Box::arbitrary_sized(rng, context, size - 1), ) }), // Box::new(|rng| Expr::Case { @@ -133,8 +162,8 @@ impl ArbitrarySizedFrom<&C> for Expr { // }) Box::new(|rng| { Expr::Unary( - UnaryOperator::arbitrary_from(rng, t), - Box::arbitrary_sized_from(rng, t, size - 1), + UnaryOperator::arbitrary(rng, context), + Box::arbitrary_sized(rng, context, size - 1), ) }), // TODO: skip Exists for now @@ -159,7 +188,7 @@ impl ArbitrarySizedFrom<&C> for Expr { } impl Arbitrary for Operator { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { let choices = [ Operator::Add, Operator::And, @@ -190,7 +219,7 @@ impl Arbitrary for Operator { } impl Arbitrary for Type { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { let name = pick(&["INT", "INTEGER", "REAL", "TEXT", "BLOB", "ANY"], rng).to_string(); Self { name, @@ -199,11 +228,11 @@ impl Arbitrary for Type { } } -impl ArbitraryFrom<&C> for QualifiedName { - fn arbitrary_from(rng: &mut R, t: &C) -> Self { +impl Arbitrary for QualifiedName { + fn arbitrary(rng: &mut R, context: &C) -> Self { // TODO: for now just generate table name - let table_idx = pick_index(t.tables().len(), rng); - let table = &t.tables()[table_idx]; + let table_idx = pick_index(context.tables().len(), rng); + let table = &context.tables()[table_idx]; // TODO: for now forego alias Self { db_name: None, @@ -213,8 +242,8 @@ impl ArbitraryFrom<&C> for QualifiedName { } } -impl ArbitraryFrom<&C> for LikeOperator { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for LikeOperator { + fn arbitrary(rng: &mut R, _t: &C) -> Self { let choice = rng.random_range(0..4); match choice { 0 => LikeOperator::Glob, @@ -227,8 +256,8 @@ impl ArbitraryFrom<&C> for LikeOperator { } // Current implementation does not take into 
account the columns affinity nor if table is Strict -impl ArbitraryFrom<&C> for ast::Literal { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for ast::Literal { + fn arbitrary(rng: &mut R, _t: &C) -> Self { loop { let choice = rng.random_range(0..5); let lit = match choice { @@ -255,7 +284,11 @@ impl ArbitraryFrom<&C> for ast::Literal { // Creates a litreal value impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self::Literal(ast::Literal::Null); } @@ -265,8 +298,8 @@ impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr { } } -impl ArbitraryFrom<&C> for UnaryOperator { - fn arbitrary_from(rng: &mut R, _t: &C) -> Self { +impl Arbitrary for UnaryOperator { + fn arbitrary(rng: &mut R, _t: &C) -> Self { let choice = rng.random_range(0..4); match choice { 0 => Self::BitwiseNot, diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 331dc65f4..18fa78021 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -19,7 +19,7 @@ type Choice<'a, R, T> = (usize, Box Option + 'a>); /// the possible values of the type, with a bias towards smaller values for /// practicality. pub trait Arbitrary { - fn arbitrary(rng: &mut R) -> Self; + fn arbitrary(rng: &mut R, context: &C) -> Self; } /// ArbitrarySized trait for generating random values of a specific size @@ -29,7 +29,8 @@ pub trait Arbitrary { /// must fit in the given size. This is useful for generating values that are /// constrained by a specific size, such as integers or strings. 
pub trait ArbitrarySized { - fn arbitrary_sized(rng: &mut R, size: usize) -> Self; + fn arbitrary_sized(rng: &mut R, context: &C, size: usize) + -> Self; } /// ArbitraryFrom trait for generating random values from a given value @@ -38,19 +39,7 @@ pub trait ArbitrarySized { /// such as generating an integer within an interval, or a value that fits in a table, /// or a predicate satisfying a given table row. pub trait ArbitraryFrom { - fn arbitrary_from(rng: &mut R, t: T) -> Self; -} - -pub trait ArbitraryContext { - fn arbitrary_with_context(rng: &mut R, context: &C) -> Self; -} - -pub trait ArbitraryContextFrom { - fn arbitrary_with_context_from( - rng: &mut R, - context: &C, - t: T, - ) -> Self; + fn arbitrary_from(rng: &mut R, context: &C, t: T) -> Self; } /// ArbitrarySizedFrom trait for generating random values from a given value @@ -62,12 +51,21 @@ pub trait ArbitraryContextFrom { /// This is useful for generating values that are constrained by a specific size, /// such as integers or strings, while still being dependent on the given value. pub trait ArbitrarySizedFrom { - fn arbitrary_sized_from(rng: &mut R, t: T, size: usize) -> Self; + fn arbitrary_sized_from( + rng: &mut R, + context: &C, + t: T, + size: usize, + ) -> Self; } /// ArbitraryFromMaybe trait for fallibally generating random values from a given value pub trait ArbitraryFromMaybe { - fn arbitrary_from_maybe(rng: &mut R, t: T) -> Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + t: T, + ) -> Option where Self: Sized; } @@ -187,3 +185,27 @@ where } picked } + +#[cfg(test)] +mod tests { + use crate::{ + generation::{GenerationContext, Opts}, + model::table::Table, + }; + + #[derive(Debug, Default, Clone)] + pub struct TestContext { + pub opts: Opts, + pub tables: Vec
, + } + + impl GenerationContext for TestContext { + fn tables(&self) -> &Vec
{ + &self.tables + } + + fn opts(&self) -> &Opts { + &self.opts + } + } +} diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs index b61da6cdc..d978d54cd 100644 --- a/sql_generation/generation/opts.rs +++ b/sql_generation/generation/opts.rs @@ -4,6 +4,12 @@ use rand::distr::weighted::WeightedIndex; use crate::model::table::Table; +/// Trait used to provide context to generation functions +pub trait GenerationContext { + fn tables(&self) -> &Vec
; + fn opts(&self) -> &Opts; +} + #[derive(Debug, Clone)] pub struct Opts { /// Indexes enabled @@ -22,12 +28,6 @@ impl Default for Opts { } } -/// Trait used to provide context to generation functions -pub trait GenerationContext { - fn tables(&self) -> &Vec
; - fn opts(&self) -> &Opts; -} - #[derive(Debug, Clone)] pub struct TableOpts { pub large_table: LargeTableOpts, diff --git a/sql_generation/generation/predicate/binary.rs b/sql_generation/generation/predicate/binary.rs index 29c1727a9..a5901a9f8 100644 --- a/sql_generation/generation/predicate/binary.rs +++ b/sql_generation/generation/predicate/binary.rs @@ -7,7 +7,7 @@ use crate::{ backtrack, one_of, pick, predicate::{CompoundPredicate, SimplePredicate}, table::{GTValue, LTValue, LikeValue}, - ArbitraryFrom, ArbitraryFromMaybe as _, + ArbitraryFrom, ArbitraryFromMaybe as _, GenerationContext, }, model::{ query::predicate::Predicate, @@ -17,8 +17,9 @@ use crate::{ impl Predicate { /// Generate an [ast::Expr::Binary] [Predicate] from a column and [SimValue] - pub fn from_column_binary( + pub fn from_column_binary( rng: &mut R, + context: &C, column_name: &str, value: &SimValue, ) -> Predicate { @@ -32,7 +33,7 @@ impl Predicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), ast::Operator::Greater, @@ -40,7 +41,7 @@ impl Predicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), ast::Operator::Less, @@ -54,7 +55,12 @@ impl Predicate { } /// Produces a true [ast::Expr::Binary] [Predicate] that is true for the provided row in the given table - pub fn true_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + pub fn true_binary( + rng: &mut R, + context: &C, + t: &Table, + row: &[SimValue], + ) -> Predicate { // Pick a column let column_index = rng.random_range(0..t.columns.len()); let mut column = t.columns[column_index].clone(); @@ -93,7 +99,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let v = 
SimValue::arbitrary_from(rng, &column.column_type); + let v = SimValue::arbitrary_from(rng, context, &column.column_type); if &v == value { None } else { @@ -111,7 +117,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; Some(Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -125,7 +131,7 @@ impl Predicate { ( 1, Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Some(Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -140,7 +146,7 @@ impl Predicate { 1, Box::new(|rng| { // TODO: generation for Like and Glob expressions should be extracted to different module - LikeValue::arbitrary_from_maybe(rng, value).map(|like| { + LikeValue::arbitrary_from_maybe(rng, context, value).map(|like| { Expr::Like { lhs: Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -162,7 +168,12 @@ impl Predicate { } /// Produces an [ast::Expr::Binary] [Predicate] that is false for the provided row in the given table - pub fn false_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + pub fn false_binary( + rng: &mut R, + context: &C, + t: &Table, + row: &[SimValue], + ) -> Predicate { // Pick a column let column_index = rng.random_range(0..t.columns.len()); let mut column = t.columns[column_index].clone(); @@ -197,7 +208,7 @@ impl Predicate { }), Box::new(|rng| { let v = loop { - let v = SimValue::arbitrary_from(rng, &column.column_type); + let v = SimValue::arbitrary_from(rng, context, &column.column_type); if &v != value { break v; } @@ -212,7 +223,7 @@ impl Predicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, value).0; + let gt_value = GTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -223,7 +234,7 @@ impl Predicate { 
) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, value).0; + let lt_value = LTValue::arbitrary_from(rng, context, value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(&table_name), @@ -242,8 +253,9 @@ impl Predicate { impl SimplePredicate { /// Generates a true [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn true_binary( + pub fn true_binary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -271,7 +283,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, column_value).0; + let lt_value = LTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(Expr::Qualified( ast::Name::new(table_name), @@ -282,7 +294,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, column_value).0; + let gt_value = GTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(Expr::Qualified( ast::Name::new(table_name), @@ -299,8 +311,9 @@ impl SimplePredicate { } /// Generates a false [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn false_binary( + pub fn false_binary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -328,7 +341,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let gt_value = GTValue::arbitrary_from(rng, column_value).0; + let gt_value = GTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(table_name), @@ -339,7 +352,7 @@ impl SimplePredicate { ) }), Box::new(|rng| { - let lt_value = LTValue::arbitrary_from(rng, column_value).0; + let lt_value = LTValue::arbitrary_from(rng, context, column_value).0; Expr::Binary( Box::new(ast::Expr::Qualified( ast::Name::new(table_name), @@ -360,8 +373,9 @@ impl CompoundPredicate { /// Decide if you want to create an AND or an OR /// /// Creates a Compound Predicate that is TRUE or FALSE 
for at least a single row - pub fn from_table_binary( + pub fn from_table_binary( rng: &mut R, + context: &C, table: &T, predicate_value: bool, ) -> Self { @@ -381,7 +395,7 @@ impl CompoundPredicate { // An AND for false requires at least one of its children to be false if predicate_value { (0..rng.random_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, true)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, context, (table, row, true)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -405,7 +419,7 @@ impl CompoundPredicate { booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, context, (table, row, *b)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -431,7 +445,7 @@ impl CompoundPredicate { booleans .iter() - .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .map(|b| SimplePredicate::arbitrary_from(rng, context, (table, row, *b)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -442,7 +456,7 @@ impl CompoundPredicate { .unwrap_or(Predicate::true_()) } else { (0..rng.random_range(1..=3)) - .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, false)).0) + .map(|_| SimplePredicate::arbitrary_from(rng, context, (table, row, false)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( Box::new(accum.0), @@ -463,7 +477,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::{ query::predicate::{expr_to_value, Predicate}, table::{SimValue, Table}, @@ -481,20 +497,22 @@ mod tests { fn fuzz_true_binary_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = 
Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::true_binary(&mut rng, &table, row); + let predicate = Predicate::true_binary(&mut rng, context, &table, row); let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -507,20 +525,22 @@ mod tests { fn fuzz_false_binary_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::false_binary(&mut rng, &table, row); + let predicate = Predicate::false_binary(&mut rng, context, &table, row); let value = expr_to_value(&predicate.0, row, &table); assert!( !value.as_ref().is_some_and(|value| value.as_bool()), @@ -533,21 +553,23 @@ mod tests { fn fuzz_true_binary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| 
SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::true_binary(&mut rng, &table, row); + let predicate = SimplePredicate::true_binary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) @@ -561,21 +583,23 @@ mod tests { fn fuzz_false_binary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::false_binary(&mut rng, &table, row); + let predicate = SimplePredicate::false_binary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) diff --git a/sql_generation/generation/predicate/mod.rs b/sql_generation/generation/predicate/mod.rs index b919ad0bd..78fa30ae4 100644 --- a/sql_generation/generation/predicate/mod.rs +++ b/sql_generation/generation/predicate/mod.rs @@ -1,9 +1,12 @@ use rand::{seq::SliceRandom as _, Rng}; use turso_parser::ast::{self, Expr}; -use crate::model::{ - query::predicate::Predicate, - table::{SimValue, Table, TableContext}, +use crate::{ + generation::GenerationContext, + model::{ + query::predicate::Predicate, + table::{SimValue, Table, TableContext}, + }, }; use super::{one_of, ArbitraryFrom}; @@ -18,20 +21,24 @@ struct CompoundPredicate(Predicate); struct SimplePredicate(Predicate); impl, T: TableContext> ArbitraryFrom<(&T, A, bool)> 
for SimplePredicate { - fn arbitrary_from(rng: &mut R, (table, row, predicate_value): (&T, A, bool)) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, row, predicate_value): (&T, A, bool), + ) -> Self { let row = row.as_ref(); // Pick an operator let choice = rng.random_range(0..2); // Pick an operator match predicate_value { true => match choice { - 0 => SimplePredicate::true_binary(rng, table, row), - 1 => SimplePredicate::true_unary(rng, table, row), + 0 => SimplePredicate::true_binary(rng, context, table, row), + 1 => SimplePredicate::true_unary(rng, context, table, row), _ => unreachable!(), }, false => match choice { - 0 => SimplePredicate::false_binary(rng, table, row), - 1 => SimplePredicate::false_unary(rng, table, row), + 0 => SimplePredicate::false_binary(rng, context, table, row), + 1 => SimplePredicate::false_unary(rng, context, table, row), _ => unreachable!(), }, } @@ -39,43 +46,59 @@ impl, T: TableContext> ArbitraryFrom<(&T, A, bool)> for Sim } impl ArbitraryFrom<(&T, bool)> for CompoundPredicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { - CompoundPredicate::from_table_binary(rng, table, predicate_value) + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, predicate_value): (&T, bool), + ) -> Self { + CompoundPredicate::from_table_binary(rng, context, table, predicate_value) } } impl ArbitraryFrom<&T> for Predicate { - fn arbitrary_from(rng: &mut R, table: &T) -> Self { + fn arbitrary_from(rng: &mut R, context: &C, table: &T) -> Self { let predicate_value = rng.random_bool(0.5); - Predicate::arbitrary_from(rng, (table, predicate_value)).parens() + Predicate::arbitrary_from(rng, context, (table, predicate_value)).parens() } } impl ArbitraryFrom<(&T, bool)> for Predicate { - fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { - CompoundPredicate::arbitrary_from(rng, (table, predicate_value)).0 + fn arbitrary_from( + rng: &mut R, + context: &C, + (table, 
predicate_value): (&T, bool), + ) -> Self { + CompoundPredicate::arbitrary_from(rng, context, (table, predicate_value)).0 } } impl ArbitraryFrom<(&str, &SimValue)> for Predicate { - fn arbitrary_from(rng: &mut R, (column_name, value): (&str, &SimValue)) -> Self { - Predicate::from_column_binary(rng, column_name, value) + fn arbitrary_from( + rng: &mut R, + context: &C, + (column_name, value): (&str, &SimValue), + ) -> Self { + Predicate::from_column_binary(rng, context, column_name, value) } } impl ArbitraryFrom<(&Table, &Vec)> for Predicate { - fn arbitrary_from(rng: &mut R, (t, row): (&Table, &Vec)) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + (t, row): (&Table, &Vec), + ) -> Self { // We want to produce a predicate that is true for the row // We can do this by creating several predicates that // are true, some that are false, combiend them in ways that correspond to the creation of a true predicate // Produce some true and false predicates let mut true_predicates = (1..=rng.random_range(1..=4)) - .map(|_| Predicate::true_binary(rng, t, row)) + .map(|_| Predicate::true_binary(rng, context, t, row)) .collect::>(); let false_predicates = (0..=rng.random_range(0..=3)) - .map(|_| Predicate::false_binary(rng, t, row)) + .map(|_| Predicate::false_binary(rng, context, t, row)) .collect::>(); // Start building a top level predicate from a true predicate @@ -231,7 +254,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::{ query::predicate::{expr_to_value, Predicate}, table::{SimValue, Table}, @@ -249,20 +274,23 @@ mod tests { fn fuzz_arbitrary_table_true_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let 
table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, true)).0; + let predicate = + SimplePredicate::arbitrary_from(&mut rng, context, (&table, row, true)).0; let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -275,20 +303,23 @@ mod tests { fn fuzz_arbitrary_table_false_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, false)).0; + let predicate = + SimplePredicate::arbitrary_from(&mut rng, context, (&table, row, false)).0; let value = expr_to_value(&predicate.0, row, &table); assert!( !value.as_ref().is_some_and(|value| value.as_bool()), @@ -301,20 +332,22 @@ mod tests { fn fuzz_arbitrary_row_table_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let table = Table::arbitrary(&mut rng); + let table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| 
SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); let row = pick(&values, &mut rng); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, row)); + let predicate = Predicate::arbitrary_from(&mut rng, context, (&table, row)); let value = expr_to_value(&predicate.0, row, &table); assert!( value.as_ref().is_some_and(|value| value.as_bool()), @@ -327,20 +360,22 @@ mod tests { fn fuzz_arbitrary_true_table_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, true)); + let predicate = Predicate::arbitrary_from(&mut rng, context, (&table, true)); let result = values .iter() .map(|row| predicate.test(row, &table)) @@ -354,20 +389,22 @@ mod tests { fn fuzz_arbitrary_false_table_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); - let predicate = Predicate::arbitrary_from(&mut rng, (&table, false)); + let predicate = 
Predicate::arbitrary_from(&mut rng, context, (&table, false)); let result = values .iter() .map(|row| predicate.test(row, &table)) diff --git a/sql_generation/generation/predicate/unary.rs b/sql_generation/generation/predicate/unary.rs index 62c6d7d65..bfcd1cff0 100644 --- a/sql_generation/generation/predicate/unary.rs +++ b/sql_generation/generation/predicate/unary.rs @@ -5,7 +5,9 @@ use turso_parser::ast::{self, Expr}; use crate::{ - generation::{backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe}, + generation::{ + backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe, GenerationContext, + }, model::{ query::predicate::Predicate, table::{SimValue, TableContext}, @@ -15,7 +17,11 @@ use crate::{ pub struct TrueValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for TrueValue { - fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + fn arbitrary_from_maybe( + _rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option where Self: Sized, { @@ -25,7 +31,11 @@ impl ArbitraryFromMaybe<&SimValue> for TrueValue { } impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue { - fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Option where Self: Sized, { @@ -34,14 +44,18 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue { } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, *value) + Self::arbitrary_from_maybe(rng, context, *value) } } pub struct FalseValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for FalseValue { - fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + fn arbitrary_from_maybe( + _rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option where Self: Sized, { @@ -51,7 +65,11 @@ impl ArbitraryFromMaybe<&SimValue> for FalseValue { } impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { - fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> 
Option + fn arbitrary_from_maybe( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Option where Self: Sized, { @@ -60,7 +78,7 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, *value) + Self::arbitrary_from_maybe(rng, context, *value) } } @@ -68,8 +86,9 @@ impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { pub struct BitNotValue(pub SimValue); impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue { - fn arbitrary_from_maybe( + fn arbitrary_from_maybe( _rng: &mut R, + _context: &C, (value, predicate): (&SimValue, bool), ) -> Option where @@ -82,8 +101,9 @@ impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue { } impl ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue { - fn arbitrary_from_maybe( + fn arbitrary_from_maybe( rng: &mut R, + context: &C, (values, predicate): (&Vec<&SimValue>, bool), ) -> Option where @@ -94,15 +114,16 @@ impl ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue { } let value = pick(values, rng); - Self::arbitrary_from_maybe(rng, (*value, predicate)) + Self::arbitrary_from_maybe(rng, context, (*value, predicate)) } } // TODO: have some more complex generation with columns names here as well impl SimplePredicate { /// Generates a true [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for some values in the table - pub fn true_unary( + pub fn true_unary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -120,7 +141,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + TrueValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(value.0.as_bool()); // Positive is a no-op in Sqlite Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) @@ -151,7 +172,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { 
+ FalseValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(!value.0.as_bool()); Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into())) }) @@ -167,8 +188,9 @@ impl SimplePredicate { } /// Generates a false [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for a row in the table - pub fn false_unary( + pub fn false_unary( rng: &mut R, + context: &C, table: &T, row: &[SimValue], ) -> Self { @@ -217,7 +239,7 @@ impl SimplePredicate { ( num_retries, Box::new(|rng| { - TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + TrueValue::arbitrary_from_maybe(rng, context, column_value).map(|value| { assert!(value.0.as_bool()); Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into())) }) @@ -239,7 +261,9 @@ mod tests { use rand_chacha::ChaCha8Rng; use crate::{ - generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + generation::{ + pick, predicate::SimplePredicate, tests::TestContext, Arbitrary, ArbitraryFrom as _, + }, model::table::{SimValue, Table}, }; @@ -254,21 +278,23 @@ mod tests { fn fuzz_true_unary_simple_predicate() { let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::true_unary(&mut rng, &table, row); + let predicate = SimplePredicate::true_unary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) @@ -282,21 +308,23 @@ mod tests { fn fuzz_false_unary_simple_predicate() { 
let seed = get_seed(); let mut rng = ChaCha8Rng::seed_from_u64(seed); + let context = &TestContext::default(); + for _ in 0..10000 { - let mut table = Table::arbitrary(&mut rng); + let mut table = Table::arbitrary(&mut rng, context); let num_rows = rng.random_range(1..10); let values: Vec> = (0..num_rows) .map(|_| { table .columns .iter() - .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .map(|c| SimValue::arbitrary_from(&mut rng, context, &c.column_type)) .collect() }) .collect(); table.rows.extend(values.clone()); let row = pick(&table.rows, &mut rng); - let predicate = SimplePredicate::false_unary(&mut rng, &table, row); + let predicate = SimplePredicate::false_unary(&mut rng, context, &table, row); let result = values .iter() .map(|row| predicate.0.test(row, &table)) diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index f3729bf9e..ba0f9aeec 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -1,6 +1,6 @@ use crate::generation::{ - gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryContext, ArbitraryContextFrom, - ArbitraryFrom, ArbitrarySizedFrom, GenerationContext, + gen_random_text, pick_n_unique, pick_unique, Arbitrary, ArbitraryFrom, ArbitrarySized, + GenerationContext, }; use crate::model::query::predicate::Predicate; use crate::model::query::select::{ @@ -17,32 +17,20 @@ use turso_parser::ast::{Expr, SortOrder}; use super::{backtrack, pick}; impl Arbitrary for Create { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, context: &C) -> Self { Create { - table: Table::arbitrary(rng), + table: Table::arbitrary(rng, context), } } } -impl ArbitraryContext for Create { - fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { - Create { - table: Table::arbitrary_with_context(rng, context), - } - } -} - -impl ArbitraryContextFrom<&Vec
> for FromClause { - fn arbitrary_with_context_from( - rng: &mut R, - context: &C, - tables: &Vec
, - ) -> Self { +impl Arbitrary for FromClause { + fn arbitrary(rng: &mut R, context: &C) -> Self { let opts = &context.opts().query.from_clause; let weights = opts.as_weighted_index(); let num_joins = opts.joins[rng.sample(weights)].num_joins; - let mut tables = tables.clone(); + let mut tables = context.tables().clone(); let mut table = pick(&tables, rng).clone(); tables.retain(|t| t.name != table.name); @@ -83,7 +71,7 @@ impl ArbitraryContextFrom<&Vec
> for FromClause { ); } - let predicate = Predicate::arbitrary_from(rng, &table); + let predicate = Predicate::arbitrary_from(rng, context, &table); Some(JoinedTable { table: joined_table_name, join_type: JoinType::Inner, @@ -95,71 +83,9 @@ impl ArbitraryContextFrom<&Vec
> for FromClause { } } -impl ArbitraryFrom<&Vec
> for FromClause { - fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { - let num_joins = match rng.random_range(0..=100) { - 0..=90 => 0, - 91..=97 => 1, - 98..=100 => 2, - _ => unreachable!(), - }; - - let mut tables = tables.clone(); - let mut table = pick(&tables, rng).clone(); - - tables.retain(|t| t.name != table.name); - - let name = table.name.clone(); - - let mut table_context = JoinTable { - tables: Vec::new(), - rows: Vec::new(), - }; - - let joins: Vec<_> = (0..num_joins) - .filter_map(|_| { - if tables.is_empty() { - return None; - } - let join_table = pick(&tables, rng).clone(); - let joined_table_name = join_table.name.clone(); - - tables.retain(|t| t.name != join_table.name); - table_context.rows = table_context - .rows - .iter() - .cartesian_product(join_table.rows.iter()) - .map(|(t_row, j_row)| { - let mut row = t_row.clone(); - row.extend(j_row.clone()); - row - }) - .collect(); - // TODO: inneficient. use a Deque to push_front? - table_context.tables.insert(0, join_table); - for row in &mut table.rows { - assert_eq!( - row.len(), - table.columns.len(), - "Row length does not match column length after join" - ); - } - - let predicate = Predicate::arbitrary_from(rng, &table); - Some(JoinedTable { - table: joined_table_name, - join_type: JoinType::Inner, - on: predicate, - }) - }) - .collect(); - FromClause { table: name, joins } - } -} - -impl ArbitraryFrom<&C> for SelectInner { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { - let from = FromClause::arbitrary_from(rng, env.tables()); +impl Arbitrary for SelectInner { + fn arbitrary(rng: &mut R, env: &C) -> Self { + let from = FromClause::arbitrary(rng, env); let tables = env.tables().clone(); let join_table = from.into_join_table(&tables); let cuml_col_count = join_table.columns().count(); @@ -205,21 +131,25 @@ impl ArbitraryFrom<&C> for SelectInner { SelectInner { distinctness: if env.opts().indexes { - Distinctness::arbitrary(rng) + Distinctness::arbitrary(rng, env) } else { Distinctness::All }, columns: vec![ResultColumn::Star], from: Some(from), 
- where_clause: Predicate::arbitrary_from(rng, &join_table), + where_clause: Predicate::arbitrary_from(rng, env, &join_table), order_by, } } } -impl ArbitrarySizedFrom<&C> for SelectInner { - fn arbitrary_sized_from(rng: &mut R, env: &C, num_result_columns: usize) -> Self { - let mut select_inner = SelectInner::arbitrary_from(rng, env); +impl ArbitrarySized for SelectInner { + fn arbitrary_sized( + rng: &mut R, + env: &C, + num_result_columns: usize, + ) -> Self { + let mut select_inner = SelectInner::arbitrary(rng, env); let select_from = &select_inner.from.as_ref().unwrap(); let table_names = select_from .joins @@ -251,7 +181,7 @@ impl ArbitrarySizedFrom<&C> for SelectInner { } impl Arbitrary for Distinctness { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { match rng.random_range(0..=5) { 0..4 => Distinctness::All, _ => Distinctness::Distinct, @@ -259,7 +189,7 @@ impl Arbitrary for Distinctness { } } impl Arbitrary for CompoundOperator { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { match rng.random_range(0..=1) { 0 => CompoundOperator::Union, 1 => CompoundOperator::UnionAll, @@ -273,16 +203,16 @@ impl Arbitrary for CompoundOperator { /// arbitrary expressions without referring to the tables. 
pub struct SelectFree(pub Select); -impl ArbitraryFrom<&C> for SelectFree { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { - let expr = Predicate(Expr::arbitrary_sized_from(rng, env, 8)); +impl Arbitrary for SelectFree { + fn arbitrary(rng: &mut R, env: &C) -> Self { + let expr = Predicate(Expr::arbitrary_sized(rng, env, 8)); let select = Select::expr(expr); Self(select) } } -impl ArbitraryFrom<&C> for Select { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Select { + fn arbitrary(rng: &mut R, env: &C) -> Self { // Generate a number of selects based on the query size // If experimental indexes are enabled, we can have selects with compounds // Otherwise, we just have a single select with no compounds @@ -302,10 +232,10 @@ impl ArbitraryFrom<&C> for Select { let num_result_columns = rng.random_range(1..=min_column_count_across_tables); - let mut first = SelectInner::arbitrary_sized_from(rng, env, num_result_columns); + let mut first = SelectInner::arbitrary_sized(rng, env, num_result_columns); let mut rest: Vec = (0..num_compound_selects) - .map(|_| SelectInner::arbitrary_sized_from(rng, env, num_result_columns)) + .map(|_| SelectInner::arbitrary_sized(rng, env, num_result_columns)) .collect(); if !rest.is_empty() { @@ -322,7 +252,7 @@ impl ArbitraryFrom<&C> for Select { compounds: rest .into_iter() .map(|s| CompoundSelect { - operator: CompoundOperator::arbitrary(rng), + operator: CompoundOperator::arbitrary(rng, env), select: Box::new(s), }) .collect(), @@ -332,8 +262,8 @@ impl ArbitraryFrom<&C> for Select { } } -impl ArbitraryFrom<&C> for Insert { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Insert { + fn arbitrary(rng: &mut R, env: &C) -> Self { let gen_values = |rng: &mut R| { let table = pick(env.tables(), rng); let num_rows = rng.random_range(1..10); @@ -342,7 +272,7 @@ impl ArbitraryFrom<&C> for Insert { table .columns .iter() - .map(|c| SimValue::arbitrary_from(rng, &c.column_type)) + .map(|c| 
SimValue::arbitrary_from(rng, env, &c.column_type)) .collect() }) .collect(); @@ -356,7 +286,7 @@ impl ArbitraryFrom<&C> for Insert { // Find a non-empty table let select_table = env.tables().iter().find(|t| !t.rows.is_empty())?; let row = pick(&select_table.rows, rng); - let predicate = Predicate::arbitrary_from(rng, (select_table, row)); + let predicate = Predicate::arbitrary_from(rng, env, (select_table, row)); // Pick another table to insert into let select = Select::simple(select_table.name.clone(), predicate); let table = pick(env.tables(), rng); @@ -372,18 +302,18 @@ impl ArbitraryFrom<&C> for Insert { } } -impl ArbitraryFrom<&C> for Delete { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Delete { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); Self { table: table.name.clone(), - predicate: Predicate::arbitrary_from(rng, table), + predicate: Predicate::arbitrary_from(rng, env, table), } } } -impl ArbitraryFrom<&C> for Drop { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Drop { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); Self { table: table.name.clone(), @@ -391,8 +321,8 @@ impl ArbitraryFrom<&C> for Drop { } } -impl ArbitraryFrom<&C> for CreateIndex { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for CreateIndex { + fn arbitrary(rng: &mut R, env: &C) -> Self { assert!( !env.tables().is_empty(), "Cannot create an index when no tables exist in the environment." 
@@ -439,8 +369,8 @@ impl ArbitraryFrom<&C> for CreateIndex { } } -impl ArbitraryFrom<&C> for Update { - fn arbitrary_from(rng: &mut R, env: &C) -> Self { +impl Arbitrary for Update { + fn arbitrary(rng: &mut R, env: &C) -> Self { let table = pick(env.tables(), rng); let num_cols = rng.random_range(1..=table.columns.len()); let columns = pick_unique(&table.columns, num_cols, rng); @@ -449,14 +379,14 @@ impl ArbitraryFrom<&C> for Update { .map(|column| { ( column.name.clone(), - SimValue::arbitrary_from(rng, &column.column_type), + SimValue::arbitrary_from(rng, env, &column.column_type), ) }) .collect(); Update { table: table.name.clone(), set_values, - predicate: Predicate::arbitrary_from(rng, table), + predicate: Predicate::arbitrary_from(rng, env, table), } } } diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index 32bcb8117..66f02250c 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -4,24 +4,23 @@ use rand::Rng; use turso_core::Value; use crate::generation::{ - gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryContext, ArbitraryFrom, - GenerationContext, Opts, + gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom, GenerationContext, }; use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; use super::ArbitraryFromMaybe; impl Arbitrary for Name { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _c: &C) -> Self { let name = readable_name_custom("_", rng); Name(name.replace("-", "_")) } } -impl Table { - fn gen_table(rng: &mut R, opts: &Opts) -> Self { - let opts = opts.table.clone(); - let name = Name::arbitrary(rng).0; +impl Arbitrary for Table { + fn arbitrary(rng: &mut R, context: &C) -> Self { + let opts = context.opts().table.clone(); + let name = Name::arbitrary(rng, context).0; let large_table = opts.large_table.enable && rng.random_bool(opts.large_table.large_table_prob); let column_size = if large_table { @@ -30,7 
+29,7 @@ impl Table { rng.random_range(opts.column_range) } as usize; let mut column_set = HashSet::with_capacity(column_size); - for col in std::iter::repeat_with(|| Column::arbitrary(rng)) { + for col in std::iter::repeat_with(|| Column::arbitrary(rng, context)) { column_set.insert(col); if column_set.len() == column_size { break; @@ -46,22 +45,10 @@ impl Table { } } -impl Arbitrary for Table { - fn arbitrary(rng: &mut R) -> Self { - Table::gen_table(rng, &Opts::default()) - } -} - -impl ArbitraryContext for Table { - fn arbitrary_with_context(rng: &mut R, context: &C) -> Self { - Table::gen_table(rng, context.opts()) - } -} - impl Arbitrary for Column { - fn arbitrary(rng: &mut R) -> Self { - let name = Name::arbitrary(rng).0; - let column_type = ColumnType::arbitrary(rng); + fn arbitrary(rng: &mut R, context: &C) -> Self { + let name = Name::arbitrary(rng, context).0; + let column_type = ColumnType::arbitrary(rng, context); Self { name, column_type, @@ -72,16 +59,20 @@ impl Arbitrary for Column { } impl Arbitrary for ColumnType { - fn arbitrary(rng: &mut R) -> Self { + fn arbitrary(rng: &mut R, _context: &C) -> Self { pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() } } impl ArbitraryFrom<&Table> for Vec { - fn arbitrary_from(rng: &mut R, table: &Table) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + table: &Table, + ) -> Self { let mut row = Vec::new(); for column in table.columns.iter() { - let value = SimValue::arbitrary_from(rng, &column.column_type); + let value = SimValue::arbitrary_from(rng, context, &column.column_type); row.push(value); } row @@ -89,7 +80,11 @@ impl ArbitraryFrom<&Table> for Vec { } impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&Self>) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + values: &Vec<&Self>, + ) -> Self { if values.is_empty() { return Self(Value::Null); } @@ -99,7 +94,11 @@ impl ArbitraryFrom<&Vec<&SimValue>> for 
SimValue { } impl ArbitraryFrom<&ColumnType> for SimValue { - fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + column_type: &ColumnType, + ) -> Self { let value = match column_type { ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), @@ -113,19 +112,27 @@ impl ArbitraryFrom<&ColumnType> for SimValue { pub struct LTValue(pub SimValue); impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self(SimValue(Value::Null)); } // Get value less than all values let value = Value::exec_min(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, &SimValue(value)) + Self::arbitrary_from(rng, context, &SimValue(value)) } } impl ArbitraryFrom<&SimValue> for LTValue { - fn arbitrary_from(rng: &mut R, value: &SimValue) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { let new_value = match &value.0 { Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), @@ -175,19 +182,27 @@ impl ArbitraryFrom<&SimValue> for LTValue { pub struct GTValue(pub SimValue); impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { - fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { if values.is_empty() { return Self(SimValue(Value::Null)); } // Get value greater than all values let value = Value::exec_max(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, &SimValue(value)) + Self::arbitrary_from(rng, context, &SimValue(value)) } } impl ArbitraryFrom<&SimValue> for GTValue { - fn arbitrary_from(rng: &mut R, 
value: &SimValue) -> Self { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { let new_value = match &value.0 { Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), @@ -237,7 +252,11 @@ impl ArbitraryFrom<&SimValue> for GTValue { pub struct LikeValue(pub SimValue); impl ArbitraryFromMaybe<&SimValue> for LikeValue { - fn arbitrary_from_maybe(rng: &mut R, value: &SimValue) -> Option { + fn arbitrary_from_maybe( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option { match &value.0 { value @ Value::Text(..) => { let t = value.to_string(); From 06b923d0c152790e450c69bd0b5b67275aa07dd4 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 27 Aug 2025 13:59:55 -0300 Subject: [PATCH 07/20] adjust simulator to use correct trait signature --- simulator/generation/mod.rs | 10 +++++ simulator/generation/plan.rs | 37 ++++++++-------- simulator/generation/property.rs | 72 ++++++++++++++++++-------------- simulator/generation/query.rs | 22 ++++++---- simulator/main.rs | 3 +- 5 files changed, 82 insertions(+), 62 deletions(-) diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index a63936ab5..bc4a3bd23 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -29,3 +29,13 @@ impl GenerationContext for SimulatorEnv { &self.gen_opts } } + +impl GenerationContext for &mut SimulatorEnv { + fn tables(&self) -> &Vec { + &self.tables.tables + } + + fn opts(&self) -> &sql_generation::generation::Opts { + &self.gen_opts + } +} diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 47763657a..9e211759e 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -9,7 +9,7 @@ use std::{ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, query::SelectFree, Arbitrary, ArbitraryFrom}, + generation::{frequency, query::SelectFree, Arbitrary, ArbitraryFrom, 
GenerationContext}, model::{ query::{update::Update, Create, CreateIndex, Delete, Drop, Insert, Select}, table::SimValue, @@ -395,16 +395,14 @@ impl InteractionPlan { stats } -} -impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { - fn arbitrary_from(rng: &mut R, env: &mut SimulatorEnv) -> Self { + pub fn generate_plan(rng: &mut R, env: &mut SimulatorEnv) -> Self { let mut plan = InteractionPlan::new(); let num_interactions = env.opts.max_interactions; // First create at least one table - let create_query = Create::arbitrary(rng); + let create_query = Create::arbitrary(rng, env); env.tables.push(create_query.table.clone()); plan.plan @@ -416,7 +414,7 @@ impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { plan.plan.len(), num_interactions ); - let interactions = Interactions::arbitrary_from(rng, (env, plan.stats())); + let interactions = Interactions::arbitrary_from(rng, env, (env, plan.stats())); interactions.shadow(&mut env.tables); plan.plan.push(interactions); } @@ -756,42 +754,42 @@ fn reopen_database(env: &mut SimulatorEnv) { } fn random_create(rng: &mut R, env: &SimulatorEnv) -> Interactions { - let mut create = Create::arbitrary(rng); + let mut create = Create::arbitrary(rng, env); while env.tables.iter().any(|t| t.name == create.table.name) { - create = Create::arbitrary(rng); + create = Create::arbitrary(rng, env); } Interactions::Query(Query::Create(create)) } fn random_read(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Select(Select::arbitrary_from(rng, env))) + Interactions::Query(Query::Select(Select::arbitrary(rng, env))) } fn random_expr(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Select(SelectFree::arbitrary_from(rng, env).0)) + Interactions::Query(Query::Select(SelectFree::arbitrary(rng, env).0)) } fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Insert(Insert::arbitrary_from(rng, env))) + 
Interactions::Query(Query::Insert(Insert::arbitrary(rng, env))) } fn random_delete(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Delete(Delete::arbitrary_from(rng, env))) + Interactions::Query(Query::Delete(Delete::arbitrary(rng, env))) } fn random_update(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Update(Update::arbitrary_from(rng, env))) + Interactions::Query(Query::Update(Update::arbitrary(rng, env))) } fn random_drop(rng: &mut R, env: &SimulatorEnv) -> Interactions { - Interactions::Query(Query::Drop(Drop::arbitrary_from(rng, env))) + Interactions::Query(Query::Drop(Drop::arbitrary(rng, env))) } fn random_create_index(rng: &mut R, env: &SimulatorEnv) -> Option { if env.tables.is_empty() { return None; } - let mut create_index = CreateIndex::arbitrary_from(rng, env); + let mut create_index = CreateIndex::arbitrary(rng, env); while env .tables .iter() @@ -801,7 +799,7 @@ fn random_create_index(rng: &mut R, env: &SimulatorEnv) -> Option< .iter() .any(|i| i == &create_index.index_name) { - create_index = CreateIndex::arbitrary_from(rng, env); + create_index = CreateIndex::arbitrary(rng, env); } Some(Interactions::Query(Query::CreateIndex(create_index))) @@ -818,17 +816,18 @@ fn random_fault(rng: &mut R, env: &SimulatorEnv) -> Interactions { } impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { - fn arbitrary_from( + fn arbitrary_from( rng: &mut R, + _context: &C, (env, stats): (&SimulatorEnv, InteractionStats), ) -> Self { - let remaining_ = remaining(env, &stats); + let remaining_ = remaining(&env.opts, &stats); frequency( vec![ ( f64::min(remaining_.read, remaining_.write) + remaining_.create, Box::new(|rng: &mut R| { - Interactions::Property(Property::arbitrary_from(rng, (env, &stats))) + Interactions::Property(Property::arbitrary_from(rng, env, (env, &stats))) }), ), ( diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 
288c4e75d..8357a1e3b 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, pick, pick_index, ArbitraryFrom}, + generation::{frequency, pick, pick_index, Arbitrary, ArbitraryFrom, GenerationContext}, model::{ query::{ predicate::Predicate, @@ -15,7 +15,11 @@ use sql_generation::{ use turso_core::{types, LimboError}; use turso_parser::ast::{self, Distinctness}; -use crate::{generation::Shadow as _, model::Query, runner::env::SimulatorEnv}; +use crate::{ + generation::Shadow as _, + model::Query, + runner::env::{SimulatorEnv, SimulatorOpts}, +}; use super::plan::{Assertion, Interaction, InteractionStats, ResultSet}; @@ -1021,29 +1025,29 @@ pub(crate) struct Remaining { pub(crate) drop: f64, } -pub(crate) fn remaining(env: &SimulatorEnv, stats: &InteractionStats) -> Remaining { - let remaining_read = ((env.opts.max_interactions as f64 * env.opts.read_percent / 100.0) +pub(crate) fn remaining(opts: &SimulatorOpts, stats: &InteractionStats) -> Remaining { + let remaining_read = ((opts.max_interactions as f64 * opts.read_percent / 100.0) - (stats.read_count as f64)) .max(0.0); - let remaining_write = ((env.opts.max_interactions as f64 * env.opts.write_percent / 100.0) + let remaining_write = ((opts.max_interactions as f64 * opts.write_percent / 100.0) - (stats.write_count as f64)) .max(0.0); - let remaining_create = ((env.opts.max_interactions as f64 * env.opts.create_percent / 100.0) + let remaining_create = ((opts.max_interactions as f64 * opts.create_percent / 100.0) - (stats.create_count as f64)) .max(0.0); - let remaining_create_index = - ((env.opts.max_interactions as f64 * env.opts.create_index_percent / 100.0) - - (stats.create_index_count as f64)) - .max(0.0); + let remaining_create_index = ((opts.max_interactions as f64 * opts.create_index_percent + / 100.0) + - (stats.create_index_count as f64)) + .max(0.0); - let remaining_delete 
= ((env.opts.max_interactions as f64 * env.opts.delete_percent / 100.0) + let remaining_delete = ((opts.max_interactions as f64 * opts.delete_percent / 100.0) - (stats.delete_count as f64)) .max(0.0); - let remaining_update = ((env.opts.max_interactions as f64 * env.opts.update_percent / 100.0) + let remaining_update = ((opts.max_interactions as f64 * opts.update_percent / 100.0) - (stats.update_count as f64)) .max(0.0); - let remaining_drop = ((env.opts.max_interactions as f64 * env.opts.drop_percent / 100.0) + let remaining_drop = ((opts.max_interactions as f64 * opts.drop_percent / 100.0) - (stats.drop_count as f64)) .max(0.0); @@ -1067,7 +1071,7 @@ fn property_insert_values_select( let table = pick(&env.tables, rng); // Generate rows to insert let rows = (0..rng.random_range(1..=5)) - .map(|_| Vec::::arbitrary_from(rng, table)) + .map(|_| Vec::::arbitrary_from(rng, env, table)) .collect::>(); // Pick a random row to select @@ -1101,7 +1105,7 @@ fn property_insert_values_select( })); } for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); match &query { Query::Delete(Delete { table: t, @@ -1144,7 +1148,7 @@ fn property_insert_values_select( // Select the row let select_query = Select::simple( table.name.clone(), - Predicate::arbitrary_from(rng, (table, &row)), + Predicate::arbitrary_from(rng, env, (table, &row)), ); Property::InsertValuesSelect { @@ -1158,7 +1162,7 @@ fn property_insert_values_select( fn property_read_your_updates_back(rng: &mut R, env: &SimulatorEnv) -> Property { // e.g. UPDATE t SET a=1, b=2 WHERE c=1; - let update = Update::arbitrary_from(rng, env); + let update = Update::arbitrary(rng, env); // e.g. 
SELECT a, b FROM t WHERE c=1; let select = Select::single( update.table().to_string(), @@ -1190,7 +1194,7 @@ fn property_select_limit(rng: &mut R, env: &SimulatorEnv) -> Prope let select = Select::single( table.name.clone(), vec![ResultColumn::Star], - Predicate::arbitrary_from(rng, table), + Predicate::arbitrary_from(rng, env, table), Some(rng.random_range(1..=5)), Distinctness::All, ); @@ -1215,7 +1219,7 @@ fn property_double_create_failure( // - [x] There will be no errors in the middle interactions.(best effort) // - [ ] Table `t` will not be renamed or dropped.(todo: add this constraint once ALTER or DROP is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); if let Query::Create(Create { table: t }) = &query { // There will be no errors in the middle interactions. // - Creating the same table is an error @@ -1240,7 +1244,7 @@ fn property_delete_select( // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let predicate = Predicate::arbitrary_from(rng, table); + let predicate = Predicate::arbitrary_from(rng, env, table); // Create random queries respecting the constraints let mut queries = Vec::new(); @@ -1248,7 +1252,7 @@ fn property_delete_select( // - [x] A row that holds for the predicate will not be inserted. // - [ ] The table `t` will not be renamed, dropped, or altered. (todo: add this constraint once ALTER or DROP is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); match &query { Query::Insert(Insert::Values { table: t, values }) => { // A row that holds for the predicate will not be inserted. @@ -1303,7 +1307,7 @@ fn property_drop_select( // - [x] There will be no errors in the middle interactions. 
(this constraint is impossible to check, so this is just best effort) // - [-] The table `t` will not be created, no table will be renamed to `t`. (todo: update this constraint once ALTER is implemented) for _ in 0..rng.random_range(0..3) { - let query = Query::arbitrary_from(rng, (env, remaining)); + let query = Query::arbitrary_from(rng, env, remaining); if let Query::Create(Create { table: t }) = &query { // - The table `t` will not be created if t.name == table.name { @@ -1313,7 +1317,10 @@ fn property_drop_select( queries.push(query); } - let select = Select::simple(table.name.clone(), Predicate::arbitrary_from(rng, table)); + let select = Select::simple( + table.name.clone(), + Predicate::arbitrary_from(rng, env, table), + ); Property::DropSelect { table: table.name.clone(), @@ -1326,7 +1333,7 @@ fn property_select_select_optimizer(rng: &mut R, env: &SimulatorEn // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let predicate = Predicate::arbitrary_from(rng, table); + let predicate = Predicate::arbitrary_from(rng, env, table); // Transform into a Binary predicate to force values to be casted to a bool let expr = ast::Expr::Binary( Box::new(predicate.0), @@ -1344,8 +1351,8 @@ fn property_where_true_false_null(rng: &mut R, env: &SimulatorEnv) // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let p1 = Predicate::arbitrary_from(rng, table); - let p2 = Predicate::arbitrary_from(rng, table); + let p1 = Predicate::arbitrary_from(rng, env, table); + let p2 = Predicate::arbitrary_from(rng, env, table); // Create the select query let select = Select::simple(table.name.clone(), p1); @@ -1363,8 +1370,8 @@ fn property_union_all_preserves_cardinality( // Get a random table let table = pick(&env.tables, rng); // Generate a random predicate - let p1 = Predicate::arbitrary_from(rng, table); - let p2 = Predicate::arbitrary_from(rng, table); + let p1 = Predicate::arbitrary_from(rng, env, table); + 
let p2 = Predicate::arbitrary_from(rng, env, table); // Create the select query let select = Select::single( @@ -1387,7 +1394,7 @@ fn property_fsync_no_wait( remaining: &Remaining, ) -> Property { Property::FsyncNoWait { - query: Query::arbitrary_from(rng, (env, remaining)), + query: Query::arbitrary_from(rng, env, remaining), tables: env.tables.iter().map(|t| t.name.clone()).collect(), } } @@ -1398,17 +1405,18 @@ fn property_faulty_query( remaining: &Remaining, ) -> Property { Property::FaultyQuery { - query: Query::arbitrary_from(rng, (env, remaining)), + query: Query::arbitrary_from(rng, env, remaining), tables: env.tables.iter().map(|t| t.name.clone()).collect(), } } impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { - fn arbitrary_from( + fn arbitrary_from( rng: &mut R, + _context: &C, (env, stats): (&SimulatorEnv, &InteractionStats), ) -> Self { - let remaining_ = remaining(env, stats); + let remaining_ = remaining(&env.opts, stats); frequency( vec![ diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index bb1344c2a..586891d28 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,35 +1,39 @@ -use crate::{model::Query, SimulatorEnv}; +use crate::model::Query; use rand::Rng; use sql_generation::{ - generation::{frequency, Arbitrary, ArbitraryFrom}, + generation::{frequency, Arbitrary, ArbitraryFrom, GenerationContext}, model::query::{update::Update, Create, Delete, Insert, Select}, }; use super::property::Remaining; -impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query { - fn arbitrary_from(rng: &mut R, (env, remaining): (&SimulatorEnv, &Remaining)) -> Self { +impl ArbitraryFrom<&Remaining> for Query { + fn arbitrary_from( + rng: &mut R, + context: &C, + remaining: &Remaining, + ) -> Self { frequency( vec![ ( remaining.create, - Box::new(|rng| Self::Create(Create::arbitrary(rng))), + Box::new(|rng| Self::Create(Create::arbitrary(rng, context))), ), ( remaining.read, - Box::new(|rng| 
Self::Select(Select::arbitrary_from(rng, env))), + Box::new(|rng| Self::Select(Select::arbitrary(rng, context))), ), ( remaining.write, - Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, env))), + Box::new(|rng| Self::Insert(Insert::arbitrary(rng, context))), ), ( remaining.update, - Box::new(|rng| Self::Update(Update::arbitrary_from(rng, env))), + Box::new(|rng| Self::Update(Update::arbitrary(rng, context))), ), ( f64::min(remaining.write, remaining.delete), - Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, env))), + Box::new(|rng| Self::Delete(Delete::arbitrary(rng, context))), ), ], rng, diff --git a/simulator/main.rs b/simulator/main.rs index 7db63dbb4..9b60a149a 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -10,7 +10,6 @@ use runner::cli::{SimulatorCLI, SimulatorCommand}; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; use runner::{differential, watch}; -use sql_generation::generation::ArbitraryFrom; use std::any::Any; use std::backtrace::Backtrace; use std::fs::OpenOptions; @@ -534,7 +533,7 @@ fn setup_simulation( tracing::info!("Generating database interaction plan..."); let plans = (1..=env.opts.max_connections) - .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env)) + .map(|_| InteractionPlan::generate_plan(&mut env.rng.clone(), &mut env)) .collect::>(); // todo: for now, we only use 1 connection, so it's safe to use the first plan. 
From bc6976fd33327b6663dcc2e34b200ae7cbdc9166 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 27 Aug 2025 15:20:32 -0300 Subject: [PATCH 08/20] add Select and Insert generation options --- sql_generation/generation/opts.rs | 62 +++++++++++++++++++++++++++++- sql_generation/generation/query.rs | 49 +++++++++++------------ 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs index d978d54cd..f6c7a671d 100644 --- a/sql_generation/generation/opts.rs +++ b/sql_generation/generation/opts.rs @@ -1,4 +1,7 @@ -use std::ops::Range; +use std::{ + num::{NonZero, NonZeroU32}, + ops::Range, +}; use rand::distr::weighted::WeightedIndex; @@ -67,7 +70,49 @@ impl Default for LargeTableOpts { #[derive(Debug, Default, Clone)] pub struct QueryOpts { + pub select: SelectOpts, pub from_clause: FromClauseOpts, + pub insert: InsertOpts, +} + +#[derive(Debug, Clone)] +pub struct SelectOpts { + pub order_by_prob: f64, + pub compound_selects: Vec, +} + +impl Default for SelectOpts { + fn default() -> Self { + Self { + order_by_prob: 0.3, + compound_selects: vec![ + CompoundSelectWeight { + num_compound_selects: 0, + weight: 95, + }, + CompoundSelectWeight { + num_compound_selects: 1, + weight: 4, + }, + CompoundSelectWeight { + num_compound_selects: 2, + weight: 1, + }, + ], + } + } +} + +impl SelectOpts { + pub fn compound_select_weighted_index(&self) -> WeightedIndex { + WeightedIndex::new(self.compound_selects.iter().map(|weight| weight.weight)).unwrap() + } +} + +#[derive(Debug, Clone, PartialEq, PartialOrd)] +pub struct CompoundSelectWeight { + pub num_compound_selects: u32, + pub weight: u32, } #[derive(Debug, Clone)] @@ -107,3 +152,18 @@ pub struct JoinWeight { pub num_joins: u32, pub weight: u32, } + +#[derive(Debug, Clone)] +pub struct InsertOpts { + pub min_rows: NonZeroU32, + pub max_rows: NonZeroU32, +} + +impl Default for InsertOpts { + fn default() -> Self { + Self { + min_rows: 
NonZero::new(1).unwrap(), + max_rows: NonZero::new(10).unwrap(), + } + } +} diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index ba0f9aeec..54ca17ccf 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -90,24 +90,25 @@ impl Arbitrary for SelectInner { let join_table = from.into_join_table(&tables); let cuml_col_count = join_table.columns().count(); - let order_by = 'order_by: { - if rng.random_bool(0.3) { + let order_by = rng + .random_bool(env.opts().query.select.order_by_prob) + .then(|| { let order_by_table_candidates = from .joins .iter() - .map(|j| j.table.clone()) - .chain(std::iter::once(from.table.clone())) + .map(|j| &j.table) + .chain(std::iter::once(&from.table)) .collect::>(); let order_by_col_count = (rng.random::() * rng.random::() * (cuml_col_count as f64)) as usize; // skew towards 0 if order_by_col_count == 0 { - break 'order_by None; + return None; } let mut col_names = std::collections::HashSet::new(); let mut order_by_cols = Vec::new(); while order_by_cols.len() < order_by_col_count { let table = pick(&order_by_table_candidates, rng); - let table = tables.iter().find(|t| t.name == *table).unwrap(); + let table = tables.iter().find(|t| t.name == table.as_str()).unwrap(); let col = pick(&table.columns, rng); let col_name = format!("{}.{}", table.name, col.name); if col_names.insert(col_name.clone()) { @@ -124,10 +125,8 @@ impl Arbitrary for SelectInner { Some(OrderBy { columns: order_by_cols, }) - } else { - None - } - }; + }) + .flatten(); SelectInner { distinctness: if env.opts().indexes { @@ -154,12 +153,10 @@ impl ArbitrarySized for SelectInner { let table_names = select_from .joins .iter() - .map(|j| j.table.clone()) - .chain(std::iter::once(select_from.table.clone())) - .collect::>(); + .map(|j| &j.table) + .chain(std::iter::once(&select_from.table)); let flat_columns_names = table_names - .iter() .flat_map(|t| { env.tables() .iter() @@ -167,14 +164,15 @@ impl 
ArbitrarySized for SelectInner { .unwrap() .columns .iter() - .map(|c| format!("{}.{}", t.clone(), c.name)) + .map(move |c| format!("{}.{}", t, c.name)) }) .collect::>(); let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng); - let mut columns = Vec::new(); - for column_name in selected_columns { - columns.push(ResultColumn::Column(column_name.clone())); - } + let columns = selected_columns + .into_iter() + .map(|col_name| ResultColumn::Column(col_name)) + .collect(); + select_inner.columns = columns; select_inner } @@ -188,6 +186,7 @@ impl Arbitrary for Distinctness { } } } + impl Arbitrary for CompoundOperator { fn arbitrary(rng: &mut R, _context: &C) -> Self { match rng.random_range(0..=1) { @@ -216,13 +215,10 @@ impl Arbitrary for Select { // Generate a number of selects based on the query size // If experimental indexes are enabled, we can have selects with compounds // Otherwise, we just have a single select with no compounds + let opts = &env.opts().query.select; let num_compound_selects = if env.opts().indexes { - match rng.random_range(0..=100) { - 0..=95 => 0, - 96..=99 => 1, - 100 => 2, - _ => unreachable!(), - } + opts.compound_selects[rng.sample(opts.compound_select_weighted_index())] + .num_compound_selects } else { 0 }; @@ -264,9 +260,10 @@ impl Arbitrary for Select { impl Arbitrary for Insert { fn arbitrary(rng: &mut R, env: &C) -> Self { + let opts = &env.opts().query.insert; let gen_values = |rng: &mut R| { let table = pick(env.tables(), rng); - let num_rows = rng.random_range(1..10); + let num_rows = rng.random_range(opts.min_rows.get()..opts.max_rows.get()); let values: Vec> = (0..num_rows) .map(|_| { table From faa943fc765be469acfdf6f27a2885aee7f889d8 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 28 Aug 2025 01:31:21 -0300 Subject: [PATCH 09/20] reduce cloning for `pick_unique` --- sql_generation/generation/mod.rs | 25 +++++++++++++------------ sql_generation/generation/query.rs | 5 +---- 2 files changed, 14 
insertions(+), 16 deletions(-) diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 18fa78021..25f353673 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -142,11 +142,15 @@ pub fn pick_index(choices: usize, rng: &mut R) -> usize { /// pick_n_unique is a helper function for uniformly picking N unique elements from a range. /// The elements themselves are usize, typically representing indices. -pub fn pick_n_unique(range: std::ops::Range, n: usize, rng: &mut R) -> Vec { +pub fn pick_n_unique( + range: std::ops::Range, + n: usize, + rng: &mut R, +) -> impl Iterator { use rand::seq::SliceRandom; let mut items: Vec = range.collect(); items.shuffle(rng); - items.into_iter().take(n).collect() + items.into_iter().take(n) } /// gen_random_text uses `anarchist_readable_name_generator_lib` to generate random @@ -168,22 +172,19 @@ pub fn gen_random_text(rng: &mut T) -> String { } } -pub fn pick_unique( - items: &[T], +pub fn pick_unique<'a, T: PartialEq>( + items: &'a [T], count: usize, rng: &mut impl rand::Rng, -) -> Vec -where - ::Owned: PartialEq, -{ - let mut picked: Vec = Vec::new(); +) -> impl Iterator { + let mut picked: Vec<&T> = Vec::new(); while picked.len() < count { let item = pick(items, rng); - if !picked.contains(&item.to_owned()) { - picked.push(item.to_owned()); + if !picked.contains(&item) { + picked.push(item); } } - picked + picked.into_iter() } #[cfg(test)] diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs index 54ca17ccf..e2a36ddb9 100644 --- a/sql_generation/generation/query.rs +++ b/sql_generation/generation/query.rs @@ -169,8 +169,7 @@ impl ArbitrarySized for SelectInner { .collect::>(); let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng); let columns = selected_columns - .into_iter() - .map(|col_name| ResultColumn::Column(col_name)) + .map(|col_name| ResultColumn::Column(col_name.clone())) .collect(); 
select_inner.columns = columns; @@ -338,7 +337,6 @@ impl Arbitrary for CreateIndex { let picked_column_indices = pick_n_unique(0..table.columns.len(), num_columns_to_pick, rng); let columns = picked_column_indices - .into_iter() .map(|i| { let column = &table.columns[i]; ( @@ -372,7 +370,6 @@ impl Arbitrary for Update { let num_cols = rng.random_range(1..=table.columns.len()); let columns = pick_unique(&table.columns, num_cols, rng); let set_values: Vec<(String, SimValue)> = columns - .iter() .map(|column| { ( column.name.clone(), From a1407869d495754ec7cf92e9e36faf1087e86e49 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 28 Aug 2025 02:22:34 -0300 Subject: [PATCH 10/20] add serde, schemars and garde to profiles and options --- Cargo.lock | 109 +++++++++++++++++++++++++++++- Cargo.toml | 2 + simulator/Cargo.toml | 2 + simulator/profiles/io.rs | 19 +++++- simulator/profiles/mod.rs | 35 +++++++++- simulator/profiles/query.rs | 100 +++++++++------------------ sql_generation/Cargo.toml | 2 + sql_generation/generation/opts.rs | 87 +++++++++++++++++++++--- 8 files changed, 273 insertions(+), 83 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5330e3d75..d796758b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -426,6 +426,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.17" @@ -601,6 +610,21 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "compact_str" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +dependencies = [ + "castaway", + "cfg-if", + 
"itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1402,6 +1426,29 @@ dependencies = [ "slab", ] +[[package]] +name = "garde" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a989bd2fd12136080f7825ff410d9239ce84a2a639487fc9d924ee42e2fb84f" +dependencies = [ + "compact_str", + "garde_derive", + "serde", + "smallvec", +] + +[[package]] +name = "garde_derive" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f7f0545bbbba0a37d4d445890fa5759814e0716f02417b39f6fab292193df68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "genawaiter" version = "0.99.1" @@ -2132,6 +2179,7 @@ dependencies = [ "clap", "dirs 6.0.0", "env_logger 0.10.2", + "garde", "hex", "itertools 0.14.0", "log", @@ -2141,6 +2189,7 @@ dependencies = [ "regex", "regex-syntax 0.8.5", "rusqlite", + "schemars 1.0.4", "serde", "serde_json", "sql_generation", @@ -3109,6 +3158,26 @@ dependencies = [ "thiserror 2.0.12", ] +[[package]] +name = "ref-cast" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "regex" version = "1.11.1" @@ -3355,7 +3424,20 @@ checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", "indexmap 1.9.3", - "schemars_derive", + "schemars_derive 0.8.22", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive 1.0.4", "serde", "serde_json", ] @@ -3372,6 +3454,18 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.100", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3474,6 +3568,9 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "socket2" @@ -3503,10 +3600,12 @@ version = "0.1.4" dependencies = [ "anarchist-readable-name-generator-lib 0.2.0", "anyhow", + "garde", "hex", "itertools 0.14.0", "rand 0.9.2", "rand_chacha 0.9.0", + "schemars 1.0.4", "serde", "tracing", "turso_core", @@ -3528,6 +3627,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "str_stack" version = "0.1.0" @@ -4038,7 +4143,7 @@ dependencies = [ "mimalloc", "nu-ansi-term 0.50.1", "rustyline", - "schemars", + "schemars 0.8.22", "serde", "serde_json", "shlex", diff --git a/Cargo.toml b/Cargo.toml index 832ab8d09..0e1fd5065 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,8 @@ rusqlite = { version = "0.37.0", features = ["bundled"] } itertools = "0.14.0" rand = "0.9.2" tracing = "0.1.41" +schemars = "1.0.4" 
+garde = "0.22" [profile.release] debug = "line-tables-only" diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index f01896716..29f42ec31 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -38,3 +38,5 @@ hex = "0.4.3" itertools = "0.14.0" sql_generation = { workspace = true } turso_parser = { workspace = true } +schemars = { workspace = true } +garde = { workspace = true, features = ["derive", "serde"] } diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs index 4bca40b8f..e49d6d1c9 100644 --- a/simulator/profiles/io.rs +++ b/simulator/profiles/io.rs @@ -1,7 +1,17 @@ -#[derive(Debug, Clone)] +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use super::{max_dependent, min_dependent}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct IOProfile { + #[garde(skip)] pub enable: bool, + #[garde(dive)] pub latency: LatencyProfile, + // TODO: expand here with header corruption options and faults on specific IO operations } impl Default for IOProfile { @@ -13,13 +23,18 @@ impl Default for IOProfile { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct LatencyProfile { + #[garde(skip)] pub enable: bool, + #[garde(range(min = 0, max = 100))] /// Added IO latency probability pub latency_probability: usize, + #[garde(custom(max_dependent(&self.max_tick)))] /// Minimum tick time in microseconds for simulated time pub min_tick: u64, + #[garde(custom(min_dependent(&self.min_tick)))] /// Maximum tick time in microseconds for simulated time pub max_tick: u64, } diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 6e5187836..6a5e677a2 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -1,13 +1,22 @@ +use std::fmt::Display; + +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, 
Serialize}; + use crate::profiles::{io::IOProfile, query::QueryProfile}; mod io; mod query; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] pub struct Profile { + #[garde(skip)] /// Experimental MVCC feature pub experimental_mvcc: bool, + #[garde(dive)] pub io: IOProfile, + #[garde(dive)] pub query: QueryProfile, } @@ -20,3 +29,27 @@ impl Default for Profile { } } } + +/// Minimum value of field is dependent on another field in the struct +fn min_dependent(min: &T) -> impl FnOnce(&T, &()) -> garde::Result + '_ { + move |value, _| { + if value < min { + return Err(garde::Error::new(format!( + "`{value}` is smaller than `{min}`" + ))); + } + Ok(()) + } +} + +/// Maximum value of field is dependent on another field in the struct +fn max_dependent(max: &T) -> impl FnOnce(&T, &()) -> garde::Result + '_ { + move |value, _| { + if value > max { + return Err(garde::Error::new(format!( + "`{value}` is bigger than `{max}`" + ))); + } + Ok(()) + } +} diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs index 0b1c663c3..8a56e3734 100644 --- a/simulator/profiles/query.rs +++ b/simulator/profiles/query.rs @@ -1,75 +1,37 @@ -#[derive(Debug, Default, Clone)] +use garde::Validate; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use sql_generation::generation::Opts; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct QueryProfile { - pub create_table: CreateTableProfile, - pub create_index: CreateIndexProfile, - pub insert: InsertProfile, - pub update: UpdateProfile, - pub delete: DeleteProfile, - pub drop_table: DropTableProfile, + #[garde(dive)] + pub gen_opts: Opts, + #[garde(skip)] + pub create_table: bool, + #[garde(skip)] + pub create_index: bool, + #[garde(skip)] + pub insert: bool, + #[garde(skip)] + pub update: bool, + #[garde(skip)] + pub delete: bool, + #[garde(skip)] + pub drop_table: bool, } -#[derive(Debug, Clone)] 
-pub struct CreateTableProfile { - pub enable: bool, -} - -impl Default for CreateTableProfile { +impl Default for QueryProfile { fn default() -> Self { - Self { enable: true } - } -} - -#[derive(Debug, Clone)] -pub struct CreateIndexProfile { - pub enable: bool, -} - -impl Default for CreateIndexProfile { - fn default() -> Self { - Self { enable: true } - } -} - -#[derive(Debug, Clone)] -pub struct InsertProfile { - pub enable: bool, -} - -impl Default for InsertProfile { - fn default() -> Self { - Self { enable: true } - } -} - -#[derive(Debug, Clone)] -pub struct UpdateProfile { - pub enable: bool, -} - -impl Default for UpdateProfile { - fn default() -> Self { - Self { enable: true } - } -} - -#[derive(Debug, Clone)] -pub struct DeleteProfile { - pub enable: bool, -} - -impl Default for DeleteProfile { - fn default() -> Self { - Self { enable: true } - } -} - -#[derive(Debug, Clone)] -pub struct DropTableProfile { - pub enable: bool, -} - -impl Default for DropTableProfile { - fn default() -> Self { - Self { enable: true } + Self { + gen_opts: Opts::default(), + create_table: true, + create_index: true, + insert: true, + update: true, + delete: true, + drop_table: true, + } } } diff --git a/sql_generation/Cargo.toml b/sql_generation/Cargo.toml index d84d08380..cf82bb036 100644 --- a/sql_generation/Cargo.toml +++ b/sql_generation/Cargo.toml @@ -19,6 +19,8 @@ anarchist-readable-name-generator-lib = "0.2.0" itertools = { workspace = true } anyhow = { workspace = true } tracing = { workspace = true } +schemars = { workspace = true } +garde = { workspace = true, features = ["derive", "serde"] } [dev-dependencies] rand_chacha = "0.9.0" diff --git a/sql_generation/generation/opts.rs b/sql_generation/generation/opts.rs index f6c7a671d..190033748 100644 --- a/sql_generation/generation/opts.rs +++ b/sql_generation/generation/opts.rs @@ -1,9 +1,13 @@ use std::{ + fmt::Display, num::{NonZero, NonZeroU32}, ops::Range, }; +use garde::Validate; use 
rand::distr::weighted::WeightedIndex; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use crate::model::table::Table; @@ -13,11 +17,15 @@ pub trait GenerationContext { fn opts(&self) -> &Opts; } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct Opts { + #[garde(skip)] /// Indexes enabled pub indexes: bool, + #[garde(dive)] pub table: TableOpts, + #[garde(dive)] pub query: QueryOpts, } @@ -31,10 +39,13 @@ impl Default for Opts { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct TableOpts { + #[garde(dive)] pub large_table: LargeTableOpts, /// Range of numbers of columns to generate + #[garde(custom(range_struct_min(1)))] pub column_range: Range, } @@ -49,11 +60,16 @@ impl Default for TableOpts { } /// Options for generating large tables -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct LargeTableOpts { + #[garde(skip)] pub enable: bool, + #[garde(range(min = 0.0, max = 1.0))] pub large_table_prob: f64, + /// Range of numbers of columns to generate + #[garde(custom(range_struct_min(1)))] pub column_range: Range, } @@ -68,16 +84,23 @@ impl Default for LargeTableOpts { } } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct QueryOpts { + #[garde(dive)] pub select: SelectOpts, + #[garde(dive)] pub from_clause: FromClauseOpts, + #[garde(dive)] pub insert: InsertOpts, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct SelectOpts { + #[garde(range(min = 0.0, max = 1.0))] pub order_by_prob: f64, + #[garde(length(min = 1))] pub compound_selects: Vec, } @@ 
-109,14 +132,17 @@ impl SelectOpts { } } -#[derive(Debug, Clone, PartialEq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] pub struct CompoundSelectWeight { pub num_compound_selects: u32, pub weight: u32, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields)] pub struct FromClauseOpts { + #[garde(length(min = 1))] pub joins: Vec, } @@ -147,15 +173,19 @@ impl FromClauseOpts { } } -#[derive(Debug, Clone, PartialEq, PartialOrd)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize, JsonSchema)] +#[serde(deny_unknown_fields)] pub struct JoinWeight { pub num_joins: u32, pub weight: u32, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields)] pub struct InsertOpts { + #[garde(skip)] pub min_rows: NonZeroU32, + #[garde(skip)] pub max_rows: NonZeroU32, } @@ -167,3 +197,42 @@ impl Default for InsertOpts { } } } + +fn range_struct_min( + min: T, +) -> impl FnOnce(&Range, &()) -> garde::Result { + move |value, _| { + if value.start < min { + return Err(garde::Error::new(format!( + "range start `{}` is smaller than {min}", + value.start + ))); + } else if value.end < min { + return Err(garde::Error::new(format!( + "range end `{}` is smaller than {min}", + value.end + ))); + } + Ok(()) + } +} + +#[allow(dead_code)] +fn range_struct_max( + max: T, +) -> impl FnOnce(&Range, &()) -> garde::Result { + move |value, _| { + if value.start > max { + return Err(garde::Error::new(format!( + "range start `{}` is smaller than {max}", + value.start + ))); + } else if value.end > max { + return Err(garde::Error::new(format!( + "range end `{}` is smaller than {max}", + value.end + ))); + } + Ok(()) + } +} From 962666831b79a4db73c670e73d7e4964b1c9bac8 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 28 Aug 2025 12:33:37 -0300 Subject: [PATCH 11/20] 
read Profile file from path or use predefined profiles --- Cargo.lock | 74 +++++++++++++++++++++++++++ simulator/.gitignore | 1 + simulator/Cargo.toml | 4 +- simulator/generation/mod.rs | 4 +- simulator/generation/plan.rs | 8 +-- simulator/generation/property.rs | 44 +++++++++++----- simulator/generation/query.rs | 4 +- simulator/main.rs | 32 ++++++++---- simulator/model/mod.rs | 13 ++--- simulator/profiles/mod.rs | 88 +++++++++++++++++++++++++++++++- simulator/runner/bugbase.rs | 2 +- simulator/runner/cli.rs | 79 +++++++++++++++++++++++++++- simulator/runner/differential.rs | 18 ++++--- simulator/runner/doublecheck.rs | 18 ++++--- simulator/runner/env.rs | 14 ++--- simulator/runner/execution.rs | 2 +- simulator/runner/file.rs | 8 +-- simulator/runner/io.rs | 2 +- simulator/runner/watch.rs | 2 +- simulator/shrink/plan.rs | 2 +- 20 files changed, 350 insertions(+), 69 deletions(-) create mode 100644 simulator/.gitignore diff --git a/Cargo.lock b/Cargo.lock index d796758b3..d190cb086 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2000,6 +2000,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + [[package]] name = "julian_day_converter" version = "0.4.5" @@ -2182,6 +2193,7 @@ dependencies = [ "garde", "hex", "itertools 0.14.0", + "json5", "log", "notify", "rand 0.9.2", @@ -2193,6 +2205,7 @@ dependencies = [ "serde", "serde_json", "sql_generation", + "strum", "tracing", "tracing-subscriber", "turso_core", @@ -2690,6 +2703,50 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pest" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323" +dependencies = [ + "memchr", + "thiserror 2.0.12", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb056d9e8ea77922845ec74a1c4e8fb17e7c218cc4fc11a15c5d25e189aa40bc" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e404e638f781eb3202dc82db6760c8ae8a1eeef7fb3fa8264b2ef280504966" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "pest_meta" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd1101f170f5903fde0914f899bb503d9ff5271d7ba76bbb70bea63690cc0d5" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -3530,6 +3587,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -4374,6 +4442,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "uncased" version = "0.9.10" diff --git a/simulator/.gitignore b/simulator/.gitignore new file mode 100644 index 000000000..edec4e579 --- /dev/null +++ b/simulator/.gitignore @@ -0,0 +1 @@ +configs/custom \ No newline at end of file diff --git 
a/simulator/Cargo.toml b/simulator/Cargo.toml index 29f42ec31..a8f85ca58 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -4,7 +4,7 @@ name = "limbo_sim" version.workspace = true authors.workspace = true -edition.workspace = true +edition = "2024" license.workspace = true repository.workspace = true description = "The Limbo deterministic simulator" @@ -40,3 +40,5 @@ sql_generation = { workspace = true } turso_parser = { workspace = true } schemars = { workspace = true } garde = { workspace = true, features = ["derive", "serde"] } +json5 = { version = "0.4.1" } +strum = { workspace = true } diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index bc4a3bd23..88a40d708 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -26,7 +26,7 @@ impl GenerationContext for SimulatorEnv { } fn opts(&self) -> &sql_generation::generation::Opts { - &self.gen_opts + &self.profile.query.gen_opts } } @@ -36,6 +36,6 @@ impl GenerationContext for &mut SimulatorEnv { } fn opts(&self) -> &sql_generation::generation::Opts { - &self.gen_opts + &self.profile.query.gen_opts } } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 9e211759e..6b9db6c0d 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -9,25 +9,25 @@ use std::{ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, query::SelectFree, Arbitrary, ArbitraryFrom, GenerationContext}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency, query::SelectFree}, model::{ - query::{update::Update, Create, CreateIndex, Delete, Drop, Insert, Select}, + query::{Create, CreateIndex, Delete, Drop, Insert, Select, update::Update}, table::SimValue, }, }; use turso_core::{Connection, Result, StepResult}; use crate::{ + SimulatorEnv, generation::Shadow, model::Query, runner::{ env::{SimConnection, SimulationType, SimulatorTables}, io::SimulatorIO, }, - SimulatorEnv, }; -use 
super::property::{remaining, Property}; +use super::property::{Property, remaining}; pub(crate) type ResultSet = Result>>; diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 8357a1e3b..0393d4ced 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1,18 +1,18 @@ use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{frequency, pick, pick_index, Arbitrary, ArbitraryFrom, GenerationContext}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency, pick, pick_index}, model::{ query::{ + Create, Delete, Drop, Insert, Select, predicate::Predicate, select::{CompoundOperator, CompoundSelect, ResultColumn, SelectBody, SelectInner}, transaction::{Begin, Commit, Rollback}, update::Update, - Create, Delete, Drop, Insert, Select, }, table::SimValue, }, }; -use turso_core::{types, LimboError}; +use turso_core::{LimboError, types}; use turso_parser::ast::{self, Distinctness}; use crate::{ @@ -305,7 +305,10 @@ impl Property { for row in rows { for (i, (col, val)) in update.set_values.iter().enumerate() { if &row[i] != val { - return Ok(Err(format!("updated row {} has incorrect value for column {col}: expected {val}, got {}", i, row[i]))); + return Ok(Err(format!( + "updated row {} has incorrect value for column {col}: expected {val}, got {}", + i, row[i] + ))); } } } @@ -384,7 +387,10 @@ impl Property { if found { Ok(Ok(())) } else { - Ok(Err(format!("row [{:?}] not found in table", row.iter().map(|v| v.to_string()).collect::>()))) + Ok(Err(format!( + "row [{:?}] not found in table", + row.iter().map(|v| v.to_string()).collect::>() + ))) } } Err(err) => Err(LimboError::InternalError(err.to_string())), @@ -858,15 +864,22 @@ impl Property { match (select_result_set, select_tlp_result_set) { (Ok(select_rows), Ok(select_tlp_rows)) => { if select_rows.len() != select_tlp_rows.len() { - return Ok(Err(format!("row count mismatch: select returned {} rows, select_tlp returned {} rows", 
select_rows.len(), select_tlp_rows.len()))); + return Ok(Err(format!( + "row count mismatch: select returned {} rows, select_tlp returned {} rows", + select_rows.len(), + select_tlp_rows.len() + ))); } // Check if any row in select_rows is not in select_tlp_rows for row in select_rows.iter() { if !select_tlp_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", + row.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ); return Ok(Err(format!( "row mismatch: row [{}] exists in select results but not in select_tlp results", print_row(row) @@ -877,9 +890,12 @@ impl Property { for row in select_tlp_rows.iter() { if !select_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", + row.iter() + .map(|v| v.to_string()) + .collect::>() + .join(", ") + ); return Ok(Err(format!( "row mismatch: row [{}] exists in select_tlp but not in select", @@ -939,7 +955,9 @@ impl Property { if union_count == count1 + count2 { Ok(Ok(())) } else { - Ok(Err(format!("UNION ALL should preserve cardinality but it didn't: {count1} + {count2} != {union_count}"))) + Ok(Err(format!( + "UNION ALL should preserve cardinality but it didn't: {count1} + {count2} != {union_count}" + ))) } } (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 586891d28..e99a0de86 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,8 +1,8 @@ use crate::model::Query; use rand::Rng; use sql_generation::{ - 
generation::{frequency, Arbitrary, ArbitraryFrom, GenerationContext}, - model::query::{update::Update, Create, Delete, Insert, Select}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, frequency}, + model::query::{Create, Delete, Insert, Select, update::Update}, }; use super::property::Remaining; diff --git a/simulator/main.rs b/simulator/main.rs index 9b60a149a..2d4a45b30 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -8,19 +8,20 @@ use rand::prelude::*; use runner::bugbase::{Bug, BugBase, LoadedBug}; use runner::cli::{SimulatorCLI, SimulatorCommand}; use runner::env::SimulatorEnv; -use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; +use runner::execution::{Execution, ExecutionHistory, ExecutionResult, execute_plans}; use runner::{differential, watch}; use std::any::Any; use std::backtrace::Backtrace; use std::fs::OpenOptions; use std::io::{IsTerminal, Write}; use std::path::Path; -use std::sync::{mpsc, Arc, Mutex}; +use std::sync::{Arc, Mutex, mpsc}; +use tracing_subscriber::EnvFilter; use tracing_subscriber::field::MakeExt; use tracing_subscriber::fmt::format; -use tracing_subscriber::EnvFilter; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use crate::profiles::Profile; use crate::runner::doublecheck; use crate::runner::env::{Paths, SimulationPhase, SimulationType}; @@ -35,6 +36,10 @@ fn main() -> anyhow::Result<()> { let mut cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; + let profile = Profile::parse_from_type(cli_opts.profile.clone())?; + tracing::debug!(sim_profile = ?profile); + dbg!(&profile); + match cli_opts.subcommand { Some(SimulatorCommand::List) => { let mut bugbase = BugBase::load()?; @@ -44,7 +49,7 @@ fn main() -> anyhow::Result<()> { banner(); for i in 0..n { println!("iteration {i}"); - let result = testing_main(&cli_opts); + let result = testing_main(&cli_opts, &profile); if result.is_err() && short_circuit { println!("short circuiting after {i} 
iterations"); return result; @@ -91,7 +96,7 @@ fn main() -> anyhow::Result<()> { let results = bugs .into_iter() - .map(|cli_opts| testing_main(&cli_opts)) + .map(|cli_opts| testing_main(&cli_opts, &profile)) .collect::>(); let (successes, failures): (Vec<_>, Vec<_>) = @@ -103,12 +108,12 @@ fn main() -> anyhow::Result<()> { } None => { banner(); - testing_main(&cli_opts) + testing_main(&cli_opts, &profile) } } } -fn testing_main(cli_opts: &SimulatorCLI) -> anyhow::Result<()> { +fn testing_main(cli_opts: &SimulatorCLI, profile: &Profile) -> anyhow::Result<()> { let mut bugbase = if cli_opts.disable_bugbase { None } else { @@ -116,7 +121,7 @@ fn testing_main(cli_opts: &SimulatorCLI) -> anyhow::Result<()> { Some(BugBase::load()?) }; - let (seed, mut env, plans) = setup_simulation(bugbase.as_mut(), cli_opts); + let (seed, mut env, plans) = setup_simulation(bugbase.as_mut(), cli_opts, profile); if cli_opts.watch { watch_mode(env).unwrap(); @@ -471,6 +476,7 @@ impl SandboxedResult { fn setup_simulation( bugbase: Option<&mut BugBase>, cli_opts: &SimulatorCLI, + profile: &Profile, ) -> (u64, SimulatorEnv, Vec) { if let Some(seed) = &cli_opts.load { let seed = seed.parse::().expect("seed should be a number"); @@ -484,7 +490,13 @@ fn setup_simulation( if !paths.base.exists() { std::fs::create_dir_all(&paths.base).unwrap(); } - let env = SimulatorEnv::new(bug.seed(), cli_opts, paths, SimulationType::Default); + let env = SimulatorEnv::new( + bug.seed(), + cli_opts, + paths, + SimulationType::Default, + profile, + ); let plan = match bug { Bug::Loaded(LoadedBug { plan, .. 
}) => plan.clone(), @@ -528,7 +540,7 @@ fn setup_simulation( Paths::new(&dir) }; - let mut env = SimulatorEnv::new(seed, cli_opts, paths, SimulationType::Default); + let mut env = SimulatorEnv::new(seed, cli_opts, paths, SimulationType::Default, profile); tracing::info!("Generating database interaction plan..."); diff --git a/simulator/model/mod.rs b/simulator/model/mod.rs index ce249baf5..73863f725 100644 --- a/simulator/model/mod.rs +++ b/simulator/model/mod.rs @@ -5,14 +5,14 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use sql_generation::model::{ query::{ + Create, CreateIndex, Delete, Drop, EmptyContext, Insert, Select, select::{CompoundOperator, FromClause, ResultColumn, SelectInner}, transaction::{Begin, Commit, Rollback}, update::Update, - Create, CreateIndex, Delete, Drop, EmptyContext, Insert, Select, }, table::{JoinTable, JoinType, SimValue, Table, TableContext}, }; -use turso_parser::ast::{fmt::ToTokens, Distinctness}; +use turso_parser::ast::{Distinctness, fmt::ToTokens}; use crate::{generation::Shadow, runner::env::SimulatorTables}; @@ -282,10 +282,11 @@ impl Shadow for SelectInner { Ok(join_table) } else { - assert!(self - .columns - .iter() - .all(|col| matches!(col, ResultColumn::Expr(_)))); + assert!( + self.columns + .iter() + .all(|col| matches!(col, ResultColumn::Expr(_))) + ); // If `WHERE` is false, just return an empty table if !self.where_clause.test(&[], &Table::anonymous(vec![])) { diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 6a5e677a2..8bab0b217 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -1,8 +1,16 @@ -use std::fmt::Display; +use std::{ + fmt::Display, + fs, + path::{Path, PathBuf}, + str::FromStr, +}; +use anyhow::Context; use garde::Validate; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sql_generation::generation::Opts; +use strum::EnumString; use crate::profiles::{io::IOProfile, query::QueryProfile}; @@ -10,6 +18,7 @@ mod io; mod 
query; #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] pub struct Profile { #[garde(skip)] /// Experimental MVCC feature @@ -30,6 +39,83 @@ impl Default for Profile { } } +impl Profile { + pub fn write_heavy() -> Self { + Profile { + query: QueryProfile { + gen_opts: Opts { + // TODO: in the future tweak blob size for bigger inserts + // TODO: increase number of rows increased as well + ..Default::default() + }, + delete: false, + update: false, + ..Default::default() + }, + ..Default::default() + } + } + + pub fn parse_from_type(profile_type: ProfileType) -> anyhow::Result { + let profile = match profile_type { + ProfileType::Default => Profile::default(), + ProfileType::WriteHeavy => Self::write_heavy(), + ProfileType::Custom(path) => { + Self::parse(path).with_context(|| "failed to parse JSON profile")? + } + }; + Ok(profile) + } + + // TODO: in the future handle extension and composability of profiles here + pub fn parse(path: impl AsRef) -> anyhow::Result { + let contents = fs::read_to_string(path)?; + // use json5 so we can support comments and trailing commas + let profile = json5::from_str(&contents)?; + Ok(profile) + } +} + +#[derive( + Debug, + Default, + Clone, + Serialize, + Deserialize, + EnumString, + PartialEq, + Eq, + PartialOrd, + Ord, + strum::Display, + strum::VariantNames, +)] +#[serde(rename_all = "snake_case")] +#[strum(ascii_case_insensitive, serialize_all = "snake_case")] +pub enum ProfileType { + #[default] + Default, + WriteHeavy, + #[strum(disabled)] + Custom(PathBuf), +} + +impl ProfileType { + pub fn parse(s: &str) -> anyhow::Result { + if let Ok(prof) = ProfileType::from_str(s) { + Ok(prof) + } else if let path = PathBuf::from(s) + && path.exists() + { + Ok(ProfileType::Custom(path)) + } else { + Err(anyhow::anyhow!( + "failed identifying predifined profile or custom profile path" + )) + } + } +} + /// Minimum value of field is dependent on another field in the struct fn 
min_dependent(min: &T) -> impl FnOnce(&T, &()) -> garde::Result + '_ { move |value, _| { diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs index 1a3be5889..179c292f1 100644 --- a/simulator/runner/bugbase.rs +++ b/simulator/runner/bugbase.rs @@ -6,7 +6,7 @@ use std::{ time::SystemTime, }; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 1a79a121f..d0ceaea24 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -1,6 +1,13 @@ -use clap::{command, Parser}; +use clap::{ + Arg, Command, Error, Parser, + builder::{PossibleValue, TypedValueParser, ValueParserFactory}, + command, + error::{ContextKind, ContextValue, ErrorKind}, +}; use serde::{Deserialize, Serialize}; +use crate::profiles::ProfileType; + #[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] #[command(name = "limbo-simulator")] #[command(author, version, about, long_about = None)] @@ -135,6 +142,9 @@ pub struct SimulatorCLI { default_value_t = false )] pub keep_files: bool, + #[clap(long, default_value_t = ProfileType::Default)] + /// Profile selector for Simulation run + pub profile: ProfileType, } #[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] @@ -206,3 +216,70 @@ impl SimulatorCLI { Ok(()) } } + +#[derive(Clone)] +pub struct ProfileTypeParser; + +impl TypedValueParser for ProfileTypeParser { + type Value = ProfileType; + + fn parse_ref( + &self, + cmd: &Command, + arg: Option<&Arg>, + value: &std::ffi::OsStr, + ) -> Result { + let s = value + .to_str() + .ok_or_else(|| Error::new(ErrorKind::InvalidUtf8).with_cmd(cmd))?; + + ProfileType::parse(s).map_err(|_| { + let mut err = Error::new(ErrorKind::InvalidValue).with_cmd(cmd); + if let Some(arg) = arg { + err.insert( + ContextKind::InvalidArg, + ContextValue::String(arg.to_string()), + ); + } + 
err.insert( + ContextKind::InvalidValue, + ContextValue::String(s.to_string()), + ); + err.insert( + ContextKind::ValidValue, + ContextValue::Strings( + self.possible_values() + .unwrap() + .map(|s| s.get_name().to_string()) + .collect(), + ), + ); + err + }) + } + + fn possible_values(&self) -> Option + '_>> { + use strum::VariantNames; + Some(Box::new( + Self::Value::VARIANTS + .into_iter() + .map(|variant| { + // Custom variant should be listed as a Custom path + if variant.eq_ignore_ascii_case("custom") { + "CUSTOM_PATH" + } else { + variant + } + }) + .map(|s| PossibleValue::new(s)), + )) + } +} + +impl ValueParserFactory for ProfileType { + type Parser = ProfileTypeParser; + + fn value_parser() -> Self::Parser { + ProfileTypeParser + } +} diff --git a/simulator/runner/differential.rs b/simulator/runner/differential.rs index 5723418c1..a91e02fa8 100644 --- a/simulator/runner/differential.rs +++ b/simulator/runner/differential.rs @@ -4,18 +4,18 @@ use sql_generation::{generation::pick_index, model::table::SimValue}; use turso_core::Value; use crate::{ + InteractionPlan, generation::{ - plan::{Interaction, InteractionPlanState, ResultSet}, Shadow as _, + plan::{Interaction, InteractionPlanState, ResultSet}, }, model::Query, runner::execution::ExecutionContinuation, - InteractionPlan, }; use super::{ env::{SimConnection, SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( @@ -249,7 +249,9 @@ fn execute_plan( match (limbo_values, rusqlite_values) { (Ok(limbo_values), Ok(rusqlite_values)) => { if limbo_values != rusqlite_values { - tracing::error!("returned values from limbo and rusqlite results do not match"); + tracing::error!( + "returned values from limbo and rusqlite results do not match" + ); let diff = limbo_values .iter() .zip(rusqlite_values.iter()) @@ -303,7 +305,9 @@ fn execute_plan( 
tracing::warn!("rusqlite error {}", rusqlite_err); } (Ok(limbo_result), Err(rusqlite_err)) => { - tracing::error!("limbo and rusqlite results do not match, limbo returned values but rusqlite failed"); + tracing::error!( + "limbo and rusqlite results do not match, limbo returned values but rusqlite failed" + ); tracing::error!("limbo values {:?}", limbo_result); tracing::error!("rusqlite error {}", rusqlite_err); return Err(turso_core::LimboError::InternalError( @@ -311,7 +315,9 @@ fn execute_plan( )); } (Err(limbo_err), Ok(_)) => { - tracing::error!("limbo and rusqlite results do not match, limbo failed but rusqlite returned values"); + tracing::error!( + "limbo and rusqlite results do not match, limbo failed but rusqlite returned values" + ); tracing::error!("limbo error {}", limbo_err); return Err(turso_core::LimboError::InternalError( "limbo and rusqlite results do not match".into(), diff --git a/simulator/runner/doublecheck.rs b/simulator/runner/doublecheck.rs index 7c9d33b4e..2b1b5fccb 100644 --- a/simulator/runner/doublecheck.rs +++ b/simulator/runner/doublecheck.rs @@ -6,13 +6,13 @@ use std::{ use sql_generation::generation::pick_index; use crate::{ - generation::plan::InteractionPlanState, runner::execution::ExecutionContinuation, - InteractionPlan, + InteractionPlan, generation::plan::InteractionPlanState, + runner::execution::ExecutionContinuation, }; use super::{ env::{SimConnection, SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( @@ -207,7 +207,9 @@ fn execute_plan( match (limbo_values, doublecheck_values) { (Ok(limbo_values), Ok(doublecheck_values)) => { if limbo_values != doublecheck_values { - tracing::error!("returned values from limbo and doublecheck results do not match"); + tracing::error!( + "returned values from limbo and doublecheck results do not match" + ); tracing::debug!("limbo 
values {:?}", limbo_values); tracing::debug!( "doublecheck values {:?}", @@ -231,7 +233,9 @@ fn execute_plan( } } (Ok(limbo_result), Err(doublecheck_err)) => { - tracing::error!("limbo and doublecheck results do not match, limbo returned values but doublecheck failed"); + tracing::error!( + "limbo and doublecheck results do not match, limbo returned values but doublecheck failed" + ); tracing::error!("limbo values {:?}", limbo_result); tracing::error!("doublecheck error {}", doublecheck_err); return Err(turso_core::LimboError::InternalError( @@ -239,7 +243,9 @@ fn execute_plan( )); } (Err(limbo_err), Ok(_)) => { - tracing::error!("limbo and doublecheck results do not match, limbo failed but doublecheck returned values"); + tracing::error!( + "limbo and doublecheck results do not match, limbo failed but doublecheck returned values" + ); tracing::error!("limbo error {}", limbo_err); return Err(turso_core::LimboError::InternalError( "limbo and doublecheck results do not match".into(), diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 50b21c61a..20ac44e23 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -7,10 +7,10 @@ use std::sync::Arc; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; -use sql_generation::generation::Opts; use sql_generation::model::table::Table; use turso_core::Database; +use crate::profiles::Profile; use crate::runner::io::SimulatorIO; use super::cli::SimulatorCLI; @@ -60,7 +60,7 @@ impl Deref for SimulatorTables { pub(crate) struct SimulatorEnv { pub(crate) opts: SimulatorOpts, - pub gen_opts: Opts, + pub profile: Profile, pub(crate) connections: Vec, pub(crate) io: Arc, pub(crate) db: Option>, @@ -87,7 +87,7 @@ impl SimulatorEnv { paths: self.paths.clone(), type_: self.type_, phase: self.phase, - gen_opts: self.gen_opts.clone(), + profile: self.profile.clone(), } } @@ -164,6 +164,7 @@ impl SimulatorEnv { cli_opts: &SimulatorCLI, paths: Paths, simulation_type: SimulationType, + profile: &Profile, ) 
-> Self { let mut rng = ChaCha8Rng::seed_from_u64(seed); @@ -294,11 +295,6 @@ impl SimulatorEnv { .map(|_| SimConnection::Disconnected) .collect::>(); - let gen_opts = Opts { - indexes: opts.experimental_indexes, - ..Default::default() - }; - SimulatorEnv { opts, tables: SimulatorTables::new(), @@ -309,7 +305,7 @@ impl SimulatorEnv { db: Some(db), type_: simulation_type, phase: SimulationPhase::Test, - gen_opts, + profile: profile.clone(), } } diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index fa3dcbff9..a7d7aa3d6 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -5,8 +5,8 @@ use tracing::instrument; use turso_core::{Connection, LimboError, Result, StepResult}; use crate::generation::{ - plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, Shadow as _, + plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, }; use super::env::{SimConnection, SimulatorEnv}; diff --git a/simulator/runner/file.rs b/simulator/runner/file.rs index bbda05b1d..0d644dc28 100644 --- a/simulator/runner/file.rs +++ b/simulator/runner/file.rs @@ -6,10 +6,10 @@ use std::{ use rand::Rng as _; use rand_chacha::ChaCha8Rng; -use tracing::{instrument, Level}; +use tracing::{Level, instrument}; use turso_core::{File, Result}; -use crate::runner::{clock::SimulatorClock, FAULT_ERROR_MSG}; +use crate::runner::{FAULT_ERROR_MSG, clock::SimulatorClock}; pub(crate) struct SimulatorFile { pub path: String, pub(crate) inner: Arc, @@ -201,7 +201,9 @@ impl File for SimulatorFile { self.nr_sync_calls.set(self.nr_sync_calls.get() + 1); if self.fault.get() { // TODO: Enable this when https://github.com/tursodatabase/turso/issues/2091 is fixed. 
- tracing::debug!("ignoring sync fault because it causes false positives with current simulator design"); + tracing::debug!( + "ignoring sync fault because it causes false positives with current simulator design" + ); self.fault.set(false); } let c = if let Some(latency) = self.generate_latency_duration() { diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index c6b6bdbc3..fcc23be75 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -5,7 +5,7 @@ use std::{ use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; -use turso_core::{Clock, Instant, OpenFlags, PlatformIO, Result, IO}; +use turso_core::{Clock, IO, Instant, OpenFlags, PlatformIO, Result}; use crate::runner::{clock::SimulatorClock, file::SimulatorFile}; diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs index 95d65ad64..3f2615543 100644 --- a/simulator/runner/watch.rs +++ b/simulator/runner/watch.rs @@ -10,7 +10,7 @@ use crate::{ use super::{ env::{SimConnection, SimulatorEnv}, - execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, + execution::{Execution, ExecutionHistory, ExecutionResult, execute_interaction}, }; pub(crate) fn run_simulation( diff --git a/simulator/shrink/plan.rs b/simulator/shrink/plan.rs index bccd07afd..7def800ce 100644 --- a/simulator/shrink/plan.rs +++ b/simulator/shrink/plan.rs @@ -1,4 +1,5 @@ use crate::{ + SandboxedResult, SimulatorEnv, generation::{ plan::{Interaction, InteractionPlan, Interactions}, property::Property, @@ -6,7 +7,6 @@ use crate::{ model::Query, run_simulation, runner::execution::Execution, - SandboxedResult, SimulatorEnv, }; use std::sync::{Arc, Mutex}; From 2f237fdcfd4752714a4d2bbe5efbaef702bc07ff Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 28 Aug 2025 17:26:36 -0300 Subject: [PATCH 12/20] adjust remaining calculation to use the profile --- simulator/generation/plan.rs | 63 ++++++++------ simulator/generation/property.rs | 136 ++++++++++++++++++------------- 
simulator/generation/query.rs | 6 +- simulator/profiles/mod.rs | 12 +-- simulator/profiles/query.rs | 43 +++++++--- simulator/runner/env.rs | 21 +---- 6 files changed, 159 insertions(+), 122 deletions(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 6b9db6c0d..365b4cd3d 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -254,16 +254,27 @@ impl Display for InteractionPlan { #[derive(Debug, Clone, Copy)] pub(crate) struct InteractionStats { - pub(crate) read_count: usize, - pub(crate) write_count: usize, - pub(crate) delete_count: usize, - pub(crate) update_count: usize, - pub(crate) create_count: usize, - pub(crate) create_index_count: usize, - pub(crate) drop_count: usize, - pub(crate) begin_count: usize, - pub(crate) commit_count: usize, - pub(crate) rollback_count: usize, + pub(crate) select_count: u32, + pub(crate) insert_count: u32, + pub(crate) delete_count: u32, + pub(crate) update_count: u32, + pub(crate) create_count: u32, + pub(crate) create_index_count: u32, + pub(crate) drop_count: u32, + pub(crate) begin_count: u32, + pub(crate) commit_count: u32, + pub(crate) rollback_count: u32, +} + +impl InteractionStats { + pub fn total_writes(&self) -> u32 { + self.insert_count + + self.delete_count + + self.update_count + + self.create_count + + self.create_index_count + + self.drop_count + } } impl Display for InteractionStats { @@ -271,8 +282,8 @@ impl Display for InteractionStats { write!( f, "Read: {}, Write: {}, Delete: {}, Update: {}, Create: {}, CreateIndex: {}, Drop: {}, Begin: {}, Commit: {}, Rollback: {}", - self.read_count, - self.write_count, + self.select_count, + self.insert_count, self.delete_count, self.update_count, self.create_count, @@ -351,8 +362,8 @@ impl InteractionPlan { pub(crate) fn stats(&self) -> InteractionStats { let mut stats = InteractionStats { - read_count: 0, - write_count: 0, + select_count: 0, + insert_count: 0, delete_count: 0, update_count: 0, create_count: 0, @@ 
-365,8 +376,8 @@ impl InteractionPlan { fn query_stat(q: &Query, stats: &mut InteractionStats) { match q { - Query::Select(_) => stats.read_count += 1, - Query::Insert(_) => stats.write_count += 1, + Query::Select(_) => stats.select_count += 1, + Query::Insert(_) => stats.insert_count += 1, Query::Delete(_) => stats.delete_count += 1, Query::Create(_) => stats.create_count += 1, Query::Drop(_) => stats.drop_count += 1, @@ -399,7 +410,7 @@ impl InteractionPlan { pub fn generate_plan(rng: &mut R, env: &mut SimulatorEnv) -> Self { let mut plan = InteractionPlan::new(); - let num_interactions = env.opts.max_interactions; + let num_interactions = env.opts.max_interactions as usize; // First create at least one table let create_query = Create::arbitrary(rng, env); @@ -821,25 +832,25 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { _context: &C, (env, stats): (&SimulatorEnv, InteractionStats), ) -> Self { - let remaining_ = remaining(&env.opts, &stats); + let remaining_ = remaining(env.opts.max_interactions, &env.profile.query, &stats); frequency( vec![ ( - f64::min(remaining_.read, remaining_.write) + remaining_.create, + u32::min(remaining_.select, remaining_.insert) + remaining_.create, Box::new(|rng: &mut R| { Interactions::Property(Property::arbitrary_from(rng, env, (env, &stats))) }), ), ( - remaining_.read, + remaining_.select, Box::new(|rng: &mut R| random_read(rng, env)), ), ( - remaining_.read / 3.0, + remaining_.select / 3, Box::new(|rng: &mut R| random_expr(rng, env)), ), ( - remaining_.write, + remaining_.insert, Box::new(|rng: &mut R| random_write(rng, env)), ), ( @@ -867,15 +878,15 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { ), ( // remaining_.drop, - 0.0, + 0, Box::new(|rng: &mut R| random_drop(rng, env)), ), ( remaining_ - .read - .min(remaining_.write) + .select + .min(remaining_.insert) .min(remaining_.create) - .max(1.0), + .max(1), Box::new(|rng: &mut R| random_fault(rng, env)), ), ], diff 
--git a/simulator/generation/property.rs b/simulator/generation/property.rs index 0393d4ced..4d200bad6 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -16,9 +16,7 @@ use turso_core::{LimboError, types}; use turso_parser::ast::{self, Distinctness}; use crate::{ - generation::Shadow as _, - model::Query, - runner::env::{SimulatorEnv, SimulatorOpts}, + generation::Shadow as _, model::Query, profiles::query::QueryProfile, runner::env::SimulatorEnv, }; use super::plan::{Assertion, Interaction, InteractionStats, ResultSet}; @@ -1034,44 +1032,66 @@ fn assert_all_table_values(tables: &[String]) -> impl Iterator Remaining { - let remaining_read = ((opts.max_interactions as f64 * opts.read_percent / 100.0) - - (stats.read_count as f64)) - .max(0.0); - let remaining_write = ((opts.max_interactions as f64 * opts.write_percent / 100.0) - - (stats.write_count as f64)) - .max(0.0); - let remaining_create = ((opts.max_interactions as f64 * opts.create_percent / 100.0) - - (stats.create_count as f64)) - .max(0.0); +pub(crate) fn remaining( + max_interactions: u32, + opts: &QueryProfile, + stats: &InteractionStats, +) -> Remaining { + let total_weight = opts.read_weight + opts.write_weight; - let remaining_create_index = ((opts.max_interactions as f64 * opts.create_index_percent - / 100.0) - - (stats.create_index_count as f64)) - .max(0.0); + // Total amount of reads. Only considers select operations + let total_reads = (max_interactions * opts.read_weight) / total_weight; + // Total amount of writes. 
+ let total_writes = (max_interactions * opts.write_weight) / total_weight; - let remaining_delete = ((opts.max_interactions as f64 * opts.delete_percent / 100.0) - - (stats.delete_count as f64)) - .max(0.0); - let remaining_update = ((opts.max_interactions as f64 * opts.update_percent / 100.0) - - (stats.update_count as f64)) - .max(0.0); - let remaining_drop = ((opts.max_interactions as f64 * opts.drop_percent / 100.0) - - (stats.drop_count as f64)) - .max(0.0); + let remaining_select = total_reads + .checked_sub(stats.select_count) + .unwrap_or_default(); + + // This total is the sum of all the query weights that are write operations + let sum_write_weight = opts.create_table_weight + + opts.create_index_weight + + opts.insert_weight + + opts.update_weight + + opts.delete_weight + + opts.drop_table_weight; + + let total_insert = (total_writes * opts.insert_weight) / sum_write_weight; + let total_create = (total_writes * opts.create_table_weight) / sum_write_weight; + let total_create_index = (total_writes * opts.create_index_weight) / sum_write_weight; + let total_delete = (total_writes * opts.delete_weight) / sum_write_weight; + let total_update = (total_writes * opts.update_weight) / sum_write_weight; + let total_drop = (total_writes * opts.drop_table_weight) / sum_write_weight; + + let remaining_insert = total_insert + .checked_sub(stats.insert_count) + .unwrap_or_default(); + let remaining_create = total_create + .checked_sub(stats.create_count) + .unwrap_or_default(); + let remaining_create_index = total_create_index + .checked_sub(stats.create_index_count) + .unwrap_or_default(); + let remaining_delete = total_delete + .checked_sub(stats.delete_count) + .unwrap_or_default(); + let remaining_update = total_update + .checked_sub(stats.update_count) + .unwrap_or_default(); + let remaining_drop = total_drop.checked_sub(stats.drop_count).unwrap_or_default(); Remaining { - read: remaining_read, - write: remaining_write, + select: remaining_select, + insert: 
remaining_insert, create: remaining_create, create_index: remaining_create_index, delete: remaining_delete, @@ -1434,72 +1454,72 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { _context: &C, (env, stats): (&SimulatorEnv, &InteractionStats), ) -> Self { - let remaining_ = remaining(&env.opts, stats); + let remaining_ = remaining(env.opts.max_interactions, &env.profile.query, stats); frequency( vec![ ( if !env.opts.disable_insert_values_select { - f64::min(remaining_.read, remaining_.write) + u32::min(remaining_.select, remaining_.insert) } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_insert_values_select(rng, env, &remaining_)), ), ( - remaining_.read, + remaining_.select, Box::new(|rng: &mut R| property_table_has_expected_content(rng, env)), ), ( - f64::min(remaining_.read, remaining_.write), + u32::min(remaining_.select, remaining_.insert), Box::new(|rng: &mut R| property_read_your_updates_back(rng, env)), ), ( if !env.opts.disable_double_create_failure { - remaining_.create / 2.0 + remaining_.create / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_double_create_failure(rng, env, &remaining_)), ), ( if !env.opts.disable_select_limit { - remaining_.read + remaining_.select } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_select_limit(rng, env)), ), ( if !env.opts.disable_delete_select { - f64::min(remaining_.read, remaining_.write).min(remaining_.delete) + u32::min(remaining_.select, remaining_.insert).min(remaining_.delete) } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_delete_select(rng, env, &remaining_)), ), ( if !env.opts.disable_drop_select { // remaining_.drop - 0.0 + 0 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_drop_select(rng, env, &remaining_)), ), ( if !env.opts.disable_select_optimizer { - remaining_.read / 2.0 + remaining_.select / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_select_select_optimizer(rng, env)), ), ( if env.opts.experimental_indexes && 
!env.opts.disable_where_true_false_null { - remaining_.read / 2.0 + remaining_.select / 2 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), ), @@ -1507,25 +1527,25 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { if env.opts.experimental_indexes && !env.opts.disable_union_all_preserves_cardinality { - remaining_.read / 3.0 + remaining_.select / 3 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_union_all_preserves_cardinality(rng, env)), ), ( if !env.opts.disable_fsync_no_wait { - 50.0 // Freestyle number + 50 // Freestyle number } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_fsync_no_wait(rng, env, &remaining_)), ), ( if !env.opts.disable_faulty_query { - 20.0 + 20 } else { - 0.0 + 0 }, Box::new(|rng: &mut R| property_faulty_query(rng, env, &remaining_)), ), diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index e99a0de86..72541c4d7 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -20,11 +20,11 @@ impl ArbitraryFrom<&Remaining> for Query { Box::new(|rng| Self::Create(Create::arbitrary(rng, context))), ), ( - remaining.read, + remaining.select, Box::new(|rng| Self::Select(Select::arbitrary(rng, context))), ), ( - remaining.write, + remaining.insert, Box::new(|rng| Self::Insert(Insert::arbitrary(rng, context))), ), ( @@ -32,7 +32,7 @@ impl ArbitraryFrom<&Remaining> for Query { Box::new(|rng| Self::Update(Update::arbitrary(rng, context))), ), ( - f64::min(remaining.write, remaining.delete), + remaining.insert.min(remaining.delete), Box::new(|rng| Self::Delete(Delete::arbitrary(rng, context))), ), ], diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 8bab0b217..553aa9f26 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -14,8 +14,8 @@ use strum::EnumString; use crate::profiles::{io::IOProfile, query::QueryProfile}; -mod io; -mod query; +pub mod io; +pub mod query; #[derive(Debug, Clone, 
Serialize, Deserialize, JsonSchema, Validate)] #[serde(deny_unknown_fields, default)] @@ -45,11 +45,13 @@ impl Profile { query: QueryProfile { gen_opts: Opts { // TODO: in the future tweak blob size for bigger inserts - // TODO: increase number of rows increased as well + // TODO: increase number of rows as well ..Default::default() }, - delete: false, - update: false, + read_weight: 30, + write_weight: 70, + delete_weight: 0, + update_weight: 0, ..Default::default() }, ..Default::default() diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs index 8a56e3734..d331dac4c 100644 --- a/simulator/profiles/query.rs +++ b/simulator/profiles/query.rs @@ -9,29 +9,48 @@ pub struct QueryProfile { #[garde(dive)] pub gen_opts: Opts, #[garde(skip)] - pub create_table: bool, + /// Effectively the weight of how many select operations we want + pub read_weight: u32, #[garde(skip)] - pub create_index: bool, + pub write_weight: u32, + // All weights below are only going to be sampled when we determine we are doing a write operation, + // meaning we first sample between `read_weight` and `write_weight`, and if we a write_weight we will then sample the weights below #[garde(skip)] - pub insert: bool, + pub create_table_weight: u32, #[garde(skip)] - pub update: bool, + pub create_index_weight: u32, #[garde(skip)] - pub delete: bool, + pub insert_weight: u32, #[garde(skip)] - pub drop_table: bool, + pub update_weight: u32, + #[garde(skip)] + pub delete_weight: u32, + #[garde(skip)] + pub drop_table_weight: u32, } impl Default for QueryProfile { fn default() -> Self { Self { gen_opts: Opts::default(), - create_table: true, - create_index: true, - insert: true, - update: true, - delete: true, - drop_table: true, + read_weight: 60, + write_weight: 50, + create_table_weight: 15, + create_index_weight: 5, + insert_weight: 30, + update_weight: 20, + delete_weight: 20, + drop_table_weight: 2, } } } + +#[derive(Debug, Clone, strum::VariantArray)] +pub enum QueryTypes { + 
CreateTable, + CreateIndex, + Insert, + Update, + Delete, + DropTable, +} diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 20ac44e23..c0e97d45a 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -227,13 +227,6 @@ impl SimulatorEnv { max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions processing max_tables: rng.random_range(0..128), - create_percent, - create_index_percent, - read_percent, - write_percent, - delete_percent, - drop_percent, - update_percent, disable_select_optimizer: cli_opts.disable_select_optimizer, disable_insert_values_select: cli_opts.disable_insert_values_select, disable_double_create_failure: cli_opts.disable_double_create_failure, @@ -246,7 +239,8 @@ impl SimulatorEnv { disable_fsync_no_wait: cli_opts.disable_fsync_no_wait, disable_faulty_query: cli_opts.disable_faulty_query, page_size: 4096, // TODO: randomize this too - max_interactions: rng.random_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), + max_interactions: rng.random_range(cli_opts.minimum_tests..=cli_opts.maximum_tests) + as u32, max_time_simulation: cli_opts.maximum_time, disable_reopen_database: cli_opts.disable_reopen_database, latency_probability: cli_opts.latency_probability, @@ -399,15 +393,6 @@ pub(crate) struct SimulatorOpts { pub(crate) ticks: usize, pub(crate) max_connections: usize, pub(crate) max_tables: usize, - // this next options are the distribution of workload where read_percent + write_percent + - // delete_percent == 100% - pub(crate) create_percent: f64, - pub(crate) create_index_percent: f64, - pub(crate) read_percent: f64, - pub(crate) write_percent: f64, - pub(crate) delete_percent: f64, - pub(crate) update_percent: f64, - pub(crate) drop_percent: f64, pub(crate) disable_select_optimizer: bool, pub(crate) disable_insert_values_select: bool, @@ -421,7 +406,7 @@ pub(crate) struct SimulatorOpts { pub(crate) disable_faulty_query: bool, pub(crate) 
disable_reopen_database: bool, - pub(crate) max_interactions: usize, + pub(crate) max_interactions: u32, pub(crate) page_size: usize, pub(crate) max_time_simulation: usize, pub(crate) latency_probability: usize, From 463eb1fefd97175575d05ce79c3a49326a09bcae Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 29 Aug 2025 02:39:49 -0300 Subject: [PATCH 13/20] simplify profile weights for writes --- simulator/generation/property.rs | 31 ++++++++++++------------------- simulator/profiles/mod.rs | 4 ++-- simulator/profiles/query.rs | 10 ++-------- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 4d200bad6..8fe207685 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1046,32 +1046,25 @@ pub(crate) fn remaining( opts: &QueryProfile, stats: &InteractionStats, ) -> Remaining { - let total_weight = opts.read_weight + opts.write_weight; - - // Total amount of reads. Only considers select operations - let total_reads = (max_interactions * opts.read_weight) / total_weight; - // Total amount of writes. 
- let total_writes = (max_interactions * opts.write_weight) / total_weight; - - let remaining_select = total_reads - .checked_sub(stats.select_count) - .unwrap_or_default(); - - // This total is the sum of all the query weights that are write operations - let sum_write_weight = opts.create_table_weight + let total_weight = opts.select_weight + + opts.create_table_weight + opts.create_index_weight + opts.insert_weight + opts.update_weight + opts.delete_weight + opts.drop_table_weight; - let total_insert = (total_writes * opts.insert_weight) / sum_write_weight; - let total_create = (total_writes * opts.create_table_weight) / sum_write_weight; - let total_create_index = (total_writes * opts.create_index_weight) / sum_write_weight; - let total_delete = (total_writes * opts.delete_weight) / sum_write_weight; - let total_update = (total_writes * opts.update_weight) / sum_write_weight; - let total_drop = (total_writes * opts.drop_table_weight) / sum_write_weight; + let total_select = (max_interactions * opts.select_weight) / total_weight; + let total_insert = (max_interactions * opts.insert_weight) / total_weight; + let total_create = (max_interactions * opts.create_table_weight) / total_weight; + let total_create_index = (max_interactions * opts.create_index_weight) / total_weight; + let total_delete = (max_interactions * opts.delete_weight) / total_weight; + let total_update = (max_interactions * opts.update_weight) / total_weight; + let total_drop = (max_interactions * opts.drop_table_weight) / total_weight; + let remaining_select = total_select + .checked_sub(stats.select_count) + .unwrap_or_default(); let remaining_insert = total_insert .checked_sub(stats.insert_count) .unwrap_or_default(); diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 553aa9f26..44c4d5bc4 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -48,8 +48,8 @@ impl Profile { // TODO: increase number of rows as well ..Default::default() }, - read_weight: 30, 
- write_weight: 70, + select_weight: 30, + insert_weight: 70, delete_weight: 0, update_weight: 0, ..Default::default() diff --git a/simulator/profiles/query.rs b/simulator/profiles/query.rs index d331dac4c..a58c983e0 100644 --- a/simulator/profiles/query.rs +++ b/simulator/profiles/query.rs @@ -9,12 +9,7 @@ pub struct QueryProfile { #[garde(dive)] pub gen_opts: Opts, #[garde(skip)] - /// Effectively the weight of how many select operations we want - pub read_weight: u32, - #[garde(skip)] - pub write_weight: u32, - // All weights below are only going to be sampled when we determine we are doing a write operation, - // meaning we first sample between `read_weight` and `write_weight`, and if we a write_weight we will then sample the weights below + pub select_weight: u32, #[garde(skip)] pub create_table_weight: u32, #[garde(skip)] @@ -33,8 +28,7 @@ impl Default for QueryProfile { fn default() -> Self { Self { gen_opts: Opts::default(), - read_weight: 60, - write_weight: 50, + select_weight: 60, create_table_weight: 15, create_index_weight: 5, insert_weight: 30, From 61fa7546c1c0fb1eaabafd021ee37f33d80c05c3 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 29 Aug 2025 11:47:46 -0300 Subject: [PATCH 14/20] fold some `SimulatorOpts` fields to `Profile` --- simulator/generation/property.rs | 13 +++--- simulator/main.rs | 1 - simulator/runner/cli.rs | 32 +++++--------- simulator/runner/env.rs | 71 +++++++++++++++++++------------- 4 files changed, 59 insertions(+), 58 deletions(-) diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 8fe207685..ab11a2d0e 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1444,9 +1444,10 @@ fn property_faulty_query( impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { fn arbitrary_from( rng: &mut R, - _context: &C, + context: &C, (env, stats): (&SimulatorEnv, &InteractionStats), ) -> Self { + let opts = context.opts(); let remaining_ = 
remaining(env.opts.max_interactions, &env.profile.query, stats); frequency( @@ -1509,7 +1510,7 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { Box::new(|rng: &mut R| property_select_select_optimizer(rng, env)), ), ( - if env.opts.experimental_indexes && !env.opts.disable_where_true_false_null { + if opts.indexes && !env.opts.disable_where_true_false_null { remaining_.select / 2 } else { 0 @@ -1517,9 +1518,7 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), ), ( - if env.opts.experimental_indexes - && !env.opts.disable_union_all_preserves_cardinality - { + if opts.indexes && !env.opts.disable_union_all_preserves_cardinality { remaining_.select / 3 } else { 0 @@ -1527,7 +1526,7 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { Box::new(|rng: &mut R| property_union_all_preserves_cardinality(rng, env)), ), ( - if !env.opts.disable_fsync_no_wait { + if env.profile.io.enable && !env.opts.disable_fsync_no_wait { 50 // Freestyle number } else { 0 @@ -1535,7 +1534,7 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { Box::new(|rng: &mut R| property_fsync_no_wait(rng, env, &remaining_)), ), ( - if !env.opts.disable_faulty_query { + if env.profile.io.enable && !env.opts.disable_faulty_query { 20 } else { 0 diff --git a/simulator/main.rs b/simulator/main.rs index 2d4a45b30..be6df9c37 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -38,7 +38,6 @@ fn main() -> anyhow::Result<()> { let profile = Profile::parse_from_type(cli_opts.profile.clone())?; tracing::debug!(sim_profile = ?profile); - dbg!(&profile); match cli_opts.subcommand { Some(SimulatorCommand::List) => { diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index d0ceaea24..9891ee7b2 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -114,28 +114,16 @@ pub struct SimulatorCLI { pub disable_faulty_query: bool, #[clap(long, 
help = "disable Reopen-Database fault", default_value_t = false)] pub disable_reopen_database: bool, - #[clap( - long = "latency-prob", - help = "added IO latency probability", - default_value_t = 1 - )] - pub latency_probability: usize, - #[clap( - long, - help = "Minimum tick time in microseconds for simulated time", - default_value_t = 1 - )] - pub min_tick: u64, - #[clap( - long, - help = "Maximum tick time in microseconds for simulated time", - default_value_t = 30 - )] - pub max_tick: u64, + #[clap(long = "latency-prob", help = "added IO latency probability")] + pub latency_probability: Option, + #[clap(long, help = "Minimum tick time in microseconds for simulated time")] + pub min_tick: Option, + #[clap(long, help = "Maximum tick time in microseconds for simulated time")] + pub max_tick: Option, #[clap(long, help = "Enable experimental MVCC feature")] - pub experimental_mvcc: bool, + pub experimental_mvcc: Option, #[clap(long, help = "Disable experimental indexing feature")] - pub disable_experimental_indexes: bool, + pub disable_experimental_indexes: Option, #[clap( long, help = "Keep all database and plan files", @@ -202,10 +190,10 @@ impl SimulatorCLI { anyhow::bail!("Cannot set seed and load plan at the same time"); } - if self.latency_probability > 100 { + if self.latency_probability.is_some_and(|prob| prob > 100) { anyhow::bail!( "latency probability must be a number between 0 and 100. 
Got `{}`", - self.latency_probability + self.latency_probability.unwrap() ); } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index c0e97d45a..567f2bad9 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -5,6 +5,7 @@ use std::panic::UnwindSafe; use std::path::{Path, PathBuf}; use std::sync::Arc; +use garde::Validate; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use sql_generation::model::table::Table; @@ -96,13 +97,15 @@ impl SimulatorEnv { self.connections.iter_mut().for_each(|c| c.disconnect()); self.rng = ChaCha8Rng::seed_from_u64(self.opts.seed); + let latency_prof = &self.profile.io.latency; + let io = Arc::new( SimulatorIO::new( self.opts.seed, self.opts.page_size, - self.opts.latency_probability, - self.opts.min_tick, - self.opts.max_tick, + latency_prof.latency_probability, + latency_prof.min_tick, + latency_prof.max_tick, ) .unwrap(), ); @@ -122,8 +125,8 @@ impl SimulatorEnv { let db = match Database::open_file( io.clone(), db_path.to_str().unwrap(), - self.opts.experimental_mvcc, - self.opts.experimental_indexes, + self.profile.experimental_mvcc, + self.profile.query.gen_opts.indexes, ) { Ok(db) => db, Err(e) => { @@ -243,24 +246,8 @@ impl SimulatorEnv { as u32, max_time_simulation: cli_opts.maximum_time, disable_reopen_database: cli_opts.disable_reopen_database, - latency_probability: cli_opts.latency_probability, - experimental_mvcc: cli_opts.experimental_mvcc, - experimental_indexes: !cli_opts.disable_experimental_indexes, - min_tick: cli_opts.min_tick, - max_tick: cli_opts.max_tick, }; - let io = Arc::new( - SimulatorIO::new( - seed, - opts.page_size, - cli_opts.latency_probability, - cli_opts.min_tick, - cli_opts.max_tick, - ) - .unwrap(), - ); - // Remove existing database file if it exists let db_path = paths.db(&simulation_type, &SimulationPhase::Test); @@ -273,11 +260,44 @@ impl SimulatorEnv { std::fs::remove_file(&wal_path).unwrap(); } + let mut profile = profile.clone(); + // Conditionals here so 
that we can override some profile options from the CLI + if let Some(mvcc) = cli_opts.experimental_mvcc { + profile.experimental_mvcc = mvcc; + } + if let Some(indexes) = cli_opts.disable_experimental_indexes { + profile.query.gen_opts.indexes = indexes; + } + if let Some(latency_prob) = cli_opts.latency_probability { + profile.io.latency.latency_probability = latency_prob; + } + if let Some(max_tick) = cli_opts.max_tick { + profile.io.latency.max_tick = max_tick; + } + if let Some(min_tick) = cli_opts.min_tick { + profile.io.latency.min_tick = min_tick; + } + + profile.validate().unwrap(); + + let latency_prof = &profile.io.latency; + + let io = Arc::new( + SimulatorIO::new( + seed, + opts.page_size, + latency_prof.latency_probability, + latency_prof.min_tick, + latency_prof.max_tick, + ) + .unwrap(), + ); + let db = match Database::open_file( io.clone(), db_path.to_str().unwrap(), - opts.experimental_mvcc, - opts.experimental_indexes, + profile.experimental_mvcc, + profile.query.gen_opts.indexes, ) { Ok(db) => db, Err(e) => { @@ -409,11 +429,6 @@ pub(crate) struct SimulatorOpts { pub(crate) max_interactions: u32, pub(crate) page_size: usize, pub(crate) max_time_simulation: usize, - pub(crate) latency_probability: usize, - pub(crate) experimental_mvcc: bool, - pub(crate) experimental_indexes: bool, - pub min_tick: u64, - pub max_tick: u64, } #[derive(Debug, Clone)] From b9cc556a558012506a333bd1ff3b39374873f37c Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 29 Aug 2025 12:30:09 -0300 Subject: [PATCH 15/20] adjust write heavy profile to insert more rows --- simulator/profiles/mod.rs | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 44c4d5bc4..ff7eebd6d 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -1,6 +1,7 @@ use std::{ fmt::Display, fs, + num::NonZeroU32, path::{Path, PathBuf}, str::FromStr, }; @@ -9,7 +10,7 @@ use 
anyhow::Context; use garde::Validate; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use sql_generation::generation::Opts; +use sql_generation::generation::{InsertOpts, LargeTableOpts, Opts, QueryOpts, TableOpts}; use strum::EnumString; use crate::profiles::{io::IOProfile, query::QueryProfile}; @@ -41,11 +42,25 @@ impl Default for Profile { impl Profile { pub fn write_heavy() -> Self { - Profile { + let profile = Profile { query: QueryProfile { gen_opts: Opts { // TODO: in the future tweak blob size for bigger inserts // TODO: increase number of rows as well + table: TableOpts { + large_table: LargeTableOpts { + large_table_prob: 0.4, + ..Default::default() + }, + ..Default::default() + }, + query: QueryOpts { + insert: InsertOpts { + min_rows: NonZeroU32::new(5).unwrap(), + max_rows: NonZeroU32::new(11).unwrap(), + }, + ..Default::default() + }, ..Default::default() }, select_weight: 30, @@ -55,12 +70,16 @@ impl Profile { ..Default::default() }, ..Default::default() - } + }; + + // Validate that we as the developer are not creating an incorrect default profile + profile.validate().unwrap(); + profile } pub fn parse_from_type(profile_type: ProfileType) -> anyhow::Result { let profile = match profile_type { - ProfileType::Default => Profile::default(), + ProfileType::Default => Self::default(), ProfileType::WriteHeavy => Self::write_heavy(), ProfileType::Custom(path) => { Self::parse(path).with_context(|| "failed to parse JSON profile")? 
@@ -73,7 +92,8 @@ impl Profile { pub fn parse(path: impl AsRef) -> anyhow::Result { let contents = fs::read_to_string(path)?; // use json5 so we can support comments and trailing commas - let profile = json5::from_str(&contents)?; + let profile: Profile = json5::from_str(&contents)?; + profile.validate()?; Ok(profile) } } From 9aac45c3de18697b0b0dd44914b89de034cc58c3 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 29 Aug 2025 14:51:58 -0300 Subject: [PATCH 16/20] small docs for profile --- simulator/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/simulator/README.md b/simulator/README.md index 87d61479d..2c5519fd1 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -106,6 +106,11 @@ it should generate the necessary queries and assertions for the property. You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite, and compare the results. It will also check for any panics or errors in either database. +## Simulator Profiles +A Simulator Profile allows you to influence query generation and I/O fault injection. You can run predefined profiles or you can create your own custom profile in a separate JSON file. You can select the profile you want by passing the `--profile` flag to the CLI. It will accept a predefined Profile name or a file path. 
+ +For development purposes, you can create profiles to test locally in a `configs/custom` folder that is gitignored + ## Resources - [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/) From 961c0cd28298f879513afe4f3e5afec762da9e42 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sat, 30 Aug 2025 11:53:06 -0300 Subject: [PATCH 17/20] script to save JsonSchema for editor integration --- Makefile | 4 + simulator/README.md | 9 +- simulator/main.rs | 142 +++++++++++++++-------------- simulator/profiles/mod.rs | 3 +- simulator/runner/cli.rs | 2 + sql_generation/generation/table.rs | 2 +- 6 files changed, 91 insertions(+), 71 deletions(-) diff --git a/Makefile b/Makefile index d2abf3376..6070bb70e 100644 --- a/Makefile +++ b/Makefile @@ -201,3 +201,7 @@ endif fi .PHONY: merge-pr + +sim-schema: + cargo run -p limbo_sim -- print-schema > simulator/configs/custom/profile-schema.json + diff --git a/simulator/README.md b/simulator/README.md index 2c5519fd1..3de0afb99 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -109,7 +109,14 @@ You can use the `--differential` flag to run the simulator in differential testi ## Simulator Profiles A Simulator Profile allows you to influence query generation and I/O fault injection. You can run predefined profiles or you can create your own custom profile in a separate JSON file. You can select the profile you want by passing the `--profile` flag to the CLI. It will accept a predefined Profile name or a file path. -For development purposes, you can create profiles to test locally in a `configs/custom` folder that is gitignored +For development purposes, you can run `make sim-schema` to generate a JsonSchema of the `Profile` struct. Then you can create profiles to test locally in a `configs/custom` folder that is gitignored and have editor integration by adding a `$schema` tag to reference the generated JsonSchema: + +```json +{ + "$schema": "./profile-schema.json", + ... 
+} +``` ## Resources diff --git a/simulator/main.rs b/simulator/main.rs index be6df9c37..4cd9557b0 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -39,76 +39,82 @@ fn main() -> anyhow::Result<()> { let profile = Profile::parse_from_type(cli_opts.profile.clone())?; tracing::debug!(sim_profile = ?profile); - match cli_opts.subcommand { - Some(SimulatorCommand::List) => { - let mut bugbase = BugBase::load()?; - bugbase.list_bugs() - } - Some(SimulatorCommand::Loop { n, short_circuit }) => { - banner(); - for i in 0..n { - println!("iteration {i}"); - let result = testing_main(&cli_opts, &profile); - if result.is_err() && short_circuit { - println!("short circuiting after {i} iterations"); - return result; - } else if result.is_err() { - println!("iteration {i} failed"); - } else { - println!("iteration {i} succeeded"); - } + if let Some(ref command) = cli_opts.subcommand { + match command { + SimulatorCommand::List => { + let mut bugbase = BugBase::load()?; + bugbase.list_bugs() + } + SimulatorCommand::Loop { n, short_circuit } => { + banner(); + for i in 0..*n { + println!("iteration {i}"); + let result = testing_main(&cli_opts, &profile); + if result.is_err() && *short_circuit { + println!("short circuiting after {i} iterations"); + return result; + } else if result.is_err() { + println!("iteration {i} failed"); + } else { + println!("iteration {i} succeeded"); + } + } + Ok(()) + } + SimulatorCommand::Test { filter } => { + let mut bugbase = BugBase::load()?; + let bugs = bugbase.load_bugs()?; + let mut bugs = bugs + .into_iter() + .flat_map(|bug| { + let runs = bug + .runs + .into_iter() + .filter_map(|run| run.error.clone().map(|_| run)) + .filter(|run| run.error.as_ref().unwrap().contains(filter)) + .map(|run| run.cli_options) + .collect::>(); + + runs.into_iter() + .map(|mut cli_opts| { + cli_opts.seed = Some(bug.seed); + cli_opts.load = None; + cli_opts + }) + .collect::>() + }) + .collect::>(); + + bugs.sort(); + bugs.dedup_by(|a, b| a == b); + + 
println!( + "found {} previously triggered configurations with {}", + bugs.len(), + filter + ); + + let results = bugs + .into_iter() + .map(|cli_opts| testing_main(&cli_opts, &profile)) + .collect::>(); + + let (successes, failures): (Vec<_>, Vec<_>) = + results.into_iter().partition(|result| result.is_ok()); + println!("the results of the change are:"); + println!("\t{} successful runs", successes.len()); + println!("\t{} failed runs", failures.len()); + Ok(()) + } + SimulatorCommand::PrintSchema => { + let schema = schemars::schema_for!(crate::Profile); + println!("{}", serde_json::to_string_pretty(&schema).unwrap()); + Ok(()) } - Ok(()) - } - Some(SimulatorCommand::Test { filter }) => { - let mut bugbase = BugBase::load()?; - let bugs = bugbase.load_bugs()?; - let mut bugs = bugs - .into_iter() - .flat_map(|bug| { - let runs = bug - .runs - .into_iter() - .filter_map(|run| run.error.clone().map(|_| run)) - .filter(|run| run.error.as_ref().unwrap().contains(&filter)) - .map(|run| run.cli_options) - .collect::>(); - - runs.into_iter() - .map(|mut cli_opts| { - cli_opts.seed = Some(bug.seed); - cli_opts.load = None; - cli_opts - }) - .collect::>() - }) - .collect::>(); - - bugs.sort(); - bugs.dedup_by(|a, b| a == b); - - println!( - "found {} previously triggered configurations with {}", - bugs.len(), - filter - ); - - let results = bugs - .into_iter() - .map(|cli_opts| testing_main(&cli_opts, &profile)) - .collect::>(); - - let (successes, failures): (Vec<_>, Vec<_>) = - results.into_iter().partition(|result| result.is_ok()); - println!("the results of the change are:"); - println!("\t{} successful runs", successes.len()); - println!("\t{} failed runs", failures.len()); - Ok(()) - } - None => { - banner(); - testing_main(&cli_opts, &profile) } + } else { + banner(); + testing_main(&cli_opts, &profile) } } diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index ff7eebd6d..90838f488 100644 --- a/simulator/profiles/mod.rs +++ 
b/simulator/profiles/mod.rs @@ -19,7 +19,8 @@ pub mod io; pub mod query; #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(deny_unknown_fields, default)] +#[serde(default)] +#[schemars(deny_unknown_fields)] pub struct Profile { #[garde(skip)] /// Experimental MVCC feature diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 9891ee7b2..bfd71e8c9 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -165,6 +165,8 @@ pub enum SimulatorCommand { )] filter: String, }, + /// Print profile Json Schema + PrintSchema, } impl SimulatorCLI { diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index 66f02250c..0ea821d9b 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -39,7 +39,7 @@ impl Arbitrary for Table { Table { rows: Vec::new(), name, - columns: Vec::from_iter(column_set.into_iter()), + columns: Vec::from_iter(column_set), indexes: vec![], } } From 5881ee71d616fac7caeed6e2e500449b02219d13 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sat, 30 Aug 2025 12:20:19 -0300 Subject: [PATCH 18/20] clippy --- simulator/generation/property.rs | 5 ++--- simulator/profiles/mod.rs | 1 + simulator/runner/cli.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index ab11a2d0e..7129f7989 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -972,7 +972,7 @@ impl Property { } fn assert_all_table_values(tables: &[String]) -> impl Iterator + use<'_> { - let checks = tables.iter().flat_map(|table| { + tables.iter().flat_map(|table| { let select = Interaction::Query(Query::Select(Select::simple( table.clone(), Predicate::true_(), @@ -1026,8 +1026,7 @@ fn assert_all_table_values(tables: &[String]) -> impl Iterator Self { Self { diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index bfd71e8c9..daa00be38 100644 --- 
a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -252,7 +252,7 @@ impl TypedValueParser for ProfileTypeParser { use strum::VariantNames; Some(Box::new( Self::Value::VARIANTS - .into_iter() + .iter() .map(|variant| { // Custom variant should be listed as a Custom path if variant.eq_ignore_ascii_case("custom") { @@ -261,7 +261,7 @@ impl TypedValueParser for ProfileTypeParser { variant } }) - .map(|s| PossibleValue::new(s)), + .map(PossibleValue::new), )) } } From 4f2bc96dbeeb4bd0184fe7070acea1167128d7a4 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sat, 30 Aug 2025 12:48:45 -0300 Subject: [PATCH 19/20] add Faultless profile --- simulator/generation/property.rs | 5 ++++- simulator/profiles/io.rs | 28 ++++++++++++++++++++++++++++ simulator/profiles/mod.rs | 29 ++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 7129f7989..ca75477ac 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1533,7 +1533,10 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { Box::new(|rng: &mut R| property_fsync_no_wait(rng, env, &remaining_)), ), ( - if env.profile.io.enable && !env.opts.disable_faulty_query { + if env.profile.io.enable + && env.profile.io.fault.enable + && !env.opts.disable_faulty_query + { 20 } else { 0 diff --git a/simulator/profiles/io.rs b/simulator/profiles/io.rs index e49d6d1c9..4bf44683a 100644 --- a/simulator/profiles/io.rs +++ b/simulator/profiles/io.rs @@ -11,6 +11,8 @@ pub struct IOProfile { pub enable: bool, #[garde(dive)] pub latency: LatencyProfile, + #[garde(dive)] + pub fault: FaultProfile, // TODO: expand here with header corruption options and faults on specific IO operations } @@ -19,6 +21,7 @@ impl Default for IOProfile { Self { enable: true, latency: Default::default(), + fault: Default::default(), } } } @@ -49,3 +52,28 @@ impl Default for LatencyProfile { } } } + 
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(deny_unknown_fields, default)] +pub struct FaultProfile { + #[garde(skip)] + pub enable: bool, + // TODO: modify SimIo impls to have a FaultProfile inside so they can skip faults depending on the profile + #[garde(skip)] + pub read: bool, + #[garde(skip)] + pub write: bool, + #[garde(skip)] + pub sync: bool, +} + +impl Default for FaultProfile { + fn default() -> Self { + Self { + enable: true, + read: true, + write: true, + sync: true, + } + } +} diff --git a/simulator/profiles/mod.rs b/simulator/profiles/mod.rs index 20bb3b5ea..633bb3750 100644 --- a/simulator/profiles/mod.rs +++ b/simulator/profiles/mod.rs @@ -13,7 +13,10 @@ use serde::{Deserialize, Serialize}; use sql_generation::generation::{InsertOpts, LargeTableOpts, Opts, QueryOpts, TableOpts}; use strum::EnumString; -use crate::profiles::{io::IOProfile, query::QueryProfile}; +use crate::profiles::{ + io::{FaultProfile, IOProfile}, + query::QueryProfile, +}; pub mod io; pub mod query; @@ -79,10 +82,33 @@ impl Profile { profile } + pub fn faultless() -> Self { + let profile = Profile { + io: IOProfile { + fault: FaultProfile { + enable: false, + ..Default::default() + }, + ..Default::default() + }, + query: QueryProfile { + create_table_weight: 0, + create_index_weight: 0, + ..Default::default() + }, + ..Default::default() + }; + + // Validate that we as the developer are not creating an incorrect default profile + profile.validate().unwrap(); + profile + } + pub fn parse_from_type(profile_type: ProfileType) -> anyhow::Result { let profile = match profile_type { ProfileType::Default => Self::default(), ProfileType::WriteHeavy => Self::write_heavy(), + ProfileType::Faultless => Self::faultless(), ProfileType::Custom(path) => { Self::parse(path).with_context(|| "failed to parse JSON profile")? 
} @@ -120,6 +146,7 @@ pub enum ProfileType { #[default] Default, WriteHeavy, + Faultless, #[strum(disabled)] Custom(PathBuf), } From 2cc0bb12d7c57110c100a046c7b3ac9ecfbd546b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sat, 30 Aug 2025 13:08:08 -0300 Subject: [PATCH 20/20] add Simulator profiles to CI --- .github/workflows/rust.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index b354dd8e3..0eef76c70 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -64,8 +64,12 @@ jobs: - uses: useblacksmith/rust-cache@v3 with: prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project) - - name: Install the project + - name: Simulator default run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 loop -n 10 -s + - name: Simulator InsertHeavy + run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 --profile write_heavy loop -n 10 -s + - name: Simulator Faultless + run: ./scripts/run-sim --maximum-tests 1000 --min-tick 10 --max-tick 50 --profile faultless loop -n 10 -s test-limbo: runs-on: blacksmith-4vcpu-ubuntu-2404