From b6f94b2fa1e27a1080ec32711c7355cceb74ef25 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 9 Oct 2025 17:19:47 -0300 Subject: [PATCH 1/3] remove dead code in sim --- simulator/generation/plan.rs | 28 ----------- simulator/generation/property.rs | 81 ++------------------------------ simulator/main.rs | 3 +- simulator/model/mod.rs | 10 ---- simulator/runner/bugbase.rs | 4 ++ simulator/runner/env.rs | 22 --------- simulator/runner/execution.rs | 1 + simulator/runner/memory/io.rs | 6 +-- simulator/runner/mod.rs | 2 +- 9 files changed, 14 insertions(+), 143 deletions(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index c8925cd59..337978569 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -63,11 +63,6 @@ impl InteractionPlan { Self { plan, mvcc, len } } - #[inline] - pub fn plan(&self) -> &[Interactions] { - &self.plan - } - /// Length of interactions that are not transaction statements #[inline] pub fn len(&self) -> usize { @@ -629,14 +624,6 @@ impl InteractionsType { } impl Interactions { - pub(crate) fn name(&self) -> Option<&str> { - match &self.interactions { - InteractionsType::Property(property) => Some(property.name()), - InteractionsType::Query(_) => None, - InteractionsType::Fault(_) => None, - } - } - pub(crate) fn interactions(&self) -> Vec { match &self.interactions { InteractionsType::Property(property) => property.interactions(self.connection_index), @@ -726,17 +713,6 @@ pub(crate) struct InteractionStats { pub(crate) rollback_count: u32, } -impl InteractionStats { - pub fn total_writes(&self) -> u32 { - self.insert_count - + self.delete_count - + self.update_count - + self.create_count - + self.create_index_count - + self.drop_count - } -} - impl Display for InteractionStats { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( @@ -758,10 +734,6 @@ impl Display for InteractionStats { type AssertionFunc = dyn Fn(&Vec, &mut SimulatorEnv) -> Result>; -enum 
AssertionAST { - Pick(), -} - #[derive(Clone)] pub struct Assertion { pub func: Rc, diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 47b352406..b67026dae 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -8,7 +8,7 @@ use rand::distr::{Distribution, weighted::WeightedIndex}; use serde::{Deserialize, Serialize}; use sql_generation::{ - generation::{Arbitrary, ArbitraryFrom, GenerationContext, Opts, pick, pick_index}, + generation::{Arbitrary, ArbitraryFrom, GenerationContext, pick, pick_index}, model::{ query::{ Create, Delete, Drop, Insert, Select, @@ -17,7 +17,7 @@ use sql_generation::{ transaction::{Begin, Commit, Rollback}, update::Update, }, - table::{SimValue, Table}, + table::SimValue, }, }; use strum::IntoEnumIterator; @@ -27,40 +27,15 @@ use turso_parser::ast::{self, Distinctness}; use crate::{ common::print_diff, generation::{ - Shadow as _, WeightedDistribution, - plan::InteractionType, - query::{QueryDistribution, possible_queries}, + Shadow as _, WeightedDistribution, plan::InteractionType, query::QueryDistribution, }, model::{Query, QueryCapabilities, QueryDiscriminants}, profiles::query::QueryProfile, - runner::env::{ShadowTablesMut, SimulatorEnv}, + runner::env::SimulatorEnv, }; use super::plan::{Assertion, Interaction, InteractionStats, ResultSet}; -#[derive(Debug, Clone, Copy)] -struct PropertyGenContext<'a> { - tables: &'a Vec, - opts: &'a sql_generation::generation::Opts, -} - -impl<'a> PropertyGenContext<'a> { - #[inline] - fn new(tables: &'a Vec, opts: &'a Opts) -> Self { - Self { tables, opts } - } -} - -impl<'a> GenerationContext for PropertyGenContext<'a> { - fn tables(&self) -> &Vec { - self.tables - } - - fn opts(&self) -> &sql_generation::generation::Opts { - self.opts - } -} - /// Properties are representations of executable specifications /// about the database behavior. 
#[derive(Debug, Clone, Serialize, Deserialize, strum::EnumDiscriminants)] @@ -1925,11 +1900,6 @@ impl PropertyDiscriminants { } } -pub fn possiple_properties(tables: &[Table]) -> Vec { - let queries = possible_queries(tables); - PropertyDiscriminants::can_generate(queries) -} - pub(super) struct PropertyDistribution<'a> { properties: Vec, weights: WeightedIndex, @@ -1995,49 +1965,6 @@ impl<'a> ArbitraryFrom<&PropertyDistribution<'a>> for Property { } } -fn generate_queries( - rng: &mut R, - ctx: &impl GenerationContext, - amount: usize, - init_queries: &[&Query], - func: F, -) -> Vec -where - F: Fn(&mut R, PropertyGenContext) -> Option, -{ - // Create random queries respecting the constraints - let mut queries = Vec::new(); - - let range = 0..amount; - if !range.is_empty() { - let mut tmp_tables = ctx.tables().clone(); - - for query in init_queries { - tmp_shadow(&mut tmp_tables, query); - } - - for _ in range { - let tmp_ctx = PropertyGenContext::new(&tmp_tables, ctx.opts()); - - let Some(query) = func(rng, tmp_ctx) else { - continue; - }; - - tmp_shadow(&mut tmp_tables, &query); - - queries.push(query); - } - } - queries -} - -fn tmp_shadow(tmp_tables: &mut Vec
, query: &Query) { - let mut tx_tables = None; - let mut tmp_shadow_tables = ShadowTablesMut::new(tmp_tables, &mut tx_tables); - - let _ = query.shadow(&mut tmp_shadow_tables); -} - fn print_row(row: &[SimValue]) -> String { row.iter() .map(|v| match &v.0 { diff --git a/simulator/main.rs b/simulator/main.rs index 15376a13b..6a5d097b8 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,4 +1,4 @@ -#![allow(clippy::arc_with_non_send_sync, dead_code)] +#![allow(clippy::arc_with_non_send_sync)] use anyhow::anyhow; use clap::Parser; use generation::plan::{InteractionPlan, InteractionPlanState}; @@ -421,6 +421,7 @@ enum SandboxedResult { error: String, last_execution: Execution, }, + #[expect(dead_code)] FoundBug { error: String, history: ExecutionHistory, diff --git a/simulator/model/mod.rs b/simulator/model/mod.rs index 9e3d29db2..3f8a4ec9d 100644 --- a/simulator/model/mod.rs +++ b/simulator/model/mod.rs @@ -204,16 +204,6 @@ impl QueryDiscriminants { QueryDiscriminants::Drop, QueryDiscriminants::CreateIndex, ]; - - #[inline] - pub fn is_transaction(&self) -> bool { - matches!(self, Self::Begin | Self::Commit | Self::Rollback) - } - - #[inline] - pub fn is_ddl(&self) -> bool { - matches!(self, Self::Create | Self::CreateIndex | Self::Drop) - } } impl Shadow for Create { diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs index 179c292f1..dd0d6f432 100644 --- a/simulator/runner/bugbase.rs +++ b/simulator/runner/bugbase.rs @@ -49,6 +49,7 @@ pub(crate) struct BugRun { } impl Bug { + #[expect(dead_code)] /// Check if the bug is loaded. pub(crate) fn is_loaded(&self) -> bool { match self { @@ -130,6 +131,7 @@ impl BugBase { Err(anyhow!("failed to create bug base")) } + #[expect(dead_code)] /// Load the bug base from one of the potential paths. 
pub(crate) fn interactive_load() -> anyhow::Result { let potential_paths = vec![ @@ -338,6 +340,7 @@ impl BugBase { } } + #[expect(dead_code)] pub(crate) fn mark_successful_run( &mut self, seed: u64, @@ -434,6 +437,7 @@ impl BugBase { } impl BugBase { + #[expect(dead_code)] /// Get the path to the bug base directory. pub(crate) fn path(&self) -> &PathBuf { &self.path diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 300b08c84..79497c38b 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -83,18 +83,6 @@ impl<'a, 'b> ShadowTablesMut<'a> where 'a: 'b, { - /// Creation of [ShadowTablesMut] outside of [SimulatorEnv] should be done sparingly and carefully. - /// Should only need to call this function if we need to do shadowing in a temporary model table - pub fn new( - commited_tables: &'a mut Vec
, - transaction_tables: &'a mut Option, - ) -> Self { - ShadowTablesMut { - commited_tables, - transaction_tables, - } - } - fn tables(&'a self) -> &'a Vec
{ self.transaction_tables .as_ref() @@ -312,7 +300,6 @@ impl SimulatorEnv { seed, ticks: rng .random_range(cli_opts.minimum_tests as usize..=cli_opts.maximum_tests as usize), - max_tables: rng.random_range(0..128), disable_select_optimizer: cli_opts.disable_select_optimizer, disable_insert_values_select: cli_opts.disable_insert_values_select, disable_double_create_failure: cli_opts.disable_double_create_failure, @@ -528,14 +515,6 @@ impl SimulatorEnv { } } -pub trait ConnectionTrait -where - Self: std::marker::Sized + Clone, -{ - fn is_connected(&self) -> bool; - fn disconnect(&mut self); -} - pub(crate) enum SimConnection { LimboConnection(Arc), SQLiteConnection(rusqlite::Connection), @@ -584,7 +563,6 @@ impl Display for SimConnection { pub(crate) struct SimulatorOpts { pub(crate) seed: u64, pub(crate) ticks: usize, - pub(crate) max_tables: usize, pub(crate) disable_select_optimizer: bool, pub(crate) disable_insert_values_select: bool, diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index e3cfef375..7bc9b40e4 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -46,6 +46,7 @@ impl ExecutionHistory { } pub struct ExecutionResult { + #[expect(dead_code)] pub history: ExecutionHistory, pub error: Option, } diff --git a/simulator/runner/memory/io.rs b/simulator/runner/memory/io.rs index 007398a10..557ada9a2 100644 --- a/simulator/runner/memory/io.rs +++ b/simulator/runner/memory/io.rs @@ -1,4 +1,4 @@ -use std::cell::{Cell, RefCell}; +use std::cell::RefCell; use std::sync::Arc; use indexmap::IndexMap; @@ -121,7 +121,7 @@ pub struct MemorySimIO { timeouts: CallbackQueue, pub files: RefCell>>, pub rng: RefCell, - pub nr_run_once_faults: Cell, + #[expect(dead_code)] pub page_size: usize, seed: u64, latency_probability: u8, @@ -141,13 +141,11 @@ impl MemorySimIO { ) -> Self { let files = RefCell::new(IndexMap::new()); let rng = RefCell::new(ChaCha8Rng::seed_from_u64(seed)); - let nr_run_once_faults = Cell::new(0); 
Self { callbacks: Arc::new(Mutex::new(Vec::new())), timeouts: Arc::new(Mutex::new(Vec::new())), files, rng, - nr_run_once_faults, page_size, seed, latency_probability, diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs index 7afbaa720..0f60c95fb 100644 --- a/simulator/runner/mod.rs +++ b/simulator/runner/mod.rs @@ -5,7 +5,7 @@ pub mod differential; pub mod doublecheck; pub mod env; pub mod execution; -#[allow(dead_code)] +#[expect(dead_code)] pub mod file; pub mod io; pub mod memory; From fb6c5ffcff295133963a90b27c6f86abb35ad6bf Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 9 Oct 2025 16:48:57 -0300 Subject: [PATCH 2/3] move SimValue generation to separate files to facilitate generation of new types of values in the future --- sql_generation/generation/mod.rs | 1 + sql_generation/generation/predicate/binary.rs | 2 +- sql_generation/generation/table.rs | 232 +----------------- sql_generation/generation/value/cmp.rs | 146 +++++++++++ sql_generation/generation/value/mod.rs | 58 +++++ sql_generation/generation/value/pattern.rs | 44 ++++ 6 files changed, 252 insertions(+), 231 deletions(-) create mode 100644 sql_generation/generation/value/cmp.rs create mode 100644 sql_generation/generation/value/mod.rs create mode 100644 sql_generation/generation/value/pattern.rs diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs index 1292b3448..e67dc482b 100644 --- a/sql_generation/generation/mod.rs +++ b/sql_generation/generation/mod.rs @@ -8,6 +8,7 @@ pub mod opts; pub mod predicate; pub mod query; pub mod table; +pub mod value; pub use opts::*; diff --git a/sql_generation/generation/predicate/binary.rs b/sql_generation/generation/predicate/binary.rs index 37b2e4e93..e3b52d5ec 100644 --- a/sql_generation/generation/predicate/binary.rs +++ b/sql_generation/generation/predicate/binary.rs @@ -6,7 +6,7 @@ use crate::{ generation::{ backtrack, one_of, pick, predicate::{CompoundPredicate, SimplePredicate}, - table::{GTValue, LTValue, 
LikeValue}, + value::{GTValue, LTValue, LikeValue}, ArbitraryFrom, ArbitraryFromMaybe as _, GenerationContext, }, model::{ diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index 6e55942c3..2ced09fee 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -2,14 +2,9 @@ use std::sync::atomic::{AtomicU64, Ordering}; use indexmap::IndexSet; use rand::Rng; -use turso_core::Value; -use crate::generation::{ - gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom, GenerationContext, -}; -use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; - -use super::ArbitraryFromMaybe; +use crate::generation::{pick, readable_name_custom, Arbitrary, GenerationContext}; +use crate::model::table::{Column, ColumnType, Name, Table}; static COUNTER: AtomicU64 = AtomicU64::new(0); @@ -67,226 +62,3 @@ impl Arbitrary for ColumnType { pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() } } - -impl ArbitraryFrom<&Table> for Vec { - fn arbitrary_from( - rng: &mut R, - context: &C, - table: &Table, - ) -> Self { - let mut row = Vec::new(); - for column in table.columns.iter() { - let value = SimValue::arbitrary_from(rng, context, &column.column_type); - row.push(value); - } - row - } -} - -impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - values: &Vec<&Self>, - ) -> Self { - if values.is_empty() { - return Self(Value::Null); - } - - pick(values, rng).to_owned().clone() - } -} - -impl ArbitraryFrom<&ColumnType> for SimValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - column_type: &ColumnType, - ) -> Self { - let value = match column_type { - ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), - ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), - ColumnType::Text => Value::build_text(gen_random_text(rng)), - ColumnType::Blob => 
Value::Blob(gen_random_text(rng).as_bytes().to_vec()), - }; - SimValue(value) - } -} - -pub struct LTValue(pub SimValue); - -impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { - fn arbitrary_from( - rng: &mut R, - context: &C, - values: &Vec<&SimValue>, - ) -> Self { - if values.is_empty() { - return Self(SimValue(Value::Null)); - } - - // Get value less than all values - let value = Value::exec_min(values.iter().map(|value| &value.0)); - Self::arbitrary_from(rng, context, &SimValue(value)) - } -} - -impl ArbitraryFrom<&SimValue> for LTValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Self { - let new_value = match &value.0 { - Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), - Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), - value @ Value::Text(..) => { - // Either shorten the string, or make at least one character smaller and mutate the rest - let mut t = value.to_string(); - if rng.random_bool(0.01) { - t.pop(); - Value::build_text(t) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.random_range(0..t.len()); - t[index] -= 1; - // Mutate the rest of the string - for val in t.iter_mut().skip(index + 1) { - *val = rng.random_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('z')) - .collect::(); - Value::build_text(t) - } - } - Value::Blob(b) => { - // Either shorten the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.random_bool(0.01) { - b.pop(); - Value::Blob(b) - } else { - let index = rng.random_range(0..b.len()); - b[index] -= 1; - // Mutate the rest of the blob - for val in b.iter_mut().skip(index + 1) { - *val = rng.random_range(0..=255); - } - Value::Blob(b) - } - } - _ => unreachable!(), - }; - Self(SimValue(new_value)) - } -} - -pub struct GTValue(pub SimValue); - -impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { - fn arbitrary_from( - rng: 
&mut R, - context: &C, - values: &Vec<&SimValue>, - ) -> Self { - if values.is_empty() { - return Self(SimValue(Value::Null)); - } - // Get value greater than all values - let value = Value::exec_max(values.iter().map(|value| &value.0)); - - Self::arbitrary_from(rng, context, &SimValue(value)) - } -} - -impl ArbitraryFrom<&SimValue> for GTValue { - fn arbitrary_from( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Self { - let new_value = match &value.0 { - Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), - Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), - value @ Value::Text(..) => { - // Either lengthen the string, or make at least one character smaller and mutate the rest - let mut t = value.to_string(); - if rng.random_bool(0.01) { - t.push(rng.random_range(0..=255) as u8 as char); - Value::build_text(t) - } else { - let mut t = t.chars().map(|c| c as u32).collect::>(); - let index = rng.random_range(0..t.len()); - t[index] += 1; - // Mutate the rest of the string - for val in t.iter_mut().skip(index + 1) { - *val = rng.random_range('a' as u32..='z' as u32); - } - let t = t - .into_iter() - .map(|c| char::from_u32(c).unwrap_or('a')) - .collect::(); - Value::build_text(t) - } - } - Value::Blob(b) => { - // Either lengthen the blob, or make at least one byte smaller and mutate the rest - let mut b = b.clone(); - if rng.random_bool(0.01) { - b.push(rng.random_range(0..=255)); - Value::Blob(b) - } else { - let index = rng.random_range(0..b.len()); - b[index] += 1; - // Mutate the rest of the blob - for val in b.iter_mut().skip(index + 1) { - *val = rng.random_range(0..=255); - } - Value::Blob(b) - } - } - _ => unreachable!(), - }; - Self(SimValue(new_value)) - } -} - -pub struct LikeValue(pub SimValue); - -impl ArbitraryFromMaybe<&SimValue> for LikeValue { - fn arbitrary_from_maybe( - rng: &mut R, - _context: &C, - value: &SimValue, - ) -> Option { - match &value.0 { - value @ Value::Text(..) 
=> { - let t = value.to_string(); - let mut t = t.chars().collect::>(); - // Remove a number of characters, either insert `_` for each character removed, or - // insert one `%` for the whole substring - let mut i = 0; - while i < t.len() { - if rng.random_bool(0.1) { - t[i] = '_'; - } else if rng.random_bool(0.05) { - t[i] = '%'; - // skip a list of characters - for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) { - t.remove(i + 1); - } - } - i += 1; - } - let index = rng.random_range(0..t.len()); - t.insert(index, '%'); - Some(Self(SimValue(Value::build_text( - t.into_iter().collect::(), - )))) - } - _ => None, - } - } -} diff --git a/sql_generation/generation/value/cmp.rs b/sql_generation/generation/value/cmp.rs new file mode 100644 index 000000000..567a59a5e --- /dev/null +++ b/sql_generation/generation/value/cmp.rs @@ -0,0 +1,146 @@ +use turso_core::Value; + +use crate::{ + generation::{ArbitraryFrom, GenerationContext}, + model::table::SimValue, +}; + +pub struct LTValue(pub SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + + // Get value less than all values + let value = Value::exec_min(values.iter().map(|value| &value.0)); + Self::arbitrary_from(rng, context, &SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for LTValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), + Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), + value @ Value::Text(..) 
=> { + // Either shorten the string, or make at least one character smaller and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.pop(); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] -= 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('z')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either shorten the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.pop(); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] -= 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} + +pub struct GTValue(pub SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { + fn arbitrary_from( + rng: &mut R, + context: &C, + values: &Vec<&SimValue>, + ) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + // Get value greater than all values + let value = Value::exec_max(values.iter().map(|value| &value.0)); + + Self::arbitrary_from(rng, context, &SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for GTValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), + Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), + value @ Value::Text(..) 
=> { + // Either lengthen the string, or make at least one character larger and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.push(rng.random_range(0..=255) as u8 as char); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] += 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('a')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either lengthen the blob, or make at least one byte larger and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.push(rng.random_range(0..=255)); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] += 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} diff --git a/sql_generation/generation/value/mod.rs b/sql_generation/generation/value/mod.rs new file mode 100644 index 000000000..e0c98ad84 --- /dev/null +++ b/sql_generation/generation/value/mod.rs @@ -0,0 +1,58 @@ +use rand::Rng; +use turso_core::Value; + +use crate::{ + generation::{gen_random_text, pick, ArbitraryFrom, GenerationContext}, + model::table::{ColumnType, SimValue, Table}, +}; + +mod cmp; +mod pattern; + +pub use cmp::{GTValue, LTValue}; +pub use pattern::LikeValue; + +impl ArbitraryFrom<&Table> for Vec { + fn arbitrary_from( + rng: &mut R, + context: &C, + table: &Table, + ) -> Self { + let mut row = Vec::new(); + for column in table.columns.iter() { + let value = SimValue::arbitrary_from(rng, context, &column.column_type); + row.push(value); + } + row + } +} + +impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { + fn arbitrary_from( + rng: &mut R, + _context: 
&C, + values: &Vec<&Self>, + ) -> Self { + if values.is_empty() { + return Self(Value::Null); + } + + pick(values, rng).to_owned().clone() + } +} + +impl ArbitraryFrom<&ColumnType> for SimValue { + fn arbitrary_from( + rng: &mut R, + _context: &C, + column_type: &ColumnType, + ) -> Self { + let value = match column_type { + ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), + ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), + ColumnType::Text => Value::build_text(gen_random_text(rng)), + ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + }; + SimValue(value) + } +} diff --git a/sql_generation/generation/value/pattern.rs b/sql_generation/generation/value/pattern.rs new file mode 100644 index 000000000..3bf0d7a9f --- /dev/null +++ b/sql_generation/generation/value/pattern.rs @@ -0,0 +1,44 @@ +use turso_core::Value; + +use crate::{ + generation::{ArbitraryFromMaybe, GenerationContext}, + model::table::SimValue, +}; + +pub struct LikeValue(pub SimValue); + +impl ArbitraryFromMaybe<&SimValue> for LikeValue { + fn arbitrary_from_maybe( + rng: &mut R, + _context: &C, + value: &SimValue, + ) -> Option { + match &value.0 { + value @ Value::Text(..) 
=> { + let t = value.to_string(); + let mut t = t.chars().collect::>(); + // Remove a number of characters, either insert `_` for each character removed, or + // insert one `%` for the whole substring + let mut i = 0; + while i < t.len() { + if rng.random_bool(0.1) { + t[i] = '_'; + } else if rng.random_bool(0.05) { + t[i] = '%'; + // skip a list of characters + for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) { + t.remove(i + 1); + } + } + i += 1; + } + let index = rng.random_range(0..t.len()); + t.insert(index, '%'); + Some(Self(SimValue(Value::build_text( + t.into_iter().collect::(), + )))) + } + _ => None, + } + } +} From 642ec3032d5b2a5c92409f9b6bdc429d0770408a Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 8 Oct 2025 12:07:49 -0300 Subject: [PATCH 3/3] use parser's `ColumnDefinition` for Sql Generation `Column` struct --- parser/Cargo.toml | 1 + parser/src/ast.rs | 5 +++++ sql_generation/Cargo.toml | 2 +- sql_generation/generation/table.rs | 3 +-- sql_generation/model/query/create.rs | 15 ++++++++------- sql_generation/model/table.rs | 22 +++++++++++++++++++--- whopper/main.rs | 28 +++++++++++++++++++++++----- 7 files changed, 58 insertions(+), 18 deletions(-) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 6f9720bc8..a140f4e44 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -13,6 +13,7 @@ name = "turso_parser" [features] default = [] serde = ["dep:serde", "bitflags/serde"] +simulator = [] [dependencies] bitflags = { workspace = true } diff --git a/parser/src/ast.rs b/parser/src/ast.rs index dae656cc4..81b47a967 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -1121,6 +1121,11 @@ pub struct NamedColumnConstraint { // https://sqlite.org/syntax/column-constraint.html #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[cfg_attr(feature = "simulator", derive(strum::EnumDiscriminants))] +#[cfg_attr( + feature = "simulator", + 
strum_discriminants(derive(strum::VariantArray)) +)] pub enum ColumnConstraint { /// `PRIMARY KEY` PrimaryKey { diff --git a/sql_generation/Cargo.toml b/sql_generation/Cargo.toml index d42668237..5c4de8d6e 100644 --- a/sql_generation/Cargo.toml +++ b/sql_generation/Cargo.toml @@ -13,7 +13,7 @@ path = "lib.rs" hex = { workspace = true } serde = { workspace = true, features = ["derive"] } turso_core = { workspace = true, features = ["simulator"] } -turso_parser = { workspace = true, features = ["serde"] } +turso_parser = { workspace = true, features = ["serde", "simulator"] } rand = { workspace = true } anarchist-readable-name-generator-lib = "0.2.0" itertools = { workspace = true } diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs index 2ced09fee..21c89a179 100644 --- a/sql_generation/generation/table.rs +++ b/sql_generation/generation/table.rs @@ -51,8 +51,7 @@ impl Arbitrary for Column { Self { name, column_type, - primary: false, - unique: false, + constraints: vec![], // TODO: later implement arbitrary here for ColumnConstraint } } } diff --git a/sql_generation/model/query/create.rs b/sql_generation/model/query/create.rs index 607d5fe8d..ee028e879 100644 --- a/sql_generation/model/query/create.rs +++ b/sql_generation/model/query/create.rs @@ -1,5 +1,6 @@ use std::fmt::Display; +use itertools::Itertools; use serde::{Deserialize, Serialize}; use crate::model::table::Table; @@ -13,13 +14,13 @@ impl Display for Create { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "CREATE TABLE {} (", self.table.name)?; - for (i, column) in self.table.columns.iter().enumerate() { - if i != 0 { - write!(f, ",")?; - } - write!(f, "{} {}", column.name, column.column_type)?; - } + let cols = self + .table + .columns + .iter() + .map(|column| column.to_string()) + .join(", "); - write!(f, ")") + write!(f, "{cols})") } } diff --git a/sql_generation/model/table.rs b/sql_generation/model/table.rs index 87057b42b..1060b8bb8 
100644 --- a/sql_generation/model/table.rs +++ b/sql_generation/model/table.rs @@ -1,8 +1,9 @@ use std::{fmt::Display, hash::Hash, ops::Deref}; +use itertools::Itertools; use serde::{Deserialize, Serialize}; use turso_core::{numeric::Numeric, types}; -use turso_parser::ast; +use turso_parser::ast::{self, ColumnConstraint}; use crate::model::query::predicate::Predicate; @@ -63,8 +64,7 @@ impl Table { pub struct Column { pub name: String, pub column_type: ColumnType, - pub primary: bool, - pub unique: bool, + pub constraints: Vec, } // Uniquely defined by name in this case @@ -82,6 +82,22 @@ impl PartialEq for Column { impl Eq for Column {} +impl Display for Column { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let constraints = self + .constraints + .iter() + .map(|constraint| constraint.to_string()) + .join(" "); + let mut col_string = format!("{} {}", self.name, self.column_type); + if !constraints.is_empty() { + col_string.push(' '); + col_string.push_str(&constraints); + } + write!(f, "{col_string}") + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum ColumnType { Integer, diff --git a/whopper/main.rs b/whopper/main.rs index 086b4687e..16da0fbf8 100644 --- a/whopper/main.rs +++ b/whopper/main.rs @@ -18,7 +18,7 @@ use tracing_subscriber::{EnvFilter, layer::SubscriberExt, util::SubscriberInitEx use turso_core::{ CipherMode, Connection, Database, DatabaseOpts, EncryptionOpts, IO, OpenFlags, Statement, }; -use turso_parser::ast::SortOrder; +use turso_parser::ast::{ColumnConstraint, SortOrder}; mod io; use crate::io::FILE_SIZE_SOFT_LIMIT; @@ -332,12 +332,18 @@ fn create_initial_schema(rng: &mut ChaCha8Rng) -> Vec { let num_columns = rng.random_range(2..=8); let mut columns = Vec::new(); + // TODO: there is no proper unique generation yet in whopper, so disable primary keys for now + + // let primary = ColumnConstraint::PrimaryKey { + // order: None, + // conflict_clause: None, + // auto_increment: false, + // }; // Always add 
an id column as primary key columns.push(Column { name: "id".to_string(), column_type: ColumnType::Integer, - primary: true, - unique: false, + constraints: vec![], }); // Add random columns @@ -348,11 +354,19 @@ fn create_initial_schema(rng: &mut ChaCha8Rng) -> Vec { _ => ColumnType::Float, }; + // FIXME: previously sql_generation did not emit ColumnConstraint in the SQL string; + // now that it does, the `whopper` simulation fails with a UNIQUE constraint error, + // so UNIQUE columns are disabled here (probability 0.0; it used to be a 20% chance) + let constraints = if rng.random_bool(0.0) { + vec![ColumnConstraint::Unique(None)] + } else { + Vec::new() + }; + columns.push(Column { name: format!("col_{j}"), column_type: col_type, - primary: false, - unique: rng.random_bool(0.2), // 20% chance of unique + constraints, }); } @@ -366,6 +380,10 @@ fn create_initial_schema(rng: &mut ChaCha8Rng) -> Vec { schema.push(Create { table }); } + for create in &schema { + println!("{create}"); + } + schema }