From 0285bdd72ce4a72f3ef4e061e5101ba0415f8eac Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 25 Aug 2025 15:14:10 -0300 Subject: [PATCH] copy generation code from simulator --- Cargo.lock | 25 +- Cargo.toml | 3 + sql_generation/Cargo.toml | 12 + sql_generation/generation/expr.rs | 296 ++++ sql_generation/generation/mod.rs | 166 ++ sql_generation/generation/plan.rs | 833 +++++++++ sql_generation/generation/predicate/binary.rs | 586 +++++++ sql_generation/generation/predicate/mod.rs | 378 ++++ sql_generation/generation/predicate/unary.rs | 306 ++++ sql_generation/generation/property.rs | 1533 +++++++++++++++++ sql_generation/generation/query.rs | 447 +++++ sql_generation/generation/table.rs | 258 +++ sql_generation/lib.rs | 3 +- sql_generation/model/mod.rs | 4 + sql_generation/model/query/create.rs | 45 + sql_generation/model/query/create_index.rs | 106 ++ sql_generation/model/query/delete.rs | 41 + sql_generation/model/query/drop.rs | 34 + sql_generation/model/query/insert.rs | 87 + sql_generation/model/query/mod.rs | 129 ++ sql_generation/model/query/predicate.rs | 146 ++ sql_generation/model/query/select.rs | 496 ++++++ sql_generation/model/query/transaction.rs | 60 + sql_generation/model/query/update.rs | 71 + sql_generation/model/table.rs | 428 +++++ 25 files changed, 6490 insertions(+), 3 deletions(-) create mode 100644 sql_generation/generation/expr.rs create mode 100644 sql_generation/generation/mod.rs create mode 100644 sql_generation/generation/plan.rs create mode 100644 sql_generation/generation/predicate/binary.rs create mode 100644 sql_generation/generation/predicate/mod.rs create mode 100644 sql_generation/generation/predicate/unary.rs create mode 100644 sql_generation/generation/property.rs create mode 100644 sql_generation/generation/query.rs create mode 100644 sql_generation/generation/table.rs create mode 100644 sql_generation/model/mod.rs create mode 100644 sql_generation/model/query/create.rs create mode 100644 
sql_generation/model/query/create_index.rs create mode 100644 sql_generation/model/query/delete.rs create mode 100644 sql_generation/model/query/drop.rs create mode 100644 sql_generation/model/query/insert.rs create mode 100644 sql_generation/model/query/mod.rs create mode 100644 sql_generation/model/query/predicate.rs create mode 100644 sql_generation/model/query/select.rs create mode 100644 sql_generation/model/query/transaction.rs create mode 100644 sql_generation/model/query/update.rs create mode 100644 sql_generation/model/table.rs diff --git a/Cargo.lock b/Cargo.lock index d7db34d08..1569f69ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -119,6 +119,15 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "anarchist-readable-name-generator-lib" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a645c34bad5551ed4b2496536985efdc4373b097c0e57abf2eb14774538278" +dependencies = [ + "rand 0.9.2", +] + [[package]] name = "android-tzdata" version = "0.1.1" @@ -2125,7 +2134,7 @@ dependencies = [ name = "limbo_sim" version = "0.1.4" dependencies = [ - "anarchist-readable-name-generator-lib", + "anarchist-readable-name-generator-lib 0.1.2", "anyhow", "chrono", "clap", @@ -3465,6 +3474,18 @@ checksum = "d372029cb5195f9ab4e4b9aef550787dce78b124fcaee8d82519925defcd6f0d" [[package]] name = "sql_generation" version = "0.1.4" +dependencies = [ + "anarchist-readable-name-generator-lib 0.2.0", + "anyhow", + "hex", + "itertools 0.14.0", + "rand 0.9.2", + "rand_chacha 0.9.0", + "serde", + "tracing", + "turso_core", + "turso_parser", +] [[package]] name = "sqlparser_bench" @@ -4163,7 +4184,7 @@ dependencies = [ name = "turso_stress" version = "0.1.4" dependencies = [ - "anarchist-readable-name-generator-lib", + "anarchist-readable-name-generator-lib 0.1.2", "antithesis_sdk", "clap", "hex", diff --git a/Cargo.toml b/Cargo.toml index f61b10ecc..092d76d98 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,9 @@ serde_json = "1.0" 
anyhow = "1.0.98" mimalloc = { version = "0.1.47", default-features = false } rusqlite = { version = "0.37.0", features = ["bundled"] } +itertools = "0.14.0" +rand = "0.9.2" +tracing = "0.1.41" [profile.release] debug = "line-tables-only" diff --git a/sql_generation/Cargo.toml b/sql_generation/Cargo.toml index b4b5dbbcf..d84d08380 100644 --- a/sql_generation/Cargo.toml +++ b/sql_generation/Cargo.toml @@ -10,3 +10,15 @@ repository.workspace = true path = "lib.rs" [dependencies] +hex = "0.4.3" +serde = { workspace = true, features = ["derive"] } +turso_core = { workspace = true, features = ["simulator"] } +turso_parser = { workspace = true, features = ["serde"] } +rand = { workspace = true } +anarchist-readable-name-generator-lib = "0.2.0" +itertools = { workspace = true } +anyhow = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +rand_chacha = "0.9.0" diff --git a/sql_generation/generation/expr.rs b/sql_generation/generation/expr.rs new file mode 100644 index 000000000..c5e33758c --- /dev/null +++ b/sql_generation/generation/expr.rs @@ -0,0 +1,296 @@ +use turso_parser::ast::{ + self, Expr, LikeOperator, Name, Operator, QualifiedName, Type, UnaryOperator, +}; + +use crate::{ + generation::{ + frequency, gen_random_text, one_of, pick, pick_index, Arbitrary, ArbitraryFrom, + ArbitrarySizedFrom, + }, + model::table::SimValue, +}; + +impl Arbitrary for Box +where + T: Arbitrary, +{ + fn arbitrary(rng: &mut R) -> Self { + Box::from(T::arbitrary(rng)) + } +} + +impl ArbitrarySizedFrom for Box +where + T: ArbitrarySizedFrom, +{ + fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { + Box::from(T::arbitrary_sized_from(rng, t, size)) + } +} + +impl Arbitrary for Option +where + T: Arbitrary, +{ + fn arbitrary(rng: &mut R) -> Self { + rng.random_bool(0.5).then_some(T::arbitrary(rng)) + } +} + +impl ArbitrarySizedFrom for Option +where + T: ArbitrarySizedFrom, +{ + fn arbitrary_sized_from(rng: &mut R, t: A, size: usize) -> Self { + 
rng.random_bool(0.5) + .then_some(T::arbitrary_sized_from(rng, t, size)) + } +} + +impl ArbitraryFrom for Vec +where + T: ArbitraryFrom, +{ + fn arbitrary_from(rng: &mut R, t: A) -> Self { + let size = rng.random_range(0..5); + (0..size).map(|_| T::arbitrary_from(rng, t)).collect() + } +} + +// Freestyling generation +impl ArbitrarySizedFrom<&SimulatorEnv> for Expr { + fn arbitrary_sized_from(rng: &mut R, t: &SimulatorEnv, size: usize) -> Self { + frequency( + vec![ + ( + 1, + Box::new(|rng| Expr::Literal(ast::Literal::arbitrary_from(rng, t))), + ), + ( + size, + Box::new(|rng| { + one_of( + vec![ + // Box::new(|rng: &mut R| Expr::Between { + // lhs: Box::arbitrary_sized_from(rng, t, size - 1), + // not: rng.gen_bool(0.5), + // start: Box::arbitrary_sized_from(rng, t, size - 1), + // end: Box::arbitrary_sized_from(rng, t, size - 1), + // }), + Box::new(|rng: &mut R| { + Expr::Binary( + Box::arbitrary_sized_from(rng, t, size - 1), + Operator::arbitrary(rng), + Box::arbitrary_sized_from(rng, t, size - 1), + ) + }), + // Box::new(|rng| Expr::Case { + // base: Option::arbitrary_from(rng, t), + // when_then_pairs: { + // let size = rng.gen_range(0..5); + // (0..size) + // .map(|_| (Self::arbitrary_from(rng, t), Self::arbitrary_from(rng, t))) + // .collect() + // }, + // else_expr: Option::arbitrary_from(rng, t), + // }), + // Box::new(|rng| Expr::Cast { + // expr: Box::arbitrary_sized_from(rng, t), + // type_name: Option::arbitrary(rng), + // }), + // Box::new(|rng| Expr::Collate(Box::arbitrary_sized_from(rng, t), CollateName::arbitrary(rng).0)), + // Box::new(|rng| Expr::InList { + // lhs: Box::arbitrary_sized_from(rng, t), + // not: rng.gen_bool(0.5), + // rhs: Option::arbitrary_from(rng, t), + // }), + // Box::new(|rng| Expr::IsNull(Box::arbitrary_sized_from(rng, t))), + // Box::new(|rng| { + // // let op = LikeOperator::arbitrary_from(rng, t); + // let op = ast::LikeOperator::Like; // todo: remove this line when LikeOperator is implemented + // let escape = if 
matches!(op, LikeOperator::Like) { + // Option::arbitrary_sized_from(rng, t, size - 1) + // } else { + // None + // }; + // Expr::Like { + // lhs: Box::arbitrary_sized_from(rng, t, size - 1), + // not: rng.gen_bool(0.5), + // op, + // rhs: Box::arbitrary_sized_from(rng, t, size - 1), + // escape, + // } + // }), + // Box::new(|rng| Expr::NotNull(Box::arbitrary_sized_from(rng, t))), + // // TODO: only supports one paranthesized expression + // Box::new(|rng| Expr::Parenthesized(vec![Expr::arbitrary_from(rng, t)])), + // Box::new(|rng| { + // let table_idx = pick_index(t.tables.len(), rng); + // let table = &t.tables[table_idx]; + // let col_idx = pick_index(table.columns.len(), rng); + // let col = &table.columns[col_idx]; + // Expr::Qualified(Name(table.name.clone()), Name(col.name.clone())) + // }) + Box::new(|rng| { + Expr::Unary( + UnaryOperator::arbitrary_from(rng, t), + Box::arbitrary_sized_from(rng, t, size - 1), + ) + }), + // TODO: skip Exists for now + // TODO: skip Function Call for now + // TODO: skip Function Call Star for now + // TODO: skip ID for now + // TODO: skip InSelect as still need to implement ArbitratyFrom for Select + // TODO: skip InTable + // TODO: skip Name + // TODO: Skip DoublyQualified for now + // TODO: skip Raise + // TODO: skip subquery + ], + rng, + ) + }), + ), + ], + rng, + ) + } +} + +impl Arbitrary for Operator { + fn arbitrary(rng: &mut R) -> Self { + let choices = [ + Operator::Add, + Operator::And, + // Operator::ArrowRight, -- todo: not implemented in `binary_compare` yet + // Operator::ArrowRightShift, -- todo: not implemented in `binary_compare` yet + Operator::BitwiseAnd, + // Operator::BitwiseNot, -- todo: not implemented in `binary_compare` yet + Operator::BitwiseOr, + // Operator::Concat, -- todo: not implemented in `exec_concat` + Operator::Divide, + Operator::Equals, + Operator::Greater, + Operator::GreaterEquals, + Operator::Is, + Operator::IsNot, + Operator::LeftShift, + Operator::Less, + Operator::LessEquals, + 
Operator::Modulus, + Operator::Multiply, + Operator::NotEquals, + Operator::Or, + Operator::RightShift, + Operator::Subtract, + ]; + *pick(&choices, rng) + } +} + +impl Arbitrary for Type { + fn arbitrary(rng: &mut R) -> Self { + let name = pick(&["INT", "INTEGER", "REAL", "TEXT", "BLOB", "ANY"], rng).to_string(); + Self { + name, + size: None, // TODO: come back later here + } + } +} + +struct CollateName(String); + +impl Arbitrary for CollateName { + fn arbitrary(rng: &mut R) -> Self { + let choice = rng.random_range(0..3); + CollateName( + match choice { + 0 => "BINARY", + 1 => "RTRIM", + 2 => "NOCASE", + _ => unreachable!(), + } + .to_string(), + ) + } +} + +impl ArbitraryFrom<&SimulatorEnv> for QualifiedName { + fn arbitrary_from(rng: &mut R, t: &SimulatorEnv) -> Self { + // TODO: for now just generate table name + let table_idx = pick_index(t.tables.len(), rng); + let table = &t.tables[table_idx]; + // TODO: for now forego alias + Self { + db_name: None, + name: Name::new(&table.name), + alias: None, + } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for LikeOperator { + fn arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + let choice = rng.random_range(0..4); + match choice { + 0 => LikeOperator::Glob, + 1 => LikeOperator::Like, + 2 => LikeOperator::Match, + 3 => LikeOperator::Regexp, + _ => unreachable!(), + } + } +} + +// Current implementation does not take into account the columns affinity nor if table is Strict +impl ArbitraryFrom<&SimulatorEnv> for ast::Literal { + fn arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + loop { + let choice = rng.random_range(0..5); + let lit = match choice { + 0 => ast::Literal::Numeric({ + let integer = rng.random_bool(0.5); + if integer { + rng.random_range(i64::MIN..i64::MAX).to_string() + } else { + rng.random_range(-1e10..1e10).to_string() + } + }), + 1 => ast::Literal::String(format!("'{}'", gen_random_text(rng))), + 2 => ast::Literal::Blob(hex::encode(gen_random_text(rng).as_bytes())), + // TODO: skip 
Keyword + 3 => continue, + 4 => ast::Literal::Null, + // TODO: Ignore Date stuff for now + _ => continue, + }; + break lit; + } + } +} + +// Creates a litreal value +impl ArbitraryFrom<&Vec<&SimValue>> for ast::Expr { + fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + if values.is_empty() { + return Self::Literal(ast::Literal::Null); + } + // TODO: for now just convert the value to an ast::Literal + let value = pick(values, rng); + Expr::Literal((*value).into()) + } +} + +impl ArbitraryFrom<&SimulatorEnv> for UnaryOperator { + fn arbitrary_from(rng: &mut R, _t: &SimulatorEnv) -> Self { + let choice = rng.random_range(0..4); + match choice { + 0 => Self::BitwiseNot, + 1 => Self::Negative, + 2 => Self::Not, + 3 => Self::Positive, + _ => unreachable!(), + } + } +} diff --git a/sql_generation/generation/mod.rs b/sql_generation/generation/mod.rs new file mode 100644 index 000000000..44ae7f34d --- /dev/null +++ b/sql_generation/generation/mod.rs @@ -0,0 +1,166 @@ +use std::{iter::Sum, ops::SubAssign}; + +use anarchist_readable_name_generator_lib::readable_name_custom; +use rand::{distr::uniform::SampleUniform, Rng}; + +mod expr; +pub mod plan; +mod predicate; +pub mod property; +pub mod query; +pub mod table; + +type ArbitraryFromFunc<'a, R, T> = Box T + 'a>; +type Choice<'a, R, T> = (usize, Box Option + 'a>); + +/// Arbitrary trait for generating random values +/// An implementation of arbitrary is assumed to be a uniform sampling of +/// the possible values of the type, with a bias towards smaller values for +/// practicality. +pub trait Arbitrary { + fn arbitrary(rng: &mut R) -> Self; +} + +/// ArbitrarySized trait for generating random values of a specific size +/// An implementation of arbitrary_sized is assumed to be a uniform sampling of +/// the possible values of the type, with a bias towards smaller values for +/// practicality, but with the additional constraint that the generated value +/// must fit in the given size. 
This is useful for generating values that are +/// constrained by a specific size, such as integers or strings. +pub trait ArbitrarySized { + fn arbitrary_sized(rng: &mut R, size: usize) -> Self; +} + +/// ArbitraryFrom trait for generating random values from a given value +/// ArbitraryFrom allows for constructing relations, where the generated +/// value is dependent on the given value. These relations could be constraints +/// such as generating an integer within an interval, or a value that fits in a table, +/// or a predicate satisfying a given table row. +pub trait ArbitraryFrom { + fn arbitrary_from(rng: &mut R, t: T) -> Self; +} + +/// ArbitrarySizedFrom trait for generating random values from a given value +/// ArbitrarySizedFrom allows for constructing relations, where the generated +/// value is dependent on the given value and a size constraint. These relations +/// could be constraints such as generating an integer within an interval, +/// or a value that fits in a table, or a predicate satisfying a given table row, +/// but with the additional constraint that the generated value must fit in the given size. +/// This is useful for generating values that are constrained by a specific size, +/// such as integers or strings, while still being dependent on the given value. +pub trait ArbitrarySizedFrom { + fn arbitrary_sized_from(rng: &mut R, t: T, size: usize) -> Self; +} + +/// ArbitraryFromMaybe trait for fallibally generating random values from a given value +pub trait ArbitraryFromMaybe { + fn arbitrary_from_maybe(rng: &mut R, t: T) -> Option + where + Self: Sized; +} + +/// Frequency is a helper function for composing different generators with different frequency +/// of occurrences. +/// The type signature for the `N` parameter is a bit complex, but it +/// roughly corresponds to a type that can be summed, compared, subtracted and sampled, which are +/// the operations we require for the implementation. 
+// todo: switch to a simpler type signature that can accommodate all integer and float types, which +// should be enough for our purposes. +pub(crate) fn frequency< + T, + R: Rng, + N: Sum + PartialOrd + Copy + Default + SampleUniform + SubAssign, +>( + choices: Vec<(N, ArbitraryFromFunc)>, + rng: &mut R, +) -> T { + let total = choices.iter().map(|(weight, _)| *weight).sum::(); + let mut choice = rng.random_range(N::default()..total); + + for (weight, f) in choices { + if choice < weight { + return f(rng); + } + choice -= weight; + } + + unreachable!() +} + +/// one_of is a helper function for composing different generators with equal probability of occurrence. +pub(crate) fn one_of(choices: Vec>, rng: &mut R) -> T { + let index = rng.random_range(0..choices.len()); + choices[index](rng) +} + +/// backtrack is a helper function for composing different "failable" generators. +/// The function takes a list of functions that return an Option, along with number of retries +/// to make before giving up. 
+pub(crate) fn backtrack(mut choices: Vec>, rng: &mut R) -> Option { + loop { + // If there are no more choices left, we give up + let choices_ = choices + .iter() + .enumerate() + .filter(|(_, (retries, _))| *retries > 0) + .collect::>(); + if choices_.is_empty() { + tracing::trace!("backtrack: no more choices left"); + return None; + } + // Run a one_of on the remaining choices + let (choice_index, choice) = pick(&choices_, rng); + let choice_index = *choice_index; + // If the choice returns None, we decrement the number of retries and try again + let result = choice.1(rng); + if result.is_some() { + return result; + } else { + choices[choice_index].0 -= 1; + } + } +} + +/// pick is a helper function for uniformly picking a random element from a slice +pub(crate) fn pick<'a, T, R: Rng>(choices: &'a [T], rng: &mut R) -> &'a T { + let index = rng.random_range(0..choices.len()); + &choices[index] +} + +/// pick_index is typically used for picking an index from a slice to later refer to the element +/// at that index. +pub(crate) fn pick_index(choices: usize, rng: &mut R) -> usize { + rng.random_range(0..choices) +} + +/// pick_n_unique is a helper function for uniformly picking N unique elements from a range. +/// The elements themselves are usize, typically representing indices. +pub(crate) fn pick_n_unique( + range: std::ops::Range, + n: usize, + rng: &mut R, +) -> Vec { + use rand::seq::SliceRandom; + let mut items: Vec = range.collect(); + items.shuffle(rng); + items.into_iter().take(n).collect() +} + +/// gen_random_text uses `anarchist_readable_name_generator_lib` to generate random +/// readable names for tables, columns, text values etc. 
+pub(crate) fn gen_random_text(rng: &mut T) -> String { + let big_text = rng.random_ratio(1, 1000); + if big_text { + // let max_size: u64 = 2 * 1024 * 1024 * 1024; + let max_size: u64 = 2 * 1024; + let size = rng.random_range(1024..max_size); + let mut name = String::with_capacity(size as usize); + for i in 0..size { + name.push(((i % 26) as u8 + b'A') as char); + } + name + } else { + let name = readable_name_custom("_", rng); + name.replace("-", "_") + } +} diff --git a/sql_generation/generation/plan.rs b/sql_generation/generation/plan.rs new file mode 100644 index 000000000..eac9359b3 --- /dev/null +++ b/sql_generation/generation/plan.rs @@ -0,0 +1,833 @@ +use std::{ + collections::HashSet, + fmt::{Debug, Display}, + path::Path, + sync::Arc, + vec, +}; + +use serde::{Deserialize, Serialize}; + +use turso_core::{Connection, Result, StepResult}; + +use crate::{ + generation::query::SelectFree, + model::{ + query::{update::Update, Create, CreateIndex, Delete, Drop, Insert, Query, Select}, + table::SimValue, + }, + runner::{ + env::{SimConnection, SimulationType, SimulatorTables}, + io::SimulatorIO, + }, + SimulatorEnv, +}; + +use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; + +use super::property::{remaining, Property}; + +pub(crate) type ResultSet = Result>>; + +#[derive(Clone, Serialize, Deserialize)] +pub(crate) struct InteractionPlan { + pub(crate) plan: Vec, +} + +impl InteractionPlan { + /// Compute via diff computes a a plan from a given `.plan` file without the need to parse + /// sql. This is possible because there are two versions of the plan file, one that is human + /// readable and one that is serialized as JSON. Under watch mode, the users will be able to + /// delete interactions from the human readable file, and this function uses the JSON file as + /// a baseline to detect with interactions were deleted and constructs the plan from the + /// remaining interactions. 
+ pub(crate) fn compute_via_diff(plan_path: &Path) -> Vec> { + let interactions = std::fs::read_to_string(plan_path).unwrap(); + let interactions = interactions.lines().collect::>(); + + let plan: InteractionPlan = serde_json::from_str( + std::fs::read_to_string(plan_path.with_extension("json")) + .unwrap() + .as_str(), + ) + .unwrap(); + + let mut plan = plan + .plan + .into_iter() + .map(|i| i.interactions()) + .collect::>(); + + let (mut i, mut j) = (0, 0); + + while i < interactions.len() && j < plan.len() { + if interactions[i].starts_with("-- begin") + || interactions[i].starts_with("-- end") + || interactions[i].is_empty() + { + i += 1; + continue; + } + + // interactions[i] is the i'th line in the human readable plan + // plan[j][k] is the k'th interaction in the j'th property + let mut k = 0; + + while k < plan[j].len() { + if i >= interactions.len() { + let _ = plan.split_off(j + 1); + let _ = plan[j].split_off(k); + break; + } + tracing::error!("Comparing '{}' with '{}'", interactions[i], plan[j][k]); + if interactions[i].contains(plan[j][k].to_string().as_str()) { + i += 1; + k += 1; + } else { + plan[j].remove(k); + panic!("Comparing '{}' with '{}'", interactions[i], plan[j][k]); + } + } + + if plan[j].is_empty() { + plan.remove(j); + } else { + j += 1; + } + } + let _ = plan.split_off(j); + plan + } +} + +pub(crate) struct InteractionPlanState { + pub(crate) stack: Vec, + pub(crate) interaction_pointer: usize, + pub(crate) secondary_pointer: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) enum Interactions { + Property(Property), + Query(Query), + Fault(Fault), +} + +impl Interactions { + pub(crate) fn name(&self) -> Option<&str> { + match self { + Interactions::Property(property) => Some(property.name()), + Interactions::Query(_) => None, + Interactions::Fault(_) => None, + } + } + + pub(crate) fn interactions(&self) -> Vec { + match self { + Interactions::Property(property) => property.interactions(), + 
Interactions::Query(query) => vec![Interaction::Query(query.clone())], + Interactions::Fault(fault) => vec![Interaction::Fault(fault.clone())], + } + } +} + +impl Interactions { + pub(crate) fn dependencies(&self) -> HashSet { + match self { + Interactions::Property(property) => { + property + .interactions() + .iter() + .fold(HashSet::new(), |mut acc, i| match i { + Interaction::Query(q) => { + acc.extend(q.dependencies()); + acc + } + _ => acc, + }) + } + Interactions::Query(query) => query.dependencies(), + Interactions::Fault(_) => HashSet::new(), + } + } + + pub(crate) fn uses(&self) -> Vec { + match self { + Interactions::Property(property) => { + property + .interactions() + .iter() + .fold(vec![], |mut acc, i| match i { + Interaction::Query(q) => { + acc.extend(q.uses()); + acc + } + _ => acc, + }) + } + Interactions::Query(query) => query.uses(), + Interactions::Fault(_) => vec![], + } + } +} + +impl Display for InteractionPlan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for interactions in &self.plan { + match interactions { + Interactions::Property(property) => { + let name = property.name(); + writeln!(f, "-- begin testing '{name}'")?; + for interaction in property.interactions() { + write!(f, "\t")?; + + match interaction { + Interaction::Query(query) => writeln!(f, "{query};")?, + Interaction::Assumption(assumption) => { + writeln!(f, "-- ASSUME {};", assumption.name)? + } + Interaction::Assertion(assertion) => { + writeln!(f, "-- ASSERT {};", assertion.name)? + } + Interaction::Fault(fault) => writeln!(f, "-- FAULT '{fault}';")?, + Interaction::FsyncQuery(query) => { + writeln!(f, "-- FSYNC QUERY;")?; + writeln!(f, "{query};")?; + writeln!(f, "{query};")? + } + Interaction::FaultyQuery(query) => { + writeln!(f, "{query}; -- FAULTY QUERY")? 
+ } + } + } + writeln!(f, "-- end testing '{name}'")?; + } + Interactions::Fault(fault) => { + writeln!(f, "-- FAULT '{fault}'")?; + } + Interactions::Query(query) => { + writeln!(f, "{query};")?; + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Clone, Copy)] +pub(crate) struct InteractionStats { + pub(crate) read_count: usize, + pub(crate) write_count: usize, + pub(crate) delete_count: usize, + pub(crate) update_count: usize, + pub(crate) create_count: usize, + pub(crate) create_index_count: usize, + pub(crate) drop_count: usize, + pub(crate) begin_count: usize, + pub(crate) commit_count: usize, + pub(crate) rollback_count: usize, +} + +impl Display for InteractionStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Read: {}, Write: {}, Delete: {}, Update: {}, Create: {}, CreateIndex: {}, Drop: {}, Begin: {}, Commit: {}, Rollback: {}", + self.read_count, + self.write_count, + self.delete_count, + self.update_count, + self.create_count, + self.create_index_count, + self.drop_count, + self.begin_count, + self.commit_count, + self.rollback_count, + ) + } +} + +#[derive(Debug)] +pub(crate) enum Interaction { + Query(Query), + Assumption(Assertion), + Assertion(Assertion), + Fault(Fault), + /// Will attempt to run any random query. 
However, when the connection tries to sync it will + /// close all connections and reopen the database and assert that no data was lost + FsyncQuery(Query), + FaultyQuery(Query), +} + +impl Display for Interaction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Query(query) => write!(f, "{query}"), + Self::Assumption(assumption) => write!(f, "ASSUME {}", assumption.name), + Self::Assertion(assertion) => write!(f, "ASSERT {}", assertion.name), + Self::Fault(fault) => write!(f, "FAULT '{fault}'"), + Self::FsyncQuery(query) => write!(f, "{query}"), + Self::FaultyQuery(query) => write!(f, "{query}; -- FAULTY QUERY"), + } + } +} + +type AssertionFunc = dyn Fn(&Vec, &mut SimulatorEnv) -> Result>; + +enum AssertionAST { + Pick(), +} + +pub(crate) struct Assertion { + pub(crate) func: Box, + pub(crate) name: String, // For display purposes in the plan +} + +impl Debug for Assertion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Assertion") + .field("name", &self.name) + .finish() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) enum Fault { + Disconnect, + ReopenDatabase, +} + +impl Display for Fault { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Fault::Disconnect => write!(f, "DISCONNECT"), + Fault::ReopenDatabase => write!(f, "REOPEN_DATABASE"), + } + } +} + +impl InteractionPlan { + pub(crate) fn new() -> Self { + Self { plan: Vec::new() } + } + + pub(crate) fn stats(&self) -> InteractionStats { + let mut stats = InteractionStats { + read_count: 0, + write_count: 0, + delete_count: 0, + update_count: 0, + create_count: 0, + create_index_count: 0, + drop_count: 0, + begin_count: 0, + commit_count: 0, + rollback_count: 0, + }; + + fn query_stat(q: &Query, stats: &mut InteractionStats) { + match q { + Query::Select(_) => stats.read_count += 1, + Query::Insert(_) => stats.write_count += 1, + Query::Delete(_) => 
stats.delete_count += 1, + Query::Create(_) => stats.create_count += 1, + Query::Drop(_) => stats.drop_count += 1, + Query::Update(_) => stats.update_count += 1, + Query::CreateIndex(_) => stats.create_index_count += 1, + Query::Begin(_) => stats.begin_count += 1, + Query::Commit(_) => stats.commit_count += 1, + Query::Rollback(_) => stats.rollback_count += 1, + } + } + for interactions in &self.plan { + match interactions { + Interactions::Property(property) => { + for interaction in &property.interactions() { + if let Interaction::Query(query) = interaction { + query_stat(query, &mut stats); + } + } + } + Interactions::Query(query) => { + query_stat(query, &mut stats); + } + Interactions::Fault(_) => {} + } + } + + stats + } +} + +impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { + fn arbitrary_from(rng: &mut R, env: &mut SimulatorEnv) -> Self { + let mut plan = InteractionPlan::new(); + + let num_interactions = env.opts.max_interactions; + + // First create at least one table + let create_query = Create::arbitrary(rng); + env.tables.push(create_query.table.clone()); + + plan.plan + .push(Interactions::Query(Query::Create(create_query))); + + while plan.plan.len() < num_interactions { + tracing::debug!( + "Generating interaction {}/{}", + plan.plan.len(), + num_interactions + ); + let interactions = Interactions::arbitrary_from(rng, (env, plan.stats())); + interactions.shadow(&mut env.tables); + plan.plan.push(interactions); + } + + tracing::info!("Generated plan with {} interactions", plan.plan.len()); + plan + } +} + +impl Interaction { + pub(crate) fn execute_query(&self, conn: &mut Arc, _io: &SimulatorIO) -> ResultSet { + if let Self::Query(query) = self { + let query_str = query.to_string(); + let rows = conn.query(&query_str); + if rows.is_err() { + let err = rows.err(); + tracing::debug!( + "Error running query '{}': {:?}", + &query_str[0..query_str.len().min(4096)], + err + ); + if let Some(turso_core::LimboError::ParseError(e)) = err { + 
panic!("Unexpected parse error: {e}"); + } + return Err(err.unwrap()); + } + let rows = rows?; + assert!(rows.is_some()); + let mut rows = rows.unwrap(); + let mut out = Vec::new(); + while let Ok(row) = rows.step() { + match row { + StepResult::Row => { + let row = rows.row().unwrap(); + let mut r = Vec::new(); + for v in row.get_values() { + let v = v.into(); + r.push(v); + } + out.push(r); + } + StepResult::IO => { + rows.run_once().unwrap(); + } + StepResult::Interrupt => {} + StepResult::Done => { + break; + } + StepResult::Busy => { + return Err(turso_core::LimboError::Busy); + } + } + } + + Ok(out) + } else { + unreachable!("unexpected: this function should only be called on queries") + } + } + + pub(crate) fn execute_assertion( + &self, + stack: &Vec, + env: &mut SimulatorEnv, + ) -> Result<()> { + match self { + Self::Assertion(assertion) => { + let result = assertion.func.as_ref()(stack, env); + match result { + Ok(Ok(())) => Ok(()), + Ok(Err(message)) => Err(turso_core::LimboError::InternalError(format!( + "Assertion '{}' failed: {}", + assertion.name, message + ))), + Err(err) => Err(turso_core::LimboError::InternalError(format!( + "Assertion '{}' execution error: {}", + assertion.name, err + ))), + } + } + _ => { + unreachable!("unexpected: this function should only be called on assertions") + } + } + } + + pub(crate) fn execute_assumption( + &self, + stack: &Vec, + env: &mut SimulatorEnv, + ) -> Result<()> { + match self { + Self::Assumption(assumption) => { + let result = assumption.func.as_ref()(stack, env); + match result { + Ok(Ok(())) => Ok(()), + Ok(Err(message)) => Err(turso_core::LimboError::InternalError(format!( + "Assumption '{}' failed: {}", + assumption.name, message + ))), + Err(err) => Err(turso_core::LimboError::InternalError(format!( + "Assumption '{}' execution error: {}", + assumption.name, err + ))), + } + } + _ => { + unreachable!("unexpected: this function should only be called on assumptions") + } + } + } + + pub(crate) fn 
execute_fault(&self, env: &mut SimulatorEnv, conn_index: usize) -> Result<()> { + match self { + Self::Fault(fault) => { + match fault { + Fault::Disconnect => { + if env.connections[conn_index].is_connected() { + env.connections[conn_index].disconnect(); + } else { + return Err(turso_core::LimboError::InternalError( + "connection already disconnected".into(), + )); + } + env.connections[conn_index] = SimConnection::Disconnected; + } + Fault::ReopenDatabase => { + reopen_database(env); + } + } + Ok(()) + } + _ => { + unreachable!("unexpected: this function should only be called on faults") + } + } + } + + pub(crate) fn execute_fsync_query( + &self, + conn: Arc, + env: &mut SimulatorEnv, + ) -> ResultSet { + if let Self::FsyncQuery(query) = self { + let query_str = query.to_string(); + let rows = conn.query(&query_str); + if rows.is_err() { + let err = rows.err(); + tracing::debug!( + "Error running query '{}': {:?}", + &query_str[0..query_str.len().min(4096)], + err + ); + return Err(err.unwrap()); + } + let mut rows = rows.unwrap().unwrap(); + let mut out = Vec::new(); + while let Ok(row) = rows.step() { + match row { + StepResult::Row => { + let row = rows.row().unwrap(); + let mut r = Vec::new(); + for v in row.get_values() { + let v = v.into(); + r.push(v); + } + out.push(r); + } + StepResult::IO => { + let syncing = { + let files = env.io.files.borrow(); + // TODO: currently assuming we only have 1 file that is syncing + files + .iter() + .any(|file| file.sync_completion.borrow().is_some()) + }; + if syncing { + reopen_database(env); + } else { + rows.run_once().unwrap(); + } + } + StepResult::Done => { + break; + } + StepResult::Busy => { + return Err(turso_core::LimboError::Busy); + } + StepResult::Interrupt => {} + } + } + + Ok(out) + } else { + unreachable!("unexpected: this function should only be called on queries") + } + } + + pub(crate) fn execute_faulty_query( + &self, + conn: &Arc, + env: &mut SimulatorEnv, + ) -> ResultSet { + use rand::Rng; + if 
let Self::FaultyQuery(query) = self { + let query_str = query.to_string(); + let rows = conn.query(&query_str); + if rows.is_err() { + let err = rows.err(); + tracing::debug!( + "Error running query '{}': {:?}", + &query_str[0..query_str.len().min(4096)], + err + ); + if let Some(turso_core::LimboError::ParseError(e)) = err { + panic!("Unexpected parse error: {e}"); + } + return Err(err.unwrap()); + } + let mut rows = rows.unwrap().unwrap(); + let mut out = Vec::new(); + let mut current_prob = 0.05; + let mut incr = 0.001; + loop { + let syncing = { + let files = env.io.files.borrow(); + files + .iter() + .any(|file| file.sync_completion.borrow().is_some()) + }; + let inject_fault = env.rng.gen_bool(current_prob); + // TODO: avoid for now injecting faults when syncing + if inject_fault && !syncing { + env.io.inject_fault(true); + } + + match rows.step()? { + StepResult::Row => { + let row = rows.row().unwrap(); + let mut r = Vec::new(); + for v in row.get_values() { + let v = v.into(); + r.push(v); + } + out.push(r); + } + StepResult::IO => { + rows.run_once()?; + current_prob += incr; + if current_prob > 1.0 { + current_prob = 1.0; + } else { + incr *= 1.01; + } + } + StepResult::Done => { + break; + } + StepResult::Busy => { + return Err(turso_core::LimboError::Busy); + } + StepResult::Interrupt => {} + } + } + + Ok(out) + } else { + unreachable!("unexpected: this function should only be called on queries") + } + } +} + +fn reopen_database(env: &mut SimulatorEnv) { + // 1. Close all connections without default checkpoint-on-close behavior + // to expose bugs related to how we handle WAL + let num_conns = env.connections.len(); + env.connections.clear(); + + // Clear all open files + // TODO: for correct reporting of faults we should get all the recorded numbers and transfer to the new file + env.io.files.borrow_mut().clear(); + + // 2. 
Re-open database + match env.type_ { + SimulationType::Differential => { + for _ in 0..num_conns { + env.connections.push(SimConnection::SQLiteConnection( + rusqlite::Connection::open(env.get_db_path()) + .expect("Failed to open SQLite connection"), + )); + } + } + SimulationType::Default | SimulationType::Doublecheck => { + env.db = None; + let db = match turso_core::Database::open_file( + env.io.clone(), + env.get_db_path().to_str().expect("path should be 'to_str'"), + false, + true, + ) { + Ok(db) => db, + Err(e) => { + tracing::error!( + "Failed to open database at {}: {}", + env.get_db_path().display(), + e + ); + panic!("Failed to open database: {e}"); + } + }; + + env.db = Some(db); + + for _ in 0..num_conns { + env.connections.push(SimConnection::LimboConnection( + env.db.as_ref().expect("db to be Some").connect().unwrap(), + )); + } + } + }; +} + +fn random_create(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let mut create = Create::arbitrary(rng); + while env.tables.iter().any(|t| t.name == create.table.name) { + create = Create::arbitrary(rng); + } + Interactions::Query(Query::Create(create)) +} + +fn random_read(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Select(Select::arbitrary_from(rng, env))) +} + +fn random_expr(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Select(SelectFree::arbitrary_from(rng, env).0)) +} + +fn random_write(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Insert(Insert::arbitrary_from(rng, env))) +} + +fn random_delete(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Delete(Delete::arbitrary_from(rng, env))) +} + +fn random_update(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Update(Update::arbitrary_from(rng, env))) +} + +fn random_drop(rng: &mut R, env: &SimulatorEnv) -> Interactions { + Interactions::Query(Query::Drop(Drop::arbitrary_from(rng, env))) +} + 
+fn random_create_index(rng: &mut R, env: &SimulatorEnv) -> Option { + if env.tables.is_empty() { + return None; + } + let mut create_index = CreateIndex::arbitrary_from(rng, env); + while env + .tables + .iter() + .find(|t| t.name == create_index.table_name) + .expect("table should exist") + .indexes + .iter() + .any(|i| i == &create_index.index_name) + { + create_index = CreateIndex::arbitrary_from(rng, env); + } + + Some(Interactions::Query(Query::CreateIndex(create_index))) +} + +fn random_fault(rng: &mut R, env: &SimulatorEnv) -> Interactions { + let faults = if env.opts.disable_reopen_database { + vec![Fault::Disconnect] + } else { + vec![Fault::Disconnect, Fault::ReopenDatabase] + }; + let fault = faults[rng.random_range(0..faults.len())].clone(); + Interactions::Fault(fault) +} + +impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { + fn arbitrary_from( + rng: &mut R, + (env, stats): (&SimulatorEnv, InteractionStats), + ) -> Self { + let remaining_ = remaining(env, &stats); + frequency( + vec![ + ( + f64::min(remaining_.read, remaining_.write) + remaining_.create, + Box::new(|rng: &mut R| { + Interactions::Property(Property::arbitrary_from(rng, (env, &stats))) + }), + ), + ( + remaining_.read, + Box::new(|rng: &mut R| random_read(rng, env)), + ), + ( + remaining_.read / 3.0, + Box::new(|rng: &mut R| random_expr(rng, env)), + ), + ( + remaining_.write, + Box::new(|rng: &mut R| random_write(rng, env)), + ), + ( + remaining_.create, + Box::new(|rng: &mut R| random_create(rng, env)), + ), + ( + remaining_.create_index, + Box::new(|rng: &mut R| { + if let Some(interaction) = random_create_index(rng, env) { + interaction + } else { + // if no tables exist, we can't create an index, so fallback to creating a table + random_create(rng, env) + } + }), + ), + ( + remaining_.delete, + Box::new(|rng: &mut R| random_delete(rng, env)), + ), + ( + remaining_.update, + Box::new(|rng: &mut R| random_update(rng, env)), + ), + ( + // remaining_.drop, + 
0.0, + Box::new(|rng: &mut R| random_drop(rng, env)), + ), + ( + remaining_ + .read + .min(remaining_.write) + .min(remaining_.create) + .max(1.0), + Box::new(|rng: &mut R| random_fault(rng, env)), + ), + ], + rng, + ) + } +} diff --git a/sql_generation/generation/predicate/binary.rs b/sql_generation/generation/predicate/binary.rs new file mode 100644 index 000000000..29c1727a9 --- /dev/null +++ b/sql_generation/generation/predicate/binary.rs @@ -0,0 +1,586 @@ +//! Contains code for generation for [ast::Expr::Binary] Predicate + +use turso_parser::ast::{self, Expr}; + +use crate::{ + generation::{ + backtrack, one_of, pick, + predicate::{CompoundPredicate, SimplePredicate}, + table::{GTValue, LTValue, LikeValue}, + ArbitraryFrom, ArbitraryFromMaybe as _, + }, + model::{ + query::predicate::Predicate, + table::{SimValue, Table, TableContext}, + }, +}; + +impl Predicate { + /// Generate an [ast::Expr::Binary] [Predicate] from a column and [SimValue] + pub fn from_column_binary( + rng: &mut R, + column_name: &str, + value: &SimValue, + ) -> Predicate { + let expr = one_of( + vec![ + Box::new(|_| { + Expr::Binary( + Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), + ast::Operator::Equals, + Box::new(Expr::Literal(value.into())), + ) + }), + Box::new(|rng| { + let gt_value = GTValue::arbitrary_from(rng, value).0; + Expr::Binary( + Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), + ast::Operator::Greater, + Box::new(Expr::Literal(gt_value.into())), + ) + }), + Box::new(|rng| { + let lt_value = LTValue::arbitrary_from(rng, value).0; + Expr::Binary( + Box::new(Expr::Id(ast::Name::Ident(column_name.to_string()))), + ast::Operator::Less, + Box::new(Expr::Literal(lt_value.into())), + ) + }), + ], + rng, + ); + Predicate(expr) + } + + /// Produces a true [ast::Expr::Binary] [Predicate] that is true for the provided row in the given table + pub fn true_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + // Pick a column + let 
column_index = rng.random_range(0..t.columns.len()); + let mut column = t.columns[column_index].clone(); + let value = &row[column_index]; + + let mut table_name = t.name.clone(); + if t.name.is_empty() { + // If the table name is empty, we cannot create a qualified expression + // so we use the column name directly + let mut splitted = column.name.split('.'); + table_name = splitted + .next() + .expect("Column name should have a table prefix for a joined table") + .to_string(); + column.name = splitted + .next() + .expect("Column name should have a column suffix for a joined table") + .to_string(); + } + + let expr = backtrack( + vec![ + ( + 1, + Box::new(|_| { + Some(Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Equals, + Box::new(Expr::Literal(value.into())), + )) + }), + ), + ( + 1, + Box::new(|rng| { + let v = SimValue::arbitrary_from(rng, &column.column_type); + if &v == value { + None + } else { + Some(Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::NotEquals, + Box::new(Expr::Literal(v.into())), + )) + } + }), + ), + ( + 1, + Box::new(|rng| { + let lt_value = LTValue::arbitrary_from(rng, value).0; + Some(Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Greater, + Box::new(Expr::Literal(lt_value.into())), + )) + }), + ), + ( + 1, + Box::new(|rng| { + let gt_value = GTValue::arbitrary_from(rng, value).0; + Some(Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Less, + Box::new(Expr::Literal(gt_value.into())), + )) + }), + ), + ( + 1, + Box::new(|rng| { + // TODO: generation for Like and Glob expressions should be extracted to different module + LikeValue::arbitrary_from_maybe(rng, value).map(|like| { + Expr::Like { + lhs: 
Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + not: false, // TODO: also generate this value eventually + op: ast::LikeOperator::Like, + rhs: Box::new(Expr::Literal(like.0.into())), + escape: None, // TODO: implement + } + }) + }), + ), + ], + rng, + ); + // Backtrack will always return Some here + Predicate(expr.unwrap()) + } + + /// Produces an [ast::Expr::Binary] [Predicate] that is false for the provided row in the given table + pub fn false_binary(rng: &mut R, t: &Table, row: &[SimValue]) -> Predicate { + // Pick a column + let column_index = rng.random_range(0..t.columns.len()); + let mut column = t.columns[column_index].clone(); + let mut table_name = t.name.clone(); + let value = &row[column_index]; + + if t.name.is_empty() { + // If the table name is empty, we cannot create a qualified expression + // so we use the column name directly + let mut splitted = column.name.split('.'); + table_name = splitted + .next() + .expect("Column name should have a table prefix for a joined table") + .to_string(); + column.name = splitted + .next() + .expect("Column name should have a column suffix for a joined table") + .to_string(); + } + + let expr = one_of( + vec![ + Box::new(|_| { + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::NotEquals, + Box::new(Expr::Literal(value.into())), + ) + }), + Box::new(|rng| { + let v = loop { + let v = SimValue::arbitrary_from(rng, &column.column_type); + if &v != value { + break v; + } + }; + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Equals, + Box::new(Expr::Literal(v.into())), + ) + }), + Box::new(|rng| { + let gt_value = GTValue::arbitrary_from(rng, value).0; + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Greater, + 
Box::new(Expr::Literal(gt_value.into())), + ) + }), + Box::new(|rng| { + let lt_value = LTValue::arbitrary_from(rng, value).0; + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(&table_name), + ast::Name::new(&column.name), + )), + ast::Operator::Less, + Box::new(Expr::Literal(lt_value.into())), + ) + }), + ], + rng, + ); + Predicate(expr) + } +} + +impl SimplePredicate { + /// Generates a true [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table + pub fn true_binary( + rng: &mut R, + table: &T, + row: &[SimValue], + ) -> Self { + // Pick a random column + let columns = table.columns().collect::>(); + let column_index = rng.random_range(0..columns.len()); + let column = columns[column_index]; + let column_value = &row[column_index]; + let table_name = column.table_name; + // Avoid creation of NULLs + if row.is_empty() { + return SimplePredicate(Predicate(Expr::Literal(SimValue::TRUE.into()))); + } + + let expr = one_of( + vec![ + Box::new(|_rng| { + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::Equals, + Box::new(Expr::Literal(column_value.into())), + ) + }), + Box::new(|rng| { + let lt_value = LTValue::arbitrary_from(rng, column_value).0; + Expr::Binary( + Box::new(Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::Greater, + Box::new(Expr::Literal(lt_value.into())), + ) + }), + Box::new(|rng| { + let gt_value = GTValue::arbitrary_from(rng, column_value).0; + Expr::Binary( + Box::new(Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::Less, + Box::new(Expr::Literal(gt_value.into())), + ) + }), + ], + rng, + ); + SimplePredicate(Predicate(expr)) + } + + /// Generates a false [ast::Expr::Binary] [SimplePredicate] from a [TableContext] for a row in the table + pub fn false_binary( + rng: &mut R, + table: &T, + row: 
&[SimValue], + ) -> Self { + let columns = table.columns().collect::>(); + // Pick a random column + let column_index = rng.random_range(0..columns.len()); + let column = columns[column_index]; + let column_value = &row[column_index]; + let table_name = column.table_name; + // Avoid creation of NULLs + if row.is_empty() { + return SimplePredicate(Predicate(Expr::Literal(SimValue::FALSE.into()))); + } + + let expr = one_of( + vec![ + Box::new(|_rng| { + Expr::Binary( + Box::new(Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::NotEquals, + Box::new(Expr::Literal(column_value.into())), + ) + }), + Box::new(|rng| { + let gt_value = GTValue::arbitrary_from(rng, column_value).0; + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::Greater, + Box::new(Expr::Literal(gt_value.into())), + ) + }), + Box::new(|rng| { + let lt_value = LTValue::arbitrary_from(rng, column_value).0; + Expr::Binary( + Box::new(ast::Expr::Qualified( + ast::Name::new(table_name), + ast::Name::new(&column.column.name), + )), + ast::Operator::Less, + Box::new(Expr::Literal(lt_value.into())), + ) + }), + ], + rng, + ); + SimplePredicate(Predicate(expr)) + } +} + +impl CompoundPredicate { + /// Decide if you want to create an AND or an OR + /// + /// Creates a Compound Predicate that is TRUE or FALSE for at least a single row + pub fn from_table_binary( + rng: &mut R, + table: &T, + predicate_value: bool, + ) -> Self { + // Cannot pick a row if the table is empty + let rows = table.rows(); + if rows.is_empty() { + return Self(if predicate_value { + Predicate::true_() + } else { + Predicate::false_() + }); + } + let row = pick(rows, rng); + + let predicate = if rng.random_bool(0.7) { + // An AND for true requires each of its children to be true + // An AND for false requires at least one of its children to be false + if predicate_value { + 
(0..rng.random_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, true)).0) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::And, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::true_()) + } else { + // Create a vector of random booleans + let mut booleans = (0..rng.random_range(1..=3)) + .map(|_| rng.random_bool(0.5)) + .collect::>(); + + let len = booleans.len(); + + // Make sure at least one of them is false + if booleans.iter().all(|b| *b) { + booleans[rng.random_range(0..len)] = false; + } + + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::And, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::false_()) + } + } else { + // An OR for true requires at least one of its children to be true + // An OR for false requires each of its children to be false + if predicate_value { + // Create a vector of random booleans + let mut booleans = (0..rng.random_range(1..=3)) + .map(|_| rng.random_bool(0.5)) + .collect::>(); + let len = booleans.len(); + // Make sure at least one of them is true + if booleans.iter().all(|b| !*b) { + booleans[rng.random_range(0..len)] = true; + } + + booleans + .iter() + .map(|b| SimplePredicate::arbitrary_from(rng, (table, row, *b)).0) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::Or, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::true_()) + } else { + (0..rng.random_range(1..=3)) + .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, false)).0) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::Or, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::false_()) + } + }; + Self(predicate) + } +} + +#[cfg(test)] +mod tests { + use rand::{Rng as _, SeedableRng as _}; + use rand_chacha::ChaCha8Rng; + + use crate::{ + generation::{pick, 
predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + model::{ + query::predicate::{expr_to_value, Predicate}, + table::{SimValue, Table}, + }, + }; + + fn get_seed() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + #[test] + fn fuzz_true_binary_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + let row = pick(&values, &mut rng); + let predicate = Predicate::true_binary(&mut rng, &table, row); + let value = expr_to_value(&predicate.0, row, &table); + assert!( + value.as_ref().is_some_and(|value| value.as_bool()), + "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}" + ) + } + } + + #[test] + fn fuzz_false_binary_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + let row = pick(&values, &mut rng); + let predicate = Predicate::false_binary(&mut rng, &table, row); + let value = expr_to_value(&predicate.0, row, &table); + assert!( + !value.as_ref().is_some_and(|value| value.as_bool()), + "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}" + ) + } + } + + #[test] + fn fuzz_true_binary_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + 
.iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let row = pick(&table.rows, &mut rng); + let predicate = SimplePredicate::true_binary(&mut rng, &table, row); + let result = values + .iter() + .map(|row| predicate.0.test(row, &table)) + .reduce(|accum, curr| accum || curr) + .unwrap_or(false); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } + + #[test] + fn fuzz_false_binary_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let row = pick(&table.rows, &mut rng); + let predicate = SimplePredicate::false_binary(&mut rng, &table, row); + let result = values + .iter() + .map(|row| predicate.0.test(row, &table)) + .any(|res| !res); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } +} diff --git a/sql_generation/generation/predicate/mod.rs b/sql_generation/generation/predicate/mod.rs new file mode 100644 index 000000000..0a06dead0 --- /dev/null +++ b/sql_generation/generation/predicate/mod.rs @@ -0,0 +1,378 @@ +use rand::{seq::SliceRandom as _, Rng}; +use turso_parser::ast::{self, Expr}; + +use crate::model::{ + query::predicate::Predicate, + table::{SimValue, Table, TableContext}, +}; + +use super::{one_of, ArbitraryFrom}; + +mod binary; +mod unary; + +#[derive(Debug)] +struct CompoundPredicate(Predicate); + +#[derive(Debug)] +struct SimplePredicate(Predicate); + +impl, T: TableContext> ArbitraryFrom<(&T, A, bool)> for SimplePredicate { + fn arbitrary_from(rng: &mut R, (table, row, predicate_value): (&T, A, bool)) -> Self { + let row = row.as_ref(); + // Pick an operator + let 
choice = rng.random_range(0..2); + // Build a binary (0) or unary (1) predicate with the requested truth value + match predicate_value { + true => match choice { + 0 => SimplePredicate::true_binary(rng, table, row), + 1 => SimplePredicate::true_unary(rng, table, row), + _ => unreachable!(), + }, + false => match choice { + 0 => SimplePredicate::false_binary(rng, table, row), + 1 => SimplePredicate::false_unary(rng, table, row), + _ => unreachable!(), + }, + } + } +} + +impl ArbitraryFrom<(&T, bool)> for CompoundPredicate { + fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { + CompoundPredicate::from_table_binary(rng, table, predicate_value) + } +} + +impl ArbitraryFrom<&T> for Predicate { + fn arbitrary_from(rng: &mut R, table: &T) -> Self { + let predicate_value = rng.random_bool(0.5); + Predicate::arbitrary_from(rng, (table, predicate_value)).parens() + } +} + +impl ArbitraryFrom<(&T, bool)> for Predicate { + fn arbitrary_from(rng: &mut R, (table, predicate_value): (&T, bool)) -> Self { + CompoundPredicate::arbitrary_from(rng, (table, predicate_value)).0 + } +} + +impl ArbitraryFrom<(&str, &SimValue)> for Predicate { + fn arbitrary_from(rng: &mut R, (column_name, value): (&str, &SimValue)) -> Self { + Predicate::from_column_binary(rng, column_name, value) + } +} + +impl ArbitraryFrom<(&Table, &Vec)> for Predicate { + fn arbitrary_from(rng: &mut R, (t, row): (&Table, &Vec)) -> Self { + // We want to produce a predicate that is true for the row + // We can do this by creating several predicates that + // are true, some that are false, and combining them in ways that correspond to the creation of a true predicate + + // Produce some true and false predicates + let mut true_predicates = (1..=rng.random_range(1..=4)) + .map(|_| Predicate::true_binary(rng, t, row)) + .collect::>(); + + let false_predicates = (0..=rng.random_range(0..=3)) + .map(|_| Predicate::false_binary(rng, t, row)) + .collect::>(); + + // Start building a top level predicate from a true predicate + let mut result = 
true_predicates.pop().unwrap(); + + let mut predicates = true_predicates + .iter() + .map(|p| (true, p.clone())) + .chain(false_predicates.iter().map(|p| (false, p.clone()))) + .collect::>(); + + predicates.shuffle(rng); + + while !predicates.is_empty() { + // Create a new predicate from up to 3 of the remaining predicates (the draw may be 0, leaving the context empty) + let context = + predicates[0..rng.random_range(0..=usize::min(3, predicates.len()))].to_vec(); + // Shift `predicates` to remove the predicates in the context + predicates = predicates[context.len()..].to_vec(); + + // `result` is true, so we have the following three options to make a true predicate: + // T or F + // T or T + // T and T + + result = one_of( + vec![ + // T or (X1 or X2 or ... or Xn) + Box::new(|_| { + Predicate(Expr::Binary( + Box::new(result.0.clone()), + ast::Operator::Or, + Box::new( + context + .iter() + .map(|(_, p)| p.clone()) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::Or, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::false_()) + .0, + ), + )) + }), + // T or (X1 and X2 and ... and Xn) + Box::new(|_| { + Predicate(Expr::Binary( + Box::new(result.0.clone()), + ast::Operator::Or, + Box::new( + context + .iter() + .map(|(_, p)| p.clone()) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::And, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::true_()) + .0, + ), + )) + }), + // T and T + Box::new(|_| { + // Check if all the predicates in the context are true + if context.iter().all(|(b, _)| *b) { + // T and (T1 and T2 and ... 
and Tn) + Predicate(Expr::Binary( + Box::new(result.0.clone()), + ast::Operator::And, + Box::new( + context + .iter() + .map(|(_, p)| p.clone()) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::And, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::true_()) + .0, + ), + )) + } + // Check if there is at least one true predicate + else if context.iter().any(|(b, _)| *b) { + // T and (X1 or X2 or ... or Xn) + Predicate(Expr::Binary( + Box::new(result.0.clone()), + ast::Operator::And, + Box::new( + context + .iter() + .map(|(_, p)| p.clone()) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::Or, + Box::new(curr.0), + )) + }) + .unwrap_or(Predicate::false_()) + .0, + ), + )) + // Predicate::And(vec![ + // result.clone(), + // Predicate::Or(context.iter().map(|(_, p)| p.clone()).collect()), + // ]) + } else { + // T and (X1 or X2 or ... or Xn or TRUE) + Predicate(Expr::Binary( + Box::new(result.0.clone()), + ast::Operator::And, + Box::new( + context + .iter() + .map(|(_, p)| p.clone()) + .chain(std::iter::once(Predicate::true_())) + .reduce(|accum, curr| { + Predicate(Expr::Binary( + Box::new(accum.0), + ast::Operator::Or, + Box::new(curr.0), + )) + }) + .unwrap() // Chain guarantees at least one value + .0, + ), + )) + } + }), + ], + rng, + ); + } + result + } +} + +#[cfg(test)] +mod tests { + use rand::{Rng as _, SeedableRng as _}; + use rand_chacha::ChaCha8Rng; + + use crate::{ + generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + model::{ + query::predicate::{expr_to_value, Predicate}, + table::{SimValue, Table}, + }, + }; + + fn get_seed() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + #[test] + fn fuzz_arbitrary_table_true_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let table = Table::arbitrary(&mut rng); + let 
num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + let row = pick(&values, &mut rng); + let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, true)).0; + let value = expr_to_value(&predicate.0, row, &table); + assert!( + value.as_ref().is_some_and(|value| value.as_bool()), + "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}" + ) + } + } + + #[test] + fn fuzz_arbitrary_table_false_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + let row = pick(&values, &mut rng); + let predicate = SimplePredicate::arbitrary_from(&mut rng, (&table, row, false)).0; + let value = expr_to_value(&predicate.0, row, &table); + assert!( + !value.as_ref().is_some_and(|value| value.as_bool()), + "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}" + ) + } + } + + #[test] + fn fuzz_arbitrary_row_table_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + let row = pick(&values, &mut rng); + let predicate = Predicate::arbitrary_from(&mut rng, (&table, row)); + let value = expr_to_value(&predicate.0, row, &table); + assert!( + value.as_ref().is_some_and(|value| value.as_bool()), + "Predicate: {predicate:#?}\nValue: {value:#?}\nSeed: {seed}" + ) + } + } + + #[test] + fn 
fuzz_arbitrary_true_table_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let predicate = Predicate::arbitrary_from(&mut rng, (&table, true)); + let result = values + .iter() + .map(|row| predicate.test(row, &table)) + .reduce(|accum, curr| accum || curr) + .unwrap_or(false); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } + + #[test] + fn fuzz_arbitrary_false_table_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let predicate = Predicate::arbitrary_from(&mut rng, (&table, false)); + let result = values + .iter() + .map(|row| predicate.test(row, &table)) + .any(|res| !res); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } +} diff --git a/sql_generation/generation/predicate/unary.rs b/sql_generation/generation/predicate/unary.rs new file mode 100644 index 000000000..6800740d7 --- /dev/null +++ b/sql_generation/generation/predicate/unary.rs @@ -0,0 +1,306 @@ +//! Contains code regarding generation for [ast::Expr::Unary] Predicate +//! TODO: for now just generating [ast::Literal], but want to also generate Columns and any +//! 
arbitrary [ast::Expr] + +use turso_parser::ast::{self, Expr}; + +use crate::{ + generation::{backtrack, pick, predicate::SimplePredicate, ArbitraryFromMaybe}, + model::{ + query::predicate::Predicate, + table::{SimValue, TableContext}, + }, +}; + +pub struct TrueValue(pub SimValue); + +impl ArbitraryFromMaybe<&SimValue> for TrueValue { + fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + where + Self: Sized, + { + // If the Value is a true value return it else you cannot return a true Value + value.as_bool().then_some(Self(value.clone())) + } +} + +impl ArbitraryFromMaybe<&Vec<&SimValue>> for TrueValue { + fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> Option + where + Self: Sized, + { + if values.is_empty() { + return Some(Self(SimValue::TRUE)); + } + + let value = pick(values, rng); + Self::arbitrary_from_maybe(rng, *value) + } +} + +pub struct FalseValue(pub SimValue); + +impl ArbitraryFromMaybe<&SimValue> for FalseValue { + fn arbitrary_from_maybe(_rng: &mut R, value: &SimValue) -> Option + where + Self: Sized, + { + // If the Value is a false value return it else you cannot return a false Value + (!value.as_bool()).then_some(Self(value.clone())) + } +} + +impl ArbitraryFromMaybe<&Vec<&SimValue>> for FalseValue { + fn arbitrary_from_maybe(rng: &mut R, values: &Vec<&SimValue>) -> Option + where + Self: Sized, + { + if values.is_empty() { + return Some(Self(SimValue::FALSE)); + } + + let value = pick(values, rng); + Self::arbitrary_from_maybe(rng, *value) + } +} + +pub struct BitNotValue(pub SimValue); + +impl ArbitraryFromMaybe<(&SimValue, bool)> for BitNotValue { + fn arbitrary_from_maybe( + _rng: &mut R, + (value, predicate): (&SimValue, bool), + ) -> Option + where + Self: Sized, + { + let bit_not_val = value.unary_exec(ast::UnaryOperator::BitwiseNot); + // If you bit not the Value and it meets the predicate return Some, else None + (bit_not_val.as_bool() == predicate).then_some(BitNotValue(value.clone())) + } +} + +impl 
ArbitraryFromMaybe<(&Vec<&SimValue>, bool)> for BitNotValue { + fn arbitrary_from_maybe( + rng: &mut R, + (values, predicate): (&Vec<&SimValue>, bool), + ) -> Option + where + Self: Sized, + { + if values.is_empty() { + return None; + } + + let value = pick(values, rng); + Self::arbitrary_from_maybe(rng, (*value, predicate)) + } +} + +// TODO: have some more complex generation with columns names here as well +impl SimplePredicate { + /// Generates a true [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for some values in the table + pub fn true_unary( + rng: &mut R, + table: &T, + row: &[SimValue], + ) -> Self { + let columns = table.columns().collect::>(); + // Pick a random column + let column_index = rng.random_range(0..columns.len()); + let column_value = &row[column_index]; + let num_retries = row.len(); + // Avoid creation of NULLs + if row.is_empty() { + return SimplePredicate(Predicate(Expr::Literal(SimValue::TRUE.into()))); + } + let expr = backtrack( + vec![ + ( + num_retries, + Box::new(|rng| { + TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + assert!(value.0.as_bool()); + // Positive is a no-op in Sqlite + Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) + }) + }), + ), + // ( + // num_retries, + // Box::new(|rng| { + // TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(value.0.as_bool()); + // // True Value with negative is still True + // Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // BitNotValue::arbitrary_from_maybe(rng, (column_value, true)).map(|value| { + // Expr::unary( + // ast::UnaryOperator::BitwiseNot, + // Expr::Literal(value.0.into()), + // ) + // }) + // }), + // ), + ( + num_retries, + Box::new(|rng| { + FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + assert!(!value.0.as_bool()); + Expr::unary(ast::UnaryOperator::Not, 
Expr::Literal(value.0.into())) + }) + }), + ), + ], + rng, + ); + // If cannot generate a value + SimplePredicate(Predicate( + expr.unwrap_or(Expr::Literal(SimValue::TRUE.into())), + )) + } + + /// Generates a false [ast::Expr::Unary] [SimplePredicate] from a [TableContext] for a row in the table + pub fn false_unary( + rng: &mut R, + table: &T, + row: &[SimValue], + ) -> Self { + let columns = table.columns().collect::>(); + // Pick a random column + let column_index = rng.random_range(0..columns.len()); + let column_value = &row[column_index]; + let num_retries = row.len(); + // Avoid creation of NULLs + if row.is_empty() { + return SimplePredicate(Predicate(Expr::Literal(SimValue::FALSE.into()))); + } + let expr = backtrack( + vec![ + // ( + // num_retries, + // Box::new(|rng| { + // FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(!value.0.as_bool()); + // // Positive is a no-op in Sqlite + // Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(!value.0.as_bool()); + // // True Value with negative is still True + // Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // BitNotValue::arbitrary_from_maybe(rng, (column_value, false)).map(|value| { + // Expr::unary( + // ast::UnaryOperator::BitwiseNot, + // Expr::Literal(value.0.into()), + // ) + // }) + // }), + // ), + ( + num_retries, + Box::new(|rng| { + TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + assert!(value.0.as_bool()); + Expr::unary(ast::UnaryOperator::Not, Expr::Literal(value.0.into())) + }) + }), + ), + ], + rng, + ); + // If cannot generate a value + SimplePredicate(Predicate( + expr.unwrap_or(Expr::Literal(SimValue::FALSE.into())), + )) + } +} + +#[cfg(test)] +mod tests { + use 
rand::{Rng as _, SeedableRng as _}; + use rand_chacha::ChaCha8Rng; + + use crate::{ + generation::{pick, predicate::SimplePredicate, Arbitrary, ArbitraryFrom as _}, + model::table::{SimValue, Table}, + }; + + fn get_seed() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + #[test] + fn fuzz_true_unary_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let row = pick(&table.rows, &mut rng); + let predicate = SimplePredicate::true_unary(&mut rng, &table, row); + let result = values + .iter() + .map(|row| predicate.0.test(row, &table)) + .reduce(|accum, curr| accum || curr) + .unwrap_or(false); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } + + #[test] + fn fuzz_false_unary_simple_predicate() { + let seed = get_seed(); + let mut rng = ChaCha8Rng::seed_from_u64(seed); + for _ in 0..10000 { + let mut table = Table::arbitrary(&mut rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(&mut rng, &c.column_type)) + .collect() + }) + .collect(); + table.rows.extend(values.clone()); + let row = pick(&table.rows, &mut rng); + let predicate = SimplePredicate::false_unary(&mut rng, &table, row); + let result = values + .iter() + .map(|row| predicate.0.test(row, &table)) + .any(|res| !res); + assert!(result, "Predicate: {predicate:#?}\nSeed: {seed}") + } + } +} diff --git a/sql_generation/generation/property.rs b/sql_generation/generation/property.rs new file mode 100644 index 000000000..495f75ec7 --- /dev/null +++ 
b/sql_generation/generation/property.rs @@ -0,0 +1,1533 @@ +use serde::{Deserialize, Serialize}; +use turso_core::{types, LimboError}; +use turso_parser::ast::{self}; + +use crate::{ + model::{ + query::{ + predicate::Predicate, + select::{ + CompoundOperator, CompoundSelect, Distinctness, ResultColumn, SelectBody, + SelectInner, + }, + transaction::{Begin, Commit, Rollback}, + update::Update, + Create, Delete, Drop, Insert, Query, Select, + }, + table::SimValue, + }, + runner::env::SimulatorEnv, +}; + +use super::{ + frequency, pick, pick_index, + plan::{Assertion, Interaction, InteractionStats, ResultSet}, + ArbitraryFrom, +}; + +/// Properties are representations of executable specifications +/// about the database behavior. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) enum Property { + /// Insert-Select is a property in which the inserted row + /// must be in the resulting rows of a select query that has a + /// where clause that matches the inserted row. + /// The execution of the property is as follows + /// INSERT INTO VALUES (...) + /// I_0 + /// I_1 + /// ... + /// I_n + /// SELECT * FROM WHERE + /// The interactions in the middle has the following constraints; + /// - There will be no errors in the middle interactions. + /// - The inserted row will not be deleted. + /// - The inserted row will not be updated. + /// - The table `t` will not be renamed, dropped, or altered. + InsertValuesSelect { + /// The insert query + insert: Insert, + /// Selected row index + row_index: usize, + /// Additional interactions in the middle of the property + queries: Vec, + /// The select query + select: Select, + /// Interactive query information if any + interactive: Option, + }, + /// ReadYourUpdatesBack is a property in which the updated rows + /// must be in the resulting rows of a select query that has a + /// where clause that matches the updated row. 
+ /// The execution of the property is as follows + /// UPDATE SET WHERE + /// SELECT FROM WHERE + /// These interactions are executed in immediate succession + /// just to verify the property that our updates did what they + /// were supposed to do. + ReadYourUpdatesBack { + update: Update, + select: Select, + }, + /// TableHasExpectedContent is a property in which the table + /// must have the expected content, i.e. all the insertions and + /// updates and deletions should have been persisted in the way + /// we think they were. + /// The execution of the property is as follows + /// SELECT * FROM + /// ASSERT + TableHasExpectedContent { + table: String, + }, + /// Double Create Failure is a property in which creating + /// the same table twice leads to an error. + /// The execution of the property is as follows + /// CREATE TABLE (...) + /// I_0 + /// I_1 + /// ... + /// I_n + /// CREATE TABLE (...) -> Error + /// The interactions in the middle has the following constraints; + /// - There will be no errors in the middle interactions. + /// - Table `t` will not be renamed or dropped. + DoubleCreateFailure { + /// The create query + create: Create, + /// Additional interactions in the middle of the property + queries: Vec, + }, + /// Select Limit is a property in which the select query + /// has a limit clause that is respected by the query. + /// The execution of the property is as follows + /// SELECT * FROM WHERE LIMIT + /// This property is a single-interaction property. + /// The interaction has the following constraints; + /// - The select query will respect the limit clause. + SelectLimit { + /// The select query + select: Select, + }, + /// Delete-Select is a property in which the deleted row + /// must not be in the resulting rows of a select query that has a + /// where clause that matches the deleted row. In practice, `p1` of + /// the delete query will be used as the predicate for the select query, + /// hence the select should return NO ROWS. 
+ /// The execution of the property is as follows + /// DELETE FROM WHERE + /// I_0 + /// I_1 + /// ... + /// I_n + /// SELECT * FROM WHERE + /// The interactions in the middle has the following constraints; + /// - There will be no errors in the middle interactions. + /// - A row that holds for the predicate will not be inserted. + /// - The table `t` will not be renamed, dropped, or altered. + DeleteSelect { + table: String, + predicate: Predicate, + queries: Vec, + }, + /// Drop-Select is a property in which selecting from a dropped table + /// should result in an error. + /// The execution of the property is as follows + /// DROP TABLE + /// I_0 + /// I_1 + /// ... + /// I_n + /// SELECT * FROM WHERE -> Error + /// The interactions in the middle has the following constraints; + /// - There will be no errors in the middle interactions. + /// - The table `t` will not be created, no table will be renamed to `t`. + DropSelect { + table: String, + queries: Vec, + select: Select, + }, + /// Select-Select-Optimizer is a property in which we test the optimizer by + /// running two equivalent select queries, one with `SELECT from ` + /// and the other with `SELECT * from WHERE `. As highlighted by + /// Rigger et al. in Non-Optimizing Reference Engine Construction(NoREC), SQLite + /// tends to optimize `where` statements while keeping the result column expressions + /// unoptimized. This property is used to test the optimizer. The property is successful + /// if the two queries return the same number of rows. + SelectSelectOptimizer { + table: String, + predicate: Predicate, + }, + /// Where-True-False-Null is a property that tests the boolean logic implementation + /// in the database. It relies on the fact that `P == true || P == false || P == null` should return true, + /// as SQLite uses a ternary logic system. This property is invented in "Finding Bugs in Database Systems via Query Partitioning" + /// by Rigger et al. 
and it is canonically called Ternary Logic Partitioning (TLP). + WhereTrueFalseNull { + select: Select, + predicate: Predicate, + }, + /// UNION-ALL-Preserves-Cardinality is a property that tests the UNION ALL operator + /// implementation in the database. It relies on the fact that `SELECT * FROM WHERE UNION ALL SELECT * FROM WHERE ` + /// should return the same number of rows as `SELECT FROM WHERE `. + /// > The property is succesfull when the UNION ALL of 2 select queries returns the same number of rows + /// > as the sum of the two select queries. + UNIONAllPreservesCardinality { + select: Select, + where_clause: Predicate, + }, + /// FsyncNoWait is a property which tests if we do not loose any data after not waiting for fsync. + /// + /// # Interactions + /// - Executes the `query` without waiting for fsync + /// - Drop all connections and Reopen the database + /// - Execute the `query` again + /// - Query tables to assert that the values were inserted + /// + FsyncNoWait { + query: Query, + tables: Vec, + }, + FaultyQuery { + query: Query, + tables: Vec, + }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct InteractiveQueryInfo { + start_with_immediate: bool, + end_with_commit: bool, +} + +impl Property { + pub(crate) fn name(&self) -> &str { + match self { + Property::InsertValuesSelect { .. } => "Insert-Values-Select", + Property::ReadYourUpdatesBack { .. } => "Read-Your-Updates-Back", + Property::TableHasExpectedContent { .. } => "Table-Has-Expected-Content", + Property::DoubleCreateFailure { .. } => "Double-Create-Failure", + Property::SelectLimit { .. } => "Select-Limit", + Property::DeleteSelect { .. } => "Delete-Select", + Property::DropSelect { .. } => "Drop-Select", + Property::SelectSelectOptimizer { .. } => "Select-Select-Optimizer", + Property::WhereTrueFalseNull { .. } => "Where-True-False-Null", + Property::FsyncNoWait { .. } => "FsyncNoWait", + Property::FaultyQuery { .. 
} => "FaultyQuery", + Property::UNIONAllPreservesCardinality { .. } => "UNION-All-Preserves-Cardinality", + } + } + /// interactions construct a list of interactions, which is an executable representation of the property. + /// the requirement of property -> vec conversion emerges from the need to serialize the property, + /// and `interaction` cannot be serialized directly. + pub(crate) fn interactions(&self) -> Vec { + match self { + Property::TableHasExpectedContent { table } => { + let table = table.to_string(); + let table_name = table.clone(); + let assumption = Interaction::Assumption(Assertion { + name: format!("table {} exists", table.clone()), + func: Box::new(move |_: &Vec, env: &mut SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table_name) { + Ok(Ok(())) + } else { + Ok(Err(format!("table {table_name} does not exist"))) + } + }), + }); + + let select_interaction = Interaction::Query(Query::Select(Select::simple( + table.clone(), + Predicate::true_(), + ))); + + let assertion = Interaction::Assertion(Assertion { + name: format!("table {} should have the expected content", table.clone()), + func: Box::new(move |stack: &Vec, env| { + let rows = stack.last().unwrap(); + let Ok(rows) = rows else { + return Ok(Err(format!("expected rows but got error: {rows:?}"))); + }; + let sim_table = env + .tables + .iter() + .find(|t| t.name == table) + .expect("table should be in enviroment"); + if rows.len() != sim_table.rows.len() { + return Ok(Err(format!( + "expected {} rows but got {} for table {}", + sim_table.rows.len(), + rows.len(), + table.clone() + ))); + } + for expected_row in sim_table.rows.iter() { + if !rows.contains(expected_row) { + return Ok(Err(format!( + "expected row {:?} not found in table {}", + expected_row, + table.clone() + ))); + } + } + Ok(Ok(())) + }), + }); + + vec![assumption, select_interaction, assertion] + } + Property::ReadYourUpdatesBack { update, select } => { + let table = update.table().to_string(); + let assumption = 
Interaction::Assumption(Assertion { + name: format!("table {} exists", table.clone()), + func: Box::new(move |_: &Vec, env: &mut SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table.clone()) { + Ok(Ok(())) + } else { + Ok(Err(format!("table {} does not exist", table.clone()))) + } + }), + }); + + let update_interaction = Interaction::Query(Query::Update(update.clone())); + let select_interaction = Interaction::Query(Query::Select(select.clone())); + + let update = update.clone(); + + let table = update.table().to_string(); + + let assertion = Interaction::Assertion(Assertion { + name: format!( + "updated rows should be found and have the updated values for table {}", + table.clone() + ), + func: Box::new(move |stack: &Vec, _| { + let rows = stack.last().unwrap(); + match rows { + Ok(rows) => { + for row in rows { + for (i, (col, val)) in update.set_values.iter().enumerate() { + if &row[i] != val { + return Ok(Err(format!("updated row {} has incorrect value for column {col}: expected {val}, got {}", i, row[i]))); + } + } + } + Ok(Ok(())) + } + Err(err) => Err(LimboError::InternalError(err.to_string())), + } + }), + }); + + vec![ + assumption, + update_interaction, + select_interaction, + assertion, + ] + } + Property::InsertValuesSelect { + insert, + row_index, + queries, + select, + interactive, + } => { + let (table, values) = if let Insert::Values { table, values } = insert { + (table, values) + } else { + unreachable!( + "insert query should be Insert::Values for Insert-Values-Select property" + ) + }; + // Check that the insert query has at least 1 value + assert!( + !values.is_empty(), + "insert query should have at least 1 value" + ); + + // Pick a random row within the insert values + let row = values[*row_index].clone(); + + // Assume that the table exists + let assumption = Interaction::Assumption(Assertion { + name: format!("table {} exists", insert.table()), + func: Box::new({ + let table_name = table.clone(); + move |_: &Vec, env: &mut 
SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table_name) { + Ok(Ok(())) + } else { + Ok(Err(format!("table {table_name} does not exist"))) + } + } + }), + }); + + let assertion = Interaction::Assertion(Assertion { + name: format!( + "row [{:?}] should be found in table {}, interactive={} commit={}, rollback={}", + row.iter().map(|v| v.to_string()).collect::>(), + insert.table(), + interactive.is_some(), + interactive + .as_ref() + .map(|i| i.end_with_commit) + .unwrap_or(false), + interactive + .as_ref() + .map(|i| !i.end_with_commit) + .unwrap_or(false), + ), + func: Box::new(move |stack: &Vec, _| { + let rows = stack.last().unwrap(); + match rows { + Ok(rows) => { + let found = rows.iter().any(|r| r == &row); + if found { + Ok(Ok(())) + } else { + Ok(Err(format!("row [{:?}] not found in table", row.iter().map(|v| v.to_string()).collect::>()))) + } + } + Err(err) => Err(LimboError::InternalError(err.to_string())), + } + }), + }); + + let mut interactions = Vec::new(); + interactions.push(assumption); + interactions.push(Interaction::Query(Query::Insert(insert.clone()))); + interactions.extend(queries.clone().into_iter().map(Interaction::Query)); + interactions.push(Interaction::Query(Query::Select(select.clone()))); + interactions.push(assertion); + + interactions + } + Property::DoubleCreateFailure { create, queries } => { + let table_name = create.table.name.clone(); + + let assumption = Interaction::Assumption(Assertion { + name: "Double-Create-Failure should not be called on an existing table" + .to_string(), + func: Box::new(move |_: &Vec, env: &mut SimulatorEnv| { + if !env.tables.iter().any(|t| t.name == table_name) { + Ok(Ok(())) + } else { + Ok(Err(format!("table {table_name} already exists"))) + } + }), + }); + + let cq1 = Interaction::Query(Query::Create(create.clone())); + let cq2 = Interaction::Query(Query::Create(create.clone())); + + let table_name = create.table.name.clone(); + + let assertion = Interaction::Assertion(Assertion { + 
name: + "creating two tables with the name should result in a failure for the second query" + .to_string(), + func: Box::new(move |stack: &Vec, _| { + let last = stack.last().unwrap(); + match last { + Ok(success) => Ok(Err(format!("expected table creation to fail but it succeeded: {success:?}"))), + Err(e) => { + if e.to_string().to_lowercase().contains(&format!("table {table_name} already exists")) { + Ok(Ok(())) + } else { + Ok(Err(format!("expected table already exists error, got: {e}"))) + } + } + } + }), + }); + + let mut interactions = Vec::new(); + interactions.push(assumption); + interactions.push(cq1); + interactions.extend(queries.clone().into_iter().map(Interaction::Query)); + interactions.push(cq2); + interactions.push(assertion); + + interactions + } + Property::SelectLimit { select } => { + let assumption = Interaction::Assumption(Assertion { + name: format!( + "table ({}) exists", + select + .dependencies() + .into_iter() + .collect::>() + .join(", ") + ), + func: Box::new({ + let table_name = select.dependencies(); + move |_: &Vec, env: &mut SimulatorEnv| { + if table_name + .iter() + .all(|table| env.tables.iter().any(|t| t.name == *table)) + { + Ok(Ok(())) + } else { + let missing_tables = table_name + .iter() + .filter(|t| !env.tables.iter().any(|t2| t2.name == **t)) + .collect::>(); + Ok(Err(format!("missing tables: {missing_tables:?}"))) + } + } + }), + }); + + let limit = select + .limit + .expect("Property::SelectLimit without a LIMIT clause"); + + let assertion = Interaction::Assertion(Assertion { + name: "select query should respect the limit clause".to_string(), + func: Box::new(move |stack: &Vec, _| { + let last = stack.last().unwrap(); + match last { + Ok(rows) => { + if limit >= rows.len() { + Ok(Ok(())) + } else { + Ok(Err(format!( + "limit {} violated: got {} rows", + limit, + rows.len() + ))) + } + } + Err(_) => Ok(Ok(())), + } + }), + }); + + vec![ + assumption, + Interaction::Query(Query::Select(select.clone())), + assertion, + ] 
+ } + Property::DeleteSelect { + table, + predicate, + queries, + } => { + let assumption = Interaction::Assumption(Assertion { + name: format!("table {table} exists"), + func: Box::new({ + let table = table.clone(); + move |_: &Vec, env: &mut SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table) { + Ok(Ok(())) + } else { + { + let available_tables: Vec = + env.tables.iter().map(|t| t.name.clone()).collect(); + Ok(Err(format!( + "table \'{table}\' not found. Available tables: {available_tables:?}" + ))) + } + } + } + }), + }); + + let delete = Interaction::Query(Query::Delete(Delete { + table: table.clone(), + predicate: predicate.clone(), + })); + + let select = Interaction::Query(Query::Select(Select::simple( + table.clone(), + predicate.clone(), + ))); + + let assertion = Interaction::Assertion(Assertion { + name: format!("`{select}` should return no values for table `{table}`",), + func: Box::new(move |stack: &Vec, _| { + let rows = stack.last().unwrap(); + match rows { + Ok(rows) => { + if rows.is_empty() { + Ok(Ok(())) + } else { + Ok(Err(format!( + "expected no rows but got {} rows: {:?}", + rows.len(), + rows.iter() + .map(|r| print_row(r)) + .collect::>() + .join(", ") + ))) + } + } + Err(err) => Err(LimboError::InternalError(err.to_string())), + } + }), + }); + + let mut interactions = Vec::new(); + interactions.push(assumption); + interactions.push(delete); + interactions.extend(queries.clone().into_iter().map(Interaction::Query)); + interactions.push(select); + interactions.push(assertion); + + interactions + } + Property::DropSelect { + table, + queries, + select, + } => { + let assumption = Interaction::Assumption(Assertion { + name: format!("table {table} exists"), + func: Box::new({ + let table = table.clone(); + move |_, env: &mut SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table) { + Ok(Ok(())) + } else { + { + let available_tables: Vec = + env.tables.iter().map(|t| t.name.clone()).collect(); + Ok(Err(format!( + "table 
\'{table}\' not found. Available tables: {available_tables:?}" + ))) + } + } + } + }), + }); + + let table_name = table.clone(); + + let assertion = Interaction::Assertion(Assertion { + name: format!("select query should result in an error for table '{table}'"), + func: Box::new(move |stack: &Vec, _| { + let last = stack.last().unwrap(); + match last { + Ok(success) => Ok(Err(format!( + "expected table creation to fail but it succeeded: {success:?}" + ))), + Err(e) => { + if e.to_string() + .contains(&format!("Table {table_name} does not exist")) + { + Ok(Ok(())) + } else { + Ok(Err(format!( + "expected table does not exist error, got: {e}" + ))) + } + } + } + }), + }); + + let drop = Interaction::Query(Query::Drop(Drop { + table: table.clone(), + })); + + let select = Interaction::Query(Query::Select(select.clone())); + + let mut interactions = Vec::new(); + + interactions.push(assumption); + interactions.push(drop); + interactions.extend(queries.clone().into_iter().map(Interaction::Query)); + interactions.push(select); + interactions.push(assertion); + + interactions + } + Property::SelectSelectOptimizer { table, predicate } => { + let assumption = Interaction::Assumption(Assertion { + name: format!("table {table} exists"), + func: Box::new({ + let table = table.clone(); + move |_: &Vec, env: &mut SimulatorEnv| { + if env.tables.iter().any(|t| t.name == table) { + Ok(Ok(())) + } else { + { + let available_tables: Vec = + env.tables.iter().map(|t| t.name.clone()).collect(); + Ok(Err(format!( + "table \'{table}\' not found. 
Available tables: {available_tables:?}" + ))) + } + } + } + }), + }); + + let select1 = Interaction::Query(Query::Select(Select::single( + table.clone(), + vec![ResultColumn::Expr(predicate.clone())], + Predicate::true_(), + None, + Distinctness::All, + ))); + + let select2_query = Query::Select(Select::simple(table.clone(), predicate.clone())); + + let select2 = Interaction::Query(select2_query); + + let assertion = Interaction::Assertion(Assertion { + name: "select queries should return the same amount of results".to_string(), + func: Box::new(move |stack: &Vec, _| { + let select_star = stack.last().unwrap(); + let select_predicate = stack.get(stack.len() - 2).unwrap(); + match (select_predicate, select_star) { + (Ok(rows1), Ok(rows2)) => { + // If rows1 results have more than 1 column, there is a problem + if rows1.iter().any(|vs| vs.len() > 1) { + return Err(LimboError::InternalError( + "Select query without the star should return only one column".to_string(), + )); + } + // Count the 1s in the select query without the star + let rows1_count = rows1 + .iter() + .filter(|vs| { + let v = vs.first().unwrap(); + v.as_bool() + }) + .count(); + tracing::debug!( + "select1 returned {} rows, select2 returned {} rows", + rows1_count, + rows2.len() + ); + if rows1_count == rows2.len() { + Ok(Ok(())) + } else { + Ok(Err(format!( + "row counts don't match: {} vs {}", + rows1_count, + rows2.len() + ))) + } + } + (Err(e1), Err(e2)) => { + tracing::debug!("Error in select1 AND select2: {}, {}", e1, e2); + Ok(Ok(())) + } + (Err(e), _) | (_, Err(e)) => { + tracing::error!("Error in select1 OR select2: {}", e); + Err(LimboError::InternalError(e.to_string())) + } + } + }), + }); + + vec![assumption, select1, select2, assertion] + } + Property::FsyncNoWait { query, tables } => { + let checks = assert_all_table_values(tables); + Vec::from_iter( + std::iter::once(Interaction::FsyncQuery(query.clone())).chain(checks), + ) + } + Property::FaultyQuery { query, tables } => { + let 
checks = assert_all_table_values(tables); + let query_clone = query.clone(); + let assert = Assertion { + // A fault may not occur as we first signal we want a fault injected, + // then when IO is called the fault triggers. It may happen that a fault is injected + // but no IO happens right after it + name: "fault occured".to_string(), + func: Box::new(move |stack, env: &mut SimulatorEnv| { + let last = stack.last().unwrap(); + match last { + Ok(_) => { + let _ = query_clone.shadow(&mut env.tables); + Ok(Ok(())) + } + Err(err) => { + // We cannot make any assumptions about the error content; all we are about is, if the statement errored, + // we don't shadow the results into the simulator env, i.e. we assume whatever the statement did was rolled back. + tracing::error!("Fault injection produced error: {err}"); + Ok(Ok(())) + } + } + }), + }; + let first = [ + Interaction::FaultyQuery(query.clone()), + Interaction::Assertion(assert), + ] + .into_iter(); + Vec::from_iter(first.chain(checks)) + } + Property::WhereTrueFalseNull { select, predicate } => { + let assumption = Interaction::Assumption(Assertion { + name: format!( + "tables ({}) exists", + select + .dependencies() + .into_iter() + .collect::>() + .join(", ") + ), + func: Box::new({ + let tables = select.dependencies(); + move |_: &Vec, env: &mut SimulatorEnv| { + if tables + .iter() + .all(|table| env.tables.iter().any(|t| t.name == *table)) + { + Ok(Ok(())) + } else { + let missing_tables = tables + .iter() + .filter(|t| !env.tables.iter().any(|t2| t2.name == **t)) + .collect::>(); + Ok(Err(format!("missing tables: {missing_tables:?}"))) + } + } + }), + }); + + let old_predicate = select.body.select.where_clause.clone(); + + let p_true = Predicate::and(vec![old_predicate.clone(), predicate.clone()]); + let p_false = Predicate::and(vec![ + old_predicate.clone(), + Predicate::not(predicate.clone()), + ]); + let p_null = Predicate::and(vec![ + old_predicate.clone(), + Predicate::is(predicate.clone(), 
Predicate::null()), + ]); + + let select_tlp = Select { + body: SelectBody { + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_true, + order_by: None, + }), + compounds: vec![ + CompoundSelect { + operator: CompoundOperator::UnionAll, + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_false, + order_by: None, + }), + }, + CompoundSelect { + operator: CompoundOperator::UnionAll, + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_null, + order_by: None, + }), + }, + ], + }, + limit: None, + }; + + let select = Interaction::Query(Query::Select(select.clone())); + let select_tlp = Interaction::Query(Query::Select(select_tlp)); + + // select and select_tlp should return the same rows + let assertion = Interaction::Assertion(Assertion { + name: "select and select_tlp should return the same rows".to_string(), + func: Box::new(move |stack: &Vec, _: &mut SimulatorEnv| { + if stack.len() < 2 { + return Err(LimboError::InternalError( + "Not enough result sets on the stack".to_string(), + )); + } + + let select_result_set = stack.get(stack.len() - 2).unwrap(); + let select_tlp_result_set = stack.last().unwrap(); + + match (select_result_set, select_tlp_result_set) { + (Ok(select_rows), Ok(select_tlp_rows)) => { + if select_rows.len() != select_tlp_rows.len() { + return Ok(Err(format!("row count mismatch: select returned {} rows, select_tlp returned {} rows", select_rows.len(), select_tlp_rows.len()))); + } + // Check if any row in select_rows is not in select_tlp_rows + for row in select_rows.iter() { + if !select_tlp_rows.iter().any(|r| r == row) { + tracing::debug!( + 
"select and select_tlp returned different rows, ({}) is in select but not in select_tlp", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); + return Ok(Err(format!( + "row mismatch: row [{}] exists in select results but not in select_tlp results", + print_row(row) + ))); + } + } + // Check if any row in select_tlp_rows is not in select_rows + for row in select_tlp_rows.iter() { + if !select_rows.iter().any(|r| r == row) { + tracing::debug!( + "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); + + return Ok(Err(format!( + "row mismatch: row [{}] exists in select_tlp but not in select", + print_row(row) + ))); + } + } + // If we reach here, the rows are the same + tracing::trace!( + "select and select_tlp returned the same rows: {:?}", + select_rows + ); + + Ok(Ok(())) + } + (Err(e), _) | (_, Err(e)) => { + tracing::error!("Error in select or select_tlp: {}", e); + Err(LimboError::InternalError(e.to_string())) + } + } + }), + }); + + vec![assumption, select, select_tlp, assertion] + } + Property::UNIONAllPreservesCardinality { + select, + where_clause, + } => { + let s1 = select.clone(); + let mut s2 = select.clone(); + s2.body.select.where_clause = where_clause.clone(); + let s3 = Select::compound(s1.clone(), s2.clone(), CompoundOperator::UnionAll); + + vec![ + Interaction::Query(Query::Select(s1.clone())), + Interaction::Query(Query::Select(s2.clone())), + Interaction::Query(Query::Select(s3.clone())), + Interaction::Assertion(Assertion { + name: "UNION ALL should preserve cardinality".to_string(), + func: Box::new(move |stack: &Vec, _: &mut SimulatorEnv| { + if stack.len() < 3 { + return Err(LimboError::InternalError( + "Not enough result sets on the stack".to_string(), + )); + } + + let select1 = stack.get(stack.len() - 3).unwrap(); + let select2 = stack.get(stack.len() - 2).unwrap(); + let union_all = stack.last().unwrap(); + + match (select1, 
select2, union_all) { + (Ok(rows1), Ok(rows2), Ok(union_rows)) => { + let count1 = rows1.len(); + let count2 = rows2.len(); + let union_count = union_rows.len(); + if union_count == count1 + count2 { + Ok(Ok(())) + } else { + Ok(Err(format!("UNION ALL should preserve cardinality but it didn't: {count1} + {count2} != {union_count}"))) + } + } + (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { + tracing::error!("Error in select queries: {}", e); + Err(LimboError::InternalError(e.to_string())) + } + } + }), + }), + ] + } + } + } +} + +fn assert_all_table_values(tables: &[String]) -> impl Iterator + use<'_> { + let checks = tables.iter().flat_map(|table| { + let select = Interaction::Query(Query::Select(Select::simple( + table.clone(), + Predicate::true_(), + ))); + + let assertion = Interaction::Assertion(Assertion { + name: format!("table {table} should contain all of its expected values"), + func: Box::new({ + let table = table.clone(); + move |stack: &Vec, env: &mut SimulatorEnv| { + let table = env.tables.iter().find(|t| t.name == table).ok_or_else(|| { + LimboError::InternalError(format!( + "table {table} should exist in simulator env" + )) + })?; + let last = stack.last().unwrap(); + match last { + Ok(vals) => { + // Check if all values in the table are present in the result set + // Find a value in the table that is not in the result set + let model_contains_db = table.rows.iter().find(|v| { + !vals.iter().any(|r| { + &r == v + }) + }); + let db_contains_model = vals.iter().find(|v| { + !table.rows.iter().any(|r| &r == v) + }); + + if let Some(model_contains_db) = model_contains_db { + tracing::debug!( + "table {} does not contain the expected values, the simulator model has more rows than the database: {:?}", + table.name, + print_row(model_contains_db) + ); + Ok(Err(format!("table {} does not contain the expected values, the simulator model has more rows than the database: {:?}", table.name, print_row(model_contains_db)))) + } else if let 
Some(db_contains_model) = db_contains_model { + tracing::debug!( + "table {} does not contain the expected values, the database has more rows than the simulator model: {:?}", + table.name, + print_row(db_contains_model) + ); + Ok(Err(format!("table {} does not contain the expected values, the database has more rows than the simulator model: {:?}", table.name, print_row(db_contains_model)))) + } else { + Ok(Ok(())) + } + } + Err(err) => Err(LimboError::InternalError(format!("{err}"))), + } + } + }), + }); + [select, assertion].into_iter() + }); + checks +} + +#[derive(Debug)] +pub(crate) struct Remaining { + pub(crate) read: f64, + pub(crate) write: f64, + pub(crate) create: f64, + pub(crate) create_index: f64, + pub(crate) delete: f64, + pub(crate) update: f64, + pub(crate) drop: f64, +} + +pub(crate) fn remaining(env: &SimulatorEnv, stats: &InteractionStats) -> Remaining { + let remaining_read = ((env.opts.max_interactions as f64 * env.opts.read_percent / 100.0) + - (stats.read_count as f64)) + .max(0.0); + let remaining_write = ((env.opts.max_interactions as f64 * env.opts.write_percent / 100.0) + - (stats.write_count as f64)) + .max(0.0); + let remaining_create = ((env.opts.max_interactions as f64 * env.opts.create_percent / 100.0) + - (stats.create_count as f64)) + .max(0.0); + + let remaining_create_index = + ((env.opts.max_interactions as f64 * env.opts.create_index_percent / 100.0) + - (stats.create_index_count as f64)) + .max(0.0); + + let remaining_delete = ((env.opts.max_interactions as f64 * env.opts.delete_percent / 100.0) + - (stats.delete_count as f64)) + .max(0.0); + let remaining_update = ((env.opts.max_interactions as f64 * env.opts.update_percent / 100.0) + - (stats.update_count as f64)) + .max(0.0); + let remaining_drop = ((env.opts.max_interactions as f64 * env.opts.drop_percent / 100.0) + - (stats.drop_count as f64)) + .max(0.0); + + Remaining { + read: remaining_read, + write: remaining_write, + create: remaining_create, + create_index: 
remaining_create_index, + delete: remaining_delete, + drop: remaining_drop, + update: remaining_update, + } +} + +fn property_insert_values_select( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate rows to insert + let rows = (0..rng.random_range(1..=5)) + .map(|_| Vec::::arbitrary_from(rng, table)) + .collect::>(); + + // Pick a random row to select + let row_index = pick_index(rows.len(), rng); + let row = rows[row_index].clone(); + + // Insert the rows + let insert_query = Insert::Values { + table: table.name.clone(), + values: rows, + }; + + // Choose if we want queries to be executed in an interactive transaction + let interactive = if rng.random_bool(0.5) { + Some(InteractiveQueryInfo { + start_with_immediate: rng.random_bool(0.5), + end_with_commit: rng.random_bool(0.5), + }) + } else { + None + }; + // Create random queries respecting the constraints + let mut queries = Vec::new(); + // - [x] There will be no errors in the middle interactions. (this constraint is impossible to check, so this is just best effort) + // - [x] The inserted row will not be deleted. + // - [x] The inserted row will not be updated. + // - [ ] The table `t` will not be renamed, dropped, or altered. (todo: add this constraint once ALTER or DROP is implemented) + if let Some(ref interactive) = interactive { + queries.push(Query::Begin(Begin { + immediate: interactive.start_with_immediate, + })); + } + for _ in 0..rng.random_range(0..3) { + let query = Query::arbitrary_from(rng, (env, remaining)); + match &query { + Query::Delete(Delete { + table: t, + predicate, + }) => { + // The inserted row will not be deleted. + if t == &table.name && predicate.test(&row, table) { + continue; + } + } + Query::Create(Create { table: t }) => { + // There will be no errors in the middle interactions. 
+ // - Creating the same table is an error + if t.name == table.name { + continue; + } + } + Query::Update(Update { + table: t, + set_values: _, + predicate, + }) => { + // The inserted row will not be updated. + if t == &table.name && predicate.test(&row, table) { + continue; + } + } + _ => (), + } + queries.push(query); + } + if let Some(ref interactive) = interactive { + queries.push(if interactive.end_with_commit { + Query::Commit(Commit) + } else { + Query::Rollback(Rollback) + }); + } + + // Select the row + let select_query = Select::simple( + table.name.clone(), + Predicate::arbitrary_from(rng, (table, &row)), + ); + + Property::InsertValuesSelect { + insert: insert_query, + row_index, + queries, + select: select_query, + interactive, + } +} + +fn property_read_your_updates_back(rng: &mut R, env: &SimulatorEnv) -> Property { + // e.g. UPDATE t SET a=1, b=2 WHERE c=1; + let update = Update::arbitrary_from(rng, env); + // e.g. SELECT a, b FROM t WHERE c=1; + let select = Select::single( + update.table().to_string(), + update + .set_values + .iter() + .map(|(col, _)| ResultColumn::Column(col.clone())) + .collect(), + update.predicate.clone(), + None, + Distinctness::All, + ); + + Property::ReadYourUpdatesBack { update, select } +} + +fn property_table_has_expected_content(rng: &mut R, env: &SimulatorEnv) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + Property::TableHasExpectedContent { + table: table.name.clone(), + } +} + +fn property_select_limit(rng: &mut R, env: &SimulatorEnv) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Select the table + let select = Select::single( + table.name.clone(), + vec![ResultColumn::Star], + Predicate::arbitrary_from(rng, table), + Some(rng.random_range(1..=5)), + Distinctness::All, + ); + Property::SelectLimit { select } +} + +fn property_double_create_failure( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + // Get a random table 
+ let table = pick(&env.tables, rng); + // Create the table + let create_query = Create { + table: table.clone(), + }; + + // Create random queries respecting the constraints + let mut queries = Vec::new(); + // The interactions in the middle has the following constraints; + // - [x] There will be no errors in the middle interactions.(best effort) + // - [ ] Table `t` will not be renamed or dropped.(todo: add this constraint once ALTER or DROP is implemented) + for _ in 0..rng.random_range(0..3) { + let query = Query::arbitrary_from(rng, (env, remaining)); + if let Query::Create(Create { table: t }) = &query { + // There will be no errors in the middle interactions. + // - Creating the same table is an error + if t.name == table.name { + continue; + } + } + queries.push(query); + } + + Property::DoubleCreateFailure { + create: create_query, + queries, + } +} + +fn property_delete_select( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let predicate = Predicate::arbitrary_from(rng, table); + + // Create random queries respecting the constraints + let mut queries = Vec::new(); + // - [x] There will be no errors in the middle interactions. (this constraint is impossible to check, so this is just best effort) + // - [x] A row that holds for the predicate will not be inserted. + // - [ ] The table `t` will not be renamed, dropped, or altered. (todo: add this constraint once ALTER or DROP is implemented) + for _ in 0..rng.random_range(0..3) { + let query = Query::arbitrary_from(rng, (env, remaining)); + match &query { + Query::Insert(Insert::Values { table: t, values }) => { + // A row that holds for the predicate will not be inserted. 
+ if t == &table.name && values.iter().any(|v| predicate.test(v, table)) { + continue; + } + } + Query::Insert(Insert::Select { + table: t, + select: _, + }) => { + // A row that holds for the predicate will not be inserted. + if t == &table.name { + continue; + } + } + Query::Update(Update { table: t, .. }) => { + // A row that holds for the predicate will not be updated. + if t == &table.name { + continue; + } + } + Query::Create(Create { table: t }) => { + // There will be no errors in the middle interactions. + // - Creating the same table is an error + if t.name == table.name { + continue; + } + } + _ => (), + } + queries.push(query); + } + + Property::DeleteSelect { + table: table.name.clone(), + predicate, + queries, + } +} + +fn property_drop_select( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + + // Create random queries respecting the constraints + let mut queries = Vec::new(); + // - [x] There will be no errors in the middle interactions. (this constraint is impossible to check, so this is just best effort) + // - [-] The table `t` will not be created, no table will be renamed to `t`. 
(todo: update this constraint once ALTER is implemented) + for _ in 0..rng.random_range(0..3) { + let query = Query::arbitrary_from(rng, (env, remaining)); + if let Query::Create(Create { table: t }) = &query { + // - The table `t` will not be created + if t.name == table.name { + continue; + } + } + queries.push(query); + } + + let select = Select::simple(table.name.clone(), Predicate::arbitrary_from(rng, table)); + + Property::DropSelect { + table: table.name.clone(), + queries, + select, + } +} + +fn property_select_select_optimizer(rng: &mut R, env: &SimulatorEnv) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let predicate = Predicate::arbitrary_from(rng, table); + // Transform into a Binary predicate to force values to be casted to a bool + let expr = ast::Expr::Binary( + Box::new(predicate.0), + ast::Operator::And, + Box::new(Predicate::true_().0), + ); + + Property::SelectSelectOptimizer { + table: table.name.clone(), + predicate: Predicate(expr), + } +} + +fn property_where_true_false_null(rng: &mut R, env: &SimulatorEnv) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let p1 = Predicate::arbitrary_from(rng, table); + let p2 = Predicate::arbitrary_from(rng, table); + + // Create the select query + let select = Select::simple(table.name.clone(), p1); + + Property::WhereTrueFalseNull { + select, + predicate: p2, + } +} + +fn property_union_all_preserves_cardinality( + rng: &mut R, + env: &SimulatorEnv, +) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let p1 = Predicate::arbitrary_from(rng, table); + let p2 = Predicate::arbitrary_from(rng, table); + + // Create the select query + let select = Select::single( + table.name.clone(), + vec![ResultColumn::Star], + p1, + None, + Distinctness::All, + ); + + Property::UNIONAllPreservesCardinality { + select, + where_clause: p2, 
+ } +} + +fn property_fsync_no_wait( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + Property::FsyncNoWait { + query: Query::arbitrary_from(rng, (env, remaining)), + tables: env.tables.iter().map(|t| t.name.clone()).collect(), + } +} + +fn property_faulty_query( + rng: &mut R, + env: &SimulatorEnv, + remaining: &Remaining, +) -> Property { + Property::FaultyQuery { + query: Query::arbitrary_from(rng, (env, remaining)), + tables: env.tables.iter().map(|t| t.name.clone()).collect(), + } +} + +impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { + fn arbitrary_from( + rng: &mut R, + (env, stats): (&SimulatorEnv, &InteractionStats), + ) -> Self { + let remaining_ = remaining(env, stats); + + frequency( + vec![ + ( + if !env.opts.disable_insert_values_select { + f64::min(remaining_.read, remaining_.write) + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_insert_values_select(rng, env, &remaining_)), + ), + ( + remaining_.read, + Box::new(|rng: &mut R| property_table_has_expected_content(rng, env)), + ), + ( + f64::min(remaining_.read, remaining_.write), + Box::new(|rng: &mut R| property_read_your_updates_back(rng, env)), + ), + ( + if !env.opts.disable_double_create_failure { + remaining_.create / 2.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_double_create_failure(rng, env, &remaining_)), + ), + ( + if !env.opts.disable_select_limit { + remaining_.read + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_select_limit(rng, env)), + ), + ( + if !env.opts.disable_delete_select { + f64::min(remaining_.read, remaining_.write).min(remaining_.delete) + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_delete_select(rng, env, &remaining_)), + ), + ( + if !env.opts.disable_drop_select { + // remaining_.drop + 0.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_drop_select(rng, env, &remaining_)), + ), + ( + if !env.opts.disable_select_optimizer { + remaining_.read / 2.0 + } else { + 0.0 + }, + 
Box::new(|rng: &mut R| property_select_select_optimizer(rng, env)), + ), + ( + if env.opts.experimental_indexes && !env.opts.disable_where_true_false_null { + remaining_.read / 2.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), + ), + ( + if env.opts.experimental_indexes + && !env.opts.disable_union_all_preserves_cardinality + { + remaining_.read / 3.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_union_all_preserves_cardinality(rng, env)), + ), + ( + if !env.opts.disable_fsync_no_wait { + 50.0 // Freestyle number + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_fsync_no_wait(rng, env, &remaining_)), + ), + ( + if !env.opts.disable_faulty_query { + 20.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_faulty_query(rng, env, &remaining_)), + ), + ], + rng, + ) + } +} + +fn print_row(row: &[SimValue]) -> String { + row.iter() + .map(|v| match &v.0 { + types::Value::Null => "NULL".to_string(), + types::Value::Integer(i) => i.to_string(), + types::Value::Float(f) => f.to_string(), + types::Value::Text(t) => t.to_string(), + types::Value::Blob(b) => format!( + "X'{}'", + b.iter() + .fold(String::new(), |acc, b| acc + &format!("{b:02X}")) + ), + }) + .collect::>() + .join(", ") +} diff --git a/sql_generation/generation/query.rs b/sql_generation/generation/query.rs new file mode 100644 index 000000000..eff24613c --- /dev/null +++ b/sql_generation/generation/query.rs @@ -0,0 +1,447 @@ +use crate::generation::{ + gen_random_text, pick_n_unique, Arbitrary, ArbitraryFrom, ArbitrarySizedFrom, +}; +use crate::model::query::predicate::Predicate; +use crate::model::query::select::{ + CompoundOperator, CompoundSelect, Distinctness, FromClause, OrderBy, ResultColumn, SelectBody, + SelectInner, +}; +use crate::model::query::update::Update; +use crate::model::query::{Create, CreateIndex, Delete, Drop, Insert, Query, Select}; +use crate::model::table::{JoinTable, JoinType, JoinedTable, SimValue, Table, TableContext}; 
+use crate::SimulatorEnv; +use itertools::Itertools; +use rand::Rng; +use turso_parser::ast::{Expr, SortOrder}; + +use super::property::Remaining; +use super::{backtrack, frequency, pick}; + +impl Arbitrary for Create { + fn arbitrary(rng: &mut R) -> Self { + Create { + table: Table::arbitrary(rng), + } + } +} + +impl ArbitraryFrom<&Vec> for FromClause { + fn arbitrary_from(rng: &mut R, tables: &Vec
) -> Self { + let num_joins = match rng.random_range(0..=100) { + 0..=90 => 0, + 91..=97 => 1, + 98..=100 => 2, + _ => unreachable!(), + }; + + let mut tables = tables.clone(); + let mut table = pick(&tables, rng).clone(); + + tables.retain(|t| t.name != table.name); + + let name = table.name.clone(); + + let mut table_context = JoinTable { + tables: Vec::new(), + rows: Vec::new(), + }; + + let joins: Vec<_> = (0..num_joins) + .filter_map(|_| { + if tables.is_empty() { + return None; + } + let join_table = pick(&tables, rng).clone(); + let joined_table_name = join_table.name.clone(); + + tables.retain(|t| t.name != join_table.name); + table_context.rows = table_context + .rows + .iter() + .cartesian_product(join_table.rows.iter()) + .map(|(t_row, j_row)| { + let mut row = t_row.clone(); + row.extend(j_row.clone()); + row + }) + .collect(); + // TODO: inneficient. use a Deque to push_front? + table_context.tables.insert(0, join_table); + for row in &mut table.rows { + assert_eq!( + row.len(), + table.columns.len(), + "Row length does not match column length after join" + ); + } + + let predicate = Predicate::arbitrary_from(rng, &table); + Some(JoinedTable { + table: joined_table_name, + join_type: JoinType::Inner, + on: predicate, + }) + }) + .collect(); + FromClause { table: name, joins } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for SelectInner { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let from = FromClause::arbitrary_from(rng, &env.tables); + let mut tables = env.tables.clone(); + // todo: this is a temporary hack because env is not separated from the tables + let join_table = from + .shadow(&mut tables) + .expect("Failed to shadow FromClause"); + let cuml_col_count = join_table.columns().count(); + + let order_by = 'order_by: { + if rng.random_bool(0.3) { + let order_by_table_candidates = from + .joins + .iter() + .map(|j| j.table.clone()) + .chain(std::iter::once(from.table.clone())) + .collect::>(); + let order_by_col_count = + 
(rng.random::() * rng.random::() * (cuml_col_count as f64)) as usize; // skew towards 0 + if order_by_col_count == 0 { + break 'order_by None; + } + let mut col_names = std::collections::HashSet::new(); + let mut order_by_cols = Vec::new(); + while order_by_cols.len() < order_by_col_count { + let table = pick(&order_by_table_candidates, rng); + let table = tables.iter().find(|t| t.name == *table).unwrap(); + let col = pick(&table.columns, rng); + let col_name = format!("{}.{}", table.name, col.name); + if col_names.insert(col_name.clone()) { + order_by_cols.push(( + col_name, + if rng.random_bool(0.5) { + SortOrder::Asc + } else { + SortOrder::Desc + }, + )); + } + } + Some(OrderBy { + columns: order_by_cols, + }) + } else { + None + } + }; + + SelectInner { + distinctness: if env.opts.experimental_indexes { + Distinctness::arbitrary(rng) + } else { + Distinctness::All + }, + columns: vec![ResultColumn::Star], + from: Some(from), + where_clause: Predicate::arbitrary_from(rng, &join_table), + order_by, + } + } +} + +impl ArbitrarySizedFrom<&SimulatorEnv> for SelectInner { + fn arbitrary_sized_from( + rng: &mut R, + env: &SimulatorEnv, + num_result_columns: usize, + ) -> Self { + let mut select_inner = SelectInner::arbitrary_from(rng, env); + let select_from = &select_inner.from.as_ref().unwrap(); + let table_names = select_from + .joins + .iter() + .map(|j| j.table.clone()) + .chain(std::iter::once(select_from.table.clone())) + .collect::>(); + + let flat_columns_names = table_names + .iter() + .flat_map(|t| { + env.tables + .iter() + .find(|table| table.name == *t) + .unwrap() + .columns + .iter() + .map(|c| format!("{}.{}", t.clone(), c.name)) + }) + .collect::>(); + let selected_columns = pick_unique(&flat_columns_names, num_result_columns, rng); + let mut columns = Vec::new(); + for column_name in selected_columns { + columns.push(ResultColumn::Column(column_name.clone())); + } + select_inner.columns = columns; + select_inner + } +} + +impl Arbitrary for 
Distinctness { + fn arbitrary(rng: &mut R) -> Self { + match rng.random_range(0..=5) { + 0..4 => Distinctness::All, + _ => Distinctness::Distinct, + } + } +} +impl Arbitrary for CompoundOperator { + fn arbitrary(rng: &mut R) -> Self { + match rng.random_range(0..=1) { + 0 => CompoundOperator::Union, + 1 => CompoundOperator::UnionAll, + _ => unreachable!(), + } + } +} + +/// SelectFree is a wrapper around Select that allows for arbitrary generation +/// of selects without requiring a specific environment, which is useful for generating +/// arbitrary expressions without referring to the tables. +pub(crate) struct SelectFree(pub(crate) Select); + +impl ArbitraryFrom<&SimulatorEnv> for SelectFree { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let expr = Predicate(Expr::arbitrary_sized_from(rng, env, 8)); + let select = Select::expr(expr); + Self(select) + } +} + +impl ArbitraryFrom<&SimulatorEnv> for Select { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + // Generate a number of selects based on the query size + // If experimental indexes are enabled, we can have selects with compounds + // Otherwise, we just have a single select with no compounds + let num_compound_selects = if env.opts.experimental_indexes { + match rng.random_range(0..=100) { + 0..=95 => 0, + 96..=99 => 1, + 100 => 2, + _ => unreachable!(), + } + } else { + 0 + }; + + let min_column_count_across_tables = + env.tables.iter().map(|t| t.columns.len()).min().unwrap(); + + let num_result_columns = rng.random_range(1..=min_column_count_across_tables); + + let mut first = SelectInner::arbitrary_sized_from(rng, env, num_result_columns); + + let mut rest: Vec = (0..num_compound_selects) + .map(|_| SelectInner::arbitrary_sized_from(rng, env, num_result_columns)) + .collect(); + + if !rest.is_empty() { + // ORDER BY is not supported in compound selects yet + first.order_by = None; + for s in &mut rest { + s.order_by = None; + } + } + + Self { + body: SelectBody { + 
select: Box::new(first), + compounds: rest + .into_iter() + .map(|s| CompoundSelect { + operator: CompoundOperator::arbitrary(rng), + select: Box::new(s), + }) + .collect(), + }, + limit: None, + } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for Insert { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let gen_values = |rng: &mut R| { + let table = pick(&env.tables, rng); + let num_rows = rng.random_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| SimValue::arbitrary_from(rng, &c.column_type)) + .collect() + }) + .collect(); + Some(Insert::Values { + table: table.name.clone(), + values, + }) + }; + + let _gen_select = |rng: &mut R| { + // Find a non-empty table + let select_table = env.tables.iter().find(|t| !t.rows.is_empty())?; + let row = pick(&select_table.rows, rng); + let predicate = Predicate::arbitrary_from(rng, (select_table, row)); + // Pick another table to insert into + let select = Select::simple(select_table.name.clone(), predicate); + let table = pick(&env.tables, rng); + Some(Insert::Select { + table: table.name.clone(), + select: Box::new(select), + }) + }; + + // TODO: Add back gen_select when https://github.com/tursodatabase/turso/issues/2129 is fixed. 
+ // Backtrack here cannot return None + backtrack(vec![(1, Box::new(gen_values))], rng).unwrap() + } +} + +impl ArbitraryFrom<&SimulatorEnv> for Delete { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let table = pick(&env.tables, rng); + Self { + table: table.name.clone(), + predicate: Predicate::arbitrary_from(rng, table), + } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for Drop { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let table = pick(&env.tables, rng); + Self { + table: table.name.clone(), + } + } +} + +impl ArbitraryFrom<&SimulatorEnv> for CreateIndex { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + assert!( + !env.tables.is_empty(), + "Cannot create an index when no tables exist in the environment." + ); + + let table = pick(&env.tables, rng); + + if table.columns.is_empty() { + panic!( + "Cannot create an index on table '{}' as it has no columns.", + table.name + ); + } + + let num_columns_to_pick = rng.random_range(1..=table.columns.len()); + let picked_column_indices = pick_n_unique(0..table.columns.len(), num_columns_to_pick, rng); + + let columns = picked_column_indices + .into_iter() + .map(|i| { + let column = &table.columns[i]; + ( + column.name.clone(), + if rng.random_bool(0.5) { + SortOrder::Asc + } else { + SortOrder::Desc + }, + ) + }) + .collect::>(); + + let index_name = format!( + "idx_{}_{}", + table.name, + gen_random_text(rng).chars().take(8).collect::() + ); + + CreateIndex { + index_name, + table_name: table.name.clone(), + columns, + } + } +} + +impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query { + fn arbitrary_from(rng: &mut R, (env, remaining): (&SimulatorEnv, &Remaining)) -> Self { + frequency( + vec![ + ( + remaining.create, + Box::new(|rng| Self::Create(Create::arbitrary(rng))), + ), + ( + remaining.read, + Box::new(|rng| Self::Select(Select::arbitrary_from(rng, env))), + ), + ( + remaining.write, + Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, env))), 
+ ), + ( + remaining.update, + Box::new(|rng| Self::Update(Update::arbitrary_from(rng, env))), + ), + ( + f64::min(remaining.write, remaining.delete), + Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, env))), + ), + ], + rng, + ) + } +} + +fn pick_unique( + items: &[T], + count: usize, + rng: &mut impl rand::Rng, +) -> Vec +where + ::Owned: PartialEq, +{ + let mut picked: Vec = Vec::new(); + while picked.len() < count { + let item = pick(items, rng); + if !picked.contains(&item.to_owned()) { + picked.push(item.to_owned()); + } + } + picked +} + +impl ArbitraryFrom<&SimulatorEnv> for Update { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + let table = pick(&env.tables, rng); + let num_cols = rng.random_range(1..=table.columns.len()); + let columns = pick_unique(&table.columns, num_cols, rng); + let set_values: Vec<(String, SimValue)> = columns + .iter() + .map(|column| { + ( + column.name.clone(), + SimValue::arbitrary_from(rng, &column.column_type), + ) + }) + .collect(); + Update { + table: table.name.clone(), + set_values, + predicate: Predicate::arbitrary_from(rng, table), + } + } +} diff --git a/sql_generation/generation/table.rs b/sql_generation/generation/table.rs new file mode 100644 index 000000000..fdddb6ff2 --- /dev/null +++ b/sql_generation/generation/table.rs @@ -0,0 +1,258 @@ +use std::collections::HashSet; + +use rand::Rng; +use turso_core::Value; + +use crate::generation::{gen_random_text, pick, readable_name_custom, Arbitrary, ArbitraryFrom}; +use crate::model::table::{Column, ColumnType, Name, SimValue, Table}; + +use super::ArbitraryFromMaybe; + +impl Arbitrary for Name { + fn arbitrary(rng: &mut R) -> Self { + let name = readable_name_custom("_", rng); + Name(name.replace("-", "_")) + } +} + +impl Arbitrary for Table { + fn arbitrary(rng: &mut R) -> Self { + let name = Name::arbitrary(rng).0; + let columns = loop { + let large_table = rng.random_bool(0.1); + let column_size = if large_table { + rng.random_range(64..125) 
// todo: make this higher (128+) + } else { + rng.random_range(1..=10) + }; + let columns = (1..=column_size) + .map(|_| Column::arbitrary(rng)) + .collect::>(); + // TODO: see if there is a better way to detect duplicates here + let mut set = HashSet::with_capacity(columns.len()); + set.extend(columns.iter()); + // Has repeated column name inside so generate again + if set.len() != columns.len() { + continue; + } + break columns; + }; + + Table { + rows: Vec::new(), + name, + columns, + indexes: vec![], + } + } +} + +impl Arbitrary for Column { + fn arbitrary(rng: &mut R) -> Self { + let name = Name::arbitrary(rng).0; + let column_type = ColumnType::arbitrary(rng); + Self { + name, + column_type, + primary: false, + unique: false, + } + } +} + +impl Arbitrary for ColumnType { + fn arbitrary(rng: &mut R) -> Self { + pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() + } +} + +impl ArbitraryFrom<&Table> for Vec { + fn arbitrary_from(rng: &mut R, table: &Table) -> Self { + let mut row = Vec::new(); + for column in table.columns.iter() { + let value = SimValue::arbitrary_from(rng, &column.column_type); + row.push(value); + } + row + } +} + +impl ArbitraryFrom<&Vec<&SimValue>> for SimValue { + fn arbitrary_from(rng: &mut R, values: &Vec<&Self>) -> Self { + if values.is_empty() { + return Self(Value::Null); + } + + pick(values, rng).to_owned().clone() + } +} + +impl ArbitraryFrom<&ColumnType> for SimValue { + fn arbitrary_from(rng: &mut R, column_type: &ColumnType) -> Self { + let value = match column_type { + ColumnType::Integer => Value::Integer(rng.random_range(i64::MIN..i64::MAX)), + ColumnType::Float => Value::Float(rng.random_range(-1e10..1e10)), + ColumnType::Text => Value::build_text(gen_random_text(rng)), + ColumnType::Blob => Value::Blob(gen_random_text(rng).as_bytes().to_vec()), + }; + SimValue(value) + } +} + +pub(crate) struct LTValue(pub(crate) SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for LTValue { + fn arbitrary_from(rng: 
&mut R, values: &Vec<&SimValue>) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + + // Get value less than all values + let value = Value::exec_min(values.iter().map(|value| &value.0)); + Self::arbitrary_from(rng, &SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for LTValue { + fn arbitrary_from(rng: &mut R, value: &SimValue) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(i64::MIN..*i - 1)), + Value::Float(f) => Value::Float(f - rng.random_range(0.0..1e10)), + value @ Value::Text(..) => { + // Either shorten the string, or make at least one character smaller and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.pop(); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] -= 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('z')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either shorten the blob, or make at least one byte smaller and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.pop(); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] -= 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} + +pub(crate) struct GTValue(pub(crate) SimValue); + +impl ArbitraryFrom<&Vec<&SimValue>> for GTValue { + fn arbitrary_from(rng: &mut R, values: &Vec<&SimValue>) -> Self { + if values.is_empty() { + return Self(SimValue(Value::Null)); + } + // Get value greater than all values + let value = Value::exec_max(values.iter().map(|value| &value.0)); + + Self::arbitrary_from(rng, 
&SimValue(value)) + } +} + +impl ArbitraryFrom<&SimValue> for GTValue { + fn arbitrary_from(rng: &mut R, value: &SimValue) -> Self { + let new_value = match &value.0 { + Value::Integer(i) => Value::Integer(rng.random_range(*i..i64::MAX)), + Value::Float(f) => Value::Float(rng.random_range(*f..1e10)), + value @ Value::Text(..) => { + // Either lengthen the string, or make at least one character larger and mutate the rest + let mut t = value.to_string(); + if rng.random_bool(0.01) { + t.push(rng.random_range(0..=255) as u8 as char); + Value::build_text(t) + } else { + let mut t = t.chars().map(|c| c as u32).collect::>(); + let index = rng.random_range(0..t.len()); + t[index] += 1; + // Mutate the rest of the string + for val in t.iter_mut().skip(index + 1) { + *val = rng.random_range('a' as u32..='z' as u32); + } + let t = t + .into_iter() + .map(|c| char::from_u32(c).unwrap_or('a')) + .collect::(); + Value::build_text(t) + } + } + Value::Blob(b) => { + // Either lengthen the blob, or make at least one byte larger and mutate the rest + let mut b = b.clone(); + if rng.random_bool(0.01) { + b.push(rng.random_range(0..=255)); + Value::Blob(b) + } else { + let index = rng.random_range(0..b.len()); + b[index] += 1; + // Mutate the rest of the blob + for val in b.iter_mut().skip(index + 1) { + *val = rng.random_range(0..=255); + } + Value::Blob(b) + } + } + _ => unreachable!(), + }; + Self(SimValue(new_value)) + } +} + +pub(crate) struct LikeValue(pub(crate) SimValue); + +impl ArbitraryFromMaybe<&SimValue> for LikeValue { + fn arbitrary_from_maybe(rng: &mut R, value: &SimValue) -> Option { + match &value.0 { + value @ Value::Text(..) 
=> { + let t = value.to_string(); + let mut t = t.chars().collect::>(); + // Remove a number of characters, either insert `_` for each character removed, or + // insert one `%` for the whole substring + let mut i = 0; + while i < t.len() { + if rng.random_bool(0.1) { + t[i] = '_'; + } else if rng.random_bool(0.05) { + t[i] = '%'; + // skip a list of characters + for _ in 0..rng.random_range(0..=3.min(t.len() - i - 1)) { + t.remove(i + 1); + } + } + i += 1; + } + let index = rng.random_range(0..t.len()); + t.insert(index, '%'); + Some(Self(SimValue(Value::build_text( + t.into_iter().collect::(), + )))) + } + _ => None, + } + } +} diff --git a/sql_generation/lib.rs b/sql_generation/lib.rs index 8b1378917..f52cdebdf 100644 --- a/sql_generation/lib.rs +++ b/sql_generation/lib.rs @@ -1 +1,2 @@ - +pub mod generation; +pub mod model; diff --git a/sql_generation/model/mod.rs b/sql_generation/model/mod.rs new file mode 100644 index 000000000..e68355ee4 --- /dev/null +++ b/sql_generation/model/mod.rs @@ -0,0 +1,4 @@ +pub mod query; +pub mod table; + +pub(crate) const FAULT_ERROR_MSG: &str = "Injected fault"; diff --git a/sql_generation/model/query/create.rs b/sql_generation/model/query/create.rs new file mode 100644 index 000000000..ab0cd9789 --- /dev/null +++ b/sql_generation/model/query/create.rs @@ -0,0 +1,45 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{ + generation::Shadow, + model::table::{SimValue, Table}, + runner::env::SimulatorTables, +}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Create { + pub(crate) table: Table, +} + +impl Shadow for Create { + type Result = anyhow::Result>>; + + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + if !tables.iter().any(|t| t.name == self.table.name) { + tables.push(self.table.clone()); + Ok(vec![]) + } else { + Err(anyhow::anyhow!( + "Table {} already exists. 
CREATE TABLE statement ignored.", + self.table.name + )) + } + } +} + +impl Display for Create { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "CREATE TABLE {} (", self.table.name)?; + + for (i, column) in self.table.columns.iter().enumerate() { + if i != 0 { + write!(f, ",")?; + } + write!(f, "{} {}", column.name, column.column_type)?; + } + + write!(f, ")") + } +} diff --git a/sql_generation/model/query/create_index.rs b/sql_generation/model/query/create_index.rs new file mode 100644 index 000000000..cc7f7566a --- /dev/null +++ b/sql_generation/model/query/create_index.rs @@ -0,0 +1,106 @@ +use crate::{ + generation::{gen_random_text, pick, pick_n_unique, ArbitraryFrom, Shadow}, + model::table::SimValue, + runner::env::{SimulatorEnv, SimulatorTables}, +}; +use rand::Rng; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum SortOrder { + Asc, + Desc, +} + +impl std::fmt::Display for SortOrder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SortOrder::Asc => write!(f, "ASC"), + SortOrder::Desc => write!(f, "DESC"), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub(crate) struct CreateIndex { + pub(crate) index_name: String, + pub(crate) table_name: String, + pub(crate) columns: Vec<(String, SortOrder)>, +} + +impl Shadow for CreateIndex { + type Result = Vec>; + fn shadow(&self, env: &mut SimulatorTables) -> Vec> { + env.tables + .iter_mut() + .find(|t| t.name == self.table_name) + .unwrap() + .indexes + .push(self.index_name.clone()); + vec![] + } +} + +impl std::fmt::Display for CreateIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "CREATE INDEX {} ON {} ({})", + self.index_name, + self.table_name, + self.columns + .iter() + .map(|(name, order)| format!("{name} {order}")) + .collect::>() + .join(", ") + ) + } +} + +impl 
ArbitraryFrom<&SimulatorEnv> for CreateIndex { + fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { + assert!( + !env.tables.is_empty(), + "Cannot create an index when no tables exist in the environment." + ); + + let table = pick(&env.tables, rng); + + if table.columns.is_empty() { + panic!( + "Cannot create an index on table '{}' as it has no columns.", + table.name + ); + } + + let num_columns_to_pick = rng.random_range(1..=table.columns.len()); + let picked_column_indices = pick_n_unique(0..table.columns.len(), num_columns_to_pick, rng); + + let columns = picked_column_indices + .into_iter() + .map(|i| { + let column = &table.columns[i]; + ( + column.name.clone(), + if rng.random_bool(0.5) { + SortOrder::Asc + } else { + SortOrder::Desc + }, + ) + }) + .collect::>(); + + let index_name = format!( + "idx_{}_{}", + table.name, + gen_random_text(rng).chars().take(8).collect::() + ); + + CreateIndex { + index_name, + table_name: table.name.clone(), + columns, + } + } +} diff --git a/sql_generation/model/query/delete.rs b/sql_generation/model/query/delete.rs new file mode 100644 index 000000000..265cdfe96 --- /dev/null +++ b/sql_generation/model/query/delete.rs @@ -0,0 +1,41 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorTables}; + +use super::predicate::Predicate; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Delete { + pub(crate) table: String, + pub(crate) predicate: Predicate, +} + +impl Shadow for Delete { + type Result = anyhow::Result>>; + + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + let table = tables.tables.iter_mut().find(|t| t.name == self.table); + + if let Some(table) = table { + // If the table exists, we can delete from it + let t2 = table.clone(); + table.rows.retain_mut(|r| !self.predicate.test(r, &t2)); + } else { + // If the table does not exist, we return an error + return 
Err(anyhow::anyhow!( + "Table {} does not exist. DELETE statement ignored.", + self.table + )); + } + + Ok(vec![]) + } +} + +impl Display for Delete { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "DELETE FROM {} WHERE {}", self.table, self.predicate) + } +} diff --git a/sql_generation/model/query/drop.rs b/sql_generation/model/query/drop.rs new file mode 100644 index 000000000..2b4379ff9 --- /dev/null +++ b/sql_generation/model/query/drop.rs @@ -0,0 +1,34 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorTables}; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Drop { + pub(crate) table: String, +} + +impl Shadow for Drop { + type Result = anyhow::Result>>; + + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + if !tables.iter().any(|t| t.name == self.table) { + // If the table does not exist, we return an error + return Err(anyhow::anyhow!( + "Table {} does not exist. DROP statement ignored.", + self.table + )); + } + + tables.tables.retain(|t| t.name != self.table); + + Ok(vec![]) + } +} + +impl Display for Drop { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "DROP TABLE {}", self.table) + } +} diff --git a/sql_generation/model/query/insert.rs b/sql_generation/model/query/insert.rs new file mode 100644 index 000000000..3dc8659df --- /dev/null +++ b/sql_generation/model/query/insert.rs @@ -0,0 +1,87 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorTables}; + +use super::select::Select; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) enum Insert { + Values { + table: String, + values: Vec>, + }, + Select { + table: String, + select: Box
to support resolving a value from another table +// This function attempts to convert a simple, easily computable expression into a value +// TODO: In the future, we can try to expand this computation if we want to support harder properties that require us +// to already know more values beforehand +pub fn expr_to_value( + expr: &ast::Expr, + row: &[SimValue], + table: &T, +) -> Option { + match expr { + ast::Expr::DoublyQualified(_, _, ast::Name::Ident(col_name)) + | ast::Expr::DoublyQualified(_, _, ast::Name::Quoted(col_name)) + | ast::Expr::Qualified(_, ast::Name::Ident(col_name)) + | ast::Expr::Qualified(_, ast::Name::Quoted(col_name)) + | ast::Expr::Id(ast::Name::Ident(col_name)) => { + let columns = table.columns().collect::>(); + assert_eq!(row.len(), columns.len()); + columns + .iter() + .zip(row.iter()) + .find(|(column, _)| column.column.name == *col_name) + .map(|(_, value)| value) + .cloned() + } + ast::Expr::Literal(literal) => Some(literal.into()), + ast::Expr::Binary(lhs, op, rhs) => { + let lhs = expr_to_value(lhs, row, table)?; + let rhs = expr_to_value(rhs, row, table)?; + Some(lhs.binary_compare(&rhs, *op)) + } + ast::Expr::Like { + lhs, + not, + op, + rhs, + escape: _, // TODO: support escape + } => { + let lhs = expr_to_value(lhs, row, table)?; + let rhs = expr_to_value(rhs, row, table)?; + let res = lhs.like_compare(&rhs, *op); + let value: SimValue = if *not { !res } else { res }.into(); + Some(value) + } + ast::Expr::Unary(op, expr) => { + let value = expr_to_value(expr, row, table)?; + Some(value.unary_exec(*op)) + } + ast::Expr::Parenthesized(exprs) => { + assert_eq!(exprs.len(), 1); + expr_to_value(&exprs[0], row, table) + } + _ => unreachable!("{:?}", expr), + } +} + +impl Display for Predicate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.to_fmt(f) + } +} diff --git a/sql_generation/model/query/select.rs b/sql_generation/model/query/select.rs new file mode 100644 index 000000000..b5e516a0e --- 
/dev/null +++ b/sql_generation/model/query/select.rs @@ -0,0 +1,496 @@ +use std::{collections::HashSet, fmt::Display}; + +use anyhow::Context; +pub use ast::Distinctness; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; +use turso_parser::ast::{self, fmt::ToTokens, SortOrder}; + +use crate::{ + generation::Shadow, + model::{ + query::EmptyContext, + table::{JoinTable, JoinType, JoinedTable, SimValue, Table, TableContext}, + }, + runner::env::SimulatorTables, +}; + +use super::predicate::Predicate; + +/// `SELECT` or `RETURNING` result column +// https://sqlite.org/syntax/result-column.html +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum ResultColumn { + /// expression + Expr(Predicate), + /// `*` + Star, + /// column name + Column(String), +} + +impl Display for ResultColumn { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ResultColumn::Expr(expr) => write!(f, "({expr})"), + ResultColumn::Star => write!(f, "*"), + ResultColumn::Column(name) => write!(f, "{name}"), + } + } +} +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Select { + pub(crate) body: SelectBody, + pub(crate) limit: Option, +} + +impl Select { + pub fn simple(table: String, where_clause: Predicate) -> Self { + Self::single( + table, + vec![ResultColumn::Star], + where_clause, + None, + Distinctness::All, + ) + } + + pub fn expr(expr: Predicate) -> Self { + Select { + body: SelectBody { + select: Box::new(SelectInner { + distinctness: Distinctness::All, + columns: vec![ResultColumn::Expr(expr)], + from: None, + where_clause: Predicate::true_(), + order_by: None, + }), + compounds: Vec::new(), + }, + limit: None, + } + } + + pub fn single( + table: String, + result_columns: Vec, + where_clause: Predicate, + limit: Option, + distinct: Distinctness, + ) -> Self { + Select { + body: SelectBody { + select: Box::new(SelectInner { + distinctness: distinct, + columns: result_columns, + from: 
Some(FromClause { + table, + joins: Vec::new(), + }), + where_clause, + order_by: None, + }), + compounds: Vec::new(), + }, + limit, + } + } + + pub fn compound(left: Select, right: Select, operator: CompoundOperator) -> Self { + let mut body = left.body; + body.compounds.push(CompoundSelect { + operator, + select: Box::new(right.body.select.as_ref().clone()), + }); + Select { + body, + limit: left.limit.or(right.limit), + } + } + + pub(crate) fn dependencies(&self) -> HashSet { + if self.body.select.from.is_none() { + return HashSet::new(); + } + let from = self.body.select.from.as_ref().unwrap(); + let mut tables = HashSet::new(); + tables.insert(from.table.clone()); + + tables.extend(from.dependencies()); + + for compound in &self.body.compounds { + tables.extend( + compound + .select + .from + .as_ref() + .map(|f| f.dependencies()) + .unwrap_or(vec![]) + .into_iter(), + ); + } + + tables + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SelectBody { + /// first select + pub select: Box, + /// compounds + pub compounds: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct OrderBy { + pub columns: Vec<(String, SortOrder)>, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SelectInner { + /// `DISTINCT` + pub distinctness: Distinctness, + /// columns + pub columns: Vec, + /// `FROM` clause + pub from: Option, + /// `WHERE` clause + pub where_clause: Predicate, + /// `ORDER BY` clause + pub order_by: Option, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum CompoundOperator { + /// `UNION` + Union, + /// `UNION ALL` + UnionAll, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct CompoundSelect { + /// operator + pub operator: CompoundOperator, + /// select + pub select: Box, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct FromClause { + /// table + pub table: String, + /// `JOIN`ed tables + pub joins: 
Vec, +} + +impl FromClause { + fn to_sql_ast(&self) -> ast::FromClause { + ast::FromClause { + select: Some(Box::new(ast::SelectTable::Table( + ast::QualifiedName::single(ast::Name::from_str(&self.table)), + None, + None, + ))), + joins: if self.joins.is_empty() { + None + } else { + Some( + self.joins + .iter() + .map(|join| ast::JoinedSelectTable { + operator: match join.join_type { + JoinType::Inner => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::INNER)) + } + JoinType::Left => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::LEFT)) + } + JoinType::Right => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::RIGHT)) + } + JoinType::Full => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::OUTER)) + } + JoinType::Cross => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::CROSS)) + } + }, + table: ast::SelectTable::Table( + ast::QualifiedName::single(ast::Name::from_str(&join.table)), + None, + None, + ), + constraint: Some(ast::JoinConstraint::On(join.on.0.clone())), + }) + .collect(), + ) + }, + } + } + + pub(crate) fn dependencies(&self) -> Vec { + let mut deps = vec![self.table.clone()]; + for join in &self.joins { + deps.push(join.table.clone()); + } + deps + } +} + +impl Shadow for FromClause { + type Result = anyhow::Result; + fn shadow(&self, env: &mut SimulatorTables) -> Self::Result { + let tables = &mut env.tables; + + let first_table = tables + .iter() + .find(|t| t.name == self.table) + .context("Table not found")?; + + let mut join_table = JoinTable { + tables: vec![first_table.clone()], + rows: Vec::new(), + }; + + for join in &self.joins { + let joined_table = tables + .iter() + .find(|t| t.name == join.table) + .context("Joined table not found")?; + + join_table.tables.push(joined_table.clone()); + + match join.join_type { + JoinType::Inner => { + // Implement inner join logic + let join_rows = joined_table + .rows + .iter() + .filter(|row| join.on.test(row, joined_table)) + .cloned() + .collect::>(); + // take a cartesian 
product of the rows + let all_row_pairs = join_table + .rows + .clone() + .into_iter() + .cartesian_product(join_rows.iter()); + + for (row1, row2) in all_row_pairs { + let row = row1.iter().chain(row2.iter()).cloned().collect::>(); + + let is_in = join.on.test(&row, &join_table); + + if is_in { + join_table.rows.push(row); + } + } + } + _ => todo!(), + } + } + Ok(join_table) + } +} + +impl Shadow for SelectInner { + type Result = anyhow::Result; + + fn shadow(&self, env: &mut SimulatorTables) -> Self::Result { + if let Some(from) = &self.from { + let mut join_table = from.shadow(env)?; + let col_count = join_table.columns().count(); + for row in &mut join_table.rows { + assert_eq!( + row.len(), + col_count, + "Row length does not match column length after join" + ); + } + let join_clone = join_table.clone(); + + join_table + .rows + .retain(|row| self.where_clause.test(row, &join_clone)); + + if self.distinctness == Distinctness::Distinct { + join_table.rows.sort_unstable(); + join_table.rows.dedup(); + } + + Ok(join_table) + } else { + assert!(self + .columns + .iter() + .all(|col| matches!(col, ResultColumn::Expr(_)))); + + // If `WHERE` is false, just return an empty table + if !self.where_clause.test(&[], &Table::anonymous(vec![])) { + return Ok(JoinTable { + tables: Vec::new(), + rows: Vec::new(), + }); + } + + // Compute the results of the column expressions and make a row + let mut row = Vec::new(); + for col in &self.columns { + match col { + ResultColumn::Expr(expr) => { + let value = expr.eval(&[], &Table::anonymous(vec![])); + if let Some(value) = value { + row.push(value); + } else { + return Err(anyhow::anyhow!( + "Failed to evaluate expression in free select ({})", + expr.0.format_with_context(&EmptyContext {}).unwrap() + )); + } + } + _ => unreachable!("Only expressions are allowed in free selects"), + } + } + + Ok(JoinTable { + tables: Vec::new(), + rows: vec![row], + }) + } + } +} + +impl Shadow for Select { + type Result = anyhow::Result>>; + + 
fn shadow(&self, env: &mut SimulatorTables) -> Self::Result { + let first_result = self.body.select.shadow(env)?; + + let mut rows = first_result.rows; + + for compound in self.body.compounds.iter() { + let compound_results = compound.select.shadow(env)?; + + match compound.operator { + CompoundOperator::Union => { + // Union means we need to combine the results, removing duplicates + let mut new_rows = compound_results.rows; + new_rows.extend(rows.clone()); + new_rows.sort_unstable(); + new_rows.dedup(); + rows = new_rows; + } + CompoundOperator::UnionAll => { + // Union all means we just concatenate the results + rows.extend(compound_results.rows.into_iter()); + } + } + } + + Ok(rows) + } +} + +impl Select { + pub fn to_sql_ast(&self) -> ast::Select { + ast::Select { + with: None, + body: ast::SelectBody { + select: Box::new(ast::OneSelect::Select(Box::new(ast::SelectInner { + distinctness: if self.body.select.distinctness == Distinctness::Distinct { + Some(ast::Distinctness::Distinct) + } else { + None + }, + columns: self + .body + .select + .columns + .iter() + .map(|col| match col { + ResultColumn::Expr(expr) => { + ast::ResultColumn::Expr(expr.0.clone(), None) + } + ResultColumn::Star => ast::ResultColumn::Star, + ResultColumn::Column(name) => ast::ResultColumn::Expr( + ast::Expr::Id(ast::Name::Ident(name.clone())), + None, + ), + }) + .collect(), + from: self.body.select.from.as_ref().map(|f| f.to_sql_ast()), + where_clause: Some(self.body.select.where_clause.0.clone()), + group_by: None, + window_clause: None, + }))), + compounds: Some( + self.body + .compounds + .iter() + .map(|compound| ast::CompoundSelect { + operator: match compound.operator { + CompoundOperator::Union => ast::CompoundOperator::Union, + CompoundOperator::UnionAll => ast::CompoundOperator::UnionAll, + }, + select: Box::new(ast::OneSelect::Select(Box::new(ast::SelectInner { + distinctness: Some(compound.select.distinctness), + columns: compound + .select + .columns + .iter() + .map(|col| 
match col { + ResultColumn::Expr(expr) => { + ast::ResultColumn::Expr(expr.0.clone(), None) + } + ResultColumn::Star => ast::ResultColumn::Star, + ResultColumn::Column(name) => ast::ResultColumn::Expr( + ast::Expr::Id(ast::Name::Ident(name.clone())), + None, + ), + }) + .collect(), + from: compound.select.from.as_ref().map(|f| f.to_sql_ast()), + where_clause: Some(compound.select.where_clause.0.clone()), + group_by: None, + window_clause: None, + }))), + }) + .collect(), + ), + }, + order_by: self.body.select.order_by.as_ref().map(|o| { + o.columns + .iter() + .map(|(name, order)| ast::SortedColumn { + expr: ast::Expr::Id(ast::Name::Ident(name.clone())), + order: match order { + SortOrder::Asc => Some(ast::SortOrder::Asc), + SortOrder::Desc => Some(ast::SortOrder::Desc), + }, + nulls: None, + }) + .collect() + }), + limit: self.limit.map(|l| { + Box::new(ast::Limit { + expr: ast::Expr::Literal(ast::Literal::Numeric(l.to_string())), + offset: None, + }) + }), + } + } +} +impl Display for Select { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_sql_ast().to_fmt_with_context(f, &EmptyContext {}) + } +} + +#[cfg(test)] +mod select_tests { + + #[test] + fn test_select_display() {} +} diff --git a/sql_generation/model/query/transaction.rs b/sql_generation/model/query/transaction.rs new file mode 100644 index 000000000..a73fb076e --- /dev/null +++ b/sql_generation/model/query/transaction.rs @@ -0,0 +1,60 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorTables}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Begin { + pub(crate) immediate: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Commit; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Rollback; + +impl Shadow for Begin { + type Result = Vec>; + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + 
tables.snapshot = Some(tables.tables.clone()); + vec![] + } +} + +impl Shadow for Commit { + type Result = Vec>; + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + tables.snapshot = None; + vec![] + } +} + +impl Shadow for Rollback { + type Result = Vec>; + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + if let Some(tables_) = tables.snapshot.take() { + tables.tables = tables_; + } + vec![] + } +} + +impl Display for Begin { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "BEGIN {}", if self.immediate { "IMMEDIATE" } else { "" }) + } +} + +impl Display for Commit { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "COMMIT") + } +} + +impl Display for Rollback { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ROLLBACK") + } +} diff --git a/sql_generation/model/query/update.rs b/sql_generation/model/query/update.rs new file mode 100644 index 000000000..a4cc13fa8 --- /dev/null +++ b/sql_generation/model/query/update.rs @@ -0,0 +1,71 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorTables}; + +use super::predicate::Predicate; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub(crate) struct Update { + pub(crate) table: String, + pub(crate) set_values: Vec<(String, SimValue)>, // Pair of value for set expressions => SET name=value + pub(crate) predicate: Predicate, +} + +impl Update { + pub fn table(&self) -> &str { + &self.table + } +} + +impl Shadow for Update { + type Result = anyhow::Result>>; + + fn shadow(&self, tables: &mut SimulatorTables) -> Self::Result { + let table = tables.tables.iter_mut().find(|t| t.name == self.table); + + let table = if let Some(table) = table { + table + } else { + return Err(anyhow::anyhow!( + "Table {} does not exist. 
UPDATE statement ignored.", + self.table + )); + }; + + let t2 = table.clone(); + for row in table + .rows + .iter_mut() + .filter(|r| self.predicate.test(r, &t2)) + { + for (column, set_value) in &self.set_values { + if let Some((idx, _)) = table + .columns + .iter() + .enumerate() + .find(|(_, c)| &c.name == column) + { + row[idx] = set_value.clone(); + } + } + } + + Ok(vec![]) + } +} + +impl Display for Update { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "UPDATE {} SET ", self.table)?; + for (i, (name, value)) in self.set_values.iter().enumerate() { + if i != 0 { + write!(f, ", ")?; + } + write!(f, "{name} = {value}")?; + } + write!(f, " WHERE {}", self.predicate)?; + Ok(()) + } +} diff --git a/sql_generation/model/table.rs b/sql_generation/model/table.rs new file mode 100644 index 000000000..210039e17 --- /dev/null +++ b/sql_generation/model/table.rs @@ -0,0 +1,428 @@ +use std::{fmt::Display, hash::Hash, ops::Deref}; + +use serde::{Deserialize, Serialize}; +use turso_core::{numeric::Numeric, types}; +use turso_parser::ast; + +use crate::model::query::predicate::Predicate; + +pub(crate) struct Name(pub(crate) String); + +impl Deref for Name { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct ContextColumn<'a> { + pub table_name: &'a str, + pub column: &'a Column, +} + +pub trait TableContext { + fn columns<'a>(&'a self) -> impl Iterator>; + fn rows(&self) -> &Vec>; +} + +impl TableContext for Table { + fn columns<'a>(&'a self) -> impl Iterator> { + self.columns.iter().map(|col| ContextColumn { + column: col, + table_name: &self.name, + }) + } + + fn rows(&self) -> &Vec> { + &self.rows + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Table { + pub(crate) name: String, + pub(crate) columns: Vec, + pub(crate) rows: Vec>, + pub(crate) indexes: Vec, +} + +impl Table { + pub fn anonymous(rows: Vec>) -> Self { + Self { + rows, + 
name: "".to_string(), + columns: vec![], + indexes: vec![], + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) struct Column { + pub(crate) name: String, + pub(crate) column_type: ColumnType, + pub(crate) primary: bool, + pub(crate) unique: bool, +} + +// Uniquely defined by name in this case +impl Hash for Column { + fn hash(&self, state: &mut H) { + self.name.hash(state); + } +} + +impl PartialEq for Column { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl Eq for Column {} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub(crate) enum ColumnType { + Integer, + Float, + Text, + Blob, +} + +impl Display for ColumnType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Integer => write!(f, "INTEGER"), + Self::Float => write!(f, "REAL"), + Self::Text => write!(f, "TEXT"), + Self::Blob => write!(f, "BLOB"), + } + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct JoinedTable { + /// table name + pub table: String, + /// `JOIN` type + pub join_type: JoinType, + /// `ON` clause + pub on: Predicate, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum JoinType { + Inner, + Left, + Right, + Full, + Cross, +} + +impl TableContext for JoinTable { + fn columns<'a>(&'a self) -> impl Iterator> { + self.tables.iter().flat_map(|table| table.columns()) + } + + fn rows(&self) -> &Vec> { + &self.rows + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct JoinTable { + pub tables: Vec
, + pub rows: Vec>, +} + +fn float_to_string(float: &f64, serializer: S) -> Result +where + S: serde::Serializer, +{ + serializer.serialize_str(&format!("{float}")) +} + +fn string_to_float<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) +} + +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)] +pub(crate) struct SimValue(pub turso_core::Value); + +fn to_sqlite_blob(bytes: &[u8]) -> String { + format!( + "X'{}'", + bytes + .iter() + .fold(String::new(), |acc, b| acc + &format!("{b:02X}")) + ) +} + +impl Display for SimValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.0 { + types::Value::Null => write!(f, "NULL"), + types::Value::Integer(i) => write!(f, "{i}"), + types::Value::Float(fl) => write!(f, "{fl}"), + value @ types::Value::Text(..) => write!(f, "'{value}'"), + types::Value::Blob(b) => write!(f, "{}", to_sqlite_blob(b)), + } + } +} + +impl SimValue { + pub const FALSE: Self = SimValue(types::Value::Integer(0)); + pub const TRUE: Self = SimValue(types::Value::Integer(1)); + + pub fn as_bool(&self) -> bool { + Numeric::from(&self.0).try_into_bool().unwrap_or_default() + } + + // TODO: support more predicates + /// Returns a Result of a Binary Operation + /// + /// TODO: forget collations for now + /// TODO: have the [ast::Operator::Equals], [ast::Operator::NotEquals], [ast::Operator::Greater], + /// [ast::Operator::GreaterEquals], [ast::Operator::Less], [ast::Operator::LessEquals] function to be extracted + /// into its functions in turso_core so that it can be used here + pub fn binary_compare(&self, other: &Self, operator: ast::Operator) -> SimValue { + match operator { + ast::Operator::Add => self.0.exec_add(&other.0).into(), + ast::Operator::And => self.0.exec_and(&other.0).into(), + ast::Operator::ArrowRight => todo!(), + ast::Operator::ArrowRightShift => todo!(), + 
ast::Operator::BitwiseAnd => self.0.exec_bit_and(&other.0).into(), + ast::Operator::BitwiseOr => self.0.exec_bit_or(&other.0).into(), + ast::Operator::BitwiseNot => todo!(), // TODO: Do not see any function usage of this operator in Core + ast::Operator::Concat => self.0.exec_concat(&other.0).into(), + ast::Operator::Equals => (self == other).into(), + ast::Operator::Divide => self.0.exec_divide(&other.0).into(), + ast::Operator::Greater => (self > other).into(), + ast::Operator::GreaterEquals => (self >= other).into(), + // TODO: Test these implementations + ast::Operator::Is => match (&self.0, &other.0) { + (types::Value::Null, types::Value::Null) => true.into(), + (types::Value::Null, _) => false.into(), + (_, types::Value::Null) => false.into(), + _ => self.binary_compare(other, ast::Operator::Equals), + }, + ast::Operator::IsNot => self + .binary_compare(other, ast::Operator::Is) + .unary_exec(ast::UnaryOperator::Not), + ast::Operator::LeftShift => self.0.exec_shift_left(&other.0).into(), + ast::Operator::Less => (self < other).into(), + ast::Operator::LessEquals => (self <= other).into(), + ast::Operator::Modulus => self.0.exec_remainder(&other.0).into(), + ast::Operator::Multiply => self.0.exec_multiply(&other.0).into(), + ast::Operator::NotEquals => (self != other).into(), + ast::Operator::Or => self.0.exec_or(&other.0).into(), + ast::Operator::RightShift => self.0.exec_shift_right(&other.0).into(), + ast::Operator::Subtract => self.0.exec_subtract(&other.0).into(), + } + } + + // TODO: support more operators. 
Copy the implementation for exec_glob + pub fn like_compare(&self, other: &Self, operator: ast::LikeOperator) -> bool { + match operator { + ast::LikeOperator::Glob => todo!(), + ast::LikeOperator::Like => { + // TODO: support ESCAPE `expr` option in AST + // TODO: regex cache + types::Value::exec_like( + None, + other.0.to_string().as_str(), + self.0.to_string().as_str(), + ) + } + ast::LikeOperator::Match => todo!(), + ast::LikeOperator::Regexp => todo!(), + } + } + + pub fn unary_exec(&self, operator: ast::UnaryOperator) -> SimValue { + let new_value = match operator { + ast::UnaryOperator::BitwiseNot => self.0.exec_bit_not(), + ast::UnaryOperator::Negative => { + SimValue(types::Value::Integer(0)) + .binary_compare(self, ast::Operator::Subtract) + .0 + } + ast::UnaryOperator::Not => self.0.exec_boolean_not(), + ast::UnaryOperator::Positive => self.0.clone(), + }; + Self(new_value) + } +} + +impl From for SimValue { + fn from(value: ast::Literal) -> Self { + Self::from(&value) + } +} + +/// Converts a SQL string literal with already-escaped single quotes to a regular string by: +/// - Removing the enclosing single quotes +/// - Converting sequences of 2N single quotes ('''''') to N single quotes (''') +/// +/// Assumes: +/// - The input starts and ends with a single quote +/// - The input contains a valid amount of single quotes inside the enclosing quotes; +/// i.e. 
/// Converts a SQL string literal with already-escaped single quotes to a regular string by:
/// - Removing the enclosing single quotes
/// - Converting sequences of 2N single quotes ('''''') to N single quotes (''')
///
/// Assumes:
/// - The input starts and ends with a single quote
/// - The input contains a valid amount of single quotes inside the enclosing quotes;
///   i.e. any ' is escaped as a double ''
///
/// # Panics
///
/// Panics when either assumption is violated:
/// - the input is not wrapped in an opening and a separate closing single
///   quote (this includes the one-character input `'`);
/// - a run of single quotes inside the enclosing quotes has odd length.
fn unescape_singlequotes(input: &str) -> String {
    // Strip the two enclosing quotes. For a lone `'` the prefix strip leaves an
    // empty string, the suffix strip then fails, and the input is rejected with
    // the wrapping message instead of an out-of-range slice panic.
    let inner = input
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''))
        .expect("Input string must be wrapped in single quotes");

    let mut result = String::with_capacity(inner.len());
    let mut chars = inner.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\'' {
            result.push(c);
            continue;
        }
        // Count the whole run of consecutive single quotes.
        let mut quote_count = 1usize;
        while chars.next_if_eq(&'\'').is_some() {
            quote_count += 1;
        }
        assert!(
            quote_count % 2 == 0,
            "Expected even number of quotes, got {quote_count} in string {input}"
        );
        // For every pair of quotes, output one quote.
        result.extend(std::iter::repeat('\'').take(quote_count / 2));
    }

    result
}

/// Escapes a string by doubling contained single quotes and then wrapping it in single quotes.
fn escape_singlequotes(input: &str) -> String {
    // Single pass into one buffer; the capacity is exact when there is nothing to double.
    let mut result = String::with_capacity(input.len() + 2);
    result.push('\'');
    for c in input.chars() {
        if c == '\'' {
            result.push('\'');
        }
        result.push(c);
    }
    result.push('\'');
    result
}
+ let hex_byte = std::str::from_utf8(pair).unwrap(); + u8::from_str_radix(hex_byte, 16).unwrap() + }) + .collect(), + ), + ast::Literal::Keyword(keyword) => match keyword.to_uppercase().as_str() { + "TRUE" => types::Value::Integer(1), + "FALSE" => types::Value::Integer(0), + "NULL" => types::Value::Null, + _ => unimplemented!("Unsupported keyword literal: {}", keyword), + }, + lit => unimplemented!("{:?}", lit), + }; + Self(new_value) + } +} + +impl From for ast::Literal { + fn from(value: SimValue) -> Self { + Self::from(&value) + } +} + +impl From<&SimValue> for ast::Literal { + fn from(value: &SimValue) -> Self { + match &value.0 { + types::Value::Null => Self::Null, + types::Value::Integer(i) => Self::Numeric(i.to_string()), + types::Value::Float(f) => Self::Numeric(f.to_string()), + text @ types::Value::Text(..) => Self::String(escape_singlequotes(&text.to_string())), + types::Value::Blob(blob) => Self::Blob(hex::encode(blob)), + } + } +} + +impl From for SimValue { + fn from(value: bool) -> Self { + if value { + SimValue::TRUE + } else { + SimValue::FALSE + } + } +} + +impl From for turso_core::types::Value { + fn from(value: SimValue) -> Self { + value.0 + } +} + +impl From<&SimValue> for turso_core::types::Value { + fn from(value: &SimValue) -> Self { + value.0.clone() + } +} + +impl From for SimValue { + fn from(value: turso_core::types::Value) -> Self { + Self(value) + } +} + +impl From<&turso_core::types::Value> for SimValue { + fn from(value: &turso_core::types::Value) -> Self { + Self(value.clone()) + } +} + +#[cfg(test)] +mod tests { + use crate::model::table::{escape_singlequotes, unescape_singlequotes}; + + #[test] + fn test_unescape_singlequotes() { + assert_eq!(unescape_singlequotes("'hello'"), "hello"); + assert_eq!(unescape_singlequotes("'O''Reilly'"), "O'Reilly"); + assert_eq!( + unescape_singlequotes("'multiple''single''quotes'"), + "multiple'single'quotes" + ); + assert_eq!(unescape_singlequotes("'test''''test'"), "test''test"); + 
assert_eq!(unescape_singlequotes("'many''''''quotes'"), "many'''quotes"); + } + + #[test] + fn test_escape_singlequotes() { + assert_eq!(escape_singlequotes("hello"), "'hello'"); + assert_eq!(escape_singlequotes("O'Reilly"), "'O''Reilly'"); + assert_eq!( + escape_singlequotes("multiple'single'quotes"), + "'multiple''single''quotes'" + ); + assert_eq!(escape_singlequotes("test''test"), "'test''''test'"); + assert_eq!(escape_singlequotes("many'''quotes"), "'many''''''quotes'"); + } +}