From 0bce68b38de708447f9acca2146d7b09e3c1b50a Mon Sep 17 00:00:00 2001 From: alpaylan Date: Sun, 6 Jul 2025 14:46:38 -0400 Subject: [PATCH] wip: add joins to the select --- Cargo.lock | 14 +- simulator/Cargo.toml | 1 + simulator/generation/mod.rs | 17 + simulator/generation/plan.rs | 133 ++++-- simulator/generation/predicate/binary.rs | 20 +- simulator/generation/predicate/unary.rs | 104 ++--- simulator/generation/property.rs | 307 ++++++++++--- simulator/generation/query.rs | 39 +- simulator/main.rs | 51 ++- simulator/model/query/create.rs | 15 +- simulator/model/query/delete.rs | 28 +- simulator/model/query/drop.rs | 19 +- simulator/model/query/insert.rs | 24 +- simulator/model/query/mod.rs | 18 +- simulator/model/query/predicate.rs | 52 ++- simulator/model/query/select.rs | 416 ++++++++++++++++-- simulator/model/query/update.rs | 33 +- simulator/model/table.rs | 21 +- simulator/runner/bugbase.rs | 64 ++- simulator/runner/cli.rs | 8 + simulator/runner/env.rs | 2 + simulator/runner/execution.rs | 2 +- simulator/shrink/plan.rs | 1 + vendored/sqlite3-parser/src/parser/ast/mod.rs | 2 +- .../sqlite3-parser/src/to_sql_string/expr.rs | 2 + 25 files changed, 1093 insertions(+), 300 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2e8b0bdfc..cb079c09c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -619,7 +619,7 @@ dependencies = [ "criterion-plot", "futures", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -640,7 +640,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -1684,6 +1684,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1956,6 +1965,7 @@ dependencies = [ "dirs 6.0.0", "env_logger 0.10.2", "hex", + "itertools 0.14.0", "log", "notify", "rand 0.8.5", diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 78df7edb7..a0ee8571a 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -37,3 +37,4 @@ tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } anyhow.workspace = true turso_sqlite3_parser = { workspace = true, features = ["serde"]} hex = "0.4.3" +itertools = "0.14.0" diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index 2e4f57aca..aaa8f19a1 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -3,6 +3,8 @@ use std::{iter::Sum, ops::SubAssign}; use anarchist_readable_name_generator_lib::readable_name_custom; use rand::{distributions::uniform::SampleUniform, Rng}; +use crate::runner::env::SimulatorEnv; + mod expr; pub mod plan; mod predicate; @@ -37,6 +39,20 @@ pub trait ArbitraryFromMaybe { Self: Sized; } +/// Shadow trait for types that can be "shadowed" in the simulator environment. +/// Shadowing is a process of applying a transformation to the simulator environment +/// that reflects the changes made by the query or operation represented by the type. +/// The result of the shadowing is typically a vector of rows, which can be used to +/// update the simulator environment or to verify the correctness of the operation. +/// The `Result` type is used to indicate the type of the result of the shadowing +/// operation, which can vary depending on the type of the operation being shadowed. +/// For example, a `Create` operation might return an empty vector, while an `Insert` operation +/// might return a vector of rows that were inserted into the table. +pub(crate) trait Shadow { + type Result; + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result; +} + /// Frequency is a helper function for composing different generators with different frequency /// of occurrences. /// The type signature for the `N` parameter is a bit complex, but it @@ -142,3 +158,4 @@ pub(crate) fn gen_random_text(rng: &mut T) -> String { name.replace("-", "_") } } + diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 12ab2e454..6281c91d1 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -7,9 +7,13 @@ use std::{ }; use serde::{Deserialize, Serialize}; + +use tracing; + use turso_core::{Connection, Result, StepResult, IO}; use crate::{ + generation::Shadow, model::{ query::{update::Update, Create, CreateIndex, Delete, Drop, Insert, Query, Select}, table::SimValue, @@ -106,6 +110,40 @@ pub(crate) enum Interactions { Fault(Fault), } +impl Shadow for Interactions { + type Result = (); + + fn shadow(&self, env: &mut SimulatorEnv) { + match self { + Interactions::Property(property) => { + let initial_tables = env.tables.clone(); + let mut is_error = false; + for interaction in property.interactions() { + match interaction { + Interaction::Query(query) + | Interaction::FsyncQuery(query) + | Interaction::FaultyQuery(query) => { + is_error = is_error || query.shadow(env).is_err(); + } + Interaction::Assertion(_) => {} + Interaction::Assumption(_) => {} + Interaction::Fault(_) => {} + } + if is_error { + // If any interaction fails, we reset the tables to the initial state + env.tables = initial_tables.clone(); + break; + } + } + } + Interactions::Query(query) => { + query.shadow(env); + } + Interactions::Fault(_) => {} + } + } +} + impl Interactions { pub(crate) fn name(&self) -> Option<&str> { match self { @@ -300,53 +338,45 @@ impl InteractionPlan { } pub(crate) fn stats(&self) -> InteractionStats { - let mut read = 0; - let mut write = 0; - let mut delete = 0; - let mut create = 0; - let mut drop = 0; - let mut update = 0; - let mut create_index = 0; + let mut stats = InteractionStats { + read_count: 0, + write_count: 0, + delete_count: 0, + update_count: 0, + create_count: 0, + create_index_count: 0, + drop_count: 0, + }; + + fn query_stat(q: &Query, stats: &mut InteractionStats) { + match q { + Query::Select(_) => stats.read_count += 1, + Query::Insert(_) => stats.write_count += 1, + Query::Delete(_) => stats.delete_count += 1, + Query::Create(_) => stats.create_count += 1, + Query::Drop(_) => stats.drop_count += 1, + Query::Update(_) => stats.update_count += 1, + Query::CreateIndex(_) => stats.create_index_count += 1, + } + } for interactions in &self.plan { match interactions { Interactions::Property(property) => { for interaction in &property.interactions() { if let Interaction::Query(query) = interaction { - match query { - Query::Select(_) => read += 1, - Query::Insert(_) => write += 1, - Query::Delete(_) => delete += 1, - Query::Create(_) => create += 1, - Query::Drop(_) => drop += 1, - Query::Update(_) => update += 1, - Query::CreateIndex(_) => create_index += 1, - } + query_stat(query, &mut stats); } } } - Interactions::Query(query) => match query { - Query::Select(_) => read += 1, - Query::Insert(_) => write += 1, - Query::Delete(_) => delete += 1, - Query::Create(_) => create += 1, - Query::Drop(_) => drop += 1, - Query::Update(_) => update += 1, - Query::CreateIndex(_) => create_index += 1, - }, + Interactions::Query(query) => { + query_stat(query, &mut stats); + } Interactions::Fault(_) => {} } } - InteractionStats { - read_count: read, - write_count: write, - delete_count: delete, - update_count: update, - create_count: create, - create_index_count: create_index, - drop_count: drop, - } + stats } } @@ -370,6 +400,26 @@ impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { num_interactions ); let interactions = Interactions::arbitrary_from(rng, (env, plan.stats())); + interactions.shadow(env); + // println!( + // "Generated interactions: {}", + // interactions + // .interactions() + // .iter() + // .map(|i| i.to_string()) + // .collect::>() + // .join(", ") + // ); + // println!("tables states"); + // for table in &env.tables { + // println!("Table: {}", table.name); + // for column in &table.columns { + // println!("\tColumn: {} - Type: {:?}", column.name, column.column_type); + // } + // for row in &table.rows { + // println!("\tRow: {:?}", row); + // } + // } plan.plan.push(interactions); } @@ -379,20 +429,23 @@ impl ArbitraryFrom<&mut SimulatorEnv> for InteractionPlan { } } -impl Interaction { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { +impl Shadow for Interaction { + type Result = anyhow::Result>>; + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { match self { Self::Query(query) => query.shadow(env), Self::FsyncQuery(query) => { - let mut first = query.shadow(env); - first.extend(query.shadow(env)); - first + let mut first = query.shadow(env)?; + first.extend(query.shadow(env)?); + Ok(first) } Self::Assumption(_) | Self::Assertion(_) | Self::Fault(_) | Self::FaultyQuery(_) => { - vec![] + Ok(vec![]) } } } +} +impl Interaction { pub(crate) fn execute_query(&self, conn: &mut Arc, io: &SimulatorIO) -> ResultSet { if let Self::Query(query) = self { let query_str = query.to_string(); diff --git a/simulator/generation/predicate/binary.rs b/simulator/generation/predicate/binary.rs index 146dc677a..c67682900 100644 --- a/simulator/generation/predicate/binary.rs +++ b/simulator/generation/predicate/binary.rs @@ -321,6 +321,7 @@ impl CompoundPredicate { ) -> Self { // Cannot pick a row if the table is empty if table.rows.is_empty() { + println!("Table is empty, returning a predicate that is always {}", predicate_value); return Self(if predicate_value { Predicate::true_() } else { @@ -328,11 +329,18 @@ impl CompoundPredicate { }); } let row = pick(&table.rows, rng); + println!( + "Creating a {} CompoundPredicate for table: {} and row: {:?}", + if predicate_value { "true" } else { "false" }, + table.name, + row + ); let predicate = if rng.gen_bool(0.7) { // An AND for true requires each of its children to be true // An AND for false requires at least one of its children to be false if predicate_value { - (0..rng.gen_range(0..=3)) + println!("Creating a true AND CompoundPredicate"); + (0..rng.gen_range(1..=3)) .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, true)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( @@ -344,14 +352,14 @@ impl CompoundPredicate { .unwrap_or(Predicate::true_()) } else { // Create a vector of random booleans - let mut booleans = (0..rng.gen_range(0..=3)) + let mut booleans = (0..rng.gen_range(1..=3)) .map(|_| rng.gen_bool(0.5)) .collect::>(); let len = booleans.len(); // Make sure at least one of them is false - if !booleans.is_empty() && booleans.iter().all(|b| *b) { + if booleans.iter().all(|b| *b) { booleans[rng.gen_range(0..len)] = false; } @@ -372,12 +380,12 @@ impl CompoundPredicate { // An OR for false requires each of its children to be false if predicate_value { // Create a vector of random booleans - let mut booleans = (0..rng.gen_range(0..=3)) + let mut booleans = (0..rng.gen_range(1..=3)) .map(|_| rng.gen_bool(0.5)) .collect::>(); let len = booleans.len(); // Make sure at least one of them is true - if !booleans.is_empty() && booleans.iter().all(|b| !*b) { + if booleans.iter().all(|b| !*b) { booleans[rng.gen_range(0..len)] = true; } @@ -393,7 +401,7 @@ impl CompoundPredicate { }) .unwrap_or(Predicate::true_()) } else { - (0..rng.gen_range(0..=3)) + (0..rng.gen_range(1..=3)) .map(|_| SimplePredicate::arbitrary_from(rng, (table, row, false)).0) .reduce(|accum, curr| { Predicate(Expr::Binary( diff --git a/simulator/generation/predicate/unary.rs b/simulator/generation/predicate/unary.rs index 6c5fa6db1..de0a3600e 100644 --- a/simulator/generation/predicate/unary.rs +++ b/simulator/generation/predicate/unary.rs @@ -121,27 +121,27 @@ impl SimplePredicate { }) }), ), - ( - num_retries, - Box::new(|rng| { - TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { - assert!(value.0.as_bool()); - // True Value with negative is still True - Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) - }) - }), - ), - ( - num_retries, - Box::new(|rng| { - BitNotValue::arbitrary_from_maybe(rng, (column_value, true)).map(|value| { - Expr::unary( - ast::UnaryOperator::BitwiseNot, - Expr::Literal(value.0.into()), - ) - }) - }), - ), + // ( + // num_retries, + // Box::new(|rng| { + // TrueValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(value.0.as_bool()); + // // True Value with negative is still True + // Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // BitNotValue::arbitrary_from_maybe(rng, (column_value, true)).map(|value| { + // Expr::unary( + // ast::UnaryOperator::BitwiseNot, + // Expr::Literal(value.0.into()), + // ) + // }) + // }), + // ), ( num_retries, Box::new(|rng| { @@ -172,37 +172,37 @@ impl SimplePredicate { } let expr = backtrack( vec![ - ( - num_retries, - Box::new(|rng| { - FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { - assert!(!value.0.as_bool()); - // Positive is a no-op in Sqlite - Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) - }) - }), - ), - ( - num_retries, - Box::new(|rng| { - FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { - assert!(!value.0.as_bool()); - // True Value with negative is still True - Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) - }) - }), - ), - ( - num_retries, - Box::new(|rng| { - BitNotValue::arbitrary_from_maybe(rng, (column_value, false)).map(|value| { - Expr::unary( - ast::UnaryOperator::BitwiseNot, - Expr::Literal(value.0.into()), - ) - }) - }), - ), + // ( + // num_retries, + // Box::new(|rng| { + // FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(!value.0.as_bool()); + // // Positive is a no-op in Sqlite + // Expr::unary(ast::UnaryOperator::Positive, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // FalseValue::arbitrary_from_maybe(rng, column_value).map(|value| { + // assert!(!value.0.as_bool()); + // // True Value with negative is still True + // Expr::unary(ast::UnaryOperator::Negative, Expr::Literal(value.0.into())) + // }) + // }), + // ), + // ( + // num_retries, + // Box::new(|rng| { + // BitNotValue::arbitrary_from_maybe(rng, (column_value, false)).map(|value| { + // Expr::unary( + // ast::UnaryOperator::BitwiseNot, + // Expr::Literal(value.0.into()), + // ) + // }) + // }), + // ), ( num_retries, Box::new(|rng| { diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index 9266e41a7..ca4379e61 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -1,12 +1,16 @@ use serde::{Deserialize, Serialize}; use turso_core::LimboError; -use turso_sqlite3_parser::ast; +use turso_sqlite3_parser::ast::{self}; use crate::{ model::{ query::{ predicate::Predicate, - select::{Distinctness, ResultColumn}, + select::{ + CompoundOperator, CompoundSelect, Distinctness, ResultColumn, SelectBody, + SelectInner, + }, + update::Update, Create, Delete, Drop, Insert, Query, Select, }, table::SimValue, @@ -127,6 +131,14 @@ pub(crate) enum Property { table: String, predicate: Predicate, }, + /// Where-True-False-Null is a property that tests the boolean logic implementation + /// in the database. It relies on the fact that `P == true || P == false || P == null` should return true, + /// as SQLite uses a ternary logic system. This property is invented in "Finding Bugs in Database Systems via Query Partitioning" + /// by Rigger et al. and it is canonically called Ternary Logic Partitioning (TLP). + WhereTrueFalseNull { + select: Select, + predicate: Predicate, + }, /// FsyncNoWait is a property which tests if we do not loose any data after not waiting for fsync. /// /// # Interactions @@ -154,6 +166,7 @@ impl Property { Property::DeleteSelect { .. } => "Delete-Select", Property::DropSelect { .. } => "Drop-Select", Property::SelectSelectOptimizer { .. } => "Select-Select-Optimizer", + Property::WhereTrueFalseNull { .. } => "Where-True-False-Null", Property::FsyncNoWait { .. } => "FsyncNoWait", Property::FaultyQuery { .. } => "FaultyQuery", } @@ -237,17 +250,17 @@ impl Property { let table_name = create.table.name.clone(); let assertion = Interaction::Assertion(Assertion { - message: - "creating two tables with the name should result in a failure for the second query" - .to_string(), - func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { - let last = stack.last().unwrap(); - match last { - Ok(_) => Ok(false), - Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), - } - }), - }); + message: + "creating two tables with the name should result in a failure for the second query" + .to_string(), + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { + let last = stack.last().unwrap(); + match last { + Ok(_) => Ok(false), + Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), + } + }), + }); let mut interactions = Vec::new(); interactions.push(assumption); @@ -259,14 +272,15 @@ impl Property { interactions } Property::SelectLimit { select } => { - let table_name = select.table.clone(); let assumption = Interaction::Assumption(Assertion { - message: format!("table {} exists", table_name), + message: format!("table ({}) exists", select.dependencies().into_iter().collect::>().join(", ")), func: Box::new({ - let table_name = table_name.clone(); + let table_name = select.dependencies(); move |_: &Vec, env: &SimulatorEnv| { - Ok(env.tables.iter().any(|t| t.name == table_name)) + Ok(table_name + .iter() + .all(|table| env.tables.iter().any(|t| t.name == *table))) } }), }); @@ -312,13 +326,10 @@ impl Property { predicate: predicate.clone(), })); - let select = Interaction::Query(Query::Select(Select { - table: table.clone(), - result_columns: vec![ResultColumn::Star], - predicate: predicate.clone(), - limit: None, - distinct: Distinctness::All, - })); + let select = Interaction::Query(Query::Select(Select::simple( + table.clone(), + predicate.clone(), + ))); let assertion = Interaction::Assertion(Assertion { message: format!("`{}` should return no values for table `{}`", select, table,), @@ -399,21 +410,17 @@ impl Property { } }), }); - let select1 = Interaction::Query(Query::Select(Select { - table: table.clone(), - result_columns: vec![ResultColumn::Expr(predicate.clone())], - predicate: Predicate::true_(), - limit: None, - distinct: Distinctness::All, - })); - let select2_query = Query::Select(Select { - table: table.clone(), - result_columns: vec![ResultColumn::Star], - predicate: predicate.clone(), - limit: None, - distinct: Distinctness::All, - }); + let select1 = Interaction::Query(Query::Select(Select::single( + table.clone(), + vec![ResultColumn::Expr(predicate.clone())], + Predicate::true_(), + None, + Distinctness::All, + ))); + + let select2_query = Query::Select(Select::simple(table.clone(), predicate.clone())); + let select2 = Interaction::Query(select2_query); let assertion = Interaction::Assertion(Assertion { @@ -426,8 +433,8 @@ impl Property { // If rows1 results have more than 1 column, there is a problem if rows1.iter().any(|vs| vs.len() > 1) { return Err(LimboError::InternalError( - "Select query without the star should return only one column".to_string(), - )); + "Select query without the star should return only one column".to_string(), + )); } // Count the 1s in the select query without the star let rows1_count = rows1 @@ -437,9 +444,21 @@ impl Property { v.as_bool() }) .count(); + tracing::debug!( + "select1 returned {} rows, select2 returned {} rows", + rows1_count, + rows2.len() + ); Ok(rows1_count == rows2.len()) } - _ => Ok(false), + (Err(e1), Err(e2)) => { + tracing::debug!("Error in select1 AND select2: {}, {}", e1, e2); + Ok(true) + } + (Err(e), _) | (_, Err(e)) => { + tracing::error!("Error in select1 OR select2: {}", e); + Err(LimboError::InternalError(e.to_string())) + } } }), }); @@ -457,19 +476,141 @@ impl Property { let first = std::iter::once(Interaction::FaultyQuery(query.clone())); Vec::from_iter(first.chain(checks)) } + Property::WhereTrueFalseNull { select, predicate } => { + let assumption = Interaction::Assumption(Assertion { + message: format!( + "tables ({}) exists", + select + .dependencies() + .into_iter() + .collect::>() + .join(", ") + ), + func: Box::new({ + let tables = select.dependencies(); + move |_: &Vec, env: &SimulatorEnv| { + Ok(tables + .iter() + .all(|table| env.tables.iter().any(|t| t.name == *table))) + } + }), + }); + + let old_predicate = select.body.select.where_clause.clone(); + + let p_true = Predicate::and(vec![old_predicate.clone(), predicate.clone()]); + let p_false = Predicate::and(vec![ + old_predicate.clone(), + Predicate::not(predicate.clone()), + ]); + let p_null = Predicate::and(vec![ + old_predicate.clone(), + Predicate::is(predicate.clone(), Predicate::null()), + ]); + + let select_tlp = Select { + body: SelectBody { + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_true, + }), + compounds: vec![ + CompoundSelect { + operator: CompoundOperator::UnionAll, + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_false, + }), + }, + CompoundSelect { + operator: CompoundOperator::UnionAll, + select: Box::new(SelectInner { + distinctness: select.body.select.distinctness, + columns: select.body.select.columns.clone(), + from: select.body.select.from.clone(), + where_clause: p_null, + }), + }, + ], + }, + limit: None, + }; + + let select = Interaction::Query(Query::Select(select.clone())); + let select_tlp = Interaction::Query(Query::Select(select_tlp)); + + // select and select_tlp should return the same rows + let assertion = Interaction::Assertion(Assertion { + message: "select and select_tlp should return the same rows".to_string(), + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { + if stack.len() < 2 { + return Err(LimboError::InternalError( + "Not enough result sets on the stack".to_string(), + )); + } + + let select_result_set = stack.get(stack.len() - 2).unwrap(); + let select_tlp_result_set = stack.last().unwrap(); + + match (select_result_set, select_tlp_result_set) { + (Ok(select_rows), Ok(select_tlp_rows)) => { + if select_rows.len() != select_tlp_rows.len() { + return Ok(false); + } + // Check if any row in select_rows is not in select_tlp_rows + for row in select_rows.iter() { + if !select_tlp_rows.iter().any(|r| r == row) { + tracing::debug!( + "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); + return Ok(false); + } + } + // Check if any row in select_tlp_rows is not in select_rows + for row in select_tlp_rows.iter() { + if !select_rows.iter().any(|r| r == row) { + tracing::debug!( + "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); + + return Ok(false); + } + } + // If we reach here, the rows are the same + tracing::trace!( + "select and select_tlp returned the same rows: {:?}", + select_rows + ); + + Ok(true) + } + (Err(e), _) | (_, Err(e)) => { + tracing::error!("Error in select or select_tlp: {}", e); + Err(LimboError::InternalError(e.to_string())) + } + } + }), + }); + + vec![assumption, select, select_tlp, assertion] + } } } } fn assert_all_table_values(tables: &[String]) -> impl Iterator + use<'_> { let checks = tables.iter().flat_map(|table| { - let select = Interaction::Query(Query::Select(Select { - table: table.clone(), - result_columns: vec![ResultColumn::Star], - predicate: Predicate::true_(), - limit: None, - distinct: Distinctness::All, - })); + let select = Interaction::Query(Query::Select(Select::simple( + table.clone(), + Predicate::true_(), + ))); + let assertion = Interaction::Assertion(Assertion { message: format!( "table {} should contain all of its values after the wal reopened", @@ -589,19 +730,28 @@ fn property_insert_values_select( continue; } } + Query::Update(Update { + table: t, + set_values: _, + predicate, + }) => { + // The inserted row will not be updated. + if t == &table.name && predicate.test(&row, table) { + continue; + } + } _ => (), } queries.push(query); } // Select the row - let select_query = Select { - table: table.name.clone(), - result_columns: vec![ResultColumn::Star], - predicate: Predicate::arbitrary_from(rng, (table, &row)), - limit: None, - distinct: Distinctness::All, - }; + let select_query = Select::simple( + table.name.clone(), + Predicate::arbitrary_from(rng, (table, &row)), + ); + + println!("Select query: {select_query:?}"); Property::InsertValuesSelect { insert: insert_query, @@ -614,14 +764,16 @@ fn property_insert_values_select( fn property_select_limit(rng: &mut R, env: &SimulatorEnv) -> Property { // Get a random table let table = pick(&env.tables, rng); + println!("Selected table for 'property_select_limit': {}", table.name); // Select the table - let select = Select { - table: table.name.clone(), - result_columns: vec![ResultColumn::Star], - predicate: Predicate::arbitrary_from(rng, table), - limit: Some(rng.gen_range(1..=5)), - distinct: Distinctness::All, - }; + let select = Select::single( + table.name.clone(), + vec![ResultColumn::Star], + Predicate::arbitrary_from(rng, table), + Some(rng.gen_range(1..=5)), + Distinctness::All, + ); + println!("Select query for 'property_select_limit': {select:?}"); Property::SelectLimit { select } } @@ -726,13 +878,7 @@ fn property_drop_select( queries.push(query); } - let select = Select { - table: table.name.clone(), - result_columns: vec![ResultColumn::Star], - predicate: Predicate::arbitrary_from(rng, table), - limit: None, - distinct: Distinctness::All, - }; + let select = Select::simple(table.name.clone(), Predicate::arbitrary_from(rng, table)); Property::DropSelect { table: table.name.clone(), @@ -759,6 +905,21 @@ fn property_select_select_optimizer(rng: &mut R, env: &SimulatorEn } } +fn property_where_true_false_null(rng: &mut R, env: &SimulatorEnv) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let p1 = Predicate::arbitrary_from(rng, table); + let p2 = Predicate::arbitrary_from(rng, table); + + // Create the select query + let select = Select::simple(table.name.clone(), p1); + + Property::WhereTrueFalseNull { + select, + predicate: p2, + } +} fn property_fsync_no_wait( rng: &mut R, env: &SimulatorEnv, @@ -838,6 +999,14 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { }, Box::new(|rng: &mut R| property_select_select_optimizer(rng, env)), ), + ( + if !env.opts.disable_where_true_false_null { + remaining_.read / 2.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), + ), ( if !env.opts.disable_fsync_no_wait { 50.0 // Freestyle number diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 0cc1083d1..e875d2c99 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use crate::generation::{Arbitrary, ArbitraryFrom}; use crate::model::query::predicate::Predicate; use crate::model::query::select::{Distinctness, ResultColumn}; @@ -23,13 +21,13 @@ impl Arbitrary for Create { impl ArbitraryFrom<&SimulatorEnv> for Select { fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { let table = pick(&env.tables, rng); - Self { - table: table.name.clone(), - result_columns: vec![ResultColumn::Star], - predicate: Predicate::arbitrary_from(rng, table), - limit: Some(rng.gen_range(0..=1000)), - distinct: Distinctness::All, - } + Self::single( + table.name.clone(), + vec![ResultColumn::Star], + Predicate::arbitrary_from(rng, table), + Some(rng.gen_range(0..=1000)), + Distinctness::All, + ) } } @@ -59,13 +57,10 @@ impl ArbitraryFrom<&SimulatorEnv> for Insert { let row = pick(&select_table.rows, rng); let predicate = Predicate::arbitrary_from(rng, (select_table, row)); // Pick another table to insert into - let select = Select { - table: select_table.name.clone(), - result_columns: vec![ResultColumn::Star], + let select = Select::simple( + select_table.name.clone(), predicate, - limit: None, - distinct: Distinctness::All, - }; + ); let table = pick(&env.tables, rng); Some(Insert::Select { table: table.name.clone(), @@ -121,6 +116,10 @@ impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query { remaining.write, Box::new(|rng| Self::Insert(Insert::arbitrary_from(rng, env))), ), + ( + remaining.update, + Box::new(|rng| Self::Update(Update::arbitrary_from(rng, env))), + ), ( f64::min(remaining.write, remaining.delete), Box::new(|rng| Self::Delete(Delete::arbitrary_from(rng, env))), @@ -134,18 +133,10 @@ impl ArbitraryFrom<(&SimulatorEnv, &Remaining)> for Query { impl ArbitraryFrom<&SimulatorEnv> for Update { fn arbitrary_from(rng: &mut R, env: &SimulatorEnv) -> Self { let table = pick(&env.tables, rng); - let mut seen = HashSet::new(); let num_cols = rng.gen_range(1..=table.columns.len()); let set_values: Vec<(String, SimValue)> = (0..num_cols) .map(|_| { - let column = loop { - let column = pick(&table.columns, rng); - if seen.contains(&column.name) { - continue; - } - break column; - }; - seen.insert(column.name.clone()); + let column = pick(&table.columns, rng); ( column.name.clone(), SimValue::arbitrary_from(rng, &column.column_type), diff --git a/simulator/main.rs b/simulator/main.rs index 8b5e8e47b..8ce1851c7 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -22,6 +22,8 @@ use tracing_subscriber::fmt::format; use tracing_subscriber::EnvFilter; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use crate::generation::Shadow; + mod generation; mod model; mod runner; @@ -246,7 +248,7 @@ fn watch_mode( fn run_simulator( seed: u64, - bugbase: Option<&mut BugBase>, + mut bugbase: Option<&mut BugBase>, cli_opts: &SimulatorCLI, paths: &Paths, env: SimulatorEnv, @@ -321,6 +323,19 @@ fn run_simulator( } tracing::error!("simulation failed: '{}'", error); + + if cli_opts.disable_shrinking { + tracing::info!("shrinking is disabled, skipping shrinking"); + if let Some(bugbase) = bugbase.as_deref_mut() { + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + } + return Err(anyhow!("failed with error: '{}'", error)); + + } + + tracing::info!("Starting to shrink"); let shrunk_plans = plans @@ -334,6 +349,7 @@ fn run_simulator( // Write the shrunk plan to a file let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); + tracing::trace!("writing shrunk plan to {}", paths.shrunk_plan.display()); f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); let last_execution = Arc::new(Mutex::new(*last_execution)); @@ -360,17 +376,24 @@ fn run_simulator( SandboxedResult::FoundBug { error: e1, .. }, SandboxedResult::FoundBug { error: e2, .. }, ) => { + if let Some(bugbase) = bugbase.as_deref_mut() { + tracing::trace!( + "adding bug to bugbase, seed: {}, plan: {}, error: {}", + seed, + plans[0].plan.len(), + error + ); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + } + if e1 != e2 { tracing::error!( ?shrunk, ?result, "shrinking failed, the error was not properly reproduced" ); - if let Some(bugbase) = bugbase { - bugbase - .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) - .unwrap(); - } Err(anyhow!("failed with error: '{}'", error)) } else { tracing::info!( @@ -379,15 +402,13 @@ fn run_simulator( shrunk_plans[0].plan.len() ); // Save the shrunk database - if let Some(bugbase) = bugbase { - bugbase - .add_bug( - seed, - shrunk_plans[0].clone(), - Some(e1.clone()), - cli_opts, - ) - .unwrap(); + if let Some(bugbase) = bugbase.as_deref_mut() { + bugbase.make_shrunk( + seed, + cli_opts, + shrunk_plans[0].clone(), + Some(e1.clone()), + )?; } Err(anyhow!("failed with error: '{}'", e1)) } diff --git a/simulator/model/query/create.rs b/simulator/model/query/create.rs index eba5842ab..2d6c13a42 100644 --- a/simulator/model/query/create.rs +++ b/simulator/model/query/create.rs @@ -3,6 +3,7 @@ use std::fmt::Display; use serde::{Deserialize, Serialize}; use crate::{ + generation::Shadow, model::table::{SimValue, Table}, SimulatorEnv, }; @@ -12,13 +13,21 @@ pub(crate) struct Create { pub(crate) table: Table, } -impl Create { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { +impl Shadow for Create { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { if !env.tables.iter().any(|t| t.name == self.table.name) { env.tables.push(self.table.clone()); + Ok(vec![]) + } else { + Err(anyhow::anyhow!( + "Table {} already exists. CREATE TABLE statement ignored.", + self.table.name + )) } - vec![] + } } diff --git a/simulator/model/query/delete.rs b/simulator/model/query/delete.rs index df9dc6f8e..e044dbb0e 100644 --- a/simulator/model/query/delete.rs +++ b/simulator/model/query/delete.rs @@ -2,7 +2,7 @@ use std::fmt::Display; use serde::{Deserialize, Serialize}; -use crate::{model::table::SimValue, SimulatorEnv}; +use crate::{generation::Shadow, model::table::SimValue, SimulatorEnv}; use super::predicate::Predicate; @@ -12,19 +12,25 @@ pub(crate) struct Delete { pub(crate) predicate: Predicate, } -impl Delete { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { - let table = env - .tables - .iter_mut() - .find(|t| t.name == self.table) - .unwrap(); +impl Shadow for Delete { + type Result = anyhow::Result>>; - let t2 = table.clone(); + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + let table = env.tables.iter_mut().find(|t| t.name == self.table); - table.rows.retain_mut(|r| !self.predicate.test(r, &t2)); + if let Some(table) = table { + // If the table exists, we can delete from it + let t2 = table.clone(); + table.rows.retain_mut(|r| !self.predicate.test(r, &t2)); + } else { + // If the table does not exist, we return an error + return Err(anyhow::anyhow!( + "Table {} does not exist. DELETE statement ignored.", + self.table + )); + } - vec![] + Ok(vec![]) } } diff --git a/simulator/model/query/drop.rs b/simulator/model/query/drop.rs index 2731586da..92cb0baea 100644 --- a/simulator/model/query/drop.rs +++ b/simulator/model/query/drop.rs @@ -2,17 +2,28 @@ use std::fmt::Display; use serde::{Deserialize, Serialize}; -use crate::{model::table::SimValue, SimulatorEnv}; +use crate::{generation::Shadow, model::table::SimValue, SimulatorEnv}; #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Drop { pub(crate) table: String, } -impl Drop { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { +impl Shadow for Drop { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + if !env.tables.iter().any(|t| t.name == self.table) { + // If the table does not exist, we return an error + return Err(anyhow::anyhow!( + "Table {} does not exist. DROP statement ignored.", + self.table + )); + } + env.tables.retain(|t| t.name != self.table); - vec![] + + Ok(vec![]) } } diff --git a/simulator/model/query/insert.rs b/simulator/model/query/insert.rs index 47ad98e47..f9e119f76 100644 --- a/simulator/model/query/insert.rs +++ b/simulator/model/query/insert.rs @@ -2,7 +2,7 @@ use std::fmt::Display; use serde::{Deserialize, Serialize}; -use crate::{model::table::SimValue, SimulatorEnv}; +use crate::{generation::Shadow, model::table::SimValue, SimulatorEnv}; use super::select::Select; @@ -18,25 +18,39 @@ pub(crate) enum Insert { }, } -impl Insert { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { +impl Shadow for Insert { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { match self { Insert::Values { table, values } => { if let Some(t) = env.tables.iter_mut().find(|t| &t.name == table) { t.rows.extend(values.clone()); + } else { + return Err(anyhow::anyhow!( + "Table {} does not exist. INSERT statement ignored.", + table + )); } } Insert::Select { table, select } => { - let rows = select.shadow(env); + let rows = select.shadow(env)?; if let Some(t) = env.tables.iter_mut().find(|t| &t.name == table) { t.rows.extend(rows); + } else { + return Err(anyhow::anyhow!( + "Table {} does not exist. INSERT statement ignored.", + table + )); } } } - vec![] + Ok(vec![]) } +} +impl Insert { pub(crate) fn table(&self) -> &str { match self { Insert::Values { table, .. } | Insert::Select { table, .. } => table, diff --git a/simulator/model/query/mod.rs b/simulator/model/query/mod.rs index 8a52514f3..1bf59b9c3 100644 --- a/simulator/model/query/mod.rs +++ b/simulator/model/query/mod.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use turso_sqlite3_parser::to_sql_string::ToSqlContext; use update::Update; -use crate::{model::table::SimValue, runner::env::SimulatorEnv}; +use crate::{generation::Shadow, model::table::SimValue, runner::env::SimulatorEnv}; pub mod create; pub mod create_index; @@ -36,9 +36,9 @@ pub(crate) enum Query { impl Query { pub(crate) fn dependencies(&self) -> HashSet { match self { + Query::Select(select) => select.dependencies(), Query::Create(_) => HashSet::new(), - Query::Select(Select { table, .. }) - | Query::Insert(Insert::Select { table, .. }) + Query::Insert(Insert::Select { table, .. }) | Query::Insert(Insert::Values { table, .. }) | Query::Delete(Delete { table, .. }) | Query::Update(Update { table, .. }) @@ -51,8 +51,8 @@ impl Query { pub(crate) fn uses(&self) -> Vec { match self { Query::Create(Create { table }) => vec![table.name.clone()], - Query::Select(Select { table, .. }) - | Query::Insert(Insert::Select { table, .. }) + Query::Select(select) => select.dependencies().into_iter().collect(), + Query::Insert(Insert::Select { table, .. }) | Query::Insert(Insert::Values { table, .. }) | Query::Delete(Delete { table, .. }) | Query::Update(Update { table, .. }) @@ -60,8 +60,12 @@ impl Query { Query::CreateIndex(CreateIndex { table_name, .. }) => vec![table_name.clone()], } } +} - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { +impl Shadow for Query { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { match self { Query::Create(create) => create.shadow(env), Query::Insert(insert) => insert.shadow(env), @@ -69,7 +73,7 @@ impl Query { Query::Select(select) => select.shadow(env), Query::Update(update) => update.shadow(env), Query::Drop(drop) => drop.shadow(env), - Query::CreateIndex(create_index) => create_index.shadow(env), + Query::CreateIndex(create_index) => Ok(create_index.shadow(env)), } } } diff --git a/simulator/model/query/predicate.rs b/simulator/model/query/predicate.rs index 1b342939f..481f0dd17 100644 --- a/simulator/model/query/predicate.rs +++ b/simulator/model/query/predicate.rs @@ -13,11 +13,59 @@ pub struct Predicate(pub ast::Expr); impl Predicate { pub(crate) fn true_() -> Self { - Self(ast::Expr::Literal(ast::Literal::Numeric("1".to_string()))) + Self(ast::Expr::Literal(ast::Literal::Keyword("TRUE".to_string()))) } pub(crate) fn false_() -> Self { - Self(ast::Expr::Literal(ast::Literal::Numeric("0".to_string()))) + Self(ast::Expr::Literal(ast::Literal::Keyword("FALSE".to_string()))) + } + pub(crate) fn null() -> Self { + Self(ast::Expr::Literal(ast::Literal::Null)) + } + + pub(crate) fn not(predicate: Predicate) -> Self { + let expr = ast::Expr::Unary(ast::UnaryOperator::Not, Box::new(predicate.0)); + Self(expr) + } + + pub(crate) fn and(predicates: Vec) -> Self { + if predicates.is_empty() { + Self::true_() + } else if predicates.len() == 1 { + predicates.into_iter().next().unwrap() + } else { + let expr = ast::Expr::Binary( + Box::new(predicates[0].0.clone()), + ast::Operator::And, + Box::new(Self::and(predicates[1..].to_vec()).0), + ); + Self(expr) + } + } + + pub(crate) fn or(predicates: Vec) -> Self { + if predicates.is_empty() { + Self::false_() + } else if predicates.len() == 1 { + predicates.into_iter().next().unwrap() + } else { + let expr = ast::Expr::Binary( + Box::new(predicates[0].0.clone()), + ast::Operator::Or, + Box::new(Self::or(predicates[1..].to_vec()).0), + ); + Self(expr) + } + } + + pub(crate) fn eq(lhs: Predicate, rhs: Predicate) -> Self { + let expr = ast::Expr::Binary(Box::new(lhs.0), ast::Operator::Equals, Box::new(rhs.0)); + Self(expr) + } + + pub(crate) fn is(lhs: Predicate, rhs: Predicate) -> Self { + let expr = ast::Expr::Binary(Box::new(lhs.0), ast::Operator::Is, Box::new(rhs.0)); + Self(expr) } pub(crate) fn test(&self, row: &[SimValue], table: &Table) -> bool { diff --git a/simulator/model/query/select.rs b/simulator/model/query/select.rs index f51db22ed..b6580f689 100644 --- a/simulator/model/query/select.rs +++ b/simulator/model/query/select.rs @@ -1,20 +1,22 @@ -use std::fmt::Display; +use std::{collections::HashSet, fmt::Display}; +use anyhow::Context; +pub use ast::Distinctness; +use itertools::Itertools; use serde::{Deserialize, Serialize}; +use turso_sqlite3_parser::{ast, to_sql_string::ToSqlString}; -use crate::{model::table::SimValue, SimulatorEnv}; +use crate::{ + generation::Shadow, + model::{ + query::EmptyContext, + table::{SimValue, Table}, + }, + SimulatorEnv, +}; use super::predicate::Predicate; -/// `SELECT` distinctness -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub(crate) enum Distinctness { - /// `DISTINCT` - Distinct, - /// `ALL` - All, -} - /// `SELECT` or `RETURNING` result column // https://sqlite.org/syntax/result-column.html #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] @@ -38,43 +40,379 @@ impl Display for ResultColumn { } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Select { - pub(crate) table: String, - pub(crate) result_columns: Vec, - pub(crate) predicate: Predicate, - pub(crate) distinct: Distinctness, + pub(crate) body: SelectBody, pub(crate) limit: Option, } impl Select { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { - let table = env.tables.iter().find(|t| t.name == self.table.as_str()); - if let Some(table) = table { - table - .rows + pub fn simple(table: String, where_clause: Predicate) -> Self { + Self::single( + table, + vec![ResultColumn::Star], + where_clause, + None, + Distinctness::All, + ) + } + + pub fn single( + table: String, + result_columns: Vec, + where_clause: Predicate, + limit: Option, + distinct: Distinctness, + ) -> Self { + Select { + body: SelectBody { + select: Box::new(SelectInner { + distinctness: distinct, + columns: result_columns, + from: FromClause { + table, + joins: Vec::new(), + }, + where_clause, + }), + compounds: Vec::new(), + }, + limit, + } + } + + pub(crate) fn dependencies(&self) -> HashSet { + let mut tables = HashSet::new(); + tables.insert(self.body.select.from.table.clone()); + + tables.extend(self.body.select.from.dependencies().into_iter()); + + for compound in &self.body.compounds { + tables.extend(compound.select.from.dependencies().into_iter()); + } + + tables + } +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SelectBody { + /// first select + pub select: Box, + /// compounds + pub compounds: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SelectInner { + /// `DISTINCT` + pub distinctness: Distinctness, + /// columns + pub columns: Vec, + /// `FROM` clause + pub from: FromClause, + /// `WHERE` clause + pub where_clause: Predicate, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum CompoundOperator { + /// `UNION` + Union, + /// `UNION ALL` + UnionAll, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct CompoundSelect { + /// operator + pub operator: CompoundOperator, + /// select + pub select: Box, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct FromClause { + /// table + pub table: String, + /// `JOIN`ed tables + pub joins: Vec, +} + +impl FromClause { + fn to_sql_ast(&self) -> ast::FromClause { + ast::FromClause { + select: Some(Box::new(ast::SelectTable::Table( + ast::QualifiedName::single(ast::Name(self.table.clone())), + None, + None, + ))), + joins: Some( + self.joins + .iter() + .map(|join| ast::JoinedSelectTable { + operator: match join.join_type { + JoinType::Inner => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::INNER)) + } + JoinType::Left => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::LEFT)) + } + JoinType::Right => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::RIGHT)) + } + JoinType::Full => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::OUTER)) + } + JoinType::Cross => { + ast::JoinOperator::TypedJoin(Some(ast::JoinType::CROSS)) + } + }, + table: ast::SelectTable::Table( + ast::QualifiedName::single(ast::Name(join.table.clone())), + None, + None, + ), + constraint: Some(ast::JoinConstraint::On(join.on.0.clone())), + }) + .collect(), + ), + op: None, // FIXME: this is a temporary fix, we should remove this field + } + } + + pub(crate) fn dependencies(&self) -> Vec { + let mut deps = vec![self.table.clone()]; + for join in &self.joins { + deps.push(join.table.clone()); + } + deps + } +} +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct JoinedTable { + /// table name + pub table: String, + /// `JOIN` type + pub join_type: JoinType, + /// `ON` clause + pub on: Predicate, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum JoinType { + Inner, + Left, + Right, + Full, + Cross, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct JoinTable { + pub tables: Vec, + pub rows: Vec>, +} + +impl JoinTable { + fn into_table(self) -> Table { + Table { + name: "".to_string(), + columns: self + .tables .iter() - .filter(|row| self.predicate.test(row, table)) - .cloned() - .collect() - } else { - vec![] + .flat_map(|t| { + t.columns.iter().map(|c| { + let mut c = c.clone(); + c.name = format!("{}.<{}", t.name, c.name); + c + }) + }) + .collect(), + rows: self.rows, } } } -impl Display for Select { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "SELECT {} FROM {} WHERE {}{}", - self.result_columns +impl Shadow for FromClause { + type Result = anyhow::Result; + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + let first_table = env + .tables + .iter() + .find(|t| t.name == self.table) + .context("Table not found")?; + + let mut join_table = JoinTable { + tables: vec![first_table.clone()], + rows: Vec::new(), + }; + + for join in &self.joins { + let joined_table = env + .tables .iter() - .map(ResultColumn::to_string) - .collect::>() - .join(", "), - self.table, - self.predicate, - self.limit - .map_or("".to_string(), |l| format!(" LIMIT {}", l)) - ) + .find(|t| t.name == join.table) + .context("Joined table not found")?; + + join_table.tables.push(joined_table.clone()); + + match join.join_type { + JoinType::Inner => { + // Implement inner join logic + let join_rows = joined_table + .rows + .iter() + .filter(|row| join.on.test(row, joined_table)) + .cloned() + .collect::>(); + // take a cartesian product of the rows + let mut all_row_pairs = + first_table.rows.iter().cartesian_product(join_rows.iter()); + + for (row1, row2) in all_row_pairs { + let row = row1.iter().chain(row2.iter()).cloned().collect::>(); + + let as_table = join_table.clone().into_table(); + let is_in = join.on.test(&row, &as_table); + + if is_in { + join_table.rows.push(row); + } + } + } + _ => todo!(), + } + } + Ok(join_table) } } + +impl Shadow for SelectInner { + type Result = anyhow::Result; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + let mut join_table = self.from.shadow(env)?; + let as_table = join_table.clone().into_table(); + join_table + .rows + .retain(|row| self.where_clause.test(row, &as_table)); + + if self.distinctness == Distinctness::Distinct { + join_table.rows.sort_unstable(); + join_table.rows.dedup(); + } + + Ok(join_table) + } +} + +impl Shadow for Select { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + let first_result = self.body.select.shadow(env)?; + + let mut rows = first_result.into_table().rows; + + for compound in self.body.compounds.iter() { + let compound_results = compound.select.shadow(env)?; + + match compound.operator { + CompoundOperator::Union => todo!(), + CompoundOperator::UnionAll => { + // Union all means we just concatenate the results + rows.extend(compound_results.rows.into_iter()); + } + } + } + + Ok(rows) + } +} + +impl Select { + pub fn to_sql_ast(&self) -> ast::Select { + ast::Select { + with: None, + body: ast::SelectBody { + select: Box::new(ast::OneSelect::Select(Box::new(ast::SelectInner { + distinctness: Some(self.body.select.distinctness), + columns: self + .body + .select + .columns + .iter() + .map(|col| match col { + ResultColumn::Expr(expr) => { + ast::ResultColumn::Expr(expr.0.clone(), None) + } + ResultColumn::Star => ast::ResultColumn::Star, + ResultColumn::Column(name) => { + ast::ResultColumn::Expr(ast::Expr::Id(ast::Id(name.clone())), None) + } + }) + .collect(), + from: Some(self.body.select.from.to_sql_ast()), + where_clause: Some(self.body.select.where_clause.0.clone()), + group_by: None, + window_clause: None, + }))), + compounds: Some( + self.body + .compounds + .iter() + .map(|compound| ast::CompoundSelect { + operator: match compound.operator { + CompoundOperator::Union => ast::CompoundOperator::Union, + CompoundOperator::UnionAll => ast::CompoundOperator::UnionAll, + }, + select: Box::new(ast::OneSelect::Select(Box::new(ast::SelectInner { + distinctness: Some(compound.select.distinctness), + columns: compound + .select + .columns + .iter() + .map(|col| match col { + ResultColumn::Expr(expr) => { + ast::ResultColumn::Expr(expr.0.clone(), None) + } + ResultColumn::Star => ast::ResultColumn::Star, + ResultColumn::Column(name) => ast::ResultColumn::Expr( + ast::Expr::Id(ast::Id(name.clone())), + None, + ), + }) + .collect(), + from: Some(compound.select.from.to_sql_ast()), + where_clause: Some(compound.select.where_clause.0.clone()), + group_by: None, + window_clause: None, + }))), + }) + .collect(), + ), + }, + order_by: None, + limit: self.limit.map(|l| { + Box::new(ast::Limit { + expr: ast::Expr::Literal(ast::Literal::Numeric(l.to_string())), + offset: None, + }) + }), + } + } +} +impl Display for Select { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_sql_ast().to_sql_string(&EmptyContext {})) + } +} + +#[cfg(test)] +mod select_tests { + use super::*; + use crate::model::table::SimValue; + use crate::SimulatorEnv; + + #[test] + fn test_select_display() {} +} diff --git a/simulator/model/query/update.rs b/simulator/model/query/update.rs index 7bcff95e2..d7504005e 100644 --- a/simulator/model/query/update.rs +++ b/simulator/model/query/update.rs @@ -2,7 +2,7 @@ use std::fmt::Display; use serde::{Deserialize, Serialize}; -use crate::{model::table::SimValue, SimulatorEnv}; +use crate::{generation::Shadow, model::table::SimValue, SimulatorEnv}; use super::predicate::Predicate; @@ -13,13 +13,21 @@ pub(crate) struct Update { pub(crate) predicate: Predicate, } -impl Update { - pub(crate) fn shadow(&self, env: &mut SimulatorEnv) -> Vec> { - let table = env - .tables - .iter_mut() - .find(|t| t.name == self.table) - .unwrap(); +impl Shadow for Update { + type Result = anyhow::Result>>; + + fn shadow(&self, env: &mut SimulatorEnv) -> Self::Result { + let table = env.tables.iter_mut().find(|t| t.name == self.table); + + let table = if let Some(table) = table { + table + } else { + return Err(anyhow::anyhow!( + "Table {} does not exist. UPDATE statement ignored.", + self.table + )); + }; + let t2 = table.clone(); for row in table .rows @@ -27,17 +35,18 @@ impl Update { .filter(|r| self.predicate.test(r, &t2)) { for (column, set_value) in &self.set_values { - let (idx, _) = table + table .columns .iter() .enumerate() .find(|(_, c)| &c.name == column) - .unwrap(); - row[idx] = set_value.clone(); + .map(|(idx, _)| { + row[idx] = set_value.clone(); + }); } } - vec![] + Ok(vec![]) } } diff --git a/simulator/model/table.rs b/simulator/model/table.rs index f12cf7cb8..0686721ff 100644 --- a/simulator/model/table.rs +++ b/simulator/model/table.rs @@ -78,7 +78,7 @@ where s.parse().map_err(serde::de::Error::custom) } -#[derive(Clone, Debug, PartialEq, PartialOrd, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)] pub(crate) struct SimValue(pub turso_core::Value); fn to_sqlite_blob(bytes: &[u8]) -> String { @@ -131,9 +131,16 @@ impl SimValue { ast::Operator::Divide => self.0.exec_divide(&other.0).into(), ast::Operator::Greater => (self > other).into(), ast::Operator::GreaterEquals => (self >= other).into(), - // TODO: Should attempt to extract `Is` and `IsNot` handling in a function in Core - ast::Operator::Is => todo!(), - ast::Operator::IsNot => todo!(), + // TODO: Test these implementations + ast::Operator::Is => match (&self.0, &other.0) { + (types::Value::Null, types::Value::Null) => true.into(), + (types::Value::Null, _) => false.into(), + (_, types::Value::Null) => false.into(), + _ => self.binary_compare(other, ast::Operator::Equals), + }, + ast::Operator::IsNot => self + .binary_compare(other, ast::Operator::Is) + .unary_exec(ast::UnaryOperator::Not), ast::Operator::LeftShift => self.0.exec_shift_left(&other.0).into(), ast::Operator::Less => (self < other).into(), ast::Operator::LessEquals => (self <= other).into(), @@ -256,6 +263,12 @@ impl From<&ast::Literal> for SimValue { }) .collect(), ), + ast::Literal::Keyword(keyword) => match keyword.to_uppercase().as_str() { + "TRUE" => types::Value::Integer(1), + "FALSE" => types::Value::Integer(0), + "NULL" => types::Value::Null, + _ => unimplemented!("Unsupported keyword literal: {}", keyword), + }, lit => unimplemented!("{:?}", lit), }; Self(new_value) diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs index 3bfb05a9c..af29e6bff 100644 --- a/simulator/runner/bugbase.rs +++ b/simulator/runner/bugbase.rs @@ -27,6 +27,8 @@ pub struct LoadedBug { pub seed: u64, /// The plan of the bug. pub plan: InteractionPlan, + /// The shrunk plan of the bug, if any. + pub shrunk_plan: Option, /// The runs of the bug. pub runs: Vec, } @@ -42,6 +44,8 @@ pub(crate) struct BugRun { pub(crate) error: Option, /// Options pub(crate) cli_options: SimulatorCLI, + /// Whether the run was a shrunk run. + pub(crate) shrunk: bool, } impl Bug { @@ -197,6 +201,7 @@ impl BugBase { timestamp: SystemTime::now().into(), error, cli_options: cli_options.clone(), + shrunk: false, }); self.bugs.insert(seed, Bug::Loaded(bug.clone())); } else { @@ -208,7 +213,9 @@ impl BugBase { timestamp: SystemTime::now().into(), error, cli_options: cli_options.clone(), + shrunk: false, }], + shrunk_plan: None, }; self.bugs.insert(seed, Bug::Loaded(bug.clone())); } @@ -246,6 +253,20 @@ impl BugBase { ) .with_context(|| "should be able to write plan file")?; + if let Some(shrunk_plan) = &bug.shrunk_plan { + let shrunk_plan_path = bug_path.join("shrunk.json"); + std::fs::write( + &shrunk_plan_path, + serde_json::to_string_pretty(shrunk_plan) + .with_context(|| "should be able to serialize shrunk plan")?, + ) + .with_context(|| "should be able to write shrunk plan file")?; + + let readable_shrunk_plan_path = bug_path.join("shrunk.sql"); + std::fs::write(&readable_shrunk_plan_path, shrunk_plan.to_string()) + .with_context(|| "should be able to write readable shrunk plan file")?; + } + let readable_plan_path = bug_path.join("plan.sql"); std::fs::write(&readable_plan_path, bug.plan.to_string()) .with_context(|| "should be able to write readable plan file")?; @@ -279,6 +300,17 @@ impl BugBase { })?; let plan: InteractionPlan = serde_json::from_str(&plan) .with_context(|| "should be able to deserialize plan")?; + + let shrunk_plan: Option = std::fs::read_to_string( + self.path.join(seed.to_string()).join("shrunk_plan.json"), + ) + .with_context(|| "should be able to read shrunk plan file") + .and_then(|shrunk| serde_json::from_str(&shrunk).map_err(|e| anyhow!("{}", e))) + .ok(); + + let shrunk_plan: Option = + shrunk_plan.and_then(|shrunk_plan| serde_json::from_str(&shrunk_plan).ok()); + let runs = std::fs::read_to_string(self.path.join(seed.to_string()).join("runs.json")) .with_context(|| "should be able to read runs file") @@ -289,6 +321,7 @@ impl BugBase { seed, plan: plan.clone(), runs, + shrunk_plan, }; self.bugs.insert(seed, Bug::Loaded(bug.clone())); @@ -313,9 +346,10 @@ impl BugBase { let bug = self.get_bug(seed); match bug { None => { - tracing::debug!("removing bug base entry for {}", seed); - std::fs::remove_dir_all(self.path.join(seed.to_string())) - .with_context(|| "should be able to remove bug directory")?; + // todo: do not forget to uncomment this + // tracing::debug!("removing bug base entry for {}", seed); + // std::fs::remove_dir_all(self.path.join(seed.to_string())) + // .with_context(|| "should be able to remove bug directory")?; } Some(_) => { let mut bug = self.load_bug(seed)?; @@ -324,6 +358,7 @@ impl BugBase { timestamp: SystemTime::now().into(), error: None, cli_options: cli_options.clone(), + shrunk: false, }); self.bugs.insert(seed, Bug::Loaded(bug.clone())); // Save the bug to the bug base. @@ -336,6 +371,29 @@ impl BugBase { Ok(()) } + pub(crate) fn make_shrunk( + &mut self, + seed: u64, + cli_options: &SimulatorCLI, + shrunk_plan: InteractionPlan, + error: Option, + ) -> anyhow::Result<()> { + let mut bug = self.load_bug(seed)?; + bug.shrunk_plan = Some(shrunk_plan); + bug.runs.push(BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error, + cli_options: cli_options.clone(), + shrunk: true, + }); + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + // Save the bug to the bug base. + self.save_bug(seed) + .with_context(|| "should be able to save shrunk bug")?; + Ok(()) + } + pub(crate) fn load_bugs(&mut self) -> anyhow::Result> { let seeds = self.bugs.keys().copied().collect::>(); diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 28daa1840..12226219c 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -48,6 +48,8 @@ pub struct SimulatorCLI { pub subcommand: Option, #[clap(long, help = "disable BugBase", default_value_t = false)] pub disable_bugbase: bool, + #[clap(long, help = "disable shrinking", default_value_t = false)] + pub disable_shrinking: bool, #[clap(long, help = "disable UPDATE Statement", default_value_t = false)] pub disable_update: bool, #[clap(long, help = "disable DELETE Statement", default_value_t = false)] @@ -82,6 +84,12 @@ pub struct SimulatorCLI { default_value_t = false )] pub disable_select_optimizer: bool, + #[clap( + long, + help = "disable Where-True-False-Null Property", + default_value_t = false + )] + pub disable_where_true_false_null: bool, #[clap(long, help = "disable FsyncNoWait Property", default_value_t = true)] pub disable_fsync_no_wait: bool, #[clap(long, help = "disable FaultyQuery Property", default_value_t = true)] diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index b85d8edb9..1edbcb701 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -117,6 +117,7 @@ impl SimulatorEnv { disable_select_limit: cli_opts.disable_select_limit, disable_delete_select: cli_opts.disable_delete_select, disable_drop_select: cli_opts.disable_drop_select, + disable_where_true_false_null: cli_opts.disable_where_true_false_null, disable_fsync_no_wait: cli_opts.disable_fsync_no_wait, disable_faulty_query: cli_opts.disable_faulty_query, page_size: 4096, // TODO: randomize this too @@ -234,6 +235,7 @@ pub(crate) struct SimulatorOpts { pub(crate) disable_select_limit: bool, pub(crate) disable_delete_select: bool, pub(crate) disable_drop_select: bool, + pub(crate) disable_where_true_false_null: bool, pub(crate) disable_fsync_no_wait: bool, pub(crate) disable_faulty_query: bool, pub(crate) disable_reopen_database: bool, diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index 7f8de7ddb..093d10bda 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -5,7 +5,7 @@ use turso_core::{Connection, LimboError, Result, StepResult}; use crate::generation::{ pick_index, - plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, + plan::{Interaction, InteractionPlan, InteractionPlanState, ResultSet}, Shadow as _, }; use super::env::{SimConnection, SimulatorEnv}; diff --git a/simulator/shrink/plan.rs b/simulator/shrink/plan.rs index d2d548b3b..7ebedfa71 100644 --- a/simulator/shrink/plan.rs +++ b/simulator/shrink/plan.rs @@ -72,6 +72,7 @@ impl InteractionPlan { } Property::SelectLimit { .. } | Property::SelectSelectOptimizer { .. } + | Property::WhereTrueFalseNull { .. } | Property::FsyncNoWait { .. } | Property::FaultyQuery { .. } => {} } diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 956fa88bd..cc9dfcb8a 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -896,7 +896,7 @@ pub struct FromClause { pub select: Option>, // FIXME mandatory /// `JOIN`ed tabled pub joins: Option>, - op: Option, // FIXME transient + pub op: Option, // FIXME transient } impl FromClause { pub(crate) fn empty() -> Self { diff --git a/vendored/sqlite3-parser/src/to_sql_string/expr.rs b/vendored/sqlite3-parser/src/to_sql_string/expr.rs index e04c6bb2c..3f02d4737 100644 --- a/vendored/sqlite3-parser/src/to_sql_string/expr.rs +++ b/vendored/sqlite3-parser/src/to_sql_string/expr.rs @@ -30,11 +30,13 @@ impl ToSqlString for Expr { ret.push_str(&end.to_sql_string(context)); } Expr::Binary(lhs, op, rhs) => { + ret.push('('); ret.push_str(&lhs.to_sql_string(context)); ret.push(' '); ret.push_str(&op.to_string()); ret.push(' '); ret.push_str(&rhs.to_sql_string(context)); + ret.push(')'); } Expr::Case { base,