From 44a2fa60a3e0061498258f511d0a6d578023ae58 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 9 Jul 2025 00:14:13 -0400 Subject: [PATCH] add union all cardinality preservation, start generating INSERT INTO t SELECT... queries, add integrity checks to the end of run_simulation --- simulator/generation/property.rs | 132 ++++++++++++++++++++++++++----- simulator/generation/query.rs | 5 +- simulator/main.rs | 14 +++- simulator/model/query/select.rs | 12 +++ simulator/runner/cli.rs | 6 ++ simulator/runner/env.rs | 2 + simulator/runner/execution.rs | 12 ++- simulator/runner/watch.rs | 17 +++- simulator/shrink/plan.rs | 1 + 9 files changed, 163 insertions(+), 38 deletions(-) diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index c1e59d5e4..05f8f6e06 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -139,6 +139,16 @@ pub(crate) enum Property { select: Select, predicate: Predicate, }, + /// UNION-ALL-Preserves-Cardinality is a property that tests the UNION ALL operator + /// implementation in the database. It relies on the fact that `SELECT * FROM t WHERE p1 UNION ALL SELECT * FROM t WHERE p2` + /// should return as many rows as `SELECT * FROM t WHERE p1` and `SELECT * FROM t WHERE p2` return together. + /// The property is successful when the UNION ALL of 2 select queries returns the same number of rows + /// as the sum of the two select queries. + UNIONAllPreservesCardinality { + select: Select, + where_clause: Predicate, + }, /// FsyncNoWait is a property which tests if we do not loose any data after not waiting for fsync. /// /// # Interactions @@ -169,6 +179,7 @@ impl Property { Property::WhereTrueFalseNull { .. } => "Where-True-False-Null", Property::FsyncNoWait { .. } => "FsyncNoWait", Property::FaultyQuery { .. } => "FaultyQuery", + Property::UNIONAllPreservesCardinality { .. } => "UNION-All-Preserves-Cardinality", } } /// interactions construct a list of interactions, which is an executable representation of the property. 
@@ -250,17 +261,17 @@ impl Property { let table_name = create.table.name.clone(); let assertion = Interaction::Assertion(Assertion { - message: - "creating two tables with the name should result in a failure for the second query" - .to_string(), - func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { - let last = stack.last().unwrap(); - match last { - Ok(_) => Ok(false), - Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), - } - }), - }); + message: + "creating two tables with the name should result in a failure for the second query" + .to_string(), + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { + let last = stack.last().unwrap(); + match last { + Ok(_) => Ok(false), + Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), + } + }), + }); let mut interactions = Vec::new(); interactions.push(assumption); @@ -272,9 +283,15 @@ impl Property { interactions } Property::SelectLimit { select } => { - let assumption = Interaction::Assumption(Assertion { - message: format!("table ({}) exists", select.dependencies().into_iter().collect::>().join(", ")), + message: format!( + "table ({}) exists", + select + .dependencies() + .into_iter() + .collect::>() + .join(", ") + ), func: Box::new({ let table_name = select.dependencies(); move |_: &Vec, env: &SimulatorEnv| { @@ -433,8 +450,8 @@ impl Property { // If rows1 results have more than 1 column, there is a problem if rows1.iter().any(|vs| vs.len() > 1) { return Err(LimboError::InternalError( - "Select query without the star should return only one column".to_string(), - )); + "Select query without the star should return only one column".to_string(), + )); } // Count the 1s in the select query without the star let rows1_count = rows1 @@ -565,9 +582,9 @@ impl Property { for row in select_rows.iter() { if !select_tlp_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in 
select but not in select_tlp", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select but not in select_tlp", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); return Ok(false); } } @@ -575,9 +592,9 @@ impl Property { for row in select_tlp_rows.iter() { if !select_rows.iter().any(|r| r == row) { tracing::debug!( - "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", - row.iter().map(|v| v.to_string()).collect::>().join(", ") - ); + "select and select_tlp returned different rows, ({}) is in select_tlp but not in select", + row.iter().map(|v| v.to_string()).collect::>().join(", ") + ); return Ok(false); } @@ -600,6 +617,45 @@ impl Property { vec![assumption, select, select_tlp, assertion] } + Property::UNIONAllPreservesCardinality { select, where_clause } => { + let s1 = select.clone(); + let mut s2 = select.clone(); + s2.body.select.where_clause = where_clause.clone(); + let s3 = Select::compound(s1.clone(), s2.clone(), CompoundOperator::UnionAll); + + vec![ + Interaction::Query(Query::Select(s1.clone())), + Interaction::Query(Query::Select(s2.clone())), + Interaction::Query(Query::Select(s3.clone())), + Interaction::Assertion(Assertion { + message: "UNION ALL should preserve cardinality".to_string(), + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { + if stack.len() < 3 { + return Err(LimboError::InternalError( + "Not enough result sets on the stack".to_string(), + )); + } + + let select1 = stack.get(stack.len() - 3).unwrap(); + let select2 = stack.get(stack.len() - 2).unwrap(); + let union_all = stack.last().unwrap(); + + match (select1, select2, union_all) { + (Ok(rows1), Ok(rows2), Ok(union_rows)) => { + let count1 = rows1.len(); + let count2 = rows2.len(); + let union_count = union_rows.len(); + Ok(union_count == count1 + count2) + } + (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { + tracing::error!("Error in select 
queries: {}", e); + Err(LimboError::InternalError(e.to_string())) + } + } + }), + }), + ] + } } } } @@ -916,6 +972,32 @@ fn property_where_true_false_null(rng: &mut R, env: &SimulatorEnv) predicate: p2, } } + +fn property_union_all_preserves_cardinality( + rng: &mut R, + env: &SimulatorEnv, +) -> Property { + // Get a random table + let table = pick(&env.tables, rng); + // Generate a random predicate + let p1 = Predicate::arbitrary_from(rng, table); + let p2 = Predicate::arbitrary_from(rng, table); + + // Create the select query + let select = Select::single( + table.name.clone(), + vec![ResultColumn::Star], + p1, + None, + Distinctness::All, + ); + + Property::UNIONAllPreservesCardinality { + select, + where_clause: p2, + } +} + fn property_fsync_no_wait( rng: &mut R, env: &SimulatorEnv, @@ -1003,6 +1085,14 @@ impl ArbitraryFrom<(&SimulatorEnv, &InteractionStats)> for Property { }, Box::new(|rng: &mut R| property_where_true_false_null(rng, env)), ), + ( + if !env.opts.disable_union_all_preserves_cardinality { + remaining_.read / 3.0 + } else { + 0.0 + }, + Box::new(|rng: &mut R| property_union_all_preserves_cardinality(rng, env)), + ), ( if !env.opts.disable_fsync_no_wait { 50.0 // Freestyle number diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index 1b5d5f35d..75e00a2a6 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -164,7 +164,7 @@ impl ArbitraryFrom<&SimulatorEnv> for Insert { }) }; - let _gen_select = |rng: &mut R| { + let gen_select = |rng: &mut R| { // Find a non-empty table let select_table = env.tables.iter().find(|t| !t.rows.is_empty())?; let row = pick(&select_table.rows, rng); @@ -182,8 +182,7 @@ impl ArbitraryFrom<&SimulatorEnv> for Insert { backtrack( vec![ (1, Box::new(gen_values)), - // todo: test and enable this once `INSERT INTO SELECT * FROM
` is supported - // (1, Box::new(|rng| gen_select(rng))), + (1, Box::new(|rng| gen_select(rng))), ], rng, ) diff --git a/simulator/main.rs b/simulator/main.rs index 3d7001b73..306580278 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -332,10 +332,8 @@ fn run_simulator( .unwrap(); } return Err(anyhow!("failed with error: '{}'", error)); - } - tracing::info!("Starting to shrink"); let shrunk_plans = plans @@ -708,13 +706,23 @@ fn run_simulation( secondary_pointer: 0, }) .collect::>(); - let result = execute_plans(env.clone(), plans, &mut states, last_execution); + let mut result = execute_plans(env.clone(), plans, &mut states, last_execution); let env = env.lock().unwrap(); env.io.print_stats(); tracing::info!("Simulation completed"); + if result.error.is_none() { + let ic = integrity_check(&PathBuf::from(env.db_path.as_str())); + if let Err(err) = ic { + tracing::error!("integrity check failed: {}", err); + result.error = Some(turso_core::LimboError::InternalError(err.to_string())); + } else { + tracing::info!("integrity check passed"); + } + } + result } diff --git a/simulator/model/query/select.rs b/simulator/model/query/select.rs index 83f5ab55e..277c6017b 100644 --- a/simulator/model/query/select.rs +++ b/simulator/model/query/select.rs @@ -78,6 +78,18 @@ impl Select { } } + pub fn compound(left: Select, right: Select, operator: CompoundOperator) -> Self { + let mut body = left.body; + body.compounds.push(CompoundSelect { + operator, + select: Box::new(right.body.select.as_ref().clone()), + }); + Select { + body, + limit: left.limit.or(right.limit), + } + } + pub(crate) fn dependencies(&self) -> HashSet { let mut tables = HashSet::new(); tables.insert(self.body.select.from.table.clone()); diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 12226219c..42a297d6c 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -90,6 +90,12 @@ pub struct SimulatorCLI { default_value_t = false )] pub disable_where_true_false_null: 
bool, + #[clap( + long, + help = "disable UNION ALL preserves cardinality Property", + default_value_t = false + )] + pub disable_union_all_preserves_cardinality: bool, #[clap(long, help = "disable FsyncNoWait Property", default_value_t = true)] pub disable_fsync_no_wait: bool, #[clap(long, help = "disable FaultyQuery Property", default_value_t = true)] diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 0a0d8b184..3e71320f7 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -118,6 +118,7 @@ impl SimulatorEnv { disable_delete_select: cli_opts.disable_delete_select, disable_drop_select: cli_opts.disable_drop_select, disable_where_true_false_null: cli_opts.disable_where_true_false_null, + disable_union_all_preserves_cardinality: cli_opts.disable_union_all_preserves_cardinality, disable_fsync_no_wait: cli_opts.disable_fsync_no_wait, disable_faulty_query: cli_opts.disable_faulty_query, page_size: 4096, // TODO: randomize this too @@ -236,6 +237,7 @@ pub(crate) struct SimulatorOpts { pub(crate) disable_delete_select: bool, pub(crate) disable_drop_select: bool, pub(crate) disable_where_true_false_null: bool, + pub(crate) disable_union_all_preserves_cardinality: bool, pub(crate) disable_fsync_no_wait: bool, pub(crate) disable_faulty_query: bool, pub(crate) disable_reopen_database: bool, diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index 67f8c046a..7d6aa799f 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -65,16 +65,14 @@ pub(crate) fn execute_plans( let now = std::time::Instant::now(); env.clear_poison(); let mut env = env.lock().unwrap(); + + env.tables.clear(); + for _tick in 0..env.opts.ticks { // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); let state = &mut states[connection_index]; - std::thread::sleep(std::time::Duration::from_millis( - std::env::var("TICK_SLEEP") - .unwrap_or("0".into()) - .parse() - 
.unwrap_or(0), - )); + history.history.push(Execution::new( connection_index, state.interaction_pointer, @@ -129,7 +127,7 @@ fn execute_plan( tracing::debug!("connection {} already connected", connection_index); match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { - interaction.shadow(&mut env.tables); + let _ = interaction.shadow(&mut env.tables); tracing::debug!("connection {} processed", connection_index); // Move to the next interaction or property match next_execution { diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs index 7c3795f01..643c5bbad 100644 --- a/simulator/runner/watch.rs +++ b/simulator/runner/watch.rs @@ -1,11 +1,10 @@ -use std::sync::{Arc, Mutex}; +use std::{path::PathBuf, sync::{Arc, Mutex}}; use crate::{ generation::{ pick_index, plan::{Interaction, InteractionPlanState}, - }, - runner::execution::ExecutionContinuation, + }, integrity_check, runner::execution::ExecutionContinuation }; use super::{ @@ -26,13 +25,23 @@ pub(crate) fn run_simulation( secondary_pointer: 0, }) .collect::>(); - let result = execute_plans(env.clone(), plans, &mut states, last_execution); + let mut result = execute_plans(env.clone(), plans, &mut states, last_execution); let env = env.lock().unwrap(); env.io.print_stats(); tracing::info!("Simulation completed"); + if result.error.is_none() { + let ic = integrity_check(&PathBuf::from(env.db_path.as_str())); + if let Err(err) = ic { + tracing::error!("integrity check failed: {}", err); + result.error = Some(turso_core::LimboError::InternalError(err.to_string())); + } else { + tracing::info!("integrity check passed"); + } + } + result } diff --git a/simulator/shrink/plan.rs b/simulator/shrink/plan.rs index 7ebedfa71..2a592e612 100644 --- a/simulator/shrink/plan.rs +++ b/simulator/shrink/plan.rs @@ -73,6 +73,7 @@ impl InteractionPlan { Property::SelectLimit { .. } | Property::SelectSelectOptimizer { .. } | Property::WhereTrueFalseNull { .. 
} + | Property::UNIONAllPreservesCardinality { .. } | Property::FsyncNoWait { .. } | Property::FaultyQuery { .. } => {} }