diff --git a/core/translate/aggregation.rs b/core/translate/aggregation.rs index 994b73a33..e16d4e608 100644 --- a/core/translate/aggregation.rs +++ b/core/translate/aggregation.rs @@ -53,7 +53,7 @@ pub fn emit_ungrouped_aggregation<'a>( t_ctx.reg_nonagg_emit_once_flag, t_ctx.reg_offset, t_ctx.reg_result_cols_start.unwrap(), - t_ctx.reg_limit, + t_ctx.limit_ctx, t_ctx.reg_limit_offset_sum, )?; diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 919e607cc..77274df84 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -62,6 +62,32 @@ impl<'a> Resolver<'a> { } } +#[derive(Debug, Clone, Copy)] +pub struct LimitCtx { + /// Register holding the LIMIT value (e.g. LIMIT 5) + pub reg_limit: usize, + /// Whether to initialize the LIMIT counter to the LIMIT value; + /// There are cases like compound SELECTs where all the sub-selects + /// utilize the same limit register, but it is initialized only once. + pub initialize_counter: bool, +} + +impl LimitCtx { + pub fn new(program: &mut ProgramBuilder) -> Self { + Self { + reg_limit: program.alloc_register(), + initialize_counter: true, + } + } + + pub fn new_shared(reg_limit: usize) -> Self { + Self { + reg_limit, + initialize_counter: false, + } + } +} + /// The TranslateCtx struct holds various information and labels used during bytecode generation. /// It is used for maintaining state and control flow during the bytecode /// generation process. @@ -80,8 +106,7 @@ pub struct TranslateCtx<'a> { pub reg_nonagg_emit_once_flag: Option, // First register of the result columns of the query pub reg_result_cols_start: Option, - // The register holding the limit value, if any. - pub reg_limit: Option, + pub limit_ctx: Option, // The register holding the offset value, if any. pub reg_offset: Option, // The register holding the limit+offset value, if any. @@ -114,7 +139,7 @@ impl<'a> TranslateCtx<'a> { label_main_loop_end: None, reg_agg_start: None, reg_nonagg_emit_once_flag: None, - reg_limit: None, + limit_ctx: None, reg_offset: None, reg_limit_offset_sum: None, reg_result_cols_start: None, @@ -152,9 +177,105 @@ pub fn emit_program(program: &mut ProgramBuilder, plan: Plan, syms: &SymbolTable Plan::Select(plan) => emit_program_for_select(program, plan, syms), Plan::Delete(plan) => emit_program_for_delete(program, plan, syms), Plan::Update(plan) => emit_program_for_update(program, plan, syms), + Plan::CompoundSelect { .. } => emit_program_for_compound_select(program, plan, syms), } } +fn emit_program_for_compound_select( + program: &mut ProgramBuilder, + plan: Plan, + syms: &SymbolTable, +) -> Result<()> { + let Plan::CompoundSelect { + mut first, + mut rest, + limit, + .. + } = plan + else { + crate::bail_parse_error!("expected compound select plan"); + }; + + // Trivial exit on LIMIT 0 + if let Some(limit) = limit { + if limit == 0 { + program.epilogue(TransactionMode::Read); + program.result_columns = first.result_columns; + program.table_references = first.table_references; + return Ok(()); + } + } + + // Each subselect gets their own TranslateCtx, but they share the same limit_ctx + // because the LIMIT applies to the entire compound select, not just a single subselect. + let mut t_ctx_list = Vec::with_capacity(rest.len() + 1); + let reg_limit = if let Some(limit) = limit { + let reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: reg, + }); + Some(reg) + } else { + None + }; + let limit_ctx = if let Some(reg_limit) = reg_limit { + Some(LimitCtx::new_shared(reg_limit)) + } else { + None + }; + let mut t_ctx_first = TranslateCtx::new( + program, + syms, + first.table_references.len(), + first.result_columns.len(), + ); + t_ctx_first.limit_ctx = limit_ctx; + t_ctx_list.push(t_ctx_first); + + for (select, _) in rest.iter() { + let mut t_ctx = TranslateCtx::new( + program, + syms, + select.table_references.len(), + select.result_columns.len(), + ); + t_ctx.limit_ctx = limit_ctx; + t_ctx_list.push(t_ctx); + } + + let mut first_t_ctx = t_ctx_list.remove(0); + emit_query(program, &mut first, &mut first_t_ctx)?; + + // TODO: add support for UNION, EXCEPT, INTERSECT + while !t_ctx_list.is_empty() { + let label_next_select = program.allocate_label(); + // If the LIMIT is reached in any subselect, jump to either: + // a) the IfNot of the next subselect, or + // b) the end of the program + if let Some(reg_limit) = reg_limit { + program.emit_insn(Insn::IfNot { + reg: reg_limit, + target_pc: label_next_select, + jump_if_null: true, + }); + } + let mut t_ctx = t_ctx_list.remove(0); + let (mut select, operator) = rest.remove(0); + if operator != ast::CompoundOperator::UnionAll { + crate::bail_parse_error!("unimplemented compound select operator: {:?}", operator); + } + emit_query(program, &mut select, &mut t_ctx)?; + program.preassign_label_to_next_insn(label_next_select); + } + + program.epilogue(TransactionMode::Read); + program.result_columns = first.result_columns; + program.table_references = first.table_references; + + Ok(()) +} + fn emit_program_for_select( program: &mut ProgramBuilder, mut plan: SelectPlan, @@ -204,16 +325,20 @@ pub fn emit_query<'a>( // Emit subqueries first so the results can be read in the main query loop. emit_subqueries(program, t_ctx, &mut plan.table_references)?; - if t_ctx.reg_limit.is_none() { - t_ctx.reg_limit = plan.limit.map(|_| program.alloc_register()); + if t_ctx.limit_ctx.is_none() { + t_ctx.limit_ctx = plan.limit.map(|_| LimitCtx::new(program)); } if t_ctx.reg_offset.is_none() { - t_ctx.reg_offset = plan.offset.map(|_| program.alloc_register()); + t_ctx.reg_offset = t_ctx + .reg_offset + .or_else(|| plan.offset.map(|_| program.alloc_register())); } if t_ctx.reg_limit_offset_sum.is_none() { - t_ctx.reg_limit_offset_sum = plan.offset.map(|_| program.alloc_register()); + t_ctx.reg_limit_offset_sum = t_ctx + .reg_limit_offset_sum + .or_else(|| plan.offset.map(|_| program.alloc_register())); } // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 @@ -522,12 +647,11 @@ fn emit_program_for_update( program.table_references = plan.table_references; return Ok(()); } - if t_ctx.reg_limit.is_none() && plan.limit.is_some() { - let reg = program.alloc_register(); - t_ctx.reg_limit = Some(reg); + if t_ctx.limit_ctx.is_none() && plan.limit.is_some() { + t_ctx.limit_ctx = Some(LimitCtx::new(program)); program.emit_insn(Insn::Integer { value: plan.limit.unwrap() as i64, - dest: reg, + dest: t_ctx.limit_ctx.unwrap().reg_limit, }); program.mark_last_insn_constant(); if t_ctx.reg_offset.is_none() && plan.offset.is_some_and(|n| n.ne(&0)) { @@ -541,7 +665,7 @@ fn emit_program_for_update( let combined_reg = program.alloc_register(); t_ctx.reg_limit_offset_sum = Some(combined_reg); program.emit_insn(Insn::OffsetLimit { - limit_reg: t_ctx.reg_limit.unwrap(), + limit_reg: t_ctx.limit_ctx.unwrap().reg_limit, offset_reg: reg, combined_reg, }); @@ -1019,9 +1143,9 @@ fn emit_update_insns( }); } - if let Some(limit_reg) = t_ctx.reg_limit { + if let Some(limit_ctx) = t_ctx.limit_ctx { program.emit_insn(Insn::DecrJumpZero { - reg: limit_reg, + reg: limit_ctx.reg_limit, target_pc: t_ctx.label_main_loop_end.unwrap(), }) } diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 567a1c56c..75d111f38 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -892,7 +892,7 @@ pub fn group_by_emit_row_phase<'a>( t_ctx.reg_nonagg_emit_once_flag, t_ctx.reg_offset, t_ctx.reg_result_cols_start.unwrap(), - t_ctx.reg_limit, + t_ctx.limit_ctx, t_ctx.reg_limit_offset_sum, )?; } diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 085068c32..e9371b090 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -883,7 +883,7 @@ fn emit_loop_source<'a>( t_ctx.reg_nonagg_emit_once_flag, t_ctx.reg_offset, t_ctx.reg_result_cols_start.unwrap(), - t_ctx.reg_limit, + t_ctx.limit_ctx, t_ctx.reg_limit_offset_sum, )?; diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index 9faad7888..c4fe27956 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -37,6 +37,13 @@ pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { Plan::Select(plan) => optimize_select_plan(plan, schema), Plan::Delete(plan) => optimize_delete_plan(plan, schema), Plan::Update(plan) => optimize_update_plan(plan, schema), + Plan::CompoundSelect { first, rest, .. } => { + optimize_select_plan(first, schema)?; + for (plan, _) in rest { + optimize_select_plan(plan, schema)?; + } + Ok(()) + } } } diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 6f25fb29a..7e1e71232 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -177,7 +177,7 @@ pub fn emit_order_by( program, plan, start_reg, - t_ctx.reg_limit, + t_ctx.limit_ctx, t_ctx.reg_offset, t_ctx.reg_limit_offset_sum, Some(sort_loop_end_label), diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3b2554a48..15630b9b8 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -264,6 +264,13 @@ impl Ord for EvalAt { #[derive(Debug, Clone)] pub enum Plan { Select(SelectPlan), + CompoundSelect { + first: SelectPlan, + rest: Vec<(SelectPlan, ast::CompoundOperator)>, + limit: Option, + offset: Option, + order_by: Option>, + }, Delete(DeletePlan), Update(UpdatePlan), } @@ -909,6 +916,41 @@ impl Display for Plan { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Self::Select(select_plan) => select_plan.fmt(f), + Self::CompoundSelect { + first, + rest, + limit, + offset, + order_by, + } => { + first.fmt(f)?; + for (plan, operator) in rest { + writeln!(f, "{}", operator)?; + plan.fmt(f)?; + } + if let Some(limit) = limit { + writeln!(f, "LIMIT: {}", limit)?; + } + if let Some(offset) = offset { + writeln!(f, "OFFSET: {}", offset)?; + } + if let Some(order_by) = order_by { + writeln!(f, "ORDER BY:")?; + for (expr, dir) in order_by { + writeln!( + f, + " - {} {}", + expr, + if *dir == SortOrder::Asc { + "ASC" + } else { + "DESC" + } + )?; + } + } + Ok(()) + } Self::Delete(delete_plan) => delete_plan.fmt(f), Self::Update(update_plan) => update_plan.fmt(f), } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 89dfb4608..525a612fa 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -280,7 +280,7 @@ fn parse_from_clause_table<'a>( let Plan::Select(mut subplan) = prepare_select_plan(schema, *subselect, syms, Some(scope))? else { - unreachable!(); + crate::bail_parse_error!("Only non-compound SELECT queries are currently supported in FROM clause subqueries"); }; subplan.query_type = SelectQueryType::Subquery { yield_reg: usize::MAX, // will be set later in bytecode emission diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index 6584d99af..d1466773c 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -4,7 +4,7 @@ use crate::{ }; use super::{ - emitter::Resolver, + emitter::{LimitCtx, Resolver}, expr::translate_expr, plan::{Distinctness, SelectPlan, SelectQueryType}, }; @@ -22,7 +22,7 @@ pub fn emit_select_result( reg_nonagg_emit_once_flag: Option, reg_offset: Option, reg_result_cols_start: usize, - reg_limit: Option, + limit_ctx: Option, reg_limit_offset_sum: Option, ) -> Result<()> { if let (Some(jump_to), Some(_)) = (offset_jump_to, label_on_limit_reached) { @@ -61,7 +61,7 @@ pub fn emit_select_result( program, plan, start_reg, - reg_limit, + limit_ctx, reg_offset, reg_limit_offset_sum, label_on_limit_reached, @@ -76,7 +76,7 @@ pub fn emit_result_row_and_limit( program: &mut ProgramBuilder, plan: &SelectPlan, result_columns_start_reg: usize, - reg_limit: Option, + limit_ctx: Option, reg_offset: Option, reg_limit_offset_sum: Option, label_on_limit_reached: Option, @@ -103,11 +103,14 @@ pub fn emit_result_row_and_limit( // is always 1 here. return Ok(()); } - program.emit_insn(Insn::Integer { - value: limit as i64, - dest: reg_limit.expect("reg_limit must be Some"), - }); - program.mark_last_insn_constant(); + let limit_ctx = limit_ctx.expect("limit_ctx must be Some if plan.limit is Some"); + if limit_ctx.initialize_counter { + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_ctx.reg_limit, + }); + program.mark_last_insn_constant(); + } if let Some(offset) = plan.offset { program.emit_insn(Insn::Integer { @@ -117,7 +120,7 @@ pub fn emit_result_row_and_limit( program.mark_last_insn_constant(); program.emit_insn(Insn::OffsetLimit { - limit_reg: reg_limit.expect("reg_limit must be Some"), + limit_reg: limit_ctx.reg_limit, combined_reg: reg_limit_offset_sum.expect("reg_limit_offset_sum must be Some"), offset_reg: reg_offset.expect("reg_offset must be Some"), }); @@ -125,7 +128,7 @@ pub fn emit_result_row_and_limit( } program.emit_insn(Insn::DecrJumpZero { - reg: reg_limit.expect("reg_limit must be Some"), + reg: limit_ctx.reg_limit, target_pc: label_on_limit_reached.unwrap(), }); } diff --git a/core/translate/select.rs b/core/translate/select.rs index fa2946283..300613cbd 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -14,7 +14,7 @@ use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode}; use crate::vdbe::insn::Insn; use crate::SymbolTable; use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result}; -use limbo_sqlite3_parser::ast::{self, SortOrder}; +use limbo_sqlite3_parser::ast::{self, CompoundSelect, SortOrder}; use limbo_sqlite3_parser::ast::{ResultColumn, SelectInner}; pub fn translate_select( @@ -26,16 +26,34 @@ pub fn translate_select( ) -> Result { let mut select_plan = prepare_select_plan(schema, select, syms, None)?; optimize_plan(&mut select_plan, schema)?; - let Plan::Select(ref select) = select_plan else { - panic!("select_plan is not a SelectPlan"); + let opts = match &select_plan { + Plan::Select(select) => ProgramBuilderOpts { + query_mode, + num_cursors: count_plan_required_cursors(select), + approx_num_insns: estimate_num_instructions(select), + approx_num_labels: estimate_num_labels(select), + }, + Plan::CompoundSelect { first, rest, .. } => ProgramBuilderOpts { + query_mode, + num_cursors: count_plan_required_cursors(first) + + rest + .iter() + .map(|(plan, _)| count_plan_required_cursors(plan)) + .sum::(), + approx_num_insns: estimate_num_instructions(first) + + rest + .iter() + .map(|(plan, _)| estimate_num_instructions(plan)) + .sum::(), + approx_num_labels: estimate_num_labels(first) + + rest + .iter() + .map(|(plan, _)| estimate_num_labels(plan)) + .sum::(), + }, + other => panic!("plan is not a SelectPlan: {:?}", other), }; - let opts = ProgramBuilderOpts { - query_mode, - num_cursors: count_plan_required_cursors(select), - approx_num_insns: estimate_num_instructions(select), - approx_num_labels: estimate_num_labels(select), - }; program.extend(&opts); emit_program(&mut program, select_plan, syms)?; Ok(program) @@ -43,11 +61,91 @@ pub fn translate_select( pub fn prepare_select_plan<'a>( schema: &Schema, - select: ast::Select, + mut select: ast::Select, syms: &SymbolTable, outer_scope: Option<&'a Scope<'a>>, ) -> Result { - match *select.body.select { + let compounds = select.body.compounds.take(); + match compounds { + None => { + let limit = select.limit.take(); + Ok(Plan::Select(prepare_one_select_plan( + schema, + *select.body.select, + limit.as_deref(), + select.order_by.take(), + select.with.take(), + syms, + outer_scope, + )?)) + } + Some(compounds) => { + let mut first = prepare_one_select_plan( + schema, + *select.body.select, + None, + None, + None, + syms, + outer_scope, + )?; + let mut rest = Vec::with_capacity(compounds.len()); + for CompoundSelect { select, operator } in compounds { + // TODO: add support for UNION, EXCEPT and INTERSECT + if operator != ast::CompoundOperator::UnionAll { + crate::bail_parse_error!("only UNION ALL is supported for compound SELECTs"); + } + let plan = + prepare_one_select_plan(schema, *select, None, None, None, syms, outer_scope)?; + rest.push((plan, operator)); + } + // Ensure all subplans have same number of result columns + let first_num_result_columns = first.result_columns.len(); + for (plan, operator) in rest.iter() { + if plan.result_columns.len() != first_num_result_columns { + crate::bail_parse_error!("SELECTs to the left and right of {} do not have the same number of result columns", operator); + } + } + let (limit, offset) = select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?; + + first.limit = limit.clone(); + for (plan, _) in rest.iter_mut() { + plan.limit = limit.clone(); + } + + // FIXME: handle OFFSET for compound selects + if offset.map_or(false, |o| o > 0) { + crate::bail_parse_error!("OFFSET is not supported for compound SELECTs yet"); + } + // FIXME: handle ORDER BY for compound selects + if select.order_by.is_some() { + crate::bail_parse_error!("ORDER BY is not supported for compound SELECTs yet"); + } + // FIXME: handle WITH for compound selects + if select.with.is_some() { + crate::bail_parse_error!("WITH is not supported for compound SELECTs yet"); + } + Ok(Plan::CompoundSelect { + first, + rest, + limit, + offset, + order_by: None, + }) + } + } +} + +fn prepare_one_select_plan<'a>( + schema: &Schema, + select: ast::OneSelect, + limit: Option<&ast::Limit>, + order_by: Option>, + with: Option, + syms: &SymbolTable, + outer_scope: Option<&'a Scope<'a>>, +) -> Result { + match select { ast::OneSelect::Select(select_inner) => { let SelectInner { mut columns, @@ -64,8 +162,6 @@ pub fn prepare_select_plan<'a>( let mut where_predicates = vec![]; - let with = select.with; - // Parse the FROM clause into a vec of TableReferences. Fold all the join conditions expressions into the WHERE clause. let table_references = parse_from(schema, from, syms, with, &mut where_predicates, outer_scope)?; @@ -375,7 +471,7 @@ pub fn prepare_select_plan<'a>( plan.aggregates = aggregate_expressions; // Parse the ORDER BY clause - if let Some(order_by) = select.order_by { + if let Some(order_by) = order_by { let mut key = Vec::new(); for mut o in order_by { @@ -397,11 +493,10 @@ pub fn prepare_select_plan<'a>( } // Parse the LIMIT/OFFSET clause - (plan.limit, plan.offset) = - select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?; + (plan.limit, plan.offset) = limit.map_or(Ok((None, None)), |l| parse_limit(l))?; // Return the unoptimized query plan - Ok(Plan::Select(plan)) + Ok(plan) } ast::OneSelect::Values(values) => { let len = values[0].len(); @@ -430,7 +525,7 @@ pub fn prepare_select_plan<'a>( values, }; - Ok(Plan::Select(plan)) + Ok(plan) } } } diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 2752c168e..0afc38ffd 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -5,7 +5,7 @@ use crate::{ }; use super::{ - emitter::{emit_query, Resolver, TranslateCtx}, + emitter::{emit_query, LimitCtx, Resolver, TranslateCtx}, main_loop::LoopLabels, plan::{SelectPlan, SelectQueryType, TableReference}, }; @@ -77,7 +77,7 @@ pub fn emit_subquery<'a>( reg_result_cols_start: None, result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(), result_columns_to_skip_in_orderby_sorter: None, - reg_limit: plan.limit.map(|_| program.alloc_register()), + limit_ctx: plan.limit.map(|_| LimitCtx::new(program)), reg_offset: plan.offset.map(|_| program.alloc_register()), reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()), resolver: Resolver::new(t_ctx.resolver.symbol_table), @@ -95,7 +95,7 @@ pub fn emit_subquery<'a>( if let Some(limit) = plan.limit { program.emit_insn(Insn::Integer { value: limit as i64, - dest: metadata.reg_limit.unwrap(), + dest: metadata.limit_ctx.unwrap().reg_limit, }); } let result_column_start_reg = emit_query(program, plan, &mut metadata)?; diff --git a/testing/select.test b/testing/select.test index 6f0c6997d..4b377e1bc 100755 --- a/testing/select.test +++ b/testing/select.test @@ -240,3 +240,48 @@ do_execsql_test select-invalid-numeric-text { do_execsql_test select-invalid-numeric-text { select -'E'; } {0} + +do_execsql_test_on_specific_db {:memory:} select-union-all-1 { + CREATE TABLE t1(x INTEGER); + CREATE TABLE t2(x INTEGER); + CREATE TABLE t3(x INTEGER); + + INSERT INTO t1 VALUES(1),(2),(3); + INSERT INTO t2 VALUES(4),(5),(6); + INSERT INTO t3 VALUES(7),(8),(9); + + SELECT x FROM t1 + UNION ALL + SELECT x FROM t2 + UNION ALL + SELECT x FROM t3; +} {1 +2 +3 +4 +5 +6 +7 +8 +9} + +do_execsql_test_on_specific_db {:memory:} select-union-all-with-filters { + CREATE TABLE t4(x INTEGER); + CREATE TABLE t5(x INTEGER); + CREATE TABLE t6(x INTEGER); + + INSERT INTO t4 VALUES(1),(2),(3),(4); + INSERT INTO t5 VALUES(5),(6),(7),(8); + INSERT INTO t6 VALUES(9),(10),(11),(12); + + SELECT x FROM t4 WHERE x > 2 + UNION ALL + SELECT x FROM t5 WHERE x < 7 + UNION ALL + SELECT x FROM t6 WHERE x = 10; +} {3 +4 +5 +6 +10} + diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f7c3860d7..232348e39 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -417,6 +417,93 @@ mod tests { } } + #[test] + pub fn compound_select_fuzz() { + let _ = env_logger::try_init(); + let (mut rng, seed) = rng_from_time(); + log::info!("compound_select_fuzz seed: {}", seed); + + // Constants for fuzzing parameters + const MAX_TABLES: usize = 5; + const MIN_TABLES: usize = 1; + const MAX_ROWS_PER_TABLE: usize = 15; + const MIN_ROWS_PER_TABLE: usize = 5; + const NUM_FUZZ_ITERATIONS: usize = 1000; + // How many more SELECTs than tables can be in a UNION (e.g., if 2 tables, max 2+2=4 SELECTs) + const MAX_SELECTS_IN_UNION_EXTRA: usize = 2; + const MAX_LIMIT_VALUE: usize = 50; + + let db = TempDatabase::new_empty(); + let limbo_conn = db.connect_limbo(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + let mut table_names = Vec::new(); + let num_tables = rng.random_range(MIN_TABLES..=MAX_TABLES); + + for i in 0..num_tables { + let table_name = format!("t{}", i); + // Schema: c1 INTEGER, c2 INTEGER, c3 INTEGER for simplicity and UNION ALL compatibility + let create_table_sql = format!( + "CREATE TABLE {} (c1 INTEGER, c2 INTEGER, c3 INTEGER)", + table_name + ); + + limbo_exec_rows(&db, &limbo_conn, &create_table_sql); + sqlite_exec_rows(&sqlite_conn, &create_table_sql); + + let num_rows_to_insert = rng.random_range(MIN_ROWS_PER_TABLE..=MAX_ROWS_PER_TABLE); + for _ in 0..num_rows_to_insert { + let c1_val: i64 = rng.random_range(-1000..1000); + let c2_val: i64 = rng.random_range(-1000..1000); + let c3_val: i64 = rng.random_range(-1000..1000); + + let insert_sql = format!( + "INSERT INTO {} VALUES ({}, {}, {})", + table_name, c1_val, c2_val, c3_val + ); + limbo_exec_rows(&db, &limbo_conn, &insert_sql); + sqlite_exec_rows(&sqlite_conn, &insert_sql); + } + table_names.push(table_name); + } + + for iter_num in 0..NUM_FUZZ_ITERATIONS { + // Number of SELECT clauses to be UNION ALL'd + let num_selects_in_union = + rng.random_range(1..=(table_names.len() + MAX_SELECTS_IN_UNION_EXTRA)); + let mut select_statements = Vec::new(); + + for _ in 0..num_selects_in_union { + // Randomly pick a table + let table_to_select_from = &table_names[rng.random_range(0..table_names.len())]; + select_statements.push(format!("SELECT c1, c2, c3 FROM {}", table_to_select_from)); + } + + let mut query = select_statements.join(" UNION ALL "); + + if rng.random_bool(0.8) { + let limit_val = rng.random_range(0..=MAX_LIMIT_VALUE); // LIMIT 0 is valid + query = format!("{} LIMIT {}", query, limit_val); + } + + log::debug!( + "Iteration {}/{}: Query: {}", + iter_num + 1, + NUM_FUZZ_ITERATIONS, + query + ); + + let limbo_results = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite_results = sqlite_exec_rows(&sqlite_conn, &query); + + assert_eq!( + limbo_results, sqlite_results, + "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", + query, limbo_results, sqlite_results, seed + ); + } + } + #[test] pub fn arithmetic_expression_fuzz() { let _ = env_logger::try_init();