diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 0bc54bb9a..5b12e4375 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -95,7 +95,7 @@ pub struct TranslateCtx<'a> { /// Used to distinguish database operations #[allow(clippy::upper_case_acronyms, dead_code)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OperationMode { SELECT, INSERT, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6a3b1fc8b..6c9072ab9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1770,41 +1770,79 @@ pub fn translate_expr( is_rowid_alias, } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. - Operation::Scan { .. } | Operation::Search(_) => match &table_reference.table { - Table::BTree(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - if *is_rowid_alias { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { + // If we have a covering index, we don't have an open table cursor so we read from the index cursor. + Operation::Scan { .. } | Operation::Search(_) => { + match &table_reference.table { + Table::BTree(_) => { + let table_cursor_id = if use_covering_index { + None + } else { + Some(program.resolve_cursor_id(&table_reference.identifier)) + }; + let index_cursor_id = if let Some(index) = index { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + if *is_rowid_alias { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxRowId { + cursor_id: index_cursor_id, + dest: target_register, + }); + } else if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: target_register, + }); + } else { + unreachable!("Either index or table cursor must be opened"); + } + } else { + let read_cursor = if use_covering_index { + index_cursor_id + .expect("index cursor should be opened when use_covering_index=true") + } else { + table_cursor_id + .expect("table cursor should be opened when use_covering_index=false") + }; + let column = if use_covering_index { + let index = index.expect("index cursor should be opened when use_covering_index=true"); + index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| { + panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_reference.identifier) + }) + } else { + *column + }; + program.emit_insn(Insn::Column { + cursor_id: read_cursor, + column, + dest: target_register, + }); + } + let Some(column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + maybe_apply_affinity(column.ty, target_register, program); + Ok(target_register) + } + Table::Virtual(_) => { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::VColumn { cursor_id, column: *column, dest: target_register, }); + Ok(target_register) } - let Some(column) = table_reference.table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); - Ok(target_register) + _ => unreachable!(), } - Table::Virtual(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::VColumn { - cursor_id, - column: *column, - dest: target_register, - }); - Ok(target_register) - } - _ => unreachable!(), - }, + } // If we are reading a column from a subquery, we instead copy the column from the // subquery's result registers. Operation::Subquery { @@ -1822,11 +1860,23 @@ pub fn translate_expr( } ast::Expr::RowId { database: _, table } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); + if use_covering_index { + let index = + index.expect("index cursor should be opened when use_covering_index=true"); + let cursor_id = program.resolve_cursor_id(&index.name); + program.emit_insn(Insn::IdxRowId { + cursor_id, + dest: target_register, + }); + } else { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } Ok(target_register) } ast::Expr::InList { .. } => todo!(), diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7a2e1b9ef..74da26438 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -6,7 +6,7 @@ use crate::{ translate::result_row::emit_select_result, types::SeekOp, vdbe::{ - builder::{CursorType, ProgramBuilder}, + builder::ProgramBuilder, insn::{CmpInsFlags, Insn}, BranchOffset, }, @@ -81,77 +81,67 @@ pub fn init_loop( t_ctx.meta_left_joins[table_index] = Some(lj_metadata); } } + let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?; match &table.op { - Operation::Scan { index, .. } => { - let cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - match &table.table { - Table::BTree(_) => CursorType::BTreeTable(table.btree().unwrap().clone()), - Table::Virtual(_) => { - CursorType::VirtualTable(table.virtual_table().unwrap().clone()) - } - other => panic!("Invalid table reference type in Scan: {:?}", other), - }, - ); - let index_cursor_id = index.as_ref().map(|i| { - program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone())) - }); - match (mode, &table.table) { - (OperationMode::SELECT, Table::BTree(btree)) => { - let root_page = btree.root_page; + Operation::Scan { index, .. } => match (mode, &table.table) { + (OperationMode::SELECT, Table::BTree(btree)) => { + let root_page = btree.root_page; + if let Some(cursor_id) = table_cursor_id { program.emit_insn(Insn::OpenRead { cursor_id, root_page, }); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenRead { - cursor_id: index_cursor_id, - root_page: index.as_ref().unwrap().root_page, - }); - } } - (OperationMode::DELETE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWrite { - cursor_id, - root_page: root_page.into(), + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page, }); } - (OperationMode::UPDATE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWrite { - cursor_id, - root_page: root_page.into(), - }); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenWrite { - cursor_id: index_cursor_id, - root_page: index.as_ref().unwrap().root_page.into(), - }); - } - } - (_, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpen { cursor_id }); - } - _ => { - unimplemented!() - } } - } - Operation::Search(search) => { - let table_cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - CursorType::BTreeTable(table.btree().unwrap().clone()), - ); - - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenRead { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), + (OperationMode::DELETE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::DELETE"), + root_page: root_page.into(), + }); + } + (OperationMode::UPDATE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::UPDATE"), + root_page: root_page.into(), + }); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page.into(), }); } + } + (_, Table::Virtual(_)) => { + if let Some(cursor_id) = table_cursor_id { + program.emit_insn(Insn::VOpen { cursor_id }); + } + } + _ => { + unimplemented!() + } + }, + Operation::Search(search) => { + match mode { + OperationMode::SELECT => { + if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + }); + } + } OperationMode::DELETE | OperationMode::UPDATE => { + let table_cursor_id = table_cursor_id.expect("table cursor is always opened in OperationMode::DELETE or OperationMode::UPDATE"); program.emit_insn(Insn::OpenWrite { cursor_id: table_cursor_id, root_page: table.table.get_root_page().into(), @@ -166,21 +156,18 @@ pub fn init_loop( index: Some(index), .. } = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - CursorType::BTreeIndex(index.clone()), - ); - match mode { OperationMode::SELECT => { program.emit_insn(Insn::OpenRead { - cursor_id: index_cursor_id, + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), root_page: index.root_page, }); } OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWrite { - cursor_id: index_cursor_id, + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), root_page: index.root_page.into(), }); } @@ -229,6 +216,8 @@ pub fn open_loop( } } + let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; + match &table.op { Operation::Subquery { plan, .. } => { let (yield_reg, coroutine_implementation_start) = match &plan.query_type { @@ -274,10 +263,10 @@ pub fn open_loop( program.resolve_label(jump_target_when_true, program.offset()); } } - Operation::Scan { iter_dir, index } => { - let cursor_id = program.resolve_cursor_id(&table.identifier); - let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); - let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); + Operation::Scan { iter_dir, .. } => { + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); if !matches!(&table.table, Table::Virtual(_)) { if *iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::Last { @@ -389,7 +378,8 @@ pub fn open_loop( // Emit VFilter with the computed arguments. program.emit_insn(Insn::VFilter { - cursor_id, + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), arg_count: count, args_reg: start_reg, idx_str: maybe_idx_str, @@ -399,11 +389,13 @@ pub fn open_loop( } program.resolve_label(loop_start, program.offset()); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id: cursor_id, - }); + if let Some(table_cursor_id) = table_cursor_id { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| { @@ -426,7 +418,6 @@ pub fn open_loop( } } Operation::Search(search) => { - let table_cursor_id = program.resolve_cursor_id(&table.identifier); // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. if let Search::RowidEq { cmp_expr } = search { @@ -439,22 +430,17 @@ pub fn open_loop( &t_ctx.resolver, )?; program.emit_insn(Insn::SeekRowid { - cursor_id: table_cursor_id, + cursor_id: table_cursor_id + .expect("Search::RowidEq requires a table cursor"), src_reg, target_pc: next, }); } else { // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. - let index_cursor_id = if let Search::Seek { - index: Some(index), .. - } = search - { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; let is_index = index_cursor_id.is_some(); - let seek_cursor_id = index_cursor_id.unwrap_or(table_cursor_id); + let seek_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); let Search::Seek { seek_def, .. } = search else { unreachable!("Rowid equality point lookup should have been handled above"); }; @@ -483,11 +469,13 @@ pub fn open_loop( )?; if let Some(index_cursor_id) = index_cursor_id { - // Don't do a btree table seek until it's actually necessary to read from the table. - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); + if let Some(table_cursor_id) = table_cursor_id { + // Don't do a btree table seek until it's actually necessary to read from the table. + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } } @@ -785,6 +773,8 @@ pub fn close_loop( .get(table_index) .expect("source has no loop labels"); + let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; + match &table.op { Operation::Subquery { .. } => { program.resolve_label(loop_labels.next, program.offset()); @@ -795,14 +785,11 @@ pub fn close_loop( target_pc: loop_labels.loop_start, }); } - Operation::Scan { - index, iter_dir, .. - } => { + Operation::Scan { iter_dir, .. } => { program.resolve_label(loop_labels.next, program.offset()); - - let cursor_id = program.resolve_cursor_id(&table.identifier); - let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); - let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); match &table.table { Table::BTree(_) => { if *iter_dir == IterationDirection::Backwards { @@ -819,7 +806,8 @@ pub fn close_loop( } Table::Virtual(_) => { program.emit_insn(Insn::VNext { - cursor_id, + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), pc_if_next: loop_labels.loop_start, }); } @@ -828,33 +816,24 @@ pub fn close_loop( } Operation::Search(search) => { program.resolve_label(loop_labels.next, program.offset()); + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a Next instruction. if !matches!(search, Search::RowidEq { .. }) { - let (cursor_id, iter_dir) = match search { - Search::Seek { - index: Some(index), - seek_def, - .. - } => (program.resolve_cursor_id(&index.name), seek_def.iter_dir), - Search::Seek { - index: None, - seek_def, - .. - } => ( - program.resolve_cursor_id(&table.identifier), - seek_def.iter_dir, - ), + let iter_dir = match search { + Search::Seek { seek_def, .. } => seek_def.iter_dir, Search::RowidEq { .. } => unreachable!(), }; if iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::Prev { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_prev: loop_labels.loop_start, }); } else { program.emit_insn(Insn::Next { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 80875da91..fe764ee50 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -167,32 +167,34 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { Ok(()) } +/// Eliminate unnecessary ORDER BY clauses. +/// Returns true if the ORDER BY clause was eliminated. fn eliminate_unnecessary_orderby( table_references: &mut [TableReference], available_indexes: &HashMap>>, order_by: &mut Option>, group_by: &Option, -) -> Result<()> { +) -> Result { let Some(order) = order_by else { - return Ok(()); + return Ok(false); }; let Some(first_table_reference) = table_references.first_mut() else { - return Ok(()); + return Ok(false); }; let Some(btree_table) = first_table_reference.btree() else { - return Ok(()); + return Ok(false); }; // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby if group_by.is_some() { - return Ok(()); + return Ok(false); } let Operation::Scan { index, iter_dir, .. } = &mut first_table_reference.op else { - return Ok(()); + return Ok(false); }; assert!( @@ -207,7 +209,7 @@ fn eliminate_unnecessary_orderby( Direction::Descending => IterationDirection::Backwards, }; *order_by = None; - return Ok(()); + return Ok(true); } // Find the best matching index for the ORDER BY columns @@ -235,7 +237,7 @@ fn eliminate_unnecessary_orderby( } let Some(matching_index) = best_index.0 else { - return Ok(()); + return Ok(false); }; let match_count = best_index.1; @@ -280,7 +282,7 @@ fn eliminate_unnecessary_orderby( } } - Ok(()) + Ok(order_by.is_none()) } /** @@ -300,7 +302,8 @@ fn use_indexes( group_by: &Option, ) -> Result<()> { // Try to use indexes for eliminating ORDER BY clauses - eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + let did_eliminate_orderby = + eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; // Try to use indexes for WHERE conditions for (table_index, table_reference) in table_references.iter_mut().enumerate() { @@ -346,6 +349,12 @@ fn use_indexes( i += 1; } } + if did_eliminate_orderby && table_index == 0 { + // If we already made the decision to remove ORDER BY based on the Rowid (e.g. ORDER BY id), then skip this. + // It would be possible to analyze the index and see if the covering index would retain the ordering guarantee, + // but we just don't do that yet. + continue; + } if let Some(indexes) = available_indexes.get(table_name) { if let Some(search) = try_extract_index_search_from_where_clause( where_clause, @@ -359,6 +368,22 @@ fn use_indexes( } } } + + // Finally, if there's no other reason to use an index, if an index covers the columns used in the query, let's use it. + if let Some(indexes) = available_indexes.get(table_reference.table.get_name()) { + for index_candidate in indexes.iter() { + let is_covering = table_reference.index_is_covering(index_candidate); + if let Operation::Scan { index, .. } = &mut table_reference.op { + if index.is_some() { + continue; + } + if is_covering { + *index = Some(index_candidate.clone()); + break; + } + } + } + } } Ok(()) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 25a4fd7ef..07a8de392 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -11,8 +11,11 @@ use std::{ use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index, Table}, - vdbe::BranchOffset, - VirtualTable, + vdbe::{ + builder::{CursorType, ProgramBuilder}, + BranchOffset, CursorID, + }, + Result, VirtualTable, }; use crate::{ schema::{PseudoTable, Type}, @@ -20,6 +23,8 @@ use crate::{ util::can_pushdown_predicate, }; +use super::emitter::OperationMode; + #[derive(Debug, Clone)] pub struct ResultSetColumn { pub expr: ast::Expr, @@ -491,6 +496,100 @@ impl TableReference { pub fn mark_column_used(&mut self, index: usize) { self.col_used_mask.set(index); } + + /// Open the necessary cursors for this table reference. + /// Generally a table cursor is always opened unless a SELECT query can use a covering index. + /// An index cursor is opened if an index is used in any way for reading data from the table. + pub fn open_cursors( + &self, + program: &mut ProgramBuilder, + mode: OperationMode, + ) -> Result<(Option, Option)> { + let index = self.op.index(); + match &self.table { + Table::BTree(btree) => { + let use_covering_index = self.utilizes_covering_index(); + let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT { + None + } else { + Some(program.alloc_cursor_id( + Some(self.identifier.clone()), + CursorType::BTreeTable(btree.clone()), + )) + }; + let index_cursor_id = if let Some(index) = index { + Some(program.alloc_cursor_id( + Some(index.name.clone()), + CursorType::BTreeIndex(index.clone()), + )) + } else { + None + }; + Ok((table_cursor_id, index_cursor_id)) + } + Table::Virtual(virtual_table) => { + let table_cursor_id = Some(program.alloc_cursor_id( + Some(self.identifier.clone()), + CursorType::VirtualTable(virtual_table.clone()), + )); + let index_cursor_id = None; + Ok((table_cursor_id, index_cursor_id)) + } + Table::Pseudo(_) => Ok((None, None)), + } + } + + /// Resolve the already opened cursors for this table reference. + pub fn resolve_cursors( + &self, + program: &mut ProgramBuilder, + ) -> Result<(Option, Option)> { + let index = self.op.index(); + let table_cursor_id = program.resolve_cursor_id_safe(&self.identifier); + let index_cursor_id = index.map(|index| program.resolve_cursor_id(&index.name)); + Ok((table_cursor_id, index_cursor_id)) + } + + /// Returns true if a given index is a covering index for this [TableReference]. + pub fn index_is_covering(&self, index: &Index) -> bool { + let Table::BTree(btree) = &self.table else { + return false; + }; + if self.col_used_mask.is_empty() { + return false; + } + let mut index_cols_mask = ColumnUsedMask::new(); + for col in index.columns.iter() { + index_cols_mask.set(col.pos_in_table); + } + + // If a table has a rowid (i.e. is not a WITHOUT ROWID table), the index is guaranteed to contain the rowid as well. + if btree.has_rowid { + if let Some(pos_of_rowid_alias_col) = btree.get_rowid_alias_column().map(|(pos, _)| pos) + { + let mut empty_mask = ColumnUsedMask::new(); + empty_mask.set(pos_of_rowid_alias_col); + if self.col_used_mask == empty_mask { + // However if the index would be ONLY used for the rowid, then let's not bother using it to cover the query. + // Example: if the query is SELECT id FROM t, and id is a rowid alias, then let's rather just scan the table + // instead of an index. + return false; + } + index_cols_mask.set(pos_of_rowid_alias_col); + } + } + + index_cols_mask.contains_all_set_bits_of(&self.col_used_mask) + } + + /// Returns true if the index selected for use with this [TableReference] is a covering index, + /// meaning that it contains all the columns that are referenced in the query. + pub fn utilizes_covering_index(&self) -> bool { + let Some(index) = self.op.index() else { + return false; + }; + self.index_is_covering(index.as_ref()) + } } /// A definition of a rowid/index search. diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f76a005ba..929b33d8d 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -4,7 +4,7 @@ pub mod grammar_generator; mod tests { use std::{collections::HashSet, rc::Rc}; - use rand::{Rng, SeedableRng}; + use rand::{seq::IndexedRandom, Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use rusqlite::params; @@ -213,34 +213,47 @@ mod tests { }; // Create all different 3-column primary key permutations let dbs = [ - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z desc))"), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))", ), ]; let mut pk_tuples = HashSet::new(); while pk_tuples.len() < 100000 { - pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); + pk_tuples.insert(( + rng.random_range(0..3000), + rng.random_range(0..3000), + rng.random_range(0..3000), + )); } let mut tuples = Vec::new(); for pk_tuple in pk_tuples { tuples.push(format!( - "({}, {}, {})", + "({}, {}, {}, {})", pk_tuple.0, pk_tuple.1, - rng.random_range(0..2000) + pk_tuple.2, + rng.random_range(0..3000) )); } let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); @@ -298,6 +311,21 @@ mod tests { ITERATIONS ); } + // let's choose random columns from the table + let col_choices = ["x", "y", "z", "nonindexed_col"]; + let col_choices_weights = [10.0, 10.0, 10.0, 3.0]; + let num_cols_in_select = rng.random_range(1..=4); + let select_cols = col_choices + .choose_multiple_weighted(&mut rng, num_cols_in_select, |s| { + let idx = col_choices.iter().position(|c| c == s).unwrap(); + col_choices_weights[idx] + }) + .unwrap() + .collect::>() + .iter() + .map(|x| x.to_string()) + .collect::>(); + let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())]; // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY. let (order_by1, order_by2, order_by3) = { @@ -318,7 +346,7 @@ mod tests { } }; - // Generate random values for the WHERE clause constraints + // Generate random values for the WHERE clause constraints. Only involve primary key columns. let (col_val_first, col_val_second, col_val_third) = { if comp1.is_some() && comp2.is_some() && comp3.is_some() { ( @@ -372,8 +400,11 @@ mod tests { // Generate final query string let query = format!( - "SELECT * FROM t {} {} LIMIT {}", - where_clause, order_by, limit + "SELECT {} FROM t {} {} LIMIT {}", + select_cols.join(", "), + where_clause, + order_by, + limit ); log::debug!("query: {}", query); @@ -398,19 +429,53 @@ mod tests { } }); - if order_by_only_equalities { - let query_no_limit = - format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); - let limbo_no_limit = - limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); - let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); - let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); - if limbo_rev == sqlite_no_limit { + let query_no_limit = + format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); + let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); + let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); + let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); + if limbo_rev == sqlite_no_limit && order_by_only_equalities { + continue; + } + + // finally, if the order by columns specified contain duplicates, sqlite might've returned the rows in an arbitrary different order. + // e.g. SELECT x,y,z FROM t ORDER BY x,y -- if there are duplicates on (x,y), the ordering returned might be different for limbo and sqlite. + // let's check this case and forgive ourselves if the ordering is different for this reason (but no other reason!) + let order_by_cols = select_cols + .iter() + .enumerate() + .filter(|(i, _)| { + order_by_components + .iter() + .any(|o| o.starts_with(col_choices[*i])) + }) + .map(|(i, _)| i) + .collect::>(); + let duplicate_on_order_by_exists = { + let mut exists = false; + 'outer: for (i, row) in limbo_no_limit.iter().enumerate() { + for (j, other_row) in limbo_no_limit.iter().enumerate() { + if i != j + && order_by_cols.iter().all(|&col| row[col] == other_row[col]) + { + exists = true; + break 'outer; + } + } + } + exists + }; + if duplicate_on_order_by_exists { + let len_equal = limbo_no_limit.len() == sqlite_no_limit.len(); + let all_contained = + len_equal && limbo_no_limit.iter().all(|x| sqlite_no_limit.contains(x)); + if all_contained { continue; } } + panic!( - "limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", + "DIFFERENT RESULTS! limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", limbo, sqlite, seed, query ); }