diff --git a/core/pseudo.rs b/core/pseudo.rs index 3cb8e421c..b05fa834f 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -46,6 +46,10 @@ impl Cursor for PseudoCursor { Ok(x) } + fn seek_rowid(&mut self, _: u64) -> Result> { + unimplemented!(); + } + fn record(&self) -> Result>> { Ok(self.current.borrow()) } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 95ae7b7b3..b5118d706 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -137,12 +137,77 @@ impl BTreeCursor { } } + fn btree_seek_rowid( + &mut self, + rowid: u64, + ) -> Result, Option)>> { + self.move_to(rowid)?; + + let mem_page = { + let mem_page = self.page.borrow(); + let mem_page = mem_page.as_ref().unwrap(); + mem_page.clone() + }; + + let page_idx = mem_page.page_idx; + let page = self.pager.read_page(page_idx)?; + let page = RefCell::borrow(&page); + if page.is_locked() { + return Ok(CursorResult::IO); + } + let page = page.contents.read().unwrap(); + let page = page.as_ref().unwrap(); + + for cell_idx in 0..page.cell_count() { + match &page.cell_get(cell_idx)? { + BTreeCell::TableLeafCell(TableLeafCell { + _rowid: cell_rowid, + _payload: p, + first_overflow_page: _, + }) => { + if *cell_rowid == rowid { + let record = crate::storage::sqlite3_ondisk::read_record(p)?; + return Ok(CursorResult::Ok((Some(*cell_rowid), Some(record)))); + } + } + cell_type => { + unreachable!("unexpected cell type: {:?}", cell_type); + } + } + } + + Ok(CursorResult::Ok((None, None))) + } + fn move_to_root(&mut self) { self.page .replace(Some(Rc::new(MemPage::new(None, self.root_page, 0)))); } pub fn move_to(&mut self, key: u64) -> Result> { + // For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers. + // B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data. + // + // Conceptually, each Interior Cell in a interior page has a rowid and a left child node, and the page itself has a right-most child node. + // Example: consider an interior page that contains cells C1(rowid=10), C2(rowid=20), C3(rowid=30). + // - All rows with rowids <= 10 are in the left child node of C1. + // - All rows with rowids > 10 and <= 20 are in the left child node of C2. + // - All rows with rowids > 20 and <= 30 are in the left child node of C3. + // - All rows with rowids > 30 are in the right-most child node of the page. + // + // There will generally be multiple levels of interior pages before we reach a leaf page, + // so we need to follow the interior page pointers until we reach the leaf page that contains the row we are looking for (if it exists). + // + // Here's a high-level overview of the algorithm: + // 1. Since we start at the root page, its cells are all interior cells. + // 2. We scan the interior cells until we find a cell whose rowid is greater than or equal to the rowid we are looking for. + // 3. Follow the left child pointer of the cell we found in step 2. + // a. In case none of the cells in the page have a rowid greater than or equal to the rowid we are looking for, + // we follow the right-most child pointer of the page instead (since all rows with rowids greater than the rowid we are looking for are in the right-most child node). + // 4. We are now at a new page. If it's another interior page, we repeat the process from step 2. If it's a leaf page, we continue to step 5. + // 5. We scan the leaf cells in the leaf page until we find the cell whose rowid is equal to the rowid we are looking for. + // This cell contains the actual data we are looking for. + // 6. If we find the cell, we return the record. Otherwise, we return an empty result. self.move_to_root(); loop { @@ -800,6 +865,17 @@ impl Cursor for BTreeCursor { Ok(*self.rowid.borrow()) } + fn seek_rowid(&mut self, rowid: u64) -> Result> { + match self.btree_seek_rowid(rowid)? { + CursorResult::Ok((rowid, record)) => { + self.rowid.replace(rowid); + self.record.replace(record); + Ok(CursorResult::Ok(rowid.is_some())) + } + CursorResult::IO => Ok(CursorResult::IO), + } + } + fn record(&self) -> Result>> { Ok(self.record.borrow()) } diff --git a/core/translate/select.rs b/core/translate/select.rs index f218eee8f..e8117d022 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -4,6 +4,7 @@ use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::translate::expr::{analyze_columns, maybe_apply_affinity, translate_expr}; use crate::translate::where_clause::{ process_where, translate_processed_where, translate_tableless_where, ProcessedWhereClause, + SeekRowid, WhereExpr, }; use crate::translate::{normalize_ident, Insn}; use crate::types::{OwnedRecord, OwnedValue}; @@ -95,6 +96,12 @@ pub struct LeftJoinBookkeeping { pub on_match_jump_to_label: BranchOffset, } +#[derive(Debug)] +pub enum Plan { + Scan, + SeekRowid, +} + /// Represents a single loop in an ordered list of opened read table loops. /// /// The list is used to generate inner loops like this: @@ -110,6 +117,8 @@ pub struct LeftJoinBookkeeping { pub struct LoopInfo { // The table or table alias that we are looping over pub identifier: String, + // The plan for this loop + pub plan: Plan, // Metadata about a left join, if any pub left_join_maybe: Option, // The label for the instruction that reads the next row for this table @@ -532,7 +541,7 @@ fn translate_tables_begin( .src_tables .get(*idx) .expect("loop order out of bounds"); - let loop_info = translate_table_open_cursor(program, join); + let loop_info = translate_table_open_cursor(program, join, &processed_where); loops.push(loop_info); } @@ -556,6 +565,7 @@ fn translate_tables_begin( translate_table_open_loop( program, select, + &loops, loop_info, &processed_where, current_loop_early_terminate_label, @@ -570,14 +580,17 @@ fn translate_tables_end(program: &mut ProgramBuilder, loops: &[LoopInfo]) { for table_loop in loops.iter().rev() { let cursor_id = table_loop.open_cursor; program.resolve_label(table_loop.next_row_label, program.offset()); - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: table_loop.rewind_label, - }, - table_loop.rewind_label, - ); + if let Plan::Scan = table_loop.plan { + // If we're scanning a table, we need to emit a Next instruction to fetch the next row. + program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: table_loop.rewind_label, + }, + table_loop.rewind_label, + ); + } if let Some(left_join) = &table_loop.left_join_maybe { left_join_match_flag_check(program, left_join, cursor_id); @@ -585,7 +598,11 @@ fn translate_tables_end(program: &mut ProgramBuilder, loops: &[LoopInfo]) { } } -fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) -> LoopInfo { +fn translate_table_open_cursor( + program: &mut ProgramBuilder, + table: &SrcTable, + w: &ProcessedWhereClause, +) -> LoopInfo { let cursor_id = program.alloc_cursor_id(Some(table.identifier.clone()), Some(table.table.clone())); let root_page = match &table.table { @@ -597,8 +614,20 @@ fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) - root_page, }); program.emit_insn(Insn::OpenReadAwait); + + let has_where_term_where_rowid_index_usable = w.terms.iter().any(|term| { + matches!( + term.expr, + WhereExpr::SeekRowid(SeekRowid { table: t, .. }) if *t == table.identifier + ) + }); LoopInfo { identifier: table.identifier.clone(), + plan: if has_where_term_where_rowid_index_usable { + Plan::SeekRowid + } else { + Plan::Scan + }, left_join_maybe: if table.is_outer_join() { Some(LeftJoinBookkeeping { match_flag_register: program.alloc_register(), @@ -680,27 +709,50 @@ fn left_join_match_flag_check( fn translate_table_open_loop( program: &mut ProgramBuilder, select: &Select, + loops: &[LoopInfo], loop_info: &LoopInfo, w: &ProcessedWhereClause, early_terminate_label: BranchOffset, ) -> Result<()> { if let Some(left_join) = loop_info.left_join_maybe.as_ref() { + // In a left join loop, initialize the left join match flag to false + // If the condition checks pass, it will eventually be set to true + // If not, NULLs will be emitted for the right table for this row in the outer table. left_join_match_flag_initialize(program, left_join); } - program.emit_insn(Insn::RewindAsync { - cursor_id: loop_info.open_cursor, - }); - program.defer_label_resolution(loop_info.rewind_label, program.offset() as usize); - program.emit_insn_with_label_dependency( - Insn::RewindAwait { + if let Plan::Scan = loop_info.plan { + // If we're scanning, we need to rewind the cursor to the beginning of the table + // before we start processing the rows in the loop. + // Consider a nested loop query like: + // SELECT * FROM a JOIN b ON a.someprop = b.someprop; + // We need to rewind the cursor to the beginning of b for each row in a, + // so that we can iterate over all rows in b for each row in a. + // + // If we're not scanning, we're seeking by rowid, so we don't need to rewind the cursor, + // since we're only going to be reading one row. + program.emit_insn(Insn::RewindAsync { cursor_id: loop_info.open_cursor, - pc_if_empty: loop_info.rewind_on_empty_label, - }, - loop_info.rewind_on_empty_label, - ); + }); + program.defer_label_resolution(loop_info.rewind_label, program.offset() as usize); + program.emit_insn_with_label_dependency( + Insn::RewindAwait { + cursor_id: loop_info.open_cursor, + pc_if_empty: loop_info.rewind_on_empty_label, + }, + loop_info.rewind_on_empty_label, + ); + } - translate_processed_where(program, select, loop_info, w, early_terminate_label, None)?; + translate_processed_where( + program, + select, + loops, + loop_info, + w, + early_terminate_label, + None, + )?; if let Some(left_join) = loop_info.left_join_maybe.as_ref() { left_join_match_flag_set_true(program, left_join); diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index fca060a65..d52b44073 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -1,5 +1,4 @@ use crate::{ - error::LimboError, function::ScalarFunc, translate::{expr::translate_expr, select::Select}, util::normalize_ident, @@ -11,10 +10,54 @@ use super::select::LoopInfo; use sqlite3_parser::ast::{self}; +#[derive(Debug)] +pub struct SeekRowid<'a> { + pub table: &'a str, + pub rowid_expr: &'a ast::Expr, +} + +#[derive(Debug)] +pub enum WhereExpr<'a> { + Expr(&'a ast::Expr), + SeekRowid(SeekRowid<'a>), +} + #[derive(Debug)] pub struct WhereTerm<'a> { - pub expr: &'a ast::Expr, - pub evaluate_at_tbl: &'a String, + // The expression that should be evaluated. + pub expr: WhereExpr<'a>, + // If this term is part of an outer join, this is the index of the outer join table in select.src_tables + pub outer_join_table_index: Option, + // A bitmask of which table indexes (in select.src_tables) the expression references. + pub table_references_bitmask: usize, +} + +impl<'a> WhereTerm<'a> { + pub fn evaluate_at_loop(&self, select: &'a Select) -> usize { + if let Some(outer_join_table) = self.outer_join_table_index { + // E.g. + // SELECT u.age, p.name FROM users u LEFT JOIN products p ON u.id = 5; + // We can't skip rows from the 'users' table since u.id = 5 is a LEFT JOIN condition; instead we need to skip/null out rows from the 'products' table. + outer_join_table + } else { + // E.g. + // SELECT u.age, p.name FROM users u WHERE u.id = 5; + // We can skip rows from the 'users' table if u.id = 5 is false. + self.innermost_table(select) + } + } + + // Find the innermost table that the expression references. + // Innermost means 'most nested in the nested loop'. + pub fn innermost_table(&self, select: &'a Select) -> usize { + let mut table = 0; + for i in 0..select.src_tables.len() { + if self.table_references_bitmask & (1 << i) != 0 { + table = i; + } + } + table + } } #[derive(Debug)] @@ -33,7 +76,7 @@ pub fn split_constraint_to_terms<'a>( select: &'a Select, mut processed_where_clause: ProcessedWhereClause<'a>, where_clause_or_join_constraint: &'a ast::Expr, - outer_join_table_name: Option<&'a String>, + outer_join_table: Option, ) -> Result> { let mut queue = vec![where_clause_or_join_constraint]; @@ -45,68 +88,24 @@ pub fn split_constraint_to_terms<'a>( } expr => { if expr.is_always_true()? { + // Terms that are always true can be skipped, as they don't constrain the result set in any way. continue; } let term = WhereTerm { - expr, - evaluate_at_tbl: match outer_join_table_name { - Some(table) => { - // If we had e.g. SELECT * FROM t1 LEFT JOIN t2 WHERE t1.a > 10, - // we could evaluate the t1.a > 10 condition at the cursor for t1, i.e. the outer table, - // skipping t1 rows that don't match the condition. - // - // However, if we have SELECT * FROM t1 LEFT JOIN t2 ON t1.a > 10, - // we need to evaluate the t1.a > 10 condition at the cursor for t2, i.e. the inner table, - // because we need to skip rows from t2 that don't match the condition. - // - // In inner joins, both of the above are equivalent, but in left joins they are not. - let tbl = select - .src_tables - .iter() - .find(|t| t.identifier == *table) - .ok_or(LimboError::ParseError(format!( - "Could not find cursor for table {}", - table - )))?; + expr: { + let seekrowid_candidate = select + .src_tables + .iter() + .enumerate() + .find_map(|(i, _)| { + expr.check_seekrowid_candidate(i, select).unwrap_or(None) + }) + .map(WhereExpr::SeekRowid); - &tbl.identifier - } - None => { - // For any non-outer-join condition expression, find the cursor that it should be evaluated at. - // This is the cursor that is the rightmost/innermost cursor that the expression depends on. - // In SELECT * FROM t1, t2 WHERE t1.a > 10, the condition should be evaluated at the cursor for t1. - // In SELECT * FROM t1, t2 WHERE t1.a > 10 OR t2.b > 20, the condition should be evaluated at the cursor for t2. - // - // We are splitting any AND expressions in this function, so for example in this query: - // 'SELECT * FROM t1, t2 WHERE t1.a > 10 AND t2.b > 20' - // we can evaluate the t1.a > 10 condition at the cursor for t1, and the t2.b > 20 condition at the cursor for t2. - // - // For expressions that don't depend on any cursor, we can evaluate them at the leftmost/outermost cursor. - // E.g. 'SELECT * FROM t1 JOIN t2 ON false' can be evaluated at the cursor for t1. - let table_refs = introspect_expression_for_table_refs(select, expr)?; - - // Get the innermost loop that matches any table_refs, and if not found, fall back to outermost loop - - let tbl = - processed_where_clause - .loop_order - .iter() - .rev() - .find_map(|i| { - let tbl = &select.src_tables[*i]; - if table_refs.contains(&&tbl.identifier) { - Some(tbl) - } else { - None - } - }); - - let first_loop = processed_where_clause.loop_order[0]; - - tbl.map(|t| &t.identifier) - .unwrap_or(&select.src_tables[first_loop].identifier) - } + seekrowid_candidate.unwrap_or(WhereExpr::Expr(expr)) }, + outer_join_table_index: outer_join_table, + table_references_bitmask: introspect_expression_for_table_refs(select, expr)?, }; processed_where_clause.terms.push(term); } @@ -136,7 +135,7 @@ pub fn process_where<'a>(select: &'a Select) -> Result> wc = split_constraint_to_terms(select, wc, w, None)?; } - for table in select.src_tables.iter() { + for (i, table) in select.src_tables.iter().enumerate() { if table.join_info.is_none() { continue; } @@ -146,15 +145,25 @@ pub fn process_where<'a>(select: &'a Select) -> Result> select, wc, expr, - if table.is_outer_join() { - Some(&table.identifier) - } else { - None - }, + if table.is_outer_join() { Some(i) } else { None }, )?; } } + // sort seekrowids first (if e.g. u.id = 1 and u.age > 50, we want to seek on u.id = 1 first) + // since seekrowid replaces a loop, we need to evaluate it first. + // E.g. + // SELECT u.age FROM users WHERE u.id = 5 AND u.age > 50; + // We need to seek on u.id = 5 first, and then evaluate u.age > 50. + // If we evaluate u.age > 50 first, we haven't read the row yet. + wc.terms.sort_by(|a, b| { + if let WhereExpr::SeekRowid(_) = a.expr { + std::cmp::Ordering::Less + } else { + std::cmp::Ordering::Greater + } + }); + Ok(wc) } @@ -211,43 +220,101 @@ pub fn translate_tableless_where( pub fn translate_processed_where<'a>( program: &mut ProgramBuilder, select: &'a Select, + loops: &[LoopInfo], current_loop: &'a LoopInfo, where_c: &'a ProcessedWhereClause, - skip_entire_table_label: BranchOffset, + skip_entire_loop_label: BranchOffset, cursor_hint: Option, ) -> Result<()> { - if where_c - .terms - .iter() - .filter(|t| *t.evaluate_at_tbl == current_loop.identifier) - .any(|t| t.expr.is_always_false().unwrap_or(false)) - { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: skip_entire_table_label, - }, - skip_entire_table_label, - ); - return Ok(()); + // If any of the terms are always false, we can skip the entire loop. + for t in where_c.terms.iter().filter(|t| { + select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier + }) { + if let WhereExpr::Expr(e) = &t.expr { + if e.is_always_false().unwrap_or(false) { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: skip_entire_loop_label, + }, + skip_entire_loop_label, + ); + return Ok(()); + } + } } - for term in where_c - .terms - .iter() - .filter(|t| *t.evaluate_at_tbl == current_loop.identifier) - { - let jump_target_when_false = current_loop.next_row_label; + + for term in where_c.terms.iter().filter(|t| { + select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier + }) { + let jump_target_when_false = loops[term.evaluate_at_loop(select)].next_row_label; let jump_target_when_true = program.allocate_label(); - translate_condition_expr( - program, - select, - &term.expr, - cursor_hint, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_false, - jump_target_when_true, - }, - )?; + match &term.expr { + WhereExpr::Expr(e) => { + translate_condition_expr( + program, + select, + e, + cursor_hint, + ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_false, + jump_target_when_true, + }, + )?; + } + WhereExpr::SeekRowid(s) => { + let cursor_id = program.resolve_cursor_id(s.table, cursor_hint); + + let computed_rowid_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(select), + s.rowid_expr, + computed_rowid_reg, + cursor_hint, + )?; + + if !program.has_cursor_emitted_seekrowid(cursor_id) { + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id, + src_reg: computed_rowid_reg, + target_pc: jump_target_when_false, + }, + jump_target_when_false, + ); + } else { + // If we have already emitted a SeekRowid instruction for this cursor, then other equality checks + // against that table should be done using the row that was already fetched. + // e.g. select u.age, p.name from users u join products p on u.id = p.id and p.id = 5; + // emitting two SeekRowid instructions for the same 'p' cursor would yield an incorrect result. + // Assume we are looping over users u, and right now u.id = 3. + // We first SeekRowid on p.id = 3, and find a row. + // If we then SeekRowid for p.id = 5, we would find a row with p.id = 5, + // and end up with a result where u.id = 3 and p.id = 5, which is incorrect. + // Instead we replace the second SeekRowid with a comparison against the row that was already fetched, + // i.e. we compare p.id == 5, which would not match (and is the correct result). + // + // It would probably be better to modify the AST in the WhereTerms directly, but that would require + // refactoring to not use &'a Ast::Expr references in the WhereTerms, i.e. the WhereClause would own its data + // and could mutate it to change the query as needed. We probably need to do this anyway if we want to have some + // kind of Query Plan construct that is not just a container for AST nodes. + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: rowid_reg, + rhs: computed_rowid_reg, + target_pc: jump_target_when_false, + }, + jump_target_when_false, + ); + } + } + } program.resolve_label(jump_target_when_true, program.offset()); } @@ -720,19 +787,20 @@ fn translate_condition_expr( fn introspect_expression_for_table_refs<'a>( select: &'a Select, where_expr: &'a ast::Expr, -) -> Result> { - let mut table_refs = vec![]; +) -> Result { + let mut table_refs_mask = 0; match where_expr { ast::Expr::Binary(e1, _, e2) => { - table_refs.extend(introspect_expression_for_table_refs(select, e1)?); - table_refs.extend(introspect_expression_for_table_refs(select, e2)?); + table_refs_mask |= introspect_expression_for_table_refs(select, e1)?; + table_refs_mask |= introspect_expression_for_table_refs(select, e2)?; } ast::Expr::Id(ident) => { let ident = normalize_ident(&ident.0); let matching_tables = select .src_tables .iter() - .filter(|t| t.table.get_column(&ident).is_some()); + .enumerate() + .filter(|(_, t)| t.table.get_column(&ident).is_some()); let mut matches = 0; let mut matching_tbl = None; @@ -744,8 +812,8 @@ fn introspect_expression_for_table_refs<'a>( } } - if let Some(tbl) = matching_tbl { - table_refs.push(&tbl.identifier); + if let Some((tbl_index, _)) = matching_tbl { + table_refs_mask |= 1 << tbl_index; } else { crate::bail_parse_error!("column not found: {}", &ident) } @@ -753,42 +821,46 @@ fn introspect_expression_for_table_refs<'a>( ast::Expr::Qualified(tbl, ident) => { let tbl = normalize_ident(&tbl.0); let ident = normalize_ident(&ident.0); - let matching_table = select.src_tables.iter().find(|t| t.identifier == tbl); + let matching_table = select + .src_tables + .iter() + .enumerate() + .find(|(_, t)| t.identifier == tbl); if matching_table.is_none() { crate::bail_parse_error!("table not found: {}", &tbl) } let matching_table = matching_table.unwrap(); - if matching_table.table.get_column(&ident).is_none() { + if matching_table.1.table.get_column(&ident).is_none() { crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) } - table_refs.push(&matching_table.identifier); + table_refs_mask |= 1 << matching_table.0; } ast::Expr::Literal(_) => {} ast::Expr::Like { lhs, rhs, .. } => { - table_refs.extend(introspect_expression_for_table_refs(select, lhs)?); - table_refs.extend(introspect_expression_for_table_refs(select, rhs)?); + table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; + table_refs_mask |= introspect_expression_for_table_refs(select, rhs)?; } - ast::Expr::FunctionCall { args, .. } => { - if let Some(args) = args { - for arg in args { - table_refs.extend(introspect_expression_for_table_refs(select, arg)?); - } + ast::Expr::FunctionCall { + args: Some(args), .. + } => { + for arg in args { + table_refs_mask |= introspect_expression_for_table_refs(select, arg)?; } } ast::Expr::InList { lhs, rhs, .. } => { - table_refs.extend(introspect_expression_for_table_refs(select, lhs)?); + table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; if let Some(rhs_list) = rhs { for rhs_expr in rhs_list { - table_refs.extend(introspect_expression_for_table_refs(select, rhs_expr)?); + table_refs_mask |= introspect_expression_for_table_refs(select, rhs_expr)?; } } } _ => {} } - Ok(table_refs) + Ok(table_refs_mask) } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -797,7 +869,8 @@ pub enum ConstantCondition { AlwaysFalse, } -pub trait Evaluatable { +pub trait Evaluatable<'a> { + // if the expression is a constant expression e.g. '1', returns the constant condition fn check_constant(&self) -> Result>; fn is_always_true(&self) -> Result { Ok(self @@ -809,9 +882,160 @@ pub trait Evaluatable { .check_constant()? .map_or(false, |c| c == ConstantCondition::AlwaysFalse)) } + // if the expression is the primary key of a table, returns the index of the table + fn check_primary_key(&self, select: &'a Select) -> Result>; + // Returns a bitmask of which table indexes the expression references + fn get_table_references_bitmask(&self, select: &'a Select) -> Result; + // Checks if the expression is a candidate for seekrowid optimization + fn check_seekrowid_candidate( + &'a self, + table_index: usize, + select: &'a Select, + ) -> Result>>; } -impl Evaluatable for ast::Expr { +impl<'a> Evaluatable<'a> for ast::Expr { + fn get_table_references_bitmask(&self, select: &'a Select) -> Result { + match self { + ast::Expr::Id(ident) => { + let ident = normalize_ident(&ident.0); + let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { + if t.table.get_column(&ident).is_some() { + Some(i) + } else { + None + } + }); + + let mut matches = 0; + let mut matching_tbl = None; + + for tbl in tables { + matching_tbl = Some(tbl); + matches += 1; + if matches > 1 { + crate::bail_parse_error!("ambiguous column name {}", ident) + } + } + + Ok(matching_tbl.unwrap_or(0)) + } + ast::Expr::Qualified(tbl, ident) => { + let tbl = normalize_ident(&tbl.0); + let ident = normalize_ident(&ident.0); + let table = select + .src_tables + .iter() + .enumerate() + .find(|(_, t)| t.identifier == tbl && t.table.get_column(&ident).is_some()); + + if table.is_none() { + crate::bail_parse_error!("table not found: {}", tbl) + } + + let table = table.unwrap(); + + Ok(table.0) + } + ast::Expr::Binary(lhs, _, rhs) => { + let lhs = lhs.as_ref().get_table_references_bitmask(select)?; + let rhs = rhs.as_ref().get_table_references_bitmask(select)?; + + Ok(lhs | rhs) + } + _ => Ok(0), + } + } + fn check_primary_key(&self, select: &'a Select) -> Result> { + match self { + ast::Expr::Id(ident) => { + let ident = normalize_ident(&ident.0); + let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { + if t.table + .get_column(&ident) + .map_or(false, |(_, c)| c.primary_key) + { + Some(i) + } else { + None + } + }); + + let mut matches = 0; + let mut matching_tbl = None; + + for tbl in tables { + matching_tbl = Some(tbl); + matches += 1; + if matches > 1 { + crate::bail_parse_error!("ambiguous column name {}", ident) + } + } + + Ok(matching_tbl) + } + ast::Expr::Qualified(tbl, ident) => { + let tbl = normalize_ident(&tbl.0); + let ident = normalize_ident(&ident.0); + let table = select.src_tables.iter().enumerate().find(|(_, t)| { + t.identifier == tbl + && t.table + .get_column(&ident) + .map_or(false, |(_, c)| c.primary_key) + }); + + if table.is_none() { + crate::bail_parse_error!("table not found: {}", tbl) + } + + let table = table.unwrap(); + + Ok(Some(table.0)) + } + _ => Ok(None), + } + } + fn check_seekrowid_candidate( + &'a self, + table_index: usize, + select: &'a Select, + ) -> Result>> { + match self { + ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => { + let lhs = lhs.as_ref(); + let rhs = rhs.as_ref(); + + if let Some(lhs_table_index) = lhs.check_primary_key(select)? { + let rhs_table_refs_bitmask = rhs.get_table_references_bitmask(select)?; + // For now, we only support seekrowid optimization if the primary key is in an inner loop compared to the other expression. + // Example: explain select u.age, p.name from users u join products p on u.id = p.id; + // In this case, we loop over the users table and seek the products table. + // We also support the case where the other expression is a constant, + // e.g. SELECT * FROM USERS u WHERE u.id = 5. + // In this case the bitmask of the other expression is 0. + if lhs_table_index == table_index && lhs_table_index >= rhs_table_refs_bitmask { + return Ok(Some(SeekRowid { + table: &select.src_tables[table_index].identifier, + rowid_expr: rhs, + })); + } + } + + if let Some(rhs_table_index) = rhs.check_primary_key(select)? { + let lhs_table_refs_bitmask = lhs.get_table_references_bitmask(select)?; + if rhs_table_index == table_index && rhs_table_index >= lhs_table_refs_bitmask { + return Ok(Some(SeekRowid { + table: &select.src_tables[table_index].identifier, + rowid_expr: lhs, + })); + } + } + + Ok(None) + } + _ => Ok(None), + } + } fn check_constant(&self) -> Result> { match self { ast::Expr::Literal(lit) => match lit { diff --git a/core/types.rs b/core/types.rs index 328acb39d..c94f6fb1f 100644 --- a/core/types.rs +++ b/core/types.rs @@ -369,6 +369,7 @@ pub trait Cursor { fn next(&mut self) -> Result>; fn wait_for_completion(&mut self) -> Result<()>; fn rowid(&self) -> Result>; + fn seek_rowid(&mut self, rowid: u64) -> Result>; fn record(&self) -> Result>>; fn insert( &mut self, diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 7b3699a04..133ec781e 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -19,6 +19,8 @@ pub struct ProgramBuilder { pub cursor_ref: Vec<(Option, Option)>, // List of deferred label resolutions. Each entry is a pair of (label, insn_reference). deferred_label_resolutions: Vec<(BranchOffset, InsnReference)>, + // Bitmask of cursors that have emitted a SeekRowid instruction. + seekrowid_emitted_bitmask: u64, } impl ProgramBuilder { @@ -33,6 +35,7 @@ impl ProgramBuilder { cursor_ref: Vec::new(), constant_insns: Vec::new(), deferred_label_resolutions: Vec::new(), + seekrowid_emitted_bitmask: 0, } } @@ -64,8 +67,23 @@ impl ProgramBuilder { cursor } - pub fn emit_insn(&mut self, insn: Insn) { + pub fn has_cursor_emitted_seekrowid(&self, cursor_id: CursorID) -> bool { + (self.seekrowid_emitted_bitmask & (1 << cursor_id)) != 0 + } + + fn set_cursor_emitted_seekrowid(&mut self, cursor_id: CursorID) { + self.seekrowid_emitted_bitmask |= 1 << cursor_id; + } + + fn _emit_insn(&mut self, insn: Insn) { + if let Insn::SeekRowid { cursor_id, .. } = insn { + self.set_cursor_emitted_seekrowid(cursor_id); + } self.insns.push(insn); + } + + pub fn emit_insn(&mut self, insn: Insn) { + self._emit_insn(insn); if let Some(label) = self.next_insn_label { self.next_insn_label = None; self.resolve_label(label, (self.insns.len() - 1) as BranchOffset); @@ -85,7 +103,7 @@ impl ProgramBuilder { } pub fn emit_insn_with_label_dependency(&mut self, insn: Insn, label: BranchOffset) { - self.insns.push(insn); + self._emit_insn(insn); self.add_label_dependency(label, (self.insns.len() - 1) as BranchOffset); } @@ -270,6 +288,10 @@ impl ProgramBuilder { } => { *end_offset = to_offset; } + Insn::SeekRowid { target_pc, .. } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } _ => { todo!("missing resolve_label for {:?}", insn); } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 98f3d0c91..3a955440f 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -179,7 +179,14 @@ pub fn insn_to_str(program: &Program, addr: InsnReference, insn: &Insn, indent: 0, OwnedValue::Text(Rc::new("".to_string())), 0, - format!("root={}", root_page), + format!( + "table={}, root={}", + program.cursor_ref[*cursor_id] + .0 + .as_ref() + .unwrap_or(&format!("cursor {}", cursor_id)), + root_page + ), ), Insn::OpenReadAwait => ( "OpenReadAwait", @@ -222,14 +229,20 @@ pub fn insn_to_str(program: &Program, addr: InsnReference, insn: &Insn, indent: 0, OwnedValue::Text(Rc::new("".to_string())), 0, - "".to_string(), + format!( + "Rewind table {}", + program.cursor_ref[*cursor_id] + .0 + .as_ref() + .unwrap_or(&format!("cursor {}", cursor_id)) + ), ), Insn::Column { cursor_id, column, dest, } => { - let (_, table) = &program.cursor_ref[*cursor_id]; + let (table_identifier, table) = &program.cursor_ref[*cursor_id]; ( "Column", *cursor_id as i32, @@ -240,10 +253,9 @@ pub fn insn_to_str(program: &Program, addr: InsnReference, insn: &Insn, indent: format!( "r[{}]={}.{}", dest, - table + table_identifier .as_ref() - .map(|x| x.get_name()) - .unwrap_or(format!("cursor {}", cursor_id).as_str()), + .unwrap_or(&format!("cursor {}", cursor_id)), table .as_ref() .and_then(|x| x.column_index_to_name(*column)) @@ -377,10 +389,30 @@ pub fn insn_to_str(program: &Program, addr: InsnReference, insn: &Insn, indent: "r[{}]={}.rowid", dest, &program.cursor_ref[*cursor_id] - .1 + .0 .as_ref() - .map(|x| x.get_name()) - .unwrap_or(format!("cursor {}", cursor_id).as_str()) + .unwrap_or(&format!("cursor {}", cursor_id)) + ), + ), + Insn::SeekRowid { + cursor_id, + src_reg, + target_pc, + } => ( + "SeekRowid", + *cursor_id as i32, + *src_reg as i32, + *target_pc as i32, + OwnedValue::Text(Rc::new("".to_string())), + 0, + format!( + "if (r[{}]!={}.rowid) goto {}", + src_reg, + &program.cursor_ref[*cursor_id] + .0 + .as_ref() + .unwrap_or(&format!("cursor {}", cursor_id)), + target_pc ), ), Insn::DecrJumpZero { reg, target_pc } => ( diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 69190dfe8..29813c98e 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -222,6 +222,13 @@ pub enum Insn { dest: usize, }, + // Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekRowid { + cursor_id: CursorID, + src_reg: usize, + target_pc: BranchOffset, + }, + // Decrement the given register and jump to the given PC if the result is zero. DecrJumpZero { reg: usize, @@ -428,6 +435,9 @@ impl Program { (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { state.registers[dest] = OwnedValue::Float(lhs + rhs); } + ((OwnedValue::Null, _) | (_, OwnedValue::Null)) => { + state.registers[dest] = OwnedValue::Null; + } _ => { todo!(); } @@ -779,6 +789,34 @@ impl Program { } state.pc += 1; } + Insn::SeekRowid { + cursor_id, + src_reg, + target_pc, + } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + let rowid = match &state.registers[*src_reg] { + OwnedValue::Integer(rowid) => *rowid as u64, + _ => { + return Err(LimboError::InternalError( + "SeekRowid: the value in the register is not an integer".into(), + )); + } + }; + match cursor.seek_rowid(rowid)? { + CursorResult::Ok(found) => { + if !found { + state.pc = *target_pc; + } else { + state.pc += 1; + } + } + CursorResult::IO => { + // If there is I/O, the instruction is restarted. + return Ok(StepResult::IO); + } + } + } Insn::DecrJumpZero { reg, target_pc } => { assert!(*target_pc >= 0); match state.registers[*reg] { diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 26704ae4b..e489a99a4 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -75,6 +75,10 @@ impl Cursor for Sorter { todo!(); } + fn seek_rowid(&mut self, _: u64) -> Result> { + unimplemented!(); + } + fn record(&self) -> Result>> { Ok(self.current.borrow()) } diff --git a/testing/join.test b/testing/join.test index ead76e1ee..1e2d08c6c 100755 --- a/testing/join.test +++ b/testing/join.test @@ -117,6 +117,11 @@ do_execsql_test left-join-self { } {Jamie| Cindy|Jamie} +do_execsql_test left-join-self-2 { + select u1.first_name as user_name, u2.first_name as neighbor_name from users u1 left join users as u2 on u2.id = u1.id + 1 limit 2; +} {Jamie|Cindy +Cindy|Tommy} + do_execsql_test left-join-self-with-where { select u1.first_name as user_name, u2.first_name as neighbor_name from users u1 left join users as u2 on u1.id = u2.id + 1 where u1.id = 5 limit 2; } {Edward|Jennifer} diff --git a/testing/where.test b/testing/where.test index 96e263537..bbcc32eba 100755 --- a/testing/where.test +++ b/testing/where.test @@ -245,3 +245,11 @@ do_execsql_test where_name_not_in_list_or_name_eq_shirt { 8|sneakers|82.0 10|coat|33.0 11|accessories|81.0} + +do_execsql_test where_multiple { + select id, first_name, age from users where id = 5 and age < 50; +} {5|Edward|15} + +do_execsql_test where_multiple_flipped { + select id, first_name, age from users where age < 50 and id = 5; +} {5|Edward|15} \ No newline at end of file