From 390d0e673f31b5e245a36b8bc9b8842cc15814cd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:28:16 +0200 Subject: [PATCH 1/9] plan.rs: refactor data structures - Get rid of SourceOperator tree - Make plan have a Vec of TableReference, and TableReference now contains the information from the old SourceOperator. - Remove `predicates` (conditions) from Table References -- put everything in the WHERE clause like SQLite, and attach metadata to the where clause expressions with JoinAwareConditionExpr struct. - Refactor select_star() to be simpler now that we use a vec, not a tree --- core/translate/plan.rs | 304 +++++++++++++++++------------------------ 1 file changed, 125 insertions(+), 179 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 075e6f93f..c072690a5 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -9,7 +9,6 @@ use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index, Table}, vdbe::BranchOffset, - Result, }; use crate::{ schema::{PseudoTable, Type}, @@ -31,6 +30,28 @@ pub struct GroupBy { pub having: Option>, } +/// In a query plan, WHERE clause conditions and JOIN conditions are all folded into a vector of JoinAwareConditionExpr. +/// This is done so that we can evaluate the conditions at the correct loop depth. +/// We also need to keep track of whether the condition came from an OUTER JOIN. Take this example: +/// SELECT * FROM users u LEFT JOIN products p ON u.id = 5. +/// Even though the condition only refers to 'u', we CANNOT evaluate it at the users loop, because we need to emit NULL +/// values for the columns of 'p', for EVERY row in 'u', instead of completely skipping any rows in 'u' where the condition is false. +#[derive(Debug, Clone)] +pub struct JoinAwareConditionExpr { + /// The original condition expression. + pub expr: ast::Expr, + /// Is this condition originally from an OUTER JOIN? 
+ /// If so, we need to evaluate it at the loop of the right table in that JOIN, + /// regardless of which tables it references. + /// We also cannot e.g. short circuit the entire query in the optimizer if the condition is statically false. + pub from_outer_join: bool, + /// The loop index where to evaluate the condition. + /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), + /// because that is the rightmost table that it references. + pub eval_at_loop: usize, +} + +/// A query plan is either a SELECT or a DELETE (for now) #[derive(Debug, Clone)] pub enum Plan { Select(SelectPlan), @@ -51,12 +72,13 @@ pub enum SelectQueryType { #[derive(Debug, Clone)] pub struct SelectPlan { - /// A tree of sources (tables). - pub source: SourceOperator, + /// List of table references in loop order, outermost first. + pub table_references: Vec, /// the columns inside SELECT ... FROM pub result_columns: Vec, - /// where clause split into a vec at 'AND' boundaries. - pub where_clause: Option>, + /// where clause split into a vec at 'AND' boundaries. all join conditions also get shoved in here, + /// and we keep track of which join they came from (mainly for OUTER JOIN processing) + pub where_clause: Vec, /// group by clause pub group_by: Option, /// order by clause @@ -67,8 +89,6 @@ pub struct SelectPlan { pub limit: Option, /// offset clause pub offset: Option, - /// all the tables referenced in the query - pub referenced_tables: Vec, /// all the indexes available pub available_indexes: Vec>, /// query contains a constant condition that is always false @@ -80,212 +100,150 @@ pub struct SelectPlan { #[allow(dead_code)] #[derive(Debug, Clone)] pub struct DeletePlan { - /// A tree of sources (tables). - pub source: SourceOperator, + /// List of table references. Delete is always a single table. + pub table_references: Vec, /// the columns inside SELECT ... 
FROM pub result_columns: Vec, /// where clause split into a vec at 'AND' boundaries. - pub where_clause: Option>, + pub where_clause: Vec, /// order by clause pub order_by: Option>, /// limit clause pub limit: Option, /// offset clause pub offset: Option, - /// all the tables referenced in the query - pub referenced_tables: Vec, /// all the indexes available pub available_indexes: Vec>, /// query contains a constant condition that is always false pub contains_constant_false_condition: bool, } -impl Display for Plan { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self { - Select(select_plan) => write!(f, "{}", select_plan.source), - Delete(delete_plan) => write!(f, "{}", delete_plan.source), - } - } -} - #[derive(Clone, Debug, PartialEq, Eq)] pub enum IterationDirection { Forwards, Backwards, } -impl SourceOperator { - pub fn select_star(&self, out_columns: &mut Vec) { - for (table_index, col, idx) in self.select_star_helper() { - out_columns.push(ResultSetColumn { - name: col.name.clone(), - expr: ast::Expr::Column { - database: None, - table: table_index, - column: idx, - is_rowid_alias: col.is_rowid_alias, - }, - contains_aggregates: false, - }); - } - } - - /// All this ceremony is required to deduplicate columns when joining with USING - fn select_star_helper(&self) -> Vec<(usize, &Column, usize)> { - match self { - SourceOperator::Join { - left, right, using, .. - } => { - let mut columns = left.select_star_helper(); - - // Join columns are filtered out from the right side - // in the case of a USING join. - if let Some(using_cols) = using { - let right_columns = right.select_star_helper(); - - for (table_index, col, idx) in right_columns { - if !using_cols - .iter() - .any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0)) - { - columns.push((table_index, col, idx)); - } - } - } else { - columns.extend(right.select_star_helper()); - } - columns - } - SourceOperator::Scan { - table_reference, .. 
- } - | SourceOperator::Search { - table_reference, .. - } - | SourceOperator::Subquery { - table_reference, .. - } => table_reference +pub fn select_star(tables: &[TableReference], out_columns: &mut Vec) { + for (current_table_index, table) in tables.iter().enumerate() { + let maybe_using_cols = table + .join_info + .as_ref() + .and_then(|join_info| join_info.using.as_ref()); + out_columns.extend( + table .columns() .iter() .enumerate() - .map(|(i, col)| (table_reference.table_index, col, i)) - .collect(), - SourceOperator::Nothing { .. } => Vec::new(), - } + .filter(|(_, col)| { + // If we are joining with USING, we need to deduplicate the columns from the right table + // that are also present in the USING clause. + if let Some(using_cols) = maybe_using_cols { + !using_cols + .iter() + .any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0)) + } else { + true + } + }) + .map(|(i, col)| ResultSetColumn { + name: col.name.clone(), + expr: ast::Expr::Column { + database: None, + table: current_table_index, + column: i, + is_rowid_alias: col.is_rowid_alias, + }, + contains_aggregates: false, + }), + ); } } +/// Join information for a table reference. +#[derive(Debug, Clone)] +pub struct JoinInfo { + /// Whether this is an OUTER JOIN. + pub outer: bool, + /// The USING clause for the join, if any. NATURAL JOIN is transformed into USING (col1, col2, ...). + pub using: Option, +} + +/// A table reference in the query plan. +/// For example, SELECT * FROM users u JOIN products p JOIN (SELECT * FROM users) sub +/// has three table references: +/// 1. operation=Scan, table=users, table_identifier=u, reference_type=BTreeTable, join_info=None +/// 2. operation=Scan, table=products, table_identifier=p, reference_type=BTreeTable, join_info=Some(JoinInfo { outer: false, using: None }), +/// 3. 
operation=Subquery, table=users, table_identifier=sub, reference_type=Subquery, join_info=Some(JoinInfo { outer: false, using: None }) +#[derive(Debug, Clone)] +pub struct TableReference { + /// The operation that this table reference performs. + pub op: Operation, + /// Table object, which contains metadata about the table, e.g. columns. + pub table: Table, + /// The name of the table as referred to in the query, either the literal name or an alias e.g. "users" or "u" + pub identifier: String, + /// The join info for this table reference, if it is the right side of a join (which all except the first table reference have) + pub join_info: Option, +} + /** - A SourceOperator is a Node in the query plan that reads data from a table. + An Operation is the way a table reference in the query plan reads its data: a scan, a rowid/index search, or a subquery. */ #[derive(Clone, Debug)] -pub enum SourceOperator { - // Join operator - // This operator is used to join two source operators. - // It takes a left and right source operator, a list of predicates to evaluate, - // and a boolean indicating whether it is an outer join. - Join { - id: usize, - left: Box, - right: Box, - predicates: Option>, - outer: bool, - using: Option, - }, - // Scan operator - // This operator is used to scan a table. - // It takes a table to scan and an optional list of predicates to evaluate. - // The predicates are used to filter rows from the table. - // e.g. SELECT * FROM t1 WHERE t1.foo = 5 +pub enum Operation { + // Scan operation + // This operation is used to scan a table. // The iter_dir are uset to indicate the direction of the iterator. // The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes // iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple - // assignments.
for more detailed discussions, please refer to https://github.com/tursodatabase/limbo/pull/376 Scan { - id: usize, - table_reference: TableReference, - predicates: Option>, iter_dir: Option, }, - // Search operator - // This operator is used to search for a row in a table using an index + // Search operation + // This operation is used to search for a row in a table using an index // (i.e. a primary key or a secondary index) - Search { - id: usize, - table_reference: TableReference, - search: Search, - predicates: Option>, - }, + Search(Search), + /// Subquery operation + /// This operation is used to represent a subquery in the query plan. + /// The subquery itself (recursively) contains an arbitrary SelectPlan. Subquery { - id: usize, - table_reference: TableReference, plan: Box, - predicates: Option>, - }, - // Nothing operator - // This operator is used to represent an empty query. - // e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing. - Nothing { - id: usize, - }, -} - -/// The type of the table reference, either BTreeTable or Subquery -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum TableReferenceType { - /// A BTreeTable is a table that is stored on disk in a B-tree index. - BTreeTable, - /// A subquery. - Subquery { - /// The index of the first register in the query plan that contains the result columns of the subquery. result_columns_start_reg: usize, }, } -/// A query plan has a list of TableReference objects, each of which represents a table or subquery. -#[derive(Clone, Debug)] -pub struct TableReference { - /// Table object, which contains metadata about the table, e.g. columns. - pub table: Table, - /// The name of the table as referred to in the query, either the literal name or an alias e.g. 
"users" or "u" - pub table_identifier: String, - /// The index of this reference in the list of TableReference objects in the query plan - /// The reference at index 0 is the first table in the FROM clause, the reference at index 1 is the second table in the FROM clause, etc. - /// So, the index is relevant for determining when predicates (WHERE, ON filters etc.) should be evaluated. - pub table_index: usize, - /// The type of the table reference, either BTreeTable or Subquery - pub reference_type: TableReferenceType, -} - impl TableReference { + /// Returns the btree table for this table reference, if it is a BTreeTable. pub fn btree(&self) -> Option> { - match self.reference_type { - TableReferenceType::BTreeTable => self.table.btree(), - TableReferenceType::Subquery { .. } => None, - } + self.table.btree() } - pub fn new_subquery(identifier: String, table_index: usize, plan: &SelectPlan) -> Self { + + /// Creates a new TableReference for a subquery. + pub fn new_subquery(identifier: String, plan: SelectPlan, join_info: Option) -> Self { + let table = Table::Pseudo(Rc::new(PseudoTable::new_with_columns( + plan.result_columns + .iter() + .map(|rc| Column { + name: rc.name.clone(), + ty: Type::Text, // FIXME: infer proper type + ty_str: "TEXT".to_string(), + is_rowid_alias: false, + primary_key: false, + notnull: false, + default: None, + }) + .collect(), + ))); Self { - table: Table::Pseudo(Rc::new(PseudoTable::new_with_columns( - plan.result_columns - .iter() - .map(|rc| Column { - name: rc.name.clone(), - ty: Type::Text, // FIXME: infer proper type - ty_str: "TEXT".to_string(), - is_rowid_alias: false, - primary_key: false, - notnull: false, - default: None, - }) - .collect(), - ))), - table_identifier: identifier.clone(), - table_index, - reference_type: TableReferenceType::Subquery { + op: Operation::Subquery { + plan: Box::new(plan), result_columns_start_reg: 0, // Will be set in the bytecode emission phase }, + table, + identifier: identifier.clone(), + 
join_info, } } @@ -300,32 +258,20 @@ impl TableReference { #[derive(Clone, Debug)] pub enum Search { /// A rowid equality point lookup. This is a special case that uses the SeekRowid bytecode instruction and does not loop. - RowidEq { cmp_expr: ast::Expr }, + RowidEq { cmp_expr: JoinAwareConditionExpr }, /// A rowid search. Uses bytecode instructions like SeekGT, SeekGE etc. RowidSearch { cmp_op: ast::Operator, - cmp_expr: ast::Expr, + cmp_expr: JoinAwareConditionExpr, }, /// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc. IndexSearch { index: Rc, cmp_op: ast::Operator, - cmp_expr: ast::Expr, + cmp_expr: JoinAwareConditionExpr, }, } -impl SourceOperator { - pub fn id(&self) -> usize { - match self { - SourceOperator::Join { id, .. } => *id, - SourceOperator::Scan { id, .. } => *id, - SourceOperator::Search { id, .. } => *id, - SourceOperator::Subquery { id, .. } => *id, - SourceOperator::Nothing { id } => *id, - } - } -} - #[derive(Clone, Copy, Debug, PartialEq)] pub enum Direction { Ascending, From e63256f657a43c0de52041f8f2e8415a187a49d0 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:30:51 +0200 Subject: [PATCH 2/9] Change Display implementation of Plan to work with new data structures --- core/translate/plan.rs | 256 ++++++++++++----------------------------- 1 file changed, 75 insertions(+), 181 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index c072690a5..e92661969 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -306,204 +306,98 @@ impl Display for Aggregate { } } -// For EXPLAIN QUERY PLAN -impl Display for SourceOperator { +/// For EXPLAIN QUERY PLAN +impl Display for Plan { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + Select(select_plan) => select_plan.fmt(f), + Delete(delete_plan) => delete_plan.fmt(f), + } + } +} + +impl Display for SelectPlan { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - fn fmt_operator( - operator: 
&SourceOperator, - f: &mut Formatter, - level: usize, - last: bool, - ) -> fmt::Result { - let indent = if level == 0 { - if last { "`--" } else { "|--" }.to_string() + writeln!(f, "QUERY PLAN")?; + + // Print each table reference with appropriate indentation based on join depth + for (i, reference) in self.table_references.iter().enumerate() { + let is_last = i == self.table_references.len() - 1; + let indent = if i == 0 { + if is_last { "`--" } else { "|--" }.to_string() } else { format!( " {}{}", - "| ".repeat(level - 1), - if last { "`--" } else { "|--" } + "| ".repeat(i - 1), + if is_last { "`--" } else { "|--" } ) }; - match operator { - SourceOperator::Join { - left, - right, - predicates, - outer, - .. - } => { - let join_name = if *outer { "OUTER JOIN" } else { "JOIN" }; - match predicates - .as_ref() - .and_then(|ps| if ps.is_empty() { None } else { Some(ps) }) - { - Some(ps) => { - let predicates_string = ps - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(" AND "); - writeln!(f, "{}{} ON {}", indent, join_name, predicates_string)?; - } - None => writeln!(f, "{}{}", indent, join_name)?, + match &reference.op { + Operation::Scan { .. } => { + let table_name = if reference.table.get_name() == reference.identifier { + reference.identifier.clone() + } else { + format!("{} AS {}", reference.table.get_name(), reference.identifier) + }; + + writeln!(f, "{}SCAN {}", indent, table_name)?; + } + Operation::Search(search) => match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + writeln!( + f, + "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)", + indent, reference.identifier + )?; } - fmt_operator(left, f, level + 1, false)?; - fmt_operator(right, f, level + 1, true) - } - SourceOperator::Scan { - table_reference, - predicates: filter, - .. 
- } => { - let table_name = - if table_reference.table.get_name() == table_reference.table_identifier { - table_reference.table_identifier.clone() - } else { - format!( - "{} AS {}", - &table_reference.table.get_name(), - &table_reference.table_identifier - ) - }; - let filter_string = filter.as_ref().map(|f| { - let filters_string = f - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(" AND "); - format!("FILTER {}", filters_string) - }); - match filter_string { - Some(fs) => writeln!(f, "{}SCAN {} {}", indent, table_name, fs), - None => writeln!(f, "{}SCAN {}", indent, table_name), - }?; - Ok(()) - } - SourceOperator::Search { - table_reference, - search, - .. - } => { - match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { - writeln!( - f, - "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)", - indent, table_reference.table_identifier - )?; - } - Search::IndexSearch { index, .. } => { - writeln!( - f, - "{}SEARCH {} USING INDEX {}", - indent, table_reference.table_identifier, index.name - )?; - } + Search::IndexSearch { index, .. } => { + writeln!( + f, + "{}SEARCH {} USING INDEX {}", + indent, reference.identifier, index.name + )?; + } + }, + Operation::Subquery { plan, .. } => { + writeln!(f, "{}SUBQUERY {}", indent, reference.identifier)?; + // Indent and format the subquery plan + for line in format!("{}", plan).lines() { + writeln!(f, "{} {}", indent, line)?; } - Ok(()) } - SourceOperator::Subquery { plan, .. } => { - fmt_operator(&plan.source, f, level + 1, last) - } - SourceOperator::Nothing { .. } => Ok(()), } } + Ok(()) + } +} + +impl Display for DeletePlan { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { writeln!(f, "QUERY PLAN")?; - fmt_operator(self, f, 0, true) - } -} -/** - Returns a bitmask where each bit corresponds to a table in the `tables` vector. - If a table is referenced in the given Operator, the corresponding bit is set to 1. 
- Example: - if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], - and the Operator is a join between table2 and table3, - then the return value will be (in bits): 110 -*/ -pub fn get_table_ref_bitmask_for_operator<'a>( - tables: &'a Vec, - operator: &'a SourceOperator, -) -> Result { - let mut table_refs_mask = 0; - match operator { - SourceOperator::Join { left, right, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?; - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?; - } - SourceOperator::Scan { - table_reference, .. - } => { - table_refs_mask |= 1 - << tables - .iter() - .position(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - } - SourceOperator::Search { - table_reference, .. - } => { - table_refs_mask |= 1 - << tables - .iter() - .position(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - } - SourceOperator::Subquery { .. } => {} - SourceOperator::Nothing { .. } => {} - } - Ok(table_refs_mask) -} + // Delete plan should only have one table reference + if let Some(reference) = self.table_references.first() { + let indent = "`--"; -/** - Returns a bitmask where each bit corresponds to a table in the `tables` vector. - If a table is referenced in the given AST expression, the corresponding bit is set to 1. - Example: - if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], - and predicate = "t1.a = t2.b" - then the return value will be (in bits): 011 -*/ -#[allow(clippy::only_used_in_recursion)] -pub fn get_table_ref_bitmask_for_ast_expr<'a>( - tables: &'a Vec, - predicate: &'a ast::Expr, -) -> Result { - let mut table_refs_mask = 0; - match predicate { - ast::Expr::Binary(e1, _, e2) => { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?; - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?; - } - ast::Expr::Column { table, .. 
} => { - table_refs_mask |= 1 << table; - } - ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before optimizer"), - ast::Expr::Qualified(_, _) => { - unreachable!("Qualified should be resolved to a Column before optimizer") - } - ast::Expr::Literal(_) => {} - ast::Expr::Like { lhs, rhs, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs)?; - } - ast::Expr::FunctionCall { - args: Some(args), .. - } => { - for arg in args { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, arg)?; - } - } - ast::Expr::InList { lhs, rhs, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; - if let Some(rhs_list) = rhs { - for rhs_expr in rhs_list { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs_expr)?; + match &reference.op { + Operation::Scan { .. } => { + let table_name = if reference.table.get_name() == reference.identifier { + reference.identifier.clone() + } else { + format!("{} AS {}", reference.table.get_name(), reference.identifier) + }; + + writeln!(f, "{}DELETE FROM {}", indent, table_name)?; + } + Operation::Search { .. } => { + panic!("DELETE plans should not contain search operations"); + } + Operation::Subquery { .. } => { + panic!("DELETE plans should not contain subqueries"); } } } - _ => {} + Ok(()) } - - Ok(table_refs_mask) } From 16a97d3b9849f6b9f918a567ca8fa3ee7fcb4b1e Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:31:27 +0200 Subject: [PATCH 3/9] planner.rs: refactor from/join + where parsing logic - use new TableReference and JoinAwareConditionExpr - add utilities for determining at which loop depth a WHERE condition should be evaluated, now that "operators" do not carry condition expressions inside them anymore. 
--- core/translate/planner.rs | 254 ++++++++++++++++++++------------------ 1 file changed, 133 insertions(+), 121 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index f66c96ce3..043ef0e35 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,5 +1,8 @@ use super::{ - plan::{Aggregate, Plan, SelectQueryType, SourceOperator, TableReference, TableReferenceType}, + plan::{ + Aggregate, JoinAwareConditionExpr, JoinInfo, Operation, Plan, SelectQueryType, + TableReference, + }, select::prepare_select_plan, SymbolTable, }; @@ -14,21 +17,6 @@ use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, Limit, UnaryOperator pub const ROWID: &str = "rowid"; -pub struct OperatorIdCounter { - id: usize, -} - -impl OperatorIdCounter { - pub fn new() -> Self { - Self { id: 1 } - } - pub fn get_next_id(&mut self) -> usize { - let id = self.id; - self.id += 1; - id - } -} - pub fn resolve_aggregates(expr: &Expr, aggs: &mut Vec) -> bool { if aggs .iter() @@ -140,10 +128,9 @@ pub fn bind_column_references(expr: &mut Expr, referenced_tables: &[TableReferen } Expr::Qualified(tbl, id) => { let normalized_table_name = normalize_ident(tbl.0.as_str()); - let matching_tbl_idx = referenced_tables.iter().position(|t| { - t.table_identifier - .eq_ignore_ascii_case(&normalized_table_name) - }); + let matching_tbl_idx = referenced_tables + .iter() + .position(|t| t.identifier.eq_ignore_ascii_case(&normalized_table_name)); if matching_tbl_idx.is_none() { crate::bail_parse_error!("Table {} not found", normalized_table_name); } @@ -273,10 +260,9 @@ pub fn bind_column_references(expr: &mut Expr, referenced_tables: &[TableReferen fn parse_from_clause_table( schema: &Schema, table: ast::SelectTable, - operator_id_counter: &mut OperatorIdCounter, cur_table_index: usize, syms: &SymbolTable, -) -> Result<(TableReference, SourceOperator)> { +) -> Result { match table { ast::SelectTable::Table(qualified_name, maybe_alias, _) => { let 
normalized_qualified_name = normalize_ident(qualified_name.name.0.as_str()); @@ -289,21 +275,12 @@ fn parse_from_clause_table( ast::As::Elided(id) => id, }) .map(|a| a.0); - let table_reference = TableReference { + Ok(TableReference { + op: Operation::Scan { iter_dir: None }, table: Table::BTree(table.clone()), - table_identifier: alias.unwrap_or(normalized_qualified_name), - table_index: cur_table_index, - reference_type: TableReferenceType::BTreeTable, - }; - Ok(( - table_reference.clone(), - SourceOperator::Scan { - table_reference, - predicates: None, - id: operator_id_counter.get_next_id(), - iter_dir: None, - }, - )) + identifier: alias.unwrap_or(normalized_qualified_name), + join_info: None, + }) } ast::SelectTable::Select(subselect, maybe_alias) => { let Plan::Select(mut subplan) = prepare_select_plan(schema, *subselect, syms)? else { @@ -319,17 +296,8 @@ fn parse_from_clause_table( ast::As::Elided(id) => id.0.clone(), }) .unwrap_or(format!("subquery_{}", cur_table_index)); - let table_reference = - TableReference::new_subquery(identifier.clone(), cur_table_index, &subplan); - Ok(( - table_reference.clone(), - SourceOperator::Subquery { - id: operator_id_counter.get_next_id(), - table_reference, - plan: Box::new(subplan), - predicates: None, - }, - )) + let table_reference = TableReference::new_subquery(identifier, subplan, None); + Ok(table_reference) } _ => todo!(), } @@ -338,99 +306,124 @@ fn parse_from_clause_table( pub fn parse_from( schema: &Schema, mut from: Option, - operator_id_counter: &mut OperatorIdCounter, syms: &SymbolTable, -) -> Result<(SourceOperator, Vec)> { + out_where_clause: &mut Vec, +) -> Result> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok(( - SourceOperator::Nothing { - id: operator_id_counter.get_next_id(), - }, - vec![], - )); + return Ok(vec![]); } - let mut table_index = 0; let mut tables = vec![]; let mut from_owned = std::mem::take(&mut from).unwrap(); let select_owned = *std::mem::take(&mut 
from_owned.select).unwrap(); let joins_owned = std::mem::take(&mut from_owned.joins).unwrap_or_default(); - let (table_reference, mut operator) = - parse_from_clause_table(schema, select_owned, operator_id_counter, table_index, syms)?; - + let table_reference = parse_from_clause_table(schema, select_owned, 0, syms)?; tables.push(table_reference); - table_index += 1; for join in joins_owned.into_iter() { - let JoinParseResult { - source_operator: right, - is_outer_join: outer, - using, - predicates, - } = parse_join( - schema, - join, - operator_id_counter, - &mut tables, - table_index, - syms, - )?; - operator = SourceOperator::Join { - left: Box::new(operator), - right: Box::new(right), - predicates, - outer, - using, - id: operator_id_counter.get_next_id(), - }; - table_index += 1; + parse_join(schema, join, syms, &mut tables, out_where_clause)?; } - Ok((operator, tables)) + Ok(tables) } pub fn parse_where( where_clause: Option, - referenced_tables: &[TableReference], -) -> Result>> { + table_references: &[TableReference], + out_where_clause: &mut Vec, +) -> Result<()> { if let Some(where_expr) = where_clause { let mut predicates = vec![]; break_predicate_at_and_boundaries(where_expr, &mut predicates); for expr in predicates.iter_mut() { - bind_column_references(expr, referenced_tables)?; + bind_column_references(expr, table_references)?; } - Ok(Some(predicates)) + for expr in predicates { + let eval_at_loop = get_rightmost_table_referenced_in_expr(&expr)?; + out_where_clause.push(JoinAwareConditionExpr { + expr, + from_outer_join: false, + eval_at_loop, + }); + } + Ok(()) } else { - Ok(None) + Ok(()) } } -struct JoinParseResult { - source_operator: SourceOperator, - is_outer_join: bool, - using: Option, - predicates: Option>, +/** + Returns the rightmost table index that is referenced in the given AST expression. + Rightmost = innermost loop. 
+ This is used to determine where we should evaluate a given condition expression, + and it needs to be the rightmost table referenced in the expression, because otherwise + the condition would be evaluated before a row is read from that table. +*/ +fn get_rightmost_table_referenced_in_expr<'a>(predicate: &'a ast::Expr) -> Result { + let mut max_table_idx = 0; + match predicate { + ast::Expr::Binary(e1, _, e2) => { + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e1)?); + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e2)?); + } + ast::Expr::Column { table, .. } => { + max_table_idx = max_table_idx.max(*table); + } + ast::Expr::Id(_) => { + /* Id referring to column will already have been rewritten as an Expr::Column */ + /* we only get here with literal 'true' or 'false' etc */ + } + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified should be resolved to a Column before optimizer") + } + ast::Expr::Literal(_) => {} + ast::Expr::Like { lhs, rhs, .. } => { + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?); + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs)?); + } + ast::Expr::FunctionCall { + args: Some(args), .. + } => { + for arg in args { + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(arg)?); + } + } + ast::Expr::InList { lhs, rhs, .. 
} => { + max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?); + if let Some(rhs_list) = rhs { + for rhs_expr in rhs_list { + max_table_idx = + max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs_expr)?); + } + } + } + _ => {} + } + + Ok(max_table_idx) } fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, - operator_id_counter: &mut OperatorIdCounter, - tables: &mut Vec, - table_index: usize, syms: &SymbolTable, -) -> Result { + tables: &mut Vec, + out_where_clause: &mut Vec, +) -> Result<()> { let ast::JoinedSelectTable { operator: join_operator, table, constraint, } = join; - let (table_reference, source_operator) = - parse_from_clause_table(schema, table, operator_id_counter, table_index, syms)?; - - tables.push(table_reference); + let cur_table_index = tables.len(); + tables.push(parse_from_clause_table( + schema, + table, + cur_table_index, + syms, + )?); let (outer, natural) = match join_operator { ast::JoinOperator::TypedJoin(Some(join_type)) => { @@ -442,23 +435,21 @@ fn parse_join( }; let mut using = None; - let mut predicates = None; if natural && constraint.is_some() { crate::bail_parse_error!("NATURAL JOIN cannot be combined with ON or USING clause"); } let constraint = if natural { + assert!(tables.len() >= 2); + let rightmost_table = tables.last().unwrap(); // NATURAL JOIN is first transformed into a USING join with the common columns - let left_tables = &tables[..table_index]; - assert!(!left_tables.is_empty()); - let right_table = &tables[table_index]; - let right_cols = &right_table.columns(); + let right_cols = rightmost_table.columns(); let mut distinct_names: Option = None; // TODO: O(n^2) maybe not great for large tables or big multiway joins for right_col in right_cols.iter() { let mut found_match = false; - for left_table in left_tables.iter() { + for left_table in tables.iter().take(tables.len() - 1) { for left_col in left_table.columns().iter() { if left_col.name == right_col.name { if let 
Some(distinct_names) = distinct_names.as_mut() { @@ -495,16 +486,28 @@ fn parse_join( for predicate in preds.iter_mut() { bind_column_references(predicate, tables)?; } - predicates = Some(preds); + for pred in preds { + let cur_table_idx = tables.len() - 1; + let eval_at_loop = if outer { + cur_table_idx + } else { + get_rightmost_table_referenced_in_expr(&pred)? + }; + out_where_clause.push(JoinAwareConditionExpr { + expr: pred, + from_outer_join: outer, + eval_at_loop, + }); + } } ast::JoinConstraint::Using(distinct_names) => { // USING join is replaced with a list of equality predicates - let mut using_predicates = vec![]; for distinct_name in distinct_names.iter() { let name_normalized = normalize_ident(distinct_name.0.as_str()); - let left_tables = &tables[..table_index]; + let cur_table_idx = tables.len() - 1; + let left_tables = &tables[..cur_table_idx]; assert!(!left_tables.is_empty()); - let right_table = &tables[table_index]; + let right_table = tables.last().unwrap(); let mut left_col = None; for (left_table_idx, left_table) in left_tables.iter().enumerate() { left_col = left_table @@ -536,7 +539,7 @@ fn parse_join( } let (left_table_idx, left_col_idx, left_col) = left_col.unwrap(); let (right_col_idx, right_col) = right_col.unwrap(); - using_predicates.push(Expr::Binary( + let expr = Expr::Binary( Box::new(Expr::Column { database: None, table: left_table_idx, @@ -546,24 +549,33 @@ fn parse_join( ast::Operator::Equals, Box::new(Expr::Column { database: None, - table: right_table.table_index, + table: cur_table_idx, column: right_col_idx, is_rowid_alias: right_col.is_rowid_alias, }), - )); + ); + let eval_at_loop = if outer { + cur_table_idx + } else { + get_rightmost_table_referenced_in_expr(&expr)? 
+ }; + out_where_clause.push(JoinAwareConditionExpr { + expr, + from_outer_join: outer, + eval_at_loop, + }); } - predicates = Some(using_predicates); using = Some(distinct_names); } } } - Ok(JoinParseResult { - source_operator, - is_outer_join: outer, - using, - predicates, - }) + assert!(tables.len() >= 2); + let last_idx = tables.len() - 1; + let rightmost_table = tables.get_mut(last_idx).unwrap(); + rightmost_table.join_info = Some(JoinInfo { outer, using }); + + Ok(()) } pub fn parse_limit(limit: Limit) -> Result<(Option, Option)> { From 2ddac4bf21bdf00cb7ef42ee0ddbc20278482e25 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:33:26 +0200 Subject: [PATCH 4/9] select.rs: use new data structures when parsing select --- core/translate/select.rs | 53 ++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/core/translate/select.rs b/core/translate/select.rs index 46226abbd..6a9250296 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,12 +1,12 @@ use super::emitter::emit_program; use super::expr::get_name; -use super::plan::SelectQueryType; +use super::plan::{select_star, SelectQueryType}; use crate::function::{AggFunc, ExtFunc, Func}; use crate::translate::optimizer::optimize_plan; use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan}; use crate::translate::planner::{ bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit, - parse_where, resolve_aggregates, OperatorIdCounter, + parse_where, resolve_aggregates, }; use crate::util::normalize_ident; use crate::SymbolTable; @@ -43,52 +43,51 @@ pub fn prepare_select_plan( crate::bail_parse_error!("SELECT without columns is not allowed"); } - let mut operator_id_counter = OperatorIdCounter::new(); + let mut where_predicates = vec![]; - // Parse the FROM clause - let (source, referenced_tables) = - parse_from(schema, from, &mut operator_id_counter, syms)?; + // 
Parse the FROM clause into a vec of TableReferences. Fold all the join conditions expressions into the WHERE clause. + let table_references = parse_from(schema, from, syms, &mut where_predicates)?; let mut plan = SelectPlan { - source, + table_references, result_columns: vec![], - where_clause: None, + where_clause: where_predicates, group_by: None, order_by: None, aggregates: vec![], limit: None, offset: None, - referenced_tables, available_indexes: schema.indexes.clone().into_values().flatten().collect(), contains_constant_false_condition: false, query_type: SelectQueryType::TopLevel, }; - // Parse the WHERE clause - plan.where_clause = parse_where(where_clause, &plan.referenced_tables)?; + // Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions. + parse_where(where_clause, &plan.table_references, &mut plan.where_clause)?; let mut aggregate_expressions = Vec::new(); for (result_column_idx, column) in columns.iter_mut().enumerate() { match column { ResultColumn::Star => { - plan.source.select_star(&mut plan.result_columns); + select_star(&plan.table_references, &mut plan.result_columns); } ResultColumn::TableStar(name) => { let name_normalized = normalize_ident(name.0.as_str()); let referenced_table = plan - .referenced_tables + .table_references .iter() - .find(|t| t.table_identifier == name_normalized); + .enumerate() + .find(|(_, t)| t.identifier == name_normalized); if referenced_table.is_none() { crate::bail_parse_error!("Table {} not found", name.0); } - let table_reference = referenced_table.unwrap(); - for (idx, col) in table_reference.columns().iter().enumerate() { + let (table_index, table) = referenced_table.unwrap(); + for (idx, col) in table.columns().iter().enumerate() { plan.result_columns.push(ResultSetColumn { expr: ast::Expr::Column { database: None, // TODO: support different databases - table: table_reference.table_index, + table: table_index, column: idx, is_rowid_alias: 
col.is_rowid_alias, }, @@ -98,7 +97,7 @@ pub fn prepare_select_plan( } } ResultColumn::Expr(ref mut expr, maybe_alias) => { - bind_column_references(expr, &plan.referenced_tables)?; + bind_column_references(expr, &plan.table_references)?; match expr { ast::Expr::FunctionCall { name, @@ -141,7 +140,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -155,7 +154,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -174,7 +173,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -191,7 +190,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -225,7 +224,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -245,7 +244,7 @@ pub fn prepare_select_plan( name: get_name( maybe_alias.as_ref(), expr, - &plan.referenced_tables, + &plan.table_references, || format!("expr_{}", result_column_idx), ), expr: expr.clone(), @@ -258,7 +257,7 @@ pub fn prepare_select_plan( } if let Some(mut group_by) = group_by { for expr in group_by.exprs.iter_mut() { - bind_column_references(expr, &plan.referenced_tables)?; + bind_column_references(expr, &plan.table_references)?; } plan.group_by = Some(GroupBy { @@ -267,7 +266,7 @@ pub fn prepare_select_plan( let mut predicates = vec![]; break_predicate_at_and_boundaries(having, &mut predicates); for expr in predicates.iter_mut() { - 
bind_column_references(expr, &plan.referenced_tables)?; + bind_column_references(expr, &plan.table_references)?; let contains_aggregates = resolve_aggregates(expr, &mut aggregate_expressions); if !contains_aggregates { @@ -313,7 +312,7 @@ pub fn prepare_select_plan( o.expr }; - bind_column_references(&mut expr, &plan.referenced_tables)?; + bind_column_references(&mut expr, &plan.table_references)?; resolve_aggregates(&expr, &mut plan.aggregates); key.push(( From 09b6bad0af4ce3aa8ee4c204927c2df3c918d8bd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:33:47 +0200 Subject: [PATCH 5/9] delete.rs: use new data structures when parsing delete --- core/translate/delete.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 6bbe6270b..8741e1e62 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,13 +1,13 @@ use crate::schema::Table; use crate::translate::emitter::emit_program; use crate::translate::optimizer::optimize_plan; -use crate::translate::plan::{DeletePlan, Plan, SourceOperator}; +use crate::translate::plan::{DeletePlan, Operation, Plan}; use crate::translate::planner::{parse_limit, parse_where}; use crate::vdbe::builder::ProgramBuilder; use crate::{schema::Schema, Result, SymbolTable}; use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use super::plan::{TableReference, TableReferenceType}; +use super::plan::TableReference; pub fn translate_delete( program: &mut ProgramBuilder, @@ -33,33 +33,28 @@ pub fn prepare_delete_plan( None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), }; - let btree_table_ref = TableReference { + let table_references = vec![TableReference { table: Table::BTree(table.clone()), - table_identifier: table.name.clone(), - table_index: 0, - reference_type: TableReferenceType::BTreeTable, - }; - let referenced_tables = vec![btree_table_ref.clone()]; + identifier: 
table.name.clone(), + op: Operation::Scan { iter_dir: None }, + join_info: None, + }]; + + let mut where_predicates = vec![]; // Parse the WHERE clause - let resolved_where_clauses = parse_where(where_clause, &referenced_tables)?; + parse_where(where_clause, &table_references, &mut where_predicates)?; // Parse the LIMIT/OFFSET clause let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |l| parse_limit(*l))?; let plan = DeletePlan { - source: SourceOperator::Scan { - id: 0, - table_reference: btree_table_ref, - predicates: resolved_where_clauses.clone(), - iter_dir: None, - }, + table_references, result_columns: vec![], - where_clause: resolved_where_clauses, + where_clause: where_predicates, order_by: None, limit: resolved_limit, offset: resolved_offset, - referenced_tables, available_indexes: vec![], contains_constant_false_condition: false, }; From 89fba9305a5793ebde9dcb89286a144bc91617e1 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:40:36 +0200 Subject: [PATCH 6/9] main_loop.rs: use iteration instead of recursion Now that we do not have a tree of SourceOperators but rather a Vec of TableReferences, we can just use loops instead of recursion for handling the main query loop. 
--- core/translate/main_loop.rs | 986 ++++++++++++++++-------------------- 1 file changed, 450 insertions(+), 536 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9450fcf55..9cff582d9 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -16,7 +16,8 @@ use super::{ expr::{translate_condition_expr, translate_expr, ConditionMetadata}, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Search, SelectPlan, SelectQueryType, SourceOperator, TableReference, + IterationDirection, JoinAwareConditionExpr, Operation, Search, SelectPlan, SelectQueryType, + TableReference, }, }; @@ -46,118 +47,48 @@ pub struct LoopLabels { pub fn init_loop( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - source: &SourceOperator, + tables: &[TableReference], mode: &OperationMode, ) -> Result<()> { - let operator_id = source.id(); - let loop_labels = LoopLabels { - next: program.allocate_label(), - loop_start: program.allocate_label(), - loop_end: program.allocate_label(), - }; - t_ctx.labels_main_loop.insert(operator_id, loop_labels); + for (table_index, table) in tables.iter().enumerate() { + let loop_labels = LoopLabels { + next: program.allocate_label(), + loop_start: program.allocate_label(), + loop_end: program.allocate_label(), + }; + t_ctx.labels_main_loop.push(loop_labels); - match source { - SourceOperator::Subquery { .. } => Ok(()), - SourceOperator::Join { - id, - left, - right, - outer, - .. 
- } => { - if *outer { + // Initialize bookkeeping for OUTER JOIN + if let Some(join_info) = table.join_info.as_ref() { + if join_info.outer { let lj_metadata = LeftJoinMetadata { reg_match_flag: program.alloc_register(), label_match_flag_set_true: program.allocate_label(), label_match_flag_check_value: program.allocate_label(), }; - t_ctx.meta_left_joins.insert(*id, lj_metadata); + t_ctx.meta_left_joins.insert(table_index, lj_metadata); } - init_loop(program, t_ctx, left, mode)?; - init_loop(program, t_ctx, right, mode)?; - - Ok(()) } - SourceOperator::Scan { - table_reference, .. - } => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - CursorType::BTreeTable(table_reference.btree().unwrap().clone()), - ); - let root_page = table_reference.table.get_root_page(); - - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait {}); - } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - _ => { - unimplemented!() - } - } - - Ok(()) - } - SourceOperator::Search { - table_reference, - search, - .. - } => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - CursorType::BTreeTable(table_reference.btree().unwrap().clone()), - ); - - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.get_root_page(), - }); - program.emit_insn(Insn::OpenReadAwait {}); - } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.get_root_page(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - _ => { - unimplemented!() - } - } - - if let Search::IndexSearch { index, .. 
} = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - CursorType::BTreeIndex(index.clone()), + match &table.op { + Operation::Scan { .. } => { + let cursor_id = program.alloc_cursor_id( + Some(table.identifier.clone()), + CursorType::BTreeTable(table.btree().unwrap().clone()), ); + let root_page = table.table.get_root_page(); match mode { OperationMode::SELECT => { program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, + cursor_id, + root_page, }); - program.emit_insn(Insn::OpenReadAwait); + program.emit_insn(Insn::OpenReadAwait {}); } OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, + cursor_id, + root_page, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -166,11 +97,64 @@ pub fn init_loop( } } } + Operation::Search(search) => { + let table_cursor_id = program.alloc_cursor_id( + Some(table.identifier.clone()), + CursorType::BTreeTable(table.btree().unwrap().clone()), + ); - Ok(()) + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } + + if let Search::IndexSearch { index, .. 
} = search { + let index_cursor_id = program.alloc_cursor_id( + Some(index.name.clone()), + CursorType::BTreeIndex(index.clone()), + ); + + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } + } + } + _ => {} } - SourceOperator::Nothing { .. } => Ok(()), } + + Ok(()) } /// Set up the main query execution loop @@ -179,52 +163,64 @@ pub fn init_loop( pub fn open_loop( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - source: &mut SourceOperator, - referenced_tables: &[TableReference], + tables: &[TableReference], + predicates: &[JoinAwareConditionExpr], ) -> Result<()> { - match source { - SourceOperator::Subquery { - id, - predicates, - plan, - .. - } => { - let (yield_reg, coroutine_implementation_start) = match &plan.query_type { - SelectQueryType::Subquery { - yield_reg, - coroutine_implementation_start, - } => (*yield_reg, *coroutine_implementation_start), - _ => unreachable!("Subquery operator with non-subquery query type"), - }; - // In case the subquery is an inner loop, it needs to be reinitialized on each iteration of the outer loop. - program.emit_insn(Insn::InitCoroutine { - yield_reg, - jump_on_definition: BranchOffset::Offset(0), - start_offset: coroutine_implementation_start, - }); - let LoopLabels { - loop_start, - loop_end, - next, - } = *t_ctx - .labels_main_loop - .get(id) - .expect("subquery has no loop labels"); - program.resolve_label(loop_start, program.offset()); - // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor, - // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row. 
- // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack, - // which in this case is the end of the Yield-Goto loop in the parent query. - program.emit_insn(Insn::Yield { - yield_reg, - end_offset: loop_end, - }); + for (table_index, table) in tables.iter().enumerate() { + let LoopLabels { + loop_start, + loop_end, + next, + } = *t_ctx + .labels_main_loop + .get(table_index) + .expect("table has no loop labels"); - // These are predicates evaluated outside of the subquery, - // so they are translated here. - // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 - if let Some(preds) = predicates { - for expr in preds { + // Each OUTER JOIN has a "match flag" that is initially set to false, + // and is set to true when a match is found for the OUTER JOIN. + // This is used to determine whether to emit actual columns or NULLs for the columns of the right table. + if let Some(join_info) = table.join_info.as_ref() { + if join_info.outer { + let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.reg_match_flag, + }); + } + } + + match &table.op { + Operation::Subquery { plan, .. } => { + let (yield_reg, coroutine_implementation_start) = match &plan.query_type { + SelectQueryType::Subquery { + yield_reg, + coroutine_implementation_start, + } => (*yield_reg, *coroutine_implementation_start), + _ => unreachable!("Subquery operator with non-subquery query type"), + }; + // In case the subquery is an inner loop, it needs to be reinitialized on each iteration of the outer loop. 
+ program.emit_insn(Insn::InitCoroutine { + yield_reg, + jump_on_definition: BranchOffset::Offset(0), + start_offset: coroutine_implementation_start, + }); + program.resolve_label(loop_start, program.offset()); + // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor, + // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row. + // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack, + // which in this case is the end of the Yield-Goto loop in the parent query. + program.emit_insn(Insn::Yield { + yield_reg, + end_offset: loop_end, + }); + + // These are predicates evaluated outside of the subquery, + // so they are translated here. + // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 + for cond in predicates + .iter() + .filter(|cond| cond.eval_at_loop == table_index) + { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, @@ -233,325 +229,253 @@ pub fn open_loop( }; translate_condition_expr( program, - referenced_tables, - expr, + tables, + &cond.expr, condition_metadata, &t_ctx.resolver, )?; program.resolve_label(jump_target_when_true, program.offset()); } } + Operation::Scan { iter_dir } => { + let cursor_id = program.resolve_cursor_id(&table.identifier); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::LastAsync { cursor_id }); + } else { + program.emit_insn(Insn::RewindAsync { cursor_id }); + } + program.emit_insn( + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + Insn::LastAwait { + cursor_id, + pc_if_empty: loop_end, + } + } else { + Insn::RewindAwait { + cursor_id, + pc_if_empty: loop_end, + } + }, + ); + program.resolve_label(loop_start, program.offset()); - Ok(()) - } - SourceOperator::Join 
{ - id, - left, - right, - predicates, - outer, - .. - } => { - open_loop(program, t_ctx, left, referenced_tables)?; - - let LoopLabels { next, .. } = *t_ctx - .labels_main_loop - .get(&right.id()) - .expect("right side of join has no loop labels"); - - let mut jump_target_when_false = next; - - if *outer { - let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); - program.emit_insn(Insn::Integer { - value: 0, - dest: lj_meta.reg_match_flag, - }); - jump_target_when_false = lj_meta.label_match_flag_check_value; - } - - open_loop(program, t_ctx, right, referenced_tables)?; - - if let Some(predicates) = predicates { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false, - }; - for predicate in predicates.iter() { + for cond in predicates + .iter() + .filter(|cond| cond.eval_at_loop == table_index) + { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: next, + }; translate_condition_expr( program, - referenced_tables, - predicate, + tables, + &cond.expr, condition_metadata, &t_ctx.resolver, )?; + program.resolve_label(jump_target_when_true, program.offset()); } - program.resolve_label(jump_target_when_true, program.offset()); } + Operation::Search(search) => { + let table_cursor_id = program.resolve_cursor_id(&table.identifier); + // Open the loop for the index search. + // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + if !matches!(search, Search::RowidEq { .. }) { + let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + let cmp_reg = program.alloc_register(); + let (cmp_expr, cmp_op) = match search { + Search::IndexSearch { + cmp_expr, cmp_op, .. + } => (cmp_expr, cmp_op), + Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), + Search::RowidEq { .. } => unreachable!(), + }; - if *outer { - let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); + // TODO this only handles ascending indexes + match cmp_op { + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals => { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + cmp_reg, + &t_ctx.resolver, + )?; + } + ast::Operator::Less | ast::Operator::LessEquals => { + program.emit_insn(Insn::Null { + dest: cmp_reg, + dest_end: None, + }); + } + _ => unreachable!(), + } + // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. + program.emit_insn(match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: loop_end, + }, + ast::Operator::Greater + | ast::Operator::Less + | ast::Operator::LessEquals => Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: loop_end, + }, + _ => unreachable!(), + }); + if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + cmp_reg, + &t_ctx.resolver, + )?; + } + + program.resolve_label(loop_start, program.offset()); + // TODO: We are currently only handling ascending indexes. + // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. 
+ // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. + // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. + // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. + // + // For primary key searches we emit RowId and then compare it to the seek value. + + match cmp_op { + ast::Operator::Equals | ast::Operator::LessEquals => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Gt { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + ast::Operator::Less => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Ge { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + 
table_cursor_id, + }); + } + } + + if let Search::RowidEq { cmp_expr } = search { + let src_reg = program.alloc_register(); + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + src_reg, + &t_ctx.resolver, + )?; + program.emit_insn(Insn::SeekRowid { + cursor_id: table_cursor_id, + src_reg, + target_pc: next, + }); + } + for cond in predicates + .iter() + .filter(|cond| cond.eval_at_loop == table_index) + { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: next, + }; + translate_condition_expr( + program, + tables, + &cond.expr, + condition_metadata, + &t_ctx.resolver, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + } + + // Set the match flag to true if this is a LEFT JOIN. + // At this point of execution we are going to emit columns for the left table, + // and either emit columns or NULLs for the right table, depending on whether the null_flag is set + // for the right table's cursor. 
+ if let Some(join_info) = table.join_info.as_ref() { + if join_info.outer { + let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); program.resolve_label(lj_meta.label_match_flag_set_true, program.offset()); program.emit_insn(Insn::Integer { value: 1, dest: lj_meta.reg_match_flag, }); } - - Ok(()) } - SourceOperator::Scan { - id, - table_reference, - predicates, - iter_dir, - } => { - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::LastAsync { cursor_id }); - } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); - } - let LoopLabels { - loop_start, - loop_end, - next, - } = *t_ctx - .labels_main_loop - .get(id) - .expect("scan has no loop labels"); - program.emit_insn( - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - Insn::LastAwait { - cursor_id, - pc_if_empty: loop_end, - } - } else { - Insn::RewindAwait { - cursor_id, - pc_if_empty: loop_end, - } - }, - ); - program.resolve_label(loop_start, program.offset()); - - if let Some(preds) = predicates { - for expr in preds { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: next, - }; - translate_condition_expr( - program, - referenced_tables, - expr, - condition_metadata, - &t_ctx.resolver, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(()) - } - SourceOperator::Search { - id, - table_reference, - search, - predicates, - .. - } => { - let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - let LoopLabels { - loop_start, - loop_end, - next, - } = *t_ctx - .labels_main_loop - .get(id) - .expect("search has no loop labels"); - // Open the loop for the index search. 
- // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::RowidEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::RowidEq { .. } => unreachable!(), - }; - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. - program.emit_insn(match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - ast::Operator::Greater | ast::Operator::Less | ast::Operator::LessEquals => { - Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - } - } - _ => unreachable!(), - }); - if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - - program.resolve_label(loop_start, program.offset()); - // TODO: We are currently only handling ascending indexes. 
- // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. - - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - _ => {} - } - - 
if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } - } - - if let Search::RowidEq { cmp_expr } = search { - let src_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - src_reg, - &t_ctx.resolver, - )?; - program.emit_insn(Insn::SeekRowid { - cursor_id: table_cursor_id, - src_reg, - target_pc: next, - }); - } - if let Some(predicates) = predicates { - for predicate in predicates.iter() { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: next, - }; - translate_condition_expr( - program, - referenced_tables, - predicate, - condition_metadata, - &t_ctx.resolver, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(()) - } - SourceOperator::Nothing { .. } => Ok(()), } + + Ok(()) } /// SQLite (and so Limbo) processes joins as a nested loop. 
@@ -620,7 +544,7 @@ fn emit_loop_source( cur_reg += 1; translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), expr, key_reg, &t_ctx.resolver, @@ -639,7 +563,7 @@ fn emit_loop_source( cur_reg += 1; translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), expr, agg_reg, &t_ctx.resolver, @@ -676,7 +600,7 @@ fn emit_loop_source( let reg = start_reg + i; translate_aggregation_step( program, - &plan.referenced_tables, + &plan.table_references, agg, reg, &t_ctx.resolver, @@ -692,7 +616,7 @@ fn emit_loop_source( let reg = start_reg + num_aggs + i; translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), &rc.expr, reg, &t_ctx.resolver, @@ -705,16 +629,17 @@ fn emit_loop_source( plan.aggregates.is_empty(), "We should not get here with aggregates" ); - let loop_labels = *t_ctx + let offset_jump_to = t_ctx .labels_main_loop - .get(&plan.source.id()) - .expect("source has no loop labels"); + .get(0) + .map(|l| l.next) + .or_else(|| t_ctx.label_main_loop_end); emit_select_result( program, t_ctx, plan, t_ctx.label_main_loop_end, - Some(loop_labels.next), + offset_jump_to, )?; Ok(()) @@ -728,33 +653,85 @@ fn emit_loop_source( pub fn close_loop( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - source: &SourceOperator, + tables: &[TableReference], ) -> Result<()> { - let loop_labels = *t_ctx - .labels_main_loop - .get(&source.id()) - .expect("source has no loop labels"); - match source { - SourceOperator::Subquery { .. } => { - program.resolve_label(loop_labels.next, program.offset()); - // A subquery has no cursor to call NextAsync on, so it just emits a Goto - // to the Yield instruction, which in turn jumps back to the main loop of the subquery, - // so that the next row from the subquery can be read. - program.emit_insn(Insn::Goto { - target_pc: loop_labels.loop_start, - }); - } - SourceOperator::Join { - id, - left, - right, - outer, - .. 
- } => { - close_loop(program, t_ctx, right)?; + // We close the loops for all tables in reverse order, i.e. innermost first. + // OPEN t1 + // OPEN t2 + // OPEN t3 + // + // CLOSE t3 + // CLOSE t2 + // CLOSE t1 + for (idx, table) in tables.iter().rev().enumerate() { + let table_index = tables.len() - idx - 1; + let loop_labels = *t_ctx + .labels_main_loop + .get(table_index) + .expect("source has no loop labels"); - if *outer { - let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); + match &table.op { + Operation::Subquery { .. } => { + program.resolve_label(loop_labels.next, program.offset()); + // A subquery has no cursor to call NextAsync on, so it just emits a Goto + // to the Yield instruction, which in turn jumps back to the main loop of the subquery, + // so that the next row from the subquery can be read. + program.emit_insn(Insn::Goto { + target_pc: loop_labels.loop_start, + }); + } + Operation::Scan { iter_dir, .. } => { + program.resolve_label(loop_labels.next, program.offset()); + let cursor_id = program.resolve_cursor_id(&table.identifier); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } else { + program.emit_insn(Insn::NextAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } + } + Operation::Search(search) => { + program.resolve_label(loop_labels.next, program.offset()); + // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + if !matches!(search, Search::RowidEq { .. }) { + let cursor_id = match search { + Search::IndexSearch { index, .. 
} => program.resolve_cursor_id(&index.name), + Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier), + Search::RowidEq { .. } => unreachable!(), + }; + + program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn(Insn::NextAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } + } + } + + program.resolve_label(loop_labels.loop_end, program.offset()); + + // Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back + // and emit a row with NULLs for the right table, and then jump back to the next row of the left table. + if let Some(join_info) = table.join_info.as_ref() { + if join_info.outer { + let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); // The left join match flag is set to 1 when there is any match on the right table // (e.g. SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a). // If the left join match flag has been set to 1, we jump to the next row on the outer table, @@ -770,13 +747,9 @@ pub fn close_loop( // but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table. // In that case, we now enter the routine that does exactly that. // First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match right.as_ref() { - SourceOperator::Scan { - table_reference, .. - } => program.resolve_cursor_id(&table_reference.table_identifier), - SourceOperator::Search { - table_reference, .. - } => program.resolve_cursor_id(&table_reference.table_identifier), + let right_cursor_id = match &table.op { + Operation::Scan { .. } => program.resolve_cursor_id(&table.identifier), + Operation::Search { .. 
} => program.resolve_cursor_id(&table.identifier), _ => unreachable!(), }; program.emit_insn(Insn::NullRow { @@ -794,66 +767,7 @@ pub fn close_loop( assert_eq!(program.offset(), jump_offset); } - - close_loop(program, t_ctx, left)?; } - SourceOperator::Scan { - table_reference, - iter_dir, - .. - } => { - program.resolve_label(loop_labels.next, program.offset()); - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); - } else { - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); - } - } - SourceOperator::Search { - table_reference, - search, - .. - } => { - program.resolve_label(loop_labels.next, program.offset()); - if matches!(search, Search::RowidEq { .. }) { - // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. - return Ok(()); - } - let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - Search::RowidSearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier) - } - Search::RowidEq { .. } => unreachable!(), - }; - - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); - } - SourceOperator::Nothing { .. 
} => {} - }; - - program.resolve_label(loop_labels.loop_end, program.offset()); + } Ok(()) } From 98439cd936de82f1937a57f7694fe4722e86beec Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:49:17 +0200 Subject: [PATCH 7/9] optimizer.rs: refactor to use new data structures and remove unnecessary stuff We don't need `push_predicates()` because that never REALLY was a predicate pushdown optimization -- it just pushed WHERE clause condition expressions into the correct SourceOperator nodes in the tree. Now that we don't have a SourceOperator tree anymore and we keep the conditions in the WHERE clause instead, we don't need to "push" anything anymore. Leaves room for ACTUAL predicate pushdown optimizations later :) We also don't need any weird bitmask stuff anymore, and perhaps we never did, to determine where conditions should be evaluated. --- core/translate/optimizer.rs | 721 ++++++++---------------------------- 1 file changed, 158 insertions(+), 563 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index d6caba85e..6cddb8533 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -5,9 +5,8 @@ use sqlite3_parser::ast; use crate::{schema::Index, Result}; use super::plan::{ - get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, DeletePlan, Direction, - IterationDirection, Plan, Search, SelectPlan, SourceOperator, TableReference, - TableReferenceType, + DeletePlan, Direction, IterationDirection, JoinAwareConditionExpr, Operation, Plan, Search, + SelectPlan, TableReference, }; pub fn optimize_plan(plan: &mut Plan) -> Result<()> { @@ -23,33 +22,22 @@ pub fn optimize_plan(plan: &mut Plan) -> Result<()> { * but having them separate makes them easier to understand */ fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { - optimize_subqueries(&mut plan.source)?; + optimize_subqueries(plan)?; rewrite_exprs_select(plan)?; if let 
ConstantConditionEliminationResult::ImpossibleCondition = - eliminate_constants(&mut plan.source, &mut plan.where_clause)? + eliminate_constant_conditions(&mut plan.where_clause)? { plan.contains_constant_false_condition = true; return Ok(()); } - push_predicates( - &mut plan.source, - &mut plan.where_clause, - &plan.referenced_tables, - )?; - use_indexes( - &mut plan.source, - &plan.referenced_tables, + &mut plan.table_references, &plan.available_indexes, + &mut plan.where_clause, )?; - eliminate_unnecessary_orderby( - &mut plan.source, - &mut plan.order_by, - &plan.referenced_tables, - &plan.available_indexes, - )?; + eliminate_unnecessary_orderby(plan)?; Ok(()) } @@ -57,83 +45,67 @@ fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { fn optimize_delete_plan(plan: &mut DeletePlan) -> Result<()> { rewrite_exprs_delete(plan)?; if let ConstantConditionEliminationResult::ImpossibleCondition = - eliminate_constants(&mut plan.source, &mut plan.where_clause)? + eliminate_constant_conditions(&mut plan.where_clause)? { plan.contains_constant_false_condition = true; return Ok(()); } use_indexes( - &mut plan.source, - &plan.referenced_tables, + &mut plan.table_references, &plan.available_indexes, + &mut plan.where_clause, )?; Ok(()) } -fn optimize_subqueries(operator: &mut SourceOperator) -> Result<()> { - match operator { - SourceOperator::Subquery { plan, .. } => { +fn optimize_subqueries(plan: &mut SelectPlan) -> Result<()> { + for table in plan.table_references.iter_mut() { + if let Operation::Subquery { plan, .. } = &mut table.op { optimize_select_plan(&mut *plan)?; - Ok(()) } - SourceOperator::Join { left, right, .. 
} => { - optimize_subqueries(left)?; - optimize_subqueries(right)?; - Ok(()) - } - _ => Ok(()), } + + Ok(()) } -fn _operator_is_already_ordered_by( - operator: &mut SourceOperator, +fn query_is_already_ordered_by( + table_references: &[TableReference], key: &mut ast::Expr, - referenced_tables: &[TableReference], available_indexes: &Vec>, ) -> Result { - match operator { - SourceOperator::Scan { - table_reference, .. - } => Ok(key.is_rowid_alias_of(table_reference.table_index)), - SourceOperator::Search { - table_reference, - search, - .. - } => match search { - Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(table_reference.table_index)), - Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(table_reference.table_index)), + let first_table = table_references.first(); + if first_table.is_none() { + return Ok(false); + } + let table_reference = first_table.unwrap(); + match &table_reference.op { + Operation::Scan { .. } => Ok(key.is_rowid_alias_of(0)), + Operation::Search(search) => match search { + Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)), + Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)), Search::IndexSearch { index, .. } => { - let index_idx = key.check_index_scan( - table_reference.table_index, - referenced_tables, - available_indexes, - )?; + let index_idx = key.check_index_scan(0, &table_reference, available_indexes)?; let index_is_the_same = index_idx .map(|i| Rc::ptr_eq(&available_indexes[i], index)) .unwrap_or(false); Ok(index_is_the_same) } }, - SourceOperator::Join { left, .. 
} => { - _operator_is_already_ordered_by(left, key, referenced_tables, available_indexes) - } _ => Ok(false), } } -fn eliminate_unnecessary_orderby( - operator: &mut SourceOperator, - order_by: &mut Option>, - referenced_tables: &[TableReference], - available_indexes: &Vec>, -) -> Result<()> { - if order_by.is_none() { +fn eliminate_unnecessary_orderby(plan: &mut SelectPlan) -> Result<()> { + if plan.order_by.is_none() { + return Ok(()); + } + if plan.table_references.len() == 0 { return Ok(()); } - let o = order_by.as_mut().unwrap(); + let o = plan.order_by.as_mut().unwrap(); if o.len() != 1 { // TODO: handle multiple order by keys @@ -143,76 +115,55 @@ fn eliminate_unnecessary_orderby( let (key, direction) = o.first_mut().unwrap(); let already_ordered = - _operator_is_already_ordered_by(operator, key, referenced_tables, available_indexes)?; + query_is_already_ordered_by(&plan.table_references, key, &plan.available_indexes)?; if already_ordered { - push_scan_direction(operator, direction); - *order_by = None; + push_scan_direction(&mut plan.table_references[0], direction); + plan.order_by = None; } Ok(()) } /** - * Use indexes where possible + * Use indexes where possible. + * Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others. + * This is just because we are WIP. + * + * When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector. + * If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation. + * We put it there simply because it makes it a bit easier to track during translation. 
*/ fn use_indexes( - operator: &mut SourceOperator, - referenced_tables: &[TableReference], - available_indexes: &[Rc], + table_references: &mut [TableReference], + available_indexes: &Vec>, + where_clause: &mut Vec, ) -> Result<()> { - match operator { - SourceOperator::Subquery { .. } => Ok(()), - SourceOperator::Search { .. } => Ok(()), - SourceOperator::Scan { - table_reference, - predicates: filter, - id, - .. - } => { - if filter.is_none() { - return Ok(()); - } + if where_clause.is_empty() { + return Ok(()); + } - let fs = filter.as_mut().unwrap(); - for i in 0..fs.len() { - let f = fs[i].take_ownership(); - let table_index = referenced_tables - .iter() - .position(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - match try_extract_index_search_expression( - f, + 'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() { + if let Operation::Scan { .. } = &mut table_reference.op { + let mut i = 0; + while i < where_clause.len() { + let cond = where_clause.get_mut(i).unwrap(); + if let Some(index_search) = try_extract_index_search_expression( + cond, table_index, - referenced_tables, + &table_reference, available_indexes, )? { - Either::Left(non_index_using_expr) => { - fs[i] = non_index_using_expr; - } - Either::Right(index_search) => { - fs.remove(i); - *operator = SourceOperator::Search { - id: *id, - table_reference: table_reference.clone(), - predicates: Some(fs.clone()), - search: index_search, - }; - - return Ok(()); - } + where_clause.remove(i); + table_reference.op = Operation::Search(index_search); + continue 'outer; } + i += 1; } - - Ok(()) } - SourceOperator::Join { left, right, .. } => { - use_indexes(left, referenced_tables, available_indexes)?; - use_indexes(right, referenced_tables, available_indexes)?; - Ok(()) - } - SourceOperator::Nothing { .. 
} => Ok(()), } + + Ok(()) } #[derive(Debug, PartialEq, Clone)] @@ -221,377 +172,38 @@ enum ConstantConditionEliminationResult { ImpossibleCondition, } -// removes predicates that are always true -// returns a ConstantEliminationResult indicating whether any predicates are always false -fn eliminate_constants( - operator: &mut SourceOperator, - where_clause: &mut Option>, +/// Removes predicates that are always true. +/// Returns a ConstantEliminationResult indicating whether any predicates are always false. +/// This is used to determine whether the query can be aborted early. +fn eliminate_constant_conditions( + where_clause: &mut Vec, ) -> Result { - if let Some(predicates) = where_clause { - let mut i = 0; - while i < predicates.len() { - let predicate = &predicates[i]; - if predicate.is_always_true()? { - // true predicates can be removed since they don't affect the result - predicates.remove(i); - } else if predicate.is_always_false()? { - // any false predicate in a list of conjuncts (AND-ed predicates) will make the whole list false - predicates.truncate(0); - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { + let mut i = 0; + while i < where_clause.len() { + let predicate = &where_clause[i]; + if predicate.expr.is_always_true()? { + // true predicates can be removed since they don't affect the result + where_clause.remove(i); + } else if predicate.expr.is_always_false()? { + // any false predicate in a list of conjuncts (AND-ed predicates) will make the whole list false, + // except an outer join condition, because that just results in NULLs, not skipping the whole loop + if predicate.from_outer_join { i += 1; - } - } - } - match operator { - SourceOperator::Subquery { .. } => Ok(ConstantConditionEliminationResult::Continue), - SourceOperator::Join { - left, - right, - predicates, - outer, - .. - } => { - if eliminate_constants(left, where_clause)? 
- == ConstantConditionEliminationResult::ImpossibleCondition - { - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - if eliminate_constants(right, where_clause)? - == ConstantConditionEliminationResult::ImpossibleCondition - && !*outer - { - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - - if predicates.is_none() { - return Ok(ConstantConditionEliminationResult::Continue); - } - - let predicates = predicates.as_mut().unwrap(); - - let mut i = 0; - while i < predicates.len() { - let predicate = &mut predicates[i]; - if predicate.is_always_true()? { - predicates.remove(i); - } else if predicate.is_always_false()? { - if !*outer { - predicates.truncate(0); - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - // in an outer join, we can't skip rows, so just replace all constant false predicates with 0 - // so we don't later have to evaluate anything more complex or special-case the identifiers true and false - // which are just aliases for 1 and 0 - *predicate = ast::Expr::Literal(ast::Literal::Numeric("0".to_string())); - i += 1; - } else { - i += 1; - } - } - - Ok(ConstantConditionEliminationResult::Continue) - } - SourceOperator::Scan { predicates, .. } => { - if let Some(ps) = predicates { - let mut i = 0; - while i < ps.len() { - let predicate = &ps[i]; - if predicate.is_always_true()? { - // true predicates can be removed since they don't affect the result - ps.remove(i); - } else if predicate.is_always_false()? { - // any false predicate in a list of conjuncts (AND-ed predicates) will make the whole list false - ps.truncate(0); - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { - i += 1; - } - } - - if ps.is_empty() { - *predicates = None; - } - } - Ok(ConstantConditionEliminationResult::Continue) - } - SourceOperator::Search { predicates, .. 
} => { - if let Some(predicates) = predicates { - let mut i = 0; - while i < predicates.len() { - let predicate = &predicates[i]; - if predicate.is_always_true()? { - // true predicates can be removed since they don't affect the result - predicates.remove(i); - } else if predicate.is_always_false()? { - // any false predicate in a list of conjuncts (AND-ed predicates) will make the whole list false - predicates.truncate(0); - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { - i += 1; - } - } - } - - Ok(ConstantConditionEliminationResult::Continue) - } - SourceOperator::Nothing { .. } => Ok(ConstantConditionEliminationResult::Continue), - } -} - -/** - Recursively pushes predicates down the tree, as far as possible. - Where a predicate is pushed determines at which loop level it will be evaluated. - For example, in SELECT * FROM t1 JOIN t2 JOIN t3 WHERE t1.a = t2.a AND t2.b = t3.b AND t1.c = 1 - the predicate t1.c = 1 can be pushed to t1 and will be evaluated in the first (outermost) loop, - the predicate t1.a = t2.a can be pushed to t2 and will be evaluated in the second loop - while t2.b = t3.b will be evaluated in the third loop. -*/ -fn push_predicates( - operator: &mut SourceOperator, - where_clause: &mut Option>, - referenced_tables: &Vec, -) -> Result<()> { - // First try to push down any predicates from the WHERE clause - if let Some(predicates) = where_clause { - let mut i = 0; - while i < predicates.len() { - // Take ownership of predicate to try pushing it down - let predicate = predicates[i].take_ownership(); - // If predicate was successfully pushed (None returned), remove it from WHERE - let Some(predicate) = push_predicate(operator, predicate, referenced_tables)? 
else { - predicates.remove(i); continue; - }; - predicates[i] = predicate; + } + where_clause.truncate(0); + return Ok(ConstantConditionEliminationResult::ImpossibleCondition); + } else { i += 1; } - // Clean up empty WHERE clause - if predicates.is_empty() { - *where_clause = None; - } } - match operator { - SourceOperator::Subquery { .. } => Ok(()), - SourceOperator::Join { - left, - right, - predicates, - outer, - .. - } => { - // Recursively push predicates down both sides of join - push_predicates(left, where_clause, referenced_tables)?; - push_predicates(right, where_clause, referenced_tables)?; - - if predicates.is_none() { - return Ok(()); - } - - let predicates = predicates.as_mut().unwrap(); - - let mut i = 0; - while i < predicates.len() { - let predicate_owned = predicates[i].take_ownership(); - - // For a join like SELECT * FROM left INNER JOIN right ON left.id = right.id AND left.name = 'foo' - // the predicate 'left.name = 'foo' can already be evaluated in the outer loop (left side of join) - // because the row can immediately be skipped if left.name != 'foo'. - // But for a LEFT JOIN, we can't do this since we need to ensure that all rows from the left table are included, - // even if there are no matching rows from the right table. This is why we can't push LEFT JOIN predicates to the left side. - let push_result = if *outer { - Some(predicate_owned) - } else { - push_predicate(left, predicate_owned, referenced_tables)? - }; - - // Try pushing to left side first (see comment above for reasoning) - let Some(predicate) = push_result else { - predicates.remove(i); - continue; - }; - - // Then try right side - let Some(predicate) = push_predicate(right, predicate, referenced_tables)? 
else { - predicates.remove(i); - continue; - }; - - // If neither side could take it, keep in join predicates (not sure if this actually happens in practice) - // this is effectively the same as pushing to the right side, so maybe it could be removed and assert here - // that we don't reach this code - predicates[i] = predicate; - i += 1; - } - - Ok(()) - } - // Base cases - nowhere else to push to - SourceOperator::Scan { .. } => Ok(()), - SourceOperator::Search { .. } => Ok(()), - SourceOperator::Nothing { .. } => Ok(()), - } + Ok(ConstantConditionEliminationResult::Continue) } -/** - Push a single predicate down the tree, as far as possible. - Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) -*/ -fn push_predicate( - operator: &mut SourceOperator, - predicate: ast::Expr, - referenced_tables: &Vec, -) -> Result> { - match operator { - SourceOperator::Subquery { - predicates, - table_reference, - .. - } => { - // **TODO**: we are currently just evaluating the predicate after the subquery yields, - // and not trying to do anythign more sophisticated. - // E.g. literally: SELECT * FROM (SELECT * FROM t1) sub WHERE sub.col = 'foo' - // - // It is possible, and not overly difficult, to determine that we can also push the - // predicate into the subquery coroutine itself before it yields. The above query would - // effectively become: SELECT * FROM (SELECT * FROM t1 WHERE col = 'foo') sub - // - // This matters more in cases where the subquery builds some kind of sorter/index in memory - // (or on disk) and in those cases pushing the predicate down to the coroutine will make the - // subquery produce less intermediate data. In cases where no intermediate data structures are - // built, it doesn't matter. - // - // Moreover, in many cases the subquery can even be completely eliminated, e.g. the above original - // query would become: SELECT * FROM t1 WHERE col = 'foo' without the subquery. 
- // **END TODO** - - // Find position of this subquery in referenced_tables array - let subquery_index = referenced_tables - .iter() - .position(|t| { - t.table_identifier == table_reference.table_identifier - && matches!(t.reference_type, TableReferenceType::Subquery { .. }) - }) - .unwrap(); - - // Get bitmask showing which tables this predicate references - let predicate_bitmask = - get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; - - // Each table has a bit position based on join order from left to right - // e.g. in SELECT * FROM t1 JOIN t2 JOIN t3 - // t1 is position 0 (001), t2 is position 1 (010), t3 is position 2 (100) - // To push a predicate to a given table, it can only reference that table and tables to its left - // Example: For table t2 at position 1 (bit 010): - // - Can push: 011 (t2 + t1), 001 (just t1), 010 (just t2) - // - Can't push: 110 (t2 + t3) - let next_table_on_the_right_in_join_bitmask = 1 << (subquery_index + 1); - if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { - return Ok(Some(predicate)); - } - - if predicates.is_none() { - predicates.replace(vec![predicate]); - } else { - predicates.as_mut().unwrap().push(predicate); - } - - Ok(None) - } - SourceOperator::Scan { - predicates, - table_reference, - .. - } => { - // Find position of this table in referenced_tables array - let table_index = referenced_tables - .iter() - .position(|t| { - t.table_identifier == table_reference.table_identifier - && t.reference_type == TableReferenceType::BTreeTable - }) - .unwrap(); - - // Get bitmask showing which tables this predicate references - let predicate_bitmask = - get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; - - // Each table has a bit position based on join order from left to right - // e.g. 
in SELECT * FROM t1 JOIN t2 JOIN t3 - // t1 is position 0 (001), t2 is position 1 (010), t3 is position 2 (100) - // To push a predicate to a given table, it can only reference that table and tables to its left - // Example: For table t2 at position 1 (bit 010): - // - Can push: 011 (t2 + t1), 001 (just t1), 010 (just t2) - // - Can't push: 110 (t2 + t3) - let next_table_on_the_right_in_join_bitmask = 1 << (table_index + 1); - if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { - return Ok(Some(predicate)); - } - - // Add predicate to this table's filters - if predicates.is_none() { - predicates.replace(vec![predicate]); - } else { - predicates.as_mut().unwrap().push(predicate); - } - - Ok(None) - } - // Search nodes don't exist yet at this point; Scans are transformed to Search in use_indexes() - SourceOperator::Search { .. } => unreachable!(), - SourceOperator::Join { - left, - right, - predicates: join_on_preds, - outer, - .. - } => { - // Try pushing to left side first - let push_result_left = push_predicate(left, predicate, referenced_tables)?; - if push_result_left.is_none() { - return Ok(None); - } - // Then try right side - let push_result_right = - push_predicate(right, push_result_left.unwrap(), referenced_tables)?; - if push_result_right.is_none() { - return Ok(None); - } - - // For LEFT JOIN, predicates must stay at join level - if *outer { - return Ok(Some(push_result_right.unwrap())); - } - - let pred = push_result_right.unwrap(); - - // Get bitmasks for tables referenced in predicate and both sides of join - let table_refs_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &pred)?; - let left_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, left)?; - let right_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, right)?; - - // If predicate doesn't reference tables from both sides, it can't be a join condition - if table_refs_bitmask & left_bitmask == 0 || table_refs_bitmask & right_bitmask == 0 { 
- return Ok(Some(pred)); - } - - // Add as join predicate since it references both sides - if join_on_preds.is_none() { - join_on_preds.replace(vec![pred]); - } else { - join_on_preds.as_mut().unwrap().push(pred); - } - - Ok(None) - } - SourceOperator::Nothing { .. } => Ok(Some(predicate)), - } -} - -fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { - match operator { - SourceOperator::Scan { iter_dir, .. } => { +fn push_scan_direction(table: &mut TableReference, direction: &Direction) { + match &mut table.op { + Operation::Scan { iter_dir, .. } => { if iter_dir.is_none() { match direction { Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), @@ -599,22 +211,19 @@ fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { } } } - _ => todo!(), + _ => {} } } fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> { - rewrite_source_operator_exprs(&mut plan.source)?; for rc in plan.result_columns.iter_mut() { rewrite_expr(&mut rc.expr)?; } for agg in plan.aggregates.iter_mut() { rewrite_expr(&mut agg.original_expr)?; } - if let Some(predicates) = &mut plan.where_clause { - for expr in predicates { - rewrite_expr(expr)?; - } + for cond in plan.where_clause.iter_mut() { + rewrite_expr(&mut cond.expr)?; } if let Some(group_by) = &mut plan.group_by { for expr in group_by.exprs.iter_mut() { @@ -631,57 +240,12 @@ fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> { } fn rewrite_exprs_delete(plan: &mut DeletePlan) -> Result<()> { - rewrite_source_operator_exprs(&mut plan.source)?; - if let Some(predicates) = &mut plan.where_clause { - for expr in predicates { - rewrite_expr(expr)?; - } + for cond in plan.where_clause.iter_mut() { + rewrite_expr(&mut cond.expr)?; } - Ok(()) } -fn rewrite_source_operator_exprs(operator: &mut SourceOperator) -> Result<()> { - match operator { - SourceOperator::Join { - left, - right, - predicates, - .. 
- } => { - rewrite_source_operator_exprs(left)?; - rewrite_source_operator_exprs(right)?; - - if let Some(predicates) = predicates { - for expr in predicates.iter_mut() { - rewrite_expr(expr)?; - } - } - - Ok(()) - } - SourceOperator::Scan { predicates, .. } | SourceOperator::Search { predicates, .. } => { - if let Some(predicates) = predicates { - for expr in predicates.iter_mut() { - rewrite_expr(expr)?; - } - } - - Ok(()) - } - SourceOperator::Subquery { predicates, .. } => { - if let Some(predicates) = predicates { - for expr in predicates.iter_mut() { - rewrite_expr(expr)?; - } - } - - Ok(()) - } - SourceOperator::Nothing { .. } => Ok(()), - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantPredicate { AlwaysTrue, @@ -709,7 +273,7 @@ pub trait Optimizable { fn check_index_scan( &mut self, table_index: usize, - referenced_tables: &[TableReference], + table_reference: &TableReference, available_indexes: &[Rc], ) -> Result>; } @@ -728,7 +292,7 @@ impl Optimizable for ast::Expr { fn check_index_scan( &mut self, table_index: usize, - referenced_tables: &[TableReference], + table_reference: &TableReference, available_indexes: &[Rc], ) -> Result> { match self { @@ -737,9 +301,8 @@ impl Optimizable for ast::Expr { return Ok(None); } for (idx, index) in available_indexes.iter().enumerate() { - let table_ref = &referenced_tables[*table]; - if index.table_name == table_ref.table.get_name() { - let column = table_ref.table.get_column_at(*column); + if index.table_name == table_reference.table.get_name() { + let column = table_reference.table.get_column_at(*column); if index.columns.first().unwrap().name == column.name { return Ok(Some(idx)); } @@ -766,12 +329,12 @@ impl Optimizable for ast::Expr { return Ok(None); } let lhs_index = - lhs.check_index_scan(table_index, referenced_tables, available_indexes)?; + lhs.check_index_scan(table_index, &table_reference, available_indexes)?; if lhs_index.is_some() { return Ok(lhs_index); } let rhs_index = - 
rhs.check_index_scan(table_index, referenced_tables, available_indexes)?; + rhs.check_index_scan(table_index, &table_reference, available_indexes)?; if rhs_index.is_some() { // swap lhs and rhs let swapped_operator = match *op { @@ -911,31 +474,41 @@ impl Optimizable for ast::Expr { } } -pub enum Either { - Left(T), - Right(U), -} - pub fn try_extract_index_search_expression( - expr: ast::Expr, + cond: &mut JoinAwareConditionExpr, table_index: usize, - referenced_tables: &[TableReference], + table_reference: &TableReference, available_indexes: &[Rc], -) -> Result> { - match expr { - ast::Expr::Binary(mut lhs, operator, mut rhs) => { +) -> Result> { + if cond.eval_at_loop != table_index { + return Ok(None); + } + match &mut cond.expr { + ast::Expr::Binary(lhs, operator, rhs) => { if lhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { - return Ok(Either::Right(Search::RowidEq { cmp_expr: *rhs })); + let rhs_owned = rhs.take_ownership(); + return Ok(Some(Search::RowidEq { + cmp_expr: JoinAwareConditionExpr { + expr: rhs_owned, + from_outer_join: cond.from_outer_join, + eval_at_loop: cond.eval_at_loop, + }, + })); } ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { - return Ok(Either::Right(Search::RowidSearch { - cmp_op: operator, - cmp_expr: *rhs, + let rhs_owned = rhs.take_ownership(); + return Ok(Some(Search::RowidSearch { + cmp_op: *operator, + cmp_expr: JoinAwareConditionExpr { + expr: rhs_owned, + from_outer_join: cond.from_outer_join, + eval_at_loop: cond.eval_at_loop, + }, })); } _ => {} @@ -945,15 +518,27 @@ pub fn try_extract_index_search_expression( if rhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { - return Ok(Either::Right(Search::RowidEq { cmp_expr: *lhs })); + let lhs_owned = lhs.take_ownership(); + return Ok(Some(Search::RowidEq { + cmp_expr: JoinAwareConditionExpr { + expr: lhs_owned, + from_outer_join: cond.from_outer_join, + 
eval_at_loop: cond.eval_at_loop, + }, + })); } ast::Operator::Greater | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { - return Ok(Either::Right(Search::RowidSearch { - cmp_op: operator, - cmp_expr: *lhs, + let lhs_owned = lhs.take_ownership(); + return Ok(Some(Search::RowidSearch { + cmp_op: *operator, + cmp_expr: JoinAwareConditionExpr { + expr: lhs_owned, + from_outer_join: cond.from_outer_join, + eval_at_loop: cond.eval_at_loop, + }, })); } _ => {} @@ -961,7 +546,7 @@ pub fn try_extract_index_search_expression( } if let Some(index_index) = - lhs.check_index_scan(table_index, referenced_tables, available_indexes)? + lhs.check_index_scan(table_index, &table_reference, available_indexes)? { match operator { ast::Operator::Equals @@ -969,10 +554,15 @@ pub fn try_extract_index_search_expression( | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { - return Ok(Either::Right(Search::IndexSearch { + let rhs_owned = rhs.take_ownership(); + return Ok(Some(Search::IndexSearch { index: available_indexes[index_index].clone(), - cmp_op: operator, - cmp_expr: *rhs, + cmp_op: *operator, + cmp_expr: JoinAwareConditionExpr { + expr: rhs_owned, + from_outer_join: cond.from_outer_join, + eval_at_loop: cond.eval_at_loop, + }, })); } _ => {} @@ -980,7 +570,7 @@ pub fn try_extract_index_search_expression( } if let Some(index_index) = - rhs.check_index_scan(table_index, referenced_tables, available_indexes)? + rhs.check_index_scan(table_index, &table_reference, available_indexes)? 
{ match operator { ast::Operator::Equals @@ -988,19 +578,24 @@ pub fn try_extract_index_search_expression( | ast::Operator::GreaterEquals | ast::Operator::Less | ast::Operator::LessEquals => { - return Ok(Either::Right(Search::IndexSearch { + let lhs_owned = lhs.take_ownership(); + return Ok(Some(Search::IndexSearch { index: available_indexes[index_index].clone(), - cmp_op: operator, - cmp_expr: *lhs, + cmp_op: *operator, + cmp_expr: JoinAwareConditionExpr { + expr: lhs_owned, + from_outer_join: cond.from_outer_join, + eval_at_loop: cond.eval_at_loop, + }, })); } _ => {} } } - Ok(Either::Left(ast::Expr::Binary(lhs, operator, rhs))) + Ok(None) } - _ => Ok(Either::Left(expr)), + _ => Ok(None), } } From 82a2850de9b5c48b25881f362fc948081e3eac4e Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:52:24 +0200 Subject: [PATCH 8/9] subquery.rs: use iteration instead of recursion and simplify --- core/translate/subquery.rs | 41 ++++++++++---------------------------- 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index c805f3ca5..e66acc88e 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -7,7 +7,7 @@ use crate::{ use super::{ emitter::{emit_query, Resolver, TranslateCtx}, - plan::{SelectPlan, SelectQueryType, SourceOperator, TableReference, TableReferenceType}, + plan::{Operation, SelectPlan, SelectQueryType, TableReference}, }; /// Emit the subqueries contained in the FROM clause. @@ -15,42 +15,23 @@ use super::{ pub fn emit_subqueries( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - referenced_tables: &mut [TableReference], - source: &mut SourceOperator, + tables: &mut [TableReference], ) -> Result<()> { - match source { - SourceOperator::Subquery { - table_reference, + for table in tables.iter_mut() { + if let Operation::Subquery { plan, - .. 
- } => { + result_columns_start_reg, + } = &mut table.op + { // Emit the subquery and get the start register of the result columns. let result_columns_start = emit_subquery(program, plan, t_ctx)?; - // Set the result_columns_start_reg in the TableReference object. + // Set the start register of the subquery's result columns. // This is done so that translate_expr() can read the result columns of the subquery, // as if it were reading from a regular table. - let table_ref = referenced_tables - .iter_mut() - .find(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - if let TableReferenceType::Subquery { - result_columns_start_reg, - .. - } = &mut table_ref.reference_type - { - *result_columns_start_reg = result_columns_start; - } else { - unreachable!("emit_subqueries called on non-subquery"); - } - Ok(()) + *result_columns_start_reg = result_columns_start; } - SourceOperator::Join { left, right, .. } => { - emit_subqueries(program, t_ctx, referenced_tables, left)?; - emit_subqueries(program, t_ctx, referenced_tables, right)?; - Ok(()) - } - _ => Ok(()), } + Ok(()) } /// Emit a subquery and return the start register of the result columns. 
@@ -87,7 +68,7 @@ pub fn emit_subquery<'a>( } let end_coroutine_label = program.allocate_label(); let mut metadata = TranslateCtx { - labels_main_loop: HashMap::new(), + labels_main_loop: vec![], label_main_loop_end: None, meta_group_by: None, meta_left_joins: HashMap::new(), From c18c6ad64d3f3558fc16aab68235589e9472a76c Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 1 Feb 2025 22:53:18 +0200 Subject: [PATCH 9/9] Marginal changes to use new data structures and field names --- core/translate/emitter.rs | 58 ++++++++++++++++++------------------ core/translate/expr.rs | 22 +++++++------- core/translate/group_by.rs | 4 +-- core/translate/order_by.rs | 4 +-- core/translate/result_row.rs | 2 +- 5 files changed, 45 insertions(+), 45 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index bf585dddc..12db50a95 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -16,8 +16,8 @@ use super::aggregation::emit_ungrouped_aggregation; use super::group_by::{emit_group_by, init_group_by, GroupByMetadata}; use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels}; use super::order_by::{emit_order_by, init_order_by, SortMetadata}; -use super::plan::SelectPlan; -use super::plan::SourceOperator; +use super::plan::Operation; +use super::plan::{SelectPlan, TableReference}; use super::subquery::emit_subqueries; #[derive(Debug)] @@ -58,7 +58,7 @@ impl<'a> Resolver<'a> { #[derive(Debug)] pub struct TranslateCtx<'a> { // A typical query plan is a nested loop. Each loop has its own LoopLabels (see the definition of LoopLabels for more details) - pub labels_main_loop: HashMap, + pub labels_main_loop: Vec, // label for the instruction that jumps to the next phase of the query after the main loop // we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.) 
pub label_main_loop_end: Option, @@ -111,7 +111,7 @@ fn prologue<'a>( let start_offset = program.offset(); let t_ctx = TranslateCtx { - labels_main_loop: HashMap::new(), + labels_main_loop: Vec::new(), label_main_loop_end: None, reg_agg_start: None, reg_limit: None, @@ -195,12 +195,7 @@ pub fn emit_query<'a>( t_ctx: &'a mut TranslateCtx<'a>, ) -> Result { // Emit subqueries first so the results can be read in the main query loop. - emit_subqueries( - program, - t_ctx, - &mut plan.referenced_tables, - &mut plan.source, - )?; + emit_subqueries(program, t_ctx, &mut plan.table_references)?; if t_ctx.reg_limit.is_none() { t_ctx.reg_limit = plan.limit.map(|_| program.alloc_register()); @@ -236,16 +231,21 @@ pub fn emit_query<'a>( if let Some(ref mut group_by) = plan.group_by { init_group_by(program, t_ctx, group_by, &plan.aggregates)?; } - init_loop(program, t_ctx, &plan.source, &OperationMode::SELECT)?; + init_loop( + program, + t_ctx, + &plan.table_references, + &OperationMode::SELECT, + )?; // Set up main query execution loop - open_loop(program, t_ctx, &mut plan.source, &plan.referenced_tables)?; + open_loop(program, t_ctx, &plan.table_references, &plan.where_clause)?; // Process result columns and expressions in the inner loop emit_loop(program, t_ctx, plan)?; // Clean up and close the main execution loop - close_loop(program, t_ctx, &plan.source)?; + close_loop(program, t_ctx, &plan.table_references)?; program.resolve_label(after_main_loop_label, program.offset()); @@ -285,20 +285,25 @@ fn emit_program_for_delete( } // Initialize cursors and other resources needed for query execution - init_loop(program, &mut t_ctx, &plan.source, &OperationMode::DELETE)?; + init_loop( + program, + &mut t_ctx, + &plan.table_references, + &OperationMode::DELETE, + )?; // Set up main query execution loop open_loop( program, &mut t_ctx, - &mut plan.source, - &plan.referenced_tables, + &mut plan.table_references, + &plan.where_clause, )?; - emit_delete_insns(program, &mut t_ctx, 
&plan.source, &plan.limit)?; + emit_delete_insns(program, &mut t_ctx, &plan.table_references, &plan.limit)?; // Clean up and close the main execution loop - close_loop(program, &mut t_ctx, &plan.source)?; + close_loop(program, &mut t_ctx, &plan.table_references)?; program.resolve_label(after_main_loop_label, program.offset()); @@ -315,20 +320,15 @@ fn emit_program_for_delete( fn emit_delete_insns( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - source: &SourceOperator, + table_references: &[TableReference], limit: &Option, ) -> Result<()> { - let cursor_id = match source { - SourceOperator::Scan { - table_reference, .. - } => program.resolve_cursor_id(&table_reference.table_identifier), - SourceOperator::Search { - table_reference, - search, - .. - } => match search { + let table_reference = table_references.first().unwrap(); + let cursor_id = match &table_reference.op { + Operation::Scan { .. } => program.resolve_cursor_id(&table_reference.identifier), + Operation::Search(search) => match search { Search::RowidEq { .. } | Search::RowidSearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier) + program.resolve_cursor_id(&table_reference.identifier) } Search::IndexSearch { index, .. 
} => program.resolve_cursor_id(&index.name), }, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 0b521d4a8..771978cba 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -13,7 +13,7 @@ use crate::vdbe::{ use crate::Result; use super::emitter::Resolver; -use super::plan::{TableReference, TableReferenceType}; +use super::plan::{Operation, TableReference}; #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { @@ -1738,12 +1738,12 @@ pub fn translate_expr( column, is_rowid_alias, } => { - let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - match tbl_ref.reference_type { + let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. - TableReferenceType::BTreeTable => { - let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); + Operation::Scan { .. } | Operation::Search(_) => { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); if *is_rowid_alias { program.emit_insn(Insn::RowId { cursor_id, @@ -1756,13 +1756,13 @@ pub fn translate_expr( dest: target_register, }); } - let column = tbl_ref.table.get_column_at(*column); + let column = table_reference.table.get_column_at(*column); maybe_apply_affinity(column.ty, target_register, program); Ok(target_register) } // If we are reading a column from a subquery, we instead copy the column from the // subquery's result registers. - TableReferenceType::Subquery { + Operation::Subquery { result_columns_start_reg, .. 
} => { @@ -1776,8 +1776,8 @@ pub fn translate_expr( } } ast::Expr::RowId { database: _, table } => { - let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); + let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); program.emit_insn(Insn::RowId { cursor_id, dest: target_register, @@ -2087,8 +2087,8 @@ pub fn get_name( } match expr { ast::Expr::Column { table, column, .. } => { - let table_ref = referenced_tables.get(*table).unwrap(); - table_ref.table.get_column_at(*column).name.clone() + let table_reference = referenced_tables.get(*table).unwrap(); + table_reference.table.get_column_at(*column).name.clone() } _ => fallback(), } diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 284f9e478..b537257a0 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -274,7 +274,7 @@ pub fn emit_group_by<'a>( let agg_result_reg = start_reg + i; translate_aggregation_step_groupby( program, - &plan.referenced_tables, + &plan.table_references, pseudo_cursor, cursor_index, agg, @@ -384,7 +384,7 @@ pub fn emit_group_by<'a>( for expr in having.iter() { translate_condition_expr( program, - &plan.referenced_tables, + &plan.table_references, expr, ConditionMetadata { jump_if_condition_is_true: false, diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 6e6341239..a908da571 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -184,7 +184,7 @@ pub fn order_by_sorter_insert( let key_reg = start_reg + i; translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), expr, key_reg, &t_ctx.resolver, @@ -205,7 +205,7 @@ pub fn order_by_sorter_insert( } translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), &rc.expr, cur_reg, &t_ctx.resolver, diff --git 
a/core/translate/result_row.rs b/core/translate/result_row.rs index 0b4b343d3..ad8454c25 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -29,7 +29,7 @@ pub fn emit_select_result( let reg = start_reg + i; translate_expr( program, - Some(&plan.referenced_tables), + Some(&plan.table_references), &rc.expr, reg, &t_ctx.resolver,