From 84cf4033d5387e5d240aea63ca5ea279a096dccc Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 22 Jul 2024 21:21:08 +0300 Subject: [PATCH 1/2] Refactor join processing - Make all constraints a list of WhereTerms in a ProcessedWhereClause - Support multiple joins instead of just one --- core/translate/expr.rs | 31 ++- core/translate/mod.rs | 263 ++++++++++------------- core/translate/select.rs | 47 ++++- core/translate/where_clause.rs | 376 +++++++++++++-------------------- core/vdbe/builder.rs | 4 + core/vdbe/mod.rs | 4 +- sqlite3/src/lib.rs | 3 +- testing/join.test | 40 +++- 8 files changed, 368 insertions(+), 400 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 9ea943a8f..449f2b85d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -36,10 +36,11 @@ pub fn build_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result table, None => anyhow::bail!("Parse error: no such table: {}", table_name), }; + let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); let mut joins = Vec::new(); joins.push(SrcTable { table: Table::BTree(table.clone()), - alias: maybe_alias, + identifier, join_info: None, }); if let Some(selected_joins) = &from.joins { @@ -60,9 +61,11 @@ pub fn build_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result table, None => anyhow::bail!("Parse error: no such table: {}", table_name), }; + let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); + joins.push(SrcTable { table: Table::BTree(table), - alias: maybe_alias, + identifier, join_info: Some(join), }); } @@ -292,7 +295,7 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, &arg, reg, cursor_hint)?; + let _ = translate_expr(program, select, arg, reg, cursor_hint)?; match arg { ast::Expr::Literal(_) => program.mark_last_insn_constant(), _ => {} @@ -625,11 +628,11 @@ fn wrap_eval_jump_expr( program.preassign_label_to_next_insn(if_true_label); } -pub fn resolve_ident_qualified<'a>( +pub fn resolve_ident_qualified( program: &ProgramBuilder, table_name: &String, ident: &String, - select: &'a Select, + select: &Select, cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); @@ -637,11 +640,7 @@ pub fn resolve_ident_qualified<'a>( for join in &select.src_tables { match join.table { Table::BTree(ref table) => { - let table_identifier = normalize_ident(match join.alias { - Some(alias) => alias, - None => &table.name, - }); - if table_identifier == *table_name { + if *join.identifier == table_name { let res = table .columns .iter() @@ -649,7 +648,7 @@ pub fn resolve_ident_qualified<'a>( .find(|(_, col)| col.name == *ident); if res.is_some() { let (idx, col) = res.unwrap(); - let cursor_id = program.resolve_cursor_id(&table_identifier, cursor_hint); + let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); return Ok((idx, col.ty, cursor_id, col.primary_key)); } } @@ -664,10 +663,10 @@ pub fn resolve_ident_qualified<'a>( ); } -pub fn resolve_ident_table<'a>( +pub fn resolve_ident_table( program: &ProgramBuilder, ident: &String, - select: &'a Select, + select: &Select, cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); @@ -675,10 +674,6 @@ pub fn resolve_ident_table<'a>( for join in &select.src_tables { match join.table { Table::BTree(ref table) => { - let table_identifier = normalize_ident(match join.alias { - Some(alias) => alias, - None => &table.name, - }); let res = table .columns .iter() @@ -704,7 +699,7 @@ pub fn resolve_ident_table<'a>( is_primary_key = res.1.primary_key; } } - let cursor_id = program.resolve_cursor_id(&table_identifier, cursor_hint); + let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); found.push((idx, col_type, cursor_id, is_primary_key)); } } diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 68fa38d04..becd1c00a 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -10,15 +10,15 @@ use crate::pager::Pager; use crate::schema::{Column, PseudoTable, Schema, Table}; use crate::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; use crate::translate::select::{ColumnInfo, LoopInfo, Select, SrcTable}; -use crate::translate::where_clause::{ - evaluate_conditions, translate_conditions, translate_where, Inner, Left, QueryConstraint, -}; +use crate::translate::where_clause::{translate_processed_where, translate_where}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::normalize_ident; use crate::vdbe::{BranchOffset, Insn, Program, builder::ProgramBuilder}; use anyhow::Result; use expr::{build_select, maybe_apply_affinity, translate_expr}; +use select::LeftJoinBookkeeping; use sqlite3_parser::ast::{self, Literal}; +use where_clause::{process_where, ProcessedWhereClause}; struct LimitInfo { limit_reg: usize, @@ -114,7 +114,7 @@ fn translate_select(mut select: Select) -> Result { }; if !select.src_tables.is_empty() { - let constraint = translate_tables_begin(&mut program, &mut select)?; + translate_tables_begin(&mut program, &mut select)?; let (register_start, column_count) = if let Some(sort_columns) = select.order_by { let start = program.next_free_register(); @@ -153,7 +153,7 @@ fn translate_select(mut select: Select) -> Result { } } - translate_tables_end(&mut program, &select, constraint); + translate_tables_end(&mut program, &select); if select.exist_aggregation { let mut target = register_start; @@ -283,7 +283,7 @@ fn translate_sorter( start_reg: register_start, count, }); - emit_limit_insn(&limit_info, program); + emit_limit_insn(limit_info, program); program.emit_insn(Insn::SorterNext { cursor_id: sort_info.sorter_cursor, pc_if_next: sorter_data_offset, @@ -292,145 +292,44 @@ fn translate_sorter( Ok(()) } -fn translate_tables_begin( - program: &mut ProgramBuilder, - select: &mut Select, -) -> Result> { +fn translate_tables_begin(program: &mut ProgramBuilder, select: &mut Select) -> Result<()> { for join in &select.src_tables { let loop_info = translate_table_open_cursor(program, join); select.loops.push(loop_info); } - let conditions = evaluate_conditions(program, select, None)?; + let processed_where = process_where(program, select)?; - for loop_info in &mut select.loops { - let mut left_join_match_flag_maybe = None; - if let Some(QueryConstraint::Left(Left { - match_flag, - right_cursor, - .. - })) = conditions.as_ref() - { - if loop_info.open_cursor == *right_cursor { - left_join_match_flag_maybe = Some(*match_flag); - } - } - translate_table_open_loop(program, loop_info, left_join_match_flag_maybe); + for loop_info in &select.loops { + translate_table_open_loop(program, select, loop_info, &processed_where)?; } - translate_conditions(program, select, conditions, None) + Ok(()) } -fn handle_skip_row( - program: &mut ProgramBuilder, - cursor_id: usize, - next_row_instruction_offset: BranchOffset, - constraint: &Option, -) { - match constraint { - Some(QueryConstraint::Left(Left { - where_clause, - join_clause, - match_flag, - match_flag_hit_marker, - found_match_next_row_label, - left_cursor, - right_cursor, - .. - })) => { - if let Some(where_clause) = where_clause { - if where_clause.no_match_target_cursor == cursor_id { - program.resolve_label( - where_clause.no_match_jump_label, - next_row_instruction_offset, - ); - } - } - if let Some(join_clause) = join_clause { - if join_clause.no_match_target_cursor == cursor_id { - program.resolve_label( - join_clause.no_match_jump_label, - next_row_instruction_offset, - ); - } - } - if cursor_id == *right_cursor { - // If the left join match flag has been set to 1, we jump to the next row (result row has been emitted already) - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: *match_flag, - target_pc: *found_match_next_row_label, - decrement_by: 0, - }, - *found_match_next_row_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - program.emit_insn(Insn::NullRow { - cursor_id: *right_cursor, - }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: *match_flag_hit_marker, - }, - *match_flag_hit_marker, - ); - } - if cursor_id == *left_cursor { - program.resolve_label(*found_match_next_row_label, next_row_instruction_offset); - } - } - Some(QueryConstraint::Inner(Inner { - where_clause, - join_clause, - .. - })) => { - if let Some(join_clause) = join_clause { - if cursor_id == join_clause.no_match_target_cursor { - program.resolve_label( - join_clause.no_match_jump_label, - next_row_instruction_offset, - ); - } - } - if let Some(where_clause) = where_clause { - if cursor_id == where_clause.no_match_target_cursor { - program.resolve_label( - where_clause.no_match_jump_label, - next_row_instruction_offset, - ); - } - } - } - None => {} - } -} - -fn translate_tables_end( - program: &mut ProgramBuilder, - select: &Select, - constraint: Option, -) { +fn translate_tables_end(program: &mut ProgramBuilder, select: &Select) { // iterate in reverse order as we open cursors in order for table_loop in select.loops.iter().rev() { let cursor_id = table_loop.open_cursor; - let next_row_instruction_offset = program.offset(); + program.resolve_label(table_loop.next_row_label, program.offset()); program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: table_loop.rewind_offset as BranchOffset, - }); - program.resolve_label(table_loop.rewind_label, program.offset()); - handle_skip_row(program, cursor_id, next_row_instruction_offset, &constraint); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: table_loop.rewind_label, + }, + table_loop.rewind_label, + ); + + if let Some(ljbk) = &table_loop.left_join_bookkeeping { + left_join_match_flag_check(program, ljbk, cursor_id); + } } } fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) -> LoopInfo { - let table_identifier = normalize_ident(match table.alias { - Some(alias) => alias, - None => &table.table.get_name(), - }); - let cursor_id = program.alloc_cursor_id(Some(table_identifier), Some(table.table.clone())); + let cursor_id = + program.alloc_cursor_id(Some(table.identifier.clone()), Some(table.table.clone())); let root_page = match &table.table { Table::BTree(btree) => btree.root_page, Table::Pseudo(_) => todo!(), @@ -441,37 +340,109 @@ fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) - }); program.emit_insn(Insn::OpenReadAwait); LoopInfo { + identifier: table.identifier.clone(), + left_join_bookkeeping: if table.is_outer_join() { + Some(LeftJoinBookkeeping { + match_flag_register: program.alloc_register(), + on_match_jump_to_label: program.allocate_label(), + set_match_flag_true_label: program.allocate_label(), + }) + } else { + None + }, open_cursor: cursor_id, - rewind_offset: 0, - rewind_label: 0, + next_row_label: program.allocate_label(), + rewind_label: program.allocate_label(), + rewind_on_empty_label: program.allocate_label(), } } +/** +* initialize left join match flag to false +* if condition checks pass, it will eventually be set to true +*/ +fn left_join_match_flag_initialize(program: &mut ProgramBuilder, ljbk: &LeftJoinBookkeeping) { + program.emit_insn(Insn::Integer { + value: 0, + dest: ljbk.match_flag_register, + }); +} + +/** +* after the relevant conditional jumps have been emitted, set the left join match flag to true +*/ +fn left_join_match_flag_set_true(program: &mut ProgramBuilder, ljbk: &LeftJoinBookkeeping) { + program.defer_label_resolution(ljbk.set_match_flag_true_label, program.offset() as usize); + program.emit_insn(Insn::Integer { + value: 1, + dest: ljbk.match_flag_register, + }); +} + +/** +* check if the left join match flag is set to true +* if it is, jump to the next row on the outer table +* if not, set the right table cursor's "pseudo null bit" on +* then jump to setting the left join match flag to true again, +* which will effectively emit all nulls for the right table. +*/ +fn left_join_match_flag_check( + program: &mut ProgramBuilder, + ljbk: &LeftJoinBookkeeping, + cursor_id: usize, +) { + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: ljbk.match_flag_register, + target_pc: ljbk.on_match_jump_to_label, + decrement_by: 0, + }, + ljbk.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + program.emit_insn(Insn::NullRow { cursor_id }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: ljbk.set_match_flag_true_label, + }, + ljbk.set_match_flag_true_label, + ); + // This points to the NextAsync instruction of the next table in the loop + // (i.e. the outer table, since we're iterating in reverse order) + program.resolve_label(ljbk.on_match_jump_to_label, program.offset()); +} + fn translate_table_open_loop( program: &mut ProgramBuilder, - loop_info: &mut LoopInfo, - left_join_match_flag_maybe: Option, -) { - if let Some(match_flag) = left_join_match_flag_maybe { - // Initialize left join as not matched - program.emit_insn(Insn::Integer { - value: 0, - dest: match_flag, - }); + select: &Select, + loop_info: &LoopInfo, + w: &ProcessedWhereClause, +) -> Result<()> { + if let Some(ljbk) = loop_info.left_join_bookkeeping.as_ref() { + left_join_match_flag_initialize(program, ljbk); } + program.emit_insn(Insn::RewindAsync { cursor_id: loop_info.open_cursor, }); - let rewind_await_label = program.allocate_label(); + program.defer_label_resolution(loop_info.rewind_label, program.offset() as usize); program.emit_insn_with_label_dependency( Insn::RewindAwait { cursor_id: loop_info.open_cursor, - pc_if_empty: rewind_await_label, + pc_if_empty: loop_info.rewind_on_empty_label, }, - rewind_await_label, + loop_info.rewind_on_empty_label, ); - loop_info.rewind_label = rewind_await_label; - loop_info.rewind_offset = program.offset() - 1; + + translate_processed_where(program, select, loop_info, w, None)?; + + if let Some(ljbk) = loop_info.left_join_bookkeeping.as_ref() { + left_join_match_flag_set_true(program, ljbk); + } + + Ok(()) } fn translate_columns( @@ -539,11 +510,7 @@ fn translate_table_star( target_register: usize, cursor_hint: Option, ) { - let table_identifier = normalize_ident(match table.alias { - Some(alias) => alias, - None => &table.table.get_name(), - }); - let table_cursor = program.resolve_cursor_id(&table_identifier, cursor_hint); + let table_cursor = program.resolve_cursor_id(&table.identifier, cursor_hint); let table = &table.table; for (i, col) in table.columns().iter().enumerate() { let col_target_register = target_register + i; diff --git a/core/translate/select.rs b/core/translate/select.rs index f54e581f8..49220932c 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,11 +1,32 @@ -use sqlite3_parser::ast; +use sqlite3_parser::ast::{self, JoinOperator, JoinType}; use crate::{function::Func, schema::Table, vdbe::BranchOffset}; +#[derive(Debug)] pub struct SrcTable<'a> { pub table: Table, - pub alias: Option<&'a String>, - pub join_info: Option<&'a ast::JoinedSelectTable>, // FIXME: preferably this should be a reference with lifetime == Select ast expr + pub identifier: String, + pub join_info: Option<&'a ast::JoinedSelectTable>, +} + +impl SrcTable<'_> { + pub fn is_outer_join(&self) -> bool { + matches!( + self.join_info, + Some(ast::JoinedSelectTable { + operator: JoinOperator::TypedJoin { + natural: false, + join_type: Some( + JoinType::Left + | JoinType::LeftOuter + | JoinType::Right + | JoinType::RightOuter + ) + }, + .. + }) + ) + } } #[derive(Debug)] @@ -29,9 +50,27 @@ impl<'a> ColumnInfo<'a> { } } +pub struct LeftJoinBookkeeping { + // integer register that holds a flag that is set to true if the current row has a match for the left join + pub match_flag_register: usize, + // label for the instruction that sets the match flag to true + pub set_match_flag_true_label: BranchOffset, + // label for the instruction where the program jumps to if the current row has a match for the left join + pub on_match_jump_to_label: BranchOffset, +} + pub struct LoopInfo { - pub rewind_offset: BranchOffset, + // The table or table alias that we are looping over + pub identifier: String, + // Metadata about a left join, if any + pub left_join_bookkeeping: Option, + // The label for the instruction that reads the next row for this table + pub next_row_label: BranchOffset, + // The label for the instruction that rewinds the cursor for this table pub rewind_label: BranchOffset, + // The label for the instruction that is jumped to in the Rewind instruction if the table is empty + pub rewind_on_empty_label: BranchOffset, + // The ID of the cursor that is opened for this table pub open_cursor: usize, } diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index 062d0f124..20489cf9c 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -1,51 +1,142 @@ use anyhow::Result; -use sqlite3_parser::ast::{self, JoinOperator}; +use sqlite3_parser::ast::{self}; use crate::{ - translate::expr::{resolve_ident_qualified, resolve_ident_table, translate_expr}, function::SingleRowFunc, + translate::expr::{resolve_ident_qualified, resolve_ident_table, translate_expr}, translate::select::Select, vdbe::{BranchOffset, Insn, builder::ProgramBuilder}, }; -const HARDCODED_CURSOR_LEFT_TABLE: usize = 0; -const HARDCODED_CURSOR_RIGHT_TABLE: usize = 1; +use super::select::LoopInfo; #[derive(Debug)] -pub struct Where { - pub constraint_expr: ast::Expr, - pub no_match_jump_label: BranchOffset, - pub no_match_target_cursor: usize, +pub struct WhereTerm { + pub expr: ast::Expr, + pub evaluate_at_cursor: usize, } #[derive(Debug)] -pub struct Join { - pub constraint_expr: ast::Expr, - pub no_match_jump_label: BranchOffset, - pub no_match_target_cursor: usize, +pub struct ProcessedWhereClause { + pub terms: Vec, } -#[derive(Debug)] -pub struct Left { - pub where_clause: Option, - pub join_clause: Option, - pub match_flag: usize, - pub match_flag_hit_marker: BranchOffset, - pub found_match_next_row_label: BranchOffset, - pub left_cursor: usize, - pub right_cursor: usize, +/** +* Split a constraint into a flat list of WhereTerms. +* The splitting is done at logical 'AND' operator boundaries. +* WhereTerms are currently just a wrapper around an ast::Expr, +* combined with the ID of the cursor where the term should be evaluated. +*/ +pub fn split_constraint_to_terms<'a>( + program: &'a mut ProgramBuilder, + select: &'a Select, + where_clause_or_join_constraint: &ast::Expr, + outer_join_table_name: Option<&'a String>, +) -> Result> { + let mut terms = Vec::new(); + let mut queue = vec![where_clause_or_join_constraint]; + + while let Some(expr) = queue.pop() { + match expr { + ast::Expr::Binary(left, ast::Operator::And, right) => { + queue.push(left); + queue.push(right); + } + expr => { + let term = WhereTerm { + expr: expr.clone(), + evaluate_at_cursor: match outer_join_table_name { + Some(table) => { + // If we had e.g. SELECT * FROM t1 LEFT JOIN t2 WHERE t1.a > 10, + // we could evaluate the t1.a > 10 condition at the cursor for t1, i.e. the outer table, + // skipping t1 rows that don't match the condition. + // + // However, if we have SELECT * FROM t1 LEFT JOIN t2 ON t1.a > 10, + // we need to evaluate the t1.a > 10 condition at the cursor for t2, i.e. the inner table, + // because we need to skip rows from t2 that don't match the condition. + // + // In inner joins, both of the above are equivalent, but in left joins they are not. + select + .loops + .iter() + .find(|t| t.identifier == *table) + .ok_or(anyhow::anyhow!( + "Could not find cursor for table {}", + table + ))? + .open_cursor + } + None => { + // For any non-outer-join condition expression, find the cursor that it should be evaluated at. + // This is the cursor that is the rightmost/innermost cursor that the expression depends on. + // In SELECT * FROM t1, t2 WHERE t1.a > 10, the condition should be evaluated at the cursor for t1. + // In SELECT * FROM t1, t2 WHERE t1.a > 10 OR t2.b > 20, the condition should be evaluated at the cursor for t2. + // + // We are splitting any AND expressions in this function, so for example in this query: + // 'SELECT * FROM t1, t2 WHERE t1.a > 10 AND t2.b > 20' + // we can evaluate the t1.a > 10 condition at the cursor for t1, and the t2.b > 20 condition at the cursor for t2. + // + // For expressions that don't depend on any cursor, we can evaluate them at the leftmost/outermost cursor. + // E.g. 'SELECT * FROM t1 JOIN t2 ON false' can be evaluated at the cursor for t1. + let cursors = + introspect_expression_for_cursors(program, select, expr, None)?; + + let outermost_cursor = select + .loops + .iter() + .map(|t| t.open_cursor) + .min() + .ok_or_else(|| { + anyhow::anyhow!("No open cursors found in any of the loops") + })?; + + *cursors.iter().max().unwrap_or(&outermost_cursor) + } + }, + }; + terms.push(term); + } + } + } + + Ok(terms) } -#[derive(Debug)] -pub struct Inner { - pub where_clause: Option, - pub join_clause: Option, -} +/** +* Split the WHERE clause and any JOIN ON clauses into a flat list of WhereTerms +* that can be evaluated at the appropriate cursor. +*/ +pub fn process_where<'a>( + program: &'a mut ProgramBuilder, + select: &'a Select, +) -> Result { + let mut wc = ProcessedWhereClause { terms: Vec::new() }; + if let Some(w) = &select.where_clause { + wc.terms + .extend(split_constraint_to_terms(program, select, w, None)?); + } -#[derive(Debug)] -pub enum QueryConstraint { - Left(Left), - Inner(Inner), + for table in select.src_tables.iter() { + if table.join_info.is_none() { + continue; + } + let join_info = table.join_info.unwrap(); + if let Some(ast::JoinConstraint::On(expr)) = &join_info.constraint { + let terms = split_constraint_to_terms( + program, + select, + expr, + if table.is_outer_join() { + Some(&table.identifier) + } else { + None + }, + )?; + wc.terms.extend(terms); + } + } + + Ok(wc) } pub fn translate_where( @@ -61,175 +152,27 @@ pub fn translate_where( } } -pub fn evaluate_conditions( +/** +* Translate the WHERE clause and JOIN ON clauses into a series of conditional jump instructions. +* At this point the WHERE clause and JOIN ON clauses have been split into a series of terms that can be evaluated at the appropriate cursor. +* We evaluate each term at the appropriate cursor. +*/ +pub fn translate_processed_where<'a>( program: &mut ProgramBuilder, - select: &Select, + select: &'a Select, + current_loop: &'a LoopInfo, + where_c: &'a ProcessedWhereClause, cursor_hint: Option, -) -> Result> { - let join_constraints = select - .src_tables - .iter() - .map(|v| v.join_info) - .filter_map(|v| v.map(|v| (v.constraint.clone(), v.operator))) - .collect::>(); - // TODO: only supports one JOIN; -> add support for multiple JOINs, e.g. SELECT * FROM a JOIN b ON a.id = b.id JOIN c ON b.id = c.id - if join_constraints.len() > 1 { - anyhow::bail!("Parse error: multiple JOINs not supported"); +) -> Result<()> { + for term in where_c.terms.iter() { + if term.evaluate_at_cursor != current_loop.open_cursor { + continue; + } + let target_jump = current_loop.next_row_label; + translate_condition_expr(program, select, &term.expr, target_jump, false, cursor_hint)?; } - let join_maybe = join_constraints.first(); - - let parsed_where_maybe = select.where_clause.as_ref().map(|where_clause| Where { - constraint_expr: where_clause.clone(), - no_match_jump_label: program.allocate_label(), - no_match_target_cursor: get_no_match_target_cursor( - program, - select, - where_clause, - cursor_hint, - ), - }); - - let parsed_join_maybe = join_maybe.and_then(|(constraint, _)| { - if let Some(ast::JoinConstraint::On(expr)) = constraint { - Some(Join { - constraint_expr: expr.clone(), - no_match_jump_label: program.allocate_label(), - no_match_target_cursor: get_no_match_target_cursor( - program, - select, - expr, - cursor_hint, - ), - }) - } else { - None - } - }); - - let constraint_maybe = match (parsed_where_maybe, parsed_join_maybe) { - (None, None) => None, - (Some(where_clause), None) => Some(QueryConstraint::Inner(Inner { - where_clause: Some(where_clause), - join_clause: None, - })), - (where_clause, Some(join_clause)) => { - let (_, op) = join_maybe.unwrap(); - match op { - JoinOperator::TypedJoin { natural, join_type } => { - if *natural { - todo!("Natural join not supported"); - } - // default to inner join when no join type is specified - let join_type = join_type.unwrap_or(ast::JoinType::Inner); - match join_type { - ast::JoinType::Inner | ast::JoinType::Cross => { - // cross join with a condition is an inner join - Some(QueryConstraint::Inner(Inner { - where_clause, - join_clause: Some(join_clause), - })) - } - ast::JoinType::LeftOuter | ast::JoinType::Left => { - let left_join_match_flag = program.alloc_register(); - let left_join_match_flag_hit_marker = program.allocate_label(); - let left_join_found_match_next_row_label = program.allocate_label(); - - Some(QueryConstraint::Left(Left { - where_clause, - join_clause: Some(join_clause), - found_match_next_row_label: left_join_found_match_next_row_label, - match_flag: left_join_match_flag, - match_flag_hit_marker: left_join_match_flag_hit_marker, - left_cursor: HARDCODED_CURSOR_LEFT_TABLE, // FIXME: hardcoded - right_cursor: HARDCODED_CURSOR_RIGHT_TABLE, // FIXME: hardcoded - })) - } - ast::JoinType::RightOuter | ast::JoinType::Right => { - todo!(); - } - ast::JoinType::FullOuter | ast::JoinType::Full => { - todo!(); - } - } - } - JoinOperator::Comma => { - todo!(); - } - } - } - }; - - Ok(constraint_maybe) -} - -pub fn translate_conditions( - program: &mut ProgramBuilder, - select: &Select, - conditions: Option, - cursor_hint: Option, -) -> Result> { - match conditions.as_ref() { - Some(QueryConstraint::Left(Left { - where_clause, - join_clause, - match_flag, - match_flag_hit_marker, - .. - })) => { - if let Some(where_clause) = where_clause { - translate_condition_expr( - program, - select, - &where_clause.constraint_expr, - where_clause.no_match_jump_label, - false, - cursor_hint, - )?; - } - if let Some(join_clause) = join_clause { - translate_condition_expr( - program, - select, - &join_clause.constraint_expr, - join_clause.no_match_jump_label, - false, - cursor_hint, - )?; - } - // Set match flag to 1 if we hit the marker (i.e. jump didn't happen to no_match_label as a result of the condition) - program.emit_insn(Insn::Integer { - value: 1, - dest: *match_flag, - }); - program.defer_label_resolution(*match_flag_hit_marker, (program.offset() - 1) as usize); - } - Some(QueryConstraint::Inner(inner_join)) => { - if let Some(where_clause) = &inner_join.where_clause { - translate_condition_expr( - program, - select, - &where_clause.constraint_expr, - where_clause.no_match_jump_label, - false, - cursor_hint, - )?; - } - if let Some(join_clause) = &inner_join.join_clause { - translate_condition_expr( - program, - select, - &join_clause.constraint_expr, - join_clause.no_match_jump_label, - false, - cursor_hint, - )?; - } - } - None => {} - } - - Ok(conditions) + Ok(()) } fn translate_condition_expr( @@ -532,13 +475,7 @@ fn introspect_expression_for_cursors( cursors.push(cursor_id); } ast::Expr::Literal(_) => {} - ast::Expr::Like { - lhs, - not: _, - op: _, - rhs, - escape: _, - } => { + ast::Expr::Like { lhs, rhs, .. } => { cursors.extend(introspect_expression_for_cursors( program, select, @@ -552,6 +489,18 @@ fn introspect_expression_for_cursors( cursor_hint, )?); } + ast::Expr::FunctionCall { args, .. } => { + if let Some(args) = args { + for arg in args { + cursors.extend(introspect_expression_for_cursors( + program, + select, + arg, + cursor_hint, + )?); + } + } + } other => { anyhow::bail!("Parse error: unsupported expression: {:?}", other); } @@ -559,26 +508,3 @@ fn introspect_expression_for_cursors( Ok(cursors) } - -fn get_no_match_target_cursor( - program: &ProgramBuilder, - select: &Select, - expr: &ast::Expr, - cursor_hint: Option, -) -> usize { - // This is the hackiest part of the code. We are finding the cursor that should be advanced to the next row - // when the condition is not met. This is done by introspecting the expression and finding the innermost cursor that is - // used in the expression. This is a very naive approach and will not work in all cases. - // Thankfully though it might be possible to just refine the logic contained here to make it work in all cases. Maybe. - let cursors = - introspect_expression_for_cursors(program, select, expr, cursor_hint).unwrap_or_default(); - if cursors.is_empty() { - assert!( - select.loops.len() > 0, - "select.loops is populated based on select.src_tables. Expect at least 1 table if this function is called" - ); - select.loops.first().unwrap().open_cursor - } else { - *cursors.iter().max().unwrap() - } -} diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 9305b65da..40ef9269f 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -242,6 +242,10 @@ impl ProgramBuilder { assert!(*target_pc < 0); *target_pc = to_offset; } + Insn::NextAwait { pc_if_next, .. } => { + assert!(*pc_if_next < 0); + *pc_if_next = to_offset; + } _ => { todo!("missing resolve_label for {:?}", insn); } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 575dbb6bc..0feb6e92e 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -686,7 +686,7 @@ impl Program { if let Some(ref rowid) = cursor.rowid()? { state.registers[*dest] = OwnedValue::Integer(*rowid as i64); } else { - todo!(); + state.registers[*dest] = OwnedValue::Null; } state.pc += 1; } @@ -1397,7 +1397,7 @@ mod tests { OwnedValue::Integer(value) => { // Check that the value is within the range of i64 assert!( - value >= i64::MIN && value <= i64::MAX, + (i64::MIN..=i64::MAX).contains(&value), "Random number out of range" ); } diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index b9c01f62c..280c26690 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -808,8 +808,7 @@ pub unsafe extern "C" fn sqlite3_errmsg(_db: *mut sqlite3) -> *const std::ffi::c let err_msg = if (*_db).err_code != SQLITE_OK { if !(*_db).p_err.is_null() { - let cstr = (*_db).p_err as *const std::ffi::c_char; - cstr + (*_db).p_err as *const std::ffi::c_char } else { std::ptr::null() } diff --git a/testing/join.test b/testing/join.test index 88f30a5f4..7e361e9cc 100755 --- a/testing/join.test +++ b/testing/join.test @@ -79,6 +79,21 @@ Alan| Michael| Brianna|} +do_execsql_test left-join-with-where-2 { + select users.first_name, products.name from users left join products on users.id < 2 where users.id < 3; +} {Jamie|hat +Jamie|cap +Jamie|shirt +Jamie|sweater +Jamie|sweatshirt +Jamie|shorts +Jamie|jeans +Jamie|sneakers +Jamie|boots +Jamie|coat +Jamie|accessories +Cindy|} + do_execsql_test left-join-non-pk { select users.first_name as user_name, products.name as product_name from users left join products on users.first_name = products.name limit 3; } {Jamie| @@ -107,4 +122,27 @@ Cindy|cap} do_execsql_test left-join-no-join-conditions-but-multiple-where { select u.first_name, p.name from users u left join products as p where u.id = p.id or u.first_name = p.name limit 2; } {Jamie|hat -Cindy|cap} \ No newline at end of file +Cindy|cap} + +do_execsql_test four-way-inner-join { + select u1.first_name, u2.first_name, u3.first_name, u4.first_name from users u1 join users u2 on u1.id = u2.id join users u3 on u2.id = u3.id + 1 join users u4 on u3.id = u4.id + 1 limit 1; +} {Tommy|Tommy|Cindy|Jamie} + +do_execsql_test leftjoin-innerjoin-where { + select u.first_name, p.name, p2.name from users u left join products p on p.name = u.first_name join products p2 on length(p2.name) > 8 where u.first_name = 'Franklin'; +} {Franklin||sweatshirt +Franklin||accessories} + +do_execsql_test leftjoin-leftjoin-where { + select u.first_name, p.name, p2.name from users u left join products p on p.name = u.first_name join products p2 on length(p2.name) > 8 where u.first_name = 'Franklin'; +} {Franklin||sweatshirt +Franklin||accessories} + +do_execsql_test innerjoin-leftjoin-where { + select u.first_name, u2.first_name, p.name from users u join users u2 on u.id = u2.id + 1 left join products p on p.name = u.first_name where u.first_name = 'Franklin'; +} {Franklin|Cynthia|} + +do_execsql_test innerjoin-leftjoin-with-or-terms { + select u.first_name, u2.first_name, p.name from users u join users u2 on u.id = u2.id + 1 left join products p on p.name = u.first_name or p.name like 'sweat%' where u.first_name = 'Franklin'; +} {Franklin|Cynthia|sweater +Franklin|Cynthia|sweatshirt} From d05ad6e6029f6194630a4cde547113e41f877688 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 23 Jul 2024 19:34:30 +0300 Subject: [PATCH 2/2] Dont allocate separate vectors in split_constraint_to_terms --- core/translate/where_clause.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index 20489cf9c..bb60048a3 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -5,7 +5,7 @@ use crate::{ function::SingleRowFunc, translate::expr::{resolve_ident_qualified, resolve_ident_table, translate_expr}, translate::select::Select, - vdbe::{BranchOffset, Insn, builder::ProgramBuilder}, + vdbe::{builder::ProgramBuilder, BranchOffset, Insn}, }; use super::select::LoopInfo; @@ -30,10 +30,10 @@ pub struct ProcessedWhereClause { pub fn split_constraint_to_terms<'a>( program: &'a mut ProgramBuilder, select: &'a Select, + processed_where_clause: &mut ProcessedWhereClause, where_clause_or_join_constraint: &ast::Expr, outer_join_table_name: Option<&'a String>, -) -> Result> { - let mut terms = Vec::new(); +) -> Result<()> { let mut queue = vec![where_clause_or_join_constraint]; while let Some(expr) = queue.pop() { @@ -94,12 +94,12 @@ pub fn split_constraint_to_terms<'a>( } }, }; - terms.push(term); + processed_where_clause.terms.push(term); } } } - Ok(terms) + Ok(()) } /** @@ -112,8 +112,7 @@ pub fn process_where<'a>( ) -> Result { let mut wc = ProcessedWhereClause { terms: Vec::new() }; if let Some(w) = &select.where_clause { - wc.terms - .extend(split_constraint_to_terms(program, select, w, None)?); + split_constraint_to_terms(program, select, &mut wc, w, None)?; } for table in select.src_tables.iter() { @@ -122,9 +121,10 @@ pub fn process_where<'a>( } let join_info = table.join_info.unwrap(); if let Some(ast::JoinConstraint::On(expr)) = &join_info.constraint { - let terms = split_constraint_to_terms( + split_constraint_to_terms( program, select, + &mut wc, expr, if table.is_outer_join() { Some(&table.identifier) @@ -132,7 +132,6 @@ pub fn process_where<'a>( None }, )?; - wc.terms.extend(terms); } }