diff --git a/core/lib.rs b/core/lib.rs index 0ffdcd6e3..ee329b2f2 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -17,6 +17,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use fallible_iterator::FallibleIterator; use log::trace; use schema::Schema; +use sqlite3_parser::ast; use sqlite3_parser::{ast::Cmd, lexer::sql::Parser}; use std::sync::Arc; use std::{cell::RefCell, rc::Rc}; @@ -27,6 +28,9 @@ use storage::sqlite3_ondisk::DatabaseHeader; #[cfg(feature = "fs")] use storage::wal::WalFile; +use translate::optimizer::optimize_plan; +use translate::planner::prepare_select_plan; + pub use error::LimboError; pub type Result = std::result::Result; @@ -173,7 +177,17 @@ impl Connection { program.explain(); Ok(None) } - Cmd::ExplainQueryPlan(_stmt) => Ok(None), + Cmd::ExplainQueryPlan(stmt) => { + match stmt { + ast::Stmt::Select(select) => { + let plan = prepare_select_plan(&self.schema, select)?; + let plan = optimize_plan(plan)?; + println!("{}", plan); + } + _ => todo!(), + } + Ok(None) + } } } else { Ok(None) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs new file mode 100644 index 000000000..cac0aafe3 --- /dev/null +++ b/core/translate/emitter.rs @@ -0,0 +1,898 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::rc::Rc; +use std::usize; + +use crate::schema::{BTreeTable, Column, PseudoTable, Table}; +use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::types::{OwnedRecord, OwnedValue}; +use crate::vdbe::builder::ProgramBuilder; +use crate::vdbe::{BranchOffset, Insn, Program}; +use crate::Result; + +use super::expr::{ + translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, + ConditionMetadata, +}; +use super::plan::Plan; +use super::plan::{Operator, ProjectionColumn}; + +/** + * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. 
+ * + * - step: perform a single step of the operator, emitting bytecode instructions as needed, + and returning a result indicating whether the operator is ready to emit a result row +*/ +pub trait Emitter { + fn step( + &mut self, + pb: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result; + fn result_columns( + &self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + metadata: &mut Metadata, + cursor_override: Option, + ) -> Result; + fn result_row( + &mut self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + metadata: &mut Metadata, + cursor_override: Option, + ) -> Result<()>; +} + +#[derive(Debug)] +pub struct LeftJoinMetadata { + // integer register that holds a flag that is set to true if the current row has a match for the left join + pub match_flag_register: usize, + // label for the instruction that sets the match flag to true + pub set_match_flag_true_label: BranchOffset, + // label for the instruction that checks if the match flag is true + pub check_match_flag_label: BranchOffset, + // label for the instruction where the program jumps to if the current row has a match for the left join + pub on_match_jump_to_label: BranchOffset, +} + +#[derive(Debug)] +pub struct SortMetadata { + // cursor id for the Sorter table where the sorted rows are stored + pub sort_cursor: usize, + // cursor id for the Pseudo table where rows are temporarily inserted from the Sorter table + pub pseudo_table_cursor: usize, + // label where the SorterData instruction is emitted; SorterNext will jump here if there is more data to read + pub sorter_data_label: BranchOffset, + // label for the instruction immediately following SorterNext; SorterSort will jump here in case there is no data + pub done_label: BranchOffset, +} + +#[derive(Debug, Default)] +pub struct Metadata { + // labels for the instructions that terminate the execution when a conditional check evaluates to false. 
typically jumps to Halt, but can also jump to AggFinal if a parent in the tree is an aggregation + termination_labels: Vec, + // labels for the instructions that jump to the next row in the current operator. + // for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; + // in a join with a scan and a seek, the seek will jump to the scan's Next instruction when the join condition is false. + next_row_labels: HashMap, + // labels for the Rewind instructions. + rewind_labels: Vec, + // mapping between Aggregation operator id and the register that holds the start of the aggregation result + aggregation_start_registers: HashMap, + // mapping between Order operator id and associated metadata + sorts: HashMap, + // mapping between Join operator id and associated metadata (for left joins only) + left_joins: HashMap, +} + +/** +* Emitters return one of three possible results from the step() method: +* - Continue: the operator is not yet ready to emit a result row +* - ReadyToEmit: the operator is ready to emit a result row +* - Done: the operator has completed execution +* For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. +* At that point, it will return ReadyToEmit. +* Finally, when the Scan operator has emitted a Next instruction, it will return Done. +* +* Parent operators are free to make decisions based on the result of a child operator's step() method. +* +* When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. +* When the root operator returns Done, the bytecode plan is complete. 
+* + +*/ +#[derive(Debug, PartialEq)] +pub enum OpStepResult { + Continue, + ReadyToEmit, + Done, +} + +impl Emitter for Operator { + fn step( + &mut self, + program: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result { + match self { + Operator::Scan { + table, + table_identifier, + id, + step, + predicates, + .. + } => { + *step += 1; + const SCAN_OPEN_READ: usize = 1; + const SCAN_REWIND_AND_CONDITIONS: usize = 2; + const SCAN_NEXT: usize = 3; + match *step { + SCAN_OPEN_READ => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + let next_row_label = program.allocate_label(); + m.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + Ok(OpStepResult::Continue) + } + SCAN_REWIND_AND_CONDITIONS => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program.emit_insn(Insn::RewindAsync { cursor_id }); + let rewind_label = program.allocate_label(); + let halt_label = m.termination_labels.last().unwrap(); + m.rewind_labels.push(rewind_label); + program.defer_label_resolution(rewind_label, program.offset() as usize); + program.emit_insn_with_label_dependency( + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + }, + *halt_label, + ); + + let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + Ok(OpStepResult::ReadyToEmit) + } + 
SCAN_NEXT => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program + .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = m.rewind_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + Ok(OpStepResult::Done) + } + _ => Ok(OpStepResult::Done), + } + } + Operator::SeekRowid { + table, + table_identifier, + rowid_predicate, + predicates, + step, + id, + .. + } => { + *step += 1; + const SEEKROWID_OPEN_READ: usize = 1; + const SEEKROWID_SEEK_AND_CONDITIONS: usize = 2; + match *step { + SEEKROWID_OPEN_READ => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + Ok(OpStepResult::Continue) + } + SEEKROWID_SEEK_AND_CONDITIONS => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + let rowid_reg = program.alloc_register(); + translate_expr( + program, + Some(referenced_tables), + rowid_predicate, + rowid_reg, + None, + )?; + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(&m.termination_labels.last().unwrap()); + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id, + src_reg: rowid_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + 
} + } + + Ok(OpStepResult::ReadyToEmit) + } + _ => Ok(OpStepResult::Done), + } + } + Operator::Join { + left, + right, + outer, + predicates, + step, + id, + .. + } => { + *step += 1; + const JOIN_INIT: usize = 1; + const JOIN_DO_JOIN: usize = 2; + const JOIN_END: usize = 3; + match *step { + JOIN_INIT => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + m.left_joins.insert(*id, lj_metadata); + } + left.step(program, m, referenced_tables)?; + right.step(program, m, referenced_tables)?; + + Ok(OpStepResult::Continue) + } + JOIN_DO_JOIN => { + left.step(program, m, referenced_tables)?; + + let mut jump_target_when_false = *m + .next_row_labels + .get(&right.id()) + .or(m.next_row_labels.get(&left.id())) + .unwrap_or(&m.termination_labels.last().unwrap()); + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + } + m.next_row_labels.insert(right.id(), jump_target_when_false); + + right.step(program, m, referenced_tables)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + } + program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: 
lj_meta.match_flag_register, + }); + } + + Ok(OpStepResult::ReadyToEmit) + } + JOIN_END => { + right.step(program, m, referenced_tables)?; + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + Operator::Scan { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + Operator::SeekRowid { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + // This points to the NextAsync instruction of the left table + program.resolve_label(lj_meta.on_match_jump_to_label, program.offset()); + } + left.step(program, m, referenced_tables)?; + + Ok(OpStepResult::Done) + } + _ => Ok(OpStepResult::Done), + } + } + Operator::Aggregate { + id, + source, + aggregates, + step, + } => { + *step += 1; + const AGGREGATE_INIT: usize = 1; + const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; + match *step { + AGGREGATE_INIT => { + let agg_final_label = program.allocate_label(); + m.termination_labels.push(agg_final_label); + let num_aggs = aggregates.len(); + let start_reg = 
program.alloc_registers(num_aggs); + m.aggregation_start_registers.insert(*id, start_reg); + + Ok(OpStepResult::Continue) + } + AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { + match source.step(program, m, referenced_tables)? { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + let start_reg = m.aggregation_start_registers.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = start_reg + i; + translate_aggregation( + program, + referenced_tables, + agg, + agg_result_reg, + None, + )?; + } + } + OpStepResult::Done => { + return Ok(OpStepResult::ReadyToEmit); + } + } + }, + _ => Ok(OpStepResult::Done), + } + } + Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::Limit { source, step, .. } => { + *step += 1; + loop { + match source.step(program, m, referenced_tables)? { + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit => { + return Ok(OpStepResult::ReadyToEmit); + } + OpStepResult::Done => return Ok(OpStepResult::Done), + } + } + } + Operator::Order { + id, + source, + key, + step, + } => { + *step += 1; + const ORDER_INIT: usize = 1; + const ORDER_INSERT_INTO_SORTER: usize = 2; + const ORDER_SORT_AND_OPEN_LOOP: usize = 3; + const ORDER_NEXT: usize = 4; + match *step { + ORDER_INIT => { + let sort_cursor = program.alloc_cursor_id(None, None); + m.sorts.insert( + *id, + SortMetadata { + sort_cursor, + pseudo_table_cursor: usize::MAX, // will be set later + sorter_data_label: program.allocate_label(), + done_label: program.allocate_label(), + }, + ); + let mut order = Vec::new(); + for (_, direction) in key.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: key.len(), + order: OwnedRecord::new(order), + }); + + loop { + match source.step(program, m, referenced_tables)? 
{ + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit => { + return Ok(OpStepResult::Continue); + } + OpStepResult::Done => { + return Ok(OpStepResult::Done); + } + } + } + } + ORDER_INSERT_INTO_SORTER => { + let sort_keys_count = key.len(); + let source_cols_count = source.column_count(referenced_tables); + let start_reg = program.alloc_registers(sort_keys_count); + for (i, (expr, _)) in key.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; + } + source.result_columns(program, referenced_tables, m, None)?; + + let dest = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: sort_keys_count + source_cols_count, + dest_reg: dest, + }); + + let sort_metadata = m.sorts.get_mut(id).unwrap(); + program.emit_insn(Insn::SorterInsert { + cursor_id: sort_metadata.sort_cursor, + record_reg: dest, + }); + + Ok(OpStepResult::Continue) + } + ORDER_SORT_AND_OPEN_LOOP => { + loop { + match source.step(program, m, referenced_tables)? 
{ + OpStepResult::Done => { + break; + } + _ => unreachable!(), + } + } + let column_names = source.column_names(); + let pseudo_columns = column_names + .iter() + .map(|name| Column { + name: name.clone(), + primary_key: false, + ty: crate::schema::Type::Null, + }) + .collect::>(); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + + let pseudo_content_reg = program.alloc_register(); + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: pseudo_content_reg, + num_fields: key.len() + source.column_count(referenced_tables), + }); + + let sort_metadata = m.sorts.get(id).unwrap(); + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + + program.defer_label_resolution( + sort_metadata.sorter_data_label, + program.offset() as usize, + ); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: pseudo_content_reg, + pseudo_cursor, + }); + + let sort_metadata = m.sorts.get_mut(id).unwrap(); + + sort_metadata.pseudo_table_cursor = pseudo_cursor; + + Ok(OpStepResult::ReadyToEmit) + } + ORDER_NEXT => { + let sort_metadata = m.sorts.get(id).unwrap(); + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_metadata.sorter_data_label, + }, + sort_metadata.sorter_data_label, + ); + + program.resolve_label(sort_metadata.done_label, program.offset()); + + Ok(OpStepResult::Done) + } + _ => unreachable!(), + } + } + Operator::Projection { source, step, .. } => { + *step += 1; + const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; + const PROJECTION_FINALIZE_SOURCE: usize = 2; + match *step { + PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { + match source.step(program, m, referenced_tables)? 
{ + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit | OpStepResult::Done => { + return Ok(OpStepResult::ReadyToEmit); + } + } + }, + PROJECTION_FINALIZE_SOURCE => { + match source.step(program, m, referenced_tables)? { + OpStepResult::Done => { + return Ok(OpStepResult::Done); + } + _ => unreachable!(), + } + } + _ => Ok(OpStepResult::Done), + } + } + Operator::Nothing => Ok(OpStepResult::Done), + } + } + fn result_columns( + &self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + m: &mut Metadata, + cursor_override: Option, + ) -> Result { + let col_count = self.column_count(referenced_tables); + match self { + Operator::Scan { + table, + table_identifier, + .. + } => { + let start_reg = program.alloc_registers(col_count); + translate_table_columns( + program, + table, + table_identifier, + cursor_override, + start_reg, + ); + + Ok(start_reg) + } + Operator::Join { left, right, .. } => { + let left_start_reg = + left.result_columns(program, referenced_tables, m, cursor_override)?; + right.result_columns(program, referenced_tables, m, cursor_override)?; + + Ok(left_start_reg) + } + Operator::Aggregate { id, aggregates, .. } => { + let start_reg = m.aggregation_start_registers.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = *start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + + Ok(*start_reg) + } + Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::SeekRowid { + table_identifier, + table, + .. + } => { + let start_reg = program.alloc_registers(col_count); + translate_table_columns( + program, + table, + table_identifier, + cursor_override, + start_reg, + ); + + Ok(start_reg) + } + Operator::Limit { .. } => { + unimplemented!() + } + Operator::Order { + id, source, key, .. 
+ } => { + let sort_metadata = m.sorts.get(id).unwrap(); + let cursor_override = Some(sort_metadata.sort_cursor); + let sort_keys_count = key.len(); + let start_reg = program.alloc_registers(sort_keys_count); + for (i, (expr, _)) in key.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + cursor_override, + )?; + } + source.result_columns(program, referenced_tables, m, cursor_override)?; + + Ok(start_reg) + } + Operator::Projection { expressions, .. } => { + let expr_count = expressions + .iter() + .map(|e| e.column_count(referenced_tables)) + .sum(); + let start_reg = program.alloc_registers(expr_count); + let mut cur_reg = start_reg; + for expr in expressions { + match expr { + ProjectionColumn::Column(expr) => { + translate_expr( + program, + Some(referenced_tables), + expr, + cur_reg, + cursor_override, + )?; + cur_reg += 1; + } + ProjectionColumn::Star => { + for (table, table_identifier) in referenced_tables.iter() { + cur_reg = translate_table_columns( + program, + table, + table_identifier, + cursor_override, + cur_reg, + ); + } + } + ProjectionColumn::TableStar(_, table_identifier) => { + let (table, table_identifier) = referenced_tables + .iter() + .find(|(_, id)| id == table_identifier) + .unwrap(); + cur_reg = translate_table_columns( + program, + table, + table_identifier, + cursor_override, + cur_reg, + ); + } + } + } + + Ok(start_reg) + } + Operator::Nothing => unimplemented!(), + } + } + fn result_row( + &mut self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + m: &mut Metadata, + cursor_override: Option, + ) -> Result<()> { + match self { + Operator::Order { id, source, .. } => { + let sort_metadata = m.sorts.get(id).unwrap(); + source.result_row( + program, + referenced_tables, + m, + Some(sort_metadata.pseudo_table_cursor), + )?; + + Ok(()) + } + Operator::Limit { source, limit, .. 
} => { + source.result_row(program, referenced_tables, m, cursor_override)?; + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + let jump_label = m.termination_labels.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + + Ok(()) + } + operator => { + let start_reg = + operator.result_columns(program, referenced_tables, m, cursor_override)?; + program.emit_insn(Insn::ResultRow { + start_reg, + count: operator.column_count(referenced_tables), + }); + Ok(()) + } + } + } +} + +fn prologue() -> Result<( + ProgramBuilder, + Metadata, + BranchOffset, + BranchOffset, + BranchOffset, +)> { + let mut program = ProgramBuilder::new(); + let init_label = program.allocate_label(); + let halt_label = program.allocate_label(); + + program.emit_insn_with_label_dependency( + Insn::Init { + target_pc: init_label, + }, + init_label, + ); + + let start_offset = program.offset(); + + let metadata = Metadata { + termination_labels: vec![halt_label], + ..Default::default() + }; + + Ok((program, metadata, init_label, halt_label, start_offset)) +} + +fn epilogue( + program: &mut ProgramBuilder, + init_label: BranchOffset, + halt_label: BranchOffset, + start_offset: BranchOffset, +) -> Result<()> { + program.resolve_label(halt_label, program.offset()); + program.emit_insn(Insn::Halt); + + program.resolve_label(init_label, program.offset()); + program.emit_insn(Insn::Transaction); + + program.emit_constant_insns(); + program.emit_insn(Insn::Goto { + target_pc: start_offset, + }); + + program.resolve_deferred_labels(); + + Ok(()) +} + +pub fn emit_program( + database_header: Rc>, + mut plan: Plan, +) -> Result { + let (mut program, mut metadata, init_label, halt_label, start_offset) = prologue()?; + + loop { + match plan + .root_operator + .step(&mut program, &mut metadata, 
&plan.referenced_tables)? + { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + plan.root_operator.result_row( + &mut program, + &plan.referenced_tables, + &mut metadata, + None, + )?; + } + OpStepResult::Done => { + epilogue(&mut program, init_label, halt_label, start_offset)?; + return Ok(program.build(database_header)); + } + } + } +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 71a1c29eb..c5d70e264 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,19 +1,518 @@ -use core::panic; - use crate::{function::JsonFunc, Result}; -use sqlite3_parser::ast::{self, Expr, UnaryOperator}; +use sqlite3_parser::ast::{self, UnaryOperator}; +use std::rc::Rc; +use crate::function::{AggFunc, Func, ScalarFunc}; +use crate::schema::Type; +use crate::util::normalize_ident; use crate::{ - function::{Func, ScalarFunc}, - schema::{Table, Type}, - translate::select::{ColumnInfo, Select, SrcTable}, - util::normalize_ident, + schema::BTreeTable, vdbe::{builder::ProgramBuilder, BranchOffset, Insn}, }; +use super::plan::Aggregate; + +#[derive(Default, Debug, Clone, Copy)] +pub struct ConditionMetadata { + pub jump_if_condition_is_true: bool, + pub jump_target_when_true: BranchOffset, + pub jump_target_when_false: BranchOffset, +} + +pub fn translate_condition_expr( + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + expr: &ast::Expr, + cursor_hint: Option, + condition_metadata: ConditionMetadata, +) -> Result<()> { + match expr { + ast::Expr::Between { .. } => todo!(), + ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { + // In a binary AND, never jump to the 'jump_target_when_true' label on the first condition, because + // the second condition must also be true. 
+ let _ = translate_condition_expr( + program, + referenced_tables, + lhs, + cursor_hint, + ConditionMetadata { + jump_if_condition_is_true: false, + ..condition_metadata + }, + ); + let _ = translate_condition_expr( + program, + referenced_tables, + rhs, + cursor_hint, + condition_metadata, + ); + } + ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { + let jump_target_when_false = program.allocate_label(); + let _ = translate_condition_expr( + program, + referenced_tables, + lhs, + cursor_hint, + ConditionMetadata { + // If the first condition is true, we don't need to evaluate the second condition. + jump_if_condition_is_true: true, + jump_target_when_false, + ..condition_metadata + }, + ); + program.resolve_label(jump_target_when_false, program.offset()); + let _ = translate_condition_expr( + program, + referenced_tables, + rhs, + cursor_hint, + condition_metadata, + ); + } + ast::Expr::Binary(lhs, op, rhs) => { + let lhs_reg = program.alloc_register(); + let rhs_reg = program.alloc_register(); + let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint); + match lhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + let _ = translate_expr(program, Some(referenced_tables), rhs, rhs_reg, cursor_hint); + match rhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + match op { + ast::Operator::Greater => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::GreaterEquals => { + if condition_metadata.jump_if_condition_is_true { + 
program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Less => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::LessEquals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Equals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::NotEquals => { + if condition_metadata.jump_if_condition_is_true { + 
program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Is => todo!(), + ast::Operator::IsNot => todo!(), + _ => { + todo!("op {:?} not implemented", op); + } + } + } + ast::Expr::Literal(lit) => match lit { + ast::Literal::Numeric(val) => { + let maybe_int = val.parse::(); + if let Ok(int_value) = maybe_int { + let reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: int_value, + dest: reg, + }); + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: reg, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: reg, + }, + condition_metadata.jump_target_when_false, + ) + } + } else { + crate::bail_parse_error!("unsupported literal type in condition"); + } + } + ast::Literal::String(string) => { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + value: string.clone(), + dest: reg, + }); + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: reg, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: reg, + }, + condition_metadata.jump_target_when_false, + ) + } + } + unimpl => todo!("literal {:?} not implemented", unimpl), + 
}, + ast::Expr::InList { lhs, not, rhs } => { + // lhs is e.g. a column reference + // rhs is an Option<Vec<Expr>> + // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). + // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). + // + // The IN expression is equivalent to a series of OR expressions. + // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. + // The NOT IN expression is equivalent to a series of AND expressions. + // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. + // + // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. + // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, + // which is what SQLite also does for small lists of values. + // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. + + if rhs.is_none() { + // If rhs is None, IN expressions are always false and NOT IN expressions are always true. + if *not { + // On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise we must fall through. + // This is because in a more complex condition we might need to evaluate the rest of the condition. + // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect, + // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. 
+ if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } else { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + return Ok(()); + } + + // The left hand side only needs to be evaluated once we have a list of values to compare against. + let lhs_reg = program.alloc_register(); + let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint)?; + + let rhs = rhs.as_ref().unwrap(); + + // The difference between a local jump and an "upper level" jump is that for example in this case: + // WHERE foo IN (1,2,3) OR bar = 5, + // we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. + // This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition. + // However, in this example: + // WHERE foo IN (1,2,3) AND bar = 5, + // we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. + // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. + let jump_target_when_true = if condition_metadata.jump_if_condition_is_true { + condition_metadata.jump_target_when_true + } else { + program.allocate_label() + }; + + if !*not { + // If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true. 
+ for (i, expr) in rhs.iter().enumerate() { + let rhs_reg = program.alloc_register(); + let last_condition = i == rhs.len() - 1; + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + rhs_reg, + cursor_hint, + )?; + // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. + if !last_condition { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: jump_target_when_true, + }, + jump_target_when_true, + ); + } else { + // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + } + // If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. + // If not, we can just fall through without emitting an unnecessary instruction. + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } else { + // If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true. + for expr in rhs.iter() { + let rhs_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + rhs_reg, + cursor_hint, + )?; + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. 
+ // If not, we can just fall through without emitting an unnecessary instruction. + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } + + if !condition_metadata.jump_if_condition_is_true { + program.resolve_label(jump_target_when_true, program.offset()); + } + } + ast::Expr::Like { + lhs, + not, + op, + rhs, + escape: _, + } => { + let cur_reg = program.alloc_register(); + assert!(match rhs.as_ref() { + ast::Expr::Literal(_) => true, + _ => false, + }); + match op { + ast::LikeOperator::Like => { + let pattern_reg = program.alloc_register(); + let column_reg = program.alloc_register(); + // LIKE(pattern, column). We should translate the pattern first before the column + let _ = translate_expr( + program, + Some(referenced_tables), + rhs, + pattern_reg, + cursor_hint, + )?; + program.mark_last_insn_constant(); + let _ = translate_expr( + program, + Some(referenced_tables), + lhs, + column_reg, + cursor_hint, + )?; + program.emit_insn(Insn::Function { + func: crate::vdbe::Func::Scalar(ScalarFunc::Like), + start_reg: pattern_reg, + dest: cur_reg, + }); + } + ast::LikeOperator::Glob => todo!(), + ast::LikeOperator::Match => todo!(), + ast::LikeOperator::Regexp => todo!(), + } + if !*not { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_true, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_false, + ); + } + } else { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + 
target_pc: condition_metadata.jump_target_when_true, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_true, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_false, + ); + } + } + } + _ => todo!("op {:?} not implemented", expr), + } + Ok(()) +} + pub fn translate_expr( program: &mut ProgramBuilder, - select: Option<&Select>, + referenced_tables: Option<&[(Rc, String)]>, expr: &ast::Expr, target_register: usize, cursor_hint: Option, @@ -23,8 +522,8 @@ pub fn translate_expr( ast::Expr::Binary(e1, op, e2) => { let e1_reg = program.alloc_register(); let e2_reg = program.alloc_register(); - let _ = translate_expr(program, select, e1, e1_reg, cursor_hint)?; - let _ = translate_expr(program, select, e2, e2_reg, cursor_hint)?; + let _ = translate_expr(program, referenced_tables, e1, e1_reg, cursor_hint)?; + let _ = translate_expr(program, referenced_tables, e2, e2_reg, cursor_hint)?; match op { ast::Operator::NotEquals => { @@ -136,6 +635,7 @@ pub fn translate_expr( Some(Func::Agg(_)) => { crate::bail_parse_error!("aggregation function in non-aggregation context") } + Some(Func::Json(j)) => match j { JsonFunc::JSON => { let args = if let Some(args) = args { @@ -153,7 +653,7 @@ pub fn translate_expr( ); }; let regs = program.alloc_register(); - translate_expr(program, select, &args[0], regs, cursor_hint)?; + translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?; program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, @@ -169,7 +669,7 @@ pub fn translate_expr( for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; } program.emit_insn(Insn::Function { @@ -201,7 +701,7 @@ pub fn translate_expr( for (index, arg) in args.iter().enumerate() 
{ let reg = translate_expr( program, - select, + referenced_tables, arg, target_register, cursor_hint, @@ -231,7 +731,7 @@ pub fn translate_expr( }; for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; } program.emit_insn(Insn::Function { start_reg: target_register, @@ -254,13 +754,25 @@ pub fn translate_expr( }; let temp_reg = program.alloc_register(); - translate_expr(program, select, &args[0], temp_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + temp_reg, + cursor_hint, + )?; program.emit_insn(Insn::NotNull { reg: temp_reg, target_pc: program.offset() + 2, }); - translate_expr(program, select, &args[1], temp_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[1], + temp_reg, + cursor_hint, + )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, dst_reg: target_register, @@ -286,9 +798,16 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg, cursor_hint)?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { @@ -320,7 +839,13 @@ pub fn translate_expr( }; let regs = program.alloc_register(); - translate_expr(program, select, &args[0], regs, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + regs, + cursor_hint, + )?; program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, @@ -352,7 +877,7 @@ pub fn translate_expr( let arg_reg = program.alloc_register(); let _ = translate_expr( program, - select, + referenced_tables, &args[0], arg_reg, cursor_hint, @@ -361,9 +886,9 @@ pub fn translate_expr( } } 
program.emit_insn(Insn::Function { - start_reg, + start_reg: start_reg, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Date), }); Ok(target_register) } @@ -387,10 +912,28 @@ pub fn translate_expr( let start_reg = program.alloc_register(); let length_reg = program.alloc_register(); - translate_expr(program, select, &args[0], str_reg, cursor_hint)?; - translate_expr(program, select, &args[1], start_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + str_reg, + cursor_hint, + )?; + translate_expr( + program, + referenced_tables, + &args[1], + start_reg, + cursor_hint, + )?; if args.len() == 3 { - translate_expr(program, select, &args[2], length_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[2], + length_reg, + cursor_hint, + )?; } program.emit_insn(Insn::Function { @@ -410,7 +953,7 @@ pub fn translate_expr( let arg_reg = program.alloc_register(); let _ = translate_expr( program, - select, + referenced_tables, &args[0], arg_reg, cursor_hint, @@ -419,7 +962,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { - start_reg, + start_reg: start_reg, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Time), }); @@ -446,7 +989,7 @@ pub fn translate_expr( for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); } @@ -460,7 +1003,7 @@ pub fn translate_expr( } ScalarFunc::Min => { let args = if let Some(args) = args { - if args.is_empty() { + if args.len() < 1 { crate::bail_parse_error!( "min function with less than one argument" ); @@ -471,22 +1014,29 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg, cursor_hint)?; - if let 
ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Min), }); Ok(target_register) } ScalarFunc::Max => { let args = if let Some(args) = args { - if args.is_empty() { + if args.len() < 1 { crate::bail_parse_error!( "max function with less than one argument" ); @@ -497,16 +1047,23 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg, cursor_hint)?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Max), }); Ok(target_register) } @@ -521,7 +1078,7 @@ pub fn translate_expr( ast::Expr::Id(ident) => { // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); let (idx, col_type, cursor_id, is_rowid_alias) = - resolve_ident_table(program, &ident.0, select, cursor_hint)?; + resolve_ident_table(program, &ident.0, referenced_tables, cursor_hint)?; if is_rowid_alias { program.emit_insn(Insn::RowId { cursor_id, @@ -582,8 +1139,13 @@ pub fn translate_expr( ast::Expr::NotNull(_) => todo!(), ast::Expr::Parenthesized(_) => todo!(), ast::Expr::Qualified(tbl, ident) => { - let (idx, col_type, cursor_id, is_primary_key) = - resolve_ident_qualified(program, &tbl.0, &ident.0, select.unwrap(), cursor_hint)?; + let (idx, col_type, cursor_id, is_primary_key) = 
resolve_ident_qualified( + program, + &tbl.0, + &ident.0, + referenced_tables.unwrap(), + cursor_hint, + )?; if is_primary_key { program.emit_insn(Insn::RowId { cursor_id, @@ -623,85 +1185,6 @@ pub fn translate_expr( } } -pub fn analyze_columns<'a>( - columns: &'a Vec, - joins: &Vec, -) -> Vec> { - let mut column_information_list = Vec::with_capacity(columns.len()); - for column in columns { - let mut info = ColumnInfo::new(column); - if let ast::ResultColumn::Star = column { - info.columns_to_allocate = 0; - for join in joins { - info.columns_to_allocate += join.table.columns().len(); - } - } else { - info.columns_to_allocate = 1; - analyze_column(column, &mut info); - } - column_information_list.push(info); - } - column_information_list -} - -/// Analyze a column expression. -/// -/// This function will walk all columns and find information about: -/// * Aggregation functions. -fn analyze_column<'a>(column: &'a ast::ResultColumn, column_info_out: &mut ColumnInfo<'a>) { - match column { - ast::ResultColumn::Expr(expr, _) => analyze_expr(expr, column_info_out), - ast::ResultColumn::Star => {} - ast::ResultColumn::TableStar(_) => {} - } -} - -pub fn analyze_expr<'a>(expr: &'a Expr, column_info_out: &mut ColumnInfo<'a>) { - match expr { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - order_by: _, - } => { - let args_count = if let Some(args) = args { args.len() } else { 0 }; - let func_type = - match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), args_count) - { - Ok(func) => Some(func), - Err(_) => None, - }; - if func_type.is_none() { - let args = args.as_ref().unwrap(); - if !args.is_empty() { - analyze_expr(args.first().unwrap(), column_info_out); - } - } else { - column_info_out.func = func_type; - // TODO(pere): use lifetimes for args? 
Arenas would be lovely here :( - column_info_out.args = args; - } - } - ast::Expr::FunctionCallStar { - name, - filter_over: _, - } => { - let func_type = - match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), 1) { - Ok(func) => Some(func), - Err(_) => None, - }; - if func_type.is_none() { - panic!("Function not found"); - } else { - column_info_out.func = func_type; - } - } - _ => {} - } -} - fn wrap_eval_jump_expr( program: &mut ProgramBuilder, insn: Insn, @@ -724,46 +1207,41 @@ pub fn resolve_ident_qualified( program: &ProgramBuilder, table_name: &String, ident: &String, - select: &Select, + referenced_tables: &[(Rc, String)], cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); let table_name = normalize_ident(table_name); - for join in &select.src_tables { - match join.table { - Table::BTree(ref table) => { - if *join.identifier == table_name { - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, col.primary_key)); - let mut idx; - let mut col_type; - let mut is_primary_key; - if res.is_some() { - (idx, col_type, is_primary_key) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == format!("{}.{}", table_name, ident)) - }) { - idx = res.0; - col_type = res.1.ty; - is_primary_key = res.1.primary_key; - } - } - let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); - return Ok((idx, col_type, cursor_id, is_primary_key)); + for (catalog_table, identifier) in referenced_tables.iter() { + if *identifier == table_name { + let res = catalog_table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident) + .map(|(idx, col)| (idx, col.ty, col.primary_key)); + let mut idx; + let mut 
col_type; + let mut is_primary_key; + if res.is_some() { + (idx, col_type, is_primary_key) = res.unwrap(); + // overwrite if cursor hint is provided + if let Some(cursor_hint) = cursor_hint { + let cols = &program.cursor_ref[cursor_hint].1; + if let Some(res) = cols.as_ref().and_then(|res| { + res.columns() + .iter() + .enumerate() + .find(|x| x.1.name == format!("{}.{}", table_name, ident)) + }) { + idx = res.0; + col_type = res.1.ty; + is_primary_key = res.1.primary_key; } } + let cursor_id = program.resolve_cursor_id(identifier, cursor_hint); + return Ok((idx, col_type, cursor_id, is_primary_key)); } - Table::Pseudo(_) => todo!(), } } crate::bail_parse_error!( @@ -776,44 +1254,39 @@ pub fn resolve_ident_qualified( pub fn resolve_ident_table( program: &ProgramBuilder, ident: &String, - select: Option<&Select>, + referenced_tables: Option<&[(Rc, String)]>, cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); let mut found = Vec::new(); - for join in &select.unwrap().src_tables { - match join.table { - Table::BTree(ref table) => { - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, table.column_is_rowid_alias(col))); - let mut idx; - let mut col_type; - let mut is_rowid_alias; - if res.is_some() { - (idx, col_type, is_rowid_alias) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == *ident) - }) { - idx = res.0; - col_type = res.1.ty; - is_rowid_alias = table.column_is_rowid_alias(res.1); - } - } - let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); - found.push((idx, col_type, cursor_id, is_rowid_alias)); + for (catalog_table, identifier) in referenced_tables.unwrap() { + let res = catalog_table + .columns + 
.iter() + .enumerate() + .find(|(_, col)| col.name == *ident) + .map(|(idx, col)| (idx, col.ty, catalog_table.column_is_rowid_alias(col))); + let mut idx; + let mut col_type; + let mut is_rowid_alias; + if res.is_some() { + (idx, col_type, is_rowid_alias) = res.unwrap(); + // overwrite if cursor hint is provided + if let Some(cursor_hint) = cursor_hint { + let cols = &program.cursor_ref[cursor_hint].1; + if let Some(res) = cols.as_ref().and_then(|res| { + res.columns() + .iter() + .enumerate() + .find(|x| x.1.name == *ident) + }) { + idx = res.0; + col_type = res.1.ty; + is_rowid_alias = catalog_table.column_is_rowid_alias(&res.1); } } - Table::Pseudo(_) => todo!(), + let cursor_id = program.resolve_cursor_id(identifier, cursor_hint); + found.push((idx, col_type, cursor_id, is_rowid_alias)); } } if found.len() == 1 { @@ -833,3 +1306,276 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu }) } } + +pub fn translate_table_columns( + program: &mut ProgramBuilder, + table: &Rc, + table_identifier: &str, + cursor_override: Option, + start_reg: usize, +) -> usize { + let mut cur_reg = start_reg; + let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); + for i in 0..table.columns.len() { + let is_rowid = table.column_is_rowid_alias(&table.columns[i]); + let col_type = &table.columns[i].ty; + if is_rowid { + program.emit_insn(Insn::RowId { + cursor_id, + dest: cur_reg, + }); + } else { + program.emit_insn(Insn::Column { + cursor_id, + column: i, + dest: cur_reg, + }); + } + maybe_apply_affinity(*col_type, cur_reg, program); + cur_reg += 1; + } + cur_reg +} + +pub fn translate_aggregation( + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + agg: &Aggregate, + target_register: usize, + cursor_hint: Option, +) -> Result { + let dest = match agg.func { + AggFunc::Avg => { + if agg.args.len() != 1 { + crate::bail_parse_error!("avg bad number of arguments"); + } + let expr = &agg.args[0]; 
+ let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Avg, + }); + target_register + } + AggFunc::Count => { + let expr_reg = if agg.args.is_empty() { + program.alloc_register() + } else { + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + expr_reg + }; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Count, + }); + target_register + } + AggFunc::GroupConcat => { + if agg.args.len() != 1 && agg.args.len() != 2 { + crate::bail_parse_error!("group_concat bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let expr = &agg.args[0]; + let delimiter_expr: ast::Expr; + + if agg.args.len() == 2 { + match &agg.args[1] { + ast::Expr::Id(ident) => { + if ident.0.starts_with('"') { + delimiter_expr = + ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); + } else { + delimiter_expr = agg.args[1].clone(); + } + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + } else { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); + } + + translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + cursor_hint, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::GroupConcat, + }); + + target_register + } + AggFunc::Max => { 
+ if agg.args.len() != 1 { + crate::bail_parse_error!("max bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Max, + }); + target_register + } + AggFunc::Min => { + if agg.args.len() != 1 { + crate::bail_parse_error!("min bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Min, + }); + target_register + } + AggFunc::StringAgg => { + if agg.args.len() != 2 { + crate::bail_parse_error!("string_agg bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let expr = &agg.args[0]; + let delimiter_expr: ast::Expr; + + match &agg.args[1] { + ast::Expr::Id(ident) => { + if ident.0.starts_with('"') { + crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); + } else { + delimiter_expr = agg.args[1].clone(); + } + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + + translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + cursor_hint, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::StringAgg, + }); + + target_register + } + AggFunc::Sum => { + if agg.args.len() != 1 { 
+ crate::bail_parse_error!("sum bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Sum, + }); + target_register + } + AggFunc::Total => { + if agg.args.len() != 1 { + crate::bail_parse_error!("total bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Total, + }); + target_register + } + }; + Ok(dest) +} diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 5a21fee4c..3b7d9ea62 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -7,10 +7,13 @@ //! a SELECT statement will be translated into a sequence of instructions that //! will read rows from the database and filter them according to a WHERE clause. +pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod insert; +pub(crate) mod optimizer; +pub(crate) mod plan; +pub(crate) mod planner; pub(crate) mod select; -pub(crate) mod where_clause; use std::cell::RefCell; use std::rc::Rc; @@ -18,11 +21,10 @@ use std::rc::Rc; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; -use crate::util::normalize_ident; use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Result}; use insert::translate_insert; -use select::{prepare_select, translate_select}; +use select::translate_select; use sqlite3_parser::ast; /// Translate SQL statement into bytecode program. 
@@ -56,10 +58,7 @@ pub fn translate( ast::Stmt::Release(_) => bail_parse_error!("RELEASE not supported yet"), ast::Stmt::Rollback { .. } => bail_parse_error!("ROLLBACK not supported yet"), ast::Stmt::Savepoint(_) => bail_parse_error!("SAVEPOINT not supported yet"), - ast::Stmt::Select(select) => { - let select = prepare_select(schema, &select)?; - translate_select(select, database_header) - } + ast::Stmt::Select(select) => translate_select(schema, select, database_header), ast::Stmt::Update { .. } => bail_parse_error!("UPDATE not supported yet"), ast::Stmt::Vacuum(_, _) => bail_parse_error!("VACUUM not supported yet"), ast::Stmt::Insert { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs new file mode 100644 index 000000000..4cf598cf9 --- /dev/null +++ b/core/translate/optimizer.rs @@ -0,0 +1,742 @@ +use std::rc::Rc; + +use sqlite3_parser::ast; + +use crate::{schema::BTreeTable, util::normalize_ident, Result}; + +use super::plan::{ + get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, Operator, Plan, +}; + +/** + * Make a few passes over the plan to optimize it. + */ +pub fn optimize_plan(mut select_plan: Plan) -> Result { + push_predicates( + &mut select_plan.root_operator, + &select_plan.referenced_tables, + )?; + eliminate_constants(&mut select_plan.root_operator)?; + use_indexes( + &mut select_plan.root_operator, + &select_plan.referenced_tables, + )?; + Ok(select_plan) +} + +/** + * Use indexes where possible (currently just primary key lookups) + */ +fn use_indexes( + operator: &mut Operator, + referenced_tables: &[(Rc, String)], +) -> Result<()> { + match operator { + Operator::Scan { + table, + predicates: filter, + table_identifier, + id, + .. 
+        } => {
+            if filter.is_none() {
+                return Ok(());
+            }
+
+            let fs = filter.as_mut().unwrap();
+            let mut i = 0;
+            let mut maybe_rowid_predicate = None;
+            while i < fs.len() {
+                let f = fs[i].take_ownership();
+                let table_index = referenced_tables
+                    .iter()
+                    .position(|(t, t_id)| Rc::ptr_eq(t, table) && t_id == table_identifier)
+                    .unwrap();
+                let (can_use, expr) =
+                    try_extract_rowid_comparison_expression(f, table_index, referenced_tables)?;
+                if can_use {
+                    maybe_rowid_predicate = Some(expr);
+                    fs.remove(i);
+                    break;
+                } else {
+                    fs[i] = expr;
+                    i += 1;
+                }
+            }
+
+            if let Some(rowid_predicate) = maybe_rowid_predicate {
+                let predicates_owned = if fs.is_empty() {
+                    None
+                } else {
+                    Some(fs.drain(..).collect())
+                };
+                *operator = Operator::SeekRowid {
+                    table: table.clone(),
+                    table_identifier: table_identifier.clone(),
+                    rowid_predicate,
+                    predicates: predicates_owned,
+                    id: *id,
+                    step: 0,
+                }
+            }
+
+            return Ok(());
+        }
+        Operator::Aggregate { source, .. } => {
+            use_indexes(source, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::Filter { source, .. } => {
+            use_indexes(source, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::SeekRowid { .. } => {
+            return Ok(());
+        }
+        Operator::Limit { source, .. } => {
+            use_indexes(source, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::Join { left, right, .. } => {
+            use_indexes(left, referenced_tables)?;
+            use_indexes(right, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::Order { source, .. } => {
+            use_indexes(source, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::Projection { source, .. } => {
+            use_indexes(source, referenced_tables)?;
+            return Ok(());
+        }
+        Operator::Nothing => {
+            return Ok(());
+        }
+    }
+}
+
+/// Removes predicates that are always true and prunes operators that can never
+/// produce a row.
+///
+/// Returns `Ok(false)` when `operator` is known to produce no rows (it, or a child
+/// it depends on, carries an always-false predicate) and `Ok(true)` otherwise.
+/// `Limit`, `Order` and `Projection` replace themselves with `Operator::Nothing`
+/// when their source is impossible; the other operators are left in place and only
+/// report the verdict to their parent.
+fn eliminate_constants(operator: &mut Operator) -> Result<bool> {
+    match operator {
+        Operator::Filter {
+            source, predicates, ..
+        } => {
+            let mut i = 0;
+            while i < predicates.len() {
+                let predicate = &predicates[i];
+                if predicate.is_always_true()? {
+                    predicates.remove(i);
+                } else if predicate.is_always_false()? {
+                    return Ok(false);
+                } else {
+                    i += 1;
+                }
+            }
+
+            // Hand the child's verdict to the caller instead of discarding it: a filter
+            // over an impossible source is itself impossible.
+            if predicates.is_empty() {
+                // nothing left to filter on, so the filter collapses into its source
+                *operator = source.take_ownership();
+                eliminate_constants(operator)
+            } else {
+                eliminate_constants(source)
+            }
+        }
+        Operator::Join {
+            left,
+            right,
+            predicates,
+            outer,
+            ..
+        } => {
+            if !eliminate_constants(left)? {
+                return Ok(false);
+            }
+            // an outer join still emits the left rows when the right side is empty
+            if !eliminate_constants(right)? && !*outer {
+                return Ok(false);
+            }
+
+            if predicates.is_none() {
+                return Ok(true);
+            }
+
+            let predicates = predicates.as_mut().unwrap();
+
+            let mut i = 0;
+            while i < predicates.len() {
+                let predicate = &predicates[i];
+                if predicate.is_always_true()? {
+                    predicates.remove(i);
+                } else if predicate.is_always_false()? && !*outer {
+                    return Ok(false);
+                } else {
+                    // for an outer join an always-false ON predicate is kept and
+                    // evaluated at run time
+                    i += 1;
+                }
+            }
+
+            Ok(true)
+        }
+        Operator::Aggregate { source, .. } => {
+            // GROUP BY is not supported, so an aggregate yields exactly one row even
+            // over empty input (e.g. `SELECT count(*) FROM t WHERE 0` returns 0).
+            // Reporting `false` here would let a parent Limit/Order erase that row.
+            if !eliminate_constants(source)? {
+                *source = Box::new(Operator::Nothing);
+            }
+            Ok(true)
+        }
+        Operator::SeekRowid {
+            rowid_predicate,
+            predicates,
+            ..
+        } => {
+            if let Some(predicates) = predicates {
+                let mut i = 0;
+                while i < predicates.len() {
+                    let predicate = &predicates[i];
+                    if predicate.is_always_true()? {
+                        predicates.remove(i);
+                    } else if predicate.is_always_false()? {
+                        return Ok(false);
+                    } else {
+                        i += 1;
+                    }
+                }
+            }
+
+            if rowid_predicate.is_always_false()? {
+                return Ok(false);
+            }
+
+            Ok(true)
+        }
+        Operator::Limit { source, .. } => {
+            let ok = eliminate_constants(source)?;
+            if !ok {
+                *operator = Operator::Nothing;
+            }
+            Ok(ok)
+        }
+        Operator::Order { source, .. } => {
+            let ok = eliminate_constants(source)?;
+            if !ok {
+                *operator = Operator::Nothing;
+            }
+            // report the verdict like Limit and Projection do
+            Ok(ok)
+        }
+        Operator::Projection { source, .. } => {
+            let ok = eliminate_constants(source)?;
+            if !ok {
+                *operator = Operator::Nothing;
+            }
+            Ok(ok)
+        }
+        Operator::Scan { predicates, .. } => {
+            if let Some(ps) = predicates {
+                let mut i = 0;
+                while i < ps.len() {
+                    let predicate = &ps[i];
+                    if predicate.is_always_true()? {
+                        ps.remove(i);
+                    } else if predicate.is_always_false()? {
+                        return Ok(false);
+                    } else {
+                        i += 1;
+                    }
+                }
+
+                if ps.is_empty() {
+                    *predicates = None;
+                }
+            }
+            Ok(true)
+        }
+        Operator::Nothing => Ok(true),
+    }
+}
+
+/**
+  Recursively pushes predicates down the tree, as far as possible.
+*/
+fn push_predicates(
+    operator: &mut Operator,
+    referenced_tables: &Vec<(Rc, String)>,
+) -> Result<()> {
+    match operator {
+        Operator::Filter {
+            source, predicates, ..
+        } => {
+            let mut i = 0;
+            while i < predicates.len() {
+                // try to push the predicate to the source
+                // if it succeeds, remove the predicate from the filter
+                let predicate_owned = predicates[i].take_ownership();
+                let Some(predicate) = push_predicate(source, predicate_owned, referenced_tables)?
+                else {
+                    predicates.remove(i);
+                    continue;
+                };
+                predicates[i] = predicate;
+                i += 1;
+            }
+
+            if predicates.is_empty() {
+                *operator = source.take_ownership();
+            }
+
+            return Ok(());
+        }
+        Operator::Join {
+            left,
+            right,
+            predicates,
+            outer,
+            ..
+        } => {
+            push_predicates(left, referenced_tables)?;
+            push_predicates(right, referenced_tables)?;
+
+            if predicates.is_none() {
+                return Ok(());
+            }
+
+            let predicates = predicates.as_mut().unwrap();
+
+            let mut i = 0;
+            while i < predicates.len() {
+                // try to push the predicate to the left side first, then to the right side
+
+                // temporarily take ownership of the predicate
+                let predicate_owned = predicates[i].take_ownership();
+                // left join predicates cant be pushed to the left side
+                let push_result = if *outer {
+                    Some(predicate_owned)
+                } else {
+                    push_predicate(left, predicate_owned, referenced_tables)?
+ }; + // if the predicate was pushed to a child, remove it from the list + let Some(predicate) = push_result else { + predicates.remove(i); + continue; + }; + // otherwise try to push it to the right side + // if it was pushed to the right side, remove it from the list + let Some(predicate) = push_predicate(right, predicate, referenced_tables)? else { + predicates.remove(i); + continue; + }; + // otherwise keep the predicate in the list + predicates[i] = predicate; + i += 1; + } + + return Ok(()); + } + Operator::Aggregate { source, .. } => { + push_predicates(source, referenced_tables)?; + + return Ok(()); + } + Operator::SeekRowid { .. } => { + return Ok(()); + } + Operator::Limit { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Order { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Projection { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Scan { .. } => { + return Ok(()); + } + Operator::Nothing => { + return Ok(()); + } + } +} + +/** + Push a single predicate down the tree, as far as possible. + Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) +*/ +fn push_predicate( + operator: &mut Operator, + predicate: ast::Expr, + referenced_tables: &Vec<(Rc, String)>, +) -> Result> { + match operator { + Operator::Scan { + predicates, + table_identifier, + .. + } => { + let table_index = referenced_tables + .iter() + .position(|(_, t_id)| t_id == table_identifier) + .unwrap(); + + let predicate_bitmask = + get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; + + // the expression is allowed to refer to tables on its left, i.e. the righter bits in the mask + // e.g. 
if this table is 0010, and the table on its right in the join is 0100: + // if predicate_bitmask is 0011, the predicate can be pushed (refers to this table and the table on its left) + // if predicate_bitmask is 0001, the predicate can be pushed (refers to the table on its left) + // if predicate_bitmask is 0101, the predicate can't be pushed (refers to this table and a table on its right) + let next_table_on_the_right_in_join_bitmask = 1 << (table_index + 1); + if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { + return Ok(Some(predicate)); + } + + if predicates.is_none() { + predicates.replace(vec![predicate]); + } else { + predicates.as_mut().unwrap().push(predicate); + } + + return Ok(None); + } + Operator::Filter { + source, + predicates: ps, + .. + } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + ps.push(push_result.unwrap()); + + return Ok(None); + } + Operator::Join { + left, + right, + predicates: join_on_preds, + outer, + .. 
+ } => { + let push_result_left = push_predicate(left, predicate, referenced_tables)?; + if push_result_left.is_none() { + return Ok(None); + } + let push_result_right = + push_predicate(right, push_result_left.unwrap(), referenced_tables)?; + if push_result_right.is_none() { + return Ok(None); + } + + if *outer { + return Ok(Some(push_result_right.unwrap())); + } + + let pred = push_result_right.unwrap(); + + let table_refs_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &pred)?; + + let left_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, left)?; + let right_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, right)?; + + if table_refs_bitmask & left_bitmask == 0 || table_refs_bitmask & right_bitmask == 0 { + return Ok(Some(pred)); + } + + if join_on_preds.is_none() { + join_on_preds.replace(vec![pred]); + } else { + join_on_preds.as_mut().unwrap().push(pred); + } + + return Ok(None); + } + Operator::Aggregate { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::SeekRowid { .. } => { + return Ok(Some(predicate)); + } + Operator::Limit { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Order { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Projection { source, .. 
} => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Nothing => { + return Ok(Some(predicate)); + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConstantPredicate { + AlwaysTrue, + AlwaysFalse, +} + +/** + Helper trait for expressions that can be optimized + Implemented for ast::Expr +*/ +pub trait Optimizable { + // if the expression is a constant expression e.g. '1', returns the constant condition + fn check_constant(&self) -> Result>; + fn is_always_true(&self) -> Result { + Ok(self + .check_constant()? + .map_or(false, |c| c == ConstantPredicate::AlwaysTrue)) + } + fn is_always_false(&self) -> Result { + Ok(self + .check_constant()? + .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) + } + // if the expression is the primary key of a table, returns the index of the table + fn check_primary_key( + &self, + referenced_tables: &[(Rc, String)], + ) -> Result>; +} + +impl Optimizable for ast::Expr { + fn check_primary_key( + &self, + referenced_tables: &[(Rc, String)], + ) -> Result> { + match self { + ast::Expr::Id(ident) => { + let ident = normalize_ident(&ident.0); + let tables = referenced_tables + .iter() + .enumerate() + .filter_map(|(i, (t, _))| { + if t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) { + Some(i) + } else { + None + } + }); + + let mut matches = 0; + let mut matching_tbl = None; + + for tbl in tables { + matching_tbl = Some(tbl); + matches += 1; + if matches > 1 { + crate::bail_parse_error!("ambiguous column name {}", ident) + } + } + + Ok(matching_tbl) + } + ast::Expr::Qualified(tbl, ident) => { + let tbl = normalize_ident(&tbl.0); + let ident = normalize_ident(&ident.0); + let table = referenced_tables.iter().enumerate().find(|(_, (t, t_id))| { + *t_id == tbl && t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) + }); + + if table.is_none() { + return 
Ok(None);
+                }
+
+                let table = table.unwrap();
+
+                Ok(Some(table.0))
+            }
+            _ => Ok(None),
+        }
+    }
+    /// Decides, where possible at plan time, whether this expression is a constant
+    /// predicate.
+    ///
+    /// Returns `Ok(Some(AlwaysTrue))` / `Ok(Some(AlwaysFalse))` when the expression
+    /// is known to always pass / always reject a row, and `Ok(None)` when it has to
+    /// be evaluated at run time. A constant NULL counts as always-false, because a
+    /// NULL predicate never lets a row through.
+    fn check_constant(&self) -> Result<Option<ConstantPredicate>> {
+        // Maps "the value is non-zero" onto the predicate it implies.
+        fn truthiness(non_zero: bool) -> ConstantPredicate {
+            if non_zero {
+                ConstantPredicate::AlwaysTrue
+            } else {
+                ConstantPredicate::AlwaysFalse
+            }
+        }
+
+        // True when `expr` is a constant that evaluates to SQL NULL: the NULL literal,
+        // possibly wrapped in unary operators (NOT, -, +, ~ all propagate NULL).
+        fn is_null_constant(expr: &ast::Expr) -> bool {
+            match expr {
+                ast::Expr::Literal(ast::Literal::Null) => true,
+                ast::Expr::Unary(_, inner) => is_null_constant(inner),
+                _ => false,
+            }
+        }
+
+        match self {
+            ast::Expr::Literal(lit) => match lit {
+                ast::Literal::Null => Ok(Some(ConstantPredicate::AlwaysFalse)),
+                ast::Literal::Numeric(b) => {
+                    if let Ok(int_value) = b.parse::<i64>() {
+                        return Ok(Some(truthiness(int_value != 0)));
+                    }
+                    if let Ok(float_value) = b.parse::<f64>() {
+                        return Ok(Some(truthiness(float_value != 0.0)));
+                    }
+
+                    Ok(None)
+                }
+                ast::Literal::String(s) => {
+                    let without_quotes = s.trim_matches('\'');
+                    if let Ok(int_value) = without_quotes.parse::<i64>() {
+                        return Ok(Some(truthiness(int_value != 0)));
+                    }
+
+                    // Rust's f64 parser also accepts words such as "inf" and "nan",
+                    // which SQL treats as ordinary non-numeric text, so only finite
+                    // results are trusted here.
+                    if let Ok(float_value) = without_quotes.parse::<f64>() {
+                        if float_value.is_finite() {
+                            return Ok(Some(truthiness(float_value != 0.0)));
+                        }
+                    }
+
+                    // Text is converted using its longest numeric prefix, so '1abc'
+                    // is truthy. Only text that cannot start a number is known to be
+                    // zero; everything else is left for run-time evaluation.
+                    let may_have_numeric_prefix = without_quotes
+                        .trim_start()
+                        .starts_with(|c: char| c.is_ascii_digit() || matches!(c, '+' | '-' | '.'));
+                    if may_have_numeric_prefix {
+                        Ok(None)
+                    } else {
+                        Ok(Some(ConstantPredicate::AlwaysFalse))
+                    }
+                }
+                _ => Ok(None),
+            },
+            ast::Expr::Unary(op, expr) => match op {
+                ast::UnaryOperator::Not => {
+                    // NOT NULL is NULL, which still rejects every row; flipping the
+                    // operand's always-false verdict would wrongly accept them all.
+                    if is_null_constant(expr) {
+                        return Ok(Some(ConstantPredicate::AlwaysFalse));
+                    }
+                    let trivial = expr.check_constant()?;
+                    Ok(trivial.map(|t| match t {
+                        ConstantPredicate::AlwaysTrue => ConstantPredicate::AlwaysFalse,
+                        ConstantPredicate::AlwaysFalse => ConstantPredicate::AlwaysTrue,
+                    }))
+                }
+                // negation never changes whether a value is zero
+                ast::UnaryOperator::Negative => expr.check_constant(),
+                _ => Ok(None),
+            },
+            ast::Expr::InList { lhs: _, not, rhs } => {
+                // only an empty (or missing) list is decidable without the lhs
+                let list_is_empty = rhs.as_ref().map_or(true, |list| list.is_empty());
+                if !list_is_empty {
+                    return Ok(None);
+                }
+
+                Ok(Some(if *not {
+                    ConstantPredicate::AlwaysTrue
+                } else {
+                    ConstantPredicate::AlwaysFalse
+                }))
+            }
+            ast::Expr::Binary(lhs, op, rhs) => {
+                let lhs_trivial = lhs.check_constant()?;
+                let rhs_trivial = rhs.check_constant()?;
+                match op {
+                    ast::Operator::And => {
+                        if lhs_trivial == Some(ConstantPredicate::AlwaysFalse)
+                            || rhs_trivial == Some(ConstantPredicate::AlwaysFalse)
+                        {
+                            return Ok(Some(ConstantPredicate::AlwaysFalse));
+                        }
+                        if lhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                            && rhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                        {
+                            return Ok(Some(ConstantPredicate::AlwaysTrue));
+                        }
+
+                        Ok(None)
+                    }
+                    ast::Operator::Or => {
+                        if lhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                            || rhs_trivial == Some(ConstantPredicate::AlwaysTrue)
+                        {
+                            return Ok(Some(ConstantPredicate::AlwaysTrue));
+                        }
+                        if lhs_trivial == Some(ConstantPredicate::AlwaysFalse)
+                            && rhs_trivial == Some(ConstantPredicate::AlwaysFalse)
+                        {
+                            return Ok(Some(ConstantPredicate::AlwaysFalse));
+                        }
+
+                        Ok(None)
+                    }
+                    _ => Ok(None),
+                }
+            }
+            _ => Ok(None),
+        }
+    }
+}
+
+pub fn try_extract_rowid_comparison_expression(
+    expr: ast::Expr,
+    table_index: usize,
+    referenced_tables: &[(Rc, String)],
+) -> Result<(bool, ast::Expr)> {
+    match expr {
+        ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => {
+            if let Some(lhs_table_index) = lhs.check_primary_key(referenced_tables)? {
+                if lhs_table_index == table_index {
+                    return Ok((true, *rhs));
+                }
+            }
+
+            if let Some(rhs_table_index) = rhs.check_primary_key(referenced_tables)?
{ + if rhs_table_index == table_index { + return Ok((true, *lhs)); + } + } + + Ok((false, ast::Expr::Binary(lhs, ast::Operator::Equals, rhs))) + } + _ => Ok((false, expr)), + } +} + +trait TakeOwnership { + fn take_ownership(&mut self) -> Self; +} + +impl TakeOwnership for ast::Expr { + fn take_ownership(&mut self) -> Self { + std::mem::replace(self, ast::Expr::Literal(ast::Literal::Null)) + } +} + +impl TakeOwnership for Operator { + fn take_ownership(&mut self) -> Self { + std::mem::replace(self, Operator::Nothing) + } +} + +fn replace_with(expr: &mut T, mut replacement: T) { + *expr = replacement.take_ownership(); +} diff --git a/core/translate/plan.rs b/core/translate/plan.rs new file mode 100644 index 000000000..2e59cafdc --- /dev/null +++ b/core/translate/plan.rs @@ -0,0 +1,534 @@ +use core::fmt; +use std::{ + fmt::{Display, Formatter}, + rc::Rc, +}; + +use sqlite3_parser::ast; + +use crate::{function::AggFunc, schema::BTreeTable, util::normalize_ident, Result}; + +pub struct Plan { + pub root_operator: Operator, + pub referenced_tables: Vec<(Rc, String)>, +} + +impl Display for Plan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.root_operator) + } +} + +/** + An Operator is a Node in the query plan. + Operators form a tree structure, with each having zero or more children. + For example, a query like `SELECT t1.foo FROM t1 ORDER BY t1.foo LIMIT 1` would have the following structure: + Limit + Order + Project + Scan + + Operators also have a unique ID, which is used to identify them in the query plan and attach metadata. + They also have a step counter, which is used to track the current step in the operator's execution. + TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. +*/ +#[derive(Clone, Debug)] +pub enum Operator { + // Aggregate operator + // This operator is used to compute aggregate functions like SUM, AVG, COUNT, etc. 
+ // It takes a source operator and a list of aggregate functions to compute. + // GROUP BY is not supported yet. + Aggregate { + id: usize, + source: Box, + aggregates: Vec, + step: usize, + }, + // Filter operator + // This operator is used to filter rows from the source operator. + // It takes a source operator and a list of predicates to evaluate. + // Only rows for which all predicates evaluate to true are passed to the next operator. + // Generally filter operators will only exist in unoptimized plans, + // as the optimizer will try to push filters down to the lowest possible level, + // e.g. a table scan. + Filter { + id: usize, + source: Box, + predicates: Vec, + }, + // SeekRowid operator + // This operator is used to retrieve a single row from a table by its rowid. + // rowid_predicate is an expression that produces the comparison value for the rowid. + // e.g. rowid = 5, or rowid = other_table.foo + // predicates is an optional list of additional predicates to evaluate. + SeekRowid { + id: usize, + table: Rc, + table_identifier: String, + rowid_predicate: ast::Expr, + predicates: Option>, + step: usize, + }, + // Limit operator + // This operator is used to limit the number of rows returned by the source operator. + Limit { + id: usize, + source: Box, + limit: usize, + step: usize, + }, + // Join operator + // This operator is used to join two source operators. + // It takes a left and right source operator, a list of predicates to evaluate, + // and a boolean indicating whether it is an outer join. + Join { + id: usize, + left: Box, + right: Box, + predicates: Option>, + outer: bool, + step: usize, + }, + // Order operator + // This operator is used to sort the rows returned by the source operator. + Order { + id: usize, + source: Box, + key: Vec<(ast::Expr, Direction)>, + step: usize, + }, + // Projection operator + // This operator is used to project columns from the source operator. 
+ // It takes a source operator and a list of expressions to evaluate. + // e.g. SELECT foo, bar FROM t1 + // In this example, the expressions would be [foo, bar] + // and the source operator would be a Scan operator for table t1. + Projection { + id: usize, + source: Box, + expressions: Vec, + step: usize, + }, + // Scan operator + // This operator is used to scan a table. + // It takes a table to scan and an optional list of predicates to evaluate. + // The predicates are used to filter rows from the table. + // e.g. SELECT * FROM t1 WHERE t1.foo = 5 + Scan { + id: usize, + table: Rc, + table_identifier: String, + predicates: Option>, + step: usize, + }, + // Nothing operator + // This operator is used to represent an empty query. + // e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing. + Nothing, +} + +#[derive(Clone, Debug)] +pub enum ProjectionColumn { + Column(ast::Expr), + Star, + TableStar(Rc, String), +} + +impl ProjectionColumn { + pub fn column_count(&self, referenced_tables: &[(Rc, String)]) -> usize { + match self { + ProjectionColumn::Column(_) => 1, + ProjectionColumn::Star => { + let mut count = 0; + for (table, _) in referenced_tables { + count += table.columns.len(); + } + count + } + ProjectionColumn::TableStar(table, _) => table.columns.len(), + } + } +} + +impl Operator { + pub fn column_count(&self, referenced_tables: &[(Rc, String)]) -> usize { + match self { + Operator::Aggregate { aggregates, .. } => aggregates.len(), + Operator::Filter { source, .. } => source.column_count(referenced_tables), + Operator::SeekRowid { table, .. } => table.columns.len(), + Operator::Limit { source, .. } => source.column_count(referenced_tables), + Operator::Join { left, right, .. } => { + left.column_count(referenced_tables) + right.column_count(referenced_tables) + } + Operator::Order { source, .. } => source.column_count(referenced_tables), + Operator::Projection { expressions, .. 
} => expressions + .iter() + .map(|e| e.column_count(referenced_tables)) + .sum(), + Operator::Scan { table, .. } => table.columns.len(), + Operator::Nothing => 0, + } + } + + pub fn column_names(&self) -> Vec { + match self { + Operator::Aggregate { .. } => { + todo!(); + } + Operator::Filter { source, .. } => source.column_names(), + Operator::SeekRowid { table, .. } => { + table.columns.iter().map(|c| c.name.clone()).collect() + } + Operator::Limit { source, .. } => source.column_names(), + Operator::Join { left, right, .. } => { + let mut names = left.column_names(); + names.extend(right.column_names()); + names + } + Operator::Order { source, .. } => source.column_names(), + Operator::Projection { expressions, .. } => expressions + .iter() + .map(|e| match e { + ProjectionColumn::Column(expr) => match expr { + ast::Expr::Id(ident) => ident.0.clone(), + ast::Expr::Qualified(tbl, ident) => format!("{}.{}", tbl.0, ident.0), + _ => "expr".to_string(), + }, + ProjectionColumn::Star => "*".to_string(), + ProjectionColumn::TableStar(_, tbl) => format!("{}.{}", tbl, "*"), + }) + .collect(), + Operator::Scan { table, .. } => table.columns.iter().map(|c| c.name.clone()).collect(), + Operator::Nothing => vec![], + } + } + + pub fn id(&self) -> usize { + match self { + Operator::Aggregate { id, .. } => *id, + Operator::Filter { id, .. } => *id, + Operator::SeekRowid { id, .. } => *id, + Operator::Limit { id, .. } => *id, + Operator::Join { id, .. } => *id, + Operator::Order { id, .. } => *id, + Operator::Projection { id, .. } => *id, + Operator::Scan { id, .. 
} => *id, + Operator::Nothing => unreachable!(), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Direction { + Ascending, + Descending, +} + +impl Display for Direction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Direction::Ascending => write!(f, "ASC"), + Direction::Descending => write!(f, "DESC"), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Aggregate { + pub func: AggFunc, + pub args: Vec, +} + +impl Display for Aggregate { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let args_str = self + .args + .iter() + .map(|arg| arg.to_string()) + .collect::>() + .join(", "); + write!(f, "{:?}({})", self.func, args_str) + } +} + +// For EXPLAIN QUERY PLAN +impl Display for Operator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + fn fmt_operator(operator: &Operator, f: &mut Formatter, level: usize) -> fmt::Result { + let indent = " ".repeat(level); + match operator { + Operator::Aggregate { + source, aggregates, .. + } => { + // e.g. Aggregate count(*), sum(x) + let aggregates_display_string = aggregates + .iter() + .map(|agg| agg.to_string()) + .collect::>() + .join(", "); + writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; + fmt_operator(source, f, level + 1) + } + Operator::Filter { + source, predicates, .. + } => { + let predicates_string = predicates + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!(f, "{}FILTER {}", indent, predicates_string)?; + fmt_operator(source, f, level + 1) + } + Operator::SeekRowid { + table, + rowid_predicate, + predicates, + .. 
+ } => { + match predicates { + Some(ps) => { + let predicates_string = ps + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!( + f, + "{}SEEK {}.rowid ON rowid={} FILTER {}", + indent, &table.name, rowid_predicate, predicates_string + )?; + } + None => writeln!( + f, + "{}SEEK {}.rowid ON rowid={}", + indent, &table.name, rowid_predicate + )?, + } + + Ok(()) + } + Operator::Limit { source, limit, .. } => { + writeln!(f, "{}TAKE {}", indent, limit)?; + fmt_operator(source, f, level + 1) + } + Operator::Join { + left, + right, + predicates, + outer, + .. + } => { + let join_name = if *outer { "OUTER JOIN" } else { "JOIN" }; + match predicates + .as_ref() + .and_then(|ps| if ps.is_empty() { None } else { Some(ps) }) + { + Some(ps) => { + let predicates_string = ps + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!(f, "{}{} ON {}", indent, join_name, predicates_string)?; + } + None => writeln!(f, "{}{}", indent, join_name)?, + } + fmt_operator(left, f, level + 1)?; + fmt_operator(right, f, level + 1) + } + Operator::Order { source, key, .. } => { + let sort_keys_string = key + .iter() + .map(|(expr, dir)| format!("{} {}", expr, dir)) + .collect::>() + .join(", "); + writeln!(f, "{}SORT {}", indent, sort_keys_string)?; + fmt_operator(source, f, level + 1) + } + Operator::Projection { + source, + expressions, + .. + } => { + let expressions = expressions + .iter() + .map(|expr| match expr { + ProjectionColumn::Column(c) => c.to_string(), + ProjectionColumn::Star => "*".to_string(), + ProjectionColumn::TableStar(_, a) => format!("{}.{}", a, "*"), + }) + .collect::>() + .join(", "); + writeln!(f, "{}PROJECT {}", indent, expressions)?; + fmt_operator(source, f, level + 1) + } + Operator::Scan { + table, + predicates: filter, + table_identifier, + .. 
+                } => {
+                    let table_name = format!("{} AS {}", &table.name, &table_identifier);
+                    let filter_string = filter.as_ref().map(|f| {
+                        let filters_string = f
+                            .iter()
+                            .map(|p| p.to_string())
+                            .collect::>()
+                            .join(" AND ");
+                        format!("FILTER {}", filters_string)
+                    });
+                    match filter_string {
+                        Some(fs) => writeln!(f, "{}SCAN {} {}", indent, table_name, fs),
+                        None => writeln!(f, "{}SCAN {}", indent, table_name),
+                    }?;
+                    Ok(())
+                }
+                Operator::Nothing => Ok(()),
+            }
+        }
+        fmt_operator(self, f, 0)
+    }
+}
+
+/**
+  Returns a bitmask where each bit corresponds to a table in the `tables` vector.
+  If a table is referenced in the given Operator, the corresponding bit is set to 1.
+  Example:
+    if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")],
+    and the Operator is a join between table2 and table3,
+    then the return value will be (in bits): 110
+
+  A table reference is identified by both its table and its identifier (alias), so the
+  two sides of a self-join map to two different bits.
+  Panics if a Scan or SeekRowid refers to a table that is not present in `tables`.
+*/
+pub fn get_table_ref_bitmask_for_operator<'a>(
+    tables: &'a Vec<(Rc<BTreeTable>, String)>,
+    operator: &'a Operator,
+) -> Result<usize> {
+    // Bit of a concrete table reference. Comparing the alias as well as the table
+    // pointer keeps `t AS a JOIN t AS b` from resolving both sides to the first entry.
+    let table_bit = |table: &Rc<BTreeTable>, table_identifier: &str| -> usize {
+        let index = tables
+            .iter()
+            .position(|(t, t_id)| Rc::ptr_eq(t, table) && t_id == table_identifier)
+            .expect("operator refers to a table that is not in the referenced tables list");
+        1 << index
+    };
+
+    let mut table_refs_mask = 0;
+    match operator {
+        Operator::Aggregate { source, .. } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?;
+        }
+        Operator::Filter {
+            source, predicates, ..
+        } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?;
+            for predicate in predicates {
+                table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?;
+            }
+        }
+        Operator::SeekRowid {
+            table,
+            table_identifier,
+            ..
+        } => {
+            table_refs_mask |= table_bit(table, table_identifier.as_str());
+        }
+        Operator::Limit { source, .. } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?;
+        }
+        Operator::Join { left, right, .. } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?;
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?;
+        }
+        Operator::Order { source, .. } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?;
+        }
+        Operator::Projection { source, .. } => {
+            table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?;
+        }
+        Operator::Scan {
+            table,
+            table_identifier,
+            ..
+        } => {
+            table_refs_mask |= table_bit(table, table_identifier.as_str());
+        }
+        Operator::Nothing => {}
+    }
+    Ok(table_refs_mask)
+}
+
+/**
+  Returns a bitmask where each bit corresponds to a table in the `tables` vector.
+  If a table is referenced in the given AST expression, the corresponding bit is set to 1.
+  Example:
+    if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")],
+    and predicate = "t1.a = t2.b"
+    then the return value will be (in bits): 011
+*/
+pub fn get_table_ref_bitmask_for_ast_expr<'a>(
+    tables: &'a Vec<(Rc, String)>,
+    predicate: &'a ast::Expr,
+) -> Result {
+    let mut table_refs_mask = 0;
+    match predicate {
+        ast::Expr::Binary(e1, _, e2) => {
+            table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?;
+            table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?;
+        }
+        ast::Expr::Id(ident) => {
+            let ident = normalize_ident(&ident.0);
+            let matching_tables = tables
+                .iter()
+                .enumerate()
+                .filter(|(_, (table, _))| table.get_column(&ident).is_some());
+
+            let mut matches = 0;
+            let mut matching_tbl = None;
+            for table in matching_tables {
+                matching_tbl = Some(table);
+                matches += 1;
+                if matches > 1 {
+                    crate::bail_parse_error!("ambiguous column name {}", &ident)
+                }
+            }
+
+            if let Some((tbl_index, _)) = matching_tbl {
+                table_refs_mask |= 1 << tbl_index;
+            } else {
+                crate::bail_parse_error!("column not found: {}", &ident)
+            }
+        }
+        ast::Expr::Qualified(tbl, ident) => {
+            let tbl = normalize_ident(&tbl.0);
+            let ident = normalize_ident(&ident.0);
+            let matching_table = tables
+                .iter()
+                .enumerate()
+                .find(|(_, (_, t_id))| *t_id == tbl);
+
+            if matching_table.is_none() {
+                crate::bail_parse_error!("introspect: table not found: {}", &tbl)
+            }
+            let
matching_table = matching_table.unwrap(); + if matching_table.1 .0.get_column(&ident).is_none() { + crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) + } + + table_refs_mask |= 1 << matching_table.0; + } + ast::Expr::Literal(_) => {} + ast::Expr::Like { lhs, rhs, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs)?; + } + ast::Expr::FunctionCall { + args: Some(args), .. + } => { + for arg in args { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, arg)?; + } + } + ast::Expr::InList { lhs, rhs, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; + if let Some(rhs_list) = rhs { + for rhs_expr in rhs_list { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs_expr)?; + } + } + } + _ => {} + } + + Ok(table_refs_mask) +} diff --git a/core/translate/planner.rs b/core/translate/planner.rs new file mode 100644 index 000000000..94ca386b9 --- /dev/null +++ b/core/translate/planner.rs @@ -0,0 +1,369 @@ +use super::plan::{Aggregate, Direction, Operator, Plan, ProjectionColumn}; +use crate::{ + function::Func, + schema::{BTreeTable, Schema}, + util::normalize_ident, + Result, +}; +use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; +use std::rc::Rc; + +pub struct OperatorIdCounter { + id: usize, +} + +impl OperatorIdCounter { + pub fn new() -> Self { + Self { id: 0 } + } + pub fn get_next_id(&mut self) -> usize { + let id = self.id; + self.id += 1; + id + } +} + +pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result { + match select.body.select { + ast::OneSelect::Select { + columns, + from, + where_clause, + .. 
+ } => { + let col_count = columns.len(); + if col_count == 0 { + crate::bail_parse_error!("SELECT without columns is not allowed"); + } + + let mut operator_id_counter = OperatorIdCounter::new(); + + // Parse the FROM clause + let (mut operator, referenced_tables) = + parse_from(schema, from, &mut operator_id_counter)?; + + // Parse the WHERE clause + if let Some(w) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(w, &mut predicates); + operator = Operator::Filter { + source: Box::new(operator), + predicates, + id: operator_id_counter.get_next_id(), + }; + } + + // Parse the SELECT clause to either a projection or an aggregation + // depending on the presence of aggregate functions. + // Since GROUP BY is not supported yet, mixing aggregate and non-aggregate + // columns is not allowed. + // + // If there are no aggregate functions, we can simply project the columns. + // For a simple SELECT *, the projection operator is skipped. + let is_select_star = col_count == 1 && matches!(columns[0], ast::ResultColumn::Star); + if !is_select_star { + let mut aggregate_expressions = Vec::new(); + let mut scalar_expressions = Vec::with_capacity(col_count); + for column in columns.clone() { + match column { + ast::ResultColumn::Star => { + scalar_expressions.push(ProjectionColumn::Star); + } + ast::ResultColumn::TableStar(name) => { + let name_normalized = normalize_ident(name.0.as_str()); + let referenced_table = referenced_tables + .iter() + .find(|(t, t_id)| *t_id == name_normalized); + + if referenced_table.is_none() { + crate::bail_parse_error!("Table {} not found", name.0); + } + let (table, identifier) = referenced_table.unwrap(); + scalar_expressions.push(ProjectionColumn::TableStar( + table.clone(), + identifier.clone(), + )); + } + ast::ResultColumn::Expr(expr, _) => match expr { + ast::Expr::FunctionCall { + name, + distinctness, + args, + filter_over, + order_by, + } => { + let args_count = if let Some(args) = &args { + args.len() 
+ } else { + 0 + }; + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + args_count, + ) { + Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate { + func: f, + args: args.unwrap(), + }), + Ok(_) => { + scalar_expressions.push(ProjectionColumn::Column( + ast::Expr::FunctionCall { + name, + distinctness, + args, + filter_over, + order_by, + }, + )); + } + _ => {} + } + } + ast::Expr::FunctionCallStar { name, filter_over } => { + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + 0, + ) { + Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate { + func: f, + args: vec![], + }), + Ok(Func::Scalar(_)) => { + scalar_expressions.push(ProjectionColumn::Column( + ast::Expr::FunctionCallStar { name, filter_over }, + )); + } + _ => {} + } + } + _ => { + scalar_expressions.push(ProjectionColumn::Column(expr)); + } + }, + } + } + + let mixing_aggregate_and_non_aggregate_columns = + !aggregate_expressions.is_empty() && aggregate_expressions.len() != col_count; + + if mixing_aggregate_and_non_aggregate_columns { + crate::bail_parse_error!( + "mixing aggregate and non-aggregate columns is not allowed (GROUP BY is not supported)" + ); + } + if !aggregate_expressions.is_empty() { + operator = Operator::Aggregate { + source: Box::new(operator), + aggregates: aggregate_expressions, + id: operator_id_counter.get_next_id(), + step: 0, + } + } else if !scalar_expressions.is_empty() { + operator = Operator::Projection { + source: Box::new(operator), + expressions: scalar_expressions, + id: operator_id_counter.get_next_id(), + step: 0, + }; + } + } + + // Parse the ORDER BY clause + if let Some(order_by) = select.order_by { + let mut key = Vec::new(); + for o in order_by { + // if the ORDER BY expression is a number, interpret it as an 1-indexed column number + // otherwise, interpret it normally as an expression + let expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = o.expr { + let column_number = num.parse::()?; + 
if column_number == 0 { + crate::bail_parse_error!("invalid column index: {}", column_number); + } + let maybe_result_column = columns.get(column_number - 1); + match maybe_result_column { + Some(ResultColumn::Expr(expr, _)) => expr.clone(), + None => { + crate::bail_parse_error!("invalid column index: {}", column_number) + } + _ => todo!(), + } + } else { + o.expr + }; + key.push(( + expr, + o.order.map_or(Direction::Ascending, |o| match o { + ast::SortOrder::Asc => Direction::Ascending, + ast::SortOrder::Desc => Direction::Descending, + }), + )); + } + operator = Operator::Order { + source: Box::new(operator), + key, + id: operator_id_counter.get_next_id(), + step: 0, + }; + } + + // Parse the LIMIT clause + if let Some(limit) = &select.limit { + operator = match &limit.expr { + ast::Expr::Literal(ast::Literal::Numeric(n)) => { + let l = n.parse()?; + if l == 0 { + Operator::Nothing + } else { + Operator::Limit { + source: Box::new(operator), + limit: l, + id: operator_id_counter.get_next_id(), + step: 0, + } + } + } + _ => todo!(), + } + } + + // Return the unoptimized query plan + return Ok(Plan { + root_operator: operator, + referenced_tables, + }); + } + _ => todo!(), + }; +} + +fn parse_from( + schema: &Schema, + from: Option, + operator_id_counter: &mut OperatorIdCounter, +) -> Result<(Operator, Vec<(Rc, String)>)> { + if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { + return Ok((Operator::Nothing, vec![])); + } + + let from = from.unwrap(); + + let first_table = match *from.select.unwrap() { + ast::SelectTable::Table(qualified_name, maybe_alias, _) => { + let Some(table) = schema.get_table(&qualified_name.name.0) else { + crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + }; + let alias = maybe_alias + .map(|a| match a { + ast::As::As(id) => id, + ast::As::Elided(id) => id, + }) + .map(|a| a.0); + + (table, alias.unwrap_or(qualified_name.name.0)) + } + _ => todo!(), + }; + + let mut operator = Operator::Scan { + table: 
first_table.0.clone(), + predicates: None, + table_identifier: first_table.1.clone(), + id: operator_id_counter.get_next_id(), + step: 0, + }; + + let mut tables = vec![first_table]; + + for join in from.joins.unwrap_or_default().into_iter() { + let (right, outer, predicates) = + parse_join(schema, join, operator_id_counter, &mut tables)?; + operator = Operator::Join { + left: Box::new(operator), + right: Box::new(right), + predicates, + outer, + id: operator_id_counter.get_next_id(), + step: 0, + } + } + + return Ok((operator, tables)); +} + +fn parse_join( + schema: &Schema, + join: ast::JoinedSelectTable, + operator_id_counter: &mut OperatorIdCounter, + tables: &mut Vec<(Rc, String)>, +) -> Result<(Operator, bool, Option>)> { + let ast::JoinedSelectTable { + operator, + table, + constraint, + } = join; + + let table = match table { + ast::SelectTable::Table(qualified_name, maybe_alias, _) => { + let Some(table) = schema.get_table(&qualified_name.name.0) else { + crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + }; + let alias = maybe_alias + .map(|a| match a { + ast::As::As(id) => id, + ast::As::Elided(id) => id, + }) + .map(|a| a.0); + + (table, alias.unwrap_or(qualified_name.name.0)) + } + _ => todo!(), + }; + + tables.push(table.clone()); + + let outer = match operator { + ast::JoinOperator::TypedJoin(Some(join_type)) => { + if join_type == JoinType::LEFT | JoinType::OUTER { + true + } else if join_type == JoinType::RIGHT | JoinType::OUTER { + true + } else { + false + } + } + _ => false, + }; + + let predicates = constraint.map(|c| match c { + ast::JoinConstraint::On(expr) => { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(expr, &mut predicates); + predicates + } + ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), + }); + + Ok(( + Operator::Scan { + table: table.0.clone(), + predicates: None, + table_identifier: table.1.clone(), + id: operator_id_counter.get_next_id(), + step: 0, + }, + 
outer, + predicates, + )) +} + +fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { + match predicate { + ast::Expr::Binary(left, ast::Operator::And, right) => { + break_predicate_at_and_boundaries(*left, out_predicates); + break_predicate_at_and_boundaries(*right, out_predicates); + } + _ => { + out_predicates.push(predicate); + } + } +} diff --git a/core/translate/select.rs b/core/translate/select.rs index 7321c0584..d8bcecfe5 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,1055 +1,19 @@ -use crate::function::{AggFunc, Func}; -use crate::schema::{Column, PseudoTable, Schema, Table}; +use std::{cell::RefCell, rc::Rc}; + use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::expr::{analyze_columns, maybe_apply_affinity, translate_expr}; -use crate::translate::where_clause::{ - process_where, translate_processed_where, translate_tableless_where, ProcessedWhereClause, - SeekRowid, WhereExpr, -}; -use crate::translate::{normalize_ident, Insn}; -use crate::types::{OwnedRecord, OwnedValue}; -use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Program}; -use crate::Result; +use crate::{schema::Schema, vdbe::Program, Result}; +use sqlite3_parser::ast; -use sqlite3_parser::ast::{self, JoinOperator, JoinType, ResultColumn}; +use super::emitter::emit_program; +use super::optimizer::optimize_plan; +use super::planner::prepare_select_plan; -use std::cell::RefCell; -use std::rc::Rc; - -/// A representation of a `SELECT` statement that has all the information -/// needed for code generation. -pub struct Select<'a> { - /// Information about each column. - pub column_info: Vec>, - /// The tables we are retrieving data from, including tables mentioned - /// in `FROM` and `JOIN` clauses. - pub src_tables: Vec>, - /// The `LIMIT` clause. - pub limit: &'a Option, - /// The `ORDER BY` clause. - pub order_by: &'a Option>, - /// Whether the query contains an aggregation function. 
- pub exist_aggregation: bool, - /// The `WHERE` clause. - pub where_clause: &'a Option, -} - -#[derive(Debug)] -pub struct SrcTable<'a> { - pub table: Table, - pub identifier: String, - pub join_info: Option<&'a ast::JoinedSelectTable>, -} - -impl SrcTable<'_> { - pub fn is_outer_join(&self) -> bool { - if let Some(ast::JoinedSelectTable { - operator: JoinOperator::TypedJoin(Some(join_type)), - .. - }) = self.join_info - { - if *join_type == JoinType::LEFT | JoinType::OUTER { - true - } else { - *join_type == JoinType::RIGHT | JoinType::OUTER - } - } else { - false - } - } -} - -#[derive(Debug)] -pub struct ColumnInfo<'a> { - pub raw_column: &'a ast::ResultColumn, - pub func: Option, - pub args: &'a Option>, - pub columns_to_allocate: usize, /* number of result columns this col will result on */ -} - -impl<'a> ColumnInfo<'a> { - pub fn new(raw_column: &'a ast::ResultColumn) -> Self { - Self { - raw_column, - func: None, - args: &None, - columns_to_allocate: 1, - } - } - - pub fn is_aggregation_function(&self) -> bool { - matches!(self.func, Some(Func::Agg(_))) - } -} - -#[derive(Debug)] -pub struct LeftJoinBookkeeping { - // integer register that holds a flag that is set to true if the current row has a match for the left join - pub match_flag_register: usize, - // label for the instruction that sets the match flag to true - pub set_match_flag_true_label: BranchOffset, - // label for the instruction that checks if the match flag is true - pub check_match_flag_label: BranchOffset, - // label for the instruction where the program jumps to if the current row has a match for the left join - pub on_match_jump_to_label: BranchOffset, -} - -#[derive(Debug)] -pub enum Plan { - Scan, - SeekRowid, -} - -/// Represents a single loop in an ordered list of opened read table loops. -/// -/// The list is used to generate inner loops like this: -/// -/// cursor 0 = open table 0 -/// for each row in cursor 0 -/// cursor 1 = open table 1 -/// for each row in cursor 1 -/// ... 
-/// end cursor 1 -/// end cursor 0 -#[derive(Debug)] -pub struct LoopInfo { - // The table or table alias that we are looping over - pub identifier: String, - // The plan for this loop - pub plan: Plan, - // Metadata about a left join, if any - pub left_join_maybe: Option, - // The label for the instruction that reads the next row for this table - pub next_row_label: BranchOffset, - // The label for the instruction that rewinds the cursor for this table - pub rewind_label: BranchOffset, - // The label for the instruction that is jumped to in the Rewind instruction if the table is empty - pub rewind_on_empty_label: BranchOffset, - // The ID of the cursor that is opened for this table - pub open_cursor: usize, -} - -struct LimitInfo { - limit_reg: usize, - num: i64, - goto_label: BranchOffset, -} - -#[derive(Debug)] -struct SortInfo { - sorter_cursor: usize, - sorter_reg: usize, - count: usize, -} - -pub fn prepare_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result> { - match &select.body.select { - ast::OneSelect::Select { - columns, - from: Some(from), - where_clause, - .. - } => { - let (table_name, maybe_alias) = match &from.select { - Some(select_table) => match select_table.as_ref() { - ast::SelectTable::Table(name, alias, ..) 
=> ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }, - None => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.map(|als| &als.0); - let table = match schema.get_table(table_name) { - Some(table) => table, - None => crate::bail_parse_error!("no such table: {}", table_name), - }; - let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); - let mut joins = Vec::new(); - joins.push(SrcTable { - table: Table::BTree(table.clone()), - identifier, - join_info: None, - }); - if let Some(selected_joins) = &from.joins { - for join in selected_joins { - let (table_name, maybe_alias) = match &join.table { - ast::SelectTable::Table(name, alias, ..) => ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.as_ref().map(|als| &als.0); - let table = match schema.get_table(table_name) { - Some(table) => table, - None => { - crate::bail_parse_error!("no such table: {}", table_name) - } - }; - let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); - - joins.push(SrcTable { - table: Table::BTree(table), - identifier, - join_info: Some(join), - }); - } - } - - let _table = Table::BTree(table); - let column_info = analyze_columns(columns, &joins); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - column_info, - src_tables: joins, - limit: &select.limit, - order_by: &select.order_by, - exist_aggregation, - where_clause, - }) - } - ast::OneSelect::Select { - columns, - from: None, - where_clause, - .. 
- } => { - let column_info = analyze_columns(columns, &Vec::new()); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - column_info, - src_tables: Vec::new(), - limit: &select.limit, - order_by: &select.order_by, - where_clause, - exist_aggregation, - }) - } - _ => todo!(), - } -} - -/// Generate code for a SELECT statement. pub fn translate_select( - mut select: Select, + schema: &Schema, + select: ast::Select, database_header: Rc>, ) -> Result { - let mut program = ProgramBuilder::new(); - let init_label = program.allocate_label(); - let early_terminate_label = program.allocate_label(); - program.emit_insn_with_label_dependency( - Insn::Init { - target_pc: init_label, - }, - init_label, - ); - let start_offset = program.offset(); - - let mut sort_info = if let Some(order_by) = select.order_by { - let sorter_cursor = program.alloc_cursor_id(None, None); - let mut order = Vec::new(); - for col in order_by { - order.push(OwnedValue::Integer(if let Some(ord) = col.order { - ord as i64 - } else { - 0 - })); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sorter_cursor, - order: OwnedRecord::new(order), - columns: select.column_info.len() + 1, // +1 for the key - }); - Some(SortInfo { - sorter_cursor, - sorter_reg: 0, // will be overwritten later - count: 0, // will be overwritten later - }) - } else { - None - }; - - let limit_info = if let Some(limit) = &select.limit { - assert!(limit.offset.is_none()); - let target_register = program.alloc_register(); - let limit_reg = translate_expr( - &mut program, - Some(&select), - &limit.expr, - target_register, - None, - )?; - let num = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = &limit.expr { - num.parse::()? 
- } else { - todo!(); - }; - let goto_label = program.allocate_label(); - if num == 0 { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: goto_label, - }, - goto_label, - ); - } - Some(LimitInfo { - limit_reg, - num, - goto_label, - }) - } else { - None - }; - - if !select.src_tables.is_empty() { - let loops = translate_tables_begin(&mut program, &mut select, early_terminate_label)?; - - let (register_start, column_count) = if let Some(sort_columns) = select.order_by { - let start = program.next_free_register(); - for col in sort_columns.iter() { - let target = program.alloc_register(); - // if the ORDER BY expression is a number, interpret it as an 1-indexed column number - // otherwise, interpret it normally as an expression - let sort_col_expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = - &col.expr - { - let column_number = num.parse::()?; - if column_number == 0 { - crate::bail_parse_error!("invalid column index: {}", column_number); - } - let maybe_result_column = select - .column_info - .get(column_number - 1) - .map(|col| &col.raw_column); - match maybe_result_column { - Some(ResultColumn::Expr(expr, _)) => expr, - None => crate::bail_parse_error!("invalid column index: {}", column_number), - _ => todo!(), - } - } else { - &col.expr - }; - translate_expr(&mut program, Some(&select), sort_col_expr, target, None)?; - } - let (_, result_cols_count) = translate_columns(&mut program, &select, None)?; - sort_info - .as_mut() - .map(|inner| inner.count = result_cols_count + sort_columns.len() + 1); // +1 for the key - (start, result_cols_count + sort_columns.len()) - } else { - translate_columns(&mut program, &select, None)? 
- }; - - if !select.exist_aggregation { - if let Some(ref mut sort_info) = sort_info { - let dest = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: register_start, - count: column_count, - dest_reg: dest, - }); - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_info.sorter_cursor, - record_reg: dest, - }); - sort_info.sorter_reg = register_start; - } else { - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count: column_count, - }); - emit_limit_insn(&limit_info, &mut program); - } - } - - translate_tables_end(&mut program, &loops); - - if select.exist_aggregation { - program.resolve_label(early_terminate_label, program.offset()); - let mut target = register_start; - for info in &select.column_info { - if let Some(Func::Agg(func)) = &info.func { - program.emit_insn(Insn::AggFinal { - register: target, - func: func.clone(), - }); - } - target += info.columns_to_allocate; - } - // only one result row - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count: column_count, - }); - emit_limit_insn(&limit_info, &mut program); - } - } else { - assert!(!select.exist_aggregation); - assert!(sort_info.is_none()); - let where_maybe = translate_tableless_where(&select, &mut program, early_terminate_label)?; - let (register_start, count) = translate_columns(&mut program, &select, None)?; - if let Some(where_clause_label) = where_maybe { - program.resolve_label(where_clause_label, program.offset() + 1); - } - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count, - }); - emit_limit_insn(&limit_info, &mut program); - }; - - // now do the sort for ORDER BY - if select.order_by.is_some() { - let _ = translate_sorter(&select, &mut program, &sort_info.unwrap(), &limit_info); - } - - if !select.exist_aggregation { - program.resolve_label(early_terminate_label, program.offset()); - } - program.emit_insn(Insn::Halt); - let halt_offset = program.offset() - 1; - if let Some(limit_info) = 
limit_info { - if limit_info.goto_label < 0 { - program.resolve_label(limit_info.goto_label, halt_offset); - } - } - program.resolve_label(init_label, program.offset()); - program.emit_insn(Insn::Transaction); - program.emit_constant_insns(); - program.emit_insn(Insn::Goto { - target_pc: start_offset, - }); - program.resolve_deferred_labels(); - Ok(program.build(database_header)) -} - -fn emit_limit_insn(limit_info: &Option, program: &mut ProgramBuilder) { - if limit_info.is_none() { - return; - } - let limit_info = limit_info.as_ref().unwrap(); - if limit_info.num > 0 { - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_info.limit_reg, - target_pc: limit_info.goto_label, - }, - limit_info.goto_label, - ); - } -} - -fn translate_sorter( - select: &Select, - program: &mut ProgramBuilder, - sort_info: &SortInfo, - limit_info: &Option, -) -> Result<()> { - assert!(sort_info.count > 0); - let mut pseudo_columns = Vec::new(); - for col in select.column_info.iter() { - match col.raw_column { - ast::ResultColumn::Expr(expr, _) => match expr { - ast::Expr::Id(ident) => { - pseudo_columns.push(Column { - name: normalize_ident(&ident.0), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - ast::Expr::Qualified(table_name, ident) => { - pseudo_columns.push(Column { - name: normalize_ident(format!("{}.{}", table_name.0, ident.0).as_str()), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - other => { - todo!("translate_sorter: {:?}", other); - } - }, - ast::ResultColumn::Star => {} - ast::ResultColumn::TableStar(_) => {} - } - } - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - let pseudo_content_reg = program.alloc_register(); - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: pseudo_content_reg, - num_fields: sort_info.count, - }); - let label = program.allocate_label(); - 
program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_info.sorter_cursor, - pc_if_empty: label, - }, - label, - ); - let sorter_data_offset = program.offset(); - program.emit_insn(Insn::SorterData { - cursor_id: sort_info.sorter_cursor, - dest_reg: pseudo_content_reg, - pseudo_cursor, - }); - let (register_start, count) = translate_columns(program, select, Some(pseudo_cursor))?; - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count, - }); - emit_limit_insn(limit_info, program); - program.emit_insn(Insn::SorterNext { - cursor_id: sort_info.sorter_cursor, - pc_if_next: sorter_data_offset, - }); - program.resolve_label(label, program.offset()); - Ok(()) -} - -fn translate_tables_begin( - program: &mut ProgramBuilder, - select: &mut Select, - early_terminate_label: BranchOffset, -) -> Result> { - let processed_where = process_where(select)?; - let mut loops = Vec::with_capacity(select.src_tables.len()); - for idx in &processed_where.loop_order { - let join = select - .src_tables - .get(*idx) - .expect("loop order out of bounds"); - let loop_info = translate_table_open_cursor(program, join, &processed_where); - loops.push(loop_info); - } - - for loop_info in &loops { - // early_terminate_label decides where to jump _IF_ there exists a condition on this loop that is always false. - // this is part of a constant folding optimization where we can skip the loop entirely if we know it will never produce any rows. - let current_loop_early_terminate_label = if let Some(left_join) = &loop_info.left_join_maybe - { - // If there exists a condition on the LEFT JOIN that is always false, e.g.: - // 'SELECT * FROM x LEFT JOIN y ON false' - // then we can't jump to e.g. Halt, but instead we need to still emit all rows from the 'x' table, with NULLs for the 'y' table. 
- // 'check_match_flag_label' is the label that checks if the left join match flag has been set to true, and if not (which it by default isn't), - // sets the 'y' cursor's "pseudo null bit" on, which means any Insn::Column after that will return NULL for the 'y' table. - left_join.check_match_flag_label - } else { - // If there exists a condition in an INNER JOIN (or WHERE) that is always false, then the query will not produce any rows. - // Example: 'SELECT * FROM x JOIN y ON false' or 'SELECT * FROM x WHERE false' - // Here we should jump to Halt (or e.g. AggFinal in case we have an aggregation expression like count() that should produce a 0 on empty input. - early_terminate_label - }; - translate_table_open_loop( - program, - select, - &loops, - loop_info, - &processed_where, - current_loop_early_terminate_label, - )?; - } - - Ok(loops) -} - -fn translate_tables_end(program: &mut ProgramBuilder, loops: &[LoopInfo]) { - // iterate in reverse order as we open cursors in order - for table_loop in loops.iter().rev() { - let cursor_id = table_loop.open_cursor; - program.resolve_label(table_loop.next_row_label, program.offset()); - if let Plan::Scan = table_loop.plan { - // If we're scanning a table, we need to emit a Next instruction to fetch the next row. 
- program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: table_loop.rewind_label, - }, - table_loop.rewind_label, - ); - } - - if let Some(left_join) = &table_loop.left_join_maybe { - left_join_match_flag_check(program, left_join, cursor_id); - } - } -} - -fn translate_table_open_cursor( - program: &mut ProgramBuilder, - table: &SrcTable, - w: &ProcessedWhereClause, -) -> LoopInfo { - let cursor_id = - program.alloc_cursor_id(Some(table.identifier.clone()), Some(table.table.clone())); - let root_page = match &table.table { - Table::BTree(btree) => btree.root_page, - Table::Pseudo(_) => todo!(), - }; - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - let has_where_term_where_rowid_index_usable = w.terms.iter().any(|term| { - matches!( - term.expr, - WhereExpr::SeekRowid(SeekRowid { table: t, .. }) if *t == table.identifier - ) - }); - LoopInfo { - identifier: table.identifier.clone(), - plan: if has_where_term_where_rowid_index_usable { - Plan::SeekRowid - } else { - Plan::Scan - }, - left_join_maybe: if table.is_outer_join() { - Some(LeftJoinBookkeeping { - match_flag_register: program.alloc_register(), - on_match_jump_to_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - set_match_flag_true_label: program.allocate_label(), - }) - } else { - None - }, - open_cursor: cursor_id, - next_row_label: program.allocate_label(), - rewind_label: program.allocate_label(), - rewind_on_empty_label: program.allocate_label(), - } -} - -/** -* initialize left join match flag to false -* if condition checks pass, it will eventually be set to true -*/ -fn left_join_match_flag_initialize(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) { - program.add_comment(program.offset(), "init LEFT JOIN match flag"); - program.emit_insn(Insn::Integer { - value: 0, - dest: 
left_join.match_flag_register, - }); -} - -/** -* after the relevant conditional jumps have been emitted, set the left join match flag to true -*/ -fn left_join_match_flag_set_true(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) { - program.defer_label_resolution( - left_join.set_match_flag_true_label, - program.offset() as usize, - ); - program.add_comment(program.offset(), "record LEFT JOIN hit"); - program.emit_insn(Insn::Integer { - value: 1, - dest: left_join.match_flag_register, - }); -} - -/** -* check if the left join match flag is set to true -* if it is, jump to the next row on the outer table -* if not, set the right table cursor's "pseudo null bit" on -* then jump to setting the left join match flag to true again, -* which will effectively emit all nulls for the right table. -*/ -fn left_join_match_flag_check( - program: &mut ProgramBuilder, - left_join: &LeftJoinBookkeeping, - cursor_id: usize, -) { - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(left_join.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: left_join.match_flag_register, - target_pc: left_join.on_match_jump_to_label, - decrement_by: 0, - }, - left_join.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - program.emit_insn(Insn::NullRow { cursor_id }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: left_join.set_match_flag_true_label, - }, - left_join.set_match_flag_true_label, - ); - // This points to the NextAsync instruction of the next table in the loop - // (i.e. 
the outer table, since we're iterating in reverse order) - program.resolve_label(left_join.on_match_jump_to_label, program.offset()); -} - -fn translate_table_open_loop( - program: &mut ProgramBuilder, - select: &Select, - loops: &[LoopInfo], - loop_info: &LoopInfo, - w: &ProcessedWhereClause, - early_terminate_label: BranchOffset, -) -> Result<()> { - if let Some(left_join) = loop_info.left_join_maybe.as_ref() { - // In a left join loop, initialize the left join match flag to false - // If the condition checks pass, it will eventually be set to true - // If not, NULLs will be emitted for the right table for this row in the outer table. - left_join_match_flag_initialize(program, left_join); - } - - if let Plan::Scan = loop_info.plan { - // If we're scanning, we need to rewind the cursor to the beginning of the table - // before we start processing the rows in the loop. - // Consider a nested loop query like: - // SELECT * FROM a JOIN b ON a.someprop = b.someprop; - // We need to rewind the cursor to the beginning of b for each row in a, - // so that we can iterate over all rows in b for each row in a. - // - // If we're not scanning, we're seeking by rowid, so we don't need to rewind the cursor, - // since we're only going to be reading one row. 
- program.emit_insn(Insn::RewindAsync { - cursor_id: loop_info.open_cursor, - }); - program.defer_label_resolution(loop_info.rewind_label, program.offset() as usize); - program.emit_insn_with_label_dependency( - Insn::RewindAwait { - cursor_id: loop_info.open_cursor, - pc_if_empty: loop_info.rewind_on_empty_label, - }, - loop_info.rewind_on_empty_label, - ); - } - - translate_processed_where( - program, - select, - loops, - loop_info, - w, - early_terminate_label, - None, - )?; - - if let Some(left_join) = loop_info.left_join_maybe.as_ref() { - left_join_match_flag_set_true(program, left_join); - } - - Ok(()) -} - -fn translate_columns( - program: &mut ProgramBuilder, - select: &Select, - cursor_hint: Option, -) -> Result<(usize, usize)> { - let register_start = program.next_free_register(); - - // allocate one register as output for each col - let registers: usize = select - .column_info - .iter() - .map(|col| col.columns_to_allocate) - .sum(); - program.alloc_registers(registers); - let count = program.next_free_register() - register_start; - - let mut target = register_start; - for info in select.column_info.iter() { - translate_column(program, select, info.raw_column, info, target, cursor_hint)?; - target += info.columns_to_allocate; - } - Ok((register_start, count)) -} - -fn translate_column( - program: &mut ProgramBuilder, - select: &Select, - col: &ast::ResultColumn, - info: &ColumnInfo, - target_register: usize, // where to store the result, in case of star it will be the start of registers added - cursor_hint: Option, -) -> Result<()> { - match col { - ast::ResultColumn::Expr(expr, _) => { - if info.is_aggregation_function() { - let _ = translate_aggregation( - program, - select, - expr, - info, - target_register, - cursor_hint, - )?; - } else { - let _ = translate_expr(program, Some(select), expr, target_register, cursor_hint)?; - } - } - ast::ResultColumn::Star => { - let mut target_register = target_register; - for join in &select.src_tables { - 
translate_table_star(join, program, target_register, cursor_hint); - target_register += &join.table.columns().len(); - } - } - ast::ResultColumn::TableStar(_) => todo!(), - } - Ok(()) -} - -fn translate_table_star( - table: &SrcTable, - program: &mut ProgramBuilder, - target_register: usize, - cursor_hint: Option, -) { - let table_cursor = program.resolve_cursor_id(&table.identifier, cursor_hint); - let table = &table.table; - for (i, col) in table.columns().iter().enumerate() { - let col_target_register = target_register + i; - if table.column_is_rowid_alias(col) { - program.emit_insn(Insn::RowId { - cursor_id: table_cursor, - dest: col_target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: i, - dest: col_target_register, - cursor_id: table_cursor, - }); - maybe_apply_affinity(col.ty, col_target_register, program); - } - } -} - -fn translate_aggregation( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - info: &ColumnInfo, - target_register: usize, - cursor_hint: Option, -) -> Result { - let _ = expr; - assert!(info.func.is_some()); - let func = info.func.as_ref().unwrap(); - let empty_args = &Vec::::new(); - let args = info.args.as_ref().unwrap_or(empty_args); - let dest = match func { - Func::Scalar(_) | Func::Json(_) => { - crate::bail_parse_error!("single row function in aggregation") - } - Func::Agg(agg_func) => match agg_func { - AggFunc::Avg => { - if args.len() != 1 { - crate::bail_parse_error!("avg bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Avg, - }); - target_register - } - AggFunc::Count => { - let expr_reg = if args.is_empty() { - program.alloc_register() - } else { - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, 
Some(select), expr, expr_reg, cursor_hint); - expr_reg - }; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Count, - }); - target_register - } - AggFunc::GroupConcat => { - if args.len() != 1 && args.len() != 2 { - crate::bail_parse_error!("group_concat bad number of arguments"); - } - - let expr_reg = program.alloc_register(); - let delimiter_reg = program.alloc_register(); - - let expr = &args[0]; - let delimiter_expr: ast::Expr; - - if args.len() == 2 { - match &args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); - } else { - delimiter_expr = args[1].clone(); - } - } - ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(s.to_string())); - } - _ => crate::bail_parse_error!("Incorrect delimiter parameter"), - }; - } else { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); - } - - translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - translate_expr( - program, - Some(select), - &delimiter_expr, - delimiter_reg, - cursor_hint, - )?; - - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: delimiter_reg, - func: AggFunc::GroupConcat, - }); - - target_register - } - AggFunc::Max => { - if args.len() != 1 { - crate::bail_parse_error!("max bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint); - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Max, - }); - target_register - } - AggFunc::Min => { - if args.len() != 1 { - crate::bail_parse_error!("min bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, 
Some(select), expr, expr_reg, cursor_hint); - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Min, - }); - target_register - } - AggFunc::StringAgg => { - if args.len() != 2 { - crate::bail_parse_error!("string_agg bad number of arguments"); - } - - let expr_reg = program.alloc_register(); - let delimiter_reg = program.alloc_register(); - - let expr = &args[0]; - let delimiter_expr: ast::Expr; - - match &args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); - } else { - delimiter_expr = args[1].clone(); - } - } - ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); - } - _ => crate::bail_parse_error!("Incorrect delimiter parameter"), - }; - - translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - translate_expr( - program, - Some(select), - &delimiter_expr, - delimiter_reg, - cursor_hint, - )?; - - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: delimiter_reg, - func: AggFunc::StringAgg, - }); - - target_register - } - AggFunc::Sum => { - if args.len() != 1 { - crate::bail_parse_error!("sum bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Sum, - }); - target_register - } - AggFunc::Total => { - if args.len() != 1 { - crate::bail_parse_error!("total bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - 
func: AggFunc::Total, - }); - target_register - } - }, - }; - Ok(dest) + let select_plan = prepare_select_plan(schema, select)?; + let optimized_plan = optimize_plan(select_plan)?; + emit_program(database_header, optimized_plan) } diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs deleted file mode 100644 index 261c54787..000000000 --- a/core/translate/where_clause.rs +++ /dev/null @@ -1,1152 +0,0 @@ -use crate::{ - function::ScalarFunc, - translate::{expr::translate_expr, select::Select}, - util::normalize_ident, - vdbe::{builder::ProgramBuilder, BranchOffset, Func, Insn}, - Result, -}; - -use super::select::LoopInfo; - -use sqlite3_parser::ast::{self}; - -#[derive(Debug)] -pub struct SeekRowid<'a> { - pub table: &'a str, - pub rowid_expr: &'a ast::Expr, -} - -#[derive(Debug)] -pub enum WhereExpr<'a> { - Expr(&'a ast::Expr), - SeekRowid(SeekRowid<'a>), -} - -#[derive(Debug)] -pub struct WhereTerm<'a> { - // The expression that should be evaluated. - pub expr: WhereExpr<'a>, - // If this term is part of an outer join, this is the index of the outer join table in select.src_tables - pub outer_join_table_index: Option, - // A bitmask of which table indexes (in select.src_tables) the expression references. - pub table_references_bitmask: usize, -} - -impl<'a> WhereTerm<'a> { - pub fn evaluate_at_loop(&self, select: &'a Select) -> usize { - if let Some(outer_join_table) = self.outer_join_table_index { - // E.g. - // SELECT u.age, p.name FROM users u LEFT JOIN products p ON u.id = 5; - // We can't skip rows from the 'users' table since u.id = 5 is a LEFT JOIN condition; instead we need to skip/null out rows from the 'products' table. - outer_join_table - } else { - // E.g. - // SELECT u.age, p.name FROM users u WHERE u.id = 5; - // We can skip rows from the 'users' table if u.id = 5 is false. - self.innermost_table(select) - } - } - - // Find the innermost table that the expression references. 
- // Innermost means 'most nested in the nested loop'. - pub fn innermost_table(&self, select: &'a Select) -> usize { - let mut table = 0; - for i in 0..select.src_tables.len() { - if self.table_references_bitmask & (1 << i) != 0 { - table = i; - } - } - table - } -} - -#[derive(Debug)] -pub struct ProcessedWhereClause<'a> { - pub loop_order: Vec, - pub terms: Vec>, -} - -/** -* Split a constraint into a flat list of WhereTerms. -* The splitting is done at logical 'AND' operator boundaries. -* WhereTerms are currently just a wrapper around an ast::Expr, -* combined with the ID of the cursor where the term should be evaluated. -*/ -pub fn split_constraint_to_terms<'a>( - select: &'a Select, - mut processed_where_clause: ProcessedWhereClause<'a>, - where_clause_or_join_constraint: &'a ast::Expr, - outer_join_table: Option, -) -> Result> { - let mut queue = vec![where_clause_or_join_constraint]; - - while let Some(expr) = queue.pop() { - match expr { - ast::Expr::Binary(left, ast::Operator::And, right) => { - queue.push(left); - queue.push(right); - } - expr => { - if expr.is_always_true()? { - // Terms that are always true can be skipped, as they don't constrain the result set in any way. - continue; - } - let term = WhereTerm { - expr: { - let seekrowid_candidate = select - .src_tables - .iter() - .enumerate() - .find_map(|(i, _)| { - expr.check_seekrowid_candidate(i, select).unwrap_or(None) - }) - .map(WhereExpr::SeekRowid); - - seekrowid_candidate.unwrap_or(WhereExpr::Expr(expr)) - }, - outer_join_table_index: outer_join_table, - table_references_bitmask: introspect_expression_for_table_refs(select, expr)?, - }; - processed_where_clause.terms.push(term); - } - } - } - - Ok(processed_where_clause) -} - -/** -* Split the WHERE clause and any JOIN ON clauses into a flat list of WhereTerms -* that can be evaluated at the appropriate cursor. 
-*/ -pub fn process_where<'a>(select: &'a Select) -> Result> { - let mut wc = ProcessedWhereClause { - terms: Vec::new(), - // In the future, analysis of the WHERE clause and JOIN ON clauses will be used to determine the optimal loop order. - // For now, we just use the order of the tables in the FROM clause. - loop_order: select - .src_tables - .iter() - .enumerate() - .map(|(i, _)| i) - .collect(), - }; - if let Some(w) = &select.where_clause { - wc = split_constraint_to_terms(select, wc, w, None)?; - } - - for (i, table) in select.src_tables.iter().enumerate() { - if table.join_info.is_none() { - continue; - } - let join_info = table.join_info.unwrap(); - if let Some(ast::JoinConstraint::On(expr)) = &join_info.constraint { - wc = split_constraint_to_terms( - select, - wc, - expr, - if table.is_outer_join() { Some(i) } else { None }, - )?; - } - } - - // sort seekrowids first (if e.g. u.id = 1 and u.age > 50, we want to seek on u.id = 1 first) - // since seekrowid replaces a loop, we need to evaluate it first. - // E.g. - // SELECT u.age FROM users WHERE u.id = 5 AND u.age > 50; - // We need to seek on u.id = 5 first, and then evaluate u.age > 50. - // If we evaluate u.age > 50 first, we haven't read the row yet. - wc.terms.sort_by(|a, b| { - if let WhereExpr::SeekRowid(_) = a.expr { - std::cmp::Ordering::Less - } else { - std::cmp::Ordering::Greater - } - }); - - Ok(wc) -} - -/** - * Translate the WHERE clause of a SELECT statement that doesn't have any tables. - * TODO: refactor this to use the same code path as the other WHERE clause translation functions. - */ -pub fn translate_tableless_where( - select: &Select, - program: &mut ProgramBuilder, - early_terminate_label: BranchOffset, -) -> Result> { - if let Some(w) = &select.where_clause { - if w.is_always_false()? { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: early_terminate_label, - }, - early_terminate_label, - ); - return Ok(None); - } - if w.is_always_true()? 
{ - return Ok(None); - } - - let jump_target_when_false = program.allocate_label(); - let jump_target_when_true = program.allocate_label(); - translate_condition_expr( - program, - select, - w, - None, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_false, - jump_target_when_true, - }, - )?; - - program.resolve_label(jump_target_when_true, program.offset()); - - Ok(Some(jump_target_when_false)) - } else { - Ok(None) - } -} - -/** -* Translate the WHERE clause and JOIN ON clauses into a series of conditional jump instructions. -* At this point the WHERE clause and JOIN ON clauses have been split into a series of terms that can be evaluated at the appropriate cursor. -* We evaluate each term at the appropriate cursor. -*/ -pub fn translate_processed_where<'a>( - program: &mut ProgramBuilder, - select: &'a Select, - loops: &[LoopInfo], - current_loop: &'a LoopInfo, - where_c: &'a ProcessedWhereClause, - skip_entire_loop_label: BranchOffset, - cursor_hint: Option, -) -> Result<()> { - // If any of the terms are always false, we can skip the entire loop. 
- for t in where_c.terms.iter().filter(|t| { - select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier - }) { - if let WhereExpr::Expr(e) = &t.expr { - if e.is_always_false().unwrap_or(false) { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: skip_entire_loop_label, - }, - skip_entire_loop_label, - ); - return Ok(()); - } - } - } - - for term in where_c.terms.iter().filter(|t| { - select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier - }) { - let jump_target_when_false = loops[term.evaluate_at_loop(select)].next_row_label; - let jump_target_when_true = program.allocate_label(); - match &term.expr { - WhereExpr::Expr(e) => { - translate_condition_expr( - program, - select, - e, - cursor_hint, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_false, - jump_target_when_true, - }, - )?; - } - WhereExpr::SeekRowid(s) => { - let cursor_id = program.resolve_cursor_id(s.table, cursor_hint); - - let computed_rowid_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(select), - s.rowid_expr, - computed_rowid_reg, - cursor_hint, - )?; - - if !program.has_cursor_emitted_seekrowid(cursor_id) { - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id, - src_reg: computed_rowid_reg, - target_pc: jump_target_when_false, - }, - jump_target_when_false, - ); - } else { - // If we have already emitted a SeekRowid instruction for this cursor, then other equality checks - // against that table should be done using the row that was already fetched. - // e.g. select u.age, p.name from users u join products p on u.id = p.id and p.id = 5; - // emitting two SeekRowid instructions for the same 'p' cursor would yield an incorrect result. - // Assume we are looping over users u, and right now u.id = 3. - // We first SeekRowid on p.id = 3, and find a row. 
- // If we then SeekRowid for p.id = 5, we would find a row with p.id = 5, - // and end up with a result where u.id = 3 and p.id = 5, which is incorrect. - // Instead we replace the second SeekRowid with a comparison against the row that was already fetched, - // i.e. we compare p.id == 5, which would not match (and is the correct result). - // - // It would probably be better to modify the AST in the WhereTerms directly, but that would require - // refactoring to not use &'a Ast::Expr references in the WhereTerms, i.e. the WhereClause would own its data - // and could mutate it to change the query as needed. We probably need to do this anyway if we want to have some - // kind of Query Plan construct that is not just a container for AST nodes. - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: rowid_reg, - rhs: computed_rowid_reg, - target_pc: jump_target_when_false, - }, - jump_target_when_false, - ); - } - } - } - - program.resolve_label(jump_target_when_true, program.offset()); - } - - Ok(()) -} - -#[derive(Default, Debug, Clone, Copy)] -struct ConditionMetadata { - jump_if_condition_is_true: bool, - jump_target_when_true: BranchOffset, - jump_target_when_false: BranchOffset, -} - -fn translate_condition_expr( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - cursor_hint: Option, - condition_metadata: ConditionMetadata, -) -> Result<()> { - match expr { - ast::Expr::Between { .. } => todo!(), - ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { - // In a binary AND, never jump to the 'jump_target_when_true' label on the first condition, because - // the second condition must also be true. 
- let _ = translate_condition_expr( - program, - select, - lhs, - cursor_hint, - ConditionMetadata { - jump_if_condition_is_true: false, - ..condition_metadata - }, - ); - let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); - } - ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { - let jump_target_when_false = program.allocate_label(); - let _ = translate_condition_expr( - program, - select, - lhs, - cursor_hint, - ConditionMetadata { - // If the first condition is true, we don't need to evaluate the second condition. - jump_if_condition_is_true: true, - jump_target_when_false, - ..condition_metadata - }, - ); - program.resolve_label(jump_target_when_false, program.offset()); - let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); - } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_reg = program.alloc_register(); - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), lhs, lhs_reg, cursor_hint); - if let ast::Expr::Literal(_) = lhs.as_ref() { - program.mark_last_insn_constant() - } - let _ = translate_expr(program, Some(select), rhs, rhs_reg, cursor_hint); - if let ast::Expr::Literal(_) = rhs.as_ref() { - program.mark_last_insn_constant() - } - match op { - ast::Operator::Greater => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::GreaterEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: 
condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Less => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::LessEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Equals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::NotEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: 
condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Is => todo!(), - ast::Operator::IsNot => todo!(), - _ => { - todo!("op {:?} not implemented", op); - } - } - } - ast::Expr::Literal(lit) => match lit { - ast::Literal::Numeric(val) => { - let maybe_int = val.parse::(); - if let Ok(int_value) = maybe_int { - let reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: int_value, - dest: reg, - }); - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: reg, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: reg, - }, - condition_metadata.jump_target_when_false, - ) - } - } else { - crate::bail_parse_error!("unsupported literal type in condition"); - } - } - ast::Literal::String(string) => { - let reg = program.alloc_register(); - program.emit_insn(Insn::String8 { - value: string.clone(), - dest: reg, - }); - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: reg, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: reg, - }, - condition_metadata.jump_target_when_false, - ) - } - } - unimpl => todo!("literal {:?} not implemented", unimpl), - }, - ast::Expr::InList { lhs, not, rhs } => { - // lhs is e.g. 
a column reference - // rhs is an Option> - // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). - // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). - // - // The IN expression is equivalent to a series of OR expressions. - // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. - // The NOT IN expression is equivalent to a series of AND expressions. - // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. - // - // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. - // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, - // which is what SQLite also does for small lists of values. - // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. - - if rhs.is_none() { - // If rhs is None, IN expressions are always false and NOT IN expressions are always true. - if *not { - // On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through. - // This is because in a more complex condition we might need to evaluate the rest of the condition. - // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect, - // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. 
- if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } else { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - return Ok(()); - } - - // The left hand side only needs to be evaluated once we have a list of values to compare against. - let lhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), lhs, lhs_reg, cursor_hint)?; - - let rhs = rhs.as_ref().unwrap(); - - // The difference between a local jump and an "upper level" jump is that for example in this case: - // WHERE foo IN (1,2,3) OR bar = 5, - // we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. - // This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition. - // However, in this example: - // WHERE foo IN (1,2,3) AND bar = 5, - // we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. - // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. - let jump_target_when_true = if condition_metadata.jump_if_condition_is_true { - condition_metadata.jump_target_when_true - } else { - program.allocate_label() - }; - - if !*not { - // If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true. 
- for (i, expr) in rhs.iter().enumerate() { - let rhs_reg = program.alloc_register(); - let last_condition = i == rhs.len() - 1; - let _ = translate_expr(program, Some(select), expr, rhs_reg, cursor_hint)?; - // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. - if !last_condition { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: jump_target_when_true, - }, - jump_target_when_true, - ); - } else { - // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - } - // If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } else { - // If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true. - for expr in rhs.iter() { - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, rhs_reg, cursor_hint)?; - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. 
- if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } - - if !condition_metadata.jump_if_condition_is_true { - program.resolve_label(jump_target_when_true, program.offset()); - } - } - ast::Expr::Like { - lhs, - not, - op, - rhs, - escape: _, - } => { - let cur_reg = program.alloc_register(); - assert!(match rhs.as_ref() { - ast::Expr::Literal(_) => true, - _ => false, - }); - match op { - ast::LikeOperator::Like => { - let pattern_reg = program.alloc_register(); - let column_reg = program.alloc_register(); - // LIKE(pattern, column). We should translate the pattern first before the column - let _ = translate_expr(program, Some(select), rhs, pattern_reg, cursor_hint)?; - program.mark_last_insn_constant(); - let _ = translate_expr(program, Some(select), lhs, column_reg, cursor_hint)?; - program.emit_insn(Insn::Function { - func: Func::Scalar(ScalarFunc::Like), - start_reg: pattern_reg, - dest: cur_reg, - }); - } - ast::LikeOperator::Glob => todo!(), - ast::LikeOperator::Match => todo!(), - ast::LikeOperator::Regexp => todo!(), - } - if !*not { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_true, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_false, - ); - } - } else if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_true, - ); - } else { - 
program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_false, - ); - } - } - _ => todo!("op {:?} not implemented", expr), - } - Ok(()) -} - -fn introspect_expression_for_table_refs<'a>( - select: &'a Select, - where_expr: &'a ast::Expr, -) -> Result { - let mut table_refs_mask = 0; - match where_expr { - ast::Expr::Binary(e1, _, e2) => { - table_refs_mask |= introspect_expression_for_table_refs(select, e1)?; - table_refs_mask |= introspect_expression_for_table_refs(select, e2)?; - } - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let matching_tables = select - .src_tables - .iter() - .enumerate() - .filter(|(_, t)| t.table.get_column(&ident).is_some()); - - let mut matches = 0; - let mut matching_tbl = None; - for table in matching_tables { - matching_tbl = Some(table); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", &ident) - } - } - - if let Some((tbl_index, _)) = matching_tbl { - table_refs_mask |= 1 << tbl_index; - } else { - crate::bail_parse_error!("column not found: {}", &ident) - } - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let matching_table = select - .src_tables - .iter() - .enumerate() - .find(|(_, t)| t.identifier == tbl); - - if matching_table.is_none() { - crate::bail_parse_error!("table not found: {}", &tbl) - } - let matching_table = matching_table.unwrap(); - if matching_table.1.table.get_column(&ident).is_none() { - crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) - } - - table_refs_mask |= 1 << matching_table.0; - } - ast::Expr::Literal(_) => {} - ast::Expr::Like { lhs, rhs, .. 
} => { - table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; - table_refs_mask |= introspect_expression_for_table_refs(select, rhs)?; - } - ast::Expr::FunctionCall { - args: Some(args), .. - } => { - for arg in args { - table_refs_mask |= introspect_expression_for_table_refs(select, arg)?; - } - } - ast::Expr::InList { lhs, rhs, .. } => { - table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; - if let Some(rhs_list) = rhs { - for rhs_expr in rhs_list { - table_refs_mask |= introspect_expression_for_table_refs(select, rhs_expr)?; - } - } - } - _ => {} - } - - Ok(table_refs_mask) -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ConstantCondition { - AlwaysTrue, - AlwaysFalse, -} - -pub trait Evaluatable<'a> { - // if the expression is a constant expression e.g. '1', returns the constant condition - fn check_constant(&self) -> Result>; - fn is_always_true(&self) -> Result { - Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantCondition::AlwaysTrue)) - } - fn is_always_false(&self) -> Result { - Ok(self - .check_constant()? 
- .map_or(false, |c| c == ConstantCondition::AlwaysFalse)) - } - // if the expression is the primary key of a table, returns the index of the table - fn check_primary_key(&self, select: &'a Select) -> Result>; - // Returns a bitmask of which table indexes the expression references - fn get_table_references_bitmask(&self, select: &'a Select) -> Result; - // Checks if the expression is a candidate for seekrowid optimization - fn check_seekrowid_candidate( - &'a self, - table_index: usize, - select: &'a Select, - ) -> Result>>; -} - -impl<'a> Evaluatable<'a> for ast::Expr { - fn get_table_references_bitmask(&self, select: &'a Select) -> Result { - match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { - if t.table.get_column(&ident).is_some() { - Some(i) - } else { - None - } - }); - - let mut matches = 0; - let mut matching_tbl = None; - - for tbl in tables { - matching_tbl = Some(tbl); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - } - - Ok(matching_tbl.unwrap_or(0)) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let table = select - .src_tables - .iter() - .enumerate() - .find(|(_, t)| t.identifier == tbl && t.table.get_column(&ident).is_some()); - - if table.is_none() { - crate::bail_parse_error!("table not found: {}", tbl) - } - - let table = table.unwrap(); - - Ok(table.0) - } - ast::Expr::Binary(lhs, _, rhs) => { - let lhs = lhs.as_ref().get_table_references_bitmask(select)?; - let rhs = rhs.as_ref().get_table_references_bitmask(select)?; - - Ok(lhs | rhs) - } - _ => Ok(0), - } - } - fn check_primary_key(&self, select: &'a Select) -> Result> { - match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { - if t.table - 
.get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - { - Some(i) - } else { - None - } - }); - - let mut matches = 0; - let mut matching_tbl = None; - - for tbl in tables { - matching_tbl = Some(tbl); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - } - - Ok(matching_tbl) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let table = select.src_tables.iter().enumerate().find(|(_, t)| { - t.identifier == tbl - && t.table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - }); - - if table.is_none() { - crate::bail_parse_error!("table not found: {}", tbl) - } - - let table = table.unwrap(); - - Ok(Some(table.0)) - } - _ => Ok(None), - } - } - fn check_seekrowid_candidate( - &'a self, - table_index: usize, - select: &'a Select, - ) -> Result>> { - match self { - ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => { - let lhs = lhs.as_ref(); - let rhs = rhs.as_ref(); - - if let Some(lhs_table_index) = lhs.check_primary_key(select)? { - let rhs_table_refs_bitmask = rhs.get_table_references_bitmask(select)?; - // For now, we only support seekrowid optimization if the primary key is in an inner loop compared to the other expression. - // Example: explain select u.age, p.name from users u join products p on u.id = p.id; - // In this case, we loop over the users table and seek the products table. - // We also support the case where the other expression is a constant, - // e.g. SELECT * FROM USERS u WHERE u.id = 5. - // In this case the bitmask of the other expression is 0. - if lhs_table_index == table_index && lhs_table_index >= rhs_table_refs_bitmask { - return Ok(Some(SeekRowid { - table: &select.src_tables[table_index].identifier, - rowid_expr: rhs, - })); - } - } - - if let Some(rhs_table_index) = rhs.check_primary_key(select)? 
{ - let lhs_table_refs_bitmask = lhs.get_table_references_bitmask(select)?; - if rhs_table_index == table_index && rhs_table_index >= lhs_table_refs_bitmask { - return Ok(Some(SeekRowid { - table: &select.src_tables[table_index].identifier, - rowid_expr: lhs, - })); - } - } - - Ok(None) - } - _ => Ok(None), - } - } - fn check_constant(&self) -> Result> { - match self { - ast::Expr::Literal(lit) => match lit { - ast::Literal::Null => Ok(Some(ConstantCondition::AlwaysFalse)), - ast::Literal::Numeric(b) => { - if let Ok(int_value) = b.parse::() { - return Ok(Some(if int_value == 0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - if let Ok(float_value) = b.parse::() { - return Ok(Some(if float_value == 0.0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - Ok(None) - } - ast::Literal::String(s) => { - let without_quotes = s.trim_matches('\''); - if let Ok(int_value) = without_quotes.parse::() { - return Ok(Some(if int_value == 0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - if let Ok(float_value) = without_quotes.parse::() { - return Ok(Some(if float_value == 0.0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - Ok(Some(ConstantCondition::AlwaysFalse)) - } - _ => Ok(None), - }, - ast::Expr::Unary(op, expr) => { - if *op == ast::UnaryOperator::Not { - let trivial = expr.check_constant()?; - return Ok(trivial.map(|t| match t { - ConstantCondition::AlwaysTrue => ConstantCondition::AlwaysFalse, - ConstantCondition::AlwaysFalse => ConstantCondition::AlwaysTrue, - })); - } - - if *op == ast::UnaryOperator::Negative { - let trivial = expr.check_constant()?; - return Ok(trivial); - } - - Ok(None) - } - ast::Expr::InList { lhs: _, not, rhs } => { - if rhs.is_none() { - return Ok(Some(if *not { - ConstantCondition::AlwaysTrue - } else { - ConstantCondition::AlwaysFalse - })); - } - let rhs = 
rhs.as_ref().unwrap(); - if rhs.is_empty() { - return Ok(Some(if *not { - ConstantCondition::AlwaysTrue - } else { - ConstantCondition::AlwaysFalse - })); - } - - Ok(None) - } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_trivial = lhs.check_constant()?; - let rhs_trivial = rhs.check_constant()?; - match op { - ast::Operator::And => { - if lhs_trivial == Some(ConstantCondition::AlwaysFalse) - || rhs_trivial == Some(ConstantCondition::AlwaysFalse) - { - return Ok(Some(ConstantCondition::AlwaysFalse)); - } - if lhs_trivial == Some(ConstantCondition::AlwaysTrue) - && rhs_trivial == Some(ConstantCondition::AlwaysTrue) - { - return Ok(Some(ConstantCondition::AlwaysTrue)); - } - - Ok(None) - } - ast::Operator::Or => { - if lhs_trivial == Some(ConstantCondition::AlwaysTrue) - || rhs_trivial == Some(ConstantCondition::AlwaysTrue) - { - return Ok(Some(ConstantCondition::AlwaysTrue)); - } - if lhs_trivial == Some(ConstantCondition::AlwaysFalse) - && rhs_trivial == Some(ConstantCondition::AlwaysFalse) - { - return Ok(Some(ConstantCondition::AlwaysFalse)); - } - - Ok(None) - } - _ => Ok(None), - } - } - _ => Ok(None), - } - } -} diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index a20560334..721720a3e 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -70,18 +70,7 @@ impl ProgramBuilder { cursor } - pub fn has_cursor_emitted_seekrowid(&self, cursor_id: CursorID) -> bool { - (self.seekrowid_emitted_bitmask & (1 << cursor_id)) != 0 - } - - fn set_cursor_emitted_seekrowid(&mut self, cursor_id: CursorID) { - self.seekrowid_emitted_bitmask |= 1 << cursor_id; - } - fn _emit_insn(&mut self, insn: Insn) { - if let Insn::SeekRowid { cursor_id, .. } = insn { - self.set_cursor_emitted_seekrowid(cursor_id); - } self.insns.push(insn); } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 0927ce0ef..4dddbe285 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1120,7 +1120,8 @@ impl Program { let record = match *cursor.record()? 
{ Some(ref record) => record.clone(), None => { - todo!(); + state.pc += 1; + continue; } }; state.registers[*dest_reg] = OwnedValue::Record(record.clone()); diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index b80ab6074..011392d1a 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -106,7 +106,7 @@ impl Cursor for Sorter { } fn get_null_flag(&self) -> bool { - todo!(); + false } fn exists(&mut self, key: &OwnedValue) -> Result> { diff --git a/testing/all.test b/testing/all.test index 26ee60589..67d07870a 100755 --- a/testing/all.test +++ b/testing/all.test @@ -12,4 +12,4 @@ source $testdir/where.test source $testdir/like.test source $testdir/scalar-functions.test source $testdir/orderby.test -source $testdir/json.test \ No newline at end of file +source $testdir/json.test diff --git a/testing/join.test b/testing/join.test index 1e2d08c6c..77cb040c4 100755 --- a/testing/join.test +++ b/testing/join.test @@ -199,4 +199,4 @@ Jamie||Edward} do_execsql_test left-join-constant-condition-true-inner-join-constant-condition-false { select u.first_name, p.name, u2.first_name from users u left join products as p on 1 join users u2 on 0 limit 5; -} {} \ No newline at end of file +} {} diff --git a/testing/select.test b/testing/select.test index ac8750bbe..3803bfba6 100755 --- a/testing/select.test +++ b/testing/select.test @@ -26,3 +26,15 @@ do_execsql_test select-add { do_execsql_test case-insensitive-columns { select u.aGe + 1 from USERS u where U.AGe = 91 limit 1; } {92} + +do_execsql_test table-star { + select p.*, p.name from products p limit 1; +} {1|hat|79.0|hat} + +do_execsql_test table-star-2 { + select p.*, u.age from users u join products p limit 1; +} {1|hat|79.0|94} + +do_execsql_test seekrowid { + select * from users u where u.id = 5; +} {"5|Edward|Miller|christiankramer@example.com|725-281-1033|08522 English Plain|Lake Keith|ID|23283|15"}