From 2e32ca0bdb517166601533dde9cac46ad01a70ab Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 16:58:32 +0300 Subject: [PATCH 01/20] More structured query planner --- core/lib.rs | 16 +- core/translate/emitter.rs | 838 +++++++++++++++++++++++ core/translate/expr.rs | 1048 ++++++++++++++++++++++++----- core/translate/mod.rs | 13 +- core/translate/optimizer.rs | 732 ++++++++++++++++++++ core/translate/plan.rs | 469 +++++++++++++ core/translate/planner.rs | 359 ++++++++++ core/translate/select.rs | 1060 +---------------------------- core/translate/where_clause.rs | 1152 -------------------------------- core/vdbe/mod.rs | 3 +- testing/agg-functions.test | 4 + testing/join.test | 4 +- testing/select.test | 8 + 13 files changed, 3314 insertions(+), 2392 deletions(-) create mode 100644 core/translate/emitter.rs create mode 100644 core/translate/optimizer.rs create mode 100644 core/translate/plan.rs create mode 100644 core/translate/planner.rs delete mode 100644 core/translate/where_clause.rs diff --git a/core/lib.rs b/core/lib.rs index 0ffdcd6e3..ee329b2f2 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -17,6 +17,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use fallible_iterator::FallibleIterator; use log::trace; use schema::Schema; +use sqlite3_parser::ast; use sqlite3_parser::{ast::Cmd, lexer::sql::Parser}; use std::sync::Arc; use std::{cell::RefCell, rc::Rc}; @@ -27,6 +28,9 @@ use storage::sqlite3_ondisk::DatabaseHeader; #[cfg(feature = "fs")] use storage::wal::WalFile; +use translate::optimizer::optimize_plan; +use translate::planner::prepare_select_plan; + pub use error::LimboError; pub type Result = std::result::Result; @@ -173,7 +177,17 @@ impl Connection { program.explain(); Ok(None) } - Cmd::ExplainQueryPlan(_stmt) => Ok(None), + Cmd::ExplainQueryPlan(stmt) => { + match stmt { + ast::Stmt::Select(select) => { + let plan = prepare_select_plan(&self.schema, select)?; + let plan = optimize_plan(plan)?; + println!("{}", plan); + } + _ 
=> todo!(), + } + Ok(None) + } } } else { Ok(None) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs new file mode 100644 index 000000000..5ecc5e2a2 --- /dev/null +++ b/core/translate/emitter.rs @@ -0,0 +1,838 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::rc::Rc; +use std::usize; + +use crate::schema::{BTreeTable, Column, PseudoTable, Table}; +use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::types::{OwnedRecord, OwnedValue}; +use crate::vdbe::builder::ProgramBuilder; +use crate::vdbe::{BranchOffset, Insn, Program}; +use crate::Result; + +use super::expr::maybe_apply_affinity; +use super::expr::{ + translate_aggregation, translate_condition_expr, translate_expr, ConditionMetadata, +}; +use super::plan::Plan; +use super::plan::{Operator, ProjectionColumn}; + +/** + * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. + * + * - start: open cursors, etc. + * - emit: open loops, emit conditional jumps etc. + * - end: close loops, etc. + * - result_columns: emit the bytecode instructions for the result columns. + * - result_row: emit the bytecode instructions for a result row. 
+*/ +pub trait Emitter { + fn start( + &mut self, + pb: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result<()>; + fn emit( + &mut self, + pb: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + can_emit_row: bool, + ) -> Result; + fn end( + &mut self, + pb: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result<()>; + fn result_columns( + &self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + metadata: &mut Metadata, + cursor_override: Option, + ) -> Result; + fn result_row( + &mut self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + metadata: &mut Metadata, + cursor_override: Option, + ) -> Result; +} + +#[derive(Debug)] +pub struct LeftJoinMetadata { + // integer register that holds a flag that is set to true if the current row has a match for the left join + pub match_flag_register: usize, + // label for the instruction that sets the match flag to true + pub set_match_flag_true_label: BranchOffset, + // label for the instruction that checks if the match flag is true + pub check_match_flag_label: BranchOffset, + // label for the instruction where the program jumps to if the current row has a match for the left join + pub on_match_jump_to_label: BranchOffset, +} + +#[derive(Debug)] +pub struct SortMetadata { + pub sort_cursor: usize, + pub sort_register: usize, + pub next_row_label: BranchOffset, + pub done_label: BranchOffset, +} + +#[derive(Debug)] +pub struct Metadata { + termination_labels: Vec, + next_row_labels: HashMap, + rewind_labels: Vec, + aggregations: HashMap, + sorts: HashMap, + left_joins: HashMap, +} + +impl Emitter for Operator { + fn start( + &mut self, + program: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result<()> { + match self { + Operator::Scan { + table, + table_identifier, + id, + .. 
+ } => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + let next_row_label = program.allocate_label(); + m.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + Ok(()) + } + Operator::SeekRowid { + table, + table_identifier, + .. + } => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + Ok(()) + } + Operator::Join { + left, + right, + outer, + id, + .. + } => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + m.left_joins.insert(*id, lj_metadata); + } + left.start(program, m, referenced_tables)?; + right.start(program, m, referenced_tables) + } + Operator::Aggregate { + id, + source, + aggregates, + } => { + let can_continue = source.start(program, m, referenced_tables)?; + + let agg_final_label = program.allocate_label(); + m.termination_labels.push(agg_final_label); + source.emit(program, m, referenced_tables, false)?; + + let num_aggs = aggregates.len(); + let start_reg = program.alloc_registers(num_aggs); + m.aggregations.insert(*id, start_reg); + + Ok(can_continue) + } + Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::Limit { source, .. 
} => source.start(program, m, referenced_tables), + Operator::Order { id, source, key } => { + let sort_cursor = program.alloc_cursor_id(None, None); + m.sorts.insert( + *id, + SortMetadata { + sort_cursor, + sort_register: usize::MAX, // will be set later + next_row_label: program.allocate_label(), + done_label: program.allocate_label(), + }, + ); + let mut order = Vec::new(); + for (_, direction) in key.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: key.len(), + order: OwnedRecord::new(order), + }); + + source.start(program, m, referenced_tables) + } + Operator::Projection { source, .. } => source.start(program, m, referenced_tables), + Operator::Nothing => Ok(()), + } + } + fn emit( + &mut self, + program: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + can_emit_row: bool, + ) -> Result { + match self { + Operator::Aggregate { + source, + aggregates, + id, + } => { + let can_continue = source.emit(program, m, referenced_tables, false)?; + if !can_continue { + return Ok(false); + } + let start_reg = m.aggregations.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = start_reg + i; + translate_aggregation(program, referenced_tables, agg, agg_result_reg, None)?; + } + + Ok(false) + } + Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::SeekRowid { + rowid_predicate, + predicates, + table_identifier, + id, + .. 
+ } => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + let rowid_reg = program.alloc_register(); + translate_expr( + program, + Some(referenced_tables), + rowid_predicate, + rowid_reg, + None, + )?; + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(&m.termination_labels.last().unwrap()); + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id, + src_reg: rowid_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + Ok(true) + } + Operator::Limit { source, .. } => { + source.emit(program, m, referenced_tables, false)?; + Ok(true) + } + Operator::Join { + left, + right, + predicates, + outer, + id, + } => { + left.emit(program, m, referenced_tables, false)?; + + let mut jump_target_when_false = *m + .next_row_labels + .get(&right.id()) + .unwrap_or(&m.termination_labels.last().unwrap()); + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + m.next_row_labels.insert(right.id(), jump_target_when_false); + } + + right.emit(program, m, referenced_tables, false)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + 
referenced_tables, + predicate, + None, + condition_metadata, + )?; + } + program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: lj_meta.match_flag_register, + }); + } + + if can_emit_row { + return self.result_row(program, referenced_tables, m, None); + } + Ok(true) + } + Operator::Order { source, key, id } => { + source.emit(program, m, referenced_tables, false)?; + let sort_keys_count = key.len(); + let source_cols_count = source.column_count(referenced_tables); + let start_reg = program.alloc_registers(sort_keys_count); + for (i, (expr, _)) in key.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; + } + source.result_columns(program, referenced_tables, m, None)?; + + let dest = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: sort_keys_count + source_cols_count, + dest_reg: dest, + }); + + let sort_metadata = m.sorts.get_mut(id).unwrap(); + program.emit_insn(Insn::SorterInsert { + cursor_id: sort_metadata.sort_cursor, + record_reg: dest, + }); + sort_metadata.sort_register = start_reg; + + if can_emit_row { + return self.result_row(program, referenced_tables, m, None); + } + + Ok(true) + } + Operator::Projection { source, .. } => { + source.emit(program, m, referenced_tables, false)?; + if can_emit_row { + return self.result_row(program, referenced_tables, m, None); + } + + Ok(true) + } + Operator::Scan { + predicates, + table_identifier, + id, + .. 
+ } => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program.emit_insn(Insn::RewindAsync { cursor_id }); + let rewind_label = program.allocate_label(); + let halt_label = m.termination_labels.last().unwrap(); + m.rewind_labels.push(rewind_label); + program.defer_label_resolution(rewind_label, program.offset() as usize); + program.emit_insn_with_label_dependency( + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + }, + *halt_label, + ); + + let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + if can_emit_row { + return self.result_row(program, referenced_tables, m, None); + } + + Ok(true) + } + Operator::Nothing => Ok(false), + } + } + fn end( + &mut self, + program: &mut ProgramBuilder, + m: &mut Metadata, + referenced_tables: &[(Rc, String)], + ) -> Result<()> { + match self { + Operator::Scan { + table_identifier, + id, + .. + } => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = m.rewind_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + Ok(()) + } + Operator::Join { + left, + right, + outer, + id, + .. 
+ } => { + right.end(program, m, referenced_tables)?; + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + Operator::Scan { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + Operator::SeekRowid { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + // This points to the NextAsync instruction of the left table + program.resolve_label(lj_meta.on_match_jump_to_label, program.offset()); + } + left.end(program, m, referenced_tables) + } + Operator::Aggregate { + id, + source, + aggregates, + } => { + source.end(program, m, referenced_tables)?; + + program.resolve_label(m.termination_labels.pop().unwrap(), program.offset()); + let start_reg = m.aggregations.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = *start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + program.emit_insn(Insn::ResultRow { + start_reg: *start_reg, + count: aggregates.len(), + }); + Ok(()) + } 
+ Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::SeekRowid { .. } => Ok(()), + Operator::Limit { source, limit, .. } => { + source.result_row(program, referenced_tables, m, None)?; + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + let jump_label = m.termination_labels.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + + source.end(program, m, referenced_tables)?; + + Ok(()) + } + Operator::Order { id, .. } => { + let sort_metadata = m.sorts.get(id).unwrap(); + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_metadata.next_row_label, + }, + sort_metadata.next_row_label, + ); + + program.resolve_label(sort_metadata.done_label, program.offset()); + + Ok(()) + } + Operator::Projection { source, .. } => source.end(program, m, referenced_tables), + Operator::Nothing => Ok(()), + } + } + fn result_columns( + &self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + m: &mut Metadata, + cursor_override: Option, + ) -> Result { + let col_count = self.column_count(referenced_tables); + match self { + Operator::Scan { + table, + table_identifier, + .. + } => { + let start_reg = program.alloc_registers(col_count); + table_columns(program, table, table_identifier, cursor_override, start_reg); + + Ok(start_reg) + } + Operator::Join { left, right, .. } => { + let left_start_reg = + left.result_columns(program, referenced_tables, m, cursor_override)?; + right.result_columns(program, referenced_tables, m, cursor_override)?; + + Ok(left_start_reg) + } + Operator::Aggregate { id, aggregates, .. 
} => { + let start_reg = m.aggregations.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = *start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + + Ok(*start_reg) + } + Operator::Filter { .. } => unreachable!("predicates have been pushed down"), + Operator::SeekRowid { + table_identifier, .. + } => { + let cursor_id = + cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); + let start_reg = program.alloc_registers(col_count); + for i in 0..col_count { + program.emit_insn(Insn::Column { + cursor_id, + column: i, + dest: start_reg + i, + }); + } + + Ok(start_reg) + } + Operator::Limit { .. } => { + unimplemented!() + } + Operator::Order { .. } => { + todo!() + } + Operator::Projection { expressions, .. } => { + let expr_count = expressions + .iter() + .map(|e| e.column_count(referenced_tables)) + .sum(); + let start_reg = program.alloc_registers(expr_count); + let mut cur_reg = start_reg; + for expr in expressions { + match expr { + ProjectionColumn::Column(expr) => { + translate_expr( + program, + Some(referenced_tables), + expr, + cur_reg, + cursor_override, + )?; + cur_reg += 1; + } + ProjectionColumn::Star => { + for (table, table_identifier) in referenced_tables.iter() { + cur_reg = table_columns( + program, + table, + table_identifier, + cursor_override, + cur_reg, + ); + } + } + ProjectionColumn::TableStar(table, table_identifier) => { + let (table, table_identifier) = referenced_tables + .iter() + .find(|(_, id)| id == table_identifier) + .unwrap(); + let cursor_id = cursor_override + .unwrap_or(program.resolve_cursor_id(table_identifier, None)); + cur_reg = table_columns( + program, + table, + table_identifier, + Some(cursor_id), + cur_reg, + ); + } + } + } + + Ok(start_reg) + } + Operator::Nothing => unimplemented!(), + } + } + fn result_row( + &mut self, + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + m: 
&mut Metadata, + cursor_override: Option, + ) -> Result { + match self { + Operator::Order { id, source, key } => { + source.end(program, m, referenced_tables)?; + let column_names = source.column_names(); + let pseudo_columns = column_names + .iter() + .map(|name| Column { + name: name.clone(), + primary_key: false, + ty: crate::schema::Type::Null, + }) + .collect::>(); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + + let pseudo_content_reg = program.alloc_register(); + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: pseudo_content_reg, + num_fields: key.len() + source.column_count(referenced_tables), + }); + + let sort_metadata = m.sorts.get(id).unwrap(); + + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + + program.defer_label_resolution( + sort_metadata.next_row_label, + program.offset() as usize, + ); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: pseudo_content_reg, + pseudo_cursor, + }); + + let done_label = sort_metadata.done_label; + + source.result_row(program, referenced_tables, m, Some(pseudo_cursor))?; + + program.resolve_label(done_label, program.offset()); + + Ok(true) + } + node => { + let start_reg = + node.result_columns(program, referenced_tables, m, cursor_override)?; + program.emit_insn(Insn::ResultRow { + start_reg, + count: node.column_count(referenced_tables), + }); + Ok(true) + } + } + } +} + +pub fn emit_program( + database_header: Rc>, + mut select_plan: Plan, +) -> Result { + let mut program = ProgramBuilder::new(); + let init_label = program.allocate_label(); + let halt_label = program.allocate_label(); + program.emit_insn_with_label_dependency( + Insn::Init { + target_pc: init_label, + }, + init_label, + ); + let start_offset = 
program.offset(); + + let mut metadata = Metadata { + termination_labels: vec![halt_label], + next_row_labels: HashMap::new(), + rewind_labels: Vec::new(), + aggregations: HashMap::new(), + sorts: HashMap::new(), + left_joins: HashMap::new(), + }; + + select_plan + .root_node + .start(&mut program, &mut metadata, &select_plan.referenced_tables)?; + select_plan.root_node.emit( + &mut program, + &mut metadata, + &select_plan.referenced_tables, + true, + )?; + select_plan + .root_node + .end(&mut program, &mut metadata, &select_plan.referenced_tables)?; + + program.resolve_label(halt_label, program.offset()); + program.emit_insn(Insn::Halt); + + program.resolve_label(init_label, program.offset()); + program.emit_insn(Insn::Transaction); + + program.emit_constant_insns(); + program.emit_insn(Insn::Goto { + target_pc: start_offset, + }); + + program.resolve_deferred_labels(); + Ok(program.build(database_header)) +} + +fn table_columns( + program: &mut ProgramBuilder, + table: &Rc, + table_identifier: &str, + cursor_override: Option, + start_reg: usize, +) -> usize { + let mut cur_reg = start_reg; + let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); + for i in 0..table.columns.len() { + let is_primary_key = table.columns[i].primary_key; + let col_type = &table.columns[i].ty; + if is_primary_key { + program.emit_insn(Insn::RowId { + cursor_id, + dest: cur_reg, + }); + } else { + program.emit_insn(Insn::Column { + cursor_id, + column: i, + dest: cur_reg, + }); + } + maybe_apply_affinity(*col_type, cur_reg, program); + cur_reg += 1; + } + cur_reg +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index c18a4c7ab..f3be84953 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,19 +1,518 @@ -use core::panic; - use crate::{function::JsonFunc, Result}; -use sqlite3_parser::ast::{self, Expr, UnaryOperator}; +use sqlite3_parser::ast::{self, UnaryOperator}; +use std::rc::Rc; +use crate::function::{AggFunc, 
Func, ScalarFunc}; +use crate::schema::Type; +use crate::util::normalize_ident; use crate::{ - function::{Func, ScalarFunc}, - schema::{Table, Type}, - translate::select::{ColumnInfo, Select, SrcTable}, - util::normalize_ident, + schema::BTreeTable, vdbe::{builder::ProgramBuilder, BranchOffset, Insn}, }; +use super::plan::Aggregate; + +#[derive(Default, Debug, Clone, Copy)] +pub struct ConditionMetadata { + pub jump_if_condition_is_true: bool, + pub jump_target_when_true: BranchOffset, + pub jump_target_when_false: BranchOffset, +} + +pub fn translate_condition_expr( + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + expr: &ast::Expr, + cursor_hint: Option, + condition_metadata: ConditionMetadata, +) -> Result<()> { + match expr { + ast::Expr::Between { .. } => todo!(), + ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { + // In a binary AND, never jump to the 'jump_target_when_true' label on the first condition, because + // the second condition must also be true. + let _ = translate_condition_expr( + program, + referenced_tables, + lhs, + cursor_hint, + ConditionMetadata { + jump_if_condition_is_true: false, + ..condition_metadata + }, + ); + let _ = translate_condition_expr( + program, + referenced_tables, + rhs, + cursor_hint, + condition_metadata, + ); + } + ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { + let jump_target_when_false = program.allocate_label(); + let _ = translate_condition_expr( + program, + referenced_tables, + lhs, + cursor_hint, + ConditionMetadata { + // If the first condition is true, we don't need to evaluate the second condition. 
+ jump_if_condition_is_true: true, + jump_target_when_false, + ..condition_metadata + }, + ); + program.resolve_label(jump_target_when_false, program.offset()); + let _ = translate_condition_expr( + program, + referenced_tables, + rhs, + cursor_hint, + condition_metadata, + ); + } + ast::Expr::Binary(lhs, op, rhs) => { + let lhs_reg = program.alloc_register(); + let rhs_reg = program.alloc_register(); + let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint); + match lhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + let _ = translate_expr(program, Some(referenced_tables), rhs, rhs_reg, cursor_hint); + match rhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + match op { + ast::Operator::Greater => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::GreaterEquals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Less => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: 
condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::LessEquals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Equals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::NotEquals => { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ) + } + } + ast::Operator::Is => todo!(), + ast::Operator::IsNot => todo!(), + _ => { + todo!("op {:?} not implemented", op); + } + } + } + ast::Expr::Literal(lit) => match lit { + ast::Literal::Numeric(val) 
=> { + let maybe_int = val.parse::(); + if let Ok(int_value) = maybe_int { + let reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: int_value, + dest: reg, + }); + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: reg, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: reg, + }, + condition_metadata.jump_target_when_false, + ) + } + } else { + crate::bail_parse_error!("unsupported literal type in condition"); + } + } + ast::Literal::String(string) => { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + value: string.clone(), + dest: reg, + }); + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: reg, + }, + condition_metadata.jump_target_when_true, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: reg, + }, + condition_metadata.jump_target_when_false, + ) + } + } + unimpl => todo!("literal {:?} not implemented", unimpl), + }, + ast::Expr::InList { lhs, not, rhs } => { + // lhs is e.g. a column reference + // rhs is an Option> + // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). + // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). + // + // The IN expression is equivalent to a series of OR expressions. + // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. + // The NOT IN expression is equivalent to a series of AND expressions. 
+ // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. + // + // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. + // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, + // which is what SQLite also does for small lists of values. + // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. + + if rhs.is_none() { + // If rhs is None, IN expressions are always false and NOT IN expressions are always true. + if *not { + // On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through. + // This is because in a more complex condition we might need to evaluate the rest of the condition. + // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect, + // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } else { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + return Ok(()); + } + + // The left hand side only needs to be evaluated once we have a list of values to compare against. 
+ let lhs_reg = program.alloc_register(); + let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint)?; + + let rhs = rhs.as_ref().unwrap(); + + // The difference between a local jump and an "upper level" jump is that for example in this case: + // WHERE foo IN (1,2,3) OR bar = 5, + // we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. + // This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition. + // However, in this example: + // WHERE foo IN (1,2,3) AND bar = 5, + // we can't jump to the 'jump_target_when_true' label of the entire condition when foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. + // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. + let jump_target_when_true = if condition_metadata.jump_if_condition_is_true { + condition_metadata.jump_target_when_true + } else { + program.allocate_label() + }; + + if !*not { + // If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true. + for (i, expr) in rhs.iter().enumerate() { + let rhs_reg = program.alloc_register(); + let last_condition = i == rhs.len() - 1; + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + rhs_reg, + cursor_hint, + )?; + // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. + if !last_condition { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: jump_target_when_true, + }, + jump_target_when_true, + ); + } else { + // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
+ program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + } + // If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. + // If not, we can just fall through without emitting an unnecessary instruction. + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } else { + // If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true. + for expr in rhs.iter() { + let rhs_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + rhs_reg, + cursor_hint, + )?; + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }, + condition_metadata.jump_target_when_false, + ); + } + // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. + // If not, we can just fall through without emitting an unnecessary instruction. 
+ if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, + }, + condition_metadata.jump_target_when_true, + ); + } + } + + if !condition_metadata.jump_if_condition_is_true { + program.resolve_label(jump_target_when_true, program.offset()); + } + } + ast::Expr::Like { + lhs, + not, + op, + rhs, + escape: _, + } => { + let cur_reg = program.alloc_register(); + assert!(match rhs.as_ref() { + ast::Expr::Literal(_) => true, + _ => false, + }); + match op { + ast::LikeOperator::Like => { + let pattern_reg = program.alloc_register(); + let column_reg = program.alloc_register(); + // LIKE(pattern, column). We should translate the pattern first before the column + let _ = translate_expr( + program, + Some(referenced_tables), + rhs, + pattern_reg, + cursor_hint, + )?; + program.mark_last_insn_constant(); + let _ = translate_expr( + program, + Some(referenced_tables), + lhs, + column_reg, + cursor_hint, + )?; + program.emit_insn(Insn::Function { + func: crate::vdbe::Func::Scalar(ScalarFunc::Like), + start_reg: pattern_reg, + dest: cur_reg, + }); + } + ast::LikeOperator::Glob => todo!(), + ast::LikeOperator::Match => todo!(), + ast::LikeOperator::Regexp => todo!(), + } + if !*not { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_true, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_false, + ); + } + } else { + if condition_metadata.jump_if_condition_is_true { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_true, + null_reg: cur_reg, + }, + 
condition_metadata.jump_target_when_true, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.jump_target_when_false, + null_reg: cur_reg, + }, + condition_metadata.jump_target_when_false, + ); + } + } + } + _ => todo!("op {:?} not implemented", expr), + } + Ok(()) +} + pub fn translate_expr( program: &mut ProgramBuilder, - select: Option<&Select>, + referenced_tables: Option<&[(Rc, String)]>, expr: &ast::Expr, target_register: usize, cursor_hint: Option, @@ -23,8 +522,8 @@ pub fn translate_expr( ast::Expr::Binary(e1, op, e2) => { let e1_reg = program.alloc_register(); let e2_reg = program.alloc_register(); - let _ = translate_expr(program, select, e1, e1_reg, cursor_hint)?; - let _ = translate_expr(program, select, e2, e2_reg, cursor_hint)?; + let _ = translate_expr(program, referenced_tables, e1, e1_reg, cursor_hint)?; + let _ = translate_expr(program, referenced_tables, e2, e2_reg, cursor_hint)?; match op { ast::Operator::NotEquals => { @@ -136,6 +635,7 @@ pub fn translate_expr( Some(Func::Agg(_)) => { crate::bail_parse_error!("aggregation function in non-aggregation context") } + Some(Func::Json(j)) => match j { JsonFunc::JSON => { let args = if let Some(args) = args { @@ -153,7 +653,7 @@ pub fn translate_expr( ); }; let regs = program.alloc_register(); - translate_expr(program, select, &args[0], regs, cursor_hint)?; + translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?; program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, @@ -201,7 +701,7 @@ pub fn translate_expr( for (index, arg) in args.iter().enumerate() { let reg = translate_expr( program, - select, + referenced_tables, arg, target_register, cursor_hint, @@ -286,9 +786,16 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg, cursor_hint)?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + 
let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { @@ -319,7 +826,13 @@ pub fn translate_expr( }; let regs = program.alloc_register(); - translate_expr(program, select, &args[0], regs, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + regs, + cursor_hint, + )?; program.emit_insn(Insn::Function { start_reg: regs, dest: target_register, @@ -351,7 +864,7 @@ pub fn translate_expr( let arg_reg = program.alloc_register(); let _ = translate_expr( program, - select, + referenced_tables, &args[0], arg_reg, cursor_hint, @@ -360,9 +873,9 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { - start_reg, + start_reg: start_reg, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Date), }); Ok(target_register) } @@ -409,7 +922,7 @@ pub fn translate_expr( let arg_reg = program.alloc_register(); let _ = translate_expr( program, - select, + referenced_tables, &args[0], arg_reg, cursor_hint, @@ -418,7 +931,7 @@ pub fn translate_expr( } } program.emit_insn(Insn::Function { - start_reg, + start_reg: start_reg, dest: target_register, func: crate::vdbe::Func::Scalar(ScalarFunc::Time), }); @@ -445,7 +958,7 @@ pub fn translate_expr( for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); } @@ -459,7 +972,7 @@ pub fn translate_expr( } ScalarFunc::Min => { let args = if let Some(args) = args { - if args.is_empty() { + if args.len() < 1 { crate::bail_parse_error!( "min function with less than one argument" ); @@ -470,22 +983,29 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = 
translate_expr(program, select, arg, reg, cursor_hint)?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Min), }); Ok(target_register) } ScalarFunc::Max => { let args = if let Some(args) = args { - if args.is_empty() { + if args.len() < 1 { crate::bail_parse_error!( "max function with less than one argument" ); @@ -496,16 +1016,23 @@ pub fn translate_expr( }; for arg in args { let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg, cursor_hint)?; - if let ast::Expr::Literal(_) = arg { - program.mark_last_insn_constant() + let _ = translate_expr( + program, + referenced_tables, + arg, + reg, + cursor_hint, + )?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} } } program.emit_insn(Insn::Function { start_reg: target_register + 1, dest: target_register, - func: crate::vdbe::Func::Scalar(srf), + func: crate::vdbe::Func::Scalar(ScalarFunc::Max), }); Ok(target_register) } @@ -519,9 +1046,9 @@ pub fn translate_expr( ast::Expr::FunctionCallStar { .. 
} => todo!(), ast::Expr::Id(ident) => { // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); - let (idx, col_type, cursor_id, is_rowid_alias) = - resolve_ident_table(program, &ident.0, select, cursor_hint)?; - if is_rowid_alias { + let (idx, col_type, cursor_id, is_primary_key) = + resolve_ident_table(program, &ident.0, referenced_tables, cursor_hint)?; + if is_primary_key { program.emit_insn(Insn::RowId { cursor_id, dest: target_register, @@ -581,8 +1108,13 @@ pub fn translate_expr( ast::Expr::NotNull(_) => todo!(), ast::Expr::Parenthesized(_) => todo!(), ast::Expr::Qualified(tbl, ident) => { - let (idx, col_type, cursor_id, is_primary_key) = - resolve_ident_qualified(program, &tbl.0, &ident.0, select.unwrap(), cursor_hint)?; + let (idx, col_type, cursor_id, is_primary_key) = resolve_ident_qualified( + program, + &tbl.0, + &ident.0, + referenced_tables.unwrap(), + cursor_hint, + )?; if is_primary_key { program.emit_insn(Insn::RowId { cursor_id, @@ -622,85 +1154,6 @@ pub fn translate_expr( } } -pub fn analyze_columns<'a>( - columns: &'a Vec, - joins: &Vec, -) -> Vec> { - let mut column_information_list = Vec::with_capacity(columns.len()); - for column in columns { - let mut info = ColumnInfo::new(column); - if let ast::ResultColumn::Star = column { - info.columns_to_allocate = 0; - for join in joins { - info.columns_to_allocate += join.table.columns().len(); - } - } else { - info.columns_to_allocate = 1; - analyze_column(column, &mut info); - } - column_information_list.push(info); - } - column_information_list -} - -/// Analyze a column expression. -/// -/// This function will walk all columns and find information about: -/// * Aggregation functions. 
-fn analyze_column<'a>(column: &'a ast::ResultColumn, column_info_out: &mut ColumnInfo<'a>) { - match column { - ast::ResultColumn::Expr(expr, _) => analyze_expr(expr, column_info_out), - ast::ResultColumn::Star => {} - ast::ResultColumn::TableStar(_) => {} - } -} - -pub fn analyze_expr<'a>(expr: &'a Expr, column_info_out: &mut ColumnInfo<'a>) { - match expr { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - order_by: _, - } => { - let args_count = if let Some(args) = args { args.len() } else { 0 }; - let func_type = - match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), args_count) - { - Ok(func) => Some(func), - Err(_) => None, - }; - if func_type.is_none() { - let args = args.as_ref().unwrap(); - if !args.is_empty() { - analyze_expr(args.first().unwrap(), column_info_out); - } - } else { - column_info_out.func = func_type; - // TODO(pere): use lifetimes for args? Arenas would be lovely here :( - column_info_out.args = args; - } - } - ast::Expr::FunctionCallStar { - name, - filter_over: _, - } => { - let func_type = - match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), 1) { - Ok(func) => Some(func), - Err(_) => None, - }; - if func_type.is_none() { - panic!("Function not found"); - } else { - column_info_out.func = func_type; - } - } - _ => {} - } -} - fn wrap_eval_jump_expr( program: &mut ProgramBuilder, insn: Insn, @@ -723,46 +1176,41 @@ pub fn resolve_ident_qualified( program: &ProgramBuilder, table_name: &String, ident: &String, - select: &Select, + referenced_tables: &[(Rc, String)], cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); let table_name = normalize_ident(table_name); - for join in &select.src_tables { - match join.table { - Table::BTree(ref table) => { - if *join.identifier == table_name { - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, 
col.primary_key)); - let mut idx; - let mut col_type; - let mut is_primary_key; - if res.is_some() { - (idx, col_type, is_primary_key) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == format!("{}.{}", table_name, ident)) - }) { - idx = res.0; - col_type = res.1.ty; - is_primary_key = res.1.primary_key; - } - } - let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); - return Ok((idx, col_type, cursor_id, is_primary_key)); + for (catalog_table, identifier) in referenced_tables.iter() { + if *identifier == table_name { + let res = catalog_table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident) + .map(|(idx, col)| (idx, col.ty, col.primary_key)); + let mut idx; + let mut col_type; + let mut is_primary_key; + if res.is_some() { + (idx, col_type, is_primary_key) = res.unwrap(); + // overwrite if cursor hint is provided + if let Some(cursor_hint) = cursor_hint { + let cols = &program.cursor_ref[cursor_hint].1; + if let Some(res) = cols.as_ref().and_then(|res| { + res.columns() + .iter() + .enumerate() + .find(|x| x.1.name == format!("{}.{}", table_name, ident)) + }) { + idx = res.0; + col_type = res.1.ty; + is_primary_key = res.1.primary_key; } } + let cursor_id = program.resolve_cursor_id(identifier, cursor_hint); + return Ok((idx, col_type, cursor_id, is_primary_key)); } - Table::Pseudo(_) => todo!(), } } crate::bail_parse_error!( @@ -775,44 +1223,39 @@ pub fn resolve_ident_qualified( pub fn resolve_ident_table( program: &ProgramBuilder, ident: &String, - select: Option<&Select>, + referenced_tables: Option<&[(Rc, String)]>, cursor_hint: Option, ) -> Result<(usize, Type, usize, bool)> { let ident = normalize_ident(ident); let mut found = Vec::new(); - for join in &select.unwrap().src_tables { - match join.table 
{ - Table::BTree(ref table) => { - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, table.column_is_rowid_alias(col))); - let mut idx; - let mut col_type; - let mut is_rowid_alias; - if res.is_some() { - (idx, col_type, is_rowid_alias) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == *ident) - }) { - idx = res.0; - col_type = res.1.ty; - is_rowid_alias = table.column_is_rowid_alias(res.1); - } - } - let cursor_id = program.resolve_cursor_id(&join.identifier, cursor_hint); - found.push((idx, col_type, cursor_id, is_rowid_alias)); + for (catalog_table, identifier) in referenced_tables.unwrap() { + let res = catalog_table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident) + .map(|(idx, col)| (idx, col.ty, col.primary_key)); + let mut idx; + let mut col_type; + let mut is_primary_key; + if res.is_some() { + (idx, col_type, is_primary_key) = res.unwrap(); + // overwrite if cursor hint is provided + if let Some(cursor_hint) = cursor_hint { + let cols = &program.cursor_ref[cursor_hint].1; + if let Some(res) = cols.as_ref().and_then(|res| { + res.columns() + .iter() + .enumerate() + .find(|x| x.1.name == *ident) + }) { + idx = res.0; + col_type = res.1.ty; + is_primary_key = res.1.primary_key; } } - Table::Pseudo(_) => todo!(), + let cursor_id = program.resolve_cursor_id(identifier, cursor_hint); + found.push((idx, col_type, cursor_id, is_primary_key)); } } if found.len() == 1 { @@ -832,3 +1275,246 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu }) } } + +pub fn translate_aggregation( + program: &mut ProgramBuilder, + referenced_tables: &[(Rc, String)], + agg: &Aggregate, + target_register: usize, + cursor_hint: Option, +) -> 
Result { + let dest = match agg.func { + AggFunc::Avg => { + if agg.args.len() != 1 { + crate::bail_parse_error!("avg bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Avg, + }); + target_register + } + AggFunc::Count => { + let expr_reg = if agg.args.is_empty() { + program.alloc_register() + } else { + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + expr_reg + }; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Count, + }); + target_register + } + AggFunc::GroupConcat => { + if agg.args.len() != 1 && agg.args.len() != 2 { + crate::bail_parse_error!("group_concat bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let expr = &agg.args[0]; + let delimiter_expr: ast::Expr; + + if agg.args.len() == 2 { + match &agg.args[1] { + ast::Expr::Id(ident) => { + if ident.0.starts_with('"') { + delimiter_expr = + ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); + } else { + delimiter_expr = agg.args[1].clone(); + } + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + } else { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); + } + + translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + cursor_hint, + )?; + + 
program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::GroupConcat, + }); + + target_register + } + AggFunc::Max => { + if agg.args.len() != 1 { + crate::bail_parse_error!("max bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Max, + }); + target_register + } + AggFunc::Min => { + if agg.args.len() != 1 { + crate::bail_parse_error!("min bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + ); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Min, + }); + target_register + } + AggFunc::StringAgg => { + if agg.args.len() != 2 { + crate::bail_parse_error!("string_agg bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let expr = &agg.args[0]; + let delimiter_expr: ast::Expr; + + match &agg.args[1] { + ast::Expr::Id(ident) => { + if ident.0.starts_with('"') { + crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); + } else { + delimiter_expr = agg.args[1].clone(); + } + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + + translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + cursor_hint, + )?; + + 
program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::StringAgg, + }); + + target_register + } + AggFunc::Sum => { + if agg.args.len() != 1 { + crate::bail_parse_error!("sum bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Sum, + }); + target_register + } + AggFunc::Total => { + if agg.args.len() != 1 { + crate::bail_parse_error!("total bad number of arguments"); + } + let expr = &agg.args[0]; + let expr_reg = program.alloc_register(); + let _ = translate_expr( + program, + Some(referenced_tables), + expr, + expr_reg, + cursor_hint, + )?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Total, + }); + target_register + } + }; + Ok(dest) +} diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 5a21fee4c..3b7d9ea62 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -7,10 +7,13 @@ //! a SELECT statement will be translated into a sequence of instructions that //! will read rows from the database and filter them according to a WHERE clause. 
+pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod insert; +pub(crate) mod optimizer; +pub(crate) mod plan; +pub(crate) mod planner; pub(crate) mod select; -pub(crate) mod where_clause; use std::cell::RefCell; use std::rc::Rc; @@ -18,11 +21,10 @@ use std::rc::Rc; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; -use crate::util::normalize_ident; use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Result}; use insert::translate_insert; -use select::{prepare_select, translate_select}; +use select::translate_select; use sqlite3_parser::ast; /// Translate SQL statement into bytecode program. @@ -56,10 +58,7 @@ pub fn translate( ast::Stmt::Release(_) => bail_parse_error!("RELEASE not supported yet"), ast::Stmt::Rollback { .. } => bail_parse_error!("ROLLBACK not supported yet"), ast::Stmt::Savepoint(_) => bail_parse_error!("SAVEPOINT not supported yet"), - ast::Stmt::Select(select) => { - let select = prepare_select(schema, &select)?; - translate_select(select, database_header) - } + ast::Stmt::Select(select) => translate_select(schema, select, database_header), ast::Stmt::Update { .. } => bail_parse_error!("UPDATE not supported yet"), ast::Stmt::Vacuum(_, _) => bail_parse_error!("VACUUM not supported yet"), ast::Stmt::Insert { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs new file mode 100644 index 000000000..31a9811ab --- /dev/null +++ b/core/translate/optimizer.rs @@ -0,0 +1,732 @@ +use std::rc::Rc; + +use sqlite3_parser::ast; + +use crate::{schema::BTreeTable, util::normalize_ident, Result}; + +use super::plan::{ + get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_query_plan_node, Operator, Plan, +}; + +/** + * Make a few passes over the plan to optimize it. 
+ */ +pub fn optimize_plan(mut select_plan: Plan) -> Result { + push_predicates(&mut select_plan.root_node, &select_plan.referenced_tables)?; + eliminate_constants(&mut select_plan.root_node)?; + use_indexes(&mut select_plan.root_node, &select_plan.referenced_tables)?; + Ok(select_plan) +} + +/** + * Use indexes where possible (currently just primary key lookups) + */ +fn use_indexes(node: &mut Operator, referenced_tables: &[(Rc, String)]) -> Result<()> { + match node { + Operator::Scan { + table, + predicates: filter, + table_identifier, + id, + } => { + if filter.is_none() { + return Ok(()); + } + + let fs = filter.as_mut().unwrap(); + let mut i = 0; + let mut maybe_rowid_predicate = None; + while i < fs.len() { + let f = fs[i].take_ownership(); + let table_index = referenced_tables + .iter() + .position(|(t, t_id)| Rc::ptr_eq(t, table) && t_id == table_identifier) + .unwrap(); + let (can_use, expr) = + try_extract_rowid_comparison_expression(f, table_index, referenced_tables)?; + if can_use { + maybe_rowid_predicate = Some(expr); + fs.remove(i); + break; + } else { + fs[i] = expr; + i += 1; + } + } + + if let Some(rowid_predicate) = maybe_rowid_predicate { + let predicates_owned = if fs.is_empty() { + None + } else { + Some(fs.drain(..).collect()) + }; + *node = Operator::SeekRowid { + table: table.clone(), + table_identifier: table_identifier.clone(), + rowid_predicate, + predicates: predicates_owned, + id: *id, + } + } + + return Ok(()); + } + Operator::Aggregate { source, .. } => { + use_indexes(source, referenced_tables)?; + return Ok(()); + } + Operator::Filter { source, .. } => { + use_indexes(source, referenced_tables)?; + return Ok(()); + } + Operator::SeekRowid { .. } => { + return Ok(()); + } + Operator::Limit { source, .. } => { + use_indexes(source, referenced_tables)?; + return Ok(()); + } + Operator::Join { left, right, .. 
} => { + use_indexes(left, referenced_tables)?; + use_indexes(right, referenced_tables)?; + return Ok(()); + } + Operator::Order { source, .. } => { + use_indexes(source, referenced_tables)?; + return Ok(()); + } + Operator::Projection { source, .. } => { + use_indexes(source, referenced_tables)?; + return Ok(()); + } + Operator::Nothing => { + return Ok(()); + } + } +} + +// removes predicates that are always true +// returns false if there is an impossible predicate that is always false +fn eliminate_constants(node: &mut Operator) -> Result { + match node { + Operator::Filter { + source, predicates, .. + } => { + let mut i = 0; + while i < predicates.len() { + let predicate = &predicates[i]; + if predicate.is_always_true()? { + predicates.remove(i); + } else if predicate.is_always_false()? { + return Ok(false); + } else { + i += 1; + } + } + + if predicates.is_empty() { + *node = source.take_ownership(); + eliminate_constants(node)?; + } else { + eliminate_constants(source)?; + } + + return Ok(true); + } + Operator::Join { + left, + right, + predicates, + outer, + .. + } => { + if !eliminate_constants(left)? { + return Ok(false); + } + if !eliminate_constants(right)? && !*outer { + return Ok(false); + } + + if predicates.is_none() { + return Ok(true); + } + + let predicates = predicates.as_mut().unwrap(); + + let mut i = 0; + while i < predicates.len() { + let predicate = &predicates[i]; + if predicate.is_always_true()? { + predicates.remove(i); + } else if predicate.is_always_false()? && !*outer { + return Ok(false); + } else { + i += 1; + } + } + + return Ok(true); + } + Operator::Aggregate { source, .. } => { + let ok = eliminate_constants(source)?; + if !ok { + *source = Box::new(Operator::Nothing); + } + return Ok(ok); + } + Operator::SeekRowid { + rowid_predicate, + predicates, + .. + } => { + if let Some(predicates) = predicates { + let mut i = 0; + while i < predicates.len() { + let predicate = &predicates[i]; + if predicate.is_always_true()? 
{ + predicates.remove(i); + } else if predicate.is_always_false()? { + return Ok(false); + } else { + i += 1; + } + } + } + + if rowid_predicate.is_always_false()? { + return Ok(false); + } + + return Ok(true); + } + Operator::Limit { source, .. } => { + let ok = eliminate_constants(source)?; + if !ok { + *node = Operator::Nothing; + } + return Ok(ok); + } + Operator::Order { source, .. } => { + let ok = eliminate_constants(source)?; + if !ok { + *node = Operator::Nothing; + } + return Ok(true); + } + Operator::Projection { source, .. } => { + let ok = eliminate_constants(source)?; + if !ok { + *node = Operator::Nothing; + } + return Ok(ok); + } + Operator::Scan { predicates, .. } => { + if let Some(ps) = predicates { + let mut i = 0; + while i < ps.len() { + let predicate = &ps[i]; + if predicate.is_always_true()? { + ps.remove(i); + } else if predicate.is_always_false()? { + return Ok(false); + } else { + i += 1; + } + } + + if ps.is_empty() { + *predicates = None; + } + } + return Ok(true); + } + Operator::Nothing => return Ok(true), + } +} + +/** + Recursively pushes predicates down the tree, as far as possible. +*/ +fn push_predicates( + node: &mut Operator, + referenced_tables: &Vec<(Rc, String)>, +) -> Result<()> { + match node { + Operator::Filter { + source, predicates, .. + } => { + let mut i = 0; + while i < predicates.len() { + // try to push the predicate to the source + // if it succeeds, remove the predicate from the filter + let predicate_owned = predicates[i].take_ownership(); + let Some(predicate) = push_predicate(source, predicate_owned, referenced_tables)? + else { + predicates.remove(i); + continue; + }; + predicates[i] = predicate; + i += 1; + } + + if predicates.is_empty() { + *node = source.take_ownership(); + } + + return Ok(()); + } + Operator::Join { + left, + right, + predicates, + outer, + .. 
+ } => { + push_predicates(left, referenced_tables)?; + push_predicates(right, referenced_tables)?; + + if predicates.is_none() { + return Ok(()); + } + + let predicates = predicates.as_mut().unwrap(); + + let mut i = 0; + while i < predicates.len() { + // try to push the predicate to the left side first, then to the right side + + // temporarily take ownership of the predicate + let predicate_owned = predicates[i].take_ownership(); + // left join predicates can't be pushed to the left side + let push_result = if *outer { + Some(predicate_owned) + } else { + push_predicate(left, predicate_owned, referenced_tables)? + }; + // if the predicate was pushed to a child, remove it from the list + let Some(predicate) = push_result else { + predicates.remove(i); + continue; + }; + // otherwise try to push it to the right side + // if it was pushed to the right side, remove it from the list + let Some(predicate) = push_predicate(right, predicate, referenced_tables)? else { + predicates.remove(i); + continue; + }; + // otherwise keep the predicate in the list + predicates[i] = predicate; + i += 1; + } + + return Ok(()); + } + Operator::Aggregate { source, .. } => { + push_predicates(source, referenced_tables)?; + + return Ok(()); + } + Operator::SeekRowid { .. } => { + return Ok(()); + } + Operator::Limit { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Order { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Projection { source, .. } => { + push_predicates(source, referenced_tables)?; + return Ok(()); + } + Operator::Scan { .. } => { + return Ok(()); + } + Operator::Nothing => { + return Ok(()); + } + } +} + +/** + Push a single predicate down the tree, as far as possible.
+ Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) +*/ +fn push_predicate( + node: &mut Operator, + predicate: ast::Expr, + referenced_tables: &Vec<(Rc, String)>, +) -> Result> { + match node { + Operator::Scan { + predicates, + table_identifier, + .. + } => { + let table_index = referenced_tables + .iter() + .position(|(_, t_id)| t_id == table_identifier) + .unwrap(); + + let predicate_bitmask = + get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; + + // the expression is allowed to refer to tables on its left, i.e. the righter bits in the mask + // e.g. if this table is 0010, and the table on its right in the join is 0100: + // if predicate_bitmask is 0011, the predicate can be pushed (refers to this table and the table on its left) + // if predicate_bitmask is 0001, the predicate can be pushed (refers to the table on its left) + // if predicate_bitmask is 0101, the predicate can't be pushed (refers to this table and a table on its right) + let next_table_on_the_right_in_join_bitmask = 1 << (table_index + 1); + if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { + return Ok(Some(predicate)); + } + + if predicates.is_none() { + predicates.replace(vec![predicate]); + } else { + predicates.as_mut().unwrap().push(predicate); + } + + return Ok(None); + } + Operator::Filter { + source, + predicates: ps, + .. + } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + ps.push(push_result.unwrap()); + + return Ok(None); + } + Operator::Join { + left, + right, + predicates: join_on_preds, + outer, + .. 
+ } => { + let push_result_left = push_predicate(left, predicate, referenced_tables)?; + if push_result_left.is_none() { + return Ok(None); + } + let push_result_right = + push_predicate(right, push_result_left.unwrap(), referenced_tables)?; + if push_result_right.is_none() { + return Ok(None); + } + + if *outer { + return Ok(Some(push_result_right.unwrap())); + } + + let pred = push_result_right.unwrap(); + + let table_refs_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &pred)?; + + let left_bitmask = get_table_ref_bitmask_for_query_plan_node(referenced_tables, left)?; + let right_bitmask = + get_table_ref_bitmask_for_query_plan_node(referenced_tables, right)?; + + if table_refs_bitmask & left_bitmask == 0 || table_refs_bitmask & right_bitmask == 0 { + return Ok(Some(pred)); + } + + if join_on_preds.is_none() { + join_on_preds.replace(vec![pred]); + } else { + join_on_preds.as_mut().unwrap().push(pred); + } + + return Ok(None); + } + Operator::Aggregate { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::SeekRowid { .. } => { + return Ok(Some(predicate)); + } + Operator::Limit { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Order { source, .. } => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Projection { source, .. 
} => { + let push_result = push_predicate(source, predicate, referenced_tables)?; + if push_result.is_none() { + return Ok(None); + } + + return Ok(Some(push_result.unwrap())); + } + Operator::Nothing => { + return Ok(Some(predicate)); + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConstantPredicate { + AlwaysTrue, + AlwaysFalse, +} + +/** + Helper trait for expressions that can be optimized + Implemented for ast::Expr +*/ +pub trait Optimizable { + // if the expression is a constant expression e.g. '1', returns the constant condition + fn check_constant(&self) -> Result>; + fn is_always_true(&self) -> Result { + Ok(self + .check_constant()? + .map_or(false, |c| c == ConstantPredicate::AlwaysTrue)) + } + fn is_always_false(&self) -> Result { + Ok(self + .check_constant()? + .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) + } + // if the expression is the primary key of a table, returns the index of the table + fn check_primary_key( + &self, + referenced_tables: &[(Rc, String)], + ) -> Result>; +} + +impl Optimizable for ast::Expr { + fn check_primary_key( + &self, + referenced_tables: &[(Rc, String)], + ) -> Result> { + match self { + ast::Expr::Id(ident) => { + let ident = normalize_ident(&ident.0); + let tables = referenced_tables + .iter() + .enumerate() + .filter_map(|(i, (t, _))| { + if t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) { + Some(i) + } else { + None + } + }); + + let mut matches = 0; + let mut matching_tbl = None; + + for tbl in tables { + matching_tbl = Some(tbl); + matches += 1; + if matches > 1 { + crate::bail_parse_error!("ambiguous column name {}", ident) + } + } + + Ok(matching_tbl) + } + ast::Expr::Qualified(tbl, ident) => { + let tbl = normalize_ident(&tbl.0); + let ident = normalize_ident(&ident.0); + let table = referenced_tables.iter().enumerate().find(|(_, (t, t_id))| { + *t_id == tbl && t.get_column(&ident).map_or(false, |(_, c)| c.primary_key) + }); + + if table.is_none() { + return 
Ok(None); + } + + let table = table.unwrap(); + + Ok(Some(table.0)) + } + _ => Ok(None), + } + } + fn check_constant(&self) -> Result> { + match self { + ast::Expr::Literal(lit) => match lit { + ast::Literal::Null => Ok(Some(ConstantPredicate::AlwaysFalse)), + ast::Literal::Numeric(b) => { + if let Ok(int_value) = b.parse::() { + return Ok(Some(if int_value == 0 { + ConstantPredicate::AlwaysFalse + } else { + ConstantPredicate::AlwaysTrue + })); + } + if let Ok(float_value) = b.parse::() { + return Ok(Some(if float_value == 0.0 { + ConstantPredicate::AlwaysFalse + } else { + ConstantPredicate::AlwaysTrue + })); + } + + Ok(None) + } + ast::Literal::String(s) => { + let without_quotes = s.trim_matches('\''); + if let Ok(int_value) = without_quotes.parse::() { + return Ok(Some(if int_value == 0 { + ConstantPredicate::AlwaysFalse + } else { + ConstantPredicate::AlwaysTrue + })); + } + + if let Ok(float_value) = without_quotes.parse::() { + return Ok(Some(if float_value == 0.0 { + ConstantPredicate::AlwaysFalse + } else { + ConstantPredicate::AlwaysTrue + })); + } + + Ok(Some(ConstantPredicate::AlwaysFalse)) + } + _ => Ok(None), + }, + ast::Expr::Unary(op, expr) => { + if *op == ast::UnaryOperator::Not { + let trivial = expr.check_constant()?; + return Ok(trivial.map(|t| match t { + ConstantPredicate::AlwaysTrue => ConstantPredicate::AlwaysFalse, + ConstantPredicate::AlwaysFalse => ConstantPredicate::AlwaysTrue, + })); + } + + if *op == ast::UnaryOperator::Negative { + let trivial = expr.check_constant()?; + return Ok(trivial); + } + + Ok(None) + } + ast::Expr::InList { lhs: _, not, rhs } => { + if rhs.is_none() { + return Ok(Some(if *not { + ConstantPredicate::AlwaysTrue + } else { + ConstantPredicate::AlwaysFalse + })); + } + let rhs = rhs.as_ref().unwrap(); + if rhs.is_empty() { + return Ok(Some(if *not { + ConstantPredicate::AlwaysTrue + } else { + ConstantPredicate::AlwaysFalse + })); + } + + Ok(None) + } + ast::Expr::Binary(lhs, op, rhs) => { + let lhs_trivial = 
lhs.check_constant()?; + let rhs_trivial = rhs.check_constant()?; + match op { + ast::Operator::And => { + if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) + || rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + { + return Ok(Some(ConstantPredicate::AlwaysFalse)); + } + if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) + && rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + { + return Ok(Some(ConstantPredicate::AlwaysTrue)); + } + + Ok(None) + } + ast::Operator::Or => { + if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) + || rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + { + return Ok(Some(ConstantPredicate::AlwaysTrue)); + } + if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) + && rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + { + return Ok(Some(ConstantPredicate::AlwaysFalse)); + } + + Ok(None) + } + _ => Ok(None), + } + } + _ => Ok(None), + } + } +} + +pub fn try_extract_rowid_comparison_expression( + expr: ast::Expr, + table_index: usize, + referenced_tables: &[(Rc, String)], +) -> Result<(bool, ast::Expr)> { + match expr { + ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => { + if let Some(lhs_table_index) = lhs.check_primary_key(referenced_tables)? { + if lhs_table_index == table_index { + return Ok((true, *rhs)); + } + } + + if let Some(rhs_table_index) = rhs.check_primary_key(referenced_tables)? 
{ + if rhs_table_index == table_index { + return Ok((true, *lhs)); + } + } + + Ok((false, ast::Expr::Binary(lhs, ast::Operator::Equals, rhs))) + } + _ => Ok((false, expr)), + } +} + +trait TakeOwnership { + fn take_ownership(&mut self) -> Self; +} + +impl TakeOwnership for ast::Expr { + fn take_ownership(&mut self) -> Self { + std::mem::replace(self, ast::Expr::Literal(ast::Literal::Null)) + } +} + +impl TakeOwnership for Operator { + fn take_ownership(&mut self) -> Self { + std::mem::replace(self, Operator::Nothing) + } +} + +fn replace_with(expr: &mut T, mut replacement: T) { + *expr = replacement.take_ownership(); +} diff --git a/core/translate/plan.rs b/core/translate/plan.rs new file mode 100644 index 000000000..75162e07c --- /dev/null +++ b/core/translate/plan.rs @@ -0,0 +1,469 @@ +use core::fmt; +use std::{ + fmt::{Display, Formatter}, + rc::Rc, +}; + +use sqlite3_parser::ast; + +use crate::{function::AggFunc, schema::BTreeTable, util::normalize_ident, Result}; + +pub struct Plan { + pub root_node: Operator, + pub referenced_tables: Vec<(Rc, String)>, +} + +impl Display for Plan { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.root_node) + } +} + +/** + An Operator is a Node in the query plan. + Operators form a tree structure, with each having zero or more children. 
+ For example, a query like `SELECT t1.foo FROM t1 ORDER BY t1.foo LIMIT 1` would have the following structure: + Limit + Order + Project + Scan +*/ +#[derive(Clone, Debug)] +pub enum Operator { + Aggregate { + id: usize, + source: Box, + aggregates: Vec, + }, + Filter { + id: usize, + source: Box, + predicates: Vec, + }, + SeekRowid { + id: usize, + table: Rc, + table_identifier: String, + rowid_predicate: ast::Expr, + predicates: Option>, + }, + Limit { + id: usize, + source: Box, + limit: usize, + }, + Join { + id: usize, + left: Box, + right: Box, + predicates: Option>, + outer: bool, + }, + Order { + id: usize, + source: Box, + key: Vec<(ast::Expr, Direction)>, + }, + Projection { + id: usize, + source: Box, + expressions: Vec, + }, + Scan { + id: usize, + table: Rc, + table_identifier: String, + predicates: Option>, + }, + Nothing, +} + +#[derive(Clone, Debug)] +pub enum ProjectionColumn { + Column(ast::Expr), + Star, + TableStar(Rc, String), +} + +impl ProjectionColumn { + pub fn column_count(&self, referenced_tables: &[(Rc, String)]) -> usize { + match self { + ProjectionColumn::Column(_) => 1, + ProjectionColumn::Star => { + let mut count = 0; + for (table, _) in referenced_tables { + count += table.columns.len(); + } + count + } + ProjectionColumn::TableStar(table, _) => table.columns.len(), + } + } +} + +impl Operator { + pub fn column_count(&self, referenced_tables: &[(Rc, String)]) -> usize { + match self { + Operator::Aggregate { aggregates, .. } => aggregates.len(), + Operator::Filter { source, .. } => source.column_count(referenced_tables), + Operator::SeekRowid { table, .. } => table.columns.len(), + Operator::Limit { source, .. } => source.column_count(referenced_tables), + Operator::Join { left, right, .. } => { + left.column_count(referenced_tables) + right.column_count(referenced_tables) + } + Operator::Order { source, .. } => source.column_count(referenced_tables), + Operator::Projection { expressions, .. 
} => expressions + .iter() + .map(|e| e.column_count(referenced_tables)) + .sum(), + Operator::Scan { table, .. } => table.columns.len(), + Operator::Nothing => 0, + } + } + + pub fn column_names(&self) -> Vec { + match self { + Operator::Aggregate { .. } => { + todo!(); + } + Operator::Filter { source, .. } => source.column_names(), + Operator::SeekRowid { table, .. } => { + table.columns.iter().map(|c| c.name.clone()).collect() + } + Operator::Limit { source, .. } => source.column_names(), + Operator::Join { left, right, .. } => { + let mut names = left.column_names(); + names.extend(right.column_names()); + names + } + Operator::Order { source, .. } => source.column_names(), + Operator::Projection { expressions, .. } => expressions + .iter() + .map(|e| match e { + ProjectionColumn::Column(expr) => match expr { + ast::Expr::Id(ident) => ident.0.clone(), + ast::Expr::Qualified(tbl, ident) => format!("{}.{}", tbl.0, ident.0), + _ => "expr".to_string(), + }, + ProjectionColumn::Star => "*".to_string(), + ProjectionColumn::TableStar(_, tbl) => format!("{}.{}", tbl, "*"), + }) + .collect(), + Operator::Scan { table, .. } => table.columns.iter().map(|c| c.name.clone()).collect(), + Operator::Nothing => vec![], + } + } + + pub fn id(&self) -> usize { + match self { + Operator::Aggregate { id, .. } => *id, + Operator::Filter { id, .. } => *id, + Operator::SeekRowid { id, .. } => *id, + Operator::Limit { id, .. } => *id, + Operator::Join { id, .. } => *id, + Operator::Order { id, .. } => *id, + Operator::Projection { id, .. } => *id, + Operator::Scan { id, .. 
} => *id, + Operator::Nothing => unreachable!(), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Direction { + Ascending, + Descending, +} + +impl Display for Direction { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Direction::Ascending => write!(f, "ASC"), + Direction::Descending => write!(f, "DESC"), + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Aggregate { + pub func: AggFunc, + pub args: Vec, +} + +impl Display for Aggregate { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let args_str = self + .args + .iter() + .map(|arg| arg.to_string()) + .collect::>() + .join(", "); + write!(f, "{:?}({})", self.func, args_str) + } +} + +// For EXPLAIN QUERY PLAN +impl Display for Operator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + fn fmt_node(node: &Operator, f: &mut Formatter, level: usize) -> fmt::Result { + let indent = " ".repeat(level); + match node { + Operator::Aggregate { + source, aggregates, .. + } => { + // e.g. Aggregate count(*), sum(x) + let aggregates_display_string = aggregates + .iter() + .map(|agg| agg.to_string()) + .collect::>() + .join(", "); + writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; + fmt_node(source, f, level + 1) + } + Operator::Filter { + source, predicates, .. + } => { + let predicates_string = predicates + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!(f, "{}FILTER {}", indent, predicates_string)?; + fmt_node(source, f, level + 1) + } + Operator::SeekRowid { + table, + rowid_predicate, + predicates, + .. 
+ } => { + match predicates { + Some(ps) => { + let predicates_string = ps + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!( + f, + "{}SEEK {}.rowid ON rowid={} FILTER {}", + indent, &table.name, rowid_predicate, predicates_string + )?; + } + None => writeln!( + f, + "{}SEEK {}.rowid ON rowid={}", + indent, &table.name, rowid_predicate + )?, + } + + Ok(()) + } + Operator::Limit { source, limit, .. } => { + writeln!(f, "{}TAKE {}", indent, limit)?; + fmt_node(source, f, level + 1) + } + Operator::Join { + left, + right, + predicates, + outer, + .. + } => { + let join_name = if *outer { "OUTER JOIN" } else { "JOIN" }; + match predicates + .as_ref() + .and_then(|ps| if ps.is_empty() { None } else { Some(ps) }) + { + Some(ps) => { + let predicates_string = ps + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + writeln!(f, "{}{} ON {}", indent, join_name, predicates_string)?; + } + None => writeln!(f, "{}{}", indent, join_name)?, + } + fmt_node(left, f, level + 1)?; + fmt_node(right, f, level + 1) + } + Operator::Order { source, key, .. } => { + let sort_keys_string = key + .iter() + .map(|(expr, dir)| format!("{} {}", expr, dir)) + .collect::>() + .join(", "); + writeln!(f, "{}SORT {}", indent, sort_keys_string)?; + fmt_node(source, f, level + 1) + } + Operator::Projection { + source, + expressions, + .. + } => { + let expressions = expressions + .iter() + .map(|expr| match expr { + ProjectionColumn::Column(c) => c.to_string(), + ProjectionColumn::Star => "*".to_string(), + ProjectionColumn::TableStar(_, a) => format!("{}.{}", a, "*"), + }) + .collect::>() + .join(", "); + writeln!(f, "{}PROJECT {}", indent, expressions)?; + fmt_node(source, f, level + 1) + } + Operator::Scan { + table, + predicates: filter, + table_identifier, + .. 
+ } => { + let table_name = format!("{} AS {}", &table.name, &table_identifier); + let filter_string = filter.as_ref().map(|f| { + let filters_string = f + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(" AND "); + format!("FILTER {}", filters_string) + }); + match filter_string { + Some(fs) => writeln!(f, "{}SCAN {} {}", indent, table_name, fs), + None => writeln!(f, "{}SCAN {}", indent, table_name), + }?; + Ok(()) + } + Operator::Nothing => Ok(()), + } + } + fmt_node(self, f, 0) + } +} + +pub fn get_table_ref_bitmask_for_query_plan_node<'a>( + tables: &'a Vec<(Rc, String)>, + node: &'a Operator, +) -> Result { + let mut table_refs_mask = 0; + match node { + Operator::Aggregate { source, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + } + Operator::Filter { + source, predicates, .. + } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + for predicate in predicates { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?; + } + } + Operator::SeekRowid { table, .. } => { + table_refs_mask |= 1 + << tables + .iter() + .position(|(t, _)| Rc::ptr_eq(t, table)) + .unwrap(); + } + Operator::Limit { source, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + } + Operator::Join { left, right, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, left)?; + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, right)?; + } + Operator::Order { source, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + } + Operator::Projection { source, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + } + Operator::Scan { table, .. 
} => { + table_refs_mask |= 1 + << tables + .iter() + .position(|(t, _)| Rc::ptr_eq(t, table)) + .unwrap(); + } + Operator::Nothing => {} + } + Ok(table_refs_mask) +} + +pub fn get_table_ref_bitmask_for_ast_expr<'a>( + tables: &'a Vec<(Rc, String)>, + predicate: &'a ast::Expr, +) -> Result { + let mut table_refs_mask = 0; + match predicate { + ast::Expr::Binary(e1, _, e2) => { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?; + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?; + } + ast::Expr::Id(ident) => { + let ident = normalize_ident(&ident.0); + let matching_tables = tables + .iter() + .enumerate() + .filter(|(_, (table, _))| table.get_column(&ident).is_some()); + + let mut matches = 0; + let mut matching_tbl = None; + for table in matching_tables { + matching_tbl = Some(table); + matches += 1; + if matches > 1 { + crate::bail_parse_error!("ambiguous column name {}", &ident) + } + } + + if let Some((tbl_index, _)) = matching_tbl { + table_refs_mask |= 1 << tbl_index; + } else { + crate::bail_parse_error!("column not found: {}", &ident) + } + } + ast::Expr::Qualified(tbl, ident) => { + let tbl = normalize_ident(&tbl.0); + let ident = normalize_ident(&ident.0); + let matching_table = tables + .iter() + .enumerate() + .find(|(_, (table, t_id))| *t_id == tbl); + + if matching_table.is_none() { + crate::bail_parse_error!("introspect: table not found: {}", &tbl) + } + let matching_table = matching_table.unwrap(); + if matching_table.1 .0.get_column(&ident).is_none() { + crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) + } + + table_refs_mask |= 1 << matching_table.0; + } + ast::Expr::Literal(_) => {} + ast::Expr::Like { lhs, rhs, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs)?; + } + ast::Expr::FunctionCall { + args: Some(args), .. 
+ } => { + for arg in args { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, arg)?; + } + } + ast::Expr::InList { lhs, rhs, .. } => { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?; + if let Some(rhs_list) = rhs { + for rhs_expr in rhs_list { + table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs_expr)?; + } + } + } + _ => {} + } + + Ok(table_refs_mask) +} diff --git a/core/translate/planner.rs b/core/translate/planner.rs new file mode 100644 index 000000000..9e494aca0 --- /dev/null +++ b/core/translate/planner.rs @@ -0,0 +1,359 @@ +use super::plan::{Aggregate, Direction, Operator, Plan, ProjectionColumn}; +use crate::{ + function::Func, + schema::{BTreeTable, Schema}, + util::normalize_ident, + Result, +}; +use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; +use std::rc::Rc; + +pub struct NodeIdCounter { + id: usize, +} + +impl NodeIdCounter { + pub fn new() -> Self { + Self { id: 0 } + } + pub fn get_next_id(&mut self) -> usize { + let id = self.id; + self.id += 1; + id + } +} + +pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result { + match select.body.select { + ast::OneSelect::Select { + columns, + from, + where_clause, + .. + } => { + let col_count = columns.len(); + if col_count == 0 { + crate::bail_parse_error!("SELECT without columns is not allowed"); + } + + let mut node_id_counter = NodeIdCounter::new(); + + // Parse the FROM clause + let (mut node, referenced_tables) = parse_from(schema, from, &mut node_id_counter)?; + + // Parse the WHERE clause + if let Some(w) = where_clause { + node = Operator::Filter { + source: Box::new(node), + predicates: break_predicate_at_and_boundaries(w, vec![]), + id: node_id_counter.get_next_id(), + }; + } + + // Parse the SELECT clause to either a projection or an aggregation + // depending on the presence of aggregate functions. 
+ // Since GROUP BY is not supported yet, mixing aggregate and non-aggregate + // columns is not allowed. + // + // If there are no aggregate functions, we can simply project the columns. + // For a simple SELECT *, the projection node is skipped. + let is_select_star = col_count == 1 && matches!(columns[0], ast::ResultColumn::Star); + if !is_select_star { + let mut aggregate_expressions = Vec::new(); + let mut scalar_expressions = Vec::with_capacity(col_count); + for column in columns.clone() { + match column { + ast::ResultColumn::Star => { + scalar_expressions.push(ProjectionColumn::Star); + } + ast::ResultColumn::TableStar(name) => { + let name_normalized = normalize_ident(name.0.as_str()); + let referenced_table = referenced_tables + .iter() + .find(|(t, t_id)| *t_id == name_normalized); + + if referenced_table.is_none() { + crate::bail_parse_error!("Table {} not found", name.0); + } + let (table, identifier) = referenced_table.unwrap(); + scalar_expressions.push(ProjectionColumn::TableStar( + table.clone(), + identifier.clone(), + )); + } + ast::ResultColumn::Expr(expr, _) => match expr { + ast::Expr::FunctionCall { + name, + distinctness, + args, + filter_over, + order_by, + } => { + let args_count = if let Some(args) = &args { + args.len() + } else { + 0 + }; + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + args_count, + ) { + Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate { + func: f, + args: args.unwrap(), + }), + Ok(_) => { + scalar_expressions.push(ProjectionColumn::Column( + ast::Expr::FunctionCall { + name, + distinctness, + args, + filter_over, + order_by, + }, + )); + } + _ => {} + } + } + ast::Expr::FunctionCallStar { name, filter_over } => { + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + 0, + ) { + Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate { + func: f, + args: vec![], + }), + Ok(Func::Scalar(_)) => { + scalar_expressions.push(ProjectionColumn::Column( + 
ast::Expr::FunctionCallStar { name, filter_over }, + )); + } + _ => {} + } + } + _ => { + scalar_expressions.push(ProjectionColumn::Column(expr)); + } + }, + } + } + + let mixing_aggregate_and_non_aggregate_columns = + !aggregate_expressions.is_empty() && aggregate_expressions.len() != col_count; + + if mixing_aggregate_and_non_aggregate_columns { + crate::bail_parse_error!( + "mixing aggregate and non-aggregate columns is not allowed (GROUP BY is not supported)" + ); + } + if !aggregate_expressions.is_empty() { + node = Operator::Aggregate { + source: Box::new(node), + aggregates: aggregate_expressions, + id: node_id_counter.get_next_id(), + } + } else if !scalar_expressions.is_empty() { + node = Operator::Projection { + source: Box::new(node), + expressions: scalar_expressions, + id: node_id_counter.get_next_id(), + }; + } + } + + // Parse the ORDER BY clause + if let Some(order_by) = select.order_by { + let mut key = Vec::new(); + for o in order_by { + // if the ORDER BY expression is a number, interpret it as an 1-indexed column number + // otherwise, interpret it normally as an expression + let expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = o.expr { + let column_number = num.parse::()?; + if column_number == 0 { + crate::bail_parse_error!("invalid column index: {}", column_number); + } + let maybe_result_column = columns.get(column_number - 1); + match maybe_result_column { + Some(ResultColumn::Expr(expr, _)) => expr.clone(), + None => { + crate::bail_parse_error!("invalid column index: {}", column_number) + } + _ => todo!(), + } + } else { + o.expr + }; + key.push(( + expr, + o.order.map_or(Direction::Ascending, |o| match o { + ast::SortOrder::Asc => Direction::Ascending, + ast::SortOrder::Desc => Direction::Descending, + }), + )); + } + node = Operator::Order { + source: Box::new(node), + key, + id: node_id_counter.get_next_id(), + }; + } + + // Parse the LIMIT clause + if let Some(limit) = &select.limit { + node = match &limit.expr { + 
ast::Expr::Literal(ast::Literal::Numeric(n)) => { + let l = n.parse()?; + if l == 0 { + Operator::Nothing + } else { + Operator::Limit { + source: Box::new(node), + limit: l, + id: node_id_counter.get_next_id(), + } + } + } + _ => todo!(), + } + } + + // Return the unoptimized query plan + return Ok(Plan { + root_node: node, + referenced_tables, + }); + } + _ => todo!(), + }; +} + +fn parse_from( + schema: &Schema, + from: Option, + node_id_counter: &mut NodeIdCounter, +) -> Result<(Operator, Vec<(Rc, String)>)> { + if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { + return Ok((Operator::Nothing, vec![])); + } + + let from = from.unwrap(); + + let first_table = match *from.select.unwrap() { + ast::SelectTable::Table(qualified_name, maybe_alias, _) => { + let Some(table) = schema.get_table(&qualified_name.name.0) else { + crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + }; + let alias = maybe_alias + .map(|a| match a { + ast::As::As(id) => id, + ast::As::Elided(id) => id, + }) + .map(|a| a.0); + + (table, alias.unwrap_or(qualified_name.name.0)) + } + _ => todo!(), + }; + + let mut node = Operator::Scan { + table: first_table.0.clone(), + predicates: None, + table_identifier: first_table.1.clone(), + id: node_id_counter.get_next_id(), + }; + + let mut tables = vec![first_table]; + + for join in from.joins.unwrap_or_default().into_iter() { + let (right, outer, predicates) = parse_join(schema, join, node_id_counter, &mut tables)?; + node = Operator::Join { + left: Box::new(node), + right: Box::new(right), + predicates, + outer, + id: node_id_counter.get_next_id(), + } + } + + return Ok((node, tables)); +} + +fn parse_join( + schema: &Schema, + join: ast::JoinedSelectTable, + node_id_counter: &mut NodeIdCounter, + tables: &mut Vec<(Rc, String)>, +) -> Result<(Operator, bool, Option>)> { + let ast::JoinedSelectTable { + operator, + table, + constraint, + } = join; + + let table = match table { + ast::SelectTable::Table(qualified_name, 
maybe_alias, _) => { + let Some(table) = schema.get_table(&qualified_name.name.0) else { + crate::bail_parse_error!("Table {} not found", qualified_name.name.0); + }; + let alias = maybe_alias + .map(|a| match a { + ast::As::As(id) => id, + ast::As::Elided(id) => id, + }) + .map(|a| a.0); + + (table, alias.unwrap_or(qualified_name.name.0)) + } + _ => todo!(), + }; + + tables.push(table.clone()); + + let outer = match operator { + ast::JoinOperator::TypedJoin(Some(join_type)) => { + if join_type == JoinType::LEFT | JoinType::OUTER { + true + } else if join_type == JoinType::RIGHT | JoinType::OUTER { + true + } else { + false + } + } + _ => false, + }; + + let predicates = constraint.map(|c| match c { + ast::JoinConstraint::On(expr) => break_predicate_at_and_boundaries(expr, vec![]), + ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), + }); + + Ok(( + Operator::Scan { + table: table.0.clone(), + predicates: None, + table_identifier: table.1.clone(), + id: node_id_counter.get_next_id(), + }, + outer, + predicates, + )) +} + +fn break_predicate_at_and_boundaries( + predicate: ast::Expr, + mut predicates: Vec, +) -> Vec { + match predicate { + ast::Expr::Binary(left, ast::Operator::And, right) => { + let ps = break_predicate_at_and_boundaries(*left, predicates); + let ps = break_predicate_at_and_boundaries(*right, ps); + ps + } + _ => { + predicates.push(predicate); + predicates + } + } +} diff --git a/core/translate/select.rs b/core/translate/select.rs index 7321c0584..d8bcecfe5 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,1055 +1,19 @@ -use crate::function::{AggFunc, Func}; -use crate::schema::{Column, PseudoTable, Schema, Table}; +use std::{cell::RefCell, rc::Rc}; + use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::expr::{analyze_columns, maybe_apply_affinity, translate_expr}; -use crate::translate::where_clause::{ - process_where, translate_processed_where, translate_tableless_where, 
ProcessedWhereClause, - SeekRowid, WhereExpr, -}; -use crate::translate::{normalize_ident, Insn}; -use crate::types::{OwnedRecord, OwnedValue}; -use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Program}; -use crate::Result; +use crate::{schema::Schema, vdbe::Program, Result}; +use sqlite3_parser::ast; -use sqlite3_parser::ast::{self, JoinOperator, JoinType, ResultColumn}; +use super::emitter::emit_program; +use super::optimizer::optimize_plan; +use super::planner::prepare_select_plan; -use std::cell::RefCell; -use std::rc::Rc; - -/// A representation of a `SELECT` statement that has all the information -/// needed for code generation. -pub struct Select<'a> { - /// Information about each column. - pub column_info: Vec>, - /// The tables we are retrieving data from, including tables mentioned - /// in `FROM` and `JOIN` clauses. - pub src_tables: Vec>, - /// The `LIMIT` clause. - pub limit: &'a Option, - /// The `ORDER BY` clause. - pub order_by: &'a Option>, - /// Whether the query contains an aggregation function. - pub exist_aggregation: bool, - /// The `WHERE` clause. - pub where_clause: &'a Option, -} - -#[derive(Debug)] -pub struct SrcTable<'a> { - pub table: Table, - pub identifier: String, - pub join_info: Option<&'a ast::JoinedSelectTable>, -} - -impl SrcTable<'_> { - pub fn is_outer_join(&self) -> bool { - if let Some(ast::JoinedSelectTable { - operator: JoinOperator::TypedJoin(Some(join_type)), - .. 
- }) = self.join_info - { - if *join_type == JoinType::LEFT | JoinType::OUTER { - true - } else { - *join_type == JoinType::RIGHT | JoinType::OUTER - } - } else { - false - } - } -} - -#[derive(Debug)] -pub struct ColumnInfo<'a> { - pub raw_column: &'a ast::ResultColumn, - pub func: Option, - pub args: &'a Option>, - pub columns_to_allocate: usize, /* number of result columns this col will result on */ -} - -impl<'a> ColumnInfo<'a> { - pub fn new(raw_column: &'a ast::ResultColumn) -> Self { - Self { - raw_column, - func: None, - args: &None, - columns_to_allocate: 1, - } - } - - pub fn is_aggregation_function(&self) -> bool { - matches!(self.func, Some(Func::Agg(_))) - } -} - -#[derive(Debug)] -pub struct LeftJoinBookkeeping { - // integer register that holds a flag that is set to true if the current row has a match for the left join - pub match_flag_register: usize, - // label for the instruction that sets the match flag to true - pub set_match_flag_true_label: BranchOffset, - // label for the instruction that checks if the match flag is true - pub check_match_flag_label: BranchOffset, - // label for the instruction where the program jumps to if the current row has a match for the left join - pub on_match_jump_to_label: BranchOffset, -} - -#[derive(Debug)] -pub enum Plan { - Scan, - SeekRowid, -} - -/// Represents a single loop in an ordered list of opened read table loops. -/// -/// The list is used to generate inner loops like this: -/// -/// cursor 0 = open table 0 -/// for each row in cursor 0 -/// cursor 1 = open table 1 -/// for each row in cursor 1 -/// ... 
-/// end cursor 1 -/// end cursor 0 -#[derive(Debug)] -pub struct LoopInfo { - // The table or table alias that we are looping over - pub identifier: String, - // The plan for this loop - pub plan: Plan, - // Metadata about a left join, if any - pub left_join_maybe: Option, - // The label for the instruction that reads the next row for this table - pub next_row_label: BranchOffset, - // The label for the instruction that rewinds the cursor for this table - pub rewind_label: BranchOffset, - // The label for the instruction that is jumped to in the Rewind instruction if the table is empty - pub rewind_on_empty_label: BranchOffset, - // The ID of the cursor that is opened for this table - pub open_cursor: usize, -} - -struct LimitInfo { - limit_reg: usize, - num: i64, - goto_label: BranchOffset, -} - -#[derive(Debug)] -struct SortInfo { - sorter_cursor: usize, - sorter_reg: usize, - count: usize, -} - -pub fn prepare_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result> { - match &select.body.select { - ast::OneSelect::Select { - columns, - from: Some(from), - where_clause, - .. - } => { - let (table_name, maybe_alias) = match &from.select { - Some(select_table) => match select_table.as_ref() { - ast::SelectTable::Table(name, alias, ..) 
=> ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }, - None => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.map(|als| &als.0); - let table = match schema.get_table(table_name) { - Some(table) => table, - None => crate::bail_parse_error!("no such table: {}", table_name), - }; - let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); - let mut joins = Vec::new(); - joins.push(SrcTable { - table: Table::BTree(table.clone()), - identifier, - join_info: None, - }); - if let Some(selected_joins) = &from.joins { - for join in selected_joins { - let (table_name, maybe_alias) = match &join.table { - ast::SelectTable::Table(name, alias, ..) => ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.as_ref().map(|als| &als.0); - let table = match schema.get_table(table_name) { - Some(table) => table, - None => { - crate::bail_parse_error!("no such table: {}", table_name) - } - }; - let identifier = normalize_ident(maybe_alias.unwrap_or(table_name)); - - joins.push(SrcTable { - table: Table::BTree(table), - identifier, - join_info: Some(join), - }); - } - } - - let _table = Table::BTree(table); - let column_info = analyze_columns(columns, &joins); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - column_info, - src_tables: joins, - limit: &select.limit, - order_by: &select.order_by, - exist_aggregation, - where_clause, - }) - } - ast::OneSelect::Select { - columns, - from: None, - where_clause, - .. 
- } => { - let column_info = analyze_columns(columns, &Vec::new()); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - column_info, - src_tables: Vec::new(), - limit: &select.limit, - order_by: &select.order_by, - where_clause, - exist_aggregation, - }) - } - _ => todo!(), - } -} - -/// Generate code for a SELECT statement. pub fn translate_select( - mut select: Select, + schema: &Schema, + select: ast::Select, database_header: Rc>, ) -> Result { - let mut program = ProgramBuilder::new(); - let init_label = program.allocate_label(); - let early_terminate_label = program.allocate_label(); - program.emit_insn_with_label_dependency( - Insn::Init { - target_pc: init_label, - }, - init_label, - ); - let start_offset = program.offset(); - - let mut sort_info = if let Some(order_by) = select.order_by { - let sorter_cursor = program.alloc_cursor_id(None, None); - let mut order = Vec::new(); - for col in order_by { - order.push(OwnedValue::Integer(if let Some(ord) = col.order { - ord as i64 - } else { - 0 - })); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sorter_cursor, - order: OwnedRecord::new(order), - columns: select.column_info.len() + 1, // +1 for the key - }); - Some(SortInfo { - sorter_cursor, - sorter_reg: 0, // will be overwritten later - count: 0, // will be overwritten later - }) - } else { - None - }; - - let limit_info = if let Some(limit) = &select.limit { - assert!(limit.offset.is_none()); - let target_register = program.alloc_register(); - let limit_reg = translate_expr( - &mut program, - Some(&select), - &limit.expr, - target_register, - None, - )?; - let num = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = &limit.expr { - num.parse::()? 
- } else { - todo!(); - }; - let goto_label = program.allocate_label(); - if num == 0 { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: goto_label, - }, - goto_label, - ); - } - Some(LimitInfo { - limit_reg, - num, - goto_label, - }) - } else { - None - }; - - if !select.src_tables.is_empty() { - let loops = translate_tables_begin(&mut program, &mut select, early_terminate_label)?; - - let (register_start, column_count) = if let Some(sort_columns) = select.order_by { - let start = program.next_free_register(); - for col in sort_columns.iter() { - let target = program.alloc_register(); - // if the ORDER BY expression is a number, interpret it as an 1-indexed column number - // otherwise, interpret it normally as an expression - let sort_col_expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = - &col.expr - { - let column_number = num.parse::()?; - if column_number == 0 { - crate::bail_parse_error!("invalid column index: {}", column_number); - } - let maybe_result_column = select - .column_info - .get(column_number - 1) - .map(|col| &col.raw_column); - match maybe_result_column { - Some(ResultColumn::Expr(expr, _)) => expr, - None => crate::bail_parse_error!("invalid column index: {}", column_number), - _ => todo!(), - } - } else { - &col.expr - }; - translate_expr(&mut program, Some(&select), sort_col_expr, target, None)?; - } - let (_, result_cols_count) = translate_columns(&mut program, &select, None)?; - sort_info - .as_mut() - .map(|inner| inner.count = result_cols_count + sort_columns.len() + 1); // +1 for the key - (start, result_cols_count + sort_columns.len()) - } else { - translate_columns(&mut program, &select, None)? 
- }; - - if !select.exist_aggregation { - if let Some(ref mut sort_info) = sort_info { - let dest = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: register_start, - count: column_count, - dest_reg: dest, - }); - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_info.sorter_cursor, - record_reg: dest, - }); - sort_info.sorter_reg = register_start; - } else { - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count: column_count, - }); - emit_limit_insn(&limit_info, &mut program); - } - } - - translate_tables_end(&mut program, &loops); - - if select.exist_aggregation { - program.resolve_label(early_terminate_label, program.offset()); - let mut target = register_start; - for info in &select.column_info { - if let Some(Func::Agg(func)) = &info.func { - program.emit_insn(Insn::AggFinal { - register: target, - func: func.clone(), - }); - } - target += info.columns_to_allocate; - } - // only one result row - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count: column_count, - }); - emit_limit_insn(&limit_info, &mut program); - } - } else { - assert!(!select.exist_aggregation); - assert!(sort_info.is_none()); - let where_maybe = translate_tableless_where(&select, &mut program, early_terminate_label)?; - let (register_start, count) = translate_columns(&mut program, &select, None)?; - if let Some(where_clause_label) = where_maybe { - program.resolve_label(where_clause_label, program.offset() + 1); - } - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count, - }); - emit_limit_insn(&limit_info, &mut program); - }; - - // now do the sort for ORDER BY - if select.order_by.is_some() { - let _ = translate_sorter(&select, &mut program, &sort_info.unwrap(), &limit_info); - } - - if !select.exist_aggregation { - program.resolve_label(early_terminate_label, program.offset()); - } - program.emit_insn(Insn::Halt); - let halt_offset = program.offset() - 1; - if let Some(limit_info) = 
limit_info { - if limit_info.goto_label < 0 { - program.resolve_label(limit_info.goto_label, halt_offset); - } - } - program.resolve_label(init_label, program.offset()); - program.emit_insn(Insn::Transaction); - program.emit_constant_insns(); - program.emit_insn(Insn::Goto { - target_pc: start_offset, - }); - program.resolve_deferred_labels(); - Ok(program.build(database_header)) -} - -fn emit_limit_insn(limit_info: &Option, program: &mut ProgramBuilder) { - if limit_info.is_none() { - return; - } - let limit_info = limit_info.as_ref().unwrap(); - if limit_info.num > 0 { - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_info.limit_reg, - target_pc: limit_info.goto_label, - }, - limit_info.goto_label, - ); - } -} - -fn translate_sorter( - select: &Select, - program: &mut ProgramBuilder, - sort_info: &SortInfo, - limit_info: &Option, -) -> Result<()> { - assert!(sort_info.count > 0); - let mut pseudo_columns = Vec::new(); - for col in select.column_info.iter() { - match col.raw_column { - ast::ResultColumn::Expr(expr, _) => match expr { - ast::Expr::Id(ident) => { - pseudo_columns.push(Column { - name: normalize_ident(&ident.0), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - ast::Expr::Qualified(table_name, ident) => { - pseudo_columns.push(Column { - name: normalize_ident(format!("{}.{}", table_name.0, ident.0).as_str()), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - other => { - todo!("translate_sorter: {:?}", other); - } - }, - ast::ResultColumn::Star => {} - ast::ResultColumn::TableStar(_) => {} - } - } - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - let pseudo_content_reg = program.alloc_register(); - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: pseudo_content_reg, - num_fields: sort_info.count, - }); - let label = program.allocate_label(); - 
program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_info.sorter_cursor, - pc_if_empty: label, - }, - label, - ); - let sorter_data_offset = program.offset(); - program.emit_insn(Insn::SorterData { - cursor_id: sort_info.sorter_cursor, - dest_reg: pseudo_content_reg, - pseudo_cursor, - }); - let (register_start, count) = translate_columns(program, select, Some(pseudo_cursor))?; - program.emit_insn(Insn::ResultRow { - start_reg: register_start, - count, - }); - emit_limit_insn(limit_info, program); - program.emit_insn(Insn::SorterNext { - cursor_id: sort_info.sorter_cursor, - pc_if_next: sorter_data_offset, - }); - program.resolve_label(label, program.offset()); - Ok(()) -} - -fn translate_tables_begin( - program: &mut ProgramBuilder, - select: &mut Select, - early_terminate_label: BranchOffset, -) -> Result> { - let processed_where = process_where(select)?; - let mut loops = Vec::with_capacity(select.src_tables.len()); - for idx in &processed_where.loop_order { - let join = select - .src_tables - .get(*idx) - .expect("loop order out of bounds"); - let loop_info = translate_table_open_cursor(program, join, &processed_where); - loops.push(loop_info); - } - - for loop_info in &loops { - // early_terminate_label decides where to jump _IF_ there exists a condition on this loop that is always false. - // this is part of a constant folding optimization where we can skip the loop entirely if we know it will never produce any rows. - let current_loop_early_terminate_label = if let Some(left_join) = &loop_info.left_join_maybe - { - // If there exists a condition on the LEFT JOIN that is always false, e.g.: - // 'SELECT * FROM x LEFT JOIN y ON false' - // then we can't jump to e.g. Halt, but instead we need to still emit all rows from the 'x' table, with NULLs for the 'y' table. 
- // 'check_match_flag_label' is the label that checks if the left join match flag has been set to true, and if not (which it by default isn't), - // sets the 'y' cursor's "pseudo null bit" on, which means any Insn::Column after that will return NULL for the 'y' table. - left_join.check_match_flag_label - } else { - // If there exists a condition in an INNER JOIN (or WHERE) that is always false, then the query will not produce any rows. - // Example: 'SELECT * FROM x JOIN y ON false' or 'SELECT * FROM x WHERE false' - // Here we should jump to Halt (or e.g. AggFinal in case we have an aggregation expression like count() that should produce a 0 on empty input. - early_terminate_label - }; - translate_table_open_loop( - program, - select, - &loops, - loop_info, - &processed_where, - current_loop_early_terminate_label, - )?; - } - - Ok(loops) -} - -fn translate_tables_end(program: &mut ProgramBuilder, loops: &[LoopInfo]) { - // iterate in reverse order as we open cursors in order - for table_loop in loops.iter().rev() { - let cursor_id = table_loop.open_cursor; - program.resolve_label(table_loop.next_row_label, program.offset()); - if let Plan::Scan = table_loop.plan { - // If we're scanning a table, we need to emit a Next instruction to fetch the next row. 
- program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: table_loop.rewind_label, - }, - table_loop.rewind_label, - ); - } - - if let Some(left_join) = &table_loop.left_join_maybe { - left_join_match_flag_check(program, left_join, cursor_id); - } - } -} - -fn translate_table_open_cursor( - program: &mut ProgramBuilder, - table: &SrcTable, - w: &ProcessedWhereClause, -) -> LoopInfo { - let cursor_id = - program.alloc_cursor_id(Some(table.identifier.clone()), Some(table.table.clone())); - let root_page = match &table.table { - Table::BTree(btree) => btree.root_page, - Table::Pseudo(_) => todo!(), - }; - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - let has_where_term_where_rowid_index_usable = w.terms.iter().any(|term| { - matches!( - term.expr, - WhereExpr::SeekRowid(SeekRowid { table: t, .. }) if *t == table.identifier - ) - }); - LoopInfo { - identifier: table.identifier.clone(), - plan: if has_where_term_where_rowid_index_usable { - Plan::SeekRowid - } else { - Plan::Scan - }, - left_join_maybe: if table.is_outer_join() { - Some(LeftJoinBookkeeping { - match_flag_register: program.alloc_register(), - on_match_jump_to_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - set_match_flag_true_label: program.allocate_label(), - }) - } else { - None - }, - open_cursor: cursor_id, - next_row_label: program.allocate_label(), - rewind_label: program.allocate_label(), - rewind_on_empty_label: program.allocate_label(), - } -} - -/** -* initialize left join match flag to false -* if condition checks pass, it will eventually be set to true -*/ -fn left_join_match_flag_initialize(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) { - program.add_comment(program.offset(), "init LEFT JOIN match flag"); - program.emit_insn(Insn::Integer { - value: 0, - dest: 
left_join.match_flag_register, - }); -} - -/** -* after the relevant conditional jumps have been emitted, set the left join match flag to true -*/ -fn left_join_match_flag_set_true(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) { - program.defer_label_resolution( - left_join.set_match_flag_true_label, - program.offset() as usize, - ); - program.add_comment(program.offset(), "record LEFT JOIN hit"); - program.emit_insn(Insn::Integer { - value: 1, - dest: left_join.match_flag_register, - }); -} - -/** -* check if the left join match flag is set to true -* if it is, jump to the next row on the outer table -* if not, set the right table cursor's "pseudo null bit" on -* then jump to setting the left join match flag to true again, -* which will effectively emit all nulls for the right table. -*/ -fn left_join_match_flag_check( - program: &mut ProgramBuilder, - left_join: &LeftJoinBookkeeping, - cursor_id: usize, -) { - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(left_join.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: left_join.match_flag_register, - target_pc: left_join.on_match_jump_to_label, - decrement_by: 0, - }, - left_join.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - program.emit_insn(Insn::NullRow { cursor_id }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: left_join.set_match_flag_true_label, - }, - left_join.set_match_flag_true_label, - ); - // This points to the NextAsync instruction of the next table in the loop - // (i.e. 
the outer table, since we're iterating in reverse order) - program.resolve_label(left_join.on_match_jump_to_label, program.offset()); -} - -fn translate_table_open_loop( - program: &mut ProgramBuilder, - select: &Select, - loops: &[LoopInfo], - loop_info: &LoopInfo, - w: &ProcessedWhereClause, - early_terminate_label: BranchOffset, -) -> Result<()> { - if let Some(left_join) = loop_info.left_join_maybe.as_ref() { - // In a left join loop, initialize the left join match flag to false - // If the condition checks pass, it will eventually be set to true - // If not, NULLs will be emitted for the right table for this row in the outer table. - left_join_match_flag_initialize(program, left_join); - } - - if let Plan::Scan = loop_info.plan { - // If we're scanning, we need to rewind the cursor to the beginning of the table - // before we start processing the rows in the loop. - // Consider a nested loop query like: - // SELECT * FROM a JOIN b ON a.someprop = b.someprop; - // We need to rewind the cursor to the beginning of b for each row in a, - // so that we can iterate over all rows in b for each row in a. - // - // If we're not scanning, we're seeking by rowid, so we don't need to rewind the cursor, - // since we're only going to be reading one row. 
- program.emit_insn(Insn::RewindAsync { - cursor_id: loop_info.open_cursor, - }); - program.defer_label_resolution(loop_info.rewind_label, program.offset() as usize); - program.emit_insn_with_label_dependency( - Insn::RewindAwait { - cursor_id: loop_info.open_cursor, - pc_if_empty: loop_info.rewind_on_empty_label, - }, - loop_info.rewind_on_empty_label, - ); - } - - translate_processed_where( - program, - select, - loops, - loop_info, - w, - early_terminate_label, - None, - )?; - - if let Some(left_join) = loop_info.left_join_maybe.as_ref() { - left_join_match_flag_set_true(program, left_join); - } - - Ok(()) -} - -fn translate_columns( - program: &mut ProgramBuilder, - select: &Select, - cursor_hint: Option, -) -> Result<(usize, usize)> { - let register_start = program.next_free_register(); - - // allocate one register as output for each col - let registers: usize = select - .column_info - .iter() - .map(|col| col.columns_to_allocate) - .sum(); - program.alloc_registers(registers); - let count = program.next_free_register() - register_start; - - let mut target = register_start; - for info in select.column_info.iter() { - translate_column(program, select, info.raw_column, info, target, cursor_hint)?; - target += info.columns_to_allocate; - } - Ok((register_start, count)) -} - -fn translate_column( - program: &mut ProgramBuilder, - select: &Select, - col: &ast::ResultColumn, - info: &ColumnInfo, - target_register: usize, // where to store the result, in case of star it will be the start of registers added - cursor_hint: Option, -) -> Result<()> { - match col { - ast::ResultColumn::Expr(expr, _) => { - if info.is_aggregation_function() { - let _ = translate_aggregation( - program, - select, - expr, - info, - target_register, - cursor_hint, - )?; - } else { - let _ = translate_expr(program, Some(select), expr, target_register, cursor_hint)?; - } - } - ast::ResultColumn::Star => { - let mut target_register = target_register; - for join in &select.src_tables { - 
translate_table_star(join, program, target_register, cursor_hint); - target_register += &join.table.columns().len(); - } - } - ast::ResultColumn::TableStar(_) => todo!(), - } - Ok(()) -} - -fn translate_table_star( - table: &SrcTable, - program: &mut ProgramBuilder, - target_register: usize, - cursor_hint: Option, -) { - let table_cursor = program.resolve_cursor_id(&table.identifier, cursor_hint); - let table = &table.table; - for (i, col) in table.columns().iter().enumerate() { - let col_target_register = target_register + i; - if table.column_is_rowid_alias(col) { - program.emit_insn(Insn::RowId { - cursor_id: table_cursor, - dest: col_target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: i, - dest: col_target_register, - cursor_id: table_cursor, - }); - maybe_apply_affinity(col.ty, col_target_register, program); - } - } -} - -fn translate_aggregation( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - info: &ColumnInfo, - target_register: usize, - cursor_hint: Option, -) -> Result { - let _ = expr; - assert!(info.func.is_some()); - let func = info.func.as_ref().unwrap(); - let empty_args = &Vec::::new(); - let args = info.args.as_ref().unwrap_or(empty_args); - let dest = match func { - Func::Scalar(_) | Func::Json(_) => { - crate::bail_parse_error!("single row function in aggregation") - } - Func::Agg(agg_func) => match agg_func { - AggFunc::Avg => { - if args.len() != 1 { - crate::bail_parse_error!("avg bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Avg, - }); - target_register - } - AggFunc::Count => { - let expr_reg = if args.is_empty() { - program.alloc_register() - } else { - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, 
Some(select), expr, expr_reg, cursor_hint); - expr_reg - }; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Count, - }); - target_register - } - AggFunc::GroupConcat => { - if args.len() != 1 && args.len() != 2 { - crate::bail_parse_error!("group_concat bad number of arguments"); - } - - let expr_reg = program.alloc_register(); - let delimiter_reg = program.alloc_register(); - - let expr = &args[0]; - let delimiter_expr: ast::Expr; - - if args.len() == 2 { - match &args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); - } else { - delimiter_expr = args[1].clone(); - } - } - ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(s.to_string())); - } - _ => crate::bail_parse_error!("Incorrect delimiter parameter"), - }; - } else { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); - } - - translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - translate_expr( - program, - Some(select), - &delimiter_expr, - delimiter_reg, - cursor_hint, - )?; - - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: delimiter_reg, - func: AggFunc::GroupConcat, - }); - - target_register - } - AggFunc::Max => { - if args.len() != 1 { - crate::bail_parse_error!("max bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint); - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Max, - }); - target_register - } - AggFunc::Min => { - if args.len() != 1 { - crate::bail_parse_error!("min bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, 
Some(select), expr, expr_reg, cursor_hint); - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Min, - }); - target_register - } - AggFunc::StringAgg => { - if args.len() != 2 { - crate::bail_parse_error!("string_agg bad number of arguments"); - } - - let expr_reg = program.alloc_register(); - let delimiter_reg = program.alloc_register(); - - let expr = &args[0]; - let delimiter_expr: ast::Expr; - - match &args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); - } else { - delimiter_expr = args[1].clone(); - } - } - ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); - } - _ => crate::bail_parse_error!("Incorrect delimiter parameter"), - }; - - translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - translate_expr( - program, - Some(select), - &delimiter_expr, - delimiter_reg, - cursor_hint, - )?; - - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: delimiter_reg, - func: AggFunc::StringAgg, - }); - - target_register - } - AggFunc::Sum => { - if args.len() != 1 { - crate::bail_parse_error!("sum bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - func: AggFunc::Sum, - }); - target_register - } - AggFunc::Total => { - if args.len() != 1 { - crate::bail_parse_error!("total bad number of arguments"); - } - let expr = &args[0]; - let expr_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, expr_reg, cursor_hint)?; - program.emit_insn(Insn::AggStep { - acc_reg: target_register, - col: expr_reg, - delimiter: 0, - 
func: AggFunc::Total, - }); - target_register - } - }, - }; - Ok(dest) + let select_plan = prepare_select_plan(schema, select)?; + let optimized_plan = optimize_plan(select_plan)?; + emit_program(database_header, optimized_plan) } diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs deleted file mode 100644 index 261c54787..000000000 --- a/core/translate/where_clause.rs +++ /dev/null @@ -1,1152 +0,0 @@ -use crate::{ - function::ScalarFunc, - translate::{expr::translate_expr, select::Select}, - util::normalize_ident, - vdbe::{builder::ProgramBuilder, BranchOffset, Func, Insn}, - Result, -}; - -use super::select::LoopInfo; - -use sqlite3_parser::ast::{self}; - -#[derive(Debug)] -pub struct SeekRowid<'a> { - pub table: &'a str, - pub rowid_expr: &'a ast::Expr, -} - -#[derive(Debug)] -pub enum WhereExpr<'a> { - Expr(&'a ast::Expr), - SeekRowid(SeekRowid<'a>), -} - -#[derive(Debug)] -pub struct WhereTerm<'a> { - // The expression that should be evaluated. - pub expr: WhereExpr<'a>, - // If this term is part of an outer join, this is the index of the outer join table in select.src_tables - pub outer_join_table_index: Option, - // A bitmask of which table indexes (in select.src_tables) the expression references. - pub table_references_bitmask: usize, -} - -impl<'a> WhereTerm<'a> { - pub fn evaluate_at_loop(&self, select: &'a Select) -> usize { - if let Some(outer_join_table) = self.outer_join_table_index { - // E.g. - // SELECT u.age, p.name FROM users u LEFT JOIN products p ON u.id = 5; - // We can't skip rows from the 'users' table since u.id = 5 is a LEFT JOIN condition; instead we need to skip/null out rows from the 'products' table. - outer_join_table - } else { - // E.g. - // SELECT u.age, p.name FROM users u WHERE u.id = 5; - // We can skip rows from the 'users' table if u.id = 5 is false. - self.innermost_table(select) - } - } - - // Find the innermost table that the expression references. 
- // Innermost means 'most nested in the nested loop'. - pub fn innermost_table(&self, select: &'a Select) -> usize { - let mut table = 0; - for i in 0..select.src_tables.len() { - if self.table_references_bitmask & (1 << i) != 0 { - table = i; - } - } - table - } -} - -#[derive(Debug)] -pub struct ProcessedWhereClause<'a> { - pub loop_order: Vec, - pub terms: Vec>, -} - -/** -* Split a constraint into a flat list of WhereTerms. -* The splitting is done at logical 'AND' operator boundaries. -* WhereTerms are currently just a wrapper around an ast::Expr, -* combined with the ID of the cursor where the term should be evaluated. -*/ -pub fn split_constraint_to_terms<'a>( - select: &'a Select, - mut processed_where_clause: ProcessedWhereClause<'a>, - where_clause_or_join_constraint: &'a ast::Expr, - outer_join_table: Option, -) -> Result> { - let mut queue = vec![where_clause_or_join_constraint]; - - while let Some(expr) = queue.pop() { - match expr { - ast::Expr::Binary(left, ast::Operator::And, right) => { - queue.push(left); - queue.push(right); - } - expr => { - if expr.is_always_true()? { - // Terms that are always true can be skipped, as they don't constrain the result set in any way. - continue; - } - let term = WhereTerm { - expr: { - let seekrowid_candidate = select - .src_tables - .iter() - .enumerate() - .find_map(|(i, _)| { - expr.check_seekrowid_candidate(i, select).unwrap_or(None) - }) - .map(WhereExpr::SeekRowid); - - seekrowid_candidate.unwrap_or(WhereExpr::Expr(expr)) - }, - outer_join_table_index: outer_join_table, - table_references_bitmask: introspect_expression_for_table_refs(select, expr)?, - }; - processed_where_clause.terms.push(term); - } - } - } - - Ok(processed_where_clause) -} - -/** -* Split the WHERE clause and any JOIN ON clauses into a flat list of WhereTerms -* that can be evaluated at the appropriate cursor. 
-*/ -pub fn process_where<'a>(select: &'a Select) -> Result> { - let mut wc = ProcessedWhereClause { - terms: Vec::new(), - // In the future, analysis of the WHERE clause and JOIN ON clauses will be used to determine the optimal loop order. - // For now, we just use the order of the tables in the FROM clause. - loop_order: select - .src_tables - .iter() - .enumerate() - .map(|(i, _)| i) - .collect(), - }; - if let Some(w) = &select.where_clause { - wc = split_constraint_to_terms(select, wc, w, None)?; - } - - for (i, table) in select.src_tables.iter().enumerate() { - if table.join_info.is_none() { - continue; - } - let join_info = table.join_info.unwrap(); - if let Some(ast::JoinConstraint::On(expr)) = &join_info.constraint { - wc = split_constraint_to_terms( - select, - wc, - expr, - if table.is_outer_join() { Some(i) } else { None }, - )?; - } - } - - // sort seekrowids first (if e.g. u.id = 1 and u.age > 50, we want to seek on u.id = 1 first) - // since seekrowid replaces a loop, we need to evaluate it first. - // E.g. - // SELECT u.age FROM users WHERE u.id = 5 AND u.age > 50; - // We need to seek on u.id = 5 first, and then evaluate u.age > 50. - // If we evaluate u.age > 50 first, we haven't read the row yet. - wc.terms.sort_by(|a, b| { - if let WhereExpr::SeekRowid(_) = a.expr { - std::cmp::Ordering::Less - } else { - std::cmp::Ordering::Greater - } - }); - - Ok(wc) -} - -/** - * Translate the WHERE clause of a SELECT statement that doesn't have any tables. - * TODO: refactor this to use the same code path as the other WHERE clause translation functions. - */ -pub fn translate_tableless_where( - select: &Select, - program: &mut ProgramBuilder, - early_terminate_label: BranchOffset, -) -> Result> { - if let Some(w) = &select.where_clause { - if w.is_always_false()? { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: early_terminate_label, - }, - early_terminate_label, - ); - return Ok(None); - } - if w.is_always_true()? 
{ - return Ok(None); - } - - let jump_target_when_false = program.allocate_label(); - let jump_target_when_true = program.allocate_label(); - translate_condition_expr( - program, - select, - w, - None, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_false, - jump_target_when_true, - }, - )?; - - program.resolve_label(jump_target_when_true, program.offset()); - - Ok(Some(jump_target_when_false)) - } else { - Ok(None) - } -} - -/** -* Translate the WHERE clause and JOIN ON clauses into a series of conditional jump instructions. -* At this point the WHERE clause and JOIN ON clauses have been split into a series of terms that can be evaluated at the appropriate cursor. -* We evaluate each term at the appropriate cursor. -*/ -pub fn translate_processed_where<'a>( - program: &mut ProgramBuilder, - select: &'a Select, - loops: &[LoopInfo], - current_loop: &'a LoopInfo, - where_c: &'a ProcessedWhereClause, - skip_entire_loop_label: BranchOffset, - cursor_hint: Option, -) -> Result<()> { - // If any of the terms are always false, we can skip the entire loop. 
- for t in where_c.terms.iter().filter(|t| { - select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier - }) { - if let WhereExpr::Expr(e) = &t.expr { - if e.is_always_false().unwrap_or(false) { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: skip_entire_loop_label, - }, - skip_entire_loop_label, - ); - return Ok(()); - } - } - } - - for term in where_c.terms.iter().filter(|t| { - select.src_tables[t.evaluate_at_loop(select)].identifier == current_loop.identifier - }) { - let jump_target_when_false = loops[term.evaluate_at_loop(select)].next_row_label; - let jump_target_when_true = program.allocate_label(); - match &term.expr { - WhereExpr::Expr(e) => { - translate_condition_expr( - program, - select, - e, - cursor_hint, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_false, - jump_target_when_true, - }, - )?; - } - WhereExpr::SeekRowid(s) => { - let cursor_id = program.resolve_cursor_id(s.table, cursor_hint); - - let computed_rowid_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(select), - s.rowid_expr, - computed_rowid_reg, - cursor_hint, - )?; - - if !program.has_cursor_emitted_seekrowid(cursor_id) { - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id, - src_reg: computed_rowid_reg, - target_pc: jump_target_when_false, - }, - jump_target_when_false, - ); - } else { - // If we have already emitted a SeekRowid instruction for this cursor, then other equality checks - // against that table should be done using the row that was already fetched. - // e.g. select u.age, p.name from users u join products p on u.id = p.id and p.id = 5; - // emitting two SeekRowid instructions for the same 'p' cursor would yield an incorrect result. - // Assume we are looping over users u, and right now u.id = 3. - // We first SeekRowid on p.id = 3, and find a row. 
- // If we then SeekRowid for p.id = 5, we would find a row with p.id = 5, - // and end up with a result where u.id = 3 and p.id = 5, which is incorrect. - // Instead we replace the second SeekRowid with a comparison against the row that was already fetched, - // i.e. we compare p.id == 5, which would not match (and is the correct result). - // - // It would probably be better to modify the AST in the WhereTerms directly, but that would require - // refactoring to not use &'a Ast::Expr references in the WhereTerms, i.e. the WhereClause would own its data - // and could mutate it to change the query as needed. We probably need to do this anyway if we want to have some - // kind of Query Plan construct that is not just a container for AST nodes. - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: rowid_reg, - rhs: computed_rowid_reg, - target_pc: jump_target_when_false, - }, - jump_target_when_false, - ); - } - } - } - - program.resolve_label(jump_target_when_true, program.offset()); - } - - Ok(()) -} - -#[derive(Default, Debug, Clone, Copy)] -struct ConditionMetadata { - jump_if_condition_is_true: bool, - jump_target_when_true: BranchOffset, - jump_target_when_false: BranchOffset, -} - -fn translate_condition_expr( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - cursor_hint: Option, - condition_metadata: ConditionMetadata, -) -> Result<()> { - match expr { - ast::Expr::Between { .. } => todo!(), - ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { - // In a binary AND, never jump to the 'jump_target_when_true' label on the first condition, because - // the second condition must also be true. 
- let _ = translate_condition_expr( - program, - select, - lhs, - cursor_hint, - ConditionMetadata { - jump_if_condition_is_true: false, - ..condition_metadata - }, - ); - let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); - } - ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { - let jump_target_when_false = program.allocate_label(); - let _ = translate_condition_expr( - program, - select, - lhs, - cursor_hint, - ConditionMetadata { - // If the first condition is true, we don't need to evaluate the second condition. - jump_if_condition_is_true: true, - jump_target_when_false, - ..condition_metadata - }, - ); - program.resolve_label(jump_target_when_false, program.offset()); - let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); - } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_reg = program.alloc_register(); - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), lhs, lhs_reg, cursor_hint); - if let ast::Expr::Literal(_) = lhs.as_ref() { - program.mark_last_insn_constant() - } - let _ = translate_expr(program, Some(select), rhs, rhs_reg, cursor_hint); - if let ast::Expr::Literal(_) = rhs.as_ref() { - program.mark_last_insn_constant() - } - match op { - ast::Operator::Greater => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::GreaterEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: 
condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Less => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::LessEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Equals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::NotEquals => { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: 
condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ) - } - } - ast::Operator::Is => todo!(), - ast::Operator::IsNot => todo!(), - _ => { - todo!("op {:?} not implemented", op); - } - } - } - ast::Expr::Literal(lit) => match lit { - ast::Literal::Numeric(val) => { - let maybe_int = val.parse::(); - if let Ok(int_value) = maybe_int { - let reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: int_value, - dest: reg, - }); - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: reg, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: reg, - }, - condition_metadata.jump_target_when_false, - ) - } - } else { - crate::bail_parse_error!("unsupported literal type in condition"); - } - } - ast::Literal::String(string) => { - let reg = program.alloc_register(); - program.emit_insn(Insn::String8 { - value: string.clone(), - dest: reg, - }); - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: reg, - }, - condition_metadata.jump_target_when_true, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: reg, - }, - condition_metadata.jump_target_when_false, - ) - } - } - unimpl => todo!("literal {:?} not implemented", unimpl), - }, - ast::Expr::InList { lhs, not, rhs } => { - // lhs is e.g. 
a column reference - // rhs is an Option> - // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). - // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). - // - // The IN expression is equivalent to a series of OR expressions. - // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. - // The NOT IN expression is equivalent to a series of AND expressions. - // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. - // - // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. - // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, - // which is what SQLite also does for small lists of values. - // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. - - if rhs.is_none() { - // If rhs is None, IN expressions are always false and NOT IN expressions are always true. - if *not { - // On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through. - // This is because in a more complex condition we might need to evaluate the rest of the condition. - // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect, - // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. 
- if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } else { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - return Ok(()); - } - - // The left hand side only needs to be evaluated once we have a list of values to compare against. - let lhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), lhs, lhs_reg, cursor_hint)?; - - let rhs = rhs.as_ref().unwrap(); - - // The difference between a local jump and an "upper level" jump is that for example in this case: - // WHERE foo IN (1,2,3) OR bar = 5, - // we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. - // This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition. - // However, in this example: - // WHERE foo IN (1,2,3) AND bar = 5, - // we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. - // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. - let jump_target_when_true = if condition_metadata.jump_if_condition_is_true { - condition_metadata.jump_target_when_true - } else { - program.allocate_label() - }; - - if !*not { - // If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true. 
- for (i, expr) in rhs.iter().enumerate() { - let rhs_reg = program.alloc_register(); - let last_condition = i == rhs.len() - 1; - let _ = translate_expr(program, Some(select), expr, rhs_reg, cursor_hint)?; - // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. - if !last_condition { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: jump_target_when_true, - }, - jump_target_when_true, - ); - } else { - // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - } - // If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } else { - // If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true. - for expr in rhs.iter() { - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, Some(select), expr, rhs_reg, cursor_hint)?; - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }, - condition_metadata.jump_target_when_false, - ); - } - // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. 
- if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }, - condition_metadata.jump_target_when_true, - ); - } - } - - if !condition_metadata.jump_if_condition_is_true { - program.resolve_label(jump_target_when_true, program.offset()); - } - } - ast::Expr::Like { - lhs, - not, - op, - rhs, - escape: _, - } => { - let cur_reg = program.alloc_register(); - assert!(match rhs.as_ref() { - ast::Expr::Literal(_) => true, - _ => false, - }); - match op { - ast::LikeOperator::Like => { - let pattern_reg = program.alloc_register(); - let column_reg = program.alloc_register(); - // LIKE(pattern, column). We should translate the pattern first before the column - let _ = translate_expr(program, Some(select), rhs, pattern_reg, cursor_hint)?; - program.mark_last_insn_constant(); - let _ = translate_expr(program, Some(select), lhs, column_reg, cursor_hint)?; - program.emit_insn(Insn::Function { - func: Func::Scalar(ScalarFunc::Like), - start_reg: pattern_reg, - dest: cur_reg, - }); - } - ast::LikeOperator::Glob => todo!(), - ast::LikeOperator::Match => todo!(), - ast::LikeOperator::Regexp => todo!(), - } - if !*not { - if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_true, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_false, - ); - } - } else if condition_metadata.jump_if_condition_is_true { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_true, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_true, - ); - } else { - 
program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: condition_metadata.jump_target_when_false, - null_reg: cur_reg, - }, - condition_metadata.jump_target_when_false, - ); - } - } - _ => todo!("op {:?} not implemented", expr), - } - Ok(()) -} - -fn introspect_expression_for_table_refs<'a>( - select: &'a Select, - where_expr: &'a ast::Expr, -) -> Result { - let mut table_refs_mask = 0; - match where_expr { - ast::Expr::Binary(e1, _, e2) => { - table_refs_mask |= introspect_expression_for_table_refs(select, e1)?; - table_refs_mask |= introspect_expression_for_table_refs(select, e2)?; - } - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let matching_tables = select - .src_tables - .iter() - .enumerate() - .filter(|(_, t)| t.table.get_column(&ident).is_some()); - - let mut matches = 0; - let mut matching_tbl = None; - for table in matching_tables { - matching_tbl = Some(table); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", &ident) - } - } - - if let Some((tbl_index, _)) = matching_tbl { - table_refs_mask |= 1 << tbl_index; - } else { - crate::bail_parse_error!("column not found: {}", &ident) - } - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let matching_table = select - .src_tables - .iter() - .enumerate() - .find(|(_, t)| t.identifier == tbl); - - if matching_table.is_none() { - crate::bail_parse_error!("table not found: {}", &tbl) - } - let matching_table = matching_table.unwrap(); - if matching_table.1.table.get_column(&ident).is_none() { - crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) - } - - table_refs_mask |= 1 << matching_table.0; - } - ast::Expr::Literal(_) => {} - ast::Expr::Like { lhs, rhs, .. 
} => { - table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; - table_refs_mask |= introspect_expression_for_table_refs(select, rhs)?; - } - ast::Expr::FunctionCall { - args: Some(args), .. - } => { - for arg in args { - table_refs_mask |= introspect_expression_for_table_refs(select, arg)?; - } - } - ast::Expr::InList { lhs, rhs, .. } => { - table_refs_mask |= introspect_expression_for_table_refs(select, lhs)?; - if let Some(rhs_list) = rhs { - for rhs_expr in rhs_list { - table_refs_mask |= introspect_expression_for_table_refs(select, rhs_expr)?; - } - } - } - _ => {} - } - - Ok(table_refs_mask) -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ConstantCondition { - AlwaysTrue, - AlwaysFalse, -} - -pub trait Evaluatable<'a> { - // if the expression is a constant expression e.g. '1', returns the constant condition - fn check_constant(&self) -> Result>; - fn is_always_true(&self) -> Result { - Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantCondition::AlwaysTrue)) - } - fn is_always_false(&self) -> Result { - Ok(self - .check_constant()? 
- .map_or(false, |c| c == ConstantCondition::AlwaysFalse)) - } - // if the expression is the primary key of a table, returns the index of the table - fn check_primary_key(&self, select: &'a Select) -> Result>; - // Returns a bitmask of which table indexes the expression references - fn get_table_references_bitmask(&self, select: &'a Select) -> Result; - // Checks if the expression is a candidate for seekrowid optimization - fn check_seekrowid_candidate( - &'a self, - table_index: usize, - select: &'a Select, - ) -> Result>>; -} - -impl<'a> Evaluatable<'a> for ast::Expr { - fn get_table_references_bitmask(&self, select: &'a Select) -> Result { - match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { - if t.table.get_column(&ident).is_some() { - Some(i) - } else { - None - } - }); - - let mut matches = 0; - let mut matching_tbl = None; - - for tbl in tables { - matching_tbl = Some(tbl); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - } - - Ok(matching_tbl.unwrap_or(0)) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let table = select - .src_tables - .iter() - .enumerate() - .find(|(_, t)| t.identifier == tbl && t.table.get_column(&ident).is_some()); - - if table.is_none() { - crate::bail_parse_error!("table not found: {}", tbl) - } - - let table = table.unwrap(); - - Ok(table.0) - } - ast::Expr::Binary(lhs, _, rhs) => { - let lhs = lhs.as_ref().get_table_references_bitmask(select)?; - let rhs = rhs.as_ref().get_table_references_bitmask(select)?; - - Ok(lhs | rhs) - } - _ => Ok(0), - } - } - fn check_primary_key(&self, select: &'a Select) -> Result> { - match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let tables = select.src_tables.iter().enumerate().filter_map(|(i, t)| { - if t.table - 
.get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - { - Some(i) - } else { - None - } - }); - - let mut matches = 0; - let mut matching_tbl = None; - - for tbl in tables { - matching_tbl = Some(tbl); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - } - - Ok(matching_tbl) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let table = select.src_tables.iter().enumerate().find(|(_, t)| { - t.identifier == tbl - && t.table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - }); - - if table.is_none() { - crate::bail_parse_error!("table not found: {}", tbl) - } - - let table = table.unwrap(); - - Ok(Some(table.0)) - } - _ => Ok(None), - } - } - fn check_seekrowid_candidate( - &'a self, - table_index: usize, - select: &'a Select, - ) -> Result>> { - match self { - ast::Expr::Binary(lhs, ast::Operator::Equals, rhs) => { - let lhs = lhs.as_ref(); - let rhs = rhs.as_ref(); - - if let Some(lhs_table_index) = lhs.check_primary_key(select)? { - let rhs_table_refs_bitmask = rhs.get_table_references_bitmask(select)?; - // For now, we only support seekrowid optimization if the primary key is in an inner loop compared to the other expression. - // Example: explain select u.age, p.name from users u join products p on u.id = p.id; - // In this case, we loop over the users table and seek the products table. - // We also support the case where the other expression is a constant, - // e.g. SELECT * FROM USERS u WHERE u.id = 5. - // In this case the bitmask of the other expression is 0. - if lhs_table_index == table_index && lhs_table_index >= rhs_table_refs_bitmask { - return Ok(Some(SeekRowid { - table: &select.src_tables[table_index].identifier, - rowid_expr: rhs, - })); - } - } - - if let Some(rhs_table_index) = rhs.check_primary_key(select)? 
{ - let lhs_table_refs_bitmask = lhs.get_table_references_bitmask(select)?; - if rhs_table_index == table_index && rhs_table_index >= lhs_table_refs_bitmask { - return Ok(Some(SeekRowid { - table: &select.src_tables[table_index].identifier, - rowid_expr: lhs, - })); - } - } - - Ok(None) - } - _ => Ok(None), - } - } - fn check_constant(&self) -> Result> { - match self { - ast::Expr::Literal(lit) => match lit { - ast::Literal::Null => Ok(Some(ConstantCondition::AlwaysFalse)), - ast::Literal::Numeric(b) => { - if let Ok(int_value) = b.parse::() { - return Ok(Some(if int_value == 0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - if let Ok(float_value) = b.parse::() { - return Ok(Some(if float_value == 0.0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - Ok(None) - } - ast::Literal::String(s) => { - let without_quotes = s.trim_matches('\''); - if let Ok(int_value) = without_quotes.parse::() { - return Ok(Some(if int_value == 0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - if let Ok(float_value) = without_quotes.parse::() { - return Ok(Some(if float_value == 0.0 { - ConstantCondition::AlwaysFalse - } else { - ConstantCondition::AlwaysTrue - })); - } - - Ok(Some(ConstantCondition::AlwaysFalse)) - } - _ => Ok(None), - }, - ast::Expr::Unary(op, expr) => { - if *op == ast::UnaryOperator::Not { - let trivial = expr.check_constant()?; - return Ok(trivial.map(|t| match t { - ConstantCondition::AlwaysTrue => ConstantCondition::AlwaysFalse, - ConstantCondition::AlwaysFalse => ConstantCondition::AlwaysTrue, - })); - } - - if *op == ast::UnaryOperator::Negative { - let trivial = expr.check_constant()?; - return Ok(trivial); - } - - Ok(None) - } - ast::Expr::InList { lhs: _, not, rhs } => { - if rhs.is_none() { - return Ok(Some(if *not { - ConstantCondition::AlwaysTrue - } else { - ConstantCondition::AlwaysFalse - })); - } - let rhs = 
rhs.as_ref().unwrap(); - if rhs.is_empty() { - return Ok(Some(if *not { - ConstantCondition::AlwaysTrue - } else { - ConstantCondition::AlwaysFalse - })); - } - - Ok(None) - } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_trivial = lhs.check_constant()?; - let rhs_trivial = rhs.check_constant()?; - match op { - ast::Operator::And => { - if lhs_trivial == Some(ConstantCondition::AlwaysFalse) - || rhs_trivial == Some(ConstantCondition::AlwaysFalse) - { - return Ok(Some(ConstantCondition::AlwaysFalse)); - } - if lhs_trivial == Some(ConstantCondition::AlwaysTrue) - && rhs_trivial == Some(ConstantCondition::AlwaysTrue) - { - return Ok(Some(ConstantCondition::AlwaysTrue)); - } - - Ok(None) - } - ast::Operator::Or => { - if lhs_trivial == Some(ConstantCondition::AlwaysTrue) - || rhs_trivial == Some(ConstantCondition::AlwaysTrue) - { - return Ok(Some(ConstantCondition::AlwaysTrue)); - } - if lhs_trivial == Some(ConstantCondition::AlwaysFalse) - && rhs_trivial == Some(ConstantCondition::AlwaysFalse) - { - return Ok(Some(ConstantCondition::AlwaysFalse)); - } - - Ok(None) - } - _ => Ok(None), - } - } - _ => Ok(None), - } - } -} diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 77fc82454..f5f59f3f6 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1120,7 +1120,8 @@ impl Program { let record = match *cursor.record()? 
{ Some(ref record) => record.clone(), None => { - todo!(); + state.pc += 1; + continue; } }; state.registers[*dest_reg] = OwnedValue::Record(record.clone()); diff --git a/testing/agg-functions.test b/testing/agg-functions.test index e7c15e1c7..314ffd7e5 100755 --- a/testing/agg-functions.test +++ b/testing/agg-functions.test @@ -74,3 +74,7 @@ do_execsql_test select-string-agg-with-delimiter { do_execsql_test select-string-agg-with-column-delimiter { SELECT string_agg(name, id) FROM products; } {hat2cap3shirt4sweater5sweatshirt6shorts7jeans8sneakers9boots10coat11accessories} + +do_execsql_test select-count-star { + SELECT count(*) FROM users; +} {10000} diff --git a/testing/join.test b/testing/join.test index 1e2d08c6c..ea471d13e 100755 --- a/testing/join.test +++ b/testing/join.test @@ -141,7 +141,7 @@ do_execsql_test left-join-no-join-conditions-but-multiple-where { } {Jamie|hat Cindy|cap} -do_execsql_test left-join-order-by-qualified { + do_execsql_test left-join-order-by-qualified { select users.first_name, products.name from users left join products on users.id = products.id where users.first_name like 'Jam%' order by null limit 2; } {Jamie|hat James|} @@ -199,4 +199,4 @@ Jamie||Edward} do_execsql_test left-join-constant-condition-true-inner-join-constant-condition-false { select u.first_name, p.name, u2.first_name from users u left join products as p on 1 join users u2 on 0 limit 5; -} {} \ No newline at end of file +} {} diff --git a/testing/select.test b/testing/select.test index ac8750bbe..1540cd493 100755 --- a/testing/select.test +++ b/testing/select.test @@ -26,3 +26,11 @@ do_execsql_test select-add { do_execsql_test case-insensitive-columns { select u.aGe + 1 from USERS u where U.AGe = 91 limit 1; } {92} + +do_execsql_test table-star { + select p.*, p.name from products p limit 1; +} {1|hat|79.0|hat} + +do_execsql_test table-star-2 { + select p.*, u.age from users u join products p limit 1; +} {1|hat|79.0|94} From 
9ab08ee2e6e04fc1dbd7b46bce2bcb4a508b9758 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 17:10:52 +0300 Subject: [PATCH 02/20] is_rowid_alias instead of primary_key --- core/translate/emitter.rs | 4 ++-- core/translate/expr.rs | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5ecc5e2a2..682242980 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -817,9 +817,9 @@ fn table_columns( let mut cur_reg = start_reg; let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); for i in 0..table.columns.len() { - let is_primary_key = table.columns[i].primary_key; + let is_rowid = table.column_is_rowid_alias(&table.columns[i]); let col_type = &table.columns[i].ty; - if is_primary_key { + if is_rowid { program.emit_insn(Insn::RowId { cursor_id, dest: cur_reg, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index f3be84953..5dc8e5df9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1046,9 +1046,9 @@ pub fn translate_expr( ast::Expr::FunctionCallStar { .. 
} => todo!(), ast::Expr::Id(ident) => { // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); - let (idx, col_type, cursor_id, is_primary_key) = + let (idx, col_type, cursor_id, is_rowid_alias) = resolve_ident_table(program, &ident.0, referenced_tables, cursor_hint)?; - if is_primary_key { + if is_rowid_alias { program.emit_insn(Insn::RowId { cursor_id, dest: target_register, @@ -1234,12 +1234,12 @@ pub fn resolve_ident_table( .iter() .enumerate() .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, col.primary_key)); + .map(|(idx, col)| (idx, col.ty, catalog_table.column_is_rowid_alias(col))); let mut idx; let mut col_type; - let mut is_primary_key; + let mut is_rowid_alias; if res.is_some() { - (idx, col_type, is_primary_key) = res.unwrap(); + (idx, col_type, is_rowid_alias) = res.unwrap(); // overwrite if cursor hint is provided if let Some(cursor_hint) = cursor_hint { let cols = &program.cursor_ref[cursor_hint].1; @@ -1251,11 +1251,11 @@ pub fn resolve_ident_table( }) { idx = res.0; col_type = res.1.ty; - is_primary_key = res.1.primary_key; + is_rowid_alias = catalog_table.column_is_rowid_alias(&res.1); } } let cursor_id = program.resolve_cursor_id(identifier, cursor_hint); - found.push((idx, col_type, cursor_id, is_primary_key)); + found.push((idx, col_type, cursor_id, is_rowid_alias)); } } if found.len() == 1 { From 989066eedfe17e8f250fc16101d80c0739cc3cd2 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 17:13:19 +0300 Subject: [PATCH 03/20] remove duplicate test after rebase --- testing/agg-functions.test | 4 ---- 1 file changed, 4 deletions(-) diff --git a/testing/agg-functions.test b/testing/agg-functions.test index 314ffd7e5..e7c15e1c7 100755 --- a/testing/agg-functions.test +++ b/testing/agg-functions.test @@ -74,7 +74,3 @@ do_execsql_test select-string-agg-with-delimiter { do_execsql_test select-string-agg-with-column-delimiter { SELECT string_agg(name, id) FROM products; } 
{hat2cap3shirt4sweater5sweatshirt6shorts7jeans8sneakers9boots10coat11accessories} - -do_execsql_test select-count-star { - SELECT count(*) FROM users; -} {10000} From 2d35641b86d1852533e0d48abd62a5b8b1539e44 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 17:18:26 +0300 Subject: [PATCH 04/20] whitespace --- testing/join.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/join.test b/testing/join.test index ea471d13e..77cb040c4 100755 --- a/testing/join.test +++ b/testing/join.test @@ -141,7 +141,7 @@ do_execsql_test left-join-no-join-conditions-but-multiple-where { } {Jamie|hat Cindy|cap} - do_execsql_test left-join-order-by-qualified { +do_execsql_test left-join-order-by-qualified { select users.first_name, products.name from users left join products on users.id = products.id where users.first_name like 'Jam%' order by null limit 2; } {Jamie|hat James|} From 1130ccf203ecc94be84d48207d4f22efd41059d9 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 19:14:36 +0300 Subject: [PATCH 05/20] mutable out parameter --- core/translate/planner.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 9e494aca0..6ab00435e 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -43,9 +43,11 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

break_predicate_at_and_boundaries(expr, vec![]), + ast::JoinConstraint::On(expr) => { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(expr, &mut predicates); + predicates + } ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), }); @@ -341,19 +347,14 @@ fn parse_join( )) } -fn break_predicate_at_and_boundaries( - predicate: ast::Expr, - mut predicates: Vec, -) -> Vec { +fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { - let ps = break_predicate_at_and_boundaries(*left, predicates); - let ps = break_predicate_at_and_boundaries(*right, ps); - ps + break_predicate_at_and_boundaries(*left, out_predicates); + break_predicate_at_and_boundaries(*right, out_predicates); } _ => { - predicates.push(predicate); - predicates + out_predicates.push(predicate); } } } From 4c016b042b06f3f2600f400d77424699db8db824 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 19:18:59 +0300 Subject: [PATCH 06/20] comment about bitmasks --- core/translate/plan.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 75162e07c..a5e57f59e 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -342,6 +342,14 @@ impl Display for Operator { } } +/** + Returns a bitmask where each bit corresponds to a table in the `tables` vector. + If a table is referenced in the given Operator, the corresponding bit is set to 1. 
+ Example: + if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], + and the Operator is a join between table1 and table3, + then the return value will be (in bits): 101 +*/ pub fn get_table_ref_bitmask_for_query_plan_node<'a>( tables: &'a Vec<(Rc, String)>, node: &'a Operator, @@ -391,6 +399,14 @@ pub fn get_table_ref_bitmask_for_query_plan_node<'a>( Ok(table_refs_mask) } +/** + Returns a bitmask where each bit corresponds to a table in the `tables` vector. + If a table is referenced in the given AST expression, the corresponding bit is set to 1. + Example: + if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], + and predicate = "t1.a = t2.b" + then the return value will be (in bits): 110 +*/ pub fn get_table_ref_bitmask_for_ast_expr<'a>( tables: &'a Vec<(Rc, String)>, predicate: &'a ast::Expr, From e7cc04e157151680a8900d1ba34c3ccd5c096d33 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 19:25:36 +0300 Subject: [PATCH 07/20] Operator comments --- core/translate/plan.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index a5e57f59e..3fa6098bd 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -30,16 +30,32 @@ impl Display for Plan { */ #[derive(Clone, Debug)] pub enum Operator { + // Aggregate operator + // This operator is used to compute aggregate functions like SUM, AVG, COUNT, etc. + // It takes a source operator and a list of aggregate functions to compute. + // GROUP BY is not supported yet. Aggregate { id: usize, source: Box, aggregates: Vec, }, + // Filter operator + // This operator is used to filter rows from the source operator. + // It takes a source operator and a list of predicates to evaluate. + // Only rows for which all predicates evaluate to true are passed to the next operator. 
+ // Generally filter operators will only exist in unoptimized plans, + // as the optimizer will try to push filters down to the lowest possible level, + // e.g. a table scan. Filter { id: usize, source: Box, predicates: Vec, }, + // SeekRowid operator + // This operator is used to retrieve a single row from a table by its rowid. + // rowid_predicate is an expression that produces the comparison value for the rowid. + // e.g. rowid = 5, or rowid = other_table.foo + // predicates is an optional list of additional predicates to evaluate. SeekRowid { id: usize, table: Rc, @@ -47,11 +63,17 @@ pub enum Operator { rowid_predicate: ast::Expr, predicates: Option>, }, + // Limit operator + // This operator is used to limit the number of rows returned by the source operator. Limit { id: usize, source: Box, limit: usize, }, + // Join operator + // This operator is used to join two source operators. + // It takes a left and right source operator, a list of predicates to evaluate, + // and a boolean indicating whether it is an outer join. Join { id: usize, left: Box, @@ -59,22 +81,38 @@ pub enum Operator { predicates: Option>, outer: bool, }, + // Order operator + // This operator is used to sort the rows returned by the source operator. Order { id: usize, source: Box, key: Vec<(ast::Expr, Direction)>, }, + // Projection operator + // This operator is used to project columns from the source operator. + // It takes a source operator and a list of expressions to evaluate. + // e.g. SELECT foo, bar FROM t1 + // In this example, the expressions would be [foo, bar] + // and the source operator would be a Scan operator for table t1. Projection { id: usize, source: Box, expressions: Vec, }, + // Scan operator + // This operator is used to scan a table. + // It takes a table to scan and an optional list of predicates to evaluate. + // The predicates are used to filter rows from the table. + // e.g. 
SELECT * FROM t1 WHERE t1.foo = 5 Scan { id: usize, table: Rc, table_identifier: String, predicates: Option>, }, + // Nothing operator + // This operator is used to represent an empty query. + // e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing. Nothing, } From 97dc98336cfe161d546a24285cf4bff079585ddd Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 19:28:23 +0300 Subject: [PATCH 08/20] fix comment --- core/translate/plan.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3fa6098bd..059b47811 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -385,8 +385,8 @@ impl Display for Operator { If a table is referenced in the given Operator, the corresponding bit is set to 1. Example: if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], - and the Operator is a join between table1 and table3, - then the return value will be (in bits): 101 + and the Operator is a join between table2 and table3, + then the return value will be (in bits): 110 */ pub fn get_table_ref_bitmask_for_query_plan_node<'a>( tables: &'a Vec<(Rc, String)>, @@ -443,7 +443,7 @@ pub fn get_table_ref_bitmask_for_query_plan_node<'a>( Example: if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")], and predicate = "t1.a = t2.b" - then the return value will be (in bits): 110 + then the return value will be (in bits): 011 */ pub fn get_table_ref_bitmask_for_ast_expr<'a>( tables: &'a Vec<(Rc, String)>, From 069826820e8b45ad1d7c1e37017e34e748a4aa77 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 11 Aug 2024 19:36:27 +0300 Subject: [PATCH 09/20] Finish renaming node -> operator --- core/translate/emitter.rs | 12 +++---- core/translate/optimizer.rs | 52 +++++++++++++++++------------- core/translate/plan.rs | 44 ++++++++++++------------- core/translate/planner.rs | 64 +++++++++++++++++++------------------ 4 files changed, 91 insertions(+), 81 deletions(-) diff 
--git a/core/translate/emitter.rs b/core/translate/emitter.rs index 682242980..8e77a496e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -742,12 +742,12 @@ impl Emitter for Operator { Ok(true) } - node => { + operator => { let start_reg = - node.result_columns(program, referenced_tables, m, cursor_override)?; + operator.result_columns(program, referenced_tables, m, cursor_override)?; program.emit_insn(Insn::ResultRow { start_reg, - count: node.column_count(referenced_tables), + count: operator.column_count(referenced_tables), }); Ok(true) } @@ -780,16 +780,16 @@ pub fn emit_program( }; select_plan - .root_node + .root_operator .start(&mut program, &mut metadata, &select_plan.referenced_tables)?; - select_plan.root_node.emit( + select_plan.root_operator.emit( &mut program, &mut metadata, &select_plan.referenced_tables, true, )?; select_plan - .root_node + .root_operator .end(&mut program, &mut metadata, &select_plan.referenced_tables)?; program.resolve_label(halt_label, program.offset()); diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 31a9811ab..006773332 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -5,24 +5,33 @@ use sqlite3_parser::ast; use crate::{schema::BTreeTable, util::normalize_ident, Result}; use super::plan::{ - get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_query_plan_node, Operator, Plan, + get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, Operator, Plan, }; /** * Make a few passes over the plan to optimize it. 
*/ pub fn optimize_plan(mut select_plan: Plan) -> Result { - push_predicates(&mut select_plan.root_node, &select_plan.referenced_tables)?; - eliminate_constants(&mut select_plan.root_node)?; - use_indexes(&mut select_plan.root_node, &select_plan.referenced_tables)?; + push_predicates( + &mut select_plan.root_operator, + &select_plan.referenced_tables, + )?; + eliminate_constants(&mut select_plan.root_operator)?; + use_indexes( + &mut select_plan.root_operator, + &select_plan.referenced_tables, + )?; Ok(select_plan) } /** * Use indexes where possible (currently just primary key lookups) */ -fn use_indexes(node: &mut Operator, referenced_tables: &[(Rc, String)]) -> Result<()> { - match node { +fn use_indexes( + operator: &mut Operator, + referenced_tables: &[(Rc, String)], +) -> Result<()> { + match operator { Operator::Scan { table, predicates: filter, @@ -60,7 +69,7 @@ fn use_indexes(node: &mut Operator, referenced_tables: &[(Rc, String } else { Some(fs.drain(..).collect()) }; - *node = Operator::SeekRowid { + *operator = Operator::SeekRowid { table: table.clone(), table_identifier: table_identifier.clone(), rowid_predicate, @@ -107,8 +116,8 @@ fn use_indexes(node: &mut Operator, referenced_tables: &[(Rc, String // removes predicates that are always true // returns false if there is an impossible predicate that is always false -fn eliminate_constants(node: &mut Operator) -> Result { - match node { +fn eliminate_constants(operator: &mut Operator) -> Result { + match operator { Operator::Filter { source, predicates, .. } => { @@ -125,8 +134,8 @@ fn eliminate_constants(node: &mut Operator) -> Result { } if predicates.is_empty() { - *node = source.take_ownership(); - eliminate_constants(node)?; + *operator = source.take_ownership(); + eliminate_constants(operator)?; } else { eliminate_constants(source)?; } @@ -202,21 +211,21 @@ fn eliminate_constants(node: &mut Operator) -> Result { Operator::Limit { source, .. 
} => { let ok = eliminate_constants(source)?; if !ok { - *node = Operator::Nothing; + *operator = Operator::Nothing; } return Ok(ok); } Operator::Order { source, .. } => { let ok = eliminate_constants(source)?; if !ok { - *node = Operator::Nothing; + *operator = Operator::Nothing; } return Ok(true); } Operator::Projection { source, .. } => { let ok = eliminate_constants(source)?; if !ok { - *node = Operator::Nothing; + *operator = Operator::Nothing; } return Ok(ok); } @@ -248,10 +257,10 @@ fn eliminate_constants(node: &mut Operator) -> Result { Recursively pushes predicates down the tree, as far as possible. */ fn push_predicates( - node: &mut Operator, + operator: &mut Operator, referenced_tables: &Vec<(Rc, String)>, ) -> Result<()> { - match node { + match operator { Operator::Filter { source, predicates, .. } => { @@ -270,7 +279,7 @@ fn push_predicates( } if predicates.is_empty() { - *node = source.take_ownership(); + *operator = source.take_ownership(); } return Ok(()); @@ -355,11 +364,11 @@ fn push_predicates( Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) */ fn push_predicate( - node: &mut Operator, + operator: &mut Operator, predicate: ast::Expr, referenced_tables: &Vec<(Rc, String)>, ) -> Result> { - match node { + match operator { Operator::Scan { predicates, table_identifier, @@ -430,9 +439,8 @@ fn push_predicate( let table_refs_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &pred)?; - let left_bitmask = get_table_ref_bitmask_for_query_plan_node(referenced_tables, left)?; - let right_bitmask = - get_table_ref_bitmask_for_query_plan_node(referenced_tables, right)?; + let left_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, left)?; + let right_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, right)?; if table_refs_bitmask & left_bitmask == 0 || table_refs_bitmask & right_bitmask == 0 { return Ok(Some(pred)); diff --git a/core/translate/plan.rs 
b/core/translate/plan.rs index 059b47811..b3ef1e308 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -9,13 +9,13 @@ use sqlite3_parser::ast; use crate::{function::AggFunc, schema::BTreeTable, util::normalize_ident, Result}; pub struct Plan { - pub root_node: Operator, + pub root_operator: Operator, pub referenced_tables: Vec<(Rc, String)>, } impl Display for Plan { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.root_node) + write!(f, "{}", self.root_operator) } } @@ -243,9 +243,9 @@ impl Display for Aggregate { // For EXPLAIN QUERY PLAN impl Display for Operator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - fn fmt_node(node: &Operator, f: &mut Formatter, level: usize) -> fmt::Result { + fn fmt_operator(operator: &Operator, f: &mut Formatter, level: usize) -> fmt::Result { let indent = " ".repeat(level); - match node { + match operator { Operator::Aggregate { source, aggregates, .. } => { @@ -256,7 +256,7 @@ impl Display for Operator { .collect::>() .join(", "); writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; - fmt_node(source, f, level + 1) + fmt_operator(source, f, level + 1) } Operator::Filter { source, predicates, .. @@ -267,7 +267,7 @@ impl Display for Operator { .collect::>() .join(" AND "); writeln!(f, "{}FILTER {}", indent, predicates_string)?; - fmt_node(source, f, level + 1) + fmt_operator(source, f, level + 1) } Operator::SeekRowid { table, @@ -299,7 +299,7 @@ impl Display for Operator { } Operator::Limit { source, limit, .. } => { writeln!(f, "{}TAKE {}", indent, limit)?; - fmt_node(source, f, level + 1) + fmt_operator(source, f, level + 1) } Operator::Join { left, @@ -323,8 +323,8 @@ impl Display for Operator { } None => writeln!(f, "{}{}", indent, join_name)?, } - fmt_node(left, f, level + 1)?; - fmt_node(right, f, level + 1) + fmt_operator(left, f, level + 1)?; + fmt_operator(right, f, level + 1) } Operator::Order { source, key, .. 
} => { let sort_keys_string = key @@ -333,7 +333,7 @@ impl Display for Operator { .collect::>() .join(", "); writeln!(f, "{}SORT {}", indent, sort_keys_string)?; - fmt_node(source, f, level + 1) + fmt_operator(source, f, level + 1) } Operator::Projection { source, @@ -350,7 +350,7 @@ impl Display for Operator { .collect::>() .join(", "); writeln!(f, "{}PROJECT {}", indent, expressions)?; - fmt_node(source, f, level + 1) + fmt_operator(source, f, level + 1) } Operator::Scan { table, @@ -376,7 +376,7 @@ impl Display for Operator { Operator::Nothing => Ok(()), } } - fmt_node(self, f, 0) + fmt_operator(self, f, 0) } } @@ -388,19 +388,19 @@ impl Display for Operator { and the Operator is a join between table2 and table3, then the return value will be (in bits): 110 */ -pub fn get_table_ref_bitmask_for_query_plan_node<'a>( +pub fn get_table_ref_bitmask_for_operator<'a>( tables: &'a Vec<(Rc, String)>, - node: &'a Operator, + operator: &'a Operator, ) -> Result { let mut table_refs_mask = 0; - match node { + match operator { Operator::Aggregate { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; } Operator::Filter { source, predicates, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; for predicate in predicates { table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?; } @@ -413,17 +413,17 @@ pub fn get_table_ref_bitmask_for_query_plan_node<'a>( .unwrap(); } Operator::Limit { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; } Operator::Join { left, right, .. 
} => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, left)?; - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, right)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?; } Operator::Order { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; } Operator::Projection { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_query_plan_node(tables, source)?; + table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; } Operator::Scan { table, .. } => { table_refs_mask |= 1 diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 6ab00435e..0061175eb 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -8,11 +8,11 @@ use crate::{ use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; use std::rc::Rc; -pub struct NodeIdCounter { +pub struct OperatorIdCounter { id: usize, } -impl NodeIdCounter { +impl OperatorIdCounter { pub fn new() -> Self { Self { id: 0 } } @@ -36,19 +36,20 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ let l = n.parse()?; if l == 0 { Operator::Nothing } else { Operator::Limit { - source: Box::new(node), + source: Box::new(operator), limit: l, - id: node_id_counter.get_next_id(), + id: operator_id_counter.get_next_id(), } } } @@ -223,7 +224,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

, - node_id_counter: &mut NodeIdCounter, + operator_id_counter: &mut OperatorIdCounter, ) -> Result<(Operator, Vec<(Rc, String)>)> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { return Ok((Operator::Nothing, vec![])); @@ -259,33 +260,34 @@ fn parse_from( _ => todo!(), }; - let mut node = Operator::Scan { + let mut operator = Operator::Scan { table: first_table.0.clone(), predicates: None, table_identifier: first_table.1.clone(), - id: node_id_counter.get_next_id(), + id: operator_id_counter.get_next_id(), }; let mut tables = vec![first_table]; for join in from.joins.unwrap_or_default().into_iter() { - let (right, outer, predicates) = parse_join(schema, join, node_id_counter, &mut tables)?; - node = Operator::Join { - left: Box::new(node), + let (right, outer, predicates) = + parse_join(schema, join, operator_id_counter, &mut tables)?; + operator = Operator::Join { + left: Box::new(operator), right: Box::new(right), predicates, outer, - id: node_id_counter.get_next_id(), + id: operator_id_counter.get_next_id(), } } - return Ok((node, tables)); + return Ok((operator, tables)); } fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, - node_id_counter: &mut NodeIdCounter, + operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec<(Rc, String)>, ) -> Result<(Operator, bool, Option>)> { let ast::JoinedSelectTable { @@ -340,7 +342,7 @@ fn parse_join( table: table.0.clone(), predicates: None, table_identifier: table.1.clone(), - id: node_id_counter.get_next_id(), + id: operator_id_counter.get_next_id(), }, outer, predicates, From 17cc3717c8a7f3378fe2ec1e9bb494fef47a3815 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Fri, 16 Aug 2024 19:43:29 +0300 Subject: [PATCH 10/20] rebase --- core/translate/expr.rs | 44 +++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 5dc8e5df9..04ff37763 100644 --- a/core/translate/expr.rs +++ 
b/core/translate/expr.rs @@ -669,7 +669,7 @@ pub fn translate_expr( for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; } program.emit_insn(Insn::Function { @@ -731,7 +731,7 @@ pub fn translate_expr( }; for arg in args.iter() { let reg = program.alloc_register(); - translate_expr(program, select, arg, reg, cursor_hint)?; + translate_expr(program, referenced_tables, arg, reg, cursor_hint)?; } program.emit_insn(Insn::Function { start_reg: target_register, @@ -754,13 +754,25 @@ pub fn translate_expr( }; let temp_reg = program.alloc_register(); - translate_expr(program, select, &args[0], temp_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + temp_reg, + cursor_hint, + )?; program.emit_insn(Insn::NotNull { reg: temp_reg, target_pc: program.offset() + 2, }); - translate_expr(program, select, &args[1], temp_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[1], + temp_reg, + cursor_hint, + )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, dst_reg: target_register, @@ -899,10 +911,28 @@ pub fn translate_expr( let start_reg = program.alloc_register(); let length_reg = program.alloc_register(); - translate_expr(program, select, &args[0], str_reg, cursor_hint)?; - translate_expr(program, select, &args[1], start_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[0], + str_reg, + cursor_hint, + )?; + translate_expr( + program, + referenced_tables, + &args[1], + start_reg, + cursor_hint, + )?; if args.len() == 3 { - translate_expr(program, select, &args[2], length_reg, cursor_hint)?; + translate_expr( + program, + referenced_tables, + &args[2], + length_reg, + cursor_hint, + )?; } program.emit_insn(Insn::Function { From 25033d280c71de526e78ea5213f1d799ada8ab13 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Fri, 16 Aug 2024 19:58:59 +0300 Subject: 
[PATCH 11/20] more accurate variable name --- core/translate/emitter.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 8e77a496e..b97236dde 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -38,7 +38,7 @@ pub trait Emitter { pb: &mut ProgramBuilder, m: &mut Metadata, referenced_tables: &[(Rc, String)], - can_emit_row: bool, + is_root: bool, ) -> Result; fn end( &mut self, @@ -209,7 +209,7 @@ impl Emitter for Operator { program: &mut ProgramBuilder, m: &mut Metadata, referenced_tables: &[(Rc, String)], - can_emit_row: bool, + is_root: bool, ) -> Result { match self { Operator::Aggregate { @@ -339,7 +339,7 @@ impl Emitter for Operator { }); } - if can_emit_row { + if is_root { return self.result_row(program, referenced_tables, m, None); } Ok(true) @@ -369,7 +369,7 @@ impl Emitter for Operator { }); sort_metadata.sort_register = start_reg; - if can_emit_row { + if is_root { return self.result_row(program, referenced_tables, m, None); } @@ -377,7 +377,7 @@ impl Emitter for Operator { } Operator::Projection { source, .. } => { source.emit(program, m, referenced_tables, false)?; - if can_emit_row { + if is_root { return self.result_row(program, referenced_tables, m, None); } @@ -423,7 +423,7 @@ impl Emitter for Operator { } } - if can_emit_row { + if is_root { return self.result_row(program, referenced_tables, m, None); } From 2e7f240bb5a3435038be3a7bbd2ccfb7746caa5c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 09:04:27 +0300 Subject: [PATCH 12/20] use table_columns() for seekrowid result_columns() --- core/translate/emitter.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index b97236dde..41f10c37e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -606,18 +606,12 @@ impl Emitter for Operator { } Operator::Filter { .. 
} => unreachable!("predicates have been pushed down"), Operator::SeekRowid { - table_identifier, .. + table_identifier, + table, + .. } => { - let cursor_id = - cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); let start_reg = program.alloc_registers(col_count); - for i in 0..col_count { - program.emit_insn(Insn::Column { - cursor_id, - column: i, - dest: start_reg + i, - }); - } + table_columns(program, table, table_identifier, cursor_override, start_reg); Ok(start_reg) } From 69f549d2b9ead8572b02f2fdfe032c097253c9ae Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 09:07:29 +0300 Subject: [PATCH 13/20] remove unnecessary unwrap_or --- core/translate/emitter.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 41f10c37e..19480b2b1 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -656,13 +656,11 @@ impl Emitter for Operator { .iter() .find(|(_, id)| id == table_identifier) .unwrap(); - let cursor_id = cursor_override - .unwrap_or(program.resolve_cursor_id(table_identifier, None)); cur_reg = table_columns( program, table, table_identifier, - Some(cursor_id), + cursor_override, cur_reg, ); } From d70eb6b3d7e0fbb3e63517ce1d0c52713ddd8039 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 09:16:24 +0300 Subject: [PATCH 14/20] fix seekrowid operator not emitting result when root --- core/translate/emitter.rs | 5 +++++ testing/all.test | 2 +- testing/select.test | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 19480b2b1..98b380218 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -276,6 +276,11 @@ impl Emitter for Operator { program.resolve_label(jump_target_when_true, program.offset()); } } + + if is_root { + return self.result_row(program, referenced_tables, m, None); + } + Ok(true) } Operator::Limit { 
source, .. } => { diff --git a/testing/all.test b/testing/all.test index 26ee60589..67d07870a 100755 --- a/testing/all.test +++ b/testing/all.test @@ -12,4 +12,4 @@ source $testdir/where.test source $testdir/like.test source $testdir/scalar-functions.test source $testdir/orderby.test -source $testdir/json.test \ No newline at end of file +source $testdir/json.test diff --git a/testing/select.test b/testing/select.test index 1540cd493..3803bfba6 100755 --- a/testing/select.test +++ b/testing/select.test @@ -34,3 +34,7 @@ do_execsql_test table-star { do_execsql_test table-star-2 { select p.*, u.age from users u join products p limit 1; } {1|hat|79.0|94} + +do_execsql_test seekrowid { + select * from users u where u.id = 5; +} {"5|Edward|Miller|christiankramer@example.com|725-281-1033|08522 English Plain|Lake Keith|ID|23283|15"} From e8c894e532d197201b6fbcf167fbe7de20fa75be Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 12:55:16 +0300 Subject: [PATCH 15/20] More flexible Emitter via stateful operators --- core/translate/emitter.rs | 1062 ++++++++++++++++++----------------- core/translate/optimizer.rs | 2 + core/translate/plan.rs | 13 +- core/translate/planner.rs | 7 + core/vdbe/builder.rs | 11 - core/vdbe/sorter.rs | 2 +- 6 files changed, 572 insertions(+), 525 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 98b380218..d7c611416 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -20,32 +20,16 @@ use super::plan::{Operator, ProjectionColumn}; /** * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. * - * - start: open cursors, etc. - * - emit: open loops, emit conditional jumps etc. - * - end: close loops, etc. - * - result_columns: emit the bytecode instructions for the result columns. - * - result_row: emit the bytecode instructions for a result row. 
+ * - step: perform a single step of the operator, emitting bytecode instructions as needed, + and returning a result indicating whether the operator is ready to emit a result row */ pub trait Emitter { - fn start( + fn step( &mut self, pb: &mut ProgramBuilder, m: &mut Metadata, referenced_tables: &[(Rc, String)], - ) -> Result<()>; - fn emit( - &mut self, - pb: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[(Rc, String)], - is_root: bool, - ) -> Result; - fn end( - &mut self, - pb: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[(Rc, String)], - ) -> Result<()>; + ) -> Result; fn result_columns( &self, program: &mut ProgramBuilder, @@ -59,7 +43,7 @@ pub trait Emitter { referenced_tables: &[(Rc, String)], metadata: &mut Metadata, cursor_override: Option, - ) -> Result; + ) -> Result<()>; } #[derive(Debug)] @@ -77,6 +61,7 @@ pub struct LeftJoinMetadata { #[derive(Debug)] pub struct SortMetadata { pub sort_cursor: usize, + pub pseudo_table_cursor: usize, pub sort_register: usize, pub next_row_label: BranchOffset, pub done_label: BranchOffset, @@ -92,483 +77,534 @@ pub struct Metadata { left_joins: HashMap, } +#[derive(Debug, PartialEq)] +pub enum OpStepResult { + Continue, + ReadyToEmit, + Done, +} + impl Emitter for Operator { - fn start( + fn step( &mut self, program: &mut ProgramBuilder, m: &mut Metadata, referenced_tables: &[(Rc, String)], - ) -> Result<()> { + ) -> Result { match self { Operator::Scan { table, table_identifier, id, + step, + predicates, .. 
} => { - let cursor_id = program.alloc_cursor_id( - Some(table_identifier.clone()), - Some(Table::BTree(table.clone())), - ); - let root_page = table.root_page; - let next_row_label = program.allocate_label(); - m.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + *step += 1; + const SCAN_OPEN_READ: usize = 1; + const SCAN_REWIND_AND_CONDITIONS: usize = 2; + const SCAN_NEXT: usize = 3; + match *step { + SCAN_OPEN_READ => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + let next_row_label = program.allocate_label(); + m.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); - Ok(()) + Ok(OpStepResult::Continue) + } + SCAN_REWIND_AND_CONDITIONS => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program.emit_insn(Insn::RewindAsync { cursor_id }); + let rewind_label = program.allocate_label(); + let halt_label = m.termination_labels.last().unwrap(); + m.rewind_labels.push(rewind_label); + program.defer_label_resolution(rewind_label, program.offset() as usize); + program.emit_insn_with_label_dependency( + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + }, + *halt_label, + ); + + let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + 
Ok(OpStepResult::ReadyToEmit) + } + SCAN_NEXT => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + program + .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = m.rewind_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + Ok(OpStepResult::Done) + } + _ => Ok(OpStepResult::Done), + } } Operator::SeekRowid { table, table_identifier, - .. - } => { - let cursor_id = program.alloc_cursor_id( - Some(table_identifier.clone()), - Some(Table::BTree(table.clone())), - ); - let root_page = table.root_page; - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - Ok(()) - } - Operator::Join { - left, - right, - outer, - id, - .. - } => { - if *outer { - let lj_metadata = LeftJoinMetadata { - match_flag_register: program.alloc_register(), - set_match_flag_true_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - on_match_jump_to_label: program.allocate_label(), - }; - m.left_joins.insert(*id, lj_metadata); - } - left.start(program, m, referenced_tables)?; - right.start(program, m, referenced_tables) - } - Operator::Aggregate { - id, - source, - aggregates, - } => { - let can_continue = source.start(program, m, referenced_tables)?; - - let agg_final_label = program.allocate_label(); - m.termination_labels.push(agg_final_label); - source.emit(program, m, referenced_tables, false)?; - - let num_aggs = aggregates.len(); - let start_reg = program.alloc_registers(num_aggs); - m.aggregations.insert(*id, start_reg); - - Ok(can_continue) - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { source, .. 
} => source.start(program, m, referenced_tables), - Operator::Order { id, source, key } => { - let sort_cursor = program.alloc_cursor_id(None, None); - m.sorts.insert( - *id, - SortMetadata { - sort_cursor, - sort_register: usize::MAX, // will be set later - next_row_label: program.allocate_label(), - done_label: program.allocate_label(), - }, - ); - let mut order = Vec::new(); - for (_, direction) in key.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: key.len(), - order: OwnedRecord::new(order), - }); - - source.start(program, m, referenced_tables) - } - Operator::Projection { source, .. } => source.start(program, m, referenced_tables), - Operator::Nothing => Ok(()), - } - } - fn emit( - &mut self, - program: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[(Rc, String)], - is_root: bool, - ) -> Result { - match self { - Operator::Aggregate { - source, - aggregates, - id, - } => { - let can_continue = source.emit(program, m, referenced_tables, false)?; - if !can_continue { - return Ok(false); - } - let start_reg = m.aggregations.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation(program, referenced_tables, agg, agg_result_reg, None)?; - } - - Ok(false) - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::SeekRowid { rowid_predicate, predicates, - table_identifier, + step, id, .. 
} => { - let cursor_id = program.resolve_cursor_id(table_identifier, None); - let rowid_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - rowid_predicate, - rowid_reg, - None, - )?; - let jump_label = m - .next_row_labels - .get(id) - .unwrap_or(&m.termination_labels.last().unwrap()); - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id, - src_reg: rowid_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - if let Some(predicates) = predicates { - for predicate in predicates.iter() { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); + *step += 1; + const SEEKROWID_OPEN_READ: usize = 1; + const SEEKROWID_SEEK_AND_CONDITIONS: usize = 2; + match *step { + SEEKROWID_OPEN_READ => { + let cursor_id = program.alloc_cursor_id( + Some(table_identifier.clone()), + Some(Table::BTree(table.clone())), + ); + let root_page = table.root_page; + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + Ok(OpStepResult::Continue) } - } - - if is_root { - return self.result_row(program, referenced_tables, m, None); - } - - Ok(true) - } - Operator::Limit { source, .. 
} => { - source.emit(program, m, referenced_tables, false)?; - Ok(true) - } - Operator::Join { - left, - right, - predicates, - outer, - id, - } => { - left.emit(program, m, referenced_tables, false)?; - - let mut jump_target_when_false = *m - .next_row_labels - .get(&right.id()) - .unwrap_or(&m.termination_labels.last().unwrap()); - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.emit_insn(Insn::Integer { - value: 0, - dest: lj_meta.match_flag_register, - }); - jump_target_when_false = lj_meta.check_match_flag_label; - m.next_row_labels.insert(right.id(), jump_target_when_false); - } - - right.emit(program, m, referenced_tables, false)?; - - if let Some(predicates) = predicates { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false, - }; - for predicate in predicates.iter() { - translate_condition_expr( + SEEKROWID_SEEK_AND_CONDITIONS => { + let cursor_id = program.resolve_cursor_id(table_identifier, None); + let rowid_reg = program.alloc_register(); + translate_expr( program, - referenced_tables, - predicate, + Some(referenced_tables), + rowid_predicate, + rowid_reg, None, - condition_metadata, )?; + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(&m.termination_labels.last().unwrap()); + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id, + src_reg: rowid_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + 
} + + Ok(OpStepResult::ReadyToEmit) } - program.resolve_label(jump_target_when_true, program.offset()); + _ => Ok(OpStepResult::Done), } - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.defer_label_resolution( - lj_meta.set_match_flag_true_label, - program.offset() as usize, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: lj_meta.match_flag_register, - }); - } - - if is_root { - return self.result_row(program, referenced_tables, m, None); - } - Ok(true) - } - Operator::Order { source, key, id } => { - source.emit(program, m, referenced_tables, false)?; - let sort_keys_count = key.len(); - let source_cols_count = source.column_count(referenced_tables); - let start_reg = program.alloc_registers(sort_keys_count); - for (i, (expr, _)) in key.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; - } - source.result_columns(program, referenced_tables, m, None)?; - - let dest = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg, - count: sort_keys_count + source_cols_count, - dest_reg: dest, - }); - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_metadata.sort_cursor, - record_reg: dest, - }); - sort_metadata.sort_register = start_reg; - - if is_root { - return self.result_row(program, referenced_tables, m, None); - } - - Ok(true) - } - Operator::Projection { source, .. } => { - source.emit(program, m, referenced_tables, false)?; - if is_root { - return self.result_row(program, referenced_tables, m, None); - } - - Ok(true) - } - Operator::Scan { - predicates, - table_identifier, - id, - .. 
- } => { - let cursor_id = program.resolve_cursor_id(table_identifier, None); - program.emit_insn(Insn::RewindAsync { cursor_id }); - let rewind_label = program.allocate_label(); - let halt_label = m.termination_labels.last().unwrap(); - m.rewind_labels.push(rewind_label); - program.defer_label_resolution(rewind_label, program.offset() as usize); - program.emit_insn_with_label_dependency( - Insn::RewindAwait { - cursor_id, - pc_if_empty: *halt_label, - }, - *halt_label, - ); - - let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); - if let Some(preds) = predicates { - for expr in preds { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - expr, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - if is_root { - return self.result_row(program, referenced_tables, m, None); - } - - Ok(true) - } - Operator::Nothing => Ok(false), - } - } - fn end( - &mut self, - program: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[(Rc, String)], - ) -> Result<()> { - match self { - Operator::Scan { - table_identifier, - id, - .. - } => { - let cursor_id = program.resolve_cursor_id(table_identifier, None); - program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = m.rewind_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - Ok(()) } Operator::Join { left, right, outer, + predicates, + step, id, .. 
} => { - right.end(program, m, referenced_tables)?; + *step += 1; + const JOIN_INIT: usize = 1; + const JOIN_DO_JOIN: usize = 2; + const JOIN_END: usize = 3; + match *step { + JOIN_INIT => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + m.left_joins.insert(*id, lj_metadata); + } + left.step(program, m, referenced_tables)?; + right.step(program, m, referenced_tables)?; - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(lj_meta.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: lj_meta.match_flag_register, - target_pc: lj_meta.on_match_jump_to_label, - decrement_by: 0, - }, - lj_meta.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match right.as_ref() { - Operator::Scan { - table_identifier, .. - } => program.resolve_cursor_id(table_identifier, None), - Operator::SeekRowid { - table_identifier, .. 
- } => program.resolve_cursor_id(table_identifier, None), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: lj_meta.set_match_flag_true_label, - }, - lj_meta.set_match_flag_true_label, - ); - // This points to the NextAsync instruction of the left table - program.resolve_label(lj_meta.on_match_jump_to_label, program.offset()); + Ok(OpStepResult::Continue) + } + JOIN_DO_JOIN => { + left.step(program, m, referenced_tables)?; + + let mut jump_target_when_false = *m + .next_row_labels + .get(&right.id()) + .or(m.next_row_labels.get(&left.id())) + .unwrap_or(&m.termination_labels.last().unwrap()); + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + } + m.next_row_labels.insert(right.id(), jump_target_when_false); + + right.step(program, m, referenced_tables)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + )?; + } + program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: lj_meta.match_flag_register, + }); + } + + Ok(OpStepResult::ReadyToEmit) + } + JOIN_END => { + right.step(program, m, referenced_tables)?; + + 
if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + Operator::Scan { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + Operator::SeekRowid { + table_identifier, .. + } => program.resolve_cursor_id(table_identifier, None), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + // This points to the NextAsync instruction of the left table + program.resolve_label(lj_meta.on_match_jump_to_label, program.offset()); + } + left.step(program, m, referenced_tables)?; + + Ok(OpStepResult::Done) + } + _ => Ok(OpStepResult::Done), } - left.end(program, m, referenced_tables) } Operator::Aggregate { id, source, aggregates, + step, } => { - source.end(program, m, referenced_tables)?; + *step += 1; + const AGGREGATE_INIT: usize = 1; + const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; + match *step { + AGGREGATE_INIT => { + let agg_final_label = program.allocate_label(); + m.termination_labels.push(agg_final_label); + let num_aggs = aggregates.len(); + let start_reg = program.alloc_registers(num_aggs); + m.aggregations.insert(*id, start_reg); - 
program.resolve_label(m.termination_labels.pop().unwrap(), program.offset()); - let start_reg = m.aggregations.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = *start_reg + i; - program.emit_insn(Insn::AggFinal { - register: agg_result_reg, - func: agg.func.clone(), - }); + Ok(OpStepResult::Continue) + } + AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { + match source.step(program, m, referenced_tables)? { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + let start_reg = m.aggregations.get(id).unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = start_reg + i; + translate_aggregation( + program, + referenced_tables, + agg, + agg_result_reg, + None, + )?; + } + } + OpStepResult::Done => { + return Ok(OpStepResult::ReadyToEmit); + } + } + }, + _ => Ok(OpStepResult::Done), } - program.emit_insn(Insn::ResultRow { - start_reg: *start_reg, - count: aggregates.len(), - }); - Ok(()) } Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::SeekRowid { .. } => Ok(()), - Operator::Limit { source, limit, .. } => { - source.result_row(program, referenced_tables, m, None)?; - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: *limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - let jump_label = m.termination_labels.last().unwrap(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - - source.end(program, m, referenced_tables)?; - - Ok(()) + Operator::Limit { source, step, .. } => { + *step += 1; + loop { + match source.step(program, m, referenced_tables)? { + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit => { + return Ok(OpStepResult::ReadyToEmit); + } + OpStepResult::Done => return Ok(OpStepResult::Done), + } + } } - Operator::Order { id, .. 
} => { - let sort_metadata = m.sorts.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.next_row_label, - }, - sort_metadata.next_row_label, - ); + Operator::Order { + id, + source, + key, + step, + } => { + *step += 1; + const ORDER_INIT: usize = 1; + const ORDER_INSERT_INTO_SORTER: usize = 2; + const ORDER_SORT_AND_OPEN_LOOP: usize = 3; + const ORDER_NEXT: usize = 4; + match *step { + ORDER_INIT => { + let sort_cursor = program.alloc_cursor_id(None, None); + m.sorts.insert( + *id, + SortMetadata { + sort_cursor, + pseudo_table_cursor: usize::MAX, // will be set later + sort_register: usize::MAX, // will be set later + next_row_label: program.allocate_label(), + done_label: program.allocate_label(), + }, + ); + let mut order = Vec::new(); + for (_, direction) in key.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: key.len(), + order: OwnedRecord::new(order), + }); - program.resolve_label(sort_metadata.done_label, program.offset()); + loop { + match source.step(program, m, referenced_tables)? 
{ + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit => { + return Ok(OpStepResult::Continue); + } + OpStepResult::Done => { + return Ok(OpStepResult::Done); + } + } + } + } + ORDER_INSERT_INTO_SORTER => { + let sort_keys_count = key.len(); + let source_cols_count = source.column_count(referenced_tables); + let start_reg = program.alloc_registers(sort_keys_count); + for (i, (expr, _)) in key.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; + } + source.result_columns(program, referenced_tables, m, None)?; - Ok(()) + let dest = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: sort_keys_count + source_cols_count, + dest_reg: dest, + }); + + let sort_metadata = m.sorts.get_mut(id).unwrap(); + program.emit_insn(Insn::SorterInsert { + cursor_id: sort_metadata.sort_cursor, + record_reg: dest, + }); + sort_metadata.sort_register = start_reg; + + Ok(OpStepResult::Continue) + } + ORDER_SORT_AND_OPEN_LOOP => { + loop { + match source.step(program, m, referenced_tables)? 
{ + OpStepResult::Done => { + break; + } + _ => unreachable!(), + } + } + let column_names = source.column_names(); + let pseudo_columns = column_names + .iter() + .map(|name| Column { + name: name.clone(), + primary_key: false, + ty: crate::schema::Type::Null, + }) + .collect::>(); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + + let pseudo_content_reg = program.alloc_register(); + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: pseudo_content_reg, + num_fields: key.len() + source.column_count(referenced_tables), + }); + + let sort_metadata = m.sorts.get(id).unwrap(); + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + + program.defer_label_resolution( + sort_metadata.next_row_label, + program.offset() as usize, + ); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: pseudo_content_reg, + pseudo_cursor, + }); + + let sort_metadata = m.sorts.get_mut(id).unwrap(); + + sort_metadata.pseudo_table_cursor = pseudo_cursor; + + Ok(OpStepResult::ReadyToEmit) + } + ORDER_NEXT => { + let sort_metadata = m.sorts.get(id).unwrap(); + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_metadata.next_row_label, + }, + sort_metadata.next_row_label, + ); + + program.resolve_label(sort_metadata.done_label, program.offset()); + + Ok(OpStepResult::Done) + } + _ => unreachable!(), + } } - Operator::Projection { source, .. } => source.end(program, m, referenced_tables), - Operator::Nothing => Ok(()), + Operator::Projection { source, step, .. 
} => { + *step += 1; + const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; + const PROJECTION_FINALIZE_SOURCE: usize = 2; + match *step { + PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { + match source.step(program, m, referenced_tables)? { + OpStepResult::Continue => continue, + OpStepResult::ReadyToEmit | OpStepResult::Done => { + return Ok(OpStepResult::ReadyToEmit); + } + } + }, + PROJECTION_FINALIZE_SOURCE => { + match source.step(program, m, referenced_tables)? { + OpStepResult::Done => { + return Ok(OpStepResult::Done); + } + _ => unreachable!(), + } + } + _ => Ok(OpStepResult::Done), + } + } + Operator::Nothing => Ok(OpStepResult::Done), } } fn result_columns( @@ -623,8 +659,26 @@ impl Emitter for Operator { Operator::Limit { .. } => { unimplemented!() } - Operator::Order { .. } => { - todo!() + Operator::Order { + id, source, key, .. + } => { + let sort_metadata = m.sorts.get(id).unwrap(); + let cursor_override = Some(sort_metadata.sort_cursor); + let sort_keys_count = key.len(); + let start_reg = program.alloc_registers(sort_keys_count); + for (i, (expr, _)) in key.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + cursor_override, + )?; + } + source.result_columns(program, referenced_tables, m, cursor_override)?; + + Ok(start_reg) } Operator::Projection { expressions, .. 
} => { let expr_count = expressions @@ -656,7 +710,7 @@ impl Emitter for Operator { ); } } - ProjectionColumn::TableStar(table, table_identifier) => { + ProjectionColumn::TableStar(_, table_identifier) => { let (table, table_identifier) = referenced_tables .iter() .find(|(_, id)| id == table_identifier) @@ -683,61 +737,37 @@ impl Emitter for Operator { referenced_tables: &[(Rc, String)], m: &mut Metadata, cursor_override: Option, - ) -> Result { + ) -> Result<()> { match self { - Operator::Order { id, source, key } => { - source.end(program, m, referenced_tables)?; - let column_names = source.column_names(); - let pseudo_columns = column_names - .iter() - .map(|name| Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }) - .collect::>(); - - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - - let pseudo_content_reg = program.alloc_register(); - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: pseudo_content_reg, - num_fields: key.len() + source.column_count(referenced_tables), - }); - + Operator::Order { id, source, .. } => { let sort_metadata = m.sorts.get(id).unwrap(); + source.result_row( + program, + referenced_tables, + m, + Some(sort_metadata.pseudo_table_cursor), + )?; - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sort_metadata.done_label, - }, - sort_metadata.done_label, - ); - - program.defer_label_resolution( - sort_metadata.next_row_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: sort_metadata.sort_cursor, - dest_reg: pseudo_content_reg, - pseudo_cursor, + Ok(()) + } + Operator::Limit { source, limit, .. 
} => { + source.result_row(program, referenced_tables, m, cursor_override)?; + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, }); + program.mark_last_insn_constant(); + let jump_label = m.termination_labels.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label, + }, + *jump_label, + ); - let done_label = sort_metadata.done_label; - - source.result_row(program, referenced_tables, m, Some(pseudo_cursor))?; - - program.resolve_label(done_label, program.offset()); - - Ok(true) + Ok(()) } operator => { let start_reg = @@ -746,7 +776,7 @@ impl Emitter for Operator { start_reg, count: operator.column_count(referenced_tables), }); - Ok(true) + Ok(()) } } } @@ -776,18 +806,26 @@ pub fn emit_program( left_joins: HashMap::new(), }; - select_plan - .root_operator - .start(&mut program, &mut metadata, &select_plan.referenced_tables)?; - select_plan.root_operator.emit( - &mut program, - &mut metadata, - &select_plan.referenced_tables, - true, - )?; - select_plan - .root_operator - .end(&mut program, &mut metadata, &select_plan.referenced_tables)?; + loop { + match select_plan.root_operator.step( + &mut program, + &mut metadata, + &select_plan.referenced_tables, + )? { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + select_plan.root_operator.result_row( + &mut program, + &select_plan.referenced_tables, + &mut metadata, + None, + )?; + } + OpStepResult::Done => { + break; + } + } + } program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt); diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 006773332..4cf598cf9 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -37,6 +37,7 @@ fn use_indexes( predicates: filter, table_identifier, id, + .. 
} => { if filter.is_none() { return Ok(()); @@ -75,6 +76,7 @@ fn use_indexes( rowid_predicate, predicates: predicates_owned, id: *id, + step: 0, } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index b3ef1e308..2e59cafdc 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -27,6 +27,10 @@ impl Display for Plan { Order Project Scan + + Operators also have a unique ID, which is used to identify them in the query plan and attach metadata. + They also have a step counter, which is used to track the current step in the operator's execution. + TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. */ #[derive(Clone, Debug)] pub enum Operator { @@ -38,6 +42,7 @@ pub enum Operator { id: usize, source: Box, aggregates: Vec, + step: usize, }, // Filter operator // This operator is used to filter rows from the source operator. @@ -62,6 +67,7 @@ pub enum Operator { table_identifier: String, rowid_predicate: ast::Expr, predicates: Option>, + step: usize, }, // Limit operator // This operator is used to limit the number of rows returned by the source operator. @@ -69,6 +75,7 @@ pub enum Operator { id: usize, source: Box, limit: usize, + step: usize, }, // Join operator // This operator is used to join two source operators. @@ -80,6 +87,7 @@ pub enum Operator { right: Box, predicates: Option>, outer: bool, + step: usize, }, // Order operator // This operator is used to sort the rows returned by the source operator. @@ -87,6 +95,7 @@ pub enum Operator { id: usize, source: Box, key: Vec<(ast::Expr, Direction)>, + step: usize, }, // Projection operator // This operator is used to project columns from the source operator. @@ -98,6 +107,7 @@ pub enum Operator { id: usize, source: Box, expressions: Vec, + step: usize, }, // Scan operator // This operator is used to scan a table. 
@@ -109,6 +119,7 @@ pub enum Operator { table: Rc, table_identifier: String, predicates: Option>, + step: usize, }, // Nothing operator // This operator is used to represent an empty query. @@ -484,7 +495,7 @@ pub fn get_table_ref_bitmask_for_ast_expr<'a>( let matching_table = tables .iter() .enumerate() - .find(|(_, (table, t_id))| *t_id == tbl); + .find(|(_, (_, t_id))| *t_id == tbl); if matching_table.is_none() { crate::bail_parse_error!("introspect: table not found: {}", &tbl) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0061175eb..94ca386b9 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -156,12 +156,14 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

bool { - (self.seekrowid_emitted_bitmask & (1 << cursor_id)) != 0 - } - - fn set_cursor_emitted_seekrowid(&mut self, cursor_id: CursorID) { - self.seekrowid_emitted_bitmask |= 1 << cursor_id; - } - fn _emit_insn(&mut self, insn: Insn) { - if let Insn::SeekRowid { cursor_id, .. } = insn { - self.set_cursor_emitted_seekrowid(cursor_id); - } self.insns.push(insn); } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index b80ab6074..011392d1a 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -106,7 +106,7 @@ impl Cursor for Sorter { } fn get_null_flag(&self) -> bool { - todo!(); + false } fn exists(&mut self, key: &OwnedValue) -> Result> { From d7d195a618e2f539e2bf2835c5cdb5b7e35936f3 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 13:56:59 +0300 Subject: [PATCH 16/20] Cleanup and improve emitter.rs docs --- core/translate/emitter.rs | 51 +++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index d7c611416..a772165a6 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -60,23 +60,50 @@ pub struct LeftJoinMetadata { #[derive(Debug)] pub struct SortMetadata { + // cursor id for the Sorter table where the sorted rows are stored pub sort_cursor: usize, + // cursor id for the Pseudo table where rows are temporarily inserted from the Sorter table pub pseudo_table_cursor: usize, - pub sort_register: usize, - pub next_row_label: BranchOffset, + // label where the SorterData instruction is emitted; SorterNext will jump here if there is more data to read + pub sorter_data_label: BranchOffset, + // label for the instruction immediately following SorterNext; SorterSort will jump here in case there is no data pub done_label: BranchOffset, } #[derive(Debug)] pub struct Metadata { + // labels for the instructions that terminate the execution when a conditional check evaluates to false. 
typically jumps to Halt, but can also jump to AggFinal if a parent in the tree is an aggregation termination_labels: Vec, + // labels for the instructions that jump to the next row in the current operator. + // for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; + // in a join with a scan and a seek, the seek will jump to the scan's Next instruction when the join condition is false. next_row_labels: HashMap, + // labels for the Rewind instructions. rewind_labels: Vec, - aggregations: HashMap, + // mapping between Aggregation operator id and the register that holds the start of the aggregation result + aggregation_start_registers: HashMap, + // mapping between Order operator id and associated metadata sorts: HashMap, + // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, } +/** +* Emitters return one of three possible results from the step() method: +* - Continue: the operator is not yet ready to emit a result row +* - ReadyToEmit: the operator is ready to emit a result row +* - Done: the operator has completed execution +* For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. +* At that point, it will return ReadyToEmit. +* Finally, when the Scan operator has emitted a Next instruction, it will return Done. +* +* Parent operators are free to make decisions based on the result a child operator's step() method. +* +* When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. +* When the root operator returns Done, the bytecode plan is complete. 
+* + +*/ #[derive(Debug, PartialEq)] pub enum OpStepResult { Continue, @@ -392,7 +419,7 @@ impl Emitter for Operator { m.termination_labels.push(agg_final_label); let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(num_aggs); - m.aggregations.insert(*id, start_reg); + m.aggregation_start_registers.insert(*id, start_reg); Ok(OpStepResult::Continue) } @@ -400,7 +427,7 @@ impl Emitter for Operator { match source.step(program, m, referenced_tables)? { OpStepResult::Continue => {} OpStepResult::ReadyToEmit => { - let start_reg = m.aggregations.get(id).unwrap(); + let start_reg = m.aggregation_start_registers.get(id).unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; translate_aggregation( @@ -452,8 +479,7 @@ impl Emitter for Operator { SortMetadata { sort_cursor, pseudo_table_cursor: usize::MAX, // will be set later - sort_register: usize::MAX, // will be set later - next_row_label: program.allocate_label(), + sorter_data_label: program.allocate_label(), done_label: program.allocate_label(), }, ); @@ -501,7 +527,6 @@ impl Emitter for Operator { cursor_id: sort_metadata.sort_cursor, record_reg: dest, }); - sort_metadata.sort_register = start_reg; Ok(OpStepResult::Continue) } @@ -548,7 +573,7 @@ impl Emitter for Operator { ); program.defer_label_resolution( - sort_metadata.next_row_label, + sort_metadata.sorter_data_label, program.offset() as usize, ); program.emit_insn(Insn::SorterData { @@ -568,9 +593,9 @@ impl Emitter for Operator { program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.next_row_label, + pc_if_next: sort_metadata.sorter_data_label, }, - sort_metadata.next_row_label, + sort_metadata.sorter_data_label, ); program.resolve_label(sort_metadata.done_label, program.offset()); @@ -634,7 +659,7 @@ impl Emitter for Operator { Ok(left_start_reg) } Operator::Aggregate { id, aggregates, .. 
} => { - let start_reg = m.aggregations.get(id).unwrap(); + let start_reg = m.aggregation_start_registers.get(id).unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = *start_reg + i; program.emit_insn(Insn::AggFinal { @@ -801,7 +826,7 @@ pub fn emit_program( termination_labels: vec![halt_label], next_row_labels: HashMap::new(), rewind_labels: Vec::new(), - aggregations: HashMap::new(), + aggregation_start_registers: HashMap::new(), sorts: HashMap::new(), left_joins: HashMap::new(), }; From 05a6616803af1af8b2e1ebe111dca479c201e25a Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 14:12:57 +0300 Subject: [PATCH 17/20] BytecodeGenerator struct --- core/translate/emitter.rs | 144 +++++++++++++++++++++++++------------- 1 file changed, 94 insertions(+), 50 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index a772165a6..e263a4ee0 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -807,64 +807,108 @@ impl Emitter for Operator { } } -pub fn emit_program( +pub struct BytecodeGenerator { + program: ProgramBuilder, database_header: Rc>, - mut select_plan: Plan, -) -> Result { - let mut program = ProgramBuilder::new(); - let init_label = program.allocate_label(); - let halt_label = program.allocate_label(); - program.emit_insn_with_label_dependency( - Insn::Init { - target_pc: init_label, - }, - init_label, - ); - let start_offset = program.offset(); + metadata: Metadata, + plan: Plan, +} - let mut metadata = Metadata { - termination_labels: vec![halt_label], - next_row_labels: HashMap::new(), - rewind_labels: Vec::new(), - aggregation_start_registers: HashMap::new(), - sorts: HashMap::new(), - left_joins: HashMap::new(), - }; - - loop { - match select_plan.root_operator.step( - &mut program, - &mut metadata, - &select_plan.referenced_tables, - )? 
{ - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - select_plan.root_operator.result_row( - &mut program, - &select_plan.referenced_tables, - &mut metadata, - None, - )?; - } - OpStepResult::Done => { - break; - } +impl BytecodeGenerator { + pub fn new(plan: Plan, database_header: Rc>) -> Self { + Self { + program: ProgramBuilder::new(), + database_header, + metadata: Metadata { + termination_labels: Vec::new(), + next_row_labels: HashMap::new(), + rewind_labels: Vec::new(), + aggregation_start_registers: HashMap::new(), + sorts: HashMap::new(), + left_joins: HashMap::new(), + }, + plan, } } - program.resolve_label(halt_label, program.offset()); - program.emit_insn(Insn::Halt); + fn prologue(&mut self) -> Result<(BranchOffset, BranchOffset, BranchOffset)> { + let init_label = self.program.allocate_label(); + let halt_label = self.program.allocate_label(); + self.metadata.termination_labels.push(halt_label); - program.resolve_label(init_label, program.offset()); - program.emit_insn(Insn::Transaction); + self.program.emit_insn_with_label_dependency( + Insn::Init { + target_pc: init_label, + }, + init_label, + ); - program.emit_constant_insns(); - program.emit_insn(Insn::Goto { - target_pc: start_offset, - }); + let start_offset = self.program.offset(); - program.resolve_deferred_labels(); - Ok(program.build(database_header)) + Ok((init_label, halt_label, start_offset)) + } + + fn epilogue( + &mut self, + init_label: BranchOffset, + halt_label: BranchOffset, + start_offset: BranchOffset, + ) -> Result<()> { + self.program + .resolve_label(halt_label, self.program.offset()); + self.program.emit_insn(Insn::Halt); + + self.program + .resolve_label(init_label, self.program.offset()); + self.program.emit_insn(Insn::Transaction); + + self.program.emit_constant_insns(); + self.program.emit_insn(Insn::Goto { + target_pc: start_offset, + }); + + self.program.resolve_deferred_labels(); + + Ok(()) + } + + fn build(self) -> Result { + 
Ok(self.program.build(self.database_header)) + } + + pub fn generate(mut self) -> Result { + let (init_label, halt_label, start_offset) = self.prologue()?; + + loop { + match self.plan.root_operator.step( + &mut self.program, + &mut self.metadata, + &self.plan.referenced_tables, + )? { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + self.plan.root_operator.result_row( + &mut self.program, + &self.plan.referenced_tables, + &mut self.metadata, + None, + )?; + } + OpStepResult::Done => { + break; + } + } + } + + self.epilogue(init_label, halt_label, start_offset)?; + + self.build() + } +} + +pub fn emit_program(database_header: Rc>, plan: Plan) -> Result { + let generator = BytecodeGenerator::new(plan, database_header); + generator.generate() } fn table_columns( From b7fbe57ca7203c4d2fb75744b70e16ae7200d999 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 14:16:56 +0300 Subject: [PATCH 18/20] Move translate_table_columns to expr.rs --- core/translate/emitter.rs | 54 +++++++++++++-------------------------- core/translate/expr.rs | 30 ++++++++++++++++++++++ 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e263a4ee0..9c45d952a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -10,9 +10,9 @@ use crate::vdbe::builder::ProgramBuilder; use crate::vdbe::{BranchOffset, Insn, Program}; use crate::Result; -use super::expr::maybe_apply_affinity; use super::expr::{ - translate_aggregation, translate_condition_expr, translate_expr, ConditionMetadata, + translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, + ConditionMetadata, }; use super::plan::Plan; use super::plan::{Operator, ProjectionColumn}; @@ -647,7 +647,13 @@ impl Emitter for Operator { .. 
} => { let start_reg = program.alloc_registers(col_count); - table_columns(program, table, table_identifier, cursor_override, start_reg); + translate_table_columns( + program, + table, + table_identifier, + cursor_override, + start_reg, + ); Ok(start_reg) } @@ -677,7 +683,13 @@ impl Emitter for Operator { .. } => { let start_reg = program.alloc_registers(col_count); - table_columns(program, table, table_identifier, cursor_override, start_reg); + translate_table_columns( + program, + table, + table_identifier, + cursor_override, + start_reg, + ); Ok(start_reg) } @@ -726,7 +738,7 @@ impl Emitter for Operator { } ProjectionColumn::Star => { for (table, table_identifier) in referenced_tables.iter() { - cur_reg = table_columns( + cur_reg = translate_table_columns( program, table, table_identifier, @@ -740,7 +752,7 @@ impl Emitter for Operator { .iter() .find(|(_, id)| id == table_identifier) .unwrap(); - cur_reg = table_columns( + cur_reg = translate_table_columns( program, table, table_identifier, @@ -910,33 +922,3 @@ pub fn emit_program(database_header: Rc>, plan: Plan) -> let generator = BytecodeGenerator::new(plan, database_header); generator.generate() } - -fn table_columns( - program: &mut ProgramBuilder, - table: &Rc, - table_identifier: &str, - cursor_override: Option, - start_reg: usize, -) -> usize { - let mut cur_reg = start_reg; - let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); - for i in 0..table.columns.len() { - let is_rowid = table.column_is_rowid_alias(&table.columns[i]); - let col_type = &table.columns[i].ty; - if is_rowid { - program.emit_insn(Insn::RowId { - cursor_id, - dest: cur_reg, - }); - } else { - program.emit_insn(Insn::Column { - cursor_id, - column: i, - dest: cur_reg, - }); - } - maybe_apply_affinity(*col_type, cur_reg, program); - cur_reg += 1; - } - cur_reg -} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 04ff37763..b0aa27273 100644 --- a/core/translate/expr.rs +++ 
b/core/translate/expr.rs @@ -1306,6 +1306,36 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu } } +pub fn translate_table_columns( + program: &mut ProgramBuilder, + table: &Rc, + table_identifier: &str, + cursor_override: Option, + start_reg: usize, +) -> usize { + let mut cur_reg = start_reg; + let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None)); + for i in 0..table.columns.len() { + let is_rowid = table.column_is_rowid_alias(&table.columns[i]); + let col_type = &table.columns[i].ty; + if is_rowid { + program.emit_insn(Insn::RowId { + cursor_id, + dest: cur_reg, + }); + } else { + program.emit_insn(Insn::Column { + cursor_id, + column: i, + dest: cur_reg, + }); + } + maybe_apply_affinity(*col_type, cur_reg, program); + cur_reg += 1; + } + cur_reg +} + pub fn translate_aggregation( program: &mut ProgramBuilder, referenced_tables: &[(Rc, String)], From 2b71a5802dde09d2e3dd1db4d11510787f727848 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 14:24:20 +0300 Subject: [PATCH 19/20] tweak --- core/translate/emitter.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 9c45d952a..107bfdb70 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -907,14 +907,11 @@ impl BytecodeGenerator { )?; } OpStepResult::Done => { - break; + self.epilogue(init_label, halt_label, start_offset)?; + return self.build(); } } } - - self.epilogue(init_label, halt_label, start_offset)?; - - self.build() } } From a79c0c5b34b07f12cb46b1797fcb4b0df1e30720 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 17 Aug 2024 14:35:44 +0300 Subject: [PATCH 20/20] BytecodeGenerator struct was unnecessary --- core/translate/emitter.rs | 155 ++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 89 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 
107bfdb70..cac0aafe3 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -70,7 +70,7 @@ pub struct SortMetadata { pub done_label: BranchOffset, } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct Metadata { // labels for the instructions that terminate the execution when a conditional check evaluates to false. typically jumps to Halt, but can also jump to AggFinal if a parent in the tree is an aggregation termination_labels: Vec, @@ -819,103 +819,80 @@ impl Emitter for Operator { } } -pub struct BytecodeGenerator { - program: ProgramBuilder, - database_header: Rc>, - metadata: Metadata, - plan: Plan, +fn prologue() -> Result<( + ProgramBuilder, + Metadata, + BranchOffset, + BranchOffset, + BranchOffset, +)> { + let mut program = ProgramBuilder::new(); + let init_label = program.allocate_label(); + let halt_label = program.allocate_label(); + + program.emit_insn_with_label_dependency( + Insn::Init { + target_pc: init_label, + }, + init_label, + ); + + let start_offset = program.offset(); + + let metadata = Metadata { + termination_labels: vec![halt_label], + ..Default::default() + }; + + Ok((program, metadata, init_label, halt_label, start_offset)) } -impl BytecodeGenerator { - pub fn new(plan: Plan, database_header: Rc>) -> Self { - Self { - program: ProgramBuilder::new(), - database_header, - metadata: Metadata { - termination_labels: Vec::new(), - next_row_labels: HashMap::new(), - rewind_labels: Vec::new(), - aggregation_start_registers: HashMap::new(), - sorts: HashMap::new(), - left_joins: HashMap::new(), - }, - plan, - } - } +fn epilogue( + program: &mut ProgramBuilder, + init_label: BranchOffset, + halt_label: BranchOffset, + start_offset: BranchOffset, +) -> Result<()> { + program.resolve_label(halt_label, program.offset()); + program.emit_insn(Insn::Halt); - fn prologue(&mut self) -> Result<(BranchOffset, BranchOffset, BranchOffset)> { - let init_label = self.program.allocate_label(); - let halt_label = 
self.program.allocate_label(); - self.metadata.termination_labels.push(halt_label); + program.resolve_label(init_label, program.offset()); + program.emit_insn(Insn::Transaction); - self.program.emit_insn_with_label_dependency( - Insn::Init { - target_pc: init_label, - }, - init_label, - ); + program.emit_constant_insns(); + program.emit_insn(Insn::Goto { + target_pc: start_offset, + }); - let start_offset = self.program.offset(); + program.resolve_deferred_labels(); - Ok((init_label, halt_label, start_offset)) - } + Ok(()) +} - fn epilogue( - &mut self, - init_label: BranchOffset, - halt_label: BranchOffset, - start_offset: BranchOffset, - ) -> Result<()> { - self.program - .resolve_label(halt_label, self.program.offset()); - self.program.emit_insn(Insn::Halt); +pub fn emit_program( + database_header: Rc>, + mut plan: Plan, +) -> Result { + let (mut program, mut metadata, init_label, halt_label, start_offset) = prologue()?; - self.program - .resolve_label(init_label, self.program.offset()); - self.program.emit_insn(Insn::Transaction); - - self.program.emit_constant_insns(); - self.program.emit_insn(Insn::Goto { - target_pc: start_offset, - }); - - self.program.resolve_deferred_labels(); - - Ok(()) - } - - fn build(self) -> Result { - Ok(self.program.build(self.database_header)) - } - - pub fn generate(mut self) -> Result { - let (init_label, halt_label, start_offset) = self.prologue()?; - - loop { - match self.plan.root_operator.step( - &mut self.program, - &mut self.metadata, - &self.plan.referenced_tables, - )? { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - self.plan.root_operator.result_row( - &mut self.program, - &self.plan.referenced_tables, - &mut self.metadata, - None, - )?; - } - OpStepResult::Done => { - self.epilogue(init_label, halt_label, start_offset)?; - return self.build(); - } + loop { + match plan + .root_operator + .step(&mut program, &mut metadata, &plan.referenced_tables)? 
+ { + OpStepResult::Continue => {} + OpStepResult::ReadyToEmit => { + plan.root_operator.result_row( + &mut program, + &plan.referenced_tables, + &mut metadata, + None, + )?; + } + OpStepResult::Done => { + epilogue(&mut program, init_label, halt_label, start_offset)?; + return Ok(program.build(database_header)); } } } } - -pub fn emit_program(database_header: Rc>, plan: Plan) -> Result { - let generator = BytecodeGenerator::new(plan, database_header); - generator.generate() -}