diff --git a/core/lib.rs b/core/lib.rs index b7344c260..bc97a7c7b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -235,8 +235,8 @@ impl Connection { Cmd::ExplainQueryPlan(stmt) => { match stmt { ast::Stmt::Select(select) => { - let plan = prepare_select_plan(&self.schema.borrow(), select)?; - let (plan, _) = optimize_plan(plan)?; + let plan = prepare_select_plan(&*self.schema.borrow(), select)?; + let plan = optimize_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/schema.rs b/core/schema.rs index 7ebe249be..c60b8ff5f 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -90,7 +90,7 @@ impl Table { None => None, }, Table::Pseudo(table) => match table.columns.get(index) { - Some(column) => Some(&column.name), + Some(_) => None, None => None, }, } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5f3402389..8548b28b1 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,3 +1,6 @@ +// This module contains code for emitting bytecode instructions for SQL query execution. +// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. + use std::cell::RefCell; use std::collections::HashMap; use std::rc::{Rc, Weak}; @@ -6,7 +9,6 @@ use sqlite3_parser::ast; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::expr::resolve_ident_pseudo_table; use crate::translate::plan::{IterationDirection, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::vdbe::builder::ProgramBuilder; @@ -14,42 +16,14 @@ use crate::vdbe::{BranchOffset, Insn, Program}; use crate::{Connection, Result}; use super::expr::{ - translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, + translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; -use super::optimizer::ExpressionResultCache; -use super::plan::{BTreeTableReference, Plan}; -use super::plan::{Operator, ProjectionColumn}; - -/** - * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. - * - * - step: perform a single step of the operator, emitting bytecode instructions as needed, - and returning a result indicating whether the operator is ready to emit a result row -*/ -pub trait Emitter { - fn step( - &mut self, - pb: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result; - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result; - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()>; -} +use super::optimizer::Optimizable; +use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; +use super::plan::{ResultSetColumn, SourceOperator}; +// Metadata for handling LEFT JOIN operations #[derive(Debug)] pub struct LeftJoinMetadata { // integer register that holds a flag that is set to true if the current row has a match for the left join @@ -62,20 +36,16 @@ pub struct LeftJoinMetadata { pub on_match_jump_to_label: BranchOffset, } +// Metadata for handling ORDER BY operations #[derive(Debug)] pub struct SortMetadata { // cursor id for the Sorter table where the sorted rows are stored pub sort_cursor: usize, - // cursor id for the Pseudo table where rows are temporarily inserted from the Sorter table - pub pseudo_table_cursor: usize, - // label where the SorterData instruction is emitted; SorterNext will jump here if there is more data to read - pub sorter_data_label: BranchOffset, - // label for the instruction immediately following SorterNext; SorterSort will jump here in case there is no data - pub done_label: BranchOffset, // register where the sorter data is inserted and later retrieved from pub sorter_data_register: usize, } +// Metadata for handling GROUP BY operations #[derive(Debug)] pub struct GroupByMetadata { // Cursor ID for the Sorter table where the grouped rows are stored @@ -90,12 +60,8 @@ pub struct GroupByMetadata { pub subroutine_accumulator_output_return_offset_register: usize, // Label for the instruction that sets the accumulator indicator to true (indicating data exists in the accumulator for the current group) pub accumulator_indicator_set_true_label: BranchOffset, - // Label for the instruction where SorterData is emitted (used for fetching sorted data) - pub sorter_data_label: BranchOffset, // Register holding the key used for sorting in the Sorter pub sorter_key_register: usize, - // Label for the instruction signaling the completion of grouping operations - pub grouping_done_label: BranchOffset, // Register holding a flag to abort the grouping process if necessary pub abort_flag_register: usize, // Register holding a boolean indicating whether there's data in the accumulator (used for aggregation) @@ -108,13 +74,6 @@ pub struct GroupByMetadata { pub group_exprs_comparison_register: usize, } -#[derive(Debug)] -pub struct SortCursorOverride { - pub cursor_id: usize, - pub pseudo_table: Table, - pub sort_key_len: usize, -} - /// The Metadata struct holds various information and labels used during bytecode generation. /// It is used for maintaining state and control flow during the bytecode /// generation process. @@ -126,1562 +85,26 @@ pub struct Metadata { // for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; // in a join with a scan and a seek, the seek will jump to the scan's Next instruction when the join condition is false. next_row_labels: HashMap, - // labels for the Rewind instructions. + // labels for the instructions beginning the inner loop of a scan operator. scan_loop_body_labels: Vec, - // mapping between Aggregation operator id and the register that holds the start of the aggregation result - aggregation_start_registers: HashMap, - // mapping between Aggregation operator id and associated metadata (if the aggregation has a group by clause) - group_bys: HashMap, - // mapping between Order operator id and associated metadata - sorts: HashMap, + // metadata for the group by operator + group_by_metadata: Option, + // metadata for the order by operator + sort_metadata: Option, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, - expr_result_cache: ExpressionResultCache, + // First register of the aggregation results + pub aggregation_start_register: Option, + // We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter. + // This vector holds the indexes of the result columns in the ORDER BY sorter. + pub result_column_indexes_in_orderby_sorter: HashMap, + // We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys. + // This vector holds the indexes of the result columns that we need to skip. + pub result_columns_to_skip_in_orderby_sorter: Option>, } -/// Emitters return one of three possible results from the step() method: -/// - Continue: the operator is not yet ready to emit a result row -/// - ReadyToEmit: the operator is ready to emit a result row -/// - Done: the operator has completed execution -/// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. -/// At that point, it will return ReadyToEmit. -/// Finally, when the Scan operator has emitted a Next instruction, it will return Done. -/// -/// Parent operators are free to make decisions based on the result a child operator's step() method. -/// -/// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. -/// When the root operator returns Done, the bytecode plan is complete. -#[derive(Debug, PartialEq)] -pub enum OpStepResult { - Continue, - ReadyToEmit, - Done, -} - -impl Emitter for Operator { - fn step( - &mut self, - program: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result { - let current_operator_column_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, - id, - step, - predicates, - iter_dir, - } => { - *step += 1; - const SCAN_OPEN_READ: usize = 1; - const SCAN_BODY: usize = 2; - const SCAN_NEXT: usize = 3; - let reverse = iter_dir - .as_ref() - .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); - match *step { - SCAN_OPEN_READ => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - let root_page = table_reference.table.root_page; - let next_row_label = program.allocate_label(); - m.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - Ok(OpStepResult::Continue) - } - SCAN_BODY => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - if reverse { - program.emit_insn(Insn::LastAsync { cursor_id }); - } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); - } - let scan_loop_body_label = program.allocate_label(); - let halt_label = m.termination_label_stack.last().unwrap(); - program.emit_insn_with_label_dependency( - if reverse { - Insn::LastAwait { - cursor_id, - pc_if_empty: *halt_label, - } - } else { - Insn::RewindAwait { - cursor_id, - pc_if_empty: *halt_label, - } - }, - *halt_label, - ); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - - let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); - if let Some(preds) = predicates { - for expr in preds { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - expr, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SCAN_NEXT => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - if reverse { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - - if reverse { - program.emit_insn_with_label_dependency( - Insn::PrevAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Search { - table_reference, - search, - predicates, - step, - id, - .. - } => { - *step += 1; - const SEARCH_OPEN_READ: usize = 1; - const SEARCH_BODY: usize = 2; - const SEARCH_NEXT: usize = 3; - match *step { - SEARCH_OPEN_READ => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - - let next_row_label = program.allocate_label(); - - if !matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - m.next_row_labels.insert(*id, next_row_label); - } - - let scan_loop_body_label = program.allocate_label(); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - if let Search::IndexSearch { index, .. } = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - Some(Table::Index(index.clone())), - ); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - } - Ok(OpStepResult::Continue) - } - SEARCH_BODY => { - let table_cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - - // Open the loop for the index search. - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::PrimaryKeyEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search - { - Some(program.resolve_cursor_id(&index.name, None)) - } else { - None - }; - let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::PrimaryKeyEq { .. } => unreachable!(), - }; - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - program.emit_insn_with_label_dependency( - match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => { - Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - } - } - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - }, - _ => unreachable!(), - }, - *m.termination_label_stack.last().unwrap(), - ); - if *cmp_op == ast::Operator::Less - || *cmp_op == ast::Operator::LessEquals - { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. - - let abort_jump_target = *m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - _ => {} - } - - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } - } - - let jump_label = m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if let Search::PrimaryKeyEq { cmp_expr } = search { - let src_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - src_reg, - None, - None, - )?; - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id: table_cursor_id, - src_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - } - if let Some(predicates) = predicates { - for predicate in predicates.iter() { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SEARCH_NEXT => { - if matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. - return Ok(OpStepResult::Done); - } - let cursor_id = match search { - Search::IndexSearch { index, .. } => { - program.resolve_cursor_id(&index.name, None) - } - Search::PrimaryKeySearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier, None) - } - Search::PrimaryKeyEq { .. } => unreachable!(), - }; - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Join { - left, - right, - outer, - predicates, - step, - id, - .. - } => { - *step += 1; - const JOIN_INIT: usize = 1; - const JOIN_DO_JOIN: usize = 2; - const JOIN_END: usize = 3; - match *step { - JOIN_INIT => { - if *outer { - let lj_metadata = LeftJoinMetadata { - match_flag_register: program.alloc_register(), - set_match_flag_true_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - on_match_jump_to_label: program.allocate_label(), - }; - m.left_joins.insert(*id, lj_metadata); - } - left.step(program, m, referenced_tables)?; - right.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Continue) - } - JOIN_DO_JOIN => { - left.step(program, m, referenced_tables)?; - - let mut jump_target_when_false = *m - .next_row_labels - .get(&right.id()) - .or(m.next_row_labels.get(&left.id())) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.emit_insn(Insn::Integer { - value: 0, - dest: lj_meta.match_flag_register, - }); - jump_target_when_false = lj_meta.check_match_flag_label; - } - m.next_row_labels.insert(right.id(), jump_target_when_false); - - right.step(program, m, referenced_tables)?; - - if let Some(predicates) = predicates { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false, - }; - for predicate in predicates.iter() { - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - } - program.resolve_label(jump_target_when_true, program.offset()); - } - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.defer_label_resolution( - lj_meta.set_match_flag_true_label, - program.offset() as usize, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: lj_meta.match_flag_register, - }); - } - - Ok(OpStepResult::ReadyToEmit) - } - JOIN_END => { - right.step(program, m, referenced_tables)?; - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(lj_meta.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: lj_meta.match_flag_register, - target_pc: lj_meta.on_match_jump_to_label, - decrement_by: 0, - }, - lj_meta.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match right.as_ref() { - Operator::Scan { - table_reference, .. - } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - Operator::Search { - table_reference, .. - } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: lj_meta.set_match_flag_true_label, - }, - lj_meta.set_match_flag_true_label, - ); - } - let next_row_label = if *outer { - m.left_joins.get(id).unwrap().on_match_jump_to_label - } else { - *m.next_row_labels.get(&right.id()).unwrap() - }; - // This points to the NextAsync instruction of the left table - program.resolve_label(next_row_label, program.offset()); - left.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Aggregate { - id, - source, - aggregates, - group_by, - step, - .. - } => { - *step += 1; - - // Group by aggregation eg. SELECT a, b, sum(c) FROM t GROUP BY a, b - if let Some(group_by) = group_by { - const GROUP_BY_INIT: usize = 1; - const GROUP_BY_INSERT_INTO_SORTER: usize = 2; - const GROUP_BY_SORT_AND_COMPARE: usize = 3; - const GROUP_BY_PREPARE_ROW: usize = 4; - const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; - match *step { - GROUP_BY_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - - let sort_cursor = program.alloc_cursor_id(None, None); - - let abort_flag_register = program.alloc_register(); - let data_in_accumulator_indicator_register = program.alloc_register(); - let group_exprs_comparison_register = - program.alloc_registers(group_by.len()); - let group_exprs_accumulator_register = - program.alloc_registers(group_by.len()); - let agg_exprs_start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers - .insert(*id, agg_exprs_start_reg); - let sorter_key_register = program.alloc_register(); - - let subroutine_accumulator_clear_label = program.allocate_label(); - let subroutine_accumulator_output_label = program.allocate_label(); - let sorter_data_label = program.allocate_label(); - let grouping_done_label = program.allocate_label(); - - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: current_operator_column_count, - order: OwnedRecord::new(order), - }); - - program.add_comment(program.offset(), "clear group by abort flag"); - program.emit_insn(Insn::Integer { - value: 0, - dest: abort_flag_register, - }); - - program.add_comment( - program.offset(), - "initialize group by comparison registers to NULL", - ); - program.emit_insn(Insn::Null { - dest: group_exprs_comparison_register, - dest_end: if group_by.len() > 1 { - Some(group_exprs_comparison_register + group_by.len() - 1) - } else { - None - }, - }); - - program.add_comment( - program.offset(), - "go to clear accumulator subroutine", - ); - - let subroutine_accumulator_clear_return_offset_register = - program.alloc_register(); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - m.group_bys.insert( - *id, - GroupByMetadata { - sort_cursor, - subroutine_accumulator_clear_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_output_return_offset_register: program - .alloc_register(), - accumulator_indicator_set_true_label: program.allocate_label(), - sorter_data_label, - grouping_done_label, - abort_flag_register, - data_in_accumulator_indicator_register, - group_exprs_accumulator_register, - group_exprs_comparison_register, - sorter_key_register, - }, - ); - - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - GROUP_BY_INSERT_INTO_SORTER => { - let sort_keys_count = group_by.len(); - let start_reg = program.alloc_registers(current_operator_column_count); - for (i, expr) in group_by.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - None, - )?; - } - for (i, agg) in aggregates.iter().enumerate() { - // TODO it's a hack to assume aggregate functions have exactly one argument. - // Counterpoint e.g. GROUP_CONCAT(expr, separator). - // - // Here we are collecting scalars for the group by sorter, which will include - // both the group by expressions and the aggregate arguments. - // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` - // the sorter will have two scalars: u.first_name and u.age. - // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. - // - // This is why we take the first argument of each aggregate function currently. - // It's mostly an artifact of the current architecture being a bit poor; we should recognize - // which scalars are dependencies of aggregate functions and explicitly collect those. - let expr = &agg.args[0]; - let agg_reg = start_reg + sort_keys_count + i; - translate_expr( - program, - Some(referenced_tables), - expr, - agg_reg, - None, - None, - )?; - } - - let group_by_metadata = m.group_bys.get(id).unwrap(); - - program.emit_insn(Insn::MakeRecord { - start_reg, - count: current_operator_column_count, - dest_reg: group_by_metadata.sorter_key_register, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::SorterInsert { - cursor_id: group_by_metadata.sort_cursor, - record_reg: group_by_metadata.sorter_key_register, - }); - - return Ok(OpStepResult::Continue); - } - #[allow(clippy::never_loop)] - GROUP_BY_SORT_AND_COMPARE => { - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - - let group_by_metadata = m.group_bys.get_mut(id).unwrap(); - - let GroupByMetadata { - group_exprs_comparison_register: comparison_register, - subroutine_accumulator_output_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_clear_label, - data_in_accumulator_indicator_register, - accumulator_indicator_set_true_label, - group_exprs_accumulator_register: group_exprs_start_register, - abort_flag_register, - sorter_key_register, - .. - } = *group_by_metadata; - let halt_label = *m.termination_label_stack.first().unwrap(); - - let mut column_names = - Vec::with_capacity(current_operator_column_count); - for expr in group_by - .iter() - .chain(aggregates.iter().map(|agg| &agg.args[0])) - // FIXME: just blindly taking the first arg is a hack - { - // Sorter column names for group by are now just determined by stringifying the expression, since the group by - // columns and aggregations can be practically anything. - // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle - // things like `count(1)` and `COUNT(1)` the same way - column_names.push(expr.to_string()); - } - let pseudo_columns = column_names - .iter() - .map(|name| Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }) - .collect::>(); - - let pseudo_table = Rc::new(PseudoTable { - columns: pseudo_columns, - }); - - let pseudo_cursor = program - .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sorter_key_register, - num_fields: current_operator_column_count, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: group_by_metadata.sort_cursor, - pc_if_empty: group_by_metadata.grouping_done_label, - }, - group_by_metadata.grouping_done_label, - ); - - program.defer_label_resolution( - group_by_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: group_by_metadata.sort_cursor, - dest_reg: group_by_metadata.sorter_key_register, - pseudo_cursor, - }); - - let groups_start_reg = program.alloc_registers(group_by.len()); - for (i, expr) in group_by.iter().enumerate() { - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; - let group_reg = groups_start_reg + i; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: group_reg, - }); - } - - program.emit_insn(Insn::Compare { - start_reg_a: comparison_register, - start_reg_b: groups_start_reg, - count: group_by.len(), - }); - - let agg_step_label = program.allocate_label(); - - program.add_comment( - program.offset(), - "start new group if comparison is not equal", - ); - program.emit_insn_with_label_dependency( - Insn::Jump { - target_pc_lt: program.offset() + 1, - target_pc_eq: agg_step_label, - target_pc_gt: program.offset() + 1, - }, - agg_step_label, - ); - - program.emit_insn(Insn::Move { - source_reg: groups_start_reg, - dest_reg: comparison_register, - count: group_by.len(), - }); - - program.add_comment( - program.offset(), - "check if ended group had data, and output if so", - ); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_output_label, - return_reg: - subroutine_accumulator_output_return_offset_register, - }, - subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "check abort flag"); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: abort_flag_register, - target_pc: halt_label, - decrement_by: 0, - }, - m.termination_label_stack[0], - ); - - program - .add_comment(program.offset(), "goto clear accumulator subroutine"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - program.resolve_label(agg_step_label, program.offset()); - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - Some(pseudo_cursor), - )?; - } - - program.add_comment( - program.offset(), - "don't emit group columns if continuing existing group", - ); - program.emit_insn_with_label_dependency( - Insn::If { - target_pc: accumulator_indicator_set_true_label, - reg: data_in_accumulator_indicator_register, - null_reg: 0, // unused in this case - }, - accumulator_indicator_set_true_label, - ); - - for (i, expr) in group_by.iter().enumerate() { - let key_reg = group_exprs_start_register + i; - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: key_reg, - }); - } - - program.resolve_label( - accumulator_indicator_set_true_label, - program.offset(), - ); - program.add_comment(program.offset(), "indicate data in accumulator"); - program.emit_insn(Insn::Integer { - value: 1, - dest: data_in_accumulator_indicator_register, - }); - - return Ok(OpStepResult::Continue); - } - GROUP_BY_PREPARE_ROW => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: group_by_metadata.sort_cursor, - pc_if_next: group_by_metadata.sorter_data_label, - }, - group_by_metadata.sorter_data_label, - ); - - program.resolve_label( - group_by_metadata.grouping_done_label, - program.offset(), - ); - - program.add_comment(program.offset(), "emit row for final group"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: group_by_metadata - .subroutine_accumulator_output_label, - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }, - group_by_metadata.subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "group by finished"); - let termination_label = - m.termination_label_stack[m.termination_label_stack.len() - 2]; - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: termination_label, - }, - termination_label, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: group_by_metadata.abort_flag_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - program.resolve_label( - group_by_metadata.subroutine_accumulator_output_label, - program.offset(), - ); - - program.add_comment( - program.offset(), - "output group by row subroutine start", - ); - let termination_label = *m.termination_label_stack.last().unwrap(); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: termination_label, - decrement_by: 0, - }, - termination_label, - ); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - return Ok(OpStepResult::ReadyToEmit); - } - GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - program.add_comment( - program.offset(), - "clear accumulator subroutine start", - ); - program.resolve_label( - group_by_metadata.subroutine_accumulator_clear_label, - program.offset(), - ); - let start_reg = group_by_metadata.group_exprs_accumulator_register; - program.emit_insn(Insn::Null { - dest: start_reg, - dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), - }); - - program.emit_insn(Insn::Integer { - value: 0, - dest: group_by_metadata.data_in_accumulator_indicator_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_clear_return_offset_register, - }); - } - _ => { - return Ok(OpStepResult::Done); - } - } - } - - // Non-grouped aggregation e.g. SELECT COUNT(*) FROM t - - const AGGREGATE_INIT: usize = 1; - const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; - match *step { - AGGREGATE_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - let start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers.insert(*id, start_reg); - - Ok(OpStepResult::Continue) - } - AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - None, - )?; - } - } - OpStepResult::Done => { - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - _ => Ok(OpStepResult::Done), - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { source, step, .. } => { - *step += 1; - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::ReadyToEmit); - } - OpStepResult::Done => return Ok(OpStepResult::Done), - } - } - } - Operator::Order { - id, - source, - key, - step, - } => { - *step += 1; - const ORDER_INIT: usize = 1; - const ORDER_INSERT_INTO_SORTER: usize = 2; - const ORDER_SORT_AND_OPEN_LOOP: usize = 3; - const ORDER_NEXT: usize = 4; - match *step { - ORDER_INIT => { - m.termination_label_stack.push(program.allocate_label()); - let sort_cursor = program.alloc_cursor_id(None, None); - m.sorts.insert( - *id, - SortMetadata { - sort_cursor, - pseudo_table_cursor: usize::MAX, // will be set later - sorter_data_register: program.alloc_register(), - sorter_data_label: program.allocate_label(), - done_label: program.allocate_label(), - }, - ); - let mut order = Vec::new(); - for (_, direction) in key.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: key.len(), - order: OwnedRecord::new(order), - }); - - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - ORDER_INSERT_INTO_SORTER => { - let sort_keys_count = key.len(); - let source_cols_count = source.column_count(referenced_tables); - let start_reg = program.alloc_registers(sort_keys_count); - source.result_columns(program, referenced_tables, m, None)?; - - for (i, (expr, _)) in key.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - m.expr_result_cache - .get_cached_result_registers(*id, i) - .as_ref(), - )?; - } - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - program.emit_insn(Insn::MakeRecord { - start_reg, - count: sort_keys_count + source_cols_count, - dest_reg: sort_metadata.sorter_data_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_metadata.sort_cursor, - record_reg: sort_metadata.sorter_data_register, - }); - - Ok(OpStepResult::Continue) - } - #[allow(clippy::never_loop)] - ORDER_SORT_AND_OPEN_LOOP => { - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - program.resolve_label( - m.termination_label_stack.pop().unwrap(), - program.offset(), - ); - let column_names = source.column_names(); - let mut pseudo_columns = vec![]; - for (i, _) in key.iter().enumerate() { - pseudo_columns.push(Column { - name: format!("sort_key_{}", i), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - for name in column_names { - pseudo_columns.push(Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - - let num_fields = pseudo_columns.len(); - - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - let sort_metadata = m.sorts.get(id).unwrap(); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sort_metadata.sorter_data_register, - num_fields, - }); - - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sort_metadata.done_label, - }, - sort_metadata.done_label, - ); - - program.defer_label_resolution( - sort_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: sort_metadata.sort_cursor, - dest_reg: sort_metadata.sorter_data_register, - pseudo_cursor, - }); - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - - sort_metadata.pseudo_table_cursor = pseudo_cursor; - - Ok(OpStepResult::ReadyToEmit) - } - ORDER_NEXT => { - let sort_metadata = m.sorts.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.sorter_data_label, - }, - sort_metadata.sorter_data_label, - ); - - program.resolve_label(sort_metadata.done_label, program.offset()); - - Ok(OpStepResult::Done) - } - _ => unreachable!(), - } - } - Operator::Projection { source, step, .. } => { - *step += 1; - const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; - const PROJECTION_FINALIZE_SOURCE: usize = 2; - match *step { - PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit | OpStepResult::Done => { - if matches!(**source, Operator::Aggregate { .. }) { - source.result_columns(program, referenced_tables, m, None)?; - } - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - PROJECTION_FINALIZE_SOURCE => { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => Ok(OpStepResult::Done), - _ => unreachable!(), - } - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Nothing => Ok(OpStepResult::Done), - } - } - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result { - let col_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, .. - } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Search { - table_reference, .. - } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Join { left, right, .. } => { - let left_start_reg = - left.result_columns(program, referenced_tables, m, cursor_override)?; - right.result_columns(program, referenced_tables, m, cursor_override)?; - - Ok(left_start_reg) - } - Operator::Aggregate { - id, - aggregates, - group_by, - .. - } => { - let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); - program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); - let mut result_column_idx = 0; - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = *agg_start_reg + i; - program.emit_insn(Insn::AggFinal { - register: agg_result_reg, - func: agg.func.clone(), - }); - m.expr_result_cache.cache_result_register( - *id, - result_column_idx, - agg_result_reg, - agg.original_expr.clone(), - ); - result_column_idx += 1; - } - - if let Some(group_by) = group_by { - let output_row_start_reg = - program.alloc_registers(aggregates.len() + group_by.len()); - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Copy { - src_reg: group_by_metadata.group_exprs_accumulator_register, - dst_reg: output_row_start_reg, - amount: group_by.len() - 1, - }); - for (i, source_expr) in group_by.iter().enumerate() { - m.expr_result_cache.cache_result_register( - *id, - result_column_idx + i, - output_row_start_reg + i, - source_expr.clone(), - ); - } - program.emit_insn(Insn::Copy { - src_reg: *agg_start_reg, - dst_reg: output_row_start_reg + group_by.len(), - amount: aggregates.len() - 1, - }); - - Ok(output_row_start_reg) - } else { - Ok(*agg_start_reg) - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { .. } => { - unimplemented!() - } - Operator::Order { id, key, .. } => { - let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; - let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); - let start_column_offset = key.len(); - let column_count = pseudo_table.columns().len() - start_column_offset; - let start_reg = program.alloc_registers(column_count); - translate_table_columns( - program, - cursor_id, - &pseudo_table, - start_column_offset, - start_reg, - ); - - Ok(start_reg) - } - Operator::Projection { - expressions, id, .. - } => { - let expr_count = expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(); - let start_reg = program.alloc_registers(expr_count); - let mut cur_reg = start_reg; - for expr in expressions { - match expr { - ProjectionColumn::Column(expr) => { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - cursor_override.map(|c| c.cursor_id), - m.expr_result_cache - .get_cached_result_registers(*id, cur_reg - start_reg) - .as_ref(), - )?; - m.expr_result_cache.cache_result_register( - *id, - cur_reg - start_reg, - cur_reg, - expr.clone(), - ); - cur_reg += 1; - } - ProjectionColumn::Star => { - for table_reference in referenced_tables.iter() { - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id( - &table_reference.table_identifier, - None, - ) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - ProjectionColumn::TableStar(table_reference) => { - let table_ref = referenced_tables - .iter() - .find(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_ref.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program - .resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - } - - Ok(start_reg) - } - Operator::Nothing => unimplemented!(), - } - } - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()> { - match self { - Operator::Limit { source, limit, .. } => { - source.result_row(program, referenced_tables, m, cursor_override)?; - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: *limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - let jump_label = m.termination_label_stack.first().unwrap(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - - Ok(()) - } - operator => { - let start_reg = - operator.result_columns(program, referenced_tables, m, cursor_override)?; - program.emit_insn(Insn::ResultRow { - start_reg, - count: operator.column_count(referenced_tables), - }); - Ok(()) - } - } - } -} - -fn prologue( - cache: ExpressionResultCache, -) -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { +/// Initialize the program with basic setup and return initial metadata and labels +fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); let halt_label = program.allocate_label(); @@ -1697,28 +120,30 @@ fn prologue( let metadata = Metadata { termination_label_stack: vec![halt_label], - expr_result_cache: cache, - aggregation_start_registers: HashMap::new(), - group_bys: HashMap::new(), + group_by_metadata: None, left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], - sorts: HashMap::new(), + sort_metadata: None, + aggregation_start_register: None, + result_column_indexes_in_orderby_sorter: HashMap::new(), + result_columns_to_skip_in_orderby_sorter: None, }; Ok((program, metadata, init_label, start_offset)) } +/// Clean up and finalize the program, resolving any remaining labels +/// Note that although these are the final instructions, typically an SQLite +/// query will jump to the Transaction instruction via init_label. fn epilogue( program: &mut ProgramBuilder, metadata: &mut Metadata, init_label: BranchOffset, start_offset: BranchOffset, ) -> Result<()> { - program.resolve_label( - metadata.termination_label_stack.pop().unwrap(), - program.offset(), - ); + let halt_label = metadata.termination_label_stack.pop().unwrap(); + program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), @@ -1737,31 +162,1600 @@ fn epilogue( Ok(()) } +/// Main entry point for emitting bytecode for a SQL query +/// Takes a query plan and generates the corresponding bytecode program pub fn emit_program( database_header: Rc>, mut plan: Plan, - cache: ExpressionResultCache, connection: Weak, ) -> Result { - let (mut program, mut metadata, init_label, start_offset) = prologue(cache)?; - loop { - match plan - .root_operator - .step(&mut program, &mut metadata, &plan.referenced_tables)? - { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - plan.root_operator.result_row( - &mut program, - &plan.referenced_tables, - &mut metadata, - None, - )?; + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + // Trivial exit on LIMIT 0 + if let Some(limit) = plan.limit { + if limit == 0 { + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + return Ok(program.build(database_header, connection)); + } + } + + // Initialize cursors and other resources needed for query execution + if let Some(ref mut order_by) = plan.order_by { + init_order_by(&mut program, order_by, &mut metadata)?; + } + + if let Some(ref mut group_by) = plan.group_by { + let aggregates = plan.aggregates.as_mut().unwrap(); + init_group_by(&mut program, group_by, aggregates, &mut metadata)?; + } + init_source(&mut program, &plan.source, &mut metadata)?; + + // Set up main query execution loop + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + // Process result columns and expressions in the inner loop + inner_loop_emit(&mut program, &mut plan, &mut metadata)?; + + // Clean up and close the main execution loop + close_loop( + &mut program, + &mut plan.source, + &mut metadata, + &plan.referenced_tables, + )?; + + let mut order_by_necessary = plan.order_by.is_some(); + + // Handle GROUP BY and aggregation processing + if let Some(ref mut group_by) = plan.group_by { + group_by_emit( + &mut program, + &plan.result_columns, + group_by, + plan.order_by.as_ref(), + &plan.aggregates.as_ref().unwrap(), + plan.limit.clone(), + &plan.referenced_tables, + &mut metadata, + )?; + } else if let Some(ref mut aggregates) = plan.aggregates { + // Handle aggregation without GROUP BY + agg_without_group_by_emit( + &mut program, + &plan.referenced_tables, + &plan.result_columns, + aggregates, + &mut metadata, + )?; + // Single row result for aggregates without GROUP BY, so ORDER BY not needed + order_by_necessary = false; + } + + // Process ORDER BY results if needed + if let Some(ref mut order_by) = plan.order_by { + if order_by_necessary { + order_by_emit( + &mut program, + order_by, + &plan.result_columns, + plan.limit.clone(), + &mut metadata, + )?; + } + } + + // Finalize program + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + +/// Initialize resources needed for ORDER BY processing +fn init_order_by( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + metadata: &mut Metadata, +) -> Result<()> { + metadata + .termination_label_stack + .push(program.allocate_label()); + let sort_cursor = program.alloc_cursor_id(None, None); + metadata.sort_metadata = Some(SortMetadata { + sort_cursor, + sorter_data_register: program.alloc_register(), + }); + let mut order = Vec::new(); + for (_, direction) in order_by.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: order_by.len(), + order: OwnedRecord::new(order), + }); + Ok(()) +} + +/// Initialize resources needed for GROUP BY processing +fn init_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + aggregates: &Vec, + metadata: &mut Metadata, +) -> Result<()> { + let agg_final_label = program.allocate_label(); + metadata.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + + let sort_cursor = program.alloc_cursor_id(None, None); + + let abort_flag_register = program.alloc_register(); + let data_in_accumulator_indicator_register = program.alloc_register(); + let group_exprs_comparison_register = program.alloc_registers(group_by.len()); + let group_exprs_accumulator_register = program.alloc_registers(group_by.len()); + let agg_exprs_start_reg = program.alloc_registers(num_aggs); + let sorter_key_register = program.alloc_register(); + + let subroutine_accumulator_clear_label = program.allocate_label(); + let subroutine_accumulator_output_label = program.allocate_label(); + + let mut order = Vec::new(); + const ASCENDING: i64 = 0; + for _ in group_by.iter() { + order.push(OwnedValue::Integer(ASCENDING)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: aggregates.len() + group_by.len(), + order: OwnedRecord::new(order), + }); + + program.add_comment(program.offset(), "clear group by abort flag"); + program.emit_insn(Insn::Integer { + value: 0, + dest: abort_flag_register, + }); + + program.add_comment( + program.offset(), + "initialize group by comparison registers to NULL", + ); + program.emit_insn(Insn::Null { + dest: group_exprs_comparison_register, + dest_end: if group_by.len() > 1 { + Some(group_exprs_comparison_register + group_by.len() - 1) + } else { + None + }, + }); + + program.add_comment(program.offset(), "go to clear accumulator subroutine"); + + let subroutine_accumulator_clear_return_offset_register = program.alloc_register(); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_clear_label, + return_reg: subroutine_accumulator_clear_return_offset_register, + }, + subroutine_accumulator_clear_label, + ); + + metadata.aggregation_start_register = Some(agg_exprs_start_reg); + + metadata.group_by_metadata = Some(GroupByMetadata { + sort_cursor, + subroutine_accumulator_clear_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_output_return_offset_register: program.alloc_register(), + accumulator_indicator_set_true_label: program.allocate_label(), + abort_flag_register, + data_in_accumulator_indicator_register, + group_exprs_accumulator_register, + group_exprs_comparison_register, + sorter_key_register, + }); + Ok(()) +} + +/// Initialize resources needed for the source operators (tables, joins, etc) +fn init_source( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. + } => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + metadata.left_joins.insert(*id, lj_metadata); } - OpStepResult::Done => { - epilogue(&mut program, &mut metadata, init_label, start_offset)?; - return Ok(program.build(database_header, connection)); + init_source(program, left, metadata)?; + init_source(program, right, metadata)?; + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + .. + } => { + let cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + let root_page = table_reference.table.root_page; + let next_row_label = program.allocate_label(); + metadata.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + + let next_row_label = program.allocate_label(); + + if !matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + metadata.next_row_labels.insert(*id, next_row_label); } + + let scan_loop_body_label = program.allocate_label(); + metadata.scan_loop_body_labels.push(scan_loop_body_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = program + .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); } } } + +/// Set up the main query execution loop +/// For example in the case of a nested table scan, this means emitting the RewindAsync instruction +/// for all tables involved, outermost first. +fn open_loop( + program: &mut ProgramBuilder, + source: &mut SourceOperator, + referenced_tables: &[BTreeTableReference], + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + predicates, + outer, + .. + } => { + open_loop(program, left, referenced_tables, metadata)?; + + let mut jump_target_when_false = *metadata + .next_row_labels + .get(&right.id()) + .or(metadata.next_row_labels.get(&left.id())) + .unwrap_or(metadata.termination_label_stack.last().unwrap()); + + if *outer { + let lj_meta = metadata.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + } + metadata + .next_row_labels + .insert(right.id(), jump_target_when_false); + + open_loop(program, right, referenced_tables, metadata)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + referenced_tables, + predicate, + condition_metadata, + None, + )?; + } + program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = metadata.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: lj_meta.match_flag_register, + }); + } + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + predicates, + iter_dir, + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::LastAsync { cursor_id }); + } else { + program.emit_insn(Insn::RewindAsync { cursor_id }); + } + let scan_loop_body_label = program.allocate_label(); + let halt_label = metadata.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + Insn::LastAwait { + cursor_id, + pc_if_empty: *halt_label, + } + } else { + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + } + }, + *halt_label, + ); + metadata.scan_loop_body_labels.push(scan_loop_body_label); + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + + let jump_label = metadata.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + condition_metadata, + None, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + predicates, + .. + } => { + let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + + // Open the loop for the index search. + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + if !matches!(search, Search::PrimaryKeyEq { .. }) { + let index_cursor_id = if let Search::IndexSearch { index, .. } = search { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + let scan_loop_body_label = *metadata.scan_loop_body_labels.last().unwrap(); + let cmp_reg = program.alloc_register(); + let (cmp_expr, cmp_op) = match search { + Search::IndexSearch { + cmp_expr, cmp_op, .. + } => (cmp_expr, cmp_op), + Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), + Search::PrimaryKeyEq { .. } => unreachable!(), + }; + // TODO this only handles ascending indexes + match cmp_op { + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals => { + translate_expr(program, Some(referenced_tables), cmp_expr, cmp_reg, None)?; + } + ast::Operator::Less | ast::Operator::LessEquals => { + program.emit_insn(Insn::Null { + dest: cmp_reg, + dest_end: None, + }); + } + _ => unreachable!(), + } + program.emit_insn_with_label_dependency( + match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *metadata.termination_label_stack.last().unwrap(), + }, + ast::Operator::Greater + | ast::Operator::Less + | ast::Operator::LessEquals => Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *metadata.termination_label_stack.last().unwrap(), + }, + _ => unreachable!(), + }, + *metadata.termination_label_stack.last().unwrap(), + ); + if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { + translate_expr(program, Some(referenced_tables), cmp_expr, cmp_reg, None)?; + } + + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + // TODO: We are currently only handling ascending indexes. + // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. + // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. + // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. + // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. + // + // For primary key searches we emit RowId and then compare it to the seek value. + + let abort_jump_target = *metadata + .next_row_labels + .get(id) + .unwrap_or(metadata.termination_label_stack.last().unwrap()); + match cmp_op { + ast::Operator::Equals | ast::Operator::LessEquals => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + ast::Operator::Less => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + } + + let jump_label = metadata + .next_row_labels + .get(id) + .unwrap_or(metadata.termination_label_stack.last().unwrap()); + + if let Search::PrimaryKeyEq { cmp_expr } = search { + let src_reg = program.alloc_register(); + translate_expr(program, Some(referenced_tables), cmp_expr, src_reg, None)?; + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id: table_cursor_id, + src_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + } + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + condition_metadata, + None, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); + } + } +} + +/// SQLite (and so Limbo) processes joins as a nested loop. +/// The inner loop may emit rows to various destinations depending on the query: +/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) +/// - an ORDER BY sorter (when there is no GROUP BY, but there is an ORDER BY) +/// - an AggStep (the columns are collected for aggregation, which is finished later) +/// - a ResultRow (there is none of the above, so the loop emits a result row directly) +pub enum InnerLoopEmitTarget<'a> { + GroupBySorter { + group_by: &'a Vec, + aggregates: &'a Vec, + }, + OrderBySorter { + order_by: &'a Vec<(ast::Expr, Direction)>, + }, + AggStep, + ResultRow { + limit: Option, + }, +} + +/// Emits the bytecode for the inner loop of a query. +/// At this point the cursors for all tables have been opened and rewound. +fn inner_loop_emit( + program: &mut ProgramBuilder, + plan: &mut Plan, + metadata: &mut Metadata, +) -> Result<()> { + if let Some(wc) = &plan.where_clause { + for predicate in wc.iter() { + if predicate.is_always_false()? { + return Ok(()); + } else if predicate.is_always_true()? { + // do nothing + } else { + unreachable!( + "all WHERE clause terms that are not trivially true or false should have been pushed down to the source" + ); + } + } + } + // if we have a group by, we emit a record into the group by sorter. + if let Some(group_by) = &plan.group_by { + return inner_loop_source_emit( + program, + &plan.result_columns, + &plan.aggregates, + metadata, + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates: &plan.aggregates.as_ref().unwrap(), + }, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have aggregates, we emit without ResultRow. + // we also do not need to sort because we are emitting a single row. + if plan.aggregates.is_some() { + return inner_loop_source_emit( + program, + &plan.result_columns, + &plan.aggregates, + metadata, + InnerLoopEmitTarget::AggStep, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have an order by, we emit a record into the order by sorter. + if let Some(order_by) = &plan.order_by { + return inner_loop_source_emit( + program, + &plan.result_columns, + &plan.aggregates, + metadata, + InnerLoopEmitTarget::OrderBySorter { order_by }, + &plan.referenced_tables, + ); + } + // if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero. + return inner_loop_source_emit( + program, + &plan.result_columns, + &plan.aggregates, + metadata, + InnerLoopEmitTarget::ResultRow { limit: plan.limit }, + &plan.referenced_tables, + ); +} + +/// This is a helper function for inner_loop_emit, +/// which does a different thing depending on the emit target. +/// See the InnerLoopEmitTarget enum for more details. +fn inner_loop_source_emit( + program: &mut ProgramBuilder, + result_columns: &Vec, + aggregates: &Option>, + metadata: &mut Metadata, + emit_target: InnerLoopEmitTarget, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match emit_target { + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates, + } => { + let sort_keys_count = group_by.len(); + let aggregate_arguments_count = + aggregates.iter().map(|agg| agg.args.len()).sum::(); + let column_count = sort_keys_count + aggregate_arguments_count; + let start_reg = program.alloc_registers(column_count); + let mut cur_reg = start_reg; + + // The group by sorter rows will contain the grouping keys first. They are also the sort keys. + for expr in group_by.iter() { + let key_reg = cur_reg; + cur_reg += 1; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; + } + // Then we have the aggregate arguments. + for agg in aggregates.iter() { + // Here we are collecting scalars for the group by sorter, which will include + // both the group by expressions and the aggregate arguments. + // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` + // the sorter will have two scalars: u.first_name and u.age. + // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. + // the actual aggregation is done later. + for expr in agg.args.iter() { + let agg_reg = cur_reg; + cur_reg += 1; + translate_expr(program, Some(referenced_tables), expr, agg_reg, None)?; + } + } + + // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. + // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. + + let group_by_metadata = metadata.group_by_metadata.as_ref().unwrap(); + + sorter_insert( + program, + start_reg, + column_count, + group_by_metadata.sort_cursor, + group_by_metadata.sorter_key_register, + ); + + Ok(()) + } + InnerLoopEmitTarget::OrderBySorter { order_by } => { + order_by_sorter_insert( + program, + referenced_tables, + order_by, + result_columns, + &mut metadata.result_column_indexes_in_orderby_sorter, + &metadata.sort_metadata.as_ref().unwrap(), + None, + )?; + Ok(()) + } + InnerLoopEmitTarget::AggStep => { + let aggregates = aggregates.as_ref().unwrap(); + let agg_final_label = program.allocate_label(); + metadata.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + let start_reg = program.alloc_registers(num_aggs); + metadata.aggregation_start_register = Some(start_reg); + + // In planner.rs, we have collected all aggregates from the SELECT clause, including ones where the aggregate is embedded inside + // a more complex expression. Some examples: length(sum(x)), sum(x) + avg(y), sum(x) + 1, etc. + // The result of those more complex expressions depends on the final result of the aggregate, so we don't translate the complete expressions here. + // Instead, we translate the aggregates + any expressions that do not contain aggregates. + for (i, agg) in aggregates.iter().enumerate() { + let reg = start_reg + i; + translate_aggregation(program, referenced_tables, agg, reg)?; + } + for (i, rc) in result_columns.iter().enumerate() { + if rc.contains_aggregates { + // Do nothing, aggregates are computed above + // if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet, + // it will be computed after the aggregations are finalized. + continue; + } + let reg = start_reg + num_aggs + i; + translate_expr(program, Some(referenced_tables), &rc.expr, reg, None)?; + } + Ok(()) + } + InnerLoopEmitTarget::ResultRow { limit } => { + assert!( + aggregates.is_none(), + "We should not get here with aggregates" + ); + emit_select_result( + program, + referenced_tables, + result_columns, + None, + limit.map(|l| (l, *metadata.termination_label_stack.last().unwrap())), + )?; + + Ok(()) + } + } +} + +/// Closes the loop for a given source operator. +/// For example in the case of a nested table scan, this means emitting the NextAsync instruction +/// for all tables involved, innermost first. +fn close_loop( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. + } => { + close_loop(program, right, metadata, referenced_tables)?; + + if *outer { + let lj_meta = metadata.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + SourceOperator::Scan { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier), + SourceOperator::Search { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + } + let next_row_label = if *outer { + metadata.left_joins.get(id).unwrap().on_match_jump_to_label + } else { + *metadata.next_row_labels.get(&right.id()).unwrap() + }; + // This points to the NextAsync instruction of the left table + program.resolve_label(next_row_label, program.offset()); + close_loop(program, left, metadata, referenced_tables)?; + + Ok(()) + } + SourceOperator::Scan { + id, + table_reference, + iter_dir, + .. + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn_with_label_dependency( + Insn::PrevAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + if matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + return Ok(()); + } + let cursor_id = match search { + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + Search::PrimaryKeySearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::PrimaryKeyEq { .. } => unreachable!(), + }; + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + + Ok(()) + } + SourceOperator::Nothing => Ok(()), + } +} + +/// Emits the bytecode for processing a GROUP BY clause. +/// This is called when the main query execution loop has finished processing, +/// and we now have data in the GROUP BY sorter. +fn group_by_emit( + program: &mut ProgramBuilder, + result_columns: &Vec, + group_by: &Vec, + order_by: Option<&Vec<(ast::Expr, Direction)>>, + aggregates: &Vec, + limit: Option, + referenced_tables: &[BTreeTableReference], + metadata: &mut Metadata, +) -> Result<()> { + let sort_loop_start_label = program.allocate_label(); + let grouping_done_label = program.allocate_label(); + let group_by_metadata = metadata.group_by_metadata.as_mut().unwrap(); + + let GroupByMetadata { + group_exprs_comparison_register: comparison_register, + subroutine_accumulator_output_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_clear_label, + data_in_accumulator_indicator_register, + accumulator_indicator_set_true_label, + group_exprs_accumulator_register: group_exprs_start_register, + abort_flag_register, + sorter_key_register, + .. + } = *group_by_metadata; + let halt_label = *metadata.termination_label_stack.first().unwrap(); + + // all group by columns and all arguments of agg functions are in the sorter. + // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) + let sorter_column_count = + group_by.len() + aggregates.iter().map(|agg| agg.args.len()).sum::(); + // sorter column names do not matter + let pseudo_columns = (0..sorter_column_count) + .map(|i| Column { + name: i.to_string(), + primary_key: false, + ty: crate::schema::Type::Null, + }) + .collect::>(); + + // A pseudo table is a "fake" table to which we read one row at a time from the sorter + let pseudo_table = Rc::new(PseudoTable { + columns: pseudo_columns, + }); + + let pseudo_cursor = program.alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); + + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: sorter_key_register, + num_fields: sorter_column_count, + }); + + // Sort the sorter based on the group by columns + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: group_by_metadata.sort_cursor, + pc_if_empty: grouping_done_label, + }, + grouping_done_label, + ); + + program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); + // Read a row from the sorted data in the sorter into the pseudo cursor + program.emit_insn(Insn::SorterData { + cursor_id: group_by_metadata.sort_cursor, + dest_reg: group_by_metadata.sorter_key_register, + pseudo_cursor, + }); + + // Read the group by columns from the pseudo cursor + let groups_start_reg = program.alloc_registers(group_by.len()); + for i in 0..group_by.len() { + let sorter_column_index = i; + let group_reg = groups_start_reg + i; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: group_reg, + }); + } + + // Compare the group by columns to the previous group by columns to see if we are at a new group or not + program.emit_insn(Insn::Compare { + start_reg_a: comparison_register, + start_reg_b: groups_start_reg, + count: group_by.len(), + }); + + let agg_step_label = program.allocate_label(); + + program.add_comment( + program.offset(), + "start new group if comparison is not equal", + ); + // If we are at a new group, continue. If we are at the same group, jump to the aggregation step (i.e. accumulate more values into the aggregations) + program.emit_insn_with_label_dependency( + Insn::Jump { + target_pc_lt: program.offset() + 1, + target_pc_eq: agg_step_label, + target_pc_gt: program.offset() + 1, + }, + agg_step_label, + ); + + // New group, move current group by columns into the comparison register + program.emit_insn(Insn::Move { + source_reg: groups_start_reg, + dest_reg: comparison_register, + count: group_by.len(), + }); + + program.add_comment( + program.offset(), + "check if ended group had data, and output if so", + ); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_output_label, + return_reg: subroutine_accumulator_output_return_offset_register, + }, + subroutine_accumulator_output_label, + ); + + program.add_comment(program.offset(), "check abort flag"); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: abort_flag_register, + target_pc: halt_label, + decrement_by: 0, + }, + metadata.termination_label_stack[0], + ); + + program.add_comment(program.offset(), "goto clear accumulator subroutine"); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_clear_label, + return_reg: subroutine_accumulator_clear_return_offset_register, + }, + subroutine_accumulator_clear_label, + ); + + // Accumulate the values into the aggregations + program.resolve_label(agg_step_label, program.offset()); + let start_reg = metadata.aggregation_start_register.unwrap(); + let mut cursor_index = group_by.len(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = start_reg + i; + translate_aggregation_groupby( + program, + referenced_tables, + pseudo_cursor, + cursor_index, + agg, + agg_result_reg, + )?; + cursor_index += agg.args.len(); + } + + // We only emit the group by columns if we are going to start a new group (i.e. the prev group will not accumulate any more values into the aggregations) + program.add_comment( + program.offset(), + "don't emit group columns if continuing existing group", + ); + program.emit_insn_with_label_dependency( + Insn::If { + target_pc: accumulator_indicator_set_true_label, + reg: data_in_accumulator_indicator_register, + null_reg: 0, // unused in this case + }, + accumulator_indicator_set_true_label, + ); + + // Read the group by columns for a finished group + for i in 0..group_by.len() { + let key_reg = group_exprs_start_register + i; + let sorter_column_index = i; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: key_reg, + }); + } + + program.resolve_label(accumulator_indicator_set_true_label, program.offset()); + program.add_comment(program.offset(), "indicate data in accumulator"); + program.emit_insn(Insn::Integer { + value: 1, + dest: data_in_accumulator_indicator_register, + }); + + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: group_by_metadata.sort_cursor, + pc_if_next: sort_loop_start_label, + }, + sort_loop_start_label, + ); + + program.resolve_label(grouping_done_label, program.offset()); + + program.add_comment(program.offset(), "emit row for final group"); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: group_by_metadata.subroutine_accumulator_output_label, + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }, + group_by_metadata.subroutine_accumulator_output_label, + ); + + program.add_comment(program.offset(), "group by finished"); + let termination_label = + metadata.termination_label_stack[metadata.termination_label_stack.len() - 2]; + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: termination_label, + }, + termination_label, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: group_by_metadata.abort_flag_register, + }); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + program.resolve_label( + group_by_metadata.subroutine_accumulator_output_label, + program.offset(), + ); + + program.add_comment(program.offset(), "output group by row subroutine start"); + let termination_label = *metadata.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: group_by_metadata.data_in_accumulator_indicator_register, + target_pc: termination_label, + decrement_by: 0, + }, + termination_label, + ); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + let agg_start_reg = metadata.aggregation_start_register.unwrap(); + program.resolve_label( + metadata.termination_label_stack.pop().unwrap(), + program.offset(), + ); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = agg_start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + + // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) + // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) + // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the + // result column expression matches a) a group by column or b) an aggregation result. + let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len() + group_by.len()); + for (i, expr) in group_by.iter().enumerate() { + precomputed_exprs_to_register.push((expr, group_exprs_start_register + i)); + } + for (i, agg) in aggregates.iter().enumerate() { + precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); + } + + match order_by { + None => { + emit_select_result( + program, + referenced_tables, + result_columns, + Some(&precomputed_exprs_to_register), + limit.map(|l| (l, *metadata.termination_label_stack.last().unwrap())), + )?; + } + Some(order_by) => { + order_by_sorter_insert( + program, + referenced_tables, + order_by, + result_columns, + &mut metadata.result_column_indexes_in_orderby_sorter, + &metadata.sort_metadata.as_ref().unwrap(), + Some(&precomputed_exprs_to_register), + )?; + } + } + + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + program.add_comment(program.offset(), "clear accumulator subroutine start"); + program.resolve_label( + group_by_metadata.subroutine_accumulator_clear_label, + program.offset(), + ); + let start_reg = group_by_metadata.group_exprs_accumulator_register; + program.emit_insn(Insn::Null { + dest: start_reg, + dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), + }); + + program.emit_insn(Insn::Integer { + value: 0, + dest: group_by_metadata.data_in_accumulator_indicator_register, + }); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, + }); + + Ok(()) +} + +/// Emits the bytecode for processing an aggregate without a GROUP BY clause. +/// This is called when the main query execution loop has finished processing, +/// and we can now materialize the aggregate results. +fn agg_without_group_by_emit( + program: &mut ProgramBuilder, + referenced_tables: &Vec, + result_columns: &Vec, + aggregates: &Vec, + metadata: &mut Metadata, +) -> Result<()> { + let agg_start_reg = metadata.aggregation_start_register.unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = agg_start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + // we now have the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) + // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the + // result column expression matches a) a group by column or b) an aggregation result. + let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len()); + for (i, agg) in aggregates.iter().enumerate() { + precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); + } + + // This always emits a ResultRow because currently it can only be used for a single row result + // Limit is None because we early exit on limit 0 and the max rows here is 1 + emit_select_result( + program, + referenced_tables, + result_columns, + Some(&precomputed_exprs_to_register), + None, + )?; + + Ok(()) +} + +/// Emits the bytecode for outputting rows from an ORDER BY sorter. +/// This is called when the main query execution loop has finished processing, +/// and we can now emit rows from the ORDER BY sorter. +fn order_by_emit( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, + limit: Option, + metadata: &mut Metadata, +) -> Result<()> { + let sort_loop_start_label = program.allocate_label(); + let sort_loop_end_label = program.allocate_label(); + program.resolve_label( + metadata.termination_label_stack.pop().unwrap(), + program.offset(), + ); + let mut pseudo_columns = vec![]; + for (i, _) in order_by.iter().enumerate() { + pseudo_columns.push(Column { + // Names don't matter. We are tracking which result column is in which position in the ORDER BY clause in m.result_column_indexes_in_orderby_sorter. + name: format!("sort_key_{}", i), + primary_key: false, + ty: crate::schema::Type::Null, + }); + } + for (i, rc) in result_columns.iter().enumerate() { + // If any result columns are not in the ORDER BY sorter, it's because they are equal to a sort key and were already added to the pseudo columns above. + if let Some(ref v) = metadata.result_columns_to_skip_in_orderby_sorter { + if v.contains(&i) { + continue; + } + } + pseudo_columns.push(Column { + name: rc.expr.to_string(), + primary_key: false, + ty: crate::schema::Type::Null, + }); + } + + let num_columns_in_sorter = order_by.len() + result_columns.len() + - metadata + .result_columns_to_skip_in_orderby_sorter + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + let sort_metadata = metadata.sort_metadata.as_mut().unwrap(); + + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: sort_metadata.sorter_data_register, + num_fields: num_columns_in_sorter, + }); + + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_loop_end_label, + }, + sort_loop_end_label, + ); + + program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: sort_metadata.sorter_data_register, + pseudo_cursor, + }); + + // We emit the columns in SELECT order, not sorter order (sorter always has the sort keys first). + // This is tracked in m.result_column_indexes_in_orderby_sorter. + let cursor_id = pseudo_cursor; + let start_reg = program.alloc_registers(result_columns.len()); + for i in 0..result_columns.len() { + let reg = start_reg + i; + program.emit_insn(Insn::Column { + cursor_id, + column: metadata.result_column_indexes_in_orderby_sorter[&i], + dest: reg, + }); + } + emit_result_row_and_limit( + program, + start_reg, + result_columns.len(), + limit.map(|l| (l, sort_loop_end_label)), + )?; + + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_loop_start_label, + }, + sort_loop_start_label, + ); + + program.resolve_label(sort_loop_end_label, program.offset()); + + Ok(()) +} + +/// Emits the bytecode for: result row and limit. +fn emit_result_row_and_limit( + program: &mut ProgramBuilder, + start_reg: usize, + result_columns_len: usize, + limit: Option<(usize, BranchOffset)>, +) -> Result<()> { + program.emit_insn(Insn::ResultRow { + start_reg, + count: result_columns_len, + }); + if let Some((limit, jump_label_on_limit_reached)) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: jump_label_on_limit_reached, + }, + jump_label_on_limit_reached, + ); + } + Ok(()) +} + +/// Emits the bytecode for: all result columns, result row, and limit. +fn emit_select_result( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + result_columns: &[ResultSetColumn], + precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, + limit: Option<(usize, BranchOffset)>, +) -> Result<()> { + let start_reg = program.alloc_registers(result_columns.len()); + for (i, rc) in result_columns.iter().enumerate() { + let reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + reg, + precomputed_exprs_to_register, + )?; + } + emit_result_row_and_limit(program, start_reg, result_columns.len(), limit)?; + Ok(()) +} + +/// Emits the bytecode for inserting a row into a sorter. +/// This can be either a GROUP BY sorter or an ORDER BY sorter. +fn sorter_insert( + program: &mut ProgramBuilder, + start_reg: usize, + column_count: usize, + cursor_id: usize, + record_reg: usize, +) { + program.emit_insn(Insn::MakeRecord { + start_reg, + count: column_count, + dest_reg: record_reg, + }); + program.emit_insn(Insn::SorterInsert { + cursor_id, + record_reg, + }); +} + +/// Emits the bytecode for inserting a row into an ORDER BY sorter. +fn order_by_sorter_insert( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, + result_column_indexes_in_orderby_sorter: &mut HashMap, + sort_metadata: &SortMetadata, + precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, +) -> Result<()> { + let order_by_len = order_by.len(); + // If any result columns can be skipped due to being an exact duplicate of a sort key, we need to know which ones and their new index in the ORDER BY sorter. + let result_columns_to_skip = order_by_deduplicate_result_columns(order_by, result_columns); + let result_columns_to_skip_len = result_columns_to_skip + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); + + // The ORDER BY sorter has the sort keys first, then the result columns. + let orderby_sorter_column_count = + order_by_len + result_columns.len() - result_columns_to_skip_len; + let start_reg = program.alloc_registers(orderby_sorter_column_count); + for (i, (expr, _)) in order_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + precomputed_exprs_to_register, + )?; + } + let mut cur_reg = start_reg + order_by_len; + let mut cur_idx_in_orderby_sorter = order_by_len; + for (i, rc) in result_columns.iter().enumerate() { + if let Some(ref v) = result_columns_to_skip { + let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); + // If the result column is in the list of columns to skip, we need to know its new index in the ORDER BY sorter. + if let Some((_, result_column_idx)) = found { + result_column_indexes_in_orderby_sorter.insert(i, *result_column_idx); + continue; + } + } + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + cur_reg, + precomputed_exprs_to_register, + )?; + result_column_indexes_in_orderby_sorter.insert(i, cur_idx_in_orderby_sorter); + cur_idx_in_orderby_sorter += 1; + cur_reg += 1; + } + + sorter_insert( + program, + start_reg, + orderby_sorter_column_count, + sort_metadata.sort_cursor, + sort_metadata.sorter_data_register, + ); + Ok(()) +} + +/// In case any of the ORDER BY sort keys are exactly equal to a result column, we can skip emitting that result column. +/// If we skip a result column, we need to keep track what index in the ORDER BY sorter the result columns have, +/// because the result columns should be emitted in the SELECT clause order, not the ORDER BY clause order. +/// +/// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) +fn order_by_deduplicate_result_columns( + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, +) -> Option> { + let mut result_column_remapping: Option> = None; + for (i, rc) in result_columns.iter().enumerate() { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_column_remapping { + v.push((i, j)); + } else { + result_column_remapping = Some(vec![(i, j)]); + } + } + } + + return result_column_remapping; +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fca31b0ae..6c0b4437d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,10 +1,9 @@ use sqlite3_parser::ast::{self, UnaryOperator}; -use super::optimizer::CachedResult; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; -use crate::schema::{PseudoTable, Table, Type}; +use crate::schema::Type; use crate::util::normalize_ident; use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Insn}; use crate::Result; @@ -22,8 +21,8 @@ pub fn translate_condition_expr( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], expr: &ast::Expr, - cursor_hint: Option, condition_metadata: ConditionMetadata, + precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result<()> { match expr { ast::Expr::Between { .. } => todo!(), @@ -34,18 +33,18 @@ pub fn translate_condition_expr( program, referenced_tables, lhs, - cursor_hint, ConditionMetadata { jump_if_condition_is_true: false, ..condition_metadata }, + precomputed_exprs_to_registers, ); let _ = translate_condition_expr( program, referenced_tables, rhs, - cursor_hint, condition_metadata, + precomputed_exprs_to_registers, ); } ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { @@ -54,21 +53,21 @@ pub fn translate_condition_expr( program, referenced_tables, lhs, - cursor_hint, ConditionMetadata { // If the first condition is true, we don't need to evaluate the second condition. jump_if_condition_is_true: true, jump_target_when_false, ..condition_metadata }, + precomputed_exprs_to_registers, ); program.resolve_label(jump_target_when_false, program.offset()); let _ = translate_condition_expr( program, referenced_tables, rhs, - cursor_hint, condition_metadata, + precomputed_exprs_to_registers, ); } ast::Expr::Binary(lhs, op, rhs) => { @@ -78,8 +77,7 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, lhs_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant() @@ -90,8 +88,7 @@ pub fn translate_condition_expr( Some(referenced_tables), rhs, rhs_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant() @@ -339,8 +336,7 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, lhs_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, )?; let rhs = rhs.as_ref().unwrap(); @@ -369,8 +365,7 @@ pub fn translate_condition_expr( Some(referenced_tables), expr, rhs_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, )?; // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. if !last_condition { @@ -413,8 +408,7 @@ pub fn translate_condition_expr( Some(referenced_tables), expr, rhs_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, )?; program.emit_insn_with_label_dependency( Insn::Eq { @@ -459,8 +453,7 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, column_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant(); @@ -470,8 +463,7 @@ pub fn translate_condition_expr( Some(referenced_tables), rhs, pattern_reg, - cursor_hint, - None, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant(); @@ -543,8 +535,8 @@ pub fn translate_condition_expr( program, referenced_tables, expr, - cursor_hint, condition_metadata, + precomputed_exprs_to_registers, ); } } @@ -553,71 +545,46 @@ pub fn translate_condition_expr( Ok(()) } -pub fn get_cached_or_translate( - program: &mut ProgramBuilder, - referenced_tables: Option<&[BTreeTableReference]>, - expr: &ast::Expr, - cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, -) -> Result { - if let Some(cached_results) = cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - return Ok(cached_result.register_idx); - } - } - let reg = program.alloc_register(); - translate_expr( - program, - referenced_tables, - expr, - reg, - cursor_hint, - cached_results, - )?; - Ok(reg) -} - pub fn translate_expr( program: &mut ProgramBuilder, referenced_tables: Option<&[BTreeTableReference]>, expr: &ast::Expr, target_register: usize, - cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, + precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result { - if let Some(cached_results) = &cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - program.emit_insn(Insn::Copy { - src_reg: cached_result.register_idx, - dst_reg: target_register, - amount: 0, - }); - return Ok(target_register); + if let Some(precomputed_exprs_to_registers) = precomputed_exprs_to_registers { + for (precomputed_expr, reg) in precomputed_exprs_to_registers.iter() { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + if expr == *precomputed_expr { + program.emit_insn(Insn::Copy { + src_reg: *reg, + dst_reg: target_register, + amount: 0, + }); + return Ok(target_register); + } } } - match expr { ast::Expr::Between { .. } => todo!(), ast::Expr::Binary(e1, op, e2) => { - let e1_reg = get_cached_or_translate( + let e1_reg = program.alloc_register(); + translate_expr( program, referenced_tables, e1, - cursor_hint, - cached_results, + e1_reg, + precomputed_exprs_to_registers, )?; - let e2_reg = get_cached_or_translate( + let e2_reg = program.alloc_register(); + translate_expr( program, referenced_tables, e2, - cursor_hint, - cached_results, + e2_reg, + precomputed_exprs_to_registers, )?; match op { @@ -740,8 +707,7 @@ pub fn translate_expr( referenced_tables, expr, reg_expr, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; let reg_type = program.alloc_register(); program.emit_insn(Insn::String8 { @@ -813,8 +779,7 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -840,8 +805,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } @@ -878,8 +842,7 @@ pub fn translate_expr( referenced_tables, arg, target_register, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if index < args.len() - 1 { program.emit_insn_with_label_dependency( @@ -914,8 +877,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } program.emit_insn(Insn::Function { @@ -947,8 +909,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } program.emit_insn(Insn::Function { @@ -984,8 +945,7 @@ pub fn translate_expr( referenced_tables, &args[0], temp_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::NotNull { reg: temp_reg, @@ -997,8 +957,7 @@ pub fn translate_expr( referenced_tables, &args[1], temp_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, @@ -1030,8 +989,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1078,8 +1036,7 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1115,8 +1072,7 @@ pub fn translate_expr( referenced_tables, arg, target_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } } @@ -1153,16 +1109,14 @@ pub fn translate_expr( referenced_tables, &args[0], str_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; translate_expr( program, referenced_tables, &args[1], start_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if args.len() == 3 { translate_expr( @@ -1170,8 +1124,7 @@ pub fn translate_expr( referenced_tables, &args[2], length_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } @@ -1200,8 +1153,7 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1224,8 +1176,7 @@ pub fn translate_expr( referenced_tables, &args[0], arg_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; start_reg = arg_reg; } @@ -1249,8 +1200,7 @@ pub fn translate_expr( referenced_tables, arg, target_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } } @@ -1289,8 +1239,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); @@ -1322,8 +1271,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1356,8 +1304,7 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1394,8 +1341,7 @@ pub fn translate_expr( referenced_tables, &args[0], first_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; let second_reg = program.alloc_register(); translate_expr( @@ -1403,8 +1349,7 @@ pub fn translate_expr( referenced_tables, &args[1], second_reg, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1440,23 +1385,29 @@ pub fn translate_expr( } } ast::Expr::FunctionCallStar { .. } => todo!(), - ast::Expr::Id(ident) => { - // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); - let (idx, col_type, cursor_id, is_rowid_alias) = - resolve_ident_table(program, &ident.0, referenced_tables, cursor_hint)?; - if is_rowid_alias { + ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before translation"), + ast::Expr::Column { + database: _, + table, + column, + is_rowid_alias: is_primary_key, + } => { + let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); + if *is_primary_key { program.emit_insn(Insn::RowId { cursor_id, dest: target_register, }); } else { program.emit_insn(Insn::Column { - column: idx, - dest: target_register, cursor_id, + column: *column, + dest: target_register, }); } - maybe_apply_affinity(col_type, target_register, program); + let column = &tbl_ref.table.columns[*column]; + maybe_apply_affinity(column.ty, target_register, program); Ok(target_register) } ast::Expr::InList { .. } => todo!(), @@ -1529,8 +1480,7 @@ pub fn translate_expr( referenced_tables, &exprs[0], target_register, - cursor_hint, - cached_results, + precomputed_exprs_to_registers, )?; } else { // Parenthesized expressions with multiple arguments are reserved for special cases @@ -1539,28 +1489,8 @@ pub fn translate_expr( } Ok(target_register) } - ast::Expr::Qualified(tbl, ident) => { - let (idx, col_type, cursor_id, is_primary_key) = resolve_ident_qualified( - program, - &tbl.0, - &ident.0, - referenced_tables.unwrap(), - cursor_hint, - )?; - if is_primary_key { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: idx, - dest: target_register, - cursor_id, - }); - } - maybe_apply_affinity(col_type, target_register, program); - Ok(target_register) + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified should be resolved to a Column before translation") } ast::Expr::Raise(_, _) => todo!(), ast::Expr::Subquery(_) => todo!(), @@ -1604,125 +1534,6 @@ fn wrap_eval_jump_expr( program.preassign_label_to_next_insn(if_true_label); } -pub fn resolve_ident_qualified( - program: &ProgramBuilder, - table_name: &str, - ident: &str, - referenced_tables: &[BTreeTableReference], - cursor_hint: Option, -) -> Result<(usize, Type, usize, bool)> { - let ident = normalize_ident(ident); - let table_name = normalize_ident(table_name); - for table_reference in referenced_tables.iter() { - if table_reference.table_identifier == table_name { - let res = table_reference - .table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, col.primary_key)); - let mut idx; - let mut col_type; - let mut is_primary_key; - if res.is_some() { - (idx, col_type, is_primary_key) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == format!("{}.{}", table_name, ident)) - }) { - idx = res.0; - col_type = res.1.ty; - is_primary_key = res.1.primary_key; - } - } - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, cursor_hint); - return Ok((idx, col_type, cursor_id, is_primary_key)); - } - } - } - crate::bail_parse_error!( - "column with qualified name {}.{} not found", - table_name, - ident - ); -} - -pub fn resolve_ident_table( - program: &ProgramBuilder, - ident: &str, - referenced_tables: Option<&[BTreeTableReference]>, - cursor_hint: Option, -) -> Result<(usize, Type, usize, bool)> { - let ident = normalize_ident(ident); - let mut found = Vec::new(); - for table_reference in referenced_tables.unwrap() { - let res = table_reference - .table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| { - ( - idx, - col.ty, - table_reference.table.column_is_rowid_alias(col), - ) - }); - let mut idx; - let mut col_type; - let mut is_rowid_alias; - if res.is_some() { - (idx, col_type, is_rowid_alias) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == *ident) - }) { - idx = res.0; - col_type = res.1.ty; - is_rowid_alias = table_reference.table.column_is_rowid_alias(res.1); - } - } - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, cursor_hint); - found.push((idx, col_type, cursor_id, is_rowid_alias)); - } - } - if found.len() == 1 { - return Ok(found[0]); - } - if found.is_empty() { - crate::bail_parse_error!("column with name {} not found", ident.as_str()); - } - - crate::bail_parse_error!("ambiguous column name {}", ident.as_str()); -} - -pub fn resolve_ident_pseudo_table(ident: &String, pseudo_table: &PseudoTable) -> Result { - let res = pseudo_table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident); - if res.is_some() { - let (idx, _) = res.unwrap(); - return Ok(idx); - } - crate::bail_parse_error!("column with name {} not found", ident.as_str()); -} - pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mut ProgramBuilder) { if col_type == crate::schema::Type::Real { program.emit_insn(Insn::RealAffinity { @@ -1731,41 +1542,11 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu } } -pub fn translate_table_columns( - program: &mut ProgramBuilder, - cursor_id: usize, - table: &Table, - start_column_offset: usize, - start_reg: usize, -) -> usize { - let mut cur_reg = start_reg; - for i in start_column_offset..table.columns().len() { - let is_rowid = table.column_is_rowid_alias(table.get_column_at(i)); - let col_type = &table.get_column_at(i).ty; - if is_rowid { - program.emit_insn(Insn::RowId { - cursor_id, - dest: cur_reg, - }); - } else { - program.emit_insn(Insn::Column { - cursor_id, - column: i, - dest: cur_reg, - }); - } - maybe_apply_affinity(*col_type, cur_reg, program); - cur_reg += 1; - } - cur_reg -} - pub fn translate_aggregation( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], agg: &Aggregate, target_register: usize, - cursor_hint: Option, ) -> Result { let dest = match agg.func { AggFunc::Avg => { @@ -1774,14 +1555,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1796,14 +1570,7 @@ pub fn translate_aggregation( } else { let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - ); + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None); expr_reg }; program.emit_insn(Insn::AggStep { @@ -1827,13 +1594,8 @@ pub fn translate_aggregation( if agg.args.len() == 2 { match &agg.args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); - } else { - delimiter_expr = agg.args[1].clone(); - } + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); } ast::Expr::Literal(ast::Literal::String(s)) => { delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); @@ -1844,20 +1606,12 @@ pub fn translate_aggregation( delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); } - translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; translate_expr( program, Some(referenced_tables), &delimiter_expr, delimiter_reg, - cursor_hint, None, )?; @@ -1876,14 +1630,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1898,14 +1645,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1926,12 +1666,8 @@ pub fn translate_aggregation( let delimiter_expr: ast::Expr; match &agg.args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); - } else { - delimiter_expr = agg.args[1].clone(); - } + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); } ast::Expr::Literal(ast::Literal::String(s)) => { delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); @@ -1939,20 +1675,12 @@ pub fn translate_aggregation( _ => crate::bail_parse_error!("Incorrect delimiter parameter"), }; - translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; translate_expr( program, Some(referenced_tables), &delimiter_expr, delimiter_reg, - cursor_hint, None, )?; @@ -1971,14 +1699,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1993,14 +1714,188 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Total, + }); + target_register + } + }; + Ok(dest) +} + +pub fn translate_aggregation_groupby( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + group_by_sorter_cursor_id: usize, + cursor_index: usize, + agg: &Aggregate, + target_register: usize, +) -> Result { + let emit_column = |program: &mut ProgramBuilder, expr_reg: usize| { + program.emit_insn(Insn::Column { + cursor_id: group_by_sorter_cursor_id, + column: cursor_index, + dest: expr_reg, + }); + }; + let dest = match agg.func { + AggFunc::Avg => { + if agg.args.len() != 1 { + crate::bail_parse_error!("avg bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Avg, + }); + target_register + } + AggFunc::Count => { + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Count, + }); + target_register + } + AggFunc::GroupConcat => { + if agg.args.len() != 1 && agg.args.len() != 2 { + crate::bail_parse_error!("group_concat bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let delimiter_expr: ast::Expr; + + if agg.args.len() == 2 { + match &agg.args[1] { + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + } else { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); + } + + emit_column(program, expr_reg); + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + None, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::GroupConcat, + }); + + target_register + } + AggFunc::Max => { + if agg.args.len() != 1 { + crate::bail_parse_error!("max bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Max, + }); + target_register + } + AggFunc::Min => { + if agg.args.len() != 1 { + crate::bail_parse_error!("min bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Min, + }); + target_register + } + AggFunc::StringAgg => { + if agg.args.len() != 2 { + crate::bail_parse_error!("string_agg bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let delimiter_expr: ast::Expr; + + match &agg.args[1] { + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + + emit_column(program, expr_reg); + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + None, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::StringAgg, + }); + + target_register + } + AggFunc::Sum => { + if agg.args.len() != 1 { + crate::bail_parse_error!("sum bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Sum, + }); + target_register + } + AggFunc::Total => { + if agg.args.len() != 1 { + crate::bail_parse_error!("total bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index ea890e994..614cde8b2 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -98,7 +98,6 @@ pub fn translate_insert( expr, column_registers_start + col, None, - None, )?; } program.emit_insn(Insn::Yield { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 833943fe3..307df356b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,12 +1,12 @@ -use std::{collections::HashMap, rc::Rc}; +use std::rc::Rc; use sqlite3_parser::ast; -use crate::{schema::Index, util::normalize_ident, Result}; +use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Operator, Plan, ProjectionColumn, Search, + Direction, IterationDirection, Plan, Search, SourceOperator, }; /** @@ -14,115 +14,106 @@ use super::plan::{ * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCache)> { - let mut expr_result_cache = ExpressionResultCache::new(); +pub fn optimize_plan(mut select_plan: Plan) -> Result { push_predicates( - &mut select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.where_clause, &select_plan.referenced_tables, )?; - if eliminate_constants(&mut select_plan.root_operator)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - return Ok(( - Plan { - root_operator: Operator::Nothing, - referenced_tables: vec![], - available_indexes: vec![], - }, - expr_result_cache, - )); - } + eliminate_constants(&mut select_plan.source)?; use_indexes( - &mut select_plan.root_operator, + &mut select_plan.source, &select_plan.referenced_tables, &select_plan.available_indexes, )?; eliminate_unnecessary_orderby( - &mut select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.order_by, + &select_plan.referenced_tables, &select_plan.available_indexes, )?; - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache); - Ok((select_plan, expr_result_cache)) + Ok(select_plan) } fn _operator_is_already_ordered_by( - operator: &mut Operator, + operator: &mut SourceOperator, key: &mut ast::Expr, + referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result { match operator { - Operator::Scan { + SourceOperator::Scan { table_reference, .. - } => Ok(key.is_primary_key_of(table_reference)), - Operator::Search { + } => Ok(key.is_rowid_alias_of(table_reference.table_index)), + SourceOperator::Search { table_reference, search, .. } => match search { - Search::PrimaryKeyEq { .. } => Ok(key.is_primary_key_of(table_reference)), - Search::PrimaryKeySearch { .. } => Ok(key.is_primary_key_of(table_reference)), + Search::PrimaryKeyEq { .. } => Ok(key.is_rowid_alias_of(table_reference.table_index)), + Search::PrimaryKeySearch { .. } => { + Ok(key.is_rowid_alias_of(table_reference.table_index)) + } Search::IndexSearch { index, .. } => { - let index_idx = key.check_index_scan(table_reference, available_indexes)?; + let index_idx = key.check_index_scan( + table_reference.table_index, + referenced_tables, + available_indexes, + )?; let index_is_the_same = index_idx .map(|i| Rc::ptr_eq(&available_indexes[i], index)) .unwrap_or(false); Ok(index_is_the_same) } }, - Operator::Join { left, .. } => { - _operator_is_already_ordered_by(left, key, available_indexes) - } - Operator::Aggregate { source, .. } => { - _operator_is_already_ordered_by(source, key, available_indexes) - } - Operator::Projection { source, .. } => { - _operator_is_already_ordered_by(source, key, available_indexes) + SourceOperator::Join { left, .. } => { + _operator_is_already_ordered_by(left, key, referenced_tables, available_indexes) } _ => Ok(false), } } fn eliminate_unnecessary_orderby( - operator: &mut Operator, + operator: &mut SourceOperator, + order_by: &mut Option>, + referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result<()> { - match operator { - Operator::Order { source, key, .. } => { - if key.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); - } - - let (key, direction) = key.first_mut().unwrap(); - - let already_ordered = _operator_is_already_ordered_by(source, key, available_indexes)?; - - if already_ordered { - push_scan_direction(source, direction); - - *operator = source.take_ownership(); - } - Ok(()) - } - Operator::Limit { source, .. } => { - eliminate_unnecessary_orderby(source, available_indexes)?; - Ok(()) - } - _ => Ok(()), + if order_by.is_none() { + return Ok(()); } + + let o = order_by.as_mut().unwrap(); + + if o.len() != 1 { + // TODO: handle multiple order by keys + return Ok(()); + } + + let (key, direction) = o.first_mut().unwrap(); + + let already_ordered = + _operator_is_already_ordered_by(operator, key, referenced_tables, available_indexes)?; + + if already_ordered { + push_scan_direction(operator, direction); + *order_by = None; + } + + Ok(()) } /** * Use indexes where possible */ fn use_indexes( - operator: &mut Operator, + operator: &mut SourceOperator, referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result<()> { match operator { - Operator::Search { .. } => Ok(()), - Operator::Scan { + SourceOperator::Search { .. } => Ok(()), + SourceOperator::Scan { table_reference, predicates: filter, id, @@ -135,25 +126,29 @@ fn use_indexes( let fs = filter.as_mut().unwrap(); for i in 0..fs.len() { let f = fs[i].take_ownership(); - let table_ref = referenced_tables + let table_index = referenced_tables .iter() - .find(|t| { + .position(|t| { Rc::ptr_eq(&t.table, &table_reference.table) && t.table_identifier == table_reference.table_identifier }) .unwrap(); - match try_extract_index_search_expression(f, table_ref, available_indexes)? { + match try_extract_index_search_expression( + f, + table_index, + referenced_tables, + available_indexes, + )? { Either::Left(non_index_using_expr) => { fs[i] = non_index_using_expr; } Either::Right(index_search) => { fs.remove(i); - *operator = Operator::Search { + *operator = SourceOperator::Search { id: *id, - table_reference: table_ref.clone(), + table_reference: table_reference.clone(), predicates: Some(fs.clone()), search: index_search, - step: 0, }; return Ok(()); @@ -163,32 +158,12 @@ fn use_indexes( Ok(()) } - Operator::Aggregate { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Filter { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Limit { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { use_indexes(left, referenced_tables, available_indexes)?; use_indexes(right, referenced_tables, available_indexes)?; Ok(()) } - Operator::Order { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Projection { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -200,33 +175,11 @@ enum ConstantConditionEliminationResult { // removes predicates that are always true // returns a ConstantEliminationResult indicating whether any predicates are always false -fn eliminate_constants(operator: &mut Operator) -> Result { +fn eliminate_constants( + operator: &mut SourceOperator, +) -> Result { match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - let predicate = &predicates[i]; - if predicate.is_always_true()? { - predicates.remove(i); - } else if predicate.is_always_false()? { - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { - i += 1; - } - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - eliminate_constants(operator)?; - } else { - eliminate_constants(source)?; - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -264,44 +217,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *source = Box::new(Operator::Nothing); - } - // Aggregation operator can return a row even if the source is empty e.g. count(1) from users where 0 - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Limit { source, .. } => { - let constant_elimination_result = eliminate_constants(source)?; - if constant_elimination_result - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - } - Ok(constant_elimination_result) - } - Operator::Order { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Projection { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Scan { predicates, .. } => { + SourceOperator::Scan { predicates, .. } => { if let Some(ps) = predicates { let mut i = 0; while i < ps.len() { @@ -321,7 +237,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { + SourceOperator::Search { predicates, .. } => { if let Some(predicates) = predicates { let mut i = 0; while i < predicates.len() { @@ -338,7 +254,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result Ok(ConstantConditionEliminationResult::Continue), + SourceOperator::Nothing => Ok(ConstantConditionEliminationResult::Continue), } } @@ -346,42 +262,35 @@ fn eliminate_constants(operator: &mut Operator) -> Result>, referenced_tables: &Vec, ) -> Result<()> { - match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - // try to push the predicate to the source - // if it succeeds, remove the predicate from the filter - let predicate_owned = predicates[i].take_ownership(); - let Some(predicate) = push_predicate(source, predicate_owned, referenced_tables)? - else { - predicates.remove(i); - continue; - }; - predicates[i] = predicate; - i += 1; - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - } - - Ok(()) + if let Some(predicates) = where_clause { + let mut i = 0; + while i < predicates.len() { + let predicate = predicates[i].take_ownership(); + let Some(predicate) = push_predicate(operator, predicate, referenced_tables)? else { + predicates.remove(i); + continue; + }; + predicates[i] = predicate; + i += 1; } - Operator::Join { + if predicates.is_empty() { + *where_clause = None; + } + } + match operator { + SourceOperator::Join { left, right, predicates, outer, .. } => { - push_predicates(left, referenced_tables)?; - push_predicates(right, referenced_tables)?; + push_predicates(left, where_clause, referenced_tables)?; + push_predicates(right, where_clause, referenced_tables)?; if predicates.is_none() { return Ok(()); @@ -419,26 +328,9 @@ fn push_predicates( Ok(()) } - Operator::Aggregate { source, .. } => { - push_predicates(source, referenced_tables)?; - - Ok(()) - } - Operator::Limit { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Order { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Projection { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Scan { .. } => Ok(()), - Operator::Search { .. } => Ok(()), - Operator::Nothing => Ok(()), + SourceOperator::Scan { .. } => Ok(()), + SourceOperator::Search { .. } => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -447,12 +339,12 @@ fn push_predicates( Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) */ fn push_predicate( - operator: &mut Operator, + operator: &mut SourceOperator, predicate: ast::Expr, referenced_tables: &Vec, ) -> Result> { match operator { - Operator::Scan { + SourceOperator::Scan { predicates, table_reference, .. @@ -483,22 +375,8 @@ fn push_predicate( Ok(None) } - Operator::Search { .. } => Ok(Some(predicate)), - Operator::Filter { - source, - predicates: ps, - .. - } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - ps.push(push_result.unwrap()); - - Ok(None) - } - Operator::Join { + SourceOperator::Search { .. } => Ok(Some(predicate)), + SourceOperator::Join { left, right, predicates: join_on_preds, @@ -538,46 +416,13 @@ fn push_predicate( Ok(None) } - Operator::Aggregate { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Limit { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Order { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Projection { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Nothing => Ok(Some(predicate)), + SourceOperator::Nothing => Ok(Some(predicate)), } } -fn push_scan_direction(operator: &mut Operator, direction: &Direction) { +fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { match operator { - Operator::Projection { source, .. } => push_scan_direction(source, direction), - Operator::Scan { iter_dir, .. } => { + SourceOperator::Scan { iter_dir, .. } => { if iter_dir.is_none() { match direction { Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), @@ -589,378 +434,6 @@ fn push_scan_direction(operator: &mut Operator, direction: &Direction) { } } -#[derive(Debug)] -pub struct ExpressionResultCache { - resultmap: HashMap, - keymap: HashMap>, -} - -#[derive(Debug)] -pub struct CachedResult { - pub register_idx: usize, - pub source_expr: ast::Expr, -} - -const OPERATOR_ID_MULTIPLIER: usize = 10000; - -/** - ExpressionResultCache is a cache for the results of expressions that are computed in the query plan, - or more precisely, the VM registers that hold the results of these expressions. - - Right now the cache is mainly used to avoid recomputing e.g. the result of an aggregation expression - e.g. SELECT t.a, SUM(t.b) FROM t GROUP BY t.a ORDER BY SUM(t.b) -*/ -impl ExpressionResultCache { - pub fn new() -> Self { - ExpressionResultCache { - resultmap: HashMap::new(), - keymap: HashMap::new(), - } - } - - /** - Store the result of an expression that is computed in the query plan. - The result is stored in a VM register. A copy of the expression AST node is - stored as well, so that parent operators can use it to compare their own expressions - with the one that was computed in a child operator. - - This is a weakness of our current reliance on a 3rd party AST library, as we can't - e.g. modify the AST to add identifiers to nodes or replace nodes with some kind of - reference to a register, etc. - */ - pub fn cache_result_register( - &mut self, - operator_id: usize, - result_column_idx: usize, - register_idx: usize, - expr: ast::Expr, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.resultmap.insert( - key, - CachedResult { - register_idx, - source_expr: expr, - }, - ); - } - - /** - Set a mapping from a parent operator to a child operator, so that the parent operator - can look up the register of a result that was computed in the child operator. - E.g. "Parent operator's result column 3 is computed in child operator 5, result column 2" - */ - pub fn set_precomputation_key( - &mut self, - operator_id: usize, - result_column_idx: usize, - child_operator_id: usize, - child_operator_result_column_idx_mask: usize, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - - let mut values = Vec::new(); - for i in 0..64 { - if (child_operator_result_column_idx_mask >> i) & 1 == 1 { - values.push(child_operator_id * OPERATOR_ID_MULTIPLIER + i); - } - } - self.keymap.insert(key, values); - } - - /** - Get the cache entries for a given operator and result column index. - There may be multiple cached entries, e.g. a binary operator's both - arms may have been cached. - */ - pub fn get_cached_result_registers( - &self, - operator_id: usize, - result_column_idx: usize, - ) -> Option> { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.keymap.get(&key).and_then(|keys| { - let mut results = Vec::new(); - for key in keys { - if let Some(result) = self.resultmap.get(key) { - results.push(result); - } - } - if results.is_empty() { - None - } else { - Some(results) - } - }) - } -} - -type ResultColumnIndexBitmask = usize; - -/** - Find all result columns in an operator that match an expression, either fully or partially. - This is used to find the result columns that are computed in an operator and that are used - in a parent operator, so that the parent operator can look up the register that holds the result - of the child operator's expression. - - The result is returned as a bitmask due to performance neuroticism. A limitation of this is that - we can only handle 64 result columns per operator. -*/ -fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr: &ast::Expr, - operator: &Operator, -) -> ResultColumnIndexBitmask { - let exact_match = match operator { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut idx = 0; - let mut mask = 0; - for agg in aggregates.iter() { - if agg.original_expr == *expr { - mask |= 1 << idx; - } - idx += 1; - } - - if let Some(group_by) = group_by { - for g in group_by.iter() { - if g == expr { - mask |= 1 << idx; - } - idx += 1 - } - } - - mask - } - Operator::Filter { .. } => 0, - Operator::Limit { .. } => 0, - Operator::Join { .. } => 0, - Operator::Order { .. } => 0, - Operator::Projection { expressions, .. } => { - let mut mask = 0; - for (idx, e) in expressions.iter().enumerate() { - match e { - ProjectionColumn::Column(c) => { - if c == expr { - mask |= 1 << idx; - } - } - ProjectionColumn::Star => {} - ProjectionColumn::TableStar(_) => {} - } - } - - mask - } - Operator::Scan { .. } => 0, - Operator::Search { .. } => 0, - Operator::Nothing => 0, - }; - - if exact_match != 0 { - return exact_match; - } - - match expr { - ast::Expr::Between { - lhs, - not: _, - start, - end, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(start, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(end, operator); - mask - } - ast::Expr::Binary(lhs, _op, rhs) => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Case { - base, - when_then_pairs, - else_expr, - } => { - let mut mask = 0; - if let Some(base) = base { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(base, operator); - } - for (w, t) in when_then_pairs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(w, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(t, operator); - } - if let Some(e) = else_expr { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Cast { expr, type_name: _ } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Collate(expr, _collation) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::DoublyQualified(_schema, _tbl, _ident) => 0, - ast::Expr::Exists(_) => 0, - ast::Expr::FunctionCall { - name: _, - distinctness: _, - args, - order_by: _, - filter_over: _, - } => { - let mut mask = 0; - if let Some(args) = args { - for a in args.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(a, operator); - } - } - mask - } - ast::Expr::FunctionCallStar { - name: _, - filter_over: _, - } => 0, - ast::Expr::Id(_) => 0, - ast::Expr::InList { lhs, not: _, rhs } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - if let Some(rhs) = rhs { - for r in rhs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(r, operator); - } - } - mask - } - ast::Expr::InSelect { - lhs, - not: _, - rhs: _, - } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - lhs, operator, - ) - } - ast::Expr::InTable { - lhs: _, - not: _, - rhs: _, - args: _, - } => 0, - ast::Expr::IsNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Like { - lhs, - not: _, - op: _, - rhs, - escape: _, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Literal(_) => 0, - ast::Expr::Name(_) => 0, - ast::Expr::NotNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Parenthesized(expr) => { - let mut mask = 0; - for e in expr.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Qualified(_, _) => 0, - ast::Expr::Raise(_, _) => 0, - ast::Expr::Subquery(_) => 0, - ast::Expr::Unary(_op, expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Variable(_) => 0, - } -} - -/** - * This function is used to find all the expressions that are shared between the parent operator and the child operators. - * If an expression is shared between the parent and child operators, then the parent operator should not recompute the expression. - * Instead, it should use the result of the expression that was computed by the child operator. -*/ -fn find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - operator: &Operator, - expr_result_cache: &mut ExpressionResultCache, -) { - match operator { - Operator::Aggregate { - source, - .. - } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - source, expr_result_cache, - ) - } - Operator::Filter { .. } => unreachable!(), - Operator::Limit { source, .. } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Join { .. } => {} - Operator::Order { source, key, .. } => { - for (idx, (expr, _)) in key.iter().enumerate() { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Projection { source, expressions, .. } => { - for (idx, expr) in expressions.iter().enumerate() { - if let ProjectionColumn::Column(expr) = expr { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Scan { .. } => {} - Operator::Search { .. } => {} - Operator::Nothing => {} - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantPredicate { AlwaysTrue, @@ -984,80 +457,56 @@ pub trait Optimizable { .check_constant()? .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } - fn is_primary_key_of(&self, table_reference: &BTreeTableReference) -> bool; + fn is_rowid_alias_of(&self, table_index: usize) -> bool; fn check_index_scan( &mut self, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result>; } impl Optimizable for ast::Expr { - fn is_primary_key_of(&self, table_reference: &BTreeTableReference) -> bool { + fn is_rowid_alias_of(&self, table_index: usize) -> bool { match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - table_reference - .table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - - tbl == table_reference.table_identifier - && table_reference - .table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - } + ast::Expr::Column { + table, + is_rowid_alias, + .. + } => *is_rowid_alias && *table == table_index, _ => false, } } fn check_index_scan( &mut self, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result> { match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let indexes = available_indexes - .iter() - .enumerate() - .filter(|(_, i)| { - i.table_name == table_reference.table_identifier - && i.columns.iter().any(|c| c.name == ident) - }) - .collect::>(); - if indexes.is_empty() { - return Ok(None); - } - if indexes.len() > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - Ok(Some(indexes.first().unwrap().0)) - } - ast::Expr::Qualified(_, ident) => { - let ident = normalize_ident(&ident.0); - let index = available_indexes.iter().enumerate().find(|(_, i)| { - if i.table_name != table_reference.table.name { - return false; + ast::Expr::Column { table, column, .. } => { + for (idx, index) in available_indexes.iter().enumerate() { + if index.table_name == referenced_tables[*table].table.name { + let column = referenced_tables[*table] + .table + .columns + .get(*column) + .unwrap(); + if index.columns.first().unwrap().name == column.name { + return Ok(Some(idx)); + } } - i.columns.iter().any(|c| normalize_ident(&c.name) == ident) - }); - if index.is_none() { - return Ok(None); } - Ok(Some(index.unwrap().0)) + Ok(None) } ast::Expr::Binary(lhs, op, rhs) => { - let lhs_index = lhs.check_index_scan(table_reference, available_indexes)?; + let lhs_index = + lhs.check_index_scan(table_index, referenced_tables, available_indexes)?; if lhs_index.is_some() { return Ok(lhs_index); } - let rhs_index = rhs.check_index_scan(table_reference, available_indexes)?; + let rhs_index = + rhs.check_index_scan(table_index, referenced_tables, available_indexes)?; if rhs_index.is_some() { // swap lhs and rhs let lhs_new = rhs.take_ownership(); @@ -1196,12 +645,13 @@ pub enum Either { pub fn try_extract_index_search_expression( expr: ast::Expr, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result> { match expr { ast::Expr::Binary(mut lhs, operator, mut rhs) => { - if lhs.is_primary_key_of(table_reference) { + if lhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *rhs })); @@ -1219,7 +669,7 @@ pub fn try_extract_index_search_expression( } } - if rhs.is_primary_key_of(table_reference) { + if rhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *lhs })); @@ -1237,7 +687,9 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = lhs.check_index_scan(table_reference, available_indexes)? { + if let Some(index_index) = + lhs.check_index_scan(table_index, referenced_tables, available_indexes)? + { match operator { ast::Operator::Equals | ast::Operator::Greater @@ -1254,7 +706,9 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = rhs.check_index_scan(table_reference, available_indexes)? { + if let Some(index_index) = + rhs.check_index_scan(table_index, referenced_tables, available_indexes)? + { match operator { ast::Operator::Equals | ast::Operator::Greater @@ -1287,8 +741,8 @@ impl TakeOwnership for ast::Expr { } } -impl TakeOwnership for Operator { +impl TakeOwnership for SourceOperator { fn take_ownership(&mut self) -> Self { - std::mem::replace(self, Operator::Nothing) + std::mem::replace(self, SourceOperator::Nothing) } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9502e67ca..ef5d97948 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -9,20 +9,41 @@ use sqlite3_parser::ast; use crate::{ function::AggFunc, schema::{BTreeTable, Index}, - util::normalize_ident, Result, }; +#[derive(Debug)] +pub struct ResultSetColumn { + pub expr: ast::Expr, + // TODO: encode which aggregates (e.g. index bitmask of plan.aggregates) are present in this column + pub contains_aggregates: bool, +} + #[derive(Debug)] pub struct Plan { - pub root_operator: Operator, + /// A tree of sources (tables). + pub source: SourceOperator, + /// the columns inside SELECT ... FROM + pub result_columns: Vec, + /// where clause split into a vec at 'AND' boundaries. + pub where_clause: Option>, + /// group by clause + pub group_by: Option>, + /// order by clause + pub order_by: Option>, + /// all the aggregates collected from the result columns, order by, and (TODO) having clauses + pub aggregates: Option>, + /// limit clause + pub limit: Option, + /// all the tables referenced in the query pub referenced_tables: Vec, + /// all the indexes available pub available_indexes: Vec>, } impl Display for Plan { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.root_operator) + write!(f, "{}", self.source) } } @@ -33,82 +54,20 @@ pub enum IterationDirection { } /** - An Operator is a Node in the query plan. - Operators form a tree structure, with each having zero or more children. - For example, a query like `SELECT t1.foo FROM t1 ORDER BY t1.foo LIMIT 1` would have the following structure: - Limit - Order - Project - Scan - - Operators also have a unique ID, which is used to identify them in the query plan and attach metadata. - They also have a step counter, which is used to track the current step in the operator's execution. - TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. + A SourceOperator is a Node in the query plan that reads data from a table. */ #[derive(Clone, Debug)] -pub enum Operator { - // Aggregate operator - // This operator is used to compute aggregate functions like SUM, AVG, COUNT, etc. - // It takes a source operator and a list of aggregate functions to compute. - // GROUP BY is not supported yet. - Aggregate { - id: usize, - source: Box, - aggregates: Vec, - group_by: Option>, - step: usize, - }, - // Filter operator - // This operator is used to filter rows from the source operator. - // It takes a source operator and a list of predicates to evaluate. - // Only rows for which all predicates evaluate to true are passed to the next operator. - // Generally filter operators will only exist in unoptimized plans, - // as the optimizer will try to push filters down to the lowest possible level, - // e.g. a table scan. - Filter { - id: usize, - source: Box, - predicates: Vec, - }, - // Limit operator - // This operator is used to limit the number of rows returned by the source operator. - Limit { - id: usize, - source: Box, - limit: usize, - step: usize, - }, +pub enum SourceOperator { // Join operator // This operator is used to join two source operators. // It takes a left and right source operator, a list of predicates to evaluate, // and a boolean indicating whether it is an outer join. Join { id: usize, - left: Box, - right: Box, + left: Box, + right: Box, predicates: Option>, outer: bool, - step: usize, - }, - // Order operator - // This operator is used to sort the rows returned by the source operator. - Order { - id: usize, - source: Box, - key: Vec<(ast::Expr, Direction)>, - step: usize, - }, - // Projection operator - // This operator is used to project columns from the source operator. - // It takes a source operator and a list of expressions to evaluate. - // e.g. SELECT foo, bar FROM t1 - // In this example, the expressions would be [foo, bar] - // and the source operator would be a Scan operator for table t1. - Projection { - id: usize, - source: Box, - expressions: Vec, - step: usize, }, // Scan operator // This operator is used to scan a table. @@ -123,7 +82,6 @@ pub enum Operator { id: usize, table_reference: BTreeTableReference, predicates: Option>, - step: usize, iter_dir: Option, }, // Search operator @@ -134,7 +92,6 @@ pub enum Operator { table_reference: BTreeTableReference, search: Search, predicates: Option>, - step: usize, }, // Nothing operator // This operator is used to represent an empty query. @@ -146,6 +103,7 @@ pub enum Operator { pub struct BTreeTableReference { pub table: Rc, pub table_identifier: String, + pub table_index: usize, } /// An enum that represents a search operation that can be used to search for a row in a table using an index @@ -168,136 +126,13 @@ pub enum Search { }, } -#[derive(Clone, Debug)] -pub enum ProjectionColumn { - Column(ast::Expr), - Star, - TableStar(BTreeTableReference), -} - -impl ProjectionColumn { - pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { - match self { - ProjectionColumn::Column(_) => 1, - ProjectionColumn::Star => { - let mut count = 0; - for table_reference in referenced_tables { - count += table_reference.table.columns.len(); - } - count - } - ProjectionColumn::TableStar(table_reference) => table_reference.table.columns.len(), - } - } -} - -impl Operator { - pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { - match self { - Operator::Aggregate { - group_by, - aggregates, - .. - } => aggregates.len() + group_by.as_ref().map_or(0, |g| g.len()), - Operator::Filter { source, .. } => source.column_count(referenced_tables), - Operator::Limit { source, .. } => source.column_count(referenced_tables), - Operator::Join { left, right, .. } => { - left.column_count(referenced_tables) + right.column_count(referenced_tables) - } - Operator::Order { source, .. } => source.column_count(referenced_tables), - Operator::Projection { expressions, .. } => expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(), - Operator::Scan { - table_reference, .. - } => table_reference.table.columns.len(), - Operator::Search { - table_reference, .. - } => table_reference.table.columns.len(), - Operator::Nothing => 0, - } - } - - pub fn column_names(&self) -> Vec { - match self { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut names = vec![]; - for agg in aggregates.iter() { - names.push(agg.func.to_string().to_string()); - } - - if let Some(group_by) = group_by { - for expr in group_by.iter() { - match expr { - ast::Expr::Id(ident) => names.push(ident.0.clone()), - ast::Expr::Qualified(tbl, ident) => { - names.push(format!("{}.{}", tbl.0, ident.0)) - } - e => names.push(e.to_string()), - } - } - } - - names - } - Operator::Filter { source, .. } => source.column_names(), - Operator::Limit { source, .. } => source.column_names(), - Operator::Join { left, right, .. } => { - let mut names = left.column_names(); - names.extend(right.column_names()); - names - } - Operator::Order { source, .. } => source.column_names(), - Operator::Projection { expressions, .. } => expressions - .iter() - .map(|e| match e { - ProjectionColumn::Column(expr) => match expr { - ast::Expr::Id(ident) => ident.0.clone(), - ast::Expr::Qualified(tbl, ident) => format!("{}.{}", tbl.0, ident.0), - _ => "expr".to_string(), - }, - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect(), - Operator::Scan { - table_reference, .. - } => table_reference - .table - .columns - .iter() - .map(|c| c.name.clone()) - .collect(), - Operator::Search { - table_reference, .. - } => table_reference - .table - .columns - .iter() - .map(|c| c.name.clone()) - .collect(), - Operator::Nothing => vec![], - } - } - +impl SourceOperator { pub fn id(&self) -> usize { match self { - Operator::Aggregate { id, .. } => *id, - Operator::Filter { id, .. } => *id, - Operator::Limit { id, .. } => *id, - Operator::Join { id, .. } => *id, - Operator::Order { id, .. } => *id, - Operator::Projection { id, .. } => *id, - Operator::Scan { id, .. } => *id, - Operator::Search { id, .. } => *id, - Operator::Nothing => unreachable!(), + SourceOperator::Join { id, .. } => *id, + SourceOperator::Scan { id, .. } => *id, + SourceOperator::Search { id, .. } => *id, + SourceOperator::Nothing => unreachable!(), } } } @@ -337,10 +172,10 @@ impl Display for Aggregate { } // For EXPLAIN QUERY PLAN -impl Display for Operator { +impl Display for SourceOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt_operator( - operator: &Operator, + operator: &SourceOperator, f: &mut Formatter, level: usize, last: bool, @@ -356,34 +191,7 @@ impl Display for Operator { }; match operator { - Operator::Aggregate { - source, aggregates, .. - } => { - // e.g. Aggregate count(*), sum(x) - let aggregates_display_string = aggregates - .iter() - .map(|agg| agg.to_string()) - .collect::>() - .join(", "); - writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Filter { - source, predicates, .. - } => { - let predicates_string = predicates - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(" AND "); - writeln!(f, "{}FILTER {}", indent, predicates_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Limit { source, limit, .. } => { - writeln!(f, "{}TAKE {}", indent, limit)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -408,35 +216,7 @@ impl Display for Operator { fmt_operator(left, f, level + 1, false)?; fmt_operator(right, f, level + 1, true) } - Operator::Order { source, key, .. } => { - let sort_keys_string = key - .iter() - .map(|(expr, dir)| format!("{} {}", expr, dir)) - .collect::>() - .join(", "); - writeln!(f, "{}SORT {}", indent, sort_keys_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Projection { - source, - expressions, - .. - } => { - let expressions = expressions - .iter() - .map(|expr| match expr { - ProjectionColumn::Column(c) => c.to_string(), - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect::>() - .join(", "); - writeln!(f, "{}PROJECT {}", indent, expressions)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Scan { + SourceOperator::Scan { table_reference, predicates: filter, .. @@ -464,7 +244,7 @@ impl Display for Operator { }?; Ok(()) } - Operator::Search { + SourceOperator::Search { table_reference, search, .. @@ -487,7 +267,7 @@ impl Display for Operator { } Ok(()) } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } writeln!(f, "QUERY PLAN")?; @@ -505,35 +285,15 @@ impl Display for Operator { */ pub fn get_table_ref_bitmask_for_operator<'a>( tables: &'a Vec, - operator: &'a Operator, + operator: &'a SourceOperator, ) -> Result { let mut table_refs_mask = 0; match operator { - Operator::Aggregate { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Filter { - source, predicates, .. - } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - for predicate in predicates { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?; - } - } - Operator::Limit { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?; table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?; } - Operator::Order { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Projection { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => { table_refs_mask |= 1 @@ -542,7 +302,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Search { + SourceOperator::Search { table_reference, .. } => { table_refs_mask |= 1 @@ -551,7 +311,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Nothing => {} + SourceOperator::Nothing => {} } Ok(table_refs_mask) } @@ -574,46 +334,12 @@ pub fn get_table_ref_bitmask_for_ast_expr<'a>( table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?; table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?; } - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let matching_tables = tables - .iter() - .enumerate() - .filter(|(_, table_reference)| table_reference.table.get_column(&ident).is_some()); - - let mut matches = 0; - let mut matching_tbl = None; - for table in matching_tables { - matching_tbl = Some(table); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", &ident) - } - } - - if let Some((tbl_index, _)) = matching_tbl { - table_refs_mask |= 1 << tbl_index; - } else { - crate::bail_parse_error!("column not found: {}", &ident) - } + ast::Expr::Column { table, .. } => { + table_refs_mask |= 1 << table; } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let matching_table = tables - .iter() - .enumerate() - .find(|(_, t)| t.table_identifier == tbl); - - if matching_table.is_none() { - crate::bail_parse_error!("introspect: table not found: {}", &tbl) - } - let (table_index, table_reference) = matching_table.unwrap(); - if table_reference.table.get_column(&ident).is_none() { - crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) - } - - table_refs_mask |= 1 << table_index; + ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before optimizer"), + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified should be resolved to a Column before optimizer") } ast::Expr::Literal(_) => {} ast::Expr::Like { lhs, rhs, .. } => { diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0a7fac1e2..51706f108 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,4 +1,6 @@ -use super::plan::{Aggregate, BTreeTableReference, Direction, Operator, Plan, ProjectionColumn}; +use super::plan::{ + Aggregate, BTreeTableReference, Direction, Plan, ResultSetColumn, SourceOperator, +}; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; @@ -18,6 +20,9 @@ impl OperatorIdCounter { } fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) { + if aggs.iter().any(|a| a.original_expr == *expr) { + return; + } match expr { ast::Expr::FunctionCall { name, args, .. } => { let args_count = if let Some(args) = &args { @@ -55,10 +60,171 @@ fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) { resolve_aggregates(lhs, aggs); resolve_aggregates(rhs, aggs); } + // TODO: handle other expressions that may contain aggregates _ => {} } } +/// Recursively resolve column references in an expression. +/// Id, Qualified and DoublyQualified are converted to Column. +fn bind_column_references( + expr: &mut ast::Expr, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match expr { + ast::Expr::Id(id) => { + let mut match_result = None; + for (tbl_idx, table) in referenced_tables.iter().enumerate() { + let col_idx = table + .table + .columns + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&id.0)); + if col_idx.is_some() { + if match_result.is_some() { + crate::bail_parse_error!("Column {} is ambiguous", id.0); + } + let col = table.table.columns.get(col_idx.unwrap()).unwrap(); + match_result = Some((tbl_idx, col_idx.unwrap(), col.primary_key)); + } + } + if match_result.is_none() { + crate::bail_parse_error!("Column {} not found", id.0); + } + let (tbl_idx, col_idx, is_primary_key) = match_result.unwrap(); + *expr = ast::Expr::Column { + database: None, // TODO: support different databases + table: tbl_idx, + column: col_idx, + is_rowid_alias: is_primary_key, + }; + Ok(()) + } + ast::Expr::Qualified(tbl, id) => { + let matching_tbl_idx = referenced_tables + .iter() + .position(|t| t.table_identifier.eq_ignore_ascii_case(&tbl.0)); + if matching_tbl_idx.is_none() { + crate::bail_parse_error!("Table {} not found", tbl.0); + } + let tbl_idx = matching_tbl_idx.unwrap(); + let col_idx = referenced_tables[tbl_idx] + .table + .columns + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&id.0)); + if col_idx.is_none() { + crate::bail_parse_error!("Column {} not found", id.0); + } + let col = referenced_tables[tbl_idx] + .table + .columns + .get(col_idx.unwrap()) + .unwrap(); + *expr = ast::Expr::Column { + database: None, // TODO: support different databases + table: tbl_idx, + column: col_idx.unwrap(), + is_rowid_alias: col.primary_key, + }; + Ok(()) + } + ast::Expr::Between { + lhs, + not: _, + start, + end, + } => { + bind_column_references(lhs, referenced_tables)?; + bind_column_references(start, referenced_tables)?; + bind_column_references(end, referenced_tables)?; + Ok(()) + } + ast::Expr::Binary(expr, _operator, expr1) => { + bind_column_references(expr, referenced_tables)?; + bind_column_references(expr1, referenced_tables)?; + Ok(()) + } + ast::Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + if let Some(base) = base { + bind_column_references(base, referenced_tables)?; + } + for (when, then) in when_then_pairs { + bind_column_references(when, referenced_tables)?; + bind_column_references(then, referenced_tables)?; + } + if let Some(else_expr) = else_expr { + bind_column_references(else_expr, referenced_tables)?; + } + Ok(()) + } + ast::Expr::Cast { expr, type_name: _ } => bind_column_references(expr, referenced_tables), + ast::Expr::Collate(expr, _string) => bind_column_references(expr, referenced_tables), + ast::Expr::FunctionCall { + name: _, + distinctness: _, + args, + order_by: _, + filter_over: _, + } => { + if let Some(args) = args { + for arg in args { + bind_column_references(arg, referenced_tables)?; + } + } + Ok(()) + } + // Column references cannot exist before binding + ast::Expr::Column { .. } => unreachable!(), + ast::Expr::DoublyQualified(_, _, _) => todo!(), + ast::Expr::Exists(_) => todo!(), + ast::Expr::FunctionCallStar { .. } => Ok(()), + ast::Expr::InList { lhs, not: _, rhs } => { + bind_column_references(lhs, referenced_tables)?; + if let Some(rhs) = rhs { + for arg in rhs { + bind_column_references(arg, referenced_tables)?; + } + } + Ok(()) + } + ast::Expr::InSelect { .. } => todo!(), + ast::Expr::InTable { .. } => todo!(), + ast::Expr::IsNull(expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Like { lhs, rhs, .. } => { + bind_column_references(lhs, referenced_tables)?; + bind_column_references(rhs, referenced_tables)?; + Ok(()) + } + ast::Expr::Literal(_) => Ok(()), + ast::Expr::Name(_) => todo!(), + ast::Expr::NotNull(expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Parenthesized(expr) => { + for e in expr.iter_mut() { + bind_column_references(e, referenced_tables)?; + } + Ok(()) + } + ast::Expr::Raise(_, _) => todo!(), + ast::Expr::Subquery(_) => todo!(), + ast::Expr::Unary(_, expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Variable(_) => todo!(), + } +} + #[allow(clippy::extra_unused_lifetimes)] pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result { match select.body.select { @@ -66,7 +232,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ let col_count = columns.len(); @@ -77,139 +243,173 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - projection_expressions.push(ProjectionColumn::Star); - } - ast::ResultColumn::TableStar(name) => { - let name_normalized = normalize_ident(name.0.as_str()); - let referenced_table = referenced_tables - .iter() - .find(|t| t.table_identifier == name_normalized); - - if referenced_table.is_none() { - crate::bail_parse_error!("Table {} not found", name.0); + let mut aggregate_expressions = Vec::new(); + for column in columns.clone() { + match column { + ast::ResultColumn::Star => { + for table_reference in plan.referenced_tables.iter() { + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns.push(ResultSetColumn { + expr: ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_rowid_alias: col.primary_key, + }, + contains_aggregates: false, + }); } - let table_reference = referenced_table.unwrap(); - projection_expressions - .push(ProjectionColumn::TableStar(table_reference.clone())); } - ast::ResultColumn::Expr(expr, _) => { - projection_expressions.push(ProjectionColumn::Column(expr.clone())); - match expr.clone() { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - order_by: _, - } => { - let args_count = if let Some(args) = &args { - args.len() - } else { - 0 - }; - match Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - args_count, - ) { - Ok(Func::Agg(f)) => { - aggregate_expressions.push(Aggregate { - func: f, - args: args.unwrap(), - original_expr: expr.clone(), - }); - } - Ok(_) => { - resolve_aggregates(&expr, &mut aggregate_expressions); - } - _ => {} - } - } - ast::Expr::FunctionCallStar { - name, - filter_over: _, - } => { - if let Ok(Func::Agg(f)) = Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - 0, - ) { - aggregate_expressions.push(Aggregate { + } + ast::ResultColumn::TableStar(name) => { + let name_normalized = normalize_ident(name.0.as_str()); + let referenced_table = plan + .referenced_tables + .iter() + .find(|t| t.table_identifier == name_normalized); + + if referenced_table.is_none() { + crate::bail_parse_error!("Table {} not found", name.0); + } + let table_reference = referenced_table.unwrap(); + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns.push(ResultSetColumn { + expr: ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_rowid_alias: col.primary_key, + }, + contains_aggregates: false, + }); + } + } + ast::ResultColumn::Expr(mut expr, _) => { + bind_column_references(&mut expr, &plan.referenced_tables)?; + match &expr { + ast::Expr::FunctionCall { + name, + distinctness: _, + args, + filter_over: _, + order_by: _, + } => { + let args_count = if let Some(args) = &args { + args.len() + } else { + 0 + }; + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + args_count, + ) { + Ok(Func::Agg(f)) => { + let agg = Aggregate { func: f, - args: vec![ast::Expr::Literal(ast::Literal::Numeric( - "1".to_string(), - ))], + args: args.as_ref().unwrap().clone(), original_expr: expr.clone(), + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn { + expr: expr.clone(), + contains_aggregates: true, }); } + Ok(_) => { + let cur_agg_count = aggregate_expressions.len(); + resolve_aggregates(&expr, &mut aggregate_expressions); + let contains_aggregates = + cur_agg_count != aggregate_expressions.len(); + plan.result_columns.push(ResultSetColumn { + expr: expr.clone(), + contains_aggregates, + }); + } + _ => {} } - ast::Expr::Binary(lhs, _, rhs) => { - resolve_aggregates(&lhs, &mut aggregate_expressions); - resolve_aggregates(&rhs, &mut aggregate_expressions); + } + ast::Expr::FunctionCallStar { + name, + filter_over: _, + } => { + if let Ok(Func::Agg(f)) = Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + 0, + ) { + let agg = Aggregate { + func: f, + args: vec![ast::Expr::Literal(ast::Literal::Numeric( + "1".to_string(), + ))], + original_expr: expr.clone(), + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn { + expr: expr.clone(), + contains_aggregates: true, + }); + } else { + crate::bail_parse_error!( + "Invalid aggregate function: {}", + name.0 + ); } - _ => {} + } + expr => { + let cur_agg_count = aggregate_expressions.len(); + resolve_aggregates(expr, &mut aggregate_expressions); + let contains_aggregates = + cur_agg_count != aggregate_expressions.len(); + plan.result_columns.push(ResultSetColumn { + expr: expr.clone(), + contains_aggregates, + }); } } } } - if let Some(_group_by) = group_by.as_ref() { - if aggregate_expressions.is_empty() { - crate::bail_parse_error!( - "GROUP BY clause without aggregate functions is not allowed" - ); - } - for scalar in projection_expressions.iter() { - match scalar { - ProjectionColumn::Column(_) => {} - _ => { - crate::bail_parse_error!( - "Only column references are allowed in the SELECT clause when using GROUP BY" - ); - } - } - } - } - if !aggregate_expressions.is_empty() { - operator = Operator::Aggregate { - source: Box::new(operator), - aggregates: aggregate_expressions, - group_by: group_by.map(|g| g.exprs), // TODO: support HAVING - id: operator_id_counter.get_next_id(), - step: 0, - } - } - - if !projection_expressions.is_empty() { - operator = Operator::Projection { - source: Box::new(operator), - expressions: projection_expressions, - id: operator_id_counter.get_next_id(), - step: 0, - }; - } } + if let Some(group_by) = group_by.as_mut() { + for expr in group_by.exprs.iter_mut() { + bind_column_references(expr, &plan.referenced_tables)?; + } + if aggregate_expressions.is_empty() { + crate::bail_parse_error!( + "GROUP BY clause without aggregate functions is not allowed" + ); + } + } + + plan.group_by = group_by.map(|g| g.exprs); + plan.aggregates = if aggregate_expressions.is_empty() { + None + } else { + Some(aggregate_expressions) + }; // Parse the ORDER BY clause if let Some(order_by) = select.order_by { @@ -218,7 +418,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

()?; if column_number == 0 { crate::bail_parse_error!("invalid column index: {}", column_number); @@ -235,6 +435,11 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ let l = n.parse()?; - if l == 0 { - Operator::Nothing - } else { - Operator::Limit { - source: Box::new(operator), - limit: l, - id: operator_id_counter.get_next_id(), - step: 0, - } - } + Some(l) } _ => todo!(), } } // Return the unoptimized query plan - Ok(Plan { - root_operator: operator, - referenced_tables, - available_indexes: schema.indexes.clone().into_values().flatten().collect(), - }) + Ok(plan) } _ => todo!(), } @@ -287,9 +474,9 @@ fn parse_from( schema: &Schema, from: Option, operator_id_counter: &mut OperatorIdCounter, -) -> Result<(Operator, Vec)> { +) -> Result<(SourceOperator, Vec)> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok((Operator::Nothing, vec![])); + return Ok((SourceOperator::Nothing, vec![])); } let from = from.unwrap(); @@ -309,32 +496,33 @@ fn parse_from( BTreeTableReference { table: table.clone(), table_identifier: alias.unwrap_or(qualified_name.name.0), + table_index: 0, } } _ => todo!(), }; - let mut operator = Operator::Scan { + let mut operator = SourceOperator::Scan { table_reference: first_table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }; let mut tables = vec![first_table]; + let mut table_index = 1; for join in from.joins.unwrap_or_default().into_iter() { let (right, outer, predicates) = - parse_join(schema, join, operator_id_counter, &mut tables)?; - operator = Operator::Join { + parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; + operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), predicates, outer, id: operator_id_counter.get_next_id(), - step: 0, - } + }; + table_index += 1; } Ok((operator, tables)) @@ -345,7 +533,8 @@ fn parse_join( join: ast::JoinedSelectTable, operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, -) -> Result<(Operator, bool, Option>)> { + table_index: usize, +) -> Result<(SourceOperator, bool, Option>)> { let ast::JoinedSelectTable { operator, table, @@ -366,6 +555,7 @@ fn parse_join( BTreeTableReference { table: table.clone(), table_identifier: alias.unwrap_or(qualified_name.name.0), + table_index, } } _ => todo!(), @@ -384,21 +574,26 @@ fn parse_join( _ => false, }; - let predicates = constraint.map(|c| match c { - ast::JoinConstraint::On(expr) => { - let mut predicates = vec![]; - break_predicate_at_and_boundaries(expr, &mut predicates); - predicates + let mut predicates = None; + if let Some(constraint) = constraint { + match constraint { + ast::JoinConstraint::On(expr) => { + let mut preds = vec![]; + break_predicate_at_and_boundaries(expr, &mut preds); + for predicate in preds.iter_mut() { + bind_column_references(predicate, tables)?; + } + predicates = Some(preds); + } + ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), } - ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), - }); + } Ok(( - Operator::Scan { + SourceOperator::Scan { table_reference: table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }, outer, diff --git a/core/translate/select.rs b/core/translate/select.rs index d486f6c23..6d846ded8 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -17,11 +17,6 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let (optimized_plan, expr_result_cache) = optimize_plan(select_plan)?; - emit_program( - database_header, - optimized_plan, - expr_result_cache, - connection, - ) + let optimized_plan = optimize_plan(select_plan)?; + emit_program(database_header, optimized_plan, connection) } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 0115c13e2..8dd1cd4de 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -343,14 +343,7 @@ impl ProgramBuilder { } // translate table to cursor id - pub fn resolve_cursor_id( - &self, - table_identifier: &str, - cursor_hint: Option, - ) -> CursorID { - if let Some(cursor_hint) = cursor_hint { - return cursor_hint; - } + pub fn resolve_cursor_id(&self, table_identifier: &str) -> CursorID { self.cursor_ref .iter() .position(|(t_ident, _)| { @@ -361,10 +354,6 @@ impl ProgramBuilder { .unwrap() } - pub fn resolve_cursor_to_table(&self, cursor_id: CursorID) -> Option { - self.cursor_ref[cursor_id].1.clone() - } - pub fn resolve_deferred_labels(&mut self) { for i in 0..self.deferred_label_resolutions.len() { let (label, insn_reference) = self.deferred_label_resolutions[i]; diff --git a/testing/math.test b/testing/math.test index 9d6e359b4..c567550e2 100644 --- a/testing/math.test +++ b/testing/math.test @@ -15,6 +15,18 @@ do_execsql_test add-int-float { SELECT 10 + 0.1 } {10.1} +do_execsql_test add-agg-int-agg-int { + SELECT sum(1) + sum(2) +} {3} + +do_execsql_test add-agg-int-agg-float { + SELECT sum(1) + sum(2.5) +} {3.5} + +do_execsql_test add-agg-float-agg-int { + SELECT sum(1.5) + sum(2) +} {3.5} + do_execsql_test subtract-int { SELECT 10 - 1 } {9} @@ -27,6 +39,18 @@ do_execsql_test subtract-int-float { SELECT 10 - 0.1 } {9.9} +do_execsql_test subtract-agg-int-agg-int { + SELECT sum(3) - sum(1) +} {2} + +do_execsql_test subtract-agg-int-agg-float { + SELECT sum(3) - sum(1.5) +} {1.5} + +do_execsql_test subtract-agg-float-agg-int { + SELECT sum(3.5) - sum(1) +} {2.5} + do_execsql_test multiply-int { SELECT 10 * 2 } {20} @@ -43,6 +67,18 @@ do_execsql_test multiply-float-int { SELECT 1.45 * 10 } {14.5} +do_execsql_test multiply-agg-int-agg-int { + SELECT sum(2) * sum(3) +} {6} + +do_execsql_test multiply-agg-int-agg-float { + SELECT sum(2) * sum(3.5) +} {7.0} + +do_execsql_test multiply-agg-float-agg-int { + SELECT sum(2.5) * sum(3) +} {7.5} + do_execsql_test divide-int { SELECT 10 / 2 } {5} @@ -79,6 +115,17 @@ do_execsql_test divide-null { SELECT null / null } {} +do_execsql_test divide-agg-int-agg-int { + SELECT sum(4) / sum(2) +} {2} + +do_execsql_test divide-agg-int-agg-float { + SELECT sum(4) / sum(2.0) +} {2.0} + +do_execsql_test divide-agg-float-agg-int { + SELECT sum(4.0) / sum(2) +} {2.0} do_execsql_test add-agg-int { diff --git a/testing/orderby.test b/testing/orderby.test index a973ed114..5155efcee 100755 --- a/testing/orderby.test +++ b/testing/orderby.test @@ -115,4 +115,25 @@ Dennis|Ward|1 Whitney|Walker|1 Robert|Villanueva|1 Cynthia|Thomas|1 -Brandon|Tate|1} \ No newline at end of file +Brandon|Tate|1} + +do_execsql_test order-by-case-insensitive-aggregate { + select u.first_name, sum(u.age) from users u group by u.first_name order by SUM(u.aGe) desc limit 10; +} {Michael|11204 +David|8758 +Robert|8109 +Jennifer|7700 +John|7299 +Christopher|6397 +James|5921 +Joseph|5711 +Brian|5059 +William|5047} + +do_execsql_test order-by-agg-not-mentioned-in-select { + select u.first_name, length(group_concat(u.last_name)) from users u group by u.first_name order by max(u.email) desc limit 5; +} {Louis|65 +Carolyn|118 +Katelyn|40 +Erik|88 +Collin|15} \ No newline at end of file diff --git a/vendored/sqlite3-parser/src/parser/ast/fmt.rs b/vendored/sqlite3-parser/src/parser/ast/fmt.rs index 7ee2d1af4..80f87eefb 100644 --- a/vendored/sqlite3-parser/src/parser/ast/fmt.rs +++ b/vendored/sqlite3-parser/src/parser/ast/fmt.rs @@ -637,6 +637,7 @@ impl ToTokens for Expr { Ok(()) } Self::Id(id) => id.to_tokens(s), + Self::Column { .. } => Ok(()), Self::InList { lhs, not, rhs } => { lhs.to_tokens(s)?; if *not { diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index e3b9f86fa..690f5e71c 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -327,6 +327,17 @@ pub enum Expr { }, /// Identifier Id(Id), + /// Column + Column { + /// the x in `x.y.z`. index of the db in catalog. + database: Option, + /// the y in `x.y.z`. index of the table in catalog. + table: usize, + /// the z in `x.y.z`. index of the column in the table. + column: usize, + /// is the column a rowid alias + is_rowid_alias: bool, + }, /// `IN` InList { /// expression