diff --git a/core/lib.rs b/core/lib.rs index b7344c260..bc97a7c7b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -235,8 +235,8 @@ impl Connection { Cmd::ExplainQueryPlan(stmt) => { match stmt { ast::Stmt::Select(select) => { - let plan = prepare_select_plan(&self.schema.borrow(), select)?; - let (plan, _) = optimize_plan(plan)?; + let plan = prepare_select_plan(&*self.schema.borrow(), select)?; + let plan = optimize_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5f3402389..fe23832db 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -17,9 +17,8 @@ use super::expr::{ translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, ConditionMetadata, }; -use super::optimizer::ExpressionResultCache; -use super::plan::{BTreeTableReference, Plan}; -use super::plan::{Operator, ProjectionColumn}; +use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; +use super::plan::{ResultSetColumn, SourceOperator}; /** * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. 
@@ -27,28 +26,28 @@ use super::plan::{Operator, ProjectionColumn}; * - step: perform a single step of the operator, emitting bytecode instructions as needed, and returning a result indicating whether the operator is ready to emit a result row */ -pub trait Emitter { - fn step( - &mut self, - pb: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result; - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result; - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()>; -} +// pub trait Emitter { +// fn step( +// &mut self, +// pb: &mut ProgramBuilder, +// m: &mut Metadata, +// referenced_tables: &[BTreeTableReference], +// ) -> Result; +// fn result_columns( +// &self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// metadata: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result; +// fn result_row( +// &mut self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// metadata: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result<()>; +// } #[derive(Debug)] pub struct LeftJoinMetadata { @@ -136,1552 +135,1493 @@ pub struct Metadata { sorts: HashMap, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, - expr_result_cache: ExpressionResultCache, + // register holding the start of the result set + result_set_register_start: usize, } -/// Emitters return one of three possible results from the step() method: -/// - Continue: the operator is not yet ready to emit a result row -/// - ReadyToEmit: the operator is ready to emit a result row -/// - Done: the operator has completed execution 
-/// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. -/// At that point, it will return ReadyToEmit. -/// Finally, when the Scan operator has emitted a Next instruction, it will return Done. -/// -/// Parent operators are free to make decisions based on the result a child operator's step() method. -/// -/// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. -/// When the root operator returns Done, the bytecode plan is complete. -#[derive(Debug, PartialEq)] -pub enum OpStepResult { - Continue, - ReadyToEmit, - Done, -} - -impl Emitter for Operator { - fn step( - &mut self, - program: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result { - let current_operator_column_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, - id, - step, - predicates, - iter_dir, - } => { - *step += 1; - const SCAN_OPEN_READ: usize = 1; - const SCAN_BODY: usize = 2; - const SCAN_NEXT: usize = 3; - let reverse = iter_dir - .as_ref() - .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); - match *step { - SCAN_OPEN_READ => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - let root_page = table_reference.table.root_page; - let next_row_label = program.allocate_label(); - m.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - Ok(OpStepResult::Continue) - } - SCAN_BODY => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - if reverse { - program.emit_insn(Insn::LastAsync { cursor_id }); - } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); - } - let scan_loop_body_label = program.allocate_label(); - let halt_label 
= m.termination_label_stack.last().unwrap(); - program.emit_insn_with_label_dependency( - if reverse { - Insn::LastAwait { - cursor_id, - pc_if_empty: *halt_label, - } - } else { - Insn::RewindAwait { - cursor_id, - pc_if_empty: *halt_label, - } - }, - *halt_label, - ); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - - let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); - if let Some(preds) = predicates { - for expr in preds { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - expr, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SCAN_NEXT => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - if reverse { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - - if reverse { - program.emit_insn_with_label_dependency( - Insn::PrevAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Search { - table_reference, - search, - predicates, - step, - id, - .. 
- } => { - *step += 1; - const SEARCH_OPEN_READ: usize = 1; - const SEARCH_BODY: usize = 2; - const SEARCH_NEXT: usize = 3; - match *step { - SEARCH_OPEN_READ => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - - let next_row_label = program.allocate_label(); - - if !matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - m.next_row_labels.insert(*id, next_row_label); - } - - let scan_loop_body_label = program.allocate_label(); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - if let Search::IndexSearch { index, .. } = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - Some(Table::Index(index.clone())), - ); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - } - Ok(OpStepResult::Continue) - } - SEARCH_BODY => { - let table_cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - - // Open the loop for the index search. - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::PrimaryKeyEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search - { - Some(program.resolve_cursor_id(&index.name, None)) - } else { - None - }; - let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. 
- } => (cmp_expr, cmp_op), - Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::PrimaryKeyEq { .. } => unreachable!(), - }; - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - program.emit_insn_with_label_dependency( - match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => { - Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - } - } - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - }, - _ => unreachable!(), - }, - *m.termination_label_stack.last().unwrap(), - ); - if *cmp_op == ast::Operator::Less - || *cmp_op == ast::Operator::LessEquals - { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. 
- // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. - - let abort_jump_target = *m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - _ => {} - 
} - - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } - } - - let jump_label = m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if let Search::PrimaryKeyEq { cmp_expr } = search { - let src_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - src_reg, - None, - None, - )?; - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id: table_cursor_id, - src_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - } - if let Some(predicates) = predicates { - for predicate in predicates.iter() { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SEARCH_NEXT => { - if matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. - return Ok(OpStepResult::Done); - } - let cursor_id = match search { - Search::IndexSearch { index, .. } => { - program.resolve_cursor_id(&index.name, None) - } - Search::PrimaryKeySearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier, None) - } - Search::PrimaryKeyEq { .. 
} => unreachable!(), - }; - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Join { - left, - right, - outer, - predicates, - step, - id, - .. - } => { - *step += 1; - const JOIN_INIT: usize = 1; - const JOIN_DO_JOIN: usize = 2; - const JOIN_END: usize = 3; - match *step { - JOIN_INIT => { - if *outer { - let lj_metadata = LeftJoinMetadata { - match_flag_register: program.alloc_register(), - set_match_flag_true_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - on_match_jump_to_label: program.allocate_label(), - }; - m.left_joins.insert(*id, lj_metadata); - } - left.step(program, m, referenced_tables)?; - right.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Continue) - } - JOIN_DO_JOIN => { - left.step(program, m, referenced_tables)?; - - let mut jump_target_when_false = *m - .next_row_labels - .get(&right.id()) - .or(m.next_row_labels.get(&left.id())) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.emit_insn(Insn::Integer { - value: 0, - dest: lj_meta.match_flag_register, - }); - jump_target_when_false = lj_meta.check_match_flag_label; - } - m.next_row_labels.insert(right.id(), jump_target_when_false); - - right.step(program, m, referenced_tables)?; - - if let Some(predicates) = predicates { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false, - }; - for predicate in predicates.iter() { - translate_condition_expr( - program, - referenced_tables, - predicate, - 
None, - condition_metadata, - )?; - } - program.resolve_label(jump_target_when_true, program.offset()); - } - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.defer_label_resolution( - lj_meta.set_match_flag_true_label, - program.offset() as usize, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: lj_meta.match_flag_register, - }); - } - - Ok(OpStepResult::ReadyToEmit) - } - JOIN_END => { - right.step(program, m, referenced_tables)?; - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(lj_meta.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: lj_meta.match_flag_register, - target_pc: lj_meta.on_match_jump_to_label, - decrement_by: 0, - }, - lj_meta.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match right.as_ref() { - Operator::Scan { - table_reference, .. - } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - Operator::Search { - table_reference, .. 
- } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: lj_meta.set_match_flag_true_label, - }, - lj_meta.set_match_flag_true_label, - ); - } - let next_row_label = if *outer { - m.left_joins.get(id).unwrap().on_match_jump_to_label - } else { - *m.next_row_labels.get(&right.id()).unwrap() - }; - // This points to the NextAsync instruction of the left table - program.resolve_label(next_row_label, program.offset()); - left.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Aggregate { - id, - source, - aggregates, - group_by, - step, - .. - } => { - *step += 1; - - // Group by aggregation eg. SELECT a, b, sum(c) FROM t GROUP BY a, b - if let Some(group_by) = group_by { - const GROUP_BY_INIT: usize = 1; - const GROUP_BY_INSERT_INTO_SORTER: usize = 2; - const GROUP_BY_SORT_AND_COMPARE: usize = 3; - const GROUP_BY_PREPARE_ROW: usize = 4; - const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; - match *step { - GROUP_BY_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - - let sort_cursor = program.alloc_cursor_id(None, None); - - let abort_flag_register = program.alloc_register(); - let data_in_accumulator_indicator_register = program.alloc_register(); - let group_exprs_comparison_register = - program.alloc_registers(group_by.len()); - let group_exprs_accumulator_register = - program.alloc_registers(group_by.len()); - let agg_exprs_start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers - .insert(*id, agg_exprs_start_reg); - let sorter_key_register = 
program.alloc_register(); - - let subroutine_accumulator_clear_label = program.allocate_label(); - let subroutine_accumulator_output_label = program.allocate_label(); - let sorter_data_label = program.allocate_label(); - let grouping_done_label = program.allocate_label(); - - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: current_operator_column_count, - order: OwnedRecord::new(order), - }); - - program.add_comment(program.offset(), "clear group by abort flag"); - program.emit_insn(Insn::Integer { - value: 0, - dest: abort_flag_register, - }); - - program.add_comment( - program.offset(), - "initialize group by comparison registers to NULL", - ); - program.emit_insn(Insn::Null { - dest: group_exprs_comparison_register, - dest_end: if group_by.len() > 1 { - Some(group_exprs_comparison_register + group_by.len() - 1) - } else { - None - }, - }); - - program.add_comment( - program.offset(), - "go to clear accumulator subroutine", - ); - - let subroutine_accumulator_clear_return_offset_register = - program.alloc_register(); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - m.group_bys.insert( - *id, - GroupByMetadata { - sort_cursor, - subroutine_accumulator_clear_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_output_return_offset_register: program - .alloc_register(), - accumulator_indicator_set_true_label: program.allocate_label(), - sorter_data_label, - grouping_done_label, - abort_flag_register, - data_in_accumulator_indicator_register, - group_exprs_accumulator_register, - group_exprs_comparison_register, - sorter_key_register, - }, - ); - - loop { - 
match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - GROUP_BY_INSERT_INTO_SORTER => { - let sort_keys_count = group_by.len(); - let start_reg = program.alloc_registers(current_operator_column_count); - for (i, expr) in group_by.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - None, - )?; - } - for (i, agg) in aggregates.iter().enumerate() { - // TODO it's a hack to assume aggregate functions have exactly one argument. - // Counterpoint e.g. GROUP_CONCAT(expr, separator). - // - // Here we are collecting scalars for the group by sorter, which will include - // both the group by expressions and the aggregate arguments. - // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` - // the sorter will have two scalars: u.first_name and u.age. - // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. - // - // This is why we take the first argument of each aggregate function currently. - // It's mostly an artifact of the current architecture being a bit poor; we should recognize - // which scalars are dependencies of aggregate functions and explicitly collect those. 
- let expr = &agg.args[0]; - let agg_reg = start_reg + sort_keys_count + i; - translate_expr( - program, - Some(referenced_tables), - expr, - agg_reg, - None, - None, - )?; - } - - let group_by_metadata = m.group_bys.get(id).unwrap(); - - program.emit_insn(Insn::MakeRecord { - start_reg, - count: current_operator_column_count, - dest_reg: group_by_metadata.sorter_key_register, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::SorterInsert { - cursor_id: group_by_metadata.sort_cursor, - record_reg: group_by_metadata.sorter_key_register, - }); - - return Ok(OpStepResult::Continue); - } - #[allow(clippy::never_loop)] - GROUP_BY_SORT_AND_COMPARE => { - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - - let group_by_metadata = m.group_bys.get_mut(id).unwrap(); - - let GroupByMetadata { - group_exprs_comparison_register: comparison_register, - subroutine_accumulator_output_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_clear_label, - data_in_accumulator_indicator_register, - accumulator_indicator_set_true_label, - group_exprs_accumulator_register: group_exprs_start_register, - abort_flag_register, - sorter_key_register, - .. - } = *group_by_metadata; - let halt_label = *m.termination_label_stack.first().unwrap(); - - let mut column_names = - Vec::with_capacity(current_operator_column_count); - for expr in group_by - .iter() - .chain(aggregates.iter().map(|agg| &agg.args[0])) - // FIXME: just blindly taking the first arg is a hack - { - // Sorter column names for group by are now just determined by stringifying the expression, since the group by - // columns and aggregations can be practically anything. 
- // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle - // things like `count(1)` and `COUNT(1)` the same way - column_names.push(expr.to_string()); - } - let pseudo_columns = column_names - .iter() - .map(|name| Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }) - .collect::>(); - - let pseudo_table = Rc::new(PseudoTable { - columns: pseudo_columns, - }); - - let pseudo_cursor = program - .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sorter_key_register, - num_fields: current_operator_column_count, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: group_by_metadata.sort_cursor, - pc_if_empty: group_by_metadata.grouping_done_label, - }, - group_by_metadata.grouping_done_label, - ); - - program.defer_label_resolution( - group_by_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: group_by_metadata.sort_cursor, - dest_reg: group_by_metadata.sorter_key_register, - pseudo_cursor, - }); - - let groups_start_reg = program.alloc_registers(group_by.len()); - for (i, expr) in group_by.iter().enumerate() { - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; - let group_reg = groups_start_reg + i; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: group_reg, - }); - } - - program.emit_insn(Insn::Compare { - start_reg_a: comparison_register, - start_reg_b: groups_start_reg, - count: group_by.len(), - }); - - let agg_step_label = program.allocate_label(); - - program.add_comment( - program.offset(), - "start new group if comparison is not equal", - ); - program.emit_insn_with_label_dependency( - Insn::Jump { - 
target_pc_lt: program.offset() + 1, - target_pc_eq: agg_step_label, - target_pc_gt: program.offset() + 1, - }, - agg_step_label, - ); - - program.emit_insn(Insn::Move { - source_reg: groups_start_reg, - dest_reg: comparison_register, - count: group_by.len(), - }); - - program.add_comment( - program.offset(), - "check if ended group had data, and output if so", - ); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_output_label, - return_reg: - subroutine_accumulator_output_return_offset_register, - }, - subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "check abort flag"); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: abort_flag_register, - target_pc: halt_label, - decrement_by: 0, - }, - m.termination_label_stack[0], - ); - - program - .add_comment(program.offset(), "goto clear accumulator subroutine"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - program.resolve_label(agg_step_label, program.offset()); - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - Some(pseudo_cursor), - )?; - } - - program.add_comment( - program.offset(), - "don't emit group columns if continuing existing group", - ); - program.emit_insn_with_label_dependency( - Insn::If { - target_pc: accumulator_indicator_set_true_label, - reg: data_in_accumulator_indicator_register, - null_reg: 0, // unused in this case - }, - accumulator_indicator_set_true_label, - ); - - for (i, expr) in group_by.iter().enumerate() { - let key_reg = group_exprs_start_register + i; - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), 
&pseudo_table)?; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: key_reg, - }); - } - - program.resolve_label( - accumulator_indicator_set_true_label, - program.offset(), - ); - program.add_comment(program.offset(), "indicate data in accumulator"); - program.emit_insn(Insn::Integer { - value: 1, - dest: data_in_accumulator_indicator_register, - }); - - return Ok(OpStepResult::Continue); - } - GROUP_BY_PREPARE_ROW => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: group_by_metadata.sort_cursor, - pc_if_next: group_by_metadata.sorter_data_label, - }, - group_by_metadata.sorter_data_label, - ); - - program.resolve_label( - group_by_metadata.grouping_done_label, - program.offset(), - ); - - program.add_comment(program.offset(), "emit row for final group"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: group_by_metadata - .subroutine_accumulator_output_label, - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }, - group_by_metadata.subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "group by finished"); - let termination_label = - m.termination_label_stack[m.termination_label_stack.len() - 2]; - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: termination_label, - }, - termination_label, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: group_by_metadata.abort_flag_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - program.resolve_label( - group_by_metadata.subroutine_accumulator_output_label, - program.offset(), - ); - - program.add_comment( - program.offset(), - "output group by row subroutine start", - ); - let termination_label = *m.termination_label_stack.last().unwrap(); - 
program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: termination_label, - decrement_by: 0, - }, - termination_label, - ); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - return Ok(OpStepResult::ReadyToEmit); - } - GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - program.add_comment( - program.offset(), - "clear accumulator subroutine start", - ); - program.resolve_label( - group_by_metadata.subroutine_accumulator_clear_label, - program.offset(), - ); - let start_reg = group_by_metadata.group_exprs_accumulator_register; - program.emit_insn(Insn::Null { - dest: start_reg, - dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), - }); - - program.emit_insn(Insn::Integer { - value: 0, - dest: group_by_metadata.data_in_accumulator_indicator_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_clear_return_offset_register, - }); - } - _ => { - return Ok(OpStepResult::Done); - } - } - } - - // Non-grouped aggregation e.g. SELECT COUNT(*) FROM t - - const AGGREGATE_INIT: usize = 1; - const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; - match *step { - AGGREGATE_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - let start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers.insert(*id, start_reg); - - Ok(OpStepResult::Continue) - } - AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? 
{ - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - None, - )?; - } - } - OpStepResult::Done => { - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - _ => Ok(OpStepResult::Done), - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { source, step, .. } => { - *step += 1; - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::ReadyToEmit); - } - OpStepResult::Done => return Ok(OpStepResult::Done), - } - } - } - Operator::Order { - id, - source, - key, - step, - } => { - *step += 1; - const ORDER_INIT: usize = 1; - const ORDER_INSERT_INTO_SORTER: usize = 2; - const ORDER_SORT_AND_OPEN_LOOP: usize = 3; - const ORDER_NEXT: usize = 4; - match *step { - ORDER_INIT => { - m.termination_label_stack.push(program.allocate_label()); - let sort_cursor = program.alloc_cursor_id(None, None); - m.sorts.insert( - *id, - SortMetadata { - sort_cursor, - pseudo_table_cursor: usize::MAX, // will be set later - sorter_data_register: program.alloc_register(), - sorter_data_label: program.allocate_label(), - done_label: program.allocate_label(), - }, - ); - let mut order = Vec::new(); - for (_, direction) in key.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: key.len(), - order: OwnedRecord::new(order), - }); - - loop { - match source.step(program, m, referenced_tables)? 
{ - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - ORDER_INSERT_INTO_SORTER => { - let sort_keys_count = key.len(); - let source_cols_count = source.column_count(referenced_tables); - let start_reg = program.alloc_registers(sort_keys_count); - source.result_columns(program, referenced_tables, m, None)?; - - for (i, (expr, _)) in key.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - m.expr_result_cache - .get_cached_result_registers(*id, i) - .as_ref(), - )?; - } - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - program.emit_insn(Insn::MakeRecord { - start_reg, - count: sort_keys_count + source_cols_count, - dest_reg: sort_metadata.sorter_data_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_metadata.sort_cursor, - record_reg: sort_metadata.sorter_data_register, - }); - - Ok(OpStepResult::Continue) - } - #[allow(clippy::never_loop)] - ORDER_SORT_AND_OPEN_LOOP => { - loop { - match source.step(program, m, referenced_tables)? 
{ - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - program.resolve_label( - m.termination_label_stack.pop().unwrap(), - program.offset(), - ); - let column_names = source.column_names(); - let mut pseudo_columns = vec![]; - for (i, _) in key.iter().enumerate() { - pseudo_columns.push(Column { - name: format!("sort_key_{}", i), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - for name in column_names { - pseudo_columns.push(Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - - let num_fields = pseudo_columns.len(); - - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - let sort_metadata = m.sorts.get(id).unwrap(); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sort_metadata.sorter_data_register, - num_fields, - }); - - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sort_metadata.done_label, - }, - sort_metadata.done_label, - ); - - program.defer_label_resolution( - sort_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: sort_metadata.sort_cursor, - dest_reg: sort_metadata.sorter_data_register, - pseudo_cursor, - }); - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - - sort_metadata.pseudo_table_cursor = pseudo_cursor; - - Ok(OpStepResult::ReadyToEmit) - } - ORDER_NEXT => { - let sort_metadata = m.sorts.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.sorter_data_label, - }, - sort_metadata.sorter_data_label, - ); - - program.resolve_label(sort_metadata.done_label, program.offset()); - - Ok(OpStepResult::Done) - } - _ => unreachable!(), - } - } - Operator::Projection { source, step, .. 
} => { - *step += 1; - const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; - const PROJECTION_FINALIZE_SOURCE: usize = 2; - match *step { - PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit | OpStepResult::Done => { - if matches!(**source, Operator::Aggregate { .. }) { - source.result_columns(program, referenced_tables, m, None)?; - } - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - PROJECTION_FINALIZE_SOURCE => { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => Ok(OpStepResult::Done), - _ => unreachable!(), - } - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Nothing => Ok(OpStepResult::Done), - } - } - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result { - let col_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, .. - } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Search { - table_reference, .. 
- } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Join { left, right, .. } => { - let left_start_reg = - left.result_columns(program, referenced_tables, m, cursor_override)?; - right.result_columns(program, referenced_tables, m, cursor_override)?; - - Ok(left_start_reg) - } - Operator::Aggregate { - id, - aggregates, - group_by, - .. - } => { - let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); - program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); - let mut result_column_idx = 0; - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = *agg_start_reg + i; - program.emit_insn(Insn::AggFinal { - register: agg_result_reg, - func: agg.func.clone(), - }); - m.expr_result_cache.cache_result_register( - *id, - result_column_idx, - agg_result_reg, - agg.original_expr.clone(), - ); - result_column_idx += 1; - } - - if let Some(group_by) = group_by { - let output_row_start_reg = - program.alloc_registers(aggregates.len() + group_by.len()); - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Copy { - src_reg: group_by_metadata.group_exprs_accumulator_register, - dst_reg: output_row_start_reg, - amount: group_by.len() - 1, - }); - for (i, source_expr) in group_by.iter().enumerate() { - m.expr_result_cache.cache_result_register( - *id, - result_column_idx + i, - output_row_start_reg + i, - source_expr.clone(), - ); - } - program.emit_insn(Insn::Copy { - src_reg: *agg_start_reg, - dst_reg: 
output_row_start_reg + group_by.len(), - amount: aggregates.len() - 1, - }); - - Ok(output_row_start_reg) - } else { - Ok(*agg_start_reg) - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { .. } => { - unimplemented!() - } - Operator::Order { id, key, .. } => { - let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; - let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); - let start_column_offset = key.len(); - let column_count = pseudo_table.columns().len() - start_column_offset; - let start_reg = program.alloc_registers(column_count); - translate_table_columns( - program, - cursor_id, - &pseudo_table, - start_column_offset, - start_reg, - ); - - Ok(start_reg) - } - Operator::Projection { - expressions, id, .. - } => { - let expr_count = expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(); - let start_reg = program.alloc_registers(expr_count); - let mut cur_reg = start_reg; - for expr in expressions { - match expr { - ProjectionColumn::Column(expr) => { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - cursor_override.map(|c| c.cursor_id), - m.expr_result_cache - .get_cached_result_registers(*id, cur_reg - start_reg) - .as_ref(), - )?; - m.expr_result_cache.cache_result_register( - *id, - cur_reg - start_reg, - cur_reg, - expr.clone(), - ); - cur_reg += 1; - } - ProjectionColumn::Star => { - for table_reference in referenced_tables.iter() { - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id( - &table_reference.table_identifier, - None, - ) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - 
ProjectionColumn::TableStar(table_reference) => { - let table_ref = referenced_tables - .iter() - .find(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_ref.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program - .resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - } - - Ok(start_reg) - } - Operator::Nothing => unimplemented!(), - } - } - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()> { - match self { - Operator::Limit { source, limit, .. } => { - source.result_row(program, referenced_tables, m, cursor_override)?; - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: *limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - let jump_label = m.termination_label_stack.first().unwrap(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - - Ok(()) - } - operator => { - let start_reg = - operator.result_columns(program, referenced_tables, m, cursor_override)?; - program.emit_insn(Insn::ResultRow { - start_reg, - count: operator.column_count(referenced_tables), - }); - Ok(()) - } - } - } -} - -fn prologue( - cache: ExpressionResultCache, -) -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { +// /// Emitters return one of three possible results from the step() method: +// /// - Continue: the operator is not yet ready to emit a result row +// /// - ReadyToEmit: the operator is ready to 
emit a result row +// /// - Done: the operator has completed execution +// /// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. +// /// At that point, it will return ReadyToEmit. +// /// Finally, when the Scan operator has emitted a Next instruction, it will return Done. +// /// +// /// Parent operators are free to make decisions based on the result of a child operator's step() method. +// /// +// /// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. +// /// When the root operator returns Done, the bytecode plan is complete. +// #[derive(Debug, PartialEq)] +// pub enum OpStepResult { +// Continue, +// ReadyToEmit, +// Done, +// } + +// impl Emitter for SourceOperator { +// fn step( +// &mut self, +// program: &mut ProgramBuilder, +// m: &mut Metadata, +// referenced_tables: &[BTreeTableReference], +// ) -> Result { +// let current_operator_column_count = self.column_count(referenced_tables); +// match self { +// SourceOperator::Scan { +// table_reference, +// id, +// step, +// predicates, +// iter_dir, +// } => { +// *step += 1; +// const SCAN_OPEN_READ: usize = 1; +// const SCAN_BODY: usize = 2; +// const SCAN_NEXT: usize = 3; +// let reverse = iter_dir +// .as_ref() +// .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); +// match *step { +// SCAN_OPEN_READ => { +// let cursor_id = program.alloc_cursor_id( +// Some(table_reference.table_identifier.clone()), +// Some(Table::BTree(table_reference.table.clone())), +// ); +// let root_page = table_reference.table.root_page; +// let next_row_label = program.allocate_label(); +// m.next_row_labels.insert(*id, next_row_label); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id, +// root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); + +// Ok(OpStepResult::Continue) +// } +// SCAN_BODY => { +// let cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); 
+// if reverse { +// program.emit_insn(Insn::LastAsync { cursor_id }); +// } else { +// program.emit_insn(Insn::RewindAsync { cursor_id }); +// } +// let scan_loop_body_label = program.allocate_label(); +// let halt_label = m.termination_label_stack.last().unwrap(); +// program.emit_insn_with_label_dependency( +// if reverse { +// Insn::LastAwait { +// cursor_id, +// pc_if_empty: *halt_label, +// } +// } else { +// Insn::RewindAwait { +// cursor_id, +// pc_if_empty: *halt_label, +// } +// }, +// *halt_label, +// ); +// m.scan_loop_body_labels.push(scan_loop_body_label); +// program.defer_label_resolution( +// scan_loop_body_label, +// program.offset() as usize, +// ); + +// let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); +// if let Some(preds) = predicates { +// for expr in preds { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false: *jump_label, +// }; +// translate_condition_expr( +// program, +// referenced_tables, +// expr, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// program.resolve_label(jump_target_when_true, program.offset()); +// } +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// SCAN_NEXT => { +// let cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); +// program +// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); +// if reverse { +// program.emit_insn(Insn::PrevAsync { cursor_id }); +// } else { +// program.emit_insn(Insn::NextAsync { cursor_id }); +// } +// let jump_label = m.scan_loop_body_labels.pop().unwrap(); + +// if reverse { +// program.emit_insn_with_label_dependency( +// Insn::PrevAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// jump_label, +// ); +// } else { +// program.emit_insn_with_label_dependency( +// Insn::NextAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// 
jump_label, +// ); +// } +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Search { +// table_reference, +// search, +// predicates, +// step, +// id, +// .. +// } => { +// *step += 1; +// const SEARCH_OPEN_READ: usize = 1; +// const SEARCH_BODY: usize = 2; +// const SEARCH_NEXT: usize = 3; +// match *step { +// SEARCH_OPEN_READ => { +// let table_cursor_id = program.alloc_cursor_id( +// Some(table_reference.table_identifier.clone()), +// Some(Table::BTree(table_reference.table.clone())), +// ); + +// let next_row_label = program.allocate_label(); + +// if !matches!(search, Search::PrimaryKeyEq { .. }) { +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. +// m.next_row_labels.insert(*id, next_row_label); +// } + +// let scan_loop_body_label = program.allocate_label(); +// m.scan_loop_body_labels.push(scan_loop_body_label); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id: table_cursor_id, +// root_page: table_reference.table.root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); + +// if let Search::IndexSearch { index, .. } = search { +// let index_cursor_id = program.alloc_cursor_id( +// Some(index.name.clone()), +// Some(Table::Index(index.clone())), +// ); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id: index_cursor_id, +// root_page: index.root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); +// } +// Ok(OpStepResult::Continue) +// } +// SEARCH_BODY => { +// let table_cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); + +// // Open the loop for the index search. +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. +// if !matches!(search, Search::PrimaryKeyEq { .. }) { +// let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search +// { +// Some(program.resolve_cursor_id(&index.name, None)) +// } else { +// None +// }; +// let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); +// let cmp_reg = program.alloc_register(); +// let (cmp_expr, cmp_op) = match search { +// Search::IndexSearch { +// cmp_expr, cmp_op, .. +// } => (cmp_expr, cmp_op), +// Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), +// Search::PrimaryKeyEq { .. } => unreachable!(), +// }; +// // TODO this only handles ascending indexes +// match cmp_op { +// ast::Operator::Equals +// | ast::Operator::Greater +// | ast::Operator::GreaterEquals => { +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// cmp_reg, +// None, +// m.result_set_register_start, +// )?; +// } +// ast::Operator::Less | ast::Operator::LessEquals => { +// program.emit_insn(Insn::Null { +// dest: cmp_reg, +// dest_end: None, +// }); +// } +// _ => unreachable!(), +// } +// program.emit_insn_with_label_dependency( +// match cmp_op { +// ast::Operator::Equals | ast::Operator::GreaterEquals => { +// Insn::SeekGE { +// is_index: index_cursor_id.is_some(), +// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: *m.termination_label_stack.last().unwrap(), +// } +// } +// ast::Operator::Greater +// | ast::Operator::Less +// | ast::Operator::LessEquals => Insn::SeekGT { +// is_index: index_cursor_id.is_some(), +// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: *m.termination_label_stack.last().unwrap(), +// }, +// _ => unreachable!(), +// }, +// *m.termination_label_stack.last().unwrap(), +// ); +// if *cmp_op == ast::Operator::Less +// || *cmp_op == ast::Operator::LessEquals +// { +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// cmp_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// program.defer_label_resolution( +// 
scan_loop_body_label, +// program.offset() as usize, +// ); +// // TODO: We are currently only handling ascending indexes. +// // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. +// // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. +// // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. +// // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. +// // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. +// // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. +// // +// // For primary key searches we emit RowId and then compare it to the seek value. 
+ +// let abort_jump_target = *m +// .next_row_labels +// .get(id) +// .unwrap_or(m.termination_label_stack.last().unwrap()); +// match cmp_op { +// ast::Operator::Equals | ast::Operator::LessEquals => { +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn_with_label_dependency( +// Insn::IdxGT { +// cursor_id: index_cursor_id, +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } else { +// let rowid_reg = program.alloc_register(); +// program.emit_insn(Insn::RowId { +// cursor_id: table_cursor_id, +// dest: rowid_reg, +// }); +// program.emit_insn_with_label_dependency( +// Insn::Gt { +// lhs: rowid_reg, +// rhs: cmp_reg, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } +// } +// ast::Operator::Less => { +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn_with_label_dependency( +// Insn::IdxGE { +// cursor_id: index_cursor_id, +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } else { +// let rowid_reg = program.alloc_register(); +// program.emit_insn(Insn::RowId { +// cursor_id: table_cursor_id, +// dest: rowid_reg, +// }); +// program.emit_insn_with_label_dependency( +// Insn::Ge { +// lhs: rowid_reg, +// rhs: cmp_reg, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } +// } +// _ => {} +// } + +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn(Insn::DeferredSeek { +// index_cursor_id, +// table_cursor_id, +// }); +// } +// } + +// let jump_label = m +// .next_row_labels +// .get(id) +// .unwrap_or(m.termination_label_stack.last().unwrap()); + +// if let Search::PrimaryKeyEq { cmp_expr } = search { +// let src_reg = program.alloc_register(); +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// src_reg, +// None, +// m.result_set_register_start, +// )?; +// 
program.emit_insn_with_label_dependency( +// Insn::SeekRowid { +// cursor_id: table_cursor_id, +// src_reg, +// target_pc: *jump_label, +// }, +// *jump_label, +// ); +// } +// if let Some(predicates) = predicates { +// for predicate in predicates.iter() { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false: *jump_label, +// }; +// translate_condition_expr( +// program, +// referenced_tables, +// predicate, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// program.resolve_label(jump_target_when_true, program.offset()); +// } +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// SEARCH_NEXT => { +// if matches!(search, Search::PrimaryKeyEq { .. }) { +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. +// return Ok(OpStepResult::Done); +// } +// let cursor_id = match search { +// Search::IndexSearch { index, .. } => { +// program.resolve_cursor_id(&index.name, None) +// } +// Search::PrimaryKeySearch { .. } => { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// } +// Search::PrimaryKeyEq { .. } => unreachable!(), +// }; +// program +// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); +// program.emit_insn(Insn::NextAsync { cursor_id }); +// let jump_label = m.scan_loop_body_labels.pop().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::NextAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// jump_label, +// ); +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Join { +// left, +// right, +// outer, +// predicates, +// step, +// id, +// .. 
+// } => { +// *step += 1; +// const JOIN_INIT: usize = 1; +// const JOIN_DO_JOIN: usize = 2; +// const JOIN_END: usize = 3; +// match *step { +// JOIN_INIT => { +// if *outer { +// let lj_metadata = LeftJoinMetadata { +// match_flag_register: program.alloc_register(), +// set_match_flag_true_label: program.allocate_label(), +// check_match_flag_label: program.allocate_label(), +// on_match_jump_to_label: program.allocate_label(), +// }; +// m.left_joins.insert(*id, lj_metadata); +// } +// left.step(program, m, referenced_tables)?; +// right.step(program, m, referenced_tables)?; + +// Ok(OpStepResult::Continue) +// } +// JOIN_DO_JOIN => { +// left.step(program, m, referenced_tables)?; + +// let mut jump_target_when_false = *m +// .next_row_labels +// .get(&right.id()) +// .or(m.next_row_labels.get(&left.id())) +// .unwrap_or(m.termination_label_stack.last().unwrap()); + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: lj_meta.match_flag_register, +// }); +// jump_target_when_false = lj_meta.check_match_flag_label; +// } +// m.next_row_labels.insert(right.id(), jump_target_when_false); + +// right.step(program, m, referenced_tables)?; + +// if let Some(predicates) = predicates { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false, +// }; +// for predicate in predicates.iter() { +// translate_condition_expr( +// program, +// referenced_tables, +// predicate, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// } +// program.resolve_label(jump_target_when_true, program.offset()); +// } + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// program.defer_label_resolution( +// lj_meta.set_match_flag_true_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::Integer { +// value: 1, +// 
dest: lj_meta.match_flag_register, +// }); +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// JOIN_END => { +// right.step(program, m, referenced_tables)?; + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) +// program.resolve_label(lj_meta.check_match_flag_label, program.offset()); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: lj_meta.match_flag_register, +// target_pc: lj_meta.on_match_jump_to_label, +// decrement_by: 0, +// }, +// lj_meta.on_match_jump_to_label, +// ); +// // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL +// let right_cursor_id = match right.as_ref() { +// SourceOperator::Scan { +// table_reference, .. +// } => program +// .resolve_cursor_id(&table_reference.table_identifier, None), +// SourceOperator::Search { +// table_reference, .. 
+// } => program +// .resolve_cursor_id(&table_reference.table_identifier, None), +// _ => unreachable!(), +// }; +// program.emit_insn(Insn::NullRow { +// cursor_id: right_cursor_id, +// }); +// // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null +// program.emit_insn_with_label_dependency( +// Insn::Goto { +// target_pc: lj_meta.set_match_flag_true_label, +// }, +// lj_meta.set_match_flag_true_label, +// ); +// } +// let next_row_label = if *outer { +// m.left_joins.get(id).unwrap().on_match_jump_to_label +// } else { +// *m.next_row_labels.get(&right.id()).unwrap() +// }; +// // This points to the NextAsync instruction of the left table +// program.resolve_label(next_row_label, program.offset()); +// left.step(program, m, referenced_tables)?; + +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Projection { +// id, +// source, +// expressions, +// aggregates, +// group_by, +// step, +// .. +// } => { +// *step += 1; + +// if !aggregates.is_empty() && group_by.is_none() { +// const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; +// const PROJECTION_FINALIZE_SOURCE: usize = 2; +// match *step { +// PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit | OpStepResult::Done => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// } +// }, +// PROJECTION_FINALIZE_SOURCE => { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Done => return Ok(OpStepResult::Done), +// _ => unreachable!(), +// } +// } +// _ => return Ok(OpStepResult::Done), +// } +// } + +// // Group by aggregation e.g. 
SELECT a, b, sum(c) FROM t GROUP BY a, b +// if let Some(group_by) = group_by { +// const GROUP_BY_INIT: usize = 1; +// const GROUP_BY_INSERT_INTO_SORTER: usize = 2; +// const GROUP_BY_SORT_AND_COMPARE: usize = 3; +// const GROUP_BY_PREPARE_ROW: usize = 4; +// const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; +// match *step { +// GROUP_BY_INIT => { +// let agg_final_label = program.allocate_label(); +// m.termination_label_stack.push(agg_final_label); +// let num_aggs = aggregates.len(); + +// let sort_cursor = program.alloc_cursor_id(None, None); + +// let abort_flag_register = program.alloc_register(); +// let data_in_accumulator_indicator_register = program.alloc_register(); +// let group_exprs_comparison_register = +// program.alloc_registers(group_by.len()); +// let group_exprs_accumulator_register = +// program.alloc_registers(group_by.len()); +// let agg_exprs_start_reg = program.alloc_registers(num_aggs); +// m.aggregation_start_registers +// .insert(*id, agg_exprs_start_reg); +// let sorter_key_register = program.alloc_register(); + +// let subroutine_accumulator_clear_label = program.allocate_label(); +// let subroutine_accumulator_output_label = program.allocate_label(); +// let sorter_data_label = program.allocate_label(); +// let grouping_done_label = program.allocate_label(); + +// let mut order = Vec::new(); +// const ASCENDING: i64 = 0; +// for _ in group_by.iter() { +// order.push(OwnedValue::Integer(ASCENDING)); +// } +// program.emit_insn(Insn::SorterOpen { +// cursor_id: sort_cursor, +// columns: current_operator_column_count, +// order: OwnedRecord::new(order), +// }); + +// program.add_comment(program.offset(), "clear group by abort flag"); +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: abort_flag_register, +// }); + +// program.add_comment( +// program.offset(), +// "initialize group by comparison registers to NULL", +// ); +// program.emit_insn(Insn::Null { +// dest: group_exprs_comparison_register, +// dest_end: if 
group_by.len() > 1 { +// Some(group_exprs_comparison_register + group_by.len() - 1) +// } else { +// None +// }, +// }); + +// program.add_comment( +// program.offset(), +// "go to clear accumulator subroutine", +// ); + +// let subroutine_accumulator_clear_return_offset_register = +// program.alloc_register(); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_clear_label, +// return_reg: subroutine_accumulator_clear_return_offset_register, +// }, +// subroutine_accumulator_clear_label, +// ); + +// m.group_bys.insert( +// *id, +// GroupByMetadata { +// sort_cursor, +// subroutine_accumulator_clear_label, +// subroutine_accumulator_clear_return_offset_register, +// subroutine_accumulator_output_label, +// subroutine_accumulator_output_return_offset_register: program +// .alloc_register(), +// accumulator_indicator_set_true_label: program.allocate_label(), +// sorter_data_label, +// grouping_done_label, +// abort_flag_register, +// data_in_accumulator_indicator_register, +// group_exprs_accumulator_register, +// group_exprs_comparison_register, +// sorter_key_register, +// }, +// ); + +// loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::Continue); +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::Done); +// } +// } +// } +// } +// GROUP_BY_INSERT_INTO_SORTER => { +// let sort_keys_count = group_by.len(); +// let start_reg = program.alloc_registers(current_operator_column_count); +// for (i, expr) in group_by.iter().enumerate() { +// let key_reg = start_reg + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// key_reg, +// None, +// m.result_set_register_start, +// )?; +// } +// for (i, agg) in aggregates.iter().enumerate() { +// // TODO it's a hack to assume aggregate functions have exactly one argument. +// // Counterpoint e.g. GROUP_CONCAT(expr, separator). 
+// // +// // Here we are collecting scalars for the group by sorter, which will include +// // both the group by expressions and the aggregate arguments. +// // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` +// // the sorter will have two scalars: u.first_name and u.age. +// // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. +// // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. +// // +// // This is why we take the first argument of each aggregate function currently. +// // It's mostly an artifact of the current architecture being a bit poor; we should recognize +// // which scalars are dependencies of aggregate functions and explicitly collect those. +// let expr = &agg.args[0]; +// let agg_reg = start_reg + sort_keys_count + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// agg_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// let group_by_metadata = m.group_bys.get(id).unwrap(); + +// program.emit_insn(Insn::MakeRecord { +// start_reg, +// count: current_operator_column_count, +// dest_reg: group_by_metadata.sorter_key_register, +// }); + +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::SorterInsert { +// cursor_id: group_by_metadata.sort_cursor, +// record_reg: group_by_metadata.sorter_key_register, +// }); + +// return Ok(OpStepResult::Continue); +// } +// #[allow(clippy::never_loop)] +// GROUP_BY_SORT_AND_COMPARE => { +// loop { +// match source.step(program, m, referenced_tables)? 
{ +// OpStepResult::Done => { +// break; +// } +// _ => unreachable!(), +// } +// } + +// let group_by_metadata = m.group_bys.get_mut(id).unwrap(); + +// let GroupByMetadata { +// group_exprs_comparison_register: comparison_register, +// subroutine_accumulator_output_return_offset_register, +// subroutine_accumulator_output_label, +// subroutine_accumulator_clear_return_offset_register, +// subroutine_accumulator_clear_label, +// data_in_accumulator_indicator_register, +// accumulator_indicator_set_true_label, +// group_exprs_accumulator_register: group_exprs_start_register, +// abort_flag_register, +// sorter_key_register, +// .. +// } = *group_by_metadata; +// let halt_label = *m.termination_label_stack.first().unwrap(); + +// let mut column_names = +// Vec::with_capacity(current_operator_column_count); +// for expr in group_by +// .iter() +// .chain(aggregates.iter().map(|agg| &agg.args[0])) +// // FIXME: just blindly taking the first arg is a hack +// { +// // Sorter column names for group by are now just determined by stringifying the expression, since the group by +// // columns and aggregations can be practically anything. 
+// // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle +// // things like `count(1)` and `COUNT(1)` the same way +// column_names.push(expr.to_string()); +// } +// let pseudo_columns = column_names +// .iter() +// .map(|name| Column { +// name: name.clone(), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }) +// .collect::>(); + +// let pseudo_table = Rc::new(PseudoTable { +// columns: pseudo_columns, +// }); + +// let pseudo_cursor = program +// .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); + +// program.emit_insn(Insn::OpenPseudo { +// cursor_id: pseudo_cursor, +// content_reg: sorter_key_register, +// num_fields: current_operator_column_count, +// }); + +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterSort { +// cursor_id: group_by_metadata.sort_cursor, +// pc_if_empty: group_by_metadata.grouping_done_label, +// }, +// group_by_metadata.grouping_done_label, +// ); + +// program.defer_label_resolution( +// group_by_metadata.sorter_data_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::SorterData { +// cursor_id: group_by_metadata.sort_cursor, +// dest_reg: group_by_metadata.sorter_key_register, +// pseudo_cursor, +// }); + +// let groups_start_reg = program.alloc_registers(group_by.len()); +// for (i, expr) in group_by.iter().enumerate() { +// let sorter_column_index = +// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; +// let group_reg = groups_start_reg + i; +// program.emit_insn(Insn::Column { +// cursor_id: pseudo_cursor, +// column: sorter_column_index, +// dest: group_reg, +// }); +// } + +// program.emit_insn(Insn::Compare { +// start_reg_a: comparison_register, +// start_reg_b: groups_start_reg, +// count: group_by.len(), +// }); + +// let agg_step_label = program.allocate_label(); + +// program.add_comment( +// program.offset(), +// 
"start new group if comparison is not equal", +// ); +// program.emit_insn_with_label_dependency( +// Insn::Jump { +// target_pc_lt: program.offset() + 1, +// target_pc_eq: agg_step_label, +// target_pc_gt: program.offset() + 1, +// }, +// agg_step_label, +// ); + +// program.emit_insn(Insn::Move { +// source_reg: groups_start_reg, +// dest_reg: comparison_register, +// count: group_by.len(), +// }); + +// program.add_comment( +// program.offset(), +// "check if ended group had data, and output if so", +// ); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_output_label, +// return_reg: +// subroutine_accumulator_output_return_offset_register, +// }, +// subroutine_accumulator_output_label, +// ); + +// program.add_comment(program.offset(), "check abort flag"); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: abort_flag_register, +// target_pc: halt_label, +// decrement_by: 0, +// }, +// m.termination_label_stack[0], +// ); + +// program +// .add_comment(program.offset(), "goto clear accumulator subroutine"); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_clear_label, +// return_reg: subroutine_accumulator_clear_return_offset_register, +// }, +// subroutine_accumulator_clear_label, +// ); + +// program.resolve_label(agg_step_label, program.offset()); +// let start_reg = m.aggregation_start_registers.get(id).unwrap(); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = start_reg + i; +// translate_aggregation( +// program, +// referenced_tables, +// agg, +// agg_result_reg, +// Some(pseudo_cursor), +// )?; +// } + +// program.add_comment( +// program.offset(), +// "don't emit group columns if continuing existing group", +// ); +// program.emit_insn_with_label_dependency( +// Insn::If { +// target_pc: accumulator_indicator_set_true_label, +// reg: data_in_accumulator_indicator_register, +// null_reg: 0, // unused in 
this case +// }, +// accumulator_indicator_set_true_label, +// ); + +// for (i, expr) in group_by.iter().enumerate() { +// let key_reg = group_exprs_start_register + i; +// let sorter_column_index = +// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; +// program.emit_insn(Insn::Column { +// cursor_id: pseudo_cursor, +// column: sorter_column_index, +// dest: key_reg, +// }); +// } + +// program.resolve_label( +// accumulator_indicator_set_true_label, +// program.offset(), +// ); +// program.add_comment(program.offset(), "indicate data in accumulator"); +// program.emit_insn(Insn::Integer { +// value: 1, +// dest: data_in_accumulator_indicator_register, +// }); + +// return Ok(OpStepResult::Continue); +// } +// GROUP_BY_PREPARE_ROW => { +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterNext { +// cursor_id: group_by_metadata.sort_cursor, +// pc_if_next: group_by_metadata.sorter_data_label, +// }, +// group_by_metadata.sorter_data_label, +// ); + +// program.resolve_label( +// group_by_metadata.grouping_done_label, +// program.offset(), +// ); + +// program.add_comment(program.offset(), "emit row for final group"); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: group_by_metadata +// .subroutine_accumulator_output_label, +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }, +// group_by_metadata.subroutine_accumulator_output_label, +// ); + +// program.add_comment(program.offset(), "group by finished"); +// let termination_label = +// m.termination_label_stack[m.termination_label_stack.len() - 2]; +// program.emit_insn_with_label_dependency( +// Insn::Goto { +// target_pc: termination_label, +// }, +// termination_label, +// ); +// program.emit_insn(Insn::Integer { +// value: 1, +// dest: group_by_metadata.abort_flag_register, +// }); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// 
.subroutine_accumulator_output_return_offset_register, +// }); + +// program.resolve_label( +// group_by_metadata.subroutine_accumulator_output_label, +// program.offset(), +// ); + +// program.add_comment( +// program.offset(), +// "output group by row subroutine start", +// ); +// let termination_label = *m.termination_label_stack.last().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: group_by_metadata.data_in_accumulator_indicator_register, +// target_pc: termination_label, +// decrement_by: 0, +// }, +// termination_label, +// ); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }); + +// return Ok(OpStepResult::ReadyToEmit); +// } +// GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }); + +// program.add_comment( +// program.offset(), +// "clear accumulator subroutine start", +// ); +// program.resolve_label( +// group_by_metadata.subroutine_accumulator_clear_label, +// program.offset(), +// ); +// let start_reg = group_by_metadata.group_exprs_accumulator_register; +// program.emit_insn(Insn::Null { +// dest: start_reg, +// dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), +// }); + +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: group_by_metadata.data_in_accumulator_indicator_register, +// }); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_clear_return_offset_register, +// }); +// } +// _ => { +// return Ok(OpStepResult::Done); +// } +// } +// } + +// // Non-grouped aggregation e.g. 
SELECT COUNT(*) FROM t + +// const AGGREGATE_INIT: usize = 1; +// const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; +// match *step { +// AGGREGATE_INIT => { +// let agg_final_label = program.allocate_label(); +// m.termination_label_stack.push(agg_final_label); +// let num_aggs = aggregates.len(); +// let start_reg = program.alloc_registers(num_aggs); +// m.aggregation_start_registers.insert(*id, start_reg); + +// Ok(OpStepResult::Continue) +// } +// AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => {} +// OpStepResult::ReadyToEmit => { +// let start_reg = m.aggregation_start_registers.get(id).unwrap(); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = start_reg + i; +// translate_aggregation( +// program, +// referenced_tables, +// agg, +// agg_result_reg, +// None, +// )?; +// } +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// } +// }, +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), +// SourceOperator::Limit { source, step, .. } => { +// *step += 1; +// loop { +// match source.step(program, m, referenced_tables)? 
{ +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// OpStepResult::Done => return Ok(OpStepResult::Done), +// } +// } +// } +// SourceOperator::Order { +// id, +// source, +// key, +// step, +// } => { +// *step += 1; +// const ORDER_INIT: usize = 1; +// const ORDER_INSERT_INTO_SORTER: usize = 2; +// const ORDER_SORT_AND_OPEN_LOOP: usize = 3; +// const ORDER_NEXT: usize = 4; +// match *step { +// ORDER_INIT => { +// m.termination_label_stack.push(program.allocate_label()); +// let sort_cursor = program.alloc_cursor_id(None, None); +// m.sorts.insert( +// *id, +// SortMetadata { +// sort_cursor, +// pseudo_table_cursor: usize::MAX, // will be set later +// sorter_data_register: program.alloc_register(), +// sorter_data_label: program.allocate_label(), +// done_label: program.allocate_label(), +// }, +// ); +// let mut order = Vec::new(); +// for (_, direction) in key.iter() { +// order.push(OwnedValue::Integer(*direction as i64)); +// } +// program.emit_insn(Insn::SorterOpen { +// cursor_id: sort_cursor, +// columns: key.len(), +// order: OwnedRecord::new(order), +// }); + +// loop { +// match source.step(program, m, referenced_tables)? 
{ +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::Continue); +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::Done); +// } +// } +// } +// } +// ORDER_INSERT_INTO_SORTER => { +// let sort_keys_count = key.len(); +// let source_cols_count = source.column_count(referenced_tables); +// let start_reg = program.alloc_registers(sort_keys_count); +// source.result_columns(program, referenced_tables, m, None)?; + +// for (i, (expr, _)) in key.iter().enumerate() { +// let key_reg = start_reg + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// key_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// let sort_metadata = m.sorts.get_mut(id).unwrap(); +// program.emit_insn(Insn::MakeRecord { +// start_reg, +// count: sort_keys_count + source_cols_count, +// dest_reg: sort_metadata.sorter_data_register, +// }); + +// program.emit_insn(Insn::SorterInsert { +// cursor_id: sort_metadata.sort_cursor, +// record_reg: sort_metadata.sorter_data_register, +// }); + +// Ok(OpStepResult::Continue) +// } +// #[allow(clippy::never_loop)] +// ORDER_SORT_AND_OPEN_LOOP => { +// loop { +// match source.step(program, m, referenced_tables)? 
{ +// OpStepResult::Done => { +// break; +// } +// _ => unreachable!(), +// } +// } +// program.resolve_label( +// m.termination_label_stack.pop().unwrap(), +// program.offset(), +// ); +// let column_names = source.column_names(); +// let mut pseudo_columns = vec![]; +// for (i, _) in key.iter().enumerate() { +// pseudo_columns.push(Column { +// name: format!("sort_key_{}", i), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }); +// } +// for name in column_names { +// pseudo_columns.push(Column { +// name: name.clone(), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }); +// } + +// let num_fields = pseudo_columns.len(); + +// let pseudo_cursor = program.alloc_cursor_id( +// None, +// Some(Table::Pseudo(Rc::new(PseudoTable { +// columns: pseudo_columns, +// }))), +// ); +// let sort_metadata = m.sorts.get(id).unwrap(); + +// program.emit_insn(Insn::OpenPseudo { +// cursor_id: pseudo_cursor, +// content_reg: sort_metadata.sorter_data_register, +// num_fields, +// }); + +// program.emit_insn_with_label_dependency( +// Insn::SorterSort { +// cursor_id: sort_metadata.sort_cursor, +// pc_if_empty: sort_metadata.done_label, +// }, +// sort_metadata.done_label, +// ); + +// program.defer_label_resolution( +// sort_metadata.sorter_data_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::SorterData { +// cursor_id: sort_metadata.sort_cursor, +// dest_reg: sort_metadata.sorter_data_register, +// pseudo_cursor, +// }); + +// let sort_metadata = m.sorts.get_mut(id).unwrap(); + +// sort_metadata.pseudo_table_cursor = pseudo_cursor; + +// Ok(OpStepResult::ReadyToEmit) +// } +// ORDER_NEXT => { +// let sort_metadata = m.sorts.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterNext { +// cursor_id: sort_metadata.sort_cursor, +// pc_if_next: sort_metadata.sorter_data_label, +// }, +// sort_metadata.sorter_data_label, +// ); + +// program.resolve_label(sort_metadata.done_label, program.offset()); + +// 
Ok(OpStepResult::Done) +// } +// _ => unreachable!(), +// } +// } +// SourceOperator::Nothing => Ok(OpStepResult::Done), +// } +// } +// fn result_columns( +// &self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// m: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result { +// let col_count = self.column_count(referenced_tables); +// match self { +// SourceOperator::Scan { +// table_reference, .. +// } => { +// let start_reg = program.alloc_registers(col_count); +// let table = cursor_override +// .map(|c| c.pseudo_table.clone()) +// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); +// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// }); +// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); +// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); + +// Ok(start_reg) +// } +// SourceOperator::Search { +// table_reference, .. +// } => { +// let start_reg = program.alloc_registers(col_count); +// let table = cursor_override +// .map(|c| c.pseudo_table.clone()) +// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); +// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// }); +// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); +// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); + +// Ok(start_reg) +// } +// SourceOperator::Join { left, right, .. } => { +// let left_start_reg = +// left.result_columns(program, referenced_tables, m, cursor_override)?; +// right.result_columns(program, referenced_tables, m, cursor_override)?; + +// Ok(left_start_reg) +// } +// SourceOperator::Projection { +// id, +// expressions, +// aggregates, +// group_by, +// .. 
+// } => { +// if aggregates.is_empty() && group_by.is_none() { +// let expr_count = expressions.len(); +// let start_reg = program.alloc_registers(expr_count); +// let mut cur_reg = start_reg; +// m.result_set_register_start = start_reg; +// for expr in expressions { +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// cur_reg, +// cursor_override.map(|c| c.cursor_id), +// m.result_set_register_start, +// )?; +// cur_reg += 1; +// } + +// return Ok(start_reg); +// } +// let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); +// program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = *agg_start_reg + i; +// program.emit_insn(Insn::AggFinal { +// register: agg_result_reg, +// func: agg.func.clone(), +// }); +// } + +// if let Some(group_by) = group_by { +// let output_row_start_reg = +// program.alloc_registers(aggregates.len() + group_by.len()); +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::Copy { +// src_reg: group_by_metadata.group_exprs_accumulator_register, +// dst_reg: output_row_start_reg, +// amount: group_by.len() - 1, +// }); +// program.emit_insn(Insn::Copy { +// src_reg: *agg_start_reg, +// dst_reg: output_row_start_reg + group_by.len(), +// amount: aggregates.len() - 1, +// }); + +// Ok(output_row_start_reg) +// } else { +// Ok(*agg_start_reg) +// } +// } +// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), +// SourceOperator::Limit { .. } => { +// unimplemented!() +// } +// SourceOperator::Order { id, key, .. 
} => { +// let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; +// let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); +// let start_column_offset = key.len(); +// let column_count = pseudo_table.columns().len() - start_column_offset; +// let start_reg = program.alloc_registers(column_count); +// translate_table_columns( +// program, +// cursor_id, +// &pseudo_table, +// start_column_offset, +// start_reg, +// ); + +// Ok(start_reg) +// } +// SourceOperator::Projection { +// expressions, id, .. +// } => { +// let expr_count = expressions.len(); +// let start_reg = program.alloc_registers(expr_count); +// let mut cur_reg = start_reg; +// m.result_set_register_start = start_reg; +// for expr in expressions { +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// cur_reg, +// cursor_override.map(|c| c.cursor_id), +// m.result_set_register_start, +// )?; +// cur_reg += 1; +// } + +// Ok(start_reg) +// } +// SourceOperator::Nothing => unimplemented!(), +// } +// } +// fn result_row( +// &mut self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// m: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result<()> { +// match self { +// SourceOperator::Limit { source, limit, .. 
} => { +// source.result_row(program, referenced_tables, m, cursor_override)?; +// let limit_reg = program.alloc_register(); +// program.emit_insn(Insn::Integer { +// value: *limit as i64, +// dest: limit_reg, +// }); +// program.mark_last_insn_constant(); +// let jump_label = m.termination_label_stack.first().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::DecrJumpZero { +// reg: limit_reg, +// target_pc: *jump_label, +// }, +// *jump_label, +// ); + +// Ok(()) +// } +// operator => { +// let start_reg = +// operator.result_columns(program, referenced_tables, m, cursor_override)?; +// program.emit_insn(Insn::ResultRow { +// start_reg, +// count: operator.column_count(referenced_tables), +// }); +// Ok(()) +// } +// } +// } +// } + +fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); let halt_label = program.allocate_label(); @@ -1697,13 +1637,13 @@ fn prologue( let metadata = Metadata { termination_label_stack: vec![halt_label], - expr_result_cache: cache, aggregation_start_registers: HashMap::new(), group_bys: HashMap::new(), left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], sorts: HashMap::new(), + result_set_register_start: 0, }; Ok((program, metadata, init_label, start_offset)) @@ -1740,28 +1680,1195 @@ fn epilogue( pub fn emit_program( database_header: Rc>, mut plan: Plan, - cache: ExpressionResultCache, connection: Weak, ) -> Result { - let (mut program, mut metadata, init_label, start_offset) = prologue(cache)?; - loop { - match plan - .root_operator - .step(&mut program, &mut metadata, &plan.referenced_tables)? 
- { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - plan.root_operator.result_row( - &mut program, - &plan.referenced_tables, - &mut metadata, - None, - )?; + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + let mut order_by_necessary = plan.order_by.is_some(); + + // OPEN CURSORS ETC + if let Some(ref mut order_by) = plan.order_by { + init_order_by(&mut program, order_by, &mut metadata)?; + } + + if let Some(ref mut group_by) = plan.group_by { + let aggregates = plan.aggregates.as_mut().unwrap(); + init_group_by(&mut program, group_by, aggregates, &mut metadata)?; + } + init_source(&mut program, &plan.source, &mut metadata)?; + + // REWIND CURSORS, EMIT CONDITIONS + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + // EMIT COLUMNS AND OTHER EXPRS IN INNER LOOP + inner_loop_emit(&mut program, &mut plan, &mut metadata)?; + + // CLOSE LOOP + close_loop( + &mut program, + &mut plan.source, + &mut metadata, + &plan.referenced_tables, + )?; + + // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION + if let Some(ref mut group_by) = plan.group_by { + sort_group_by(&mut program, group_by, &mut metadata)?; + finalize_group_by(&mut program, group_by, &mut metadata)?; + } else if let Some(ref mut aggregates) = plan.aggregates { + // Example: SELECT sum(x), count(*) FROM t; + finalize_agg_without_group_by(&mut program, aggregates, &mut metadata)?; + // If we have an aggregate without a group by, we don't need an order by because currently + // there can only be a single row result in those cases. 
+ order_by_necessary = false; + } + + // IF ORDER BY, SORT BY ORDER BY + if let Some(ref mut order_by) = plan.order_by { + if order_by_necessary { + sort_order_by( + &mut program, + order_by, + &plan.result_columns, + plan.limit.clone(), + &mut metadata, + )?; + } + } + + // EPILOGUE + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + +const ORDER_BY_ID: usize = 0; +const GROUP_BY_ID: usize = 1; +const AGG_WITHOUT_GROUP_BY_ID: usize = 2; + +fn init_order_by( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + m: &mut Metadata, +) -> Result<()> { + m.termination_label_stack.push(program.allocate_label()); + let sort_cursor = program.alloc_cursor_id(None, None); + m.sorts.insert( + ORDER_BY_ID, + SortMetadata { + sort_cursor, + pseudo_table_cursor: usize::MAX, // will be set later + sorter_data_register: program.alloc_register(), + sorter_data_label: program.allocate_label(), + done_label: program.allocate_label(), + }, + ); + let mut order = Vec::new(); + for (_, direction) in order_by.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: order_by.len(), + order: OwnedRecord::new(order), + }); + Ok(()) +} + +fn init_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + aggregates: &Vec, + m: &mut Metadata, +) -> Result<()> { + let agg_final_label = program.allocate_label(); + m.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + + let sort_cursor = program.alloc_cursor_id(None, None); + + let abort_flag_register = program.alloc_register(); + let data_in_accumulator_indicator_register = program.alloc_register(); + let group_exprs_comparison_register = program.alloc_registers(group_by.len()); + let group_exprs_accumulator_register = program.alloc_registers(group_by.len()); + let agg_exprs_start_reg = program.alloc_registers(num_aggs); + 
m.aggregation_start_registers + .insert(GROUP_BY_ID, agg_exprs_start_reg); + let sorter_key_register = program.alloc_register(); + + let subroutine_accumulator_clear_label = program.allocate_label(); + let subroutine_accumulator_output_label = program.allocate_label(); + let sorter_data_label = program.allocate_label(); + let grouping_done_label = program.allocate_label(); + + let mut order = Vec::new(); + const ASCENDING: i64 = 0; + for _ in group_by.iter() { + order.push(OwnedValue::Integer(ASCENDING)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: aggregates.len() + group_by.len(), + order: OwnedRecord::new(order), + }); + + program.add_comment(program.offset(), "clear group by abort flag"); + program.emit_insn(Insn::Integer { + value: 0, + dest: abort_flag_register, + }); + + program.add_comment( + program.offset(), + "initialize group by comparison registers to NULL", + ); + program.emit_insn(Insn::Null { + dest: group_exprs_comparison_register, + dest_end: if group_by.len() > 1 { + Some(group_exprs_comparison_register + group_by.len() - 1) + } else { + None + }, + }); + + program.add_comment(program.offset(), "go to clear accumulator subroutine"); + + let subroutine_accumulator_clear_return_offset_register = program.alloc_register(); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_clear_label, + return_reg: subroutine_accumulator_clear_return_offset_register, + }, + subroutine_accumulator_clear_label, + ); + + m.group_bys.insert( + GROUP_BY_ID, + GroupByMetadata { + sort_cursor, + subroutine_accumulator_clear_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_output_return_offset_register: program.alloc_register(), + accumulator_indicator_set_true_label: program.allocate_label(), + sorter_data_label, + grouping_done_label, + abort_flag_register, + data_in_accumulator_indicator_register, + 
group_exprs_accumulator_register, + group_exprs_comparison_register, + sorter_key_register, + }, + ); + Ok(()) +} + +// fn init_agg_without_group_by( +// program: &mut ProgramBuilder, +// aggregates: &Vec, +// m: &mut Metadata, +// ) -> Result<()> { + +// Ok(()) +// } + +fn init_source( + program: &mut ProgramBuilder, + source: &SourceOperator, + m: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. + } => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + m.left_joins.insert(*id, lj_metadata); } - OpStepResult::Done => { - epilogue(&mut program, &mut metadata, init_label, start_offset)?; - return Ok(program.build(database_header, connection)); + init_source(program, left, m)?; + init_source(program, right, m)?; + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + .. + } => { + let cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + let root_page = table_reference.table.root_page; + let next_row_label = program.allocate_label(); + m.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + + let next_row_label = program.allocate_label(); + + if !matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. 
+ m.next_row_labels.insert(*id, next_row_label); } + + let scan_loop_body_label = program.allocate_label(); + m.scan_loop_body_labels.push(scan_loop_body_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = program + .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); } } } + +fn open_loop( + program: &mut ProgramBuilder, + source: &mut SourceOperator, + referenced_tables: &[BTreeTableReference], + m: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + predicates, + outer, + .. + } => { + open_loop(program, left, referenced_tables, m)?; + + let mut jump_target_when_false = *m + .next_row_labels + .get(&right.id()) + .or(m.next_row_labels.get(&left.id())) + .unwrap_or(m.termination_label_stack.last().unwrap()); + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + } + m.next_row_labels.insert(right.id(), jump_target_when_false); + + open_loop(program, right, referenced_tables, m)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + m.result_set_register_start, + )?; + } + 
program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: lj_meta.match_flag_register, + }); + } + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + predicates, + iter_dir, + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::LastAsync { cursor_id }); + } else { + program.emit_insn(Insn::RewindAsync { cursor_id }); + } + let scan_loop_body_label = program.allocate_label(); + let halt_label = m.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + Insn::LastAwait { + cursor_id, + pc_if_empty: *halt_label, + } + } else { + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + } + }, + *halt_label, + ); + m.scan_loop_body_labels.push(scan_loop_body_label); + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + + let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + None, + condition_metadata, + m.result_set_register_start, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + predicates, + .. 
+ } => { + let table_cursor_id = + program.resolve_cursor_id(&table_reference.table_identifier, None); + + // Open the loop for the index search. + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + if !matches!(search, Search::PrimaryKeyEq { .. }) { + let index_cursor_id = if let Search::IndexSearch { index, .. } = search { + Some(program.resolve_cursor_id(&index.name, None)) + } else { + None + }; + let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); + let cmp_reg = program.alloc_register(); + let (cmp_expr, cmp_op) = match search { + Search::IndexSearch { + cmp_expr, cmp_op, .. + } => (cmp_expr, cmp_op), + Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), + Search::PrimaryKeyEq { .. } => unreachable!(), + }; + // TODO this only handles ascending indexes + match cmp_op { + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals => { + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + cmp_reg, + None, + m.result_set_register_start, + )?; + } + ast::Operator::Less | ast::Operator::LessEquals => { + program.emit_insn(Insn::Null { + dest: cmp_reg, + dest_end: None, + }); + } + _ => unreachable!(), + } + program.emit_insn_with_label_dependency( + match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *m.termination_label_stack.last().unwrap(), + }, + ast::Operator::Greater + | ast::Operator::Less + | ast::Operator::LessEquals => Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *m.termination_label_stack.last().unwrap(), + }, + _ => unreachable!(), + }, + *m.termination_label_stack.last().unwrap(), + ); + if *cmp_op == 
ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + cmp_reg, + None, + m.result_set_register_start, + )?; + } + + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + // TODO: We are currently only handling ascending indexes. + // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. + // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. + // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. + // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. + // + // For primary key searches we emit RowId and then compare it to the seek value. 
+ + let abort_jump_target = *m + .next_row_labels + .get(id) + .unwrap_or(m.termination_label_stack.last().unwrap()); + match cmp_op { + ast::Operator::Equals | ast::Operator::LessEquals => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + ast::Operator::Less => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + } + + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(m.termination_label_stack.last().unwrap()); + + if let Search::PrimaryKeyEq { cmp_expr } = search { + let src_reg = program.alloc_register(); + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + src_reg, + None, + m.result_set_register_start, + )?; + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id: table_cursor_id, + src_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + } + if let Some(predicates) = predicates 
{ + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + m.result_set_register_start, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); + } + } +} + +pub enum InnerLoopEmitTarget<'a> { + GroupBySorter { + group_by: &'a Vec, + aggregates: &'a Vec, + }, + OrderBySorter { + order_by: &'a Vec<(ast::Expr, Direction)>, + }, + ResultRow { + limit: Option, + }, + AggStep, +} + +fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metadata) -> Result<()> { + // if we have a group by, we emit a record into the group by sorter. + if let Some(group_by) = &plan.group_by { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates: &plan.aggregates.as_ref().unwrap(), + }, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have aggregates, we emit without ResultRow. + // we also do not need to sort because we are emitting a single row. + if plan.aggregates.is_some() { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::AggStep, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have an order by, we emit a record into the order by sorter. + if let Some(order_by) = &plan.order_by { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::OrderBySorter { order_by }, + &plan.referenced_tables, + ); + } + // if we have neither, we emit a ResultRow. 
In that case, if we have a Limit, we handle that with DecrJumpZero. + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::ResultRow { limit: plan.limit }, + &plan.referenced_tables, + ); +} + +fn inner_loop_source_emit( + program: &mut ProgramBuilder, + source: &SourceOperator, + result_columns: &Vec, + aggregates: &Option>, + m: &mut Metadata, + emit_target: InnerLoopEmitTarget, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match emit_target { + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates, + } => { + // TODO: DOESNT WORK YET + let sort_keys_count = group_by.len(); + let column_count = sort_keys_count + aggregates.len(); + let start_reg = program.alloc_registers(column_count); + for (i, expr) in group_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + None, + m.result_set_register_start, + )?; + } + for (i, agg) in aggregates.iter().enumerate() { + // TODO it's a hack to assume aggregate functions have exactly one argument. + // Counterpoint e.g. GROUP_CONCAT(expr, separator). + // + // Here we are collecting scalars for the group by sorter, which will include + // both the group by expressions and the aggregate arguments. + // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` + // the sorter will have two scalars: u.first_name and u.age. + // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. + // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. + // + // This is why we take the first argument of each aggregate function currently. + // It's mostly an artifact of the current architecture being a bit poor; we should recognize + // which scalars are dependencies of aggregate functions and explicitly collect those. 
+ let expr = &agg.args[0]; + let agg_reg = start_reg + sort_keys_count + i; + translate_expr( + program, + Some(referenced_tables), + expr, + agg_reg, + None, + m.result_set_register_start, + )?; + } + + let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); + + program.emit_insn(Insn::MakeRecord { + start_reg, + count: column_count, + dest_reg: group_by_metadata.sorter_key_register, + }); + + let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); + program.emit_insn(Insn::SorterInsert { + cursor_id: group_by_metadata.sort_cursor, + record_reg: group_by_metadata.sorter_key_register, + }); + + Ok(()) + } + InnerLoopEmitTarget::OrderBySorter { order_by } => { + // TODO: DOESNT WORK YET + let sort_keys_count = order_by.len(); + let source_cols_count = result_columns.len(); + let start_reg = program.alloc_registers(sort_keys_count + source_cols_count); + for (i, (expr, _)) in order_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + None, + m.result_set_register_start, + )?; + } + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + sort_keys_count + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + other => todo!("{:?}", other), + } + } + + let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: sort_keys_count + source_cols_count, + dest_reg: sort_metadata.sorter_data_register, + }); + + program.emit_insn(Insn::SorterInsert { + cursor_id: sort_metadata.sort_cursor, + record_reg: sort_metadata.sorter_data_register, + }); + + Ok(()) + } + InnerLoopEmitTarget::AggStep => { + let aggregates = aggregates.as_ref().unwrap(); + let agg_final_label = program.allocate_label(); + m.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + let 
start_reg = program.alloc_registers(result_columns.len()); + m.aggregation_start_registers + .insert(AGG_WITHOUT_GROUP_BY_ID, start_reg); + for (i, agg) in aggregates.iter().enumerate() { + let reg = start_reg + i; + translate_aggregation(program, referenced_tables, agg, reg, None)?; + } + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + num_aggs + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + ResultSetColumn::Agg(_) => { /* do nothing, aggregates are computed above */ } + other => unreachable!("Unexpected non-scalar result column: {:?}", other), + } + } + Ok(()) + } + InnerLoopEmitTarget::ResultRow { limit } => { + assert!(aggregates.is_none()); + let start_reg = program.alloc_registers(result_columns.len()); + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + other => unreachable!("Unexpected non-scalar result column: {:?}", other), + } + } + program.emit_insn(Insn::ResultRow { + start_reg, + count: result_columns.len(), + }); + if let Some(limit) = limit { + let jump_label = m.termination_label_stack.last().unwrap(); + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + } + + Ok(()) + } + } +} + +fn close_loop( + program: &mut ProgramBuilder, + source: &SourceOperator, + m: &mut Metadata, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. 
+ } => { + close_loop(program, right, m, referenced_tables)?; + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + SourceOperator::Scan { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier, None), + SourceOperator::Search { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier, None), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + } + let next_row_label = if *outer { + m.left_joins.get(id).unwrap().on_match_jump_to_label + } else { + *m.next_row_labels.get(&right.id()).unwrap() + }; + // This points to the NextAsync instruction of the left table + program.resolve_label(next_row_label, program.offset()); + close_loop(program, left, m, referenced_tables)?; + + Ok(()) + } + SourceOperator::Scan { + id, + table_reference, + iter_dir, + .. 
+ } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + let jump_label = m.scan_loop_body_labels.pop().unwrap(); + + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn_with_label_dependency( + Insn::PrevAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + if matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + return Ok(()); + } + let cursor_id = match search { + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name, None), + Search::PrimaryKeySearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier, None) + } + Search::PrimaryKeyEq { .. 
} => unreachable!(), + }; + program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = m.scan_loop_body_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + + Ok(()) + } + SourceOperator::Nothing => { + unreachable!() + } + } +} + +fn sort_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + m: &mut Metadata, +) -> Result<()> { + todo!() +} + +fn finalize_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + m: &mut Metadata, +) -> Result<()> { + todo!() +} + +enum FinalizeGroupByEmitTarget { + OrderBySorter(usize), + ResultRow, +} + +fn finalize_agg_without_group_by( + program: &mut ProgramBuilder, + aggregates: &Vec, + m: &mut Metadata, +) -> Result<()> { + let agg_start_reg = m + .aggregation_start_registers + .get(&AGG_WITHOUT_GROUP_BY_ID) + .unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = *agg_start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + let output_reg = program.alloc_registers(aggregates.len()); + program.emit_insn(Insn::Copy { + src_reg: *agg_start_reg, + dst_reg: output_reg, + amount: aggregates.len() - 1, + }); + // This always emits a ResultRow because currently it can only be used for a single row result + program.emit_insn(Insn::ResultRow { + start_reg: output_reg, + count: aggregates.len(), + }); + + Ok(()) +} + +fn sort_order_by( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, + limit: Option, + m: &mut Metadata, +) -> Result<()> { + // TODO: DOESNT WORK YET + program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); + let mut pseudo_columns = vec![]; + for (i, _) in order_by.iter().enumerate() { + pseudo_columns.push(Column { + name: format!("sort_key_{}", i), + 
primary_key: false, + ty: crate::schema::Type::Null, + }); + } + for expr in result_columns.iter() { + pseudo_columns.push(Column { + name: match expr { + ResultSetColumn::Scalar(expr) => expr.to_string(), + ResultSetColumn::Agg(agg) => agg.to_string(), + _ => unreachable!(), + }, + primary_key: false, + ty: crate::schema::Type::Null, + }); + } + + let num_fields = pseudo_columns.len(); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + let sort_metadata = m.sorts.get(&ORDER_BY_ID).unwrap(); + + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: sort_metadata.sorter_data_register, + num_fields, + }); + + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + + program.defer_label_resolution(sort_metadata.sorter_data_label, program.offset() as usize); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: sort_metadata.sorter_data_register, + pseudo_cursor, + }); + + let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + + sort_metadata.pseudo_table_cursor = pseudo_cursor; + + // EMIT COLUMNS FROM SORTER AND EMIT ROW + let cursor_id = pseudo_cursor; + let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); + let start_column_offset = order_by.len(); + let column_count = pseudo_table.columns().len() - start_column_offset; + let start_reg = program.alloc_registers(column_count); + for i in 0..column_count { + let reg = start_reg + i; + program.emit_insn(Insn::Column { + cursor_id, + column: start_column_offset + i, + dest: reg, + }); + } + program.emit_insn(Insn::ResultRow { + start_reg, + count: column_count, + }); + + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: 
limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + } + + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_metadata.sorter_data_label, + }, + sort_metadata.sorter_data_label, + ); + + program.resolve_label(sort_metadata.done_label, program.offset()); + + Ok(()) +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6fa78c6f6..01dcae16c 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,6 +1,5 @@ use sqlite3_parser::ast::{self, UnaryOperator}; -use super::optimizer::CachedResult; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; @@ -24,6 +23,7 @@ pub fn translate_condition_expr( expr: &ast::Expr, cursor_hint: Option, condition_metadata: ConditionMetadata, + result_set_register_start: usize, ) -> Result<()> { match expr { ast::Expr::Between { .. 
} => todo!(), @@ -39,6 +39,7 @@ pub fn translate_condition_expr( jump_if_condition_is_true: false, ..condition_metadata }, + result_set_register_start, ); let _ = translate_condition_expr( program, @@ -46,6 +47,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, + result_set_register_start, ); } ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { @@ -61,6 +63,7 @@ pub fn translate_condition_expr( jump_target_when_false, ..condition_metadata }, + result_set_register_start, ); program.resolve_label(jump_target_when_false, program.offset()); let _ = translate_condition_expr( @@ -69,6 +72,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, + result_set_register_start, ); } ast::Expr::Binary(lhs, op, rhs) => { @@ -79,7 +83,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - None, + result_set_register_start, ); if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant() @@ -91,7 +95,7 @@ pub fn translate_condition_expr( rhs, rhs_reg, cursor_hint, - None, + result_set_register_start, ); if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant() @@ -340,7 +344,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - None, + result_set_register_start, )?; let rhs = rhs.as_ref().unwrap(); @@ -370,7 +374,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - None, + result_set_register_start, )?; // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. 
if !last_condition { @@ -414,7 +418,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - None, + result_set_register_start, )?; program.emit_insn_with_label_dependency( Insn::Eq { @@ -460,7 +464,7 @@ pub fn translate_condition_expr( lhs, column_reg, cursor_hint, - None, + result_set_register_start, )?; if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant(); @@ -471,7 +475,7 @@ pub fn translate_condition_expr( rhs, pattern_reg, cursor_hint, - None, + result_set_register_start, )?; if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant(); @@ -545,6 +549,7 @@ pub fn translate_condition_expr( expr, cursor_hint, condition_metadata, + result_set_register_start, ); } } @@ -553,71 +558,33 @@ pub fn translate_condition_expr( Ok(()) } -pub fn get_cached_or_translate( - program: &mut ProgramBuilder, - referenced_tables: Option<&[BTreeTableReference]>, - expr: &ast::Expr, - cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, -) -> Result { - if let Some(cached_results) = cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - return Ok(cached_result.register_idx); - } - } - let reg = program.alloc_register(); - translate_expr( - program, - referenced_tables, - expr, - reg, - cursor_hint, - cached_results, - )?; - Ok(reg) -} - pub fn translate_expr( program: &mut ProgramBuilder, referenced_tables: Option<&[BTreeTableReference]>, expr: &ast::Expr, target_register: usize, cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, + result_set_register_start: usize, ) -> Result { - if let Some(cached_results) = &cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - program.emit_insn(Insn::Copy { - src_reg: cached_result.register_idx, - dst_reg: target_register, - amount: 0, - }); - return Ok(target_register); - } - } - match expr 
{ + ast::Expr::AggRef { index } => todo!(), ast::Expr::Between { .. } => todo!(), ast::Expr::Binary(e1, op, e2) => { - let e1_reg = get_cached_or_translate( + let e1_reg = translate_expr( program, referenced_tables, e1, + target_register, cursor_hint, - cached_results, + result_set_register_start, )?; - let e2_reg = get_cached_or_translate( + let e2_reg = translate_expr( program, referenced_tables, e2, + target_register, cursor_hint, - cached_results, + result_set_register_start, )?; match op { @@ -741,7 +708,7 @@ pub fn translate_expr( expr, reg_expr, cursor_hint, - cached_results, + result_set_register_start, )?; let reg_type = program.alloc_register(); program.emit_insn(Insn::String8 { @@ -814,7 +781,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -841,7 +808,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } @@ -879,7 +846,7 @@ pub fn translate_expr( arg, target_register, cursor_hint, - cached_results, + result_set_register_start, )?; if index < args.len() - 1 { program.emit_insn_with_label_dependency( @@ -915,7 +882,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } program.emit_insn(Insn::Function { @@ -948,7 +915,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } program.emit_insn(Insn::Function { @@ -985,7 +952,7 @@ pub fn translate_expr( &args[0], temp_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::NotNull { reg: temp_reg, @@ -998,7 +965,7 @@ pub fn translate_expr( &args[1], temp_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, @@ -1031,7 +998,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) 
= arg { program.mark_last_insn_constant() @@ -1079,7 +1046,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1116,7 +1083,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } } @@ -1154,7 +1121,7 @@ pub fn translate_expr( &args[0], str_reg, cursor_hint, - cached_results, + result_set_register_start, )?; translate_expr( program, @@ -1162,7 +1129,7 @@ pub fn translate_expr( &args[1], start_reg, cursor_hint, - cached_results, + result_set_register_start, )?; if args.len() == 3 { translate_expr( @@ -1171,7 +1138,7 @@ pub fn translate_expr( &args[2], length_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } @@ -1201,7 +1168,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1225,7 +1192,7 @@ pub fn translate_expr( &args[0], arg_reg, cursor_hint, - cached_results, + result_set_register_start, )?; start_reg = arg_reg; } @@ -1250,7 +1217,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } } @@ -1290,7 +1257,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); @@ -1323,7 +1290,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1357,7 +1324,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1395,7 +1362,7 @@ pub fn translate_expr( &args[0], first_reg, cursor_hint, - cached_results, + result_set_register_start, )?; let second_reg = 
program.alloc_register(); translate_expr( @@ -1404,7 +1371,7 @@ pub fn translate_expr( &args[1], second_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1536,7 +1503,7 @@ pub fn translate_expr( &exprs[0], target_register, cursor_hint, - cached_results, + result_set_register_start, )?; } else { // Parenthesized expressions with multiple arguments are reserved for special cases @@ -1660,7 +1627,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1682,7 +1649,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, ); expr_reg }; @@ -1725,7 +1692,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; translate_expr( program, @@ -1733,7 +1700,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { @@ -1757,7 +1724,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1779,7 +1746,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1816,7 +1783,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; translate_expr( program, @@ -1824,7 +1791,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { @@ -1848,7 +1815,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1870,7 +1837,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index ea890e994..8a5760516 100644 --- 
a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -98,7 +98,7 @@ pub fn translate_insert( expr, column_registers_start + col, None, - None, + 0, )?; } program.emit_insn(Insn::Yield { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 682ed6c4b..cd08f619f 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, rc::Rc}; +use std::rc::Rc; use sqlite3_parser::ast; @@ -6,7 +6,7 @@ use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Operator, Plan, ProjectionColumn, Search, + Direction, IterationDirection, Plan, Search, SourceOperator, }; /** @@ -14,49 +14,45 @@ use super::plan::{ * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCache)> { - let mut expr_result_cache = ExpressionResultCache::new(); +pub fn optimize_plan(mut select_plan: Plan) -> Result { push_predicates( - &mut select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.where_clause, &select_plan.referenced_tables, )?; - if eliminate_constants(&mut select_plan.root_operator)? + if eliminate_constants(&mut select_plan.source)? 
== ConstantConditionEliminationResult::ImpossibleCondition { - return Ok(( - Plan { - root_operator: Operator::Nothing, - referenced_tables: vec![], - available_indexes: vec![], - }, - expr_result_cache, - )); + return Ok(Plan { + source: SourceOperator::Nothing, + ..select_plan + }); } use_indexes( - &mut select_plan.root_operator, + &mut select_plan.source, &select_plan.referenced_tables, &select_plan.available_indexes, )?; eliminate_unnecessary_orderby( - &mut select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.order_by, &select_plan.referenced_tables, &select_plan.available_indexes, )?; - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache); - Ok((select_plan, expr_result_cache)) + Ok(select_plan) } fn _operator_is_already_ordered_by( - operator: &mut Operator, + operator: &mut SourceOperator, key: &mut ast::Expr, referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result { match operator { - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => Ok(key.is_primary_key_of(table_reference.table_index)), - Operator::Search { + SourceOperator::Search { table_reference, search, .. @@ -77,61 +73,53 @@ fn _operator_is_already_ordered_by( Ok(index_is_the_same) } }, - Operator::Join { left, .. } => { + SourceOperator::Join { left, .. } => { _operator_is_already_ordered_by(left, key, referenced_tables, available_indexes) } - Operator::Aggregate { source, .. } => { - _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) - } - Operator::Projection { source, .. 
} => { - _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) - } _ => Ok(false), } } fn eliminate_unnecessary_orderby( - operator: &mut Operator, + operator: &mut SourceOperator, + order_by: &mut Option>, referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result<()> { - match operator { - Operator::Order { source, key, .. } => { - if key.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); - } - - let (key, direction) = key.first_mut().unwrap(); - - let already_ordered = _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes)?; - - if already_ordered { - push_scan_direction(source, direction); - - *operator = source.take_ownership(); - } - Ok(()) - } - Operator::Limit { source, .. } => { - eliminate_unnecessary_orderby(source, referenced_tables, available_indexes)?; - Ok(()) - } - _ => Ok(()), + if order_by.is_none() { + return Ok(()); } + + let o = order_by.as_mut().unwrap(); + + if o.len() != 1 { + // TODO: handle multiple order by keys + return Ok(()); + } + + let (key, _) = o.first_mut().unwrap(); + + let already_ordered = + _operator_is_already_ordered_by(operator, key, referenced_tables, available_indexes)?; + + if already_ordered { + *order_by = None; + } + + Ok(()) } /** * Use indexes where possible */ fn use_indexes( - operator: &mut Operator, + operator: &mut SourceOperator, referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result<()> { match operator { - Operator::Search { .. } => Ok(()), - Operator::Scan { + SourceOperator::Search { .. 
} => Ok(()), + SourceOperator::Scan { table_reference, predicates: filter, id, @@ -162,12 +150,11 @@ fn use_indexes( } Either::Right(index_search) => { fs.remove(i); - *operator = Operator::Search { + *operator = SourceOperator::Search { id: *id, table_reference: table_reference.clone(), predicates: Some(fs.clone()), search: index_search, - step: 0, }; return Ok(()); @@ -177,32 +164,12 @@ fn use_indexes( Ok(()) } - Operator::Aggregate { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Filter { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Limit { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { use_indexes(left, referenced_tables, available_indexes)?; use_indexes(right, referenced_tables, available_indexes)?; Ok(()) } - Operator::Order { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Projection { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -214,33 +181,11 @@ enum ConstantConditionEliminationResult { // removes predicates that are always true // returns a ConstantEliminationResult indicating whether any predicates are always false -fn eliminate_constants(operator: &mut Operator) -> Result { +fn eliminate_constants( + operator: &mut SourceOperator, +) -> Result { match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - let predicate = &predicates[i]; - if predicate.is_always_true()? { - predicates.remove(i); - } else if predicate.is_always_false()? 
{ - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { - i += 1; - } - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - eliminate_constants(operator)?; - } else { - eliminate_constants(source)?; - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -278,44 +223,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *source = Box::new(Operator::Nothing); - } - // Aggregation operator can return a row even if the source is empty e.g. count(1) from users where 0 - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Limit { source, .. } => { - let constant_elimination_result = eliminate_constants(source)?; - if constant_elimination_result - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - } - Ok(constant_elimination_result) - } - Operator::Order { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Projection { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Scan { predicates, .. } => { + SourceOperator::Scan { predicates, .. } => { if let Some(ps) = predicates { let mut i = 0; while i < ps.len() { @@ -335,7 +243,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { + SourceOperator::Search { predicates, .. 
} => { if let Some(predicates) = predicates { let mut i = 0; while i < predicates.len() { @@ -352,7 +260,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result Ok(ConstantConditionEliminationResult::Continue), + SourceOperator::Nothing => Ok(ConstantConditionEliminationResult::Continue), } } @@ -360,42 +268,35 @@ fn eliminate_constants(operator: &mut Operator) -> Result>, referenced_tables: &Vec, ) -> Result<()> { - match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - // try to push the predicate to the source - // if it succeeds, remove the predicate from the filter - let predicate_owned = predicates[i].take_ownership(); - let Some(predicate) = push_predicate(source, predicate_owned, referenced_tables)? - else { - predicates.remove(i); - continue; - }; - predicates[i] = predicate; - i += 1; - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - } - - Ok(()) + if let Some(predicates) = where_clause { + let mut i = 0; + while i < predicates.len() { + let predicate = predicates[i].take_ownership(); + let Some(predicate) = push_predicate(operator, predicate, referenced_tables)? else { + predicates.remove(i); + continue; + }; + predicates[i] = predicate; + i += 1; } - Operator::Join { + if predicates.is_empty() { + *where_clause = None; + } + } + match operator { + SourceOperator::Join { left, right, predicates, outer, .. } => { - push_predicates(left, referenced_tables)?; - push_predicates(right, referenced_tables)?; + push_predicates(left, where_clause, referenced_tables)?; + push_predicates(right, where_clause, referenced_tables)?; if predicates.is_none() { return Ok(()); @@ -433,26 +334,9 @@ fn push_predicates( Ok(()) } - Operator::Aggregate { source, .. } => { - push_predicates(source, referenced_tables)?; - - Ok(()) - } - Operator::Limit { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Order { source, .. 
} => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Projection { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Scan { .. } => Ok(()), - Operator::Search { .. } => Ok(()), - Operator::Nothing => Ok(()), + SourceOperator::Scan { .. } => Ok(()), + SourceOperator::Search { .. } => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -461,12 +345,12 @@ fn push_predicates( Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) */ fn push_predicate( - operator: &mut Operator, + operator: &mut SourceOperator, predicate: ast::Expr, referenced_tables: &Vec, ) -> Result> { match operator { - Operator::Scan { + SourceOperator::Scan { predicates, table_reference, .. @@ -497,22 +381,8 @@ fn push_predicate( Ok(None) } - Operator::Search { .. } => Ok(Some(predicate)), - Operator::Filter { - source, - predicates: ps, - .. - } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - ps.push(push_result.unwrap()); - - Ok(None) - } - Operator::Join { + SourceOperator::Search { .. } => Ok(Some(predicate)), + SourceOperator::Join { left, right, predicates: join_on_preds, @@ -552,46 +422,13 @@ fn push_predicate( Ok(None) } - Operator::Aggregate { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Limit { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Order { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Projection { source, .. 
} => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Nothing => Ok(Some(predicate)), + SourceOperator::Nothing => Ok(Some(predicate)), } } -fn push_scan_direction(operator: &mut Operator, direction: &Direction) { +fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { match operator { - Operator::Projection { source, .. } => push_scan_direction(source, direction), - Operator::Scan { iter_dir, .. } => { + SourceOperator::Scan { iter_dir, .. } => { if iter_dir.is_none() { match direction { Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), @@ -603,381 +440,6 @@ fn push_scan_direction(operator: &mut Operator, direction: &Direction) { } } -#[derive(Debug)] -pub struct ExpressionResultCache { - resultmap: HashMap, - keymap: HashMap>, -} - -#[derive(Debug)] -pub struct CachedResult { - pub register_idx: usize, - pub source_expr: ast::Expr, -} - -const OPERATOR_ID_MULTIPLIER: usize = 10000; - -/** - ExpressionResultCache is a cache for the results of expressions that are computed in the query plan, - or more precisely, the VM registers that hold the results of these expressions. - - Right now the cache is mainly used to avoid recomputing e.g. the result of an aggregation expression - e.g. SELECT t.a, SUM(t.b) FROM t GROUP BY t.a ORDER BY SUM(t.b) -*/ -impl ExpressionResultCache { - pub fn new() -> Self { - ExpressionResultCache { - resultmap: HashMap::new(), - keymap: HashMap::new(), - } - } - - /** - Store the result of an expression that is computed in the query plan. - The result is stored in a VM register. A copy of the expression AST node is - stored as well, so that parent operators can use it to compare their own expressions - with the one that was computed in a child operator. - - This is a weakness of our current reliance on a 3rd party AST library, as we can't - e.g. 
modify the AST to add identifiers to nodes or replace nodes with some kind of - reference to a register, etc. - */ - pub fn cache_result_register( - &mut self, - operator_id: usize, - result_column_idx: usize, - register_idx: usize, - expr: ast::Expr, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.resultmap.insert( - key, - CachedResult { - register_idx, - source_expr: expr, - }, - ); - } - - /** - Set a mapping from a parent operator to a child operator, so that the parent operator - can look up the register of a result that was computed in the child operator. - E.g. "Parent operator's result column 3 is computed in child operator 5, result column 2" - */ - pub fn set_precomputation_key( - &mut self, - operator_id: usize, - result_column_idx: usize, - child_operator_id: usize, - child_operator_result_column_idx_mask: usize, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - - let mut values = Vec::new(); - for i in 0..64 { - if (child_operator_result_column_idx_mask >> i) & 1 == 1 { - values.push(child_operator_id * OPERATOR_ID_MULTIPLIER + i); - } - } - self.keymap.insert(key, values); - } - - /** - Get the cache entries for a given operator and result column index. - There may be multiple cached entries, e.g. a binary operator's both - arms may have been cached. - */ - pub fn get_cached_result_registers( - &self, - operator_id: usize, - result_column_idx: usize, - ) -> Option> { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.keymap.get(&key).and_then(|keys| { - let mut results = Vec::new(); - for key in keys { - if let Some(result) = self.resultmap.get(key) { - results.push(result); - } - } - if results.is_empty() { - None - } else { - Some(results) - } - }) - } -} - -type ResultColumnIndexBitmask = usize; - -/** - Find all result columns in an operator that match an expression, either fully or partially. 
- This is used to find the result columns that are computed in an operator and that are used - in a parent operator, so that the parent operator can look up the register that holds the result - of the child operator's expression. - - The result is returned as a bitmask due to performance neuroticism. A limitation of this is that - we can only handle 64 result columns per operator. -*/ -fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr: &ast::Expr, - operator: &Operator, -) -> ResultColumnIndexBitmask { - let exact_match = match operator { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut idx = 0; - let mut mask = 0; - for agg in aggregates.iter() { - if agg.original_expr == *expr { - mask |= 1 << idx; - } - idx += 1; - } - - if let Some(group_by) = group_by { - for g in group_by.iter() { - if g == expr { - mask |= 1 << idx; - } - idx += 1 - } - } - - mask - } - Operator::Filter { .. } => 0, - Operator::Limit { .. } => 0, - Operator::Join { .. } => 0, - Operator::Order { .. } => 0, - Operator::Projection { expressions, .. } => { - let mut mask = 0; - for (idx, e) in expressions.iter().enumerate() { - match e { - ProjectionColumn::Column(c) => { - if c == expr { - mask |= 1 << idx; - } - } - ProjectionColumn::Star => {} - ProjectionColumn::TableStar(_) => {} - } - } - - mask - } - Operator::Scan { .. } => 0, - Operator::Search { .. 
} => 0, - Operator::Nothing => 0, - }; - - if exact_match != 0 { - return exact_match; - } - - match expr { - ast::Expr::Between { - lhs, - not: _, - start, - end, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(start, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(end, operator); - mask - } - ast::Expr::Binary(lhs, _op, rhs) => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Case { - base, - when_then_pairs, - else_expr, - } => { - let mut mask = 0; - if let Some(base) = base { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(base, operator); - } - for (w, t) in when_then_pairs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(w, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(t, operator); - } - if let Some(e) = else_expr { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Cast { expr, type_name: _ } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Collate(expr, _collation) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::DoublyQualified(_schema, _tbl, _ident) => 0, - ast::Expr::Exists(_) => 0, - ast::Expr::FunctionCall { - name: _, - distinctness: _, - args, 
- order_by: _, - filter_over: _, - } => { - let mut mask = 0; - if let Some(args) = args { - for a in args.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(a, operator); - } - } - mask - } - ast::Expr::FunctionCallStar { - name: _, - filter_over: _, - } => 0, - ast::Expr::Id(_) => unreachable!("Ids have been bound to Column references"), - ast::Expr::Column { .. } => 0, - ast::Expr::InList { lhs, not: _, rhs } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - if let Some(rhs) = rhs { - for r in rhs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(r, operator); - } - } - mask - } - ast::Expr::InSelect { - lhs, - not: _, - rhs: _, - } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - lhs, operator, - ) - } - ast::Expr::InTable { - lhs: _, - not: _, - rhs: _, - args: _, - } => 0, - ast::Expr::IsNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Like { - lhs, - not: _, - op: _, - rhs, - escape: _, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Literal(_) => 0, - ast::Expr::Name(_) => 0, - ast::Expr::NotNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Parenthesized(expr) => { - let mut mask = 0; - for e in expr.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Qualified(_, _) => { - 
unreachable!("Qualified expressions have been bound to Column references") - } - ast::Expr::Raise(_, _) => 0, - ast::Expr::Subquery(_) => 0, - ast::Expr::Unary(_op, expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Variable(_) => 0, - } -} - -/** - * This function is used to find all the expressions that are shared between the parent operator and the child operators. - * If an expression is shared between the parent and child operators, then the parent operator should not recompute the expression. - * Instead, it should use the result of the expression that was computed by the child operator. -*/ -fn find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - operator: &Operator, - expr_result_cache: &mut ExpressionResultCache, -) { - match operator { - Operator::Aggregate { - source, - .. - } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - source, expr_result_cache, - ) - } - Operator::Filter { .. } => unreachable!(), - Operator::Limit { source, .. } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Join { .. } => {} - Operator::Order { source, key, .. } => { - for (idx, (expr, _)) in key.iter().enumerate() { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Projection { source, expressions, .. 
} => { - for (idx, expr) in expressions.iter().enumerate() { - if let ProjectionColumn::Column(expr) = expr { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Scan { .. } => {} - Operator::Search { .. } => {} - Operator::Nothing => {} - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantPredicate { AlwaysTrue, @@ -1286,8 +748,8 @@ impl TakeOwnership for ast::Expr { } } -impl TakeOwnership for Operator { +impl TakeOwnership for SourceOperator { fn take_ownership(&mut self) -> Self { - std::mem::replace(self, Operator::Nothing) + std::mem::replace(self, SourceOperator::Nothing) } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8c1eff7be..43fcb4e96 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -12,16 +12,29 @@ use crate::{ Result, }; +#[derive(Debug)] +pub enum ResultSetColumn { + Scalar(ast::Expr), + Agg(Aggregate), + ComputedAgg(ast::Expr), +} + #[derive(Debug)] pub struct Plan { - pub root_operator: Operator, + pub source: SourceOperator, + pub result_columns: Vec, + pub where_clause: Option>, + pub group_by: Option>, + pub order_by: Option>, + pub aggregates: Option>, + pub limit: Option, pub referenced_tables: Vec, pub available_indexes: Vec>, } impl Display for Plan { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.root_operator) + write!(f, "{}", self.source) } } @@ -45,69 +58,17 @@ pub enum IterationDirection { TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. 
*/ #[derive(Clone, Debug)] -pub enum Operator { - // Aggregate operator - // This operator is used to compute aggregate functions like SUM, AVG, COUNT, etc. - // It takes a source operator and a list of aggregate functions to compute. - // GROUP BY is not supported yet. - Aggregate { - id: usize, - source: Box, - aggregates: Vec, - group_by: Option>, - step: usize, - }, - // Filter operator - // This operator is used to filter rows from the source operator. - // It takes a source operator and a list of predicates to evaluate. - // Only rows for which all predicates evaluate to true are passed to the next operator. - // Generally filter operators will only exist in unoptimized plans, - // as the optimizer will try to push filters down to the lowest possible level, - // e.g. a table scan. - Filter { - id: usize, - source: Box, - predicates: Vec, - }, - // Limit operator - // This operator is used to limit the number of rows returned by the source operator. - Limit { - id: usize, - source: Box, - limit: usize, - step: usize, - }, +pub enum SourceOperator { // Join operator // This operator is used to join two source operators. // It takes a left and right source operator, a list of predicates to evaluate, // and a boolean indicating whether it is an outer join. Join { id: usize, - left: Box, - right: Box, + left: Box, + right: Box, predicates: Option>, outer: bool, - step: usize, - }, - // Order operator - // This operator is used to sort the rows returned by the source operator. - Order { - id: usize, - source: Box, - key: Vec<(ast::Expr, Direction)>, - step: usize, - }, - // Projection operator - // This operator is used to project columns from the source operator. - // It takes a source operator and a list of expressions to evaluate. - // e.g. SELECT foo, bar FROM t1 - // In this example, the expressions would be [foo, bar] - // and the source operator would be a Scan operator for table t1. 
- Projection { - id: usize, - source: Box, - expressions: Vec, - step: usize, }, // Scan operator // This operator is used to scan a table. @@ -122,7 +83,6 @@ pub enum Operator { id: usize, table_reference: BTreeTableReference, predicates: Option>, - step: usize, iter_dir: Option, }, // Search operator @@ -133,7 +93,6 @@ pub enum Operator { table_reference: BTreeTableReference, search: Search, predicates: Option>, - step: usize, }, // Nothing operator // This operator is used to represent an empty query. @@ -168,106 +127,30 @@ pub enum Search { }, } -#[derive(Clone, Debug)] -pub enum ProjectionColumn { - Column(ast::Expr), - Star, - TableStar(BTreeTableReference), -} - -impl ProjectionColumn { +impl SourceOperator { pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { match self { - ProjectionColumn::Column(_) => 1, - ProjectionColumn::Star => { - let mut count = 0; - for table_reference in referenced_tables { - count += table_reference.table.columns.len(); - } - count - } - ProjectionColumn::TableStar(table_reference) => table_reference.table.columns.len(), - } - } -} - -impl Operator { - pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { - match self { - Operator::Aggregate { - group_by, - aggregates, - .. - } => aggregates.len() + group_by.as_ref().map_or(0, |g| g.len()), - Operator::Filter { source, .. } => source.column_count(referenced_tables), - Operator::Limit { source, .. } => source.column_count(referenced_tables), - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { left.column_count(referenced_tables) + right.column_count(referenced_tables) } - Operator::Order { source, .. } => source.column_count(referenced_tables), - Operator::Projection { expressions, .. } => expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(), - Operator::Scan { + SourceOperator::Scan { table_reference, .. 
} => table_reference.table.columns.len(), - Operator::Search { + SourceOperator::Search { table_reference, .. } => table_reference.table.columns.len(), - Operator::Nothing => 0, + SourceOperator::Nothing => 0, } } pub fn column_names(&self) -> Vec { match self { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut names = vec![]; - for agg in aggregates.iter() { - names.push(agg.func.to_string().to_string()); - } - - if let Some(group_by) = group_by { - for expr in group_by.iter() { - match expr { - ast::Expr::Id(ident) => names.push(ident.0.clone()), - ast::Expr::Qualified(tbl, ident) => { - names.push(format!("{}.{}", tbl.0, ident.0)) - } - e => names.push(e.to_string()), - } - } - } - - names - } - Operator::Filter { source, .. } => source.column_names(), - Operator::Limit { source, .. } => source.column_names(), - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { let mut names = left.column_names(); names.extend(right.column_names()); names } - Operator::Order { source, .. } => source.column_names(), - Operator::Projection { expressions, .. } => expressions - .iter() - .map(|e| match e { - ProjectionColumn::Column(expr) => match expr { - ast::Expr::Id(ident) => ident.0.clone(), - ast::Expr::Qualified(tbl, ident) => format!("{}.{}", tbl.0, ident.0), - _ => "expr".to_string(), - }, - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect(), - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => table_reference .table @@ -275,7 +158,7 @@ impl Operator { .iter() .map(|c| c.name.clone()) .collect(), - Operator::Search { + SourceOperator::Search { table_reference, .. 
} => table_reference .table @@ -283,21 +166,16 @@ impl Operator { .iter() .map(|c| c.name.clone()) .collect(), - Operator::Nothing => vec![], + SourceOperator::Nothing => vec![], } } pub fn id(&self) -> usize { match self { - Operator::Aggregate { id, .. } => *id, - Operator::Filter { id, .. } => *id, - Operator::Limit { id, .. } => *id, - Operator::Join { id, .. } => *id, - Operator::Order { id, .. } => *id, - Operator::Projection { id, .. } => *id, - Operator::Scan { id, .. } => *id, - Operator::Search { id, .. } => *id, - Operator::Nothing => unreachable!(), + SourceOperator::Join { id, .. } => *id, + SourceOperator::Scan { id, .. } => *id, + SourceOperator::Search { id, .. } => *id, + SourceOperator::Nothing => unreachable!(), } } } @@ -337,10 +215,10 @@ impl Display for Aggregate { } // For EXPLAIN QUERY PLAN -impl Display for Operator { +impl Display for SourceOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt_operator( - operator: &Operator, + operator: &SourceOperator, f: &mut Formatter, level: usize, last: bool, @@ -356,34 +234,7 @@ impl Display for Operator { }; match operator { - Operator::Aggregate { - source, aggregates, .. - } => { - // e.g. Aggregate count(*), sum(x) - let aggregates_display_string = aggregates - .iter() - .map(|agg| agg.to_string()) - .collect::>() - .join(", "); - writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Filter { - source, predicates, .. - } => { - let predicates_string = predicates - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(" AND "); - writeln!(f, "{}FILTER {}", indent, predicates_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Limit { source, limit, .. 
} => { - writeln!(f, "{}TAKE {}", indent, limit)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -408,35 +259,7 @@ impl Display for Operator { fmt_operator(left, f, level + 1, false)?; fmt_operator(right, f, level + 1, true) } - Operator::Order { source, key, .. } => { - let sort_keys_string = key - .iter() - .map(|(expr, dir)| format!("{} {}", expr, dir)) - .collect::>() - .join(", "); - writeln!(f, "{}SORT {}", indent, sort_keys_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Projection { - source, - expressions, - .. - } => { - let expressions = expressions - .iter() - .map(|expr| match expr { - ProjectionColumn::Column(c) => c.to_string(), - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect::>() - .join(", "); - writeln!(f, "{}PROJECT {}", indent, expressions)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Scan { + SourceOperator::Scan { table_reference, predicates: filter, .. @@ -464,7 +287,7 @@ impl Display for Operator { }?; Ok(()) } - Operator::Search { + SourceOperator::Search { table_reference, search, .. @@ -487,7 +310,7 @@ impl Display for Operator { } Ok(()) } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } writeln!(f, "QUERY PLAN")?; @@ -505,35 +328,15 @@ impl Display for Operator { */ pub fn get_table_ref_bitmask_for_operator<'a>( tables: &'a Vec, - operator: &'a Operator, + operator: &'a SourceOperator, ) -> Result { let mut table_refs_mask = 0; match operator { - Operator::Aggregate { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Filter { - source, predicates, .. 
- } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - for predicate in predicates { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?; - } - } - Operator::Limit { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?; table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?; } - Operator::Order { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Projection { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => { table_refs_mask |= 1 @@ -542,7 +345,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Search { + SourceOperator::Search { table_reference, .. 
} => { table_refs_mask |= 1 @@ -551,7 +354,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Nothing => {} + SourceOperator::Nothing => {} } Ok(table_refs_mask) } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 7f803e514..3e6b73630 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,4 +1,6 @@ -use super::plan::{Aggregate, BTreeTableReference, Direction, Operator, Plan, ProjectionColumn}; +use super::plan::{ + Aggregate, BTreeTableReference, Direction, Plan, ResultSetColumn, SourceOperator, +}; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; @@ -66,6 +68,7 @@ fn bind_column_references( referenced_tables: &[BTreeTableReference], ) -> Result<()> { match expr { + ast::Expr::AggRef { .. } => unreachable!(), ast::Expr::Id(id) => { let mut match_result = None; for (tbl_idx, table) in referenced_tables.iter().enumerate() { @@ -237,146 +240,157 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - projection_expressions.push(ProjectionColumn::Star); - } - ast::ResultColumn::TableStar(name) => { - let name_normalized = normalize_ident(name.0.as_str()); - let referenced_table = referenced_tables - .iter() - .find(|t| t.table_identifier == name_normalized); - - if referenced_table.is_none() { - crate::bail_parse_error!("Table {} not found", name.0); + let mut aggregate_expressions = Vec::new(); + for column in columns.clone() { + match column { + ast::ResultColumn::Star => { + for table_reference in plan.referenced_tables.iter() { + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns.push(ResultSetColumn::Scalar( + ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_primary_key: col.primary_key, + }, + )); } - let table_reference = referenced_table.unwrap(); - projection_expressions - .push(ProjectionColumn::TableStar(table_reference.clone())); } - ast::ResultColumn::Expr(mut expr, _) => { - bind_column_references(&mut expr, &referenced_tables)?; - projection_expressions.push(ProjectionColumn::Column(expr.clone())); - match expr.clone() { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - order_by: _, - } => { - let args_count = if let Some(args) = &args { - args.len() - } else { - 0 - }; - match Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - args_count, - ) { - Ok(Func::Agg(f)) => { - aggregate_expressions.push(Aggregate { - func: f, - args: args.unwrap(), - original_expr: expr.clone(), - }); - } - Ok(_) => { - resolve_aggregates(&expr, &mut aggregate_expressions); - } - _ => {} - } - } - ast::Expr::FunctionCallStar { - name, - filter_over: _, - } => { - if let Ok(Func::Agg(f)) = Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - 0, - ) { - aggregate_expressions.push(Aggregate { + } + ast::ResultColumn::TableStar(name) => { + let name_normalized = 
normalize_ident(name.0.as_str()); + let referenced_table = plan + .referenced_tables + .iter() + .find(|t| t.table_identifier == name_normalized); + + if referenced_table.is_none() { + crate::bail_parse_error!("Table {} not found", name.0); + } + let table_reference = referenced_table.unwrap(); + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns + .push(ResultSetColumn::Scalar(ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_primary_key: col.primary_key, + })); + } + } + ast::ResultColumn::Expr(mut expr, _) => { + bind_column_references(&mut expr, &plan.referenced_tables)?; + match &expr { + ast::Expr::FunctionCall { + name, + distinctness: _, + args, + filter_over: _, + order_by: _, + } => { + let args_count = if let Some(args) = &args { + args.len() + } else { + 0 + }; + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + args_count, + ) { + Ok(Func::Agg(f)) => { + let agg = Aggregate { func: f, - args: vec![ast::Expr::Literal(ast::Literal::Numeric( - "1".to_string(), - ))], + args: args.as_ref().unwrap().clone(), original_expr: expr.clone(), - }); + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn::Agg(agg)); } + Ok(_) => { + resolve_aggregates(&expr, &mut aggregate_expressions); + } + _ => {} } - ast::Expr::Binary(lhs, _, rhs) => { - resolve_aggregates(&lhs, &mut aggregate_expressions); - resolve_aggregates(&rhs, &mut aggregate_expressions); + } + ast::Expr::FunctionCallStar { + name, + filter_over: _, + } => { + if let Ok(Func::Agg(f)) = Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + 0, + ) { + let agg = Aggregate { + func: f, + args: vec![ast::Expr::Literal(ast::Literal::Numeric( + "1".to_string(), + ))], + original_expr: expr.clone(), + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn::Agg(agg)); + } else { + 
crate::bail_parse_error!( + "Invalid aggregate function: {}", + name.0 + ); } - _ => {} + } + ast::Expr::Binary(lhs, _, rhs) => { + resolve_aggregates(&lhs, &mut aggregate_expressions); + resolve_aggregates(&rhs, &mut aggregate_expressions); + plan.result_columns + .push(ResultSetColumn::Scalar(expr.clone())); + } + e => { + plan.result_columns.push(ResultSetColumn::Scalar(e.clone())); } } } } - if let Some(group_by) = group_by.as_mut() { - for expr in group_by.exprs.iter_mut() { - bind_column_references(expr, &referenced_tables)?; - } - if aggregate_expressions.is_empty() { - crate::bail_parse_error!( - "GROUP BY clause without aggregate functions is not allowed" - ); - } - for scalar in projection_expressions.iter() { - match scalar { - ProjectionColumn::Column(_) => {} - _ => { - crate::bail_parse_error!( - "Only column references are allowed in the SELECT clause when using GROUP BY" - ); - } - } - } - } - if !aggregate_expressions.is_empty() { - operator = Operator::Aggregate { - source: Box::new(operator), - aggregates: aggregate_expressions, - group_by: group_by.map(|g| g.exprs), // TODO: support HAVING - id: operator_id_counter.get_next_id(), - step: 0, - } - } - - if !projection_expressions.is_empty() { - operator = Operator::Projection { - source: Box::new(operator), - expressions: projection_expressions, - id: operator_id_counter.get_next_id(), - step: 0, - }; - } } + if let Some(group_by) = group_by.as_mut() { + for expr in group_by.exprs.iter_mut() { + bind_column_references(expr, &plan.referenced_tables)?; + } + if aggregate_expressions.is_empty() { + crate::bail_parse_error!( + "GROUP BY clause without aggregate functions is not allowed" + ); + } + } + + plan.group_by = group_by.map(|g| g.exprs); + plan.aggregates = if aggregate_expressions.is_empty() { + None + } else { + Some(aggregate_expressions) + }; // Parse the ORDER BY clause if let Some(order_by) = select.order_by { @@ -402,7 +416,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: 
ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ let l = n.parse()?; - if l == 0 { - Operator::Nothing - } else { - Operator::Limit { - source: Box::new(operator), - limit: l, - id: operator_id_counter.get_next_id(), - step: 0, - } - } + Some(l) } _ => todo!(), } } // Return the unoptimized query plan - Ok(Plan { - root_operator: operator, - referenced_tables, - available_indexes: schema.indexes.clone().into_values().flatten().collect(), - }) + Ok(plan) } _ => todo!(), } @@ -456,9 +452,9 @@ fn parse_from( schema: &Schema, from: Option, operator_id_counter: &mut OperatorIdCounter, -) -> Result<(Operator, Vec)> { +) -> Result<(SourceOperator, Vec)> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok((Operator::Nothing, vec![])); + return Ok((SourceOperator::Nothing, vec![])); } let from = from.unwrap(); @@ -484,11 +480,10 @@ fn parse_from( _ => todo!(), }; - let mut operator = Operator::Scan { + let mut operator = SourceOperator::Scan { table_reference: first_table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }; @@ -498,13 +493,12 @@ fn parse_from( for join in from.joins.unwrap_or_default().into_iter() { let (right, outer, predicates) = parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; - operator = Operator::Join { + operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), predicates, outer, id: operator_id_counter.get_next_id(), - step: 0, }; table_index += 1; } @@ -518,7 +512,7 @@ fn parse_join( operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, table_index: usize, -) -> Result<(Operator, bool, Option>)> { +) -> Result<(SourceOperator, bool, Option>)> { let ast::JoinedSelectTable { operator, table, @@ -574,11 +568,10 @@ fn parse_join( } Ok(( - Operator::Scan { + SourceOperator::Scan { table_reference: table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }, outer, diff --git a/core/translate/select.rs b/core/translate/select.rs index 
2b946b0fd..0d16089eb 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -17,12 +17,7 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let (optimized_plan, expr_result_cache) = optimize_plan(select_plan)?; - println!("{:?}", expr_result_cache); - emit_program( - database_header, - optimized_plan, - expr_result_cache, - connection, - ) + let optimized_plan = optimize_plan(select_plan)?; + // println!("optimized_plan: {:?}", optimized_plan); + emit_program(database_header, optimized_plan, connection) } diff --git a/vendored/sqlite3-parser/src/parser/ast/fmt.rs b/vendored/sqlite3-parser/src/parser/ast/fmt.rs index 80f87eefb..6b0271919 100644 --- a/vendored/sqlite3-parser/src/parser/ast/fmt.rs +++ b/vendored/sqlite3-parser/src/parser/ast/fmt.rs @@ -638,6 +638,7 @@ impl ToTokens for Expr { } Self::Id(id) => id.to_tokens(s), Self::Column { .. } => Ok(()), + Self::AggRef { .. } => Ok(()), Self::InList { lhs, not, rhs } => { lhs.to_tokens(s)?; if *not { diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 29ec84dd6..ac45b5170 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -338,6 +338,11 @@ pub enum Expr { /// is the column a primary key is_primary_key: bool, }, + /// AggRef is a reference to a computed aggregate + AggRef { + /// index of the aggregate in the aggregates vector parsed from the query + index: usize, + }, /// `IN` InList { /// expression