diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index afea359cd..cd5ab9fd1 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -29,11 +29,11 @@ use super::plan::{ResultSetColumn, SourceOperator}; #[derive(Debug)] pub struct LeftJoinMetadata { // integer register that holds a flag that is set to true if the current row has a match for the left join - pub match_flag_register: usize, + pub reg_match_flag: usize, // label for the instruction that sets the match flag to true - pub set_match_flag_true_label: BranchOffset, + pub label_match_flag_set_true: BranchOffset, // label for the instruction that checks if the match flag is true - pub check_match_flag_label: BranchOffset, + pub label_match_flag_check_value: BranchOffset, } // Metadata for handling ORDER BY operations @@ -42,7 +42,7 @@ pub struct SortMetadata { // cursor id for the Sorter table where the sorted rows are stored pub sort_cursor: usize, // register where the sorter data is inserted and later retrieved from - pub sorter_data_register: usize, + pub reg_sorter_data: usize, } // Metadata for handling GROUP BY operations @@ -51,27 +51,21 @@ pub struct GroupByMetadata { // Cursor ID for the Sorter table where the grouped rows are stored pub sort_cursor: usize, // Label for the subroutine that clears the accumulator registers (temporary storage for per-group aggregate calculations) - pub subroutine_accumulator_clear_label: BranchOffset, - // Register holding the return offset for the accumulator clear subroutine - pub subroutine_accumulator_clear_return_offset_register: usize, - // Label for the subroutine that outputs the accumulator contents - pub subroutine_accumulator_output_label: BranchOffset, - // Register holding the return offset for the accumulator output subroutine - pub subroutine_accumulator_output_return_offset_register: usize, + pub label_subrtn_acc_clear: BranchOffset, // Label for the instruction that sets the accumulator indicator to true (indicating data exists in the accumulator for the current group) - pub accumulator_indicator_set_true_label: BranchOffset, + pub label_acc_indicator_set_flag_true: BranchOffset, + // Register holding the return offset for the accumulator clear subroutine + pub reg_subrtn_acc_clear_return_offset: usize, // Register holding the key used for sorting in the Sorter - pub sorter_key_register: usize, + pub reg_sorter_key: usize, // Register holding a flag to abort the grouping process if necessary - pub abort_flag_register: usize, - // Register holding a boolean indicating whether there's data in the accumulator (used for aggregation) - pub data_in_accumulator_indicator_register: usize, + pub reg_abort_flag: usize, // Register holding the start of the accumulator group registers (i.e. the groups, not the aggregates) - pub group_exprs_accumulator_register: usize, + pub reg_group_exprs_acc: usize, // Starting index of the register(s) that hold the comparison result between the current row and the previous row // The comparison result is used to determine if the current row belongs to the same group as the previous row // Each group by expression has a corresponding register - pub group_exprs_comparison_register: usize, + pub reg_group_exprs_cmp: usize, } /// Jump labels for each loop in the query's main execution loop @@ -85,34 +79,34 @@ pub struct LoopLabels { loop_end: BranchOffset, } -/// The Metadata struct holds various information and labels used during bytecode generation. +/// The TranslateCtx struct holds various information and labels used during bytecode generation. /// It is used for maintaining state and control flow during the bytecode /// generation process. #[derive(Debug)] -pub struct Metadata { +pub struct TranslateCtx { // A typical query plan is a nested loop. Each loop has its own LoopLabels (see the definition of LoopLabels for more details) - loop_labels: HashMap, + labels_main_loop: HashMap, // label for the instruction that jumps to the next phase of the query after the main loop // we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.) - after_main_loop_label: Option, - // metadata for the group by operator - group_by_metadata: Option, - // metadata for the order by operator - sort_metadata: Option, - // mapping between Join operator id and associated metadata (for left joins only) - left_joins: HashMap, + label_main_loop_end: Option, // First register of the aggregation results - pub aggregation_start_register: Option, + pub reg_agg_start: Option, // First register of the result columns of the query - pub result_column_start_register: Option, + pub reg_result_cols_start: Option, + // The register holding the limit value, if any. + pub reg_limit: Option, + // metadata for the group by operator + meta_group_by: Option, + // metadata for the order by operator + meta_sort: Option, + // mapping between Join operator id and associated metadata (for left joins only) + meta_left_joins: HashMap, // We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter. // This vector holds the indexes of the result columns in the ORDER BY sorter. pub result_column_indexes_in_orderby_sorter: HashMap, // We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys. // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, - // The register holding the limit value, if any. - pub limit_reg: Option, } /// Used to distinguish database operations @@ -126,7 +120,7 @@ pub enum OperationMode { } /// Initialize the program with basic setup and return initial metadata and labels -fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { +fn prologue() -> Result<(ProgramBuilder, TranslateCtx, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); @@ -139,20 +133,20 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> let start_offset = program.offset(); - let metadata = Metadata { - loop_labels: HashMap::new(), - after_main_loop_label: None, - group_by_metadata: None, - left_joins: HashMap::new(), - sort_metadata: None, - aggregation_start_register: None, - result_column_start_register: None, + let t_ctx = TranslateCtx { + labels_main_loop: HashMap::new(), + label_main_loop_end: None, + reg_agg_start: None, + reg_limit: None, + reg_result_cols_start: None, + meta_group_by: None, + meta_left_joins: HashMap::new(), + meta_sort: None, result_column_indexes_in_orderby_sorter: HashMap::new(), result_columns_to_skip_in_orderby_sorter: None, - limit_reg: None, }; - Ok((program, metadata, init_label, start_offset)) + Ok((program, t_ctx, init_label, start_offset)) } /// Clean up and finalize the program, resolving any remaining labels @@ -201,7 +195,7 @@ fn emit_program_for_select( connection: Weak, syms: &SymbolTable, ) -> Result { - let (mut program, mut metadata, init_label, start_offset) = prologue()?; + let (mut program, mut t_ctx, init_label, start_offset) = prologue()?; // Trivial exit on LIMIT 0 if let Some(limit) = plan.limit { @@ -212,7 +206,7 @@ fn emit_program_for_select( } // Emit main parts of query - emit_query(&mut program, &mut plan, &mut metadata, syms)?; + emit_query(&mut program, &mut plan, &mut t_ctx, syms)?; // Finalize program epilogue(&mut program, init_label, start_offset)?; @@ -296,17 +290,17 @@ fn emit_subquery( _ => unreachable!("emit_subquery called on non-subquery"), } let end_coroutine_label = program.allocate_label(); - let mut metadata = Metadata { - loop_labels: HashMap::new(), - after_main_loop_label: None, - group_by_metadata: None, - left_joins: HashMap::new(), - sort_metadata: None, - aggregation_start_register: None, - result_column_start_register: None, + let mut metadata = TranslateCtx { + labels_main_loop: HashMap::new(), + label_main_loop_end: None, + meta_group_by: None, + meta_left_joins: HashMap::new(), + meta_sort: None, + reg_agg_start: None, + reg_result_cols_start: None, result_column_indexes_in_orderby_sorter: HashMap::new(), result_columns_to_skip_in_orderby_sorter: None, - limit_reg: plan.limit.map(|_| program.alloc_register()), + reg_limit: plan.limit.map(|_| program.alloc_register()), }; let subquery_body_end_label = program.allocate_label(); program.emit_insn_with_label_dependency( @@ -323,7 +317,7 @@ fn emit_subquery( if let Some(limit) = plan.limit { program.emit_insn(Insn::Integer { value: limit as i64, - dest: metadata.limit_reg.unwrap(), + dest: metadata.reg_limit.unwrap(), }); } let result_column_start_reg = emit_query(program, plan, &mut metadata, syms)?; @@ -336,21 +330,21 @@ fn emit_subquery( fn emit_query( program: &mut ProgramBuilder, plan: &mut SelectPlan, - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, syms: &SymbolTable, ) -> Result { // Emit subqueries first so the results can be read in the main query loop. emit_subqueries(program, &mut plan.referenced_tables, &mut plan.source, syms)?; - if metadata.limit_reg.is_none() { - metadata.limit_reg = plan.limit.map(|_| program.alloc_register()); + if t_ctx.reg_limit.is_none() { + t_ctx.reg_limit = plan.limit.map(|_| program.alloc_register()); } // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 // however an aggregation might still happen, // e.g. SELECT COUNT(*) WHERE 0 returns a row with 0, not an empty result set let after_main_loop_label = program.allocate_label(); - metadata.after_main_loop_label = Some(after_main_loop_label); + t_ctx.label_main_loop_end = Some(after_main_loop_label); if plan.contains_constant_false_condition { program.emit_insn_with_label_dependency( Insn::Goto { @@ -361,33 +355,32 @@ fn emit_query( } // Allocate registers for result columns - metadata.result_column_start_register = - Some(program.alloc_registers(plan.result_columns.len())); + t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len())); // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { - init_order_by(program, order_by, metadata)?; + init_order_by(program, order_by, t_ctx)?; } if let Some(ref mut group_by) = plan.group_by { - init_group_by(program, group_by, &plan.aggregates, metadata)?; + init_group_by(program, group_by, &plan.aggregates, t_ctx)?; } - init_source(program, &plan.source, metadata, &OperationMode::SELECT)?; + init_source(program, &plan.source, t_ctx, &OperationMode::SELECT)?; // Set up main query execution loop open_loop( program, &mut plan.source, &plan.referenced_tables, - metadata, + t_ctx, syms, )?; // Process result columns and expressions in the inner loop - inner_loop_emit(program, plan, metadata, syms)?; + emit_loop(program, plan, t_ctx, syms)?; // Clean up and close the main execution loop - close_loop(program, &plan.source, metadata)?; + close_loop(program, &plan.source, t_ctx)?; program.resolve_label(after_main_loop_label, program.offset()); @@ -395,7 +388,7 @@ fn emit_query( // Handle GROUP BY and aggregation processing if let Some(ref mut group_by) = plan.group_by { - group_by_emit( + emit_group_by( program, &plan.result_columns, group_by, @@ -403,18 +396,18 @@ fn emit_query( &plan.aggregates, plan.limit, &plan.referenced_tables, - metadata, + t_ctx, syms, &plan.query_type, )?; } else if !plan.aggregates.is_empty() { // Handle aggregation without GROUP BY - agg_without_group_by_emit( + emit_ungrouped_aggregation( program, &plan.referenced_tables, &plan.result_columns, &plan.aggregates, - metadata, + t_ctx, syms, &plan.query_type, )?; @@ -425,18 +418,18 @@ fn emit_query( // Process ORDER BY results if needed if let Some(ref mut order_by) = plan.order_by { if order_by_necessary { - order_by_emit( + emit_order_by( program, order_by, &plan.result_columns, plan.limit, - metadata, + t_ctx, &plan.query_type, )?; } } - Ok(metadata.result_column_start_register.unwrap()) + Ok(t_ctx.reg_result_cols_start.unwrap()) } fn emit_program_for_delete( @@ -445,7 +438,7 @@ fn emit_program_for_delete( connection: Weak, syms: &SymbolTable, ) -> Result { - let (mut program, mut metadata, init_label, start_offset) = prologue()?; + let (mut program, mut t_ctx, init_label, start_offset) = prologue()?; // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 let after_main_loop_label = program.allocate_label(); @@ -462,7 +455,7 @@ fn emit_program_for_delete( init_source( &mut program, &plan.source, - &mut metadata, + &mut t_ctx, &OperationMode::DELETE, )?; @@ -471,14 +464,14 @@ fn emit_program_for_delete( &mut program, &mut plan.source, &plan.referenced_tables, - &mut metadata, + &mut t_ctx, syms, )?; - emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?; + emit_delete_insns(&mut program, &plan.source, &plan.limit, &t_ctx)?; // Clean up and close the main execution loop - close_loop(&mut program, &plan.source, &mut metadata)?; + close_loop(&mut program, &plan.source, &mut t_ctx)?; program.resolve_label(after_main_loop_label, program.offset()); @@ -492,12 +485,12 @@ fn emit_program_for_delete( fn init_order_by( program: &mut ProgramBuilder, order_by: &[(ast::Expr, Direction)], - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, ) -> Result<()> { let sort_cursor = program.alloc_cursor_id(None, None); - metadata.sort_metadata = Some(SortMetadata { + t_ctx.meta_sort = Some(SortMetadata { sort_cursor, - sorter_data_register: program.alloc_register(), + reg_sorter_data: program.alloc_register(), }); let mut order = Vec::new(); for (_, direction) in order_by.iter() { @@ -516,21 +509,19 @@ fn init_group_by( program: &mut ProgramBuilder, group_by: &GroupBy, aggregates: &[Aggregate], - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, ) -> Result<()> { let num_aggs = aggregates.len(); let sort_cursor = program.alloc_cursor_id(None, None); - let abort_flag_register = program.alloc_register(); - let data_in_accumulator_indicator_register = program.alloc_register(); - let group_exprs_comparison_register = program.alloc_registers(group_by.exprs.len()); - let group_exprs_accumulator_register = program.alloc_registers(group_by.exprs.len()); - let agg_exprs_start_reg = program.alloc_registers(num_aggs); - let sorter_key_register = program.alloc_register(); + let reg_abort_flag = program.alloc_register(); + let reg_group_exprs_cmp = program.alloc_registers(group_by.exprs.len()); + let reg_group_exprs_acc = program.alloc_registers(group_by.exprs.len()); + let reg_agg_exprs_start = program.alloc_registers(num_aggs); + let reg_sorter_key = program.alloc_register(); - let subroutine_accumulator_clear_label = program.allocate_label(); - let subroutine_accumulator_output_label = program.allocate_label(); + let label_subrtn_acc_clear = program.allocate_label(); let mut order = Vec::new(); const ASCENDING: i64 = 0; @@ -546,7 +537,7 @@ fn init_group_by( program.add_comment(program.offset(), "clear group by abort flag"); program.emit_insn(Insn::Integer { value: 0, - dest: abort_flag_register, + dest: reg_abort_flag, }); program.add_comment( @@ -554,9 +545,9 @@ fn init_group_by( "initialize group by comparison registers to NULL", ); program.emit_insn(Insn::Null { - dest: group_exprs_comparison_register, + dest: reg_group_exprs_cmp, dest_end: if group_by.exprs.len() > 1 { - Some(group_exprs_comparison_register + group_by.exprs.len() - 1) + Some(reg_group_exprs_cmp + group_by.exprs.len() - 1) } else { None }, @@ -564,29 +555,26 @@ fn init_group_by( program.add_comment(program.offset(), "go to clear accumulator subroutine"); - let subroutine_accumulator_clear_return_offset_register = program.alloc_register(); + let reg_subrtn_acc_clear_return_offset = program.alloc_register(); program.emit_insn_with_label_dependency( Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, + target_pc: label_subrtn_acc_clear, + return_reg: reg_subrtn_acc_clear_return_offset, }, - subroutine_accumulator_clear_label, + label_subrtn_acc_clear, ); - metadata.aggregation_start_register = Some(agg_exprs_start_reg); + t_ctx.reg_agg_start = Some(reg_agg_exprs_start); - metadata.group_by_metadata = Some(GroupByMetadata { + t_ctx.meta_group_by = Some(GroupByMetadata { sort_cursor, - subroutine_accumulator_clear_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_output_return_offset_register: program.alloc_register(), - accumulator_indicator_set_true_label: program.allocate_label(), - abort_flag_register, - data_in_accumulator_indicator_register, - group_exprs_accumulator_register, - group_exprs_comparison_register, - sorter_key_register, + label_subrtn_acc_clear, + label_acc_indicator_set_flag_true: program.allocate_label(), + reg_subrtn_acc_clear_return_offset, + reg_abort_flag, + reg_group_exprs_acc, + reg_group_exprs_cmp, + reg_sorter_key, }); Ok(()) } @@ -595,7 +583,7 @@ fn init_group_by( fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, mode: &OperationMode, ) -> Result<()> { let operator_id = source.id(); @@ -604,7 +592,7 @@ fn init_source( loop_start: program.allocate_label(), loop_end: program.allocate_label(), }; - metadata.loop_labels.insert(operator_id, loop_labels); + t_ctx.labels_main_loop.insert(operator_id, loop_labels); match source { SourceOperator::Subquery { .. } => Ok(()), @@ -617,14 +605,14 @@ fn init_source( } => { if *outer { let lj_metadata = LeftJoinMetadata { - match_flag_register: program.alloc_register(), - set_match_flag_true_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), + reg_match_flag: program.alloc_register(), + label_match_flag_set_true: program.allocate_label(), + label_match_flag_check_value: program.allocate_label(), }; - metadata.left_joins.insert(*id, lj_metadata); + t_ctx.meta_left_joins.insert(*id, lj_metadata); } - init_source(program, left, metadata, mode)?; - init_source(program, right, metadata, mode)?; + init_source(program, left, t_ctx, mode)?; + init_source(program, right, t_ctx, mode)?; Ok(()) } @@ -727,7 +715,7 @@ fn open_loop( program: &mut ProgramBuilder, source: &mut SourceOperator, referenced_tables: &[TableReference], - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, syms: &SymbolTable, ) -> Result<()> { match source { @@ -750,8 +738,8 @@ fn open_loop( jump_on_definition: 0, start_offset: coroutine_implementation_start, }); - let loop_labels = metadata - .loop_labels + let loop_labels = t_ctx + .labels_main_loop .get(id) .expect("subquery has no loop labels"); program.defer_label_resolution(loop_labels.loop_start, program.offset() as usize); @@ -800,25 +788,25 @@ fn open_loop( outer, .. } => { - open_loop(program, left, referenced_tables, metadata, syms)?; + open_loop(program, left, referenced_tables, t_ctx, syms)?; - let loop_labels = metadata - .loop_labels + let loop_labels = t_ctx + .labels_main_loop .get(&right.id()) .expect("right side of join has no loop labels"); let mut jump_target_when_false = loop_labels.next; if *outer { - let lj_meta = metadata.left_joins.get(id).unwrap(); + let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); program.emit_insn(Insn::Integer { value: 0, - dest: lj_meta.match_flag_register, + dest: lj_meta.reg_match_flag, }); - jump_target_when_false = lj_meta.check_match_flag_label; + jump_target_when_false = lj_meta.label_match_flag_check_value; } - open_loop(program, right, referenced_tables, metadata, syms)?; + open_loop(program, right, referenced_tables, t_ctx, syms)?; if let Some(predicates) = predicates { let jump_target_when_true = program.allocate_label(); @@ -841,14 +829,14 @@ fn open_loop( } if *outer { - let lj_meta = metadata.left_joins.get(id).unwrap(); + let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); program.defer_label_resolution( - lj_meta.set_match_flag_true_label, + lj_meta.label_match_flag_set_true, program.offset() as usize, ); program.emit_insn(Insn::Integer { value: 1, - dest: lj_meta.match_flag_register, + dest: lj_meta.reg_match_flag, }); } @@ -869,8 +857,8 @@ fn open_loop( } else { program.emit_insn(Insn::RewindAsync { cursor_id }); } - let loop_labels = metadata - .loop_labels + let loop_labels = t_ctx + .labels_main_loop .get(id) .expect("scan has no loop labels"); program.emit_insn_with_label_dependency( @@ -922,8 +910,8 @@ fn open_loop( .. } => { let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - let loop_labels = metadata - .loop_labels + let loop_labels = t_ctx + .labels_main_loop .get(id) .expect("search has no loop labels"); // Open the loop for the index search. @@ -1123,12 +1111,12 @@ fn open_loop( } /// SQLite (and so Limbo) processes joins as a nested loop. -/// The inner loop may emit rows to various destinations depending on the query: +/// The loop may emit rows to various destinations depending on the query: /// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) /// - an ORDER BY sorter (when there is no GROUP BY, but there is an ORDER BY) /// - an AggStep (the columns are collected for aggregation, which is finished later) /// - a QueryResult (there is none of the above, so the loop either emits a ResultRow, or if it's a subquery, yields to the parent query) -pub enum InnerLoopEmitTarget<'a> { +pub enum LoopEmitTarget<'a> { GroupBySorter { group_by: &'a GroupBy, aggregates: &'a Vec, @@ -1145,20 +1133,20 @@ pub enum InnerLoopEmitTarget<'a> { /// Emits the bytecode for the inner loop of a query. /// At this point the cursors for all tables have been opened and rewound. -fn inner_loop_emit( +fn emit_loop( program: &mut ProgramBuilder, plan: &mut SelectPlan, - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, syms: &SymbolTable, ) -> Result<()> { // if we have a group by, we emit a record into the group by sorter. if let Some(group_by) = &plan.group_by { - return inner_loop_source_emit( + return emit_loop_source( program, &plan.result_columns, &plan.aggregates, - metadata, - InnerLoopEmitTarget::GroupBySorter { + t_ctx, + LoopEmitTarget::GroupBySorter { group_by, aggregates: &plan.aggregates, }, @@ -1169,35 +1157,35 @@ fn inner_loop_emit( // if we DONT have a group by, but we have aggregates, we emit without ResultRow. // we also do not need to sort because we are emitting a single row. if !plan.aggregates.is_empty() { - return inner_loop_source_emit( + return emit_loop_source( program, &plan.result_columns, &plan.aggregates, - metadata, - InnerLoopEmitTarget::AggStep, + t_ctx, + LoopEmitTarget::AggStep, &plan.referenced_tables, syms, ); } // if we DONT have a group by, but we have an order by, we emit a record into the order by sorter. if let Some(order_by) = &plan.order_by { - return inner_loop_source_emit( + return emit_loop_source( program, &plan.result_columns, &plan.aggregates, - metadata, - InnerLoopEmitTarget::OrderBySorter { order_by }, + t_ctx, + LoopEmitTarget::OrderBySorter { order_by }, &plan.referenced_tables, syms, ); } // if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero. - inner_loop_source_emit( + emit_loop_source( program, &plan.result_columns, &plan.aggregates, - metadata, - InnerLoopEmitTarget::QueryResult { + t_ctx, + LoopEmitTarget::QueryResult { query_type: &plan.query_type, limit: plan.limit, }, @@ -1209,17 +1197,17 @@ fn inner_loop_emit( /// This is a helper function for inner_loop_emit, /// which does a different thing depending on the emit target. /// See the InnerLoopEmitTarget enum for more details. -fn inner_loop_source_emit( +fn emit_loop_source( program: &mut ProgramBuilder, result_columns: &[ResultSetColumn], aggregates: &[Aggregate], - metadata: &mut Metadata, - emit_target: InnerLoopEmitTarget, + t_ctx: &mut TranslateCtx, + emit_target: LoopEmitTarget, referenced_tables: &[TableReference], syms: &SymbolTable, ) -> Result<()> { match emit_target { - InnerLoopEmitTarget::GroupBySorter { + LoopEmitTarget::GroupBySorter { group_by, aggregates, } => { @@ -1254,35 +1242,35 @@ fn inner_loop_source_emit( // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. - let group_by_metadata = metadata.group_by_metadata.as_ref().unwrap(); + let group_by_metadata = t_ctx.meta_group_by.as_ref().unwrap(); sorter_insert( program, start_reg, column_count, group_by_metadata.sort_cursor, - group_by_metadata.sorter_key_register, + group_by_metadata.reg_sorter_key, ); Ok(()) } - InnerLoopEmitTarget::OrderBySorter { order_by } => { + LoopEmitTarget::OrderBySorter { order_by } => { order_by_sorter_insert( program, referenced_tables, order_by, result_columns, - &mut metadata.result_column_indexes_in_orderby_sorter, - metadata.sort_metadata.as_ref().unwrap(), + &mut t_ctx.result_column_indexes_in_orderby_sorter, + t_ctx.meta_sort.as_ref().unwrap(), None, syms, )?; Ok(()) } - InnerLoopEmitTarget::AggStep => { + LoopEmitTarget::AggStep => { let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(num_aggs); - metadata.aggregation_start_register = Some(start_reg); + t_ctx.reg_agg_start = Some(start_reg); // In planner.rs, we have collected all aggregates from the SELECT clause, including ones where the aggregate is embedded inside // a more complex expression. Some examples: length(sum(x)), sum(x) + avg(y), sum(x) + 1, etc. @@ -1304,7 +1292,7 @@ fn inner_loop_source_emit( } Ok(()) } - InnerLoopEmitTarget::QueryResult { query_type, limit } => { + LoopEmitTarget::QueryResult { query_type, limit } => { assert!( aggregates.is_empty(), "We should not get here with aggregates" @@ -1313,13 +1301,13 @@ fn inner_loop_source_emit( program, referenced_tables, result_columns, - metadata.result_column_start_register.unwrap(), + t_ctx.reg_result_cols_start.unwrap(), None, limit.map(|l| { ( l, - metadata.limit_reg.unwrap(), - metadata.after_main_loop_label.unwrap(), + t_ctx.reg_limit.unwrap(), + t_ctx.label_main_loop_end.unwrap(), ) }), syms, @@ -1337,10 +1325,10 @@ fn inner_loop_source_emit( fn close_loop( program: &mut ProgramBuilder, source: &SourceOperator, - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, ) -> Result<()> { - let loop_labels = *metadata - .loop_labels + let loop_labels = *t_ctx + .labels_main_loop .get(&source.id()) .expect("source has no loop labels"); match source { @@ -1363,18 +1351,18 @@ fn close_loop( outer, .. } => { - close_loop(program, right, metadata)?; + close_loop(program, right, t_ctx)?; if *outer { - let lj_meta = metadata.left_joins.get(id).unwrap(); + let lj_meta = t_ctx.meta_left_joins.get(id).unwrap(); // The left join match flag is set to 1 when there is any match on the right table // (e.g. SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a). // If the left join match flag has been set to 1, we jump to the next row on the outer table, // i.e. continue to the next row of t1 in our example. - program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.resolve_label(lj_meta.label_match_flag_check_value, program.offset()); let jump_offset = program.offset() + 3; program.emit_insn(Insn::IfPos { - reg: lj_meta.match_flag_register, + reg: lj_meta.reg_match_flag, target_pc: jump_offset, decrement_by: 0, }); @@ -1402,15 +1390,15 @@ fn close_loop( // next row in the left table. program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: lj_meta.set_match_flag_true_label, + target_pc: lj_meta.label_match_flag_set_true, }, - lj_meta.set_match_flag_true_label, + lj_meta.label_match_flag_set_true, ); assert!(program.offset() == jump_offset); } - close_loop(program, left, metadata)?; + close_loop(program, left, t_ctx)?; } SourceOperator::Scan { table_reference, @@ -1486,7 +1474,7 @@ fn emit_delete_insns( program: &mut ProgramBuilder, source: &SourceOperator, limit: &Option, - metadata: &Metadata, + t_ctx: &TranslateCtx, ) -> Result<()> { let cursor_id = match source { SourceOperator::Scan { @@ -1523,9 +1511,9 @@ fn emit_delete_insns( program.emit_insn_with_label_dependency( Insn::DecrJumpZero { reg: limit_reg, - target_pc: metadata.after_main_loop_label.unwrap(), + target_pc: t_ctx.label_main_loop_end.unwrap(), }, - metadata.after_main_loop_label.unwrap(), + t_ctx.label_main_loop_end.unwrap(), ) } @@ -1536,7 +1524,7 @@ fn emit_delete_insns( /// This is called when the main query execution loop has finished processing, /// and we now have data in the GROUP BY sorter. #[allow(clippy::too_many_arguments)] -fn group_by_emit( +fn emit_group_by( program: &mut ProgramBuilder, result_columns: &[ResultSetColumn], group_by: &GroupBy, @@ -1544,27 +1532,36 @@ fn group_by_emit( aggregates: &[Aggregate], limit: Option, referenced_tables: &[TableReference], - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, syms: &SymbolTable, query_type: &SelectQueryType, ) -> Result<()> { - let sort_loop_start_label = program.allocate_label(); - let grouping_done_label = program.allocate_label(); - let group_by_output_row_label = program.allocate_label(); - let group_by_end_label = program.allocate_label(); - let group_by_metadata = metadata.group_by_metadata.as_mut().unwrap(); + // Label for the first instruction of the grouping loop. + // This is the start of the loop that reads the sorted data and groups&aggregates it. + let label_grouping_loop_start = program.allocate_label(); + // Label for the instruction immediately after the grouping loop. + let label_grouping_loop_end = program.allocate_label(); + // Label for the instruction where a row for a finished group is output. + // Distinct from subroutine_accumulator_output_label, which is the start of the subroutine, but may still skip emitting a row. + let label_agg_final = program.allocate_label(); + // Label for the instruction immediately after the entire group by phase. + let label_group_by_end = program.allocate_label(); + // Label for the beginning of the subroutine that potentially outputs a row for a finished group. + let label_subrtn_acc_output = program.allocate_label(); + // Register holding the return offset of the subroutine that potentially outputs a row for a finished group. + let reg_subrtn_acc_output_return_offset = program.alloc_register(); + // Register holding a boolean indicating whether there's data in the accumulator (used for aggregation) + let reg_data_in_acc_flag = program.alloc_register(); + let group_by_metadata = t_ctx.meta_group_by.as_mut().unwrap(); let GroupByMetadata { - group_exprs_comparison_register: comparison_register, - subroutine_accumulator_output_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_clear_label, - data_in_accumulator_indicator_register, - accumulator_indicator_set_true_label, - group_exprs_accumulator_register: group_exprs_start_register, - abort_flag_register, - sorter_key_register, + reg_group_exprs_cmp, + reg_subrtn_acc_clear_return_offset, + reg_group_exprs_acc, + reg_abort_flag, + reg_sorter_key, + label_subrtn_acc_clear, + label_acc_indicator_set_flag_true, .. } = *group_by_metadata; @@ -1591,7 +1588,7 @@ fn group_by_emit( program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, - content_reg: sorter_key_register, + content_reg: reg_sorter_key, num_fields: sorter_column_count, }); @@ -1599,16 +1596,16 @@ fn group_by_emit( program.emit_insn_with_label_dependency( Insn::SorterSort { cursor_id: group_by_metadata.sort_cursor, - pc_if_empty: grouping_done_label, + pc_if_empty: label_grouping_loop_end, }, - grouping_done_label, + label_grouping_loop_end, ); - program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); + program.defer_label_resolution(label_grouping_loop_start, program.offset() as usize); // Read a row from the sorted data in the sorter into the pseudo cursor program.emit_insn(Insn::SorterData { cursor_id: group_by_metadata.sort_cursor, - dest_reg: group_by_metadata.sorter_key_register, + dest_reg: group_by_metadata.reg_sorter_key, pseudo_cursor, }); @@ -1626,7 +1623,7 @@ fn group_by_emit( // Compare the group by columns to the previous group by columns to see if we are at a new group or not program.emit_insn(Insn::Compare { - start_reg_a: comparison_register, + start_reg_a: reg_group_exprs_cmp, start_reg_b: groups_start_reg, count: group_by.exprs.len(), }); @@ -1650,7 +1647,7 @@ fn group_by_emit( // New group, move current group by columns into the comparison register program.emit_insn(Insn::Move { source_reg: groups_start_reg, - dest_reg: comparison_register, + dest_reg: reg_group_exprs_cmp, count: group_by.exprs.len(), }); @@ -1660,34 +1657,34 @@ fn group_by_emit( ); program.emit_insn_with_label_dependency( Insn::Gosub { - target_pc: subroutine_accumulator_output_label, - return_reg: subroutine_accumulator_output_return_offset_register, + target_pc: label_subrtn_acc_output, + return_reg: reg_subrtn_acc_output_return_offset, }, - subroutine_accumulator_output_label, + label_subrtn_acc_output, ); program.add_comment(program.offset(), "check abort flag"); program.emit_insn_with_label_dependency( Insn::IfPos { - reg: abort_flag_register, - target_pc: group_by_end_label, + reg: reg_abort_flag, + target_pc: label_group_by_end, decrement_by: 0, }, - group_by_end_label, + label_group_by_end, ); program.add_comment(program.offset(), "goto clear accumulator subroutine"); program.emit_insn_with_label_dependency( Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, + target_pc: label_subrtn_acc_clear, + return_reg: reg_subrtn_acc_clear_return_offset, }, - subroutine_accumulator_clear_label, + label_subrtn_acc_clear, ); // Accumulate the values into the aggregations program.resolve_label(agg_step_label, program.offset()); - let start_reg = metadata.aggregation_start_register.unwrap(); + let start_reg = t_ctx.reg_agg_start.unwrap(); let mut cursor_index = group_by.exprs.len(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; @@ -1710,16 +1707,16 @@ fn group_by_emit( ); program.emit_insn_with_label_dependency( Insn::If { - target_pc: accumulator_indicator_set_true_label, - reg: data_in_accumulator_indicator_register, + target_pc: label_acc_indicator_set_flag_true, + reg: reg_data_in_acc_flag, null_reg: 0, // unused in this case }, - accumulator_indicator_set_true_label, + label_acc_indicator_set_flag_true, ); // Read the group by columns for a finished group for i in 0..group_by.exprs.len() { - let key_reg = group_exprs_start_register + i; + let key_reg = reg_group_exprs_acc + i; let sorter_column_index = i; program.emit_insn(Insn::Column { cursor_id: pseudo_cursor, @@ -1728,60 +1725,57 @@ fn group_by_emit( }); } - program.resolve_label(accumulator_indicator_set_true_label, program.offset()); + program.resolve_label(label_acc_indicator_set_flag_true, program.offset()); program.add_comment(program.offset(), "indicate data in accumulator"); program.emit_insn(Insn::Integer { value: 1, - dest: data_in_accumulator_indicator_register, + dest: reg_data_in_acc_flag, }); program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: group_by_metadata.sort_cursor, - pc_if_next: sort_loop_start_label, + pc_if_next: label_grouping_loop_start, }, - sort_loop_start_label, + label_grouping_loop_start, ); - program.resolve_label(grouping_done_label, program.offset()); + program.resolve_label(label_grouping_loop_end, program.offset()); program.add_comment(program.offset(), "emit row for final group"); program.emit_insn_with_label_dependency( Insn::Gosub { - target_pc: group_by_metadata.subroutine_accumulator_output_label, - return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + target_pc: label_subrtn_acc_output, + return_reg: reg_subrtn_acc_output_return_offset, }, - group_by_metadata.subroutine_accumulator_output_label, + label_subrtn_acc_output, ); program.add_comment(program.offset(), "group by finished"); program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: group_by_end_label, + target_pc: label_group_by_end, }, - group_by_end_label, + label_group_by_end, ); program.emit_insn(Insn::Integer { value: 1, - dest: group_by_metadata.abort_flag_register, + dest: group_by_metadata.reg_abort_flag, }); program.emit_insn(Insn::Return { - return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + return_reg: reg_subrtn_acc_output_return_offset, }); - program.resolve_label( - group_by_metadata.subroutine_accumulator_output_label, - program.offset(), - ); + program.resolve_label(label_subrtn_acc_output, program.offset()); program.add_comment(program.offset(), "output group by row subroutine start"); program.emit_insn_with_label_dependency( Insn::IfPos { - reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: group_by_output_row_label, + reg: reg_data_in_acc_flag, + target_pc: label_agg_final, decrement_by: 0, }, - group_by_output_row_label, + label_agg_final, ); let group_by_end_without_emitting_row_label = program.allocate_label(); program.defer_label_resolution( @@ -1789,12 +1783,12 @@ fn group_by_emit( program.offset() as usize, ); program.emit_insn(Insn::Return { - return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + return_reg: reg_subrtn_acc_output_return_offset, }); - let agg_start_reg = metadata.aggregation_start_register.unwrap(); + let agg_start_reg = t_ctx.reg_agg_start.unwrap(); // Resolve the label for the start of the group by output row subroutine - program.resolve_label(group_by_output_row_label, program.offset()); + program.resolve_label(label_agg_final, program.offset()); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1810,7 +1804,7 @@ fn group_by_emit( let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len() + group_by.exprs.len()); for (i, expr) in group_by.exprs.iter().enumerate() { - precomputed_exprs_to_register.push((expr, group_exprs_start_register + i)); + precomputed_exprs_to_register.push((expr, reg_group_exprs_acc + i)); } for (i, agg) in aggregates.iter().enumerate() { precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); @@ -1839,9 +1833,9 @@ fn group_by_emit( program, referenced_tables, result_columns, - metadata.result_column_start_register.unwrap(), + t_ctx.reg_result_cols_start.unwrap(), Some(&precomputed_exprs_to_register), - limit.map(|l| (l, metadata.limit_reg.unwrap(), group_by_end_label)), + limit.map(|l| (l, t_ctx.reg_limit.unwrap(), label_group_by_end)), syms, query_type, )?; @@ -1852,8 +1846,8 @@ fn group_by_emit( referenced_tables, order_by, result_columns, - &mut metadata.result_column_indexes_in_orderby_sorter, - metadata.sort_metadata.as_ref().unwrap(), + &mut t_ctx.result_column_indexes_in_orderby_sorter, + t_ctx.meta_sort.as_ref().unwrap(), Some(&precomputed_exprs_to_register), syms, )?; @@ -1861,15 +1855,12 @@ fn group_by_emit( } program.emit_insn(Insn::Return { - return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + return_reg: reg_subrtn_acc_output_return_offset, }); program.add_comment(program.offset(), "clear accumulator subroutine start"); - program.resolve_label( - group_by_metadata.subroutine_accumulator_clear_label, - program.offset(), - ); - let start_reg = group_by_metadata.group_exprs_accumulator_register; + program.resolve_label(group_by_metadata.label_subrtn_acc_clear, program.offset()); + let start_reg = group_by_metadata.reg_group_exprs_acc; program.emit_insn(Insn::Null { dest: start_reg, dest_end: Some(start_reg + group_by.exprs.len() + aggregates.len() - 1), @@ -1877,13 +1868,13 @@ fn group_by_emit( program.emit_insn(Insn::Integer { value: 0, - dest: group_by_metadata.data_in_accumulator_indicator_register, + dest: reg_data_in_acc_flag, }); program.emit_insn(Insn::Return { - return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, + return_reg: reg_subrtn_acc_clear_return_offset, }); - program.resolve_label(group_by_end_label, program.offset()); + program.resolve_label(label_group_by_end, program.offset()); Ok(()) } @@ -1891,16 +1882,16 @@ fn group_by_emit( /// Emits the bytecode for processing an aggregate without a GROUP BY clause. /// This is called when the main query execution loop has finished processing, /// and we can now materialize the aggregate results. -fn agg_without_group_by_emit( +fn emit_ungrouped_aggregation( program: &mut ProgramBuilder, referenced_tables: &[TableReference], result_columns: &[ResultSetColumn], aggregates: &[Aggregate], - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, syms: &SymbolTable, query_type: &SelectQueryType, ) -> Result<()> { - let agg_start_reg = metadata.aggregation_start_register.unwrap(); + let agg_start_reg = t_ctx.reg_agg_start.unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1922,7 +1913,7 @@ fn agg_without_group_by_emit( program, referenced_tables, result_columns, - metadata.result_column_start_register.unwrap(), + t_ctx.reg_result_cols_start.unwrap(), Some(&precomputed_exprs_to_register), None, syms, @@ -1935,12 +1926,12 @@ fn agg_without_group_by_emit( /// Emits the bytecode for outputting rows from an ORDER BY sorter. /// This is called when the main query execution loop has finished processing, /// and we can now emit rows from the ORDER BY sorter. -fn order_by_emit( +fn emit_order_by( program: &mut ProgramBuilder, order_by: &[(ast::Expr, Direction)], result_columns: &[ResultSetColumn], limit: Option, - metadata: &mut Metadata, + t_ctx: &mut TranslateCtx, query_type: &SelectQueryType, ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); @@ -1957,7 +1948,7 @@ fn order_by_emit( } for (i, rc) in result_columns.iter().enumerate() { // If any result columns are not in the ORDER BY sorter, it's because they are equal to a sort key and were already added to the pseudo columns above. - if let Some(ref v) = metadata.result_columns_to_skip_in_orderby_sorter { + if let Some(ref v) = t_ctx.result_columns_to_skip_in_orderby_sorter { if v.contains(&i) { continue; } @@ -1971,7 +1962,7 @@ fn order_by_emit( } let num_columns_in_sorter = order_by.len() + result_columns.len() - - metadata + - t_ctx .result_columns_to_skip_in_orderby_sorter .as_ref() .map(|v| v.len()) @@ -1983,11 +1974,11 @@ fn order_by_emit( columns: pseudo_columns, }))), ); - let sort_metadata = metadata.sort_metadata.as_mut().unwrap(); + let sort_metadata = t_ctx.meta_sort.as_mut().unwrap(); program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, - content_reg: sort_metadata.sorter_data_register, + content_reg: sort_metadata.reg_sorter_data, num_fields: num_columns_in_sorter, }); @@ -2002,19 +1993,19 @@ fn order_by_emit( program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); program.emit_insn(Insn::SorterData { cursor_id: sort_metadata.sort_cursor, - dest_reg: sort_metadata.sorter_data_register, + dest_reg: sort_metadata.reg_sorter_data, pseudo_cursor, }); // We emit the columns in SELECT order, not sorter order (sorter always has the sort keys first). // This is tracked in m.result_column_indexes_in_orderby_sorter. let cursor_id = pseudo_cursor; - let start_reg = metadata.result_column_start_register.unwrap(); + let start_reg = t_ctx.reg_result_cols_start.unwrap(); for i in 0..result_columns.len() { let reg = start_reg + i; program.emit_insn(Insn::Column { cursor_id, - column: metadata.result_column_indexes_in_orderby_sorter[&i], + column: t_ctx.result_column_indexes_in_orderby_sorter[&i], dest: reg, }); } @@ -2023,7 +2014,7 @@ fn order_by_emit( program, start_reg, result_columns.len(), - limit.map(|l| (l, metadata.limit_reg.unwrap(), sort_loop_end_label)), + limit.map(|l| (l, t_ctx.reg_limit.unwrap(), sort_loop_end_label)), query_type, )?; @@ -2195,7 +2186,7 @@ fn order_by_sorter_insert( start_reg, orderby_sorter_column_count, sort_metadata.sort_cursor, - sort_metadata.sorter_data_register, + sort_metadata.reg_sorter_data, ); Ok(()) }