From f2dab8499deeadfbaf81e282b705742496d7d62b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Feb 2025 09:28:24 +0200 Subject: [PATCH 1/5] preallocate loop metadata according to table/column count and prefer vec over hashmap --- core/translate/emitter.rs | 31 +++++++++++++++++++++---------- core/translate/main_loop.rs | 25 ++++++++++++++----------- core/translate/order_by.rs | 2 +- core/translate/subquery.rs | 11 ++++++----- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 12db50a95..ef559a16c 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,8 +1,6 @@ // This module contains code for emitting bytecode instructions for SQL query execution. // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. -use std::collections::HashMap; - use sqlite3_parser::ast::{self}; use crate::function::Func; @@ -76,11 +74,12 @@ pub struct TranslateCtx<'a> { pub meta_group_by: Option, // metadata for the order by operator pub meta_sort: Option, - // mapping between Join operator id and associated metadata (for left joins only) - pub meta_left_joins: HashMap, + /// mapping between table loop index and associated metadata (for left joins only) + /// this metadata exists for the right table in a given left join + pub meta_left_joins: Vec>, // We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter. // This vector holds the indexes of the result columns in the ORDER BY sorter. - pub result_column_indexes_in_orderby_sorter: HashMap, + pub result_column_indexes_in_orderby_sorter: Vec, // We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys. // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, @@ -101,6 +100,8 @@ pub enum OperationMode { fn prologue<'a>( program: &mut ProgramBuilder, syms: &'a SymbolTable, + table_count: usize, + result_column_count: usize, ) -> Result<(TranslateCtx<'a>, BranchOffset, BranchOffset)> { let init_label = program.allocate_label(); @@ -111,7 +112,7 @@ fn prologue<'a>( let start_offset = program.offset(); let t_ctx = TranslateCtx { - labels_main_loop: Vec::new(), + labels_main_loop: (0..table_count).map(|_| LoopLabels::new(program)).collect(), label_main_loop_end: None, reg_agg_start: None, reg_limit: None, @@ -119,9 +120,9 @@ fn prologue<'a>( reg_limit_offset_sum: None, reg_result_cols_start: None, meta_group_by: None, - meta_left_joins: HashMap::new(), + meta_left_joins: (0..table_count).map(|_| None).collect(), meta_sort: None, - result_column_indexes_in_orderby_sorter: HashMap::new(), + result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), }; @@ -167,7 +168,12 @@ fn emit_program_for_select( mut plan: SelectPlan, syms: &SymbolTable, ) -> Result<()> { - let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?; + let (mut t_ctx, init_label, start_offset) = prologue( + program, + syms, + plan.table_references.len(), + plan.result_columns.len(), + )?; // Trivial exit on LIMIT 0 if let Some(limit) = plan.limit { @@ -274,7 +280,12 @@ fn emit_program_for_delete( mut plan: DeletePlan, syms: &SymbolTable, ) -> Result<()> { - let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?; + let (mut t_ctx, init_label, start_offset) = prologue( + program, + syms, + plan.table_references.len(), + plan.result_columns.len(), + )?; // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 let after_main_loop_label = program.allocate_label(); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 1df0efb80..98e0e3938 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -43,6 +43,16 @@ pub struct LoopLabels { loop_end: BranchOffset, } +impl LoopLabels { + pub fn new(program: &mut ProgramBuilder) -> Self { + Self { + loop_start: program.allocate_label(), + next: program.allocate_label(), + loop_end: program.allocate_label(), + } + } +} + /// Initialize resources needed for the source operators (tables, joins, etc) pub fn init_loop( program: &mut ProgramBuilder, @@ -51,13 +61,6 @@ pub fn init_loop( mode: &OperationMode, ) -> Result<()> { for (table_index, table) in tables.iter().enumerate() { - let loop_labels = LoopLabels { - next: program.allocate_label(), - loop_start: program.allocate_label(), - loop_end: program.allocate_label(), - }; - t_ctx.labels_main_loop.push(loop_labels); - // Initialize bookkeeping for OUTER JOIN if let Some(join_info) = table.join_info.as_ref() { if join_info.outer { @@ -66,7 +69,7 @@ pub fn init_loop( label_match_flag_set_true: program.allocate_label(), label_match_flag_check_value: program.allocate_label(), }; - t_ctx.meta_left_joins.insert(table_index, lj_metadata); + t_ctx.meta_left_joins[table_index] = Some(lj_metadata); } } match &table.op { @@ -181,7 +184,7 @@ pub fn open_loop( // This is used to determine whether to emit actual columns or NULLs for the columns of the right table. if let Some(join_info) = table.join_info.as_ref() { if join_info.outer { - let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); + let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap(); program.emit_insn(Insn::Integer { value: 0, dest: lj_meta.reg_match_flag, @@ -465,7 +468,7 @@ pub fn open_loop( // for the right table's cursor. if let Some(join_info) = table.join_info.as_ref() { if join_info.outer { - let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); + let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap(); program.resolve_label(lj_meta.label_match_flag_set_true, program.offset()); program.emit_insn(Insn::Integer { value: 1, @@ -731,7 +734,7 @@ pub fn close_loop( // and emit a row with NULLs for the right table, and then jump back to the next row of the left table. if let Some(join_info) = table.join_info.as_ref() { if join_info.outer { - let lj_meta = t_ctx.meta_left_joins.get(&table_index).unwrap(); + let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap(); // The left join match flag is set to 1 when there is any match on the right table // (e.g. SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a). // If the left join match flag has been set to 1, we jump to the next row on the outer table, diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index a908da571..06e411175 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -142,7 +142,7 @@ pub fn emit_order_by( let reg = start_reg + i; program.emit_insn(Insn::Column { cursor_id, - column: t_ctx.result_column_indexes_in_orderby_sorter[&i], + column: t_ctx.result_column_indexes_in_orderby_sorter[i], dest: reg, }); } diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index e66acc88e..1730312be 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - use crate::{ vdbe::{builder::ProgramBuilder, insn::Insn}, Result, @@ -7,6 +5,7 @@ use crate::{ use super::{ emitter::{emit_query, Resolver, TranslateCtx}, + main_loop::LoopLabels, plan::{Operation, SelectPlan, SelectQueryType, TableReference}, }; @@ -68,14 +67,16 @@ pub fn emit_subquery<'a>( } let end_coroutine_label = program.allocate_label(); let mut metadata = TranslateCtx { - labels_main_loop: vec![], + labels_main_loop: (0..plan.table_references.len()) + .map(|_| LoopLabels::new(program)) + .collect(), label_main_loop_end: None, meta_group_by: None, - meta_left_joins: HashMap::new(), + meta_left_joins: (0..plan.table_references.len()).map(|_| None).collect(), meta_sort: None, reg_agg_start: None, reg_result_cols_start: None, - result_column_indexes_in_orderby_sorter: HashMap::new(), + result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(), result_columns_to_skip_in_orderby_sorter: None, reg_limit: plan.limit.map(|_| program.alloc_register()), reg_offset: plan.offset.map(|_| program.alloc_register()), From 61a007fb29cee2d178bf12380ad2a1b72d8e6443 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Feb 2025 09:28:51 +0200 Subject: [PATCH 2/5] preallocate plan.result_columns according to AST --- core/translate/select.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/core/translate/select.rs b/core/translate/select.rs index 6a9250296..b0494e931 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -48,9 +48,30 @@ pub fn prepare_select_plan( // Parse the FROM clause into a vec of TableReferences. Fold all the join conditions expressions into the WHERE clause. let table_references = parse_from(schema, from, syms, &mut where_predicates)?; + // Preallocate space for the result columns + let result_columns = Vec::with_capacity( + columns + .iter() + .map(|c| match c { + // Allocate space for all columns in all tables + ResultColumn::Star => { + table_references.iter().map(|t| t.columns().len()).sum() + } + // Guess 5 columns if we can't find the table using the identifier (maybe it's in [brackets] or `tick_quotes`, or miXeDcAse) + ResultColumn::TableStar(n) => table_references + .iter() + .find(|t| t.identifier == n.0) + .map(|t| t.columns().len()) + .unwrap_or(5), + // Otherwise allocate space for 1 column + ResultColumn::Expr(_, _) => 1, + }) + .sum(), + ); + let mut plan = SelectPlan { table_references, - result_columns: vec![], + result_columns, where_clause: where_predicates, group_by: None, order_by: None, From 40f536fabbb90d34a8bb55885bb4b163b228679b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Feb 2025 09:45:33 +0200 Subject: [PATCH 3/5] Dont store available_indexes on plan; only used in optimize_plan() --- core/lib.rs | 2 +- core/translate/delete.rs | 3 +- core/translate/optimizer.rs | 78 ++++++++++++++++++++----------------- core/translate/plan.rs | 4 -- core/translate/select.rs | 3 +- 5 files changed, 45 insertions(+), 45 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 0addaffc5..a3532237e 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -331,7 +331,7 @@ impl Connection { *select, &self.db.syms.borrow(), )?; - optimize_plan(&mut plan)?; + optimize_plan(&mut plan, &self.schema.borrow())?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 8741e1e62..063aaad21 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -18,7 +18,7 @@ pub fn translate_delete( syms: &SymbolTable, ) -> Result<()> { let mut delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; - optimize_plan(&mut delete_plan)?; + optimize_plan(&mut delete_plan, schema)?; emit_program(program, delete_plan, syms) } @@ -55,7 +55,6 @@ pub fn prepare_delete_plan( order_by: None, limit: resolved_limit, offset: resolved_offset, - available_indexes: vec![], contains_constant_false_condition: false, }; diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index be93248a5..cfa66e9fc 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,18 +1,21 @@ -use std::rc::Rc; +use std::{collections::HashMap, rc::Rc}; use sqlite3_parser::ast; -use crate::{schema::Index, Result}; +use crate::{ + schema::{Index, Schema}, + Result, +}; use super::plan::{ DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference, WhereTerm, }; -pub fn optimize_plan(plan: &mut Plan) -> Result<()> { +pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { match plan { - Plan::Select(plan) => optimize_select_plan(plan), - Plan::Delete(plan) => optimize_delete_plan(plan), + Plan::Select(plan) => optimize_select_plan(plan, schema), + Plan::Delete(plan) => optimize_delete_plan(plan, schema), } } @@ -21,8 +24,8 @@ pub fn optimize_plan(plan: &mut Plan) -> Result<()> { * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { - optimize_subqueries(plan)?; +fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { + optimize_subqueries(plan, schema)?; rewrite_exprs_select(plan)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constant_conditions(&mut plan.where_clause)? @@ -33,16 +36,16 @@ fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { use_indexes( &mut plan.table_references, - &plan.available_indexes, + &schema.indexes, &mut plan.where_clause, )?; - eliminate_unnecessary_orderby(plan)?; + eliminate_unnecessary_orderby(plan, schema)?; Ok(()) } -fn optimize_delete_plan(plan: &mut DeletePlan) -> Result<()> { +fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> { rewrite_exprs_delete(plan)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constant_conditions(&mut plan.where_clause)? @@ -53,17 +56,17 @@ fn optimize_delete_plan(plan: &mut DeletePlan) -> Result<()> { use_indexes( &mut plan.table_references, - &plan.available_indexes, + &schema.indexes, &mut plan.where_clause, )?; Ok(()) } -fn optimize_subqueries(plan: &mut SelectPlan) -> Result<()> { +fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { for table in plan.table_references.iter_mut() { if let Operation::Subquery { plan, .. } = &mut table.op { - optimize_select_plan(&mut *plan)?; + optimize_select_plan(&mut *plan, schema)?; } } @@ -73,7 +76,7 @@ fn optimize_subqueries(plan: &mut SelectPlan) -> Result<()> { fn query_is_already_ordered_by( table_references: &[TableReference], key: &mut ast::Expr, - available_indexes: &Vec>, + available_indexes: &HashMap>>, ) -> Result { let first_table = table_references.first(); if first_table.is_none() { @@ -86,10 +89,9 @@ fn query_is_already_ordered_by( Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)), Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)), Search::IndexSearch { index, .. } => { - let index_idx = key.check_index_scan(0, &table_reference, available_indexes)?; - let index_is_the_same = index_idx - .map(|i| Rc::ptr_eq(&available_indexes[i], index)) - .unwrap_or(false); + let index_rc = key.check_index_scan(0, &table_reference, available_indexes)?; + let index_is_the_same = + index_rc.map(|irc| Rc::ptr_eq(index, &irc)).unwrap_or(false); Ok(index_is_the_same) } }, @@ -97,7 +99,7 @@ fn query_is_already_ordered_by( } } -fn eliminate_unnecessary_orderby(plan: &mut SelectPlan) -> Result<()> { +fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { if plan.order_by.is_none() { return Ok(()); } @@ -115,7 +117,7 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan) -> Result<()> { let (key, direction) = o.first_mut().unwrap(); let already_ordered = - query_is_already_ordered_by(&plan.table_references, key, &plan.available_indexes)?; + query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?; if already_ordered { push_scan_direction(&mut plan.table_references[0], direction); @@ -136,7 +138,7 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan) -> Result<()> { */ fn use_indexes( table_references: &mut [TableReference], - available_indexes: &Vec>, + available_indexes: &HashMap>>, where_clause: &mut Vec, ) -> Result<()> { if where_clause.is_empty() { @@ -274,8 +276,8 @@ pub trait Optimizable { &mut self, table_index: usize, table_reference: &TableReference, - available_indexes: &[Rc], - ) -> Result>; + available_indexes: &HashMap>>, + ) -> Result>>; } impl Optimizable for ast::Expr { @@ -293,19 +295,23 @@ impl Optimizable for ast::Expr { &mut self, table_index: usize, table_reference: &TableReference, - available_indexes: &[Rc], - ) -> Result> { + available_indexes: &HashMap>>, + ) -> Result>> { match self { Self::Column { table, column, .. } => { if *table != table_index { return Ok(None); } - for (idx, index) in available_indexes.iter().enumerate() { - if index.table_name == table_reference.table.get_name() { - let column = table_reference.table.get_column_at(*column); - if index.columns.first().unwrap().name == column.name { - return Ok(Some(idx)); - } + let available_indexes_for_table = + available_indexes.get(table_reference.table.get_name()); + if available_indexes_for_table.is_none() { + return Ok(None); + } + let available_indexes_for_table = available_indexes_for_table.unwrap(); + for index in available_indexes_for_table.iter() { + let column = table_reference.table.get_column_at(*column); + if index.columns.first().unwrap().name == column.name { + return Ok(Some(index.clone())); } } Ok(None) @@ -489,7 +495,7 @@ pub fn try_extract_index_search_expression( cond: &mut WhereTerm, table_index: usize, table_reference: &TableReference, - available_indexes: &[Rc], + available_indexes: &HashMap>>, ) -> Result> { if cond.eval_at_loop != table_index { return Ok(None); @@ -556,7 +562,7 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = + if let Some(index_rc) = lhs.check_index_scan(table_index, &table_reference, available_indexes)? { match operator { @@ -567,7 +573,7 @@ pub fn try_extract_index_search_expression( | ast::Operator::LessEquals => { let rhs_owned = rhs.take_ownership(); return Ok(Some(Search::IndexSearch { - index: available_indexes[index_index].clone(), + index: index_rc, cmp_op: *operator, cmp_expr: WhereTerm { expr: rhs_owned, @@ -580,7 +586,7 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = + if let Some(index_rc) = rhs.check_index_scan(table_index, &table_reference, available_indexes)? { match operator { @@ -591,7 +597,7 @@ pub fn try_extract_index_search_expression( | ast::Operator::LessEquals => { let lhs_owned = lhs.take_ownership(); return Ok(Some(Search::IndexSearch { - index: available_indexes[index_index].clone(), + index: index_rc, cmp_op: opposite_cmp_op(*operator), cmp_expr: WhereTerm { expr: lhs_owned, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9ac3a5b78..6209d37a8 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -89,8 +89,6 @@ pub struct SelectPlan { pub limit: Option, /// offset clause pub offset: Option, - /// all the indexes available - pub available_indexes: Vec>, /// query contains a constant condition that is always false pub contains_constant_false_condition: bool, /// query type (top level or subquery) @@ -112,8 +110,6 @@ pub struct DeletePlan { pub limit: Option, /// offset clause pub offset: Option, - /// all the indexes available - pub available_indexes: Vec>, /// query contains a constant condition that is always false pub contains_constant_false_condition: bool, } diff --git a/core/translate/select.rs b/core/translate/select.rs index b0494e931..d86897096 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -21,7 +21,7 @@ pub fn translate_select( syms: &SymbolTable, ) -> Result<()> { let mut select_plan = prepare_select_plan(schema, select, syms)?; - optimize_plan(&mut select_plan)?; + optimize_plan(&mut select_plan, schema)?; emit_program(program, select_plan, syms) } @@ -78,7 +78,6 @@ pub fn prepare_select_plan( aggregates: vec![], limit: None, offset: None, - available_indexes: schema.indexes.clone().into_values().flatten().collect(), contains_constant_false_condition: false, query_type: SelectQueryType::TopLevel, }; From 8b1f0ea23c92ad1ea91b9e7cbbc1fa2391ff8141 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Feb 2025 10:28:51 +0200 Subject: [PATCH 4/5] Use vec for label resolution, not hashmap --- core/vdbe/builder.rs | 36 +++++++++++++++++++++++------------- core/vdbe/mod.rs | 6 +++--- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 1752e9dd3..a029a0161 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -15,7 +15,7 @@ use super::{BranchOffset, CursorID, Insn, InsnReference, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, - next_free_label: i32, + next_free_label: u32, next_free_cursor_id: usize, insns: Vec, // for temporarily storing instructions that will be put after Transaction opcode @@ -24,7 +24,7 @@ pub struct ProgramBuilder { // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, // Hashmap of label to insn reference. Resolved in build(). - label_to_resolved_offset: HashMap, + label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN) @@ -57,7 +57,7 @@ impl ProgramBuilder { next_insn_label: None, cursor_ref: Vec::new(), constant_insns: Vec::new(), - label_to_resolved_offset: HashMap::new(), + label_to_resolved_offset: Vec::with_capacity(4), // 4 is arbitrary, we guess to assign at least this much seekrowid_emitted_bitmask: 0, comments: HashMap::new(), parameters: Parameters::new(), @@ -91,8 +91,10 @@ impl ProgramBuilder { pub fn emit_insn(&mut self, insn: Insn) { if let Some(label) = self.next_insn_label { - self.label_to_resolved_offset - .insert(label.to_label_value(), self.insns.len() as InsnReference); + self.label_to_resolved_offset.insert( + label.to_label_value() as usize, + Some(self.insns.len() as InsnReference), + ); self.next_insn_label = None; } self.insns.push(insn); @@ -183,8 +185,10 @@ impl ProgramBuilder { } pub fn allocate_label(&mut self) -> BranchOffset { - self.next_free_label -= 1; - BranchOffset::Label(self.next_free_label) + let label_n = self.next_free_label; + self.next_free_label += 1; + self.label_to_resolved_offset.push(None); + BranchOffset::Label(label_n) } // Effectively a GOTO without the need to emit an explicit GOTO instruction. @@ -197,8 +201,8 @@ impl ProgramBuilder { pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); - self.label_to_resolved_offset - .insert(label.to_label_value(), to_offset.to_offset_int()); + self.label_to_resolved_offset[label.to_label_value() as usize] = + Some(to_offset.to_offset_int()); } /// Resolve unresolved labels to a specific offset in the instruction list. @@ -209,10 +213,16 @@ impl ProgramBuilder { pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { - let to_offset = *self.label_to_resolved_offset.get(label).unwrap_or_else(|| { - panic!("Reference to undefined label in {}: {}", insn_name, label) - }); - *pc = BranchOffset::Offset(to_offset); + let to_offset = self + .label_to_resolved_offset + .get(*label as usize) + .unwrap_or_else(|| { + panic!("Reference to undefined label in {}: {}", insn_name, label) + }); + *pc = BranchOffset::Offset( + to_offset + .unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)), + ); } }; for insn in self.insns.iter_mut() { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 16156347a..3f70dab45 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -74,7 +74,7 @@ pub enum BranchOffset { /// A label is a named location in the program. /// If there are references to it, it must always be resolved to an Offset /// via program.resolve_label(). - Label(i32), + Label(u32), /// An offset is a direct index into the instruction list. Offset(InsnReference), /// A placeholder is a temporary value to satisfy the compiler. @@ -103,7 +103,7 @@ impl BranchOffset { } /// Returns the label value. Panics if the branch offset is an offset or placeholder. - pub fn to_label_value(&self) -> i32 { + pub fn to_label_value(&self) -> u32 { match self { BranchOffset::Label(v) => *v, BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"), @@ -116,7 +116,7 @@ impl BranchOffset { /// label or placeholder. pub fn to_debug_int(&self) -> i32 { match self { - BranchOffset::Label(v) => *v, + BranchOffset::Label(v) => *v as i32, BranchOffset::Offset(v) => *v as i32, BranchOffset::Placeholder => i32::MAX, } From 750a9c6463edfb9ac76df9bcf0646fd227588522 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Feb 2025 13:08:13 +0200 Subject: [PATCH 5/5] assertions and small cleanups --- core/schema.rs | 10 ++++++++-- core/translate/main_loop.rs | 4 ++++ core/translate/optimizer.rs | 11 +++++------ core/vdbe/builder.rs | 9 +++------ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 17d18f74c..76ee11544 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -66,8 +66,14 @@ impl Table { pub fn get_column_at(&self, index: usize) -> &Column { match self { - Self::BTree(table) => table.columns.get(index).unwrap(), - Self::Pseudo(table) => table.columns.get(index).unwrap(), + Self::BTree(table) => table + .columns + .get(index) + .expect("column index out of bounds"), + Self::Pseudo(table) => table + .columns + .get(index) + .expect("column index out of bounds"), } } diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 98e0e3938..b4fff9c7a 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -60,6 +60,10 @@ pub fn init_loop( tables: &[TableReference], mode: &OperationMode, ) -> Result<()> { + assert!( + t_ctx.meta_left_joins.len() == tables.len(), + "meta_left_joins length does not match tables length" + ); for (table_index, table) in tables.iter().enumerate() { // Initialize bookkeeping for OUTER JOIN if let Some(join_info) = table.join_info.as_ref() { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index cfa66e9fc..63f8cc573 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -302,14 +302,13 @@ impl Optimizable for ast::Expr { if *table != table_index { return Ok(None); } - let available_indexes_for_table = - available_indexes.get(table_reference.table.get_name()); - if available_indexes_for_table.is_none() { + let Some(available_indexes_for_table) = + available_indexes.get(table_reference.table.get_name()) + else { return Ok(None); - } - let available_indexes_for_table = available_indexes_for_table.unwrap(); + }; + let column = table_reference.table.get_column_at(*column); for index in available_indexes_for_table.iter() { - let column = table_reference.table.get_column_at(*column); if index.columns.first().unwrap().name == column.name { return Ok(Some(index.clone())); } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index a029a0161..1e0280b63 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -15,7 +15,6 @@ use super::{BranchOffset, CursorID, Insn, InsnReference, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, - next_free_label: u32, next_free_cursor_id: usize, insns: Vec, // for temporarily storing instructions that will be put after Transaction opcode @@ -23,7 +22,7 @@ pub struct ProgramBuilder { next_insn_label: Option, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, - // Hashmap of label to insn reference. Resolved in build(). + /// A vector where index=label number, value=resolved offset. Resolved in build(). label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, @@ -51,7 +50,6 @@ impl ProgramBuilder { pub fn new() -> Self { Self { next_free_register: 1, - next_free_label: 0, next_free_cursor_id: 0, insns: Vec::new(), next_insn_label: None, @@ -185,10 +183,9 @@ impl ProgramBuilder { } pub fn allocate_label(&mut self) -> BranchOffset { - let label_n = self.next_free_label; - self.next_free_label += 1; + let label_n = self.label_to_resolved_offset.len(); self.label_to_resolved_offset.push(None); - BranchOffset::Label(label_n) + BranchOffset::Label(label_n as u32) } // Effectively a GOTO without the need to emit an explicit GOTO instruction.