use std::{ cell::Cell, cmp::Ordering, rc::{Rc, Weak}, sync::Arc, }; use limbo_sqlite3_parser::ast::{self, TableInternalId}; use crate::{ fast_lock::SpinLock, parameters::Parameters, schema::{BTreeTable, Index, PseudoTable, Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, translate::{ collate::CollationSeq, emitter::TransactionMode, plan::{ResultSetColumn, TableReference}, }, Connection, VirtualTable, }; pub struct TableRefIdCounter { next_free: TableInternalId, } impl TableRefIdCounter { pub fn new() -> Self { Self { next_free: TableInternalId::default(), } } pub fn next(&mut self) -> ast::TableInternalId { let id = self.next_free; self.next_free += 1; id } } use super::{ insn::RegisterOrLiteral, BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program, }; #[allow(dead_code)] pub struct ProgramBuilder { pub table_reference_counter: TableRefIdCounter, next_free_register: usize, next_free_cursor_id: usize, /// Instruction, the function to execute it with, and its original index in the vector. insns: Vec<(Insn, InsnFunction, usize)>, /// A span of instructions from (offset_start_inclusive, offset_end_exclusive), /// that are deemed to be compile-time constant and can be hoisted out of loops /// so that they get evaluated only once at the start of the program. pub constant_spans: Vec<(usize, usize)>, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, /// A vector where index=label number, value=resolved offset. Resolved in build(). label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) comments: Option>, pub parameters: Parameters, pub result_columns: Vec, pub table_references: Vec, /// Curr collation sequence. Bool indicates whether it was set by a COLLATE expr collation: Option<(CollationSeq, bool)>, /// Current parsing nesting level nested_level: usize, init_label: BranchOffset, start_offset: BranchOffset, } #[derive(Debug, Clone)] pub enum CursorType { BTreeTable(Rc), BTreeIndex(Arc), Pseudo(Rc), Sorter, VirtualTable(Rc), } impl CursorType { pub fn is_index(&self) -> bool { matches!(self, CursorType::BTreeIndex(_)) } } #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum QueryMode { Normal, Explain, } impl From for QueryMode { fn from(stmt: ast::Cmd) -> Self { match stmt { ast::Cmd::ExplainQueryPlan(_) | ast::Cmd::Explain(_) => QueryMode::Explain, _ => QueryMode::Normal, } } } pub struct ProgramBuilderOpts { pub query_mode: QueryMode, pub num_cursors: usize, pub approx_num_insns: usize, pub approx_num_labels: usize, } impl ProgramBuilder { pub fn new(opts: ProgramBuilderOpts) -> Self { Self { table_reference_counter: TableRefIdCounter::new(), next_free_register: 1, next_free_cursor_id: 0, insns: Vec::with_capacity(opts.approx_num_insns), cursor_ref: Vec::with_capacity(opts.num_cursors), constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), seekrowid_emitted_bitmask: 0, comments: if opts.query_mode == QueryMode::Explain { Some(Vec::new()) } else { None }, parameters: Parameters::new(), result_columns: Vec::new(), table_references: Vec::new(), collation: None, nested_level: 0, // These labels will be filled when `prologue()` is called init_label: BranchOffset::Placeholder, start_offset: BranchOffset::Placeholder, } } pub fn extend(&mut self, opts: &ProgramBuilderOpts) { self.insns.reserve(opts.approx_num_insns); self.cursor_ref.reserve(opts.num_cursors); self.label_to_resolved_offset .reserve(opts.approx_num_labels); } /// Start a new constant span. The next instruction to be emitted will be the first /// instruction in the span. pub fn constant_span_start(&mut self) -> usize { let span = self.constant_spans.len(); let start = self.insns.len(); self.constant_spans.push((start, usize::MAX)); span } /// End the current constant span. The last instruction that was emitted is the last /// instruction in the span. pub fn constant_span_end(&mut self, span_idx: usize) { let span = &mut self.constant_spans[span_idx]; if span.1 == usize::MAX { span.1 = self.insns.len().saturating_sub(1); } } /// End all constant spans that are currently open. This is used to handle edge cases /// where we think a parent expression is constant, but we decide during the evaluation /// of one of its children that it is not. pub fn constant_span_end_all(&mut self) { for span in self.constant_spans.iter_mut() { if span.1 == usize::MAX { span.1 = self.insns.len().saturating_sub(1); } } } /// Check if there is a constant span that is currently open. pub fn constant_span_is_open(&self) -> bool { self.constant_spans .last() .map_or(false, |(_, end)| *end == usize::MAX) } /// Get the index of the next constant span. /// Used in [crate::translate::expr::translate_expr_no_constant_opt()] to invalidate /// all constant spans after the given index. pub fn constant_spans_next_idx(&self) -> usize { self.constant_spans.len() } /// Invalidate all constant spans after the given index. This is used when we want to /// be sure that constant optimization is never used for translating a given expression. /// See [crate::translate::expr::translate_expr_no_constant_opt()] for more details. pub fn constant_spans_invalidate_after(&mut self, idx: usize) { self.constant_spans.truncate(idx); } pub fn alloc_register(&mut self) -> usize { let reg = self.next_free_register; self.next_free_register += 1; reg } pub fn alloc_registers(&mut self, amount: usize) -> usize { let reg = self.next_free_register; self.next_free_register += amount; reg } pub fn alloc_cursor_id( &mut self, table_identifier: Option, cursor_type: CursorType, ) -> usize { let cursor = self.next_free_cursor_id; self.next_free_cursor_id += 1; self.cursor_ref.push((table_identifier, cursor_type)); assert_eq!(self.cursor_ref.len(), self.next_free_cursor_id); cursor } pub fn emit_insn(&mut self, insn: Insn) { let function = insn.to_function(); self.insns.push((insn, function, self.insns.len())); } pub fn close_cursors(&mut self, cursors: &[CursorID]) { for cursor in cursors { self.emit_insn(Insn::Close { cursor_id: *cursor }); } } pub fn emit_string8(&mut self, value: String, dest: usize) { self.emit_insn(Insn::String8 { value, dest }); } pub fn emit_string8_new_reg(&mut self, value: String) -> usize { let dest = self.alloc_register(); self.emit_insn(Insn::String8 { value, dest }); dest } pub fn emit_int(&mut self, value: i64, dest: usize) { self.emit_insn(Insn::Integer { value, dest }); } pub fn emit_bool(&mut self, value: bool, dest: usize) { self.emit_insn(Insn::Integer { value: if value { 1 } else { 0 }, dest, }); } pub fn emit_null(&mut self, dest: usize, dest_end: Option) { self.emit_insn(Insn::Null { dest, dest_end }); } pub fn emit_result_row(&mut self, start_reg: usize, count: usize) { self.emit_insn(Insn::ResultRow { start_reg, count }); } fn emit_halt(&mut self) { self.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); } // no users yet, but I want to avoid someone else in the future // just adding parameters to emit_halt! If you use this, remove the // clippy warning please. #[allow(dead_code)] pub fn emit_halt_err(&mut self, err_code: usize, description: String) { self.emit_insn(Insn::Halt { err_code, description, }); } pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) { if let Some(comments) = &mut self.comments { comments.push((insn_index.to_offset_int(), comment)); } } pub fn mark_last_insn_constant(&mut self) { if self.constant_span_is_open() { // no need to mark this insn as constant as the surrounding parent expression is already constant return; } let prev = self.insns.len().saturating_sub(1); self.constant_spans.push((prev, prev)); } fn emit_constant_insns(&mut self) { // move compile-time constant instructions to the end of the program, where they are executed once after Init jumps to it. // any label_to_resolved_offset that points to an instruction within any moved constant span should be updated to point to the new location. // the instruction reordering can be done by sorting the insns, so that the ordering is: // 1. if insn not in any constant span, it stays where it is // 2. if insn is in a constant span, it is after other insns, except those that are in a later constant span // 3. within a single constant span the order is preserver self.insns.sort_by(|(_, _, index_a), (_, _, index_b)| { let a_span = self .constant_spans .iter() .find(|span| span.0 <= *index_a && span.1 >= *index_a); let b_span = self .constant_spans .iter() .find(|span| span.0 <= *index_b && span.1 >= *index_b); if a_span.is_some() && b_span.is_some() { a_span.unwrap().0.cmp(&b_span.unwrap().0) } else if a_span.is_some() { Ordering::Greater } else if b_span.is_some() { Ordering::Less } else { Ordering::Equal } }); for resolved_offset in self.label_to_resolved_offset.iter_mut() { if let Some((old_offset, target)) = resolved_offset { let new_offset = self .insns .iter() .position(|(_, _, index)| *old_offset == *index as u32) .unwrap() as u32; *resolved_offset = Some((new_offset, *target)); } } // Fix comments to refer to new locations if let Some(comments) = &mut self.comments { for (old_offset, _) in comments.iter_mut() { let new_offset = self .insns .iter() .position(|(_, _, index)| *old_offset == *index as u32) .expect("comment must exist") as u32; *old_offset = new_offset; } } } pub fn offset(&self) -> BranchOffset { BranchOffset::Offset(self.insns.len() as InsnReference) } pub fn allocate_label(&mut self) -> BranchOffset { let label_n = self.label_to_resolved_offset.len(); self.label_to_resolved_offset.push(None); BranchOffset::Label(label_n as u32) } /// Resolve a label to whatever instruction follows the one that was /// last emitted. /// /// Use this when your use case is: "the program should jump to whatever instruction /// follows the one that was previously emitted", and you don't care exactly /// which instruction that is. Examples include "the start of a loop", or /// "after the loop ends". /// /// It is important to handle those cases this way, because the precise /// instruction that follows any given instruction might change due to /// reordering the emitted instructions. #[inline] pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { assert!(label.is_label(), "BranchOffset {:?} is not a label", label); self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn); } /// Resolve a label to exactly the instruction that was last emitted. /// /// Use this when your use case is: "the program should jump to the exact instruction /// that was last emitted", and you don't care WHERE exactly that ends up being /// once the order of the bytecode of the program is finalized. Examples include /// "jump to the Halt instruction", or "jump to the Next instruction of a loop". #[inline] pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn); } fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); let BranchOffset::Label(label_number) = label else { unreachable!("Label is not a label"); }; self.label_to_resolved_offset[label_number as usize] = Some((to_offset.to_offset_int(), target)); } /// Resolve unresolved labels to a specific offset in the instruction list. /// /// This function scans all instructions and resolves any labels to their corresponding offsets. /// It ensures that all labels are resolved correctly and updates the target program counter (PC) /// of each instruction that references a label. pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { let Some(Some((to_offset, target))) = self.label_to_resolved_offset.get(*label as usize) else { panic!( "Reference to undefined or unresolved label in {}: {}", insn_name, label ); }; *pc = BranchOffset::Offset( to_offset + if *target == JumpTarget::ExactlyThisInsn { 0 } else { 1 }, ); } }; for (insn, _, _) in self.insns.iter_mut() { match insn { Insn::Init { target_pc } => { resolve(target_pc, "Init"); } Insn::Eq { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Eq"); } Insn::Ne { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Ne"); } Insn::Lt { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Lt"); } Insn::Le { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Le"); } Insn::Gt { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Gt"); } Insn::Ge { lhs: _lhs, rhs: _rhs, target_pc, .. } => { resolve(target_pc, "Ge"); } Insn::If { reg: _reg, target_pc, jump_if_null: _, } => { resolve(target_pc, "If"); } Insn::IfNot { reg: _reg, target_pc, jump_if_null: _, } => { resolve(target_pc, "IfNot"); } Insn::Rewind { pc_if_empty, .. } => { resolve(pc_if_empty, "Rewind"); } Insn::Last { pc_if_empty, .. } => { resolve(pc_if_empty, "Last"); } Insn::Goto { target_pc } => { resolve(target_pc, "Goto"); } Insn::DecrJumpZero { reg: _reg, target_pc, } => { resolve(target_pc, "DecrJumpZero"); } Insn::SorterNext { cursor_id: _cursor_id, pc_if_next, } => { resolve(pc_if_next, "SorterNext"); } Insn::SorterSort { pc_if_empty, .. } => { resolve(pc_if_empty, "SorterSort"); } Insn::NotNull { reg: _reg, target_pc, } => { resolve(target_pc, "NotNull"); } Insn::IfPos { target_pc, .. } => { resolve(target_pc, "IfPos"); } Insn::Next { pc_if_next, .. } => { resolve(pc_if_next, "Next"); } Insn::Once { target_pc_when_reentered, .. } => { resolve(target_pc_when_reentered, "Once"); } Insn::Prev { pc_if_prev, .. } => { resolve(pc_if_prev, "Prev"); } Insn::InitCoroutine { yield_reg: _, jump_on_definition, start_offset, } => { resolve(jump_on_definition, "InitCoroutine"); resolve(start_offset, "InitCoroutine"); } Insn::NotExists { cursor: _, rowid_reg: _, target_pc, } => { resolve(target_pc, "NotExists"); } Insn::Yield { yield_reg: _, end_offset, } => { resolve(end_offset, "Yield"); } Insn::SeekRowid { target_pc, .. } => { resolve(target_pc, "SeekRowid"); } Insn::Gosub { target_pc, .. } => { resolve(target_pc, "Gosub"); } Insn::Jump { target_pc_eq, target_pc_lt, target_pc_gt, } => { resolve(target_pc_eq, "Jump"); resolve(target_pc_lt, "Jump"); resolve(target_pc_gt, "Jump"); } Insn::SeekGE { target_pc, .. } => { resolve(target_pc, "SeekGE"); } Insn::SeekGT { target_pc, .. } => { resolve(target_pc, "SeekGT"); } Insn::SeekLE { target_pc, .. } => { resolve(target_pc, "SeekLE"); } Insn::SeekLT { target_pc, .. } => { resolve(target_pc, "SeekLT"); } Insn::IdxGE { target_pc, .. } => { resolve(target_pc, "IdxGE"); } Insn::IdxLE { target_pc, .. } => { resolve(target_pc, "IdxLE"); } Insn::IdxGT { target_pc, .. } => { resolve(target_pc, "IdxGT"); } Insn::IdxLT { target_pc, .. } => { resolve(target_pc, "IdxLT"); } Insn::IsNull { reg: _, target_pc } => { resolve(target_pc, "IsNull"); } Insn::VNext { pc_if_next, .. } => { resolve(pc_if_next, "VNext"); } Insn::VFilter { pc_if_empty, .. } => { resolve(pc_if_empty, "VFilter"); } Insn::NoConflict { target_pc, .. } => { resolve(target_pc, "NoConflict"); } Insn::Found { target_pc, .. } => { resolve(target_pc, "Found"); } Insn::NotFound { target_pc, .. } => { resolve(target_pc, "NotFound"); } _ => {} } } self.label_to_resolved_offset.clear(); } // translate table to cursor id pub fn resolve_cursor_id_safe(&self, table_identifier: &str) -> Option { self.cursor_ref.iter().position(|(t_ident, _)| { t_ident .as_ref() .is_some_and(|ident| ident == table_identifier) }) } pub fn resolve_cursor_id(&self, table_identifier: &str) -> CursorID { self.resolve_cursor_id_safe(table_identifier) .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier)) } pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) { self.collation = c } pub fn curr_collation_ctx(&self) -> Option<(CollationSeq, bool)> { self.collation } pub fn curr_collation(&self) -> Option { self.collation.map(|c| c.0) } pub fn reset_collation(&mut self) { self.collation = None; } #[inline] pub fn incr_nesting(&mut self) { self.nested_level += 1; } #[inline] pub fn decr_nesting(&mut self) { self.nested_level -= 1; } /// Initialize the program with basic setup and return initial metadata and labels pub fn prologue<'a>(&mut self) { if self.nested_level == 0 { self.init_label = self.allocate_label(); self.emit_insn(Insn::Init { target_pc: self.init_label, }); self.start_offset = self.offset(); } } /// Clean up and finalize the program, resolving any remaining labels /// Note that although these are the final instructions, typically an SQLite /// query will jump to the Transaction instruction via init_label. pub fn epilogue(&mut self, txn_mode: TransactionMode) { if self.nested_level == 0 { self.emit_halt(); self.preassign_label_to_next_insn(self.init_label); match txn_mode { TransactionMode::Read => self.emit_insn(Insn::Transaction { write: false }), TransactionMode::Write => self.emit_insn(Insn::Transaction { write: true }), TransactionMode::None => {} } self.emit_constant_insns(); self.emit_insn(Insn::Goto { target_pc: self.start_offset, }); } } /// Checks whether `table` or any of its indices has been opened in the program pub fn is_table_open(&self, table: &Table, schema: &Schema) -> bool { let btree = table.btree(); let vtab = table.virtual_table(); for (insn, ..) in self.insns.iter() { match insn { Insn::OpenRead { cursor_id, root_page, .. } => { if let Some(btree) = &btree { if btree.root_page == *root_page { return true; } } let name = self.cursor_ref[*cursor_id].0.as_ref(); if name.is_none() { continue; } let name = name.unwrap(); let indices = schema.get_indices(name); for index in indices { if index.root_page == *root_page { return true; } } } Insn::OpenWrite { root_page, name, .. } => { let RegisterOrLiteral::Literal(root_page) = root_page else { unreachable!("root page can only be a literal"); }; if let Some(btree) = &btree { if btree.root_page == *root_page { return true; } } let indices = schema.get_indices(name); for index in indices { if index.root_page == *root_page { return true; } } } Insn::VOpen { cursor_id, .. } => { if let Some(vtab) = &vtab { let name = self.cursor_ref[*cursor_id].0.as_ref().unwrap(); if vtab.name == *name { return true; } } } _ => {} } } false } pub fn build( mut self, database_header: Arc>, connection: Weak, change_cnt_on: bool, ) -> Program { self.resolve_labels(); self.parameters.list.dedup(); Program { max_registers: self.next_free_register, insns: self .insns .into_iter() .map(|(insn, function, _)| (insn, function)) .collect(), cursor_ref: self.cursor_ref, database_header, comments: self.comments, connection, parameters: self.parameters, n_change: Cell::new(0), change_cnt_on, result_columns: self.result_columns, table_references: self.table_references, } } }