diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 41d70e9f7..9ea943a8f 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -6,7 +6,7 @@ use crate::{ schema::{Schema, Table, Type}, translate::select::{ColumnInfo, Select, SrcTable}, util::normalize_ident, - vdbe::{BranchOffset, Insn, ProgramBuilder}, + vdbe::{BranchOffset, Insn, builder::ProgramBuilder}, }; pub fn build_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result> { diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 074e01ef5..68fa38d04 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -15,7 +15,7 @@ use crate::translate::where_clause::{ }; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::normalize_ident; -use crate::vdbe::{BranchOffset, Insn, Program, ProgramBuilder}; +use crate::vdbe::{BranchOffset, Insn, Program, builder::ProgramBuilder}; use anyhow::Result; use expr::{build_select, maybe_apply_affinity, translate_expr}; use sqlite3_parser::ast::{self, Literal}; diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index 30e29ca25..062d0f124 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -5,7 +5,7 @@ use crate::{ translate::expr::{resolve_ident_qualified, resolve_ident_table, translate_expr}, function::SingleRowFunc, translate::select::Select, - vdbe::{BranchOffset, Insn, ProgramBuilder}, + vdbe::{BranchOffset, Insn, builder::ProgramBuilder}, }; const HARDCODED_CURSOR_LEFT_TABLE: usize = 0; diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs new file mode 100644 index 000000000..9305b65da --- /dev/null +++ b/core/vdbe/builder.rs @@ -0,0 +1,295 @@ +use super::{BranchOffset, CursorID, Insn, InsnReference, Program, Table}; + +pub struct ProgramBuilder { + next_free_register: usize, + next_free_label: BranchOffset, + next_free_cursor_id: usize, + insns: Vec, + // for temporarily storing instructions that will be put after Transaction opcode + constant_insns: Vec, + // Each label has a list of InsnReferences that must + // be resolved. Lists are indexed by: label.abs() - 1 + unresolved_labels: Vec>, + next_insn_label: Option, + // Cursors that are referenced by the program. Indexed by CursorID. + pub cursor_ref: Vec<(Option, Option)>, + // List of deferred label resolutions. Each entry is a pair of (label, insn_reference). + deferred_label_resolutions: Vec<(BranchOffset, InsnReference)>, +} + +impl ProgramBuilder { + pub fn new() -> Self { + Self { + next_free_register: 1, + next_free_label: 0, + next_free_cursor_id: 0, + insns: Vec::new(), + unresolved_labels: Vec::new(), + next_insn_label: None, + cursor_ref: Vec::new(), + constant_insns: Vec::new(), + deferred_label_resolutions: Vec::new(), + } + } + + pub fn alloc_register(&mut self) -> usize { + let reg = self.next_free_register; + self.next_free_register += 1; + reg + } + + pub fn alloc_registers(&mut self, amount: usize) -> usize { + let reg = self.next_free_register; + self.next_free_register += amount; + reg + } + + pub fn next_free_register(&self) -> usize { + self.next_free_register + } + + pub fn alloc_cursor_id( + &mut self, + table_identifier: Option, + table: Option
, + ) -> usize { + let cursor = self.next_free_cursor_id; + self.next_free_cursor_id += 1; + self.cursor_ref.push((table_identifier, table)); + assert!(self.cursor_ref.len() == self.next_free_cursor_id); + cursor + } + + pub fn emit_insn(&mut self, insn: Insn) { + self.insns.push(insn); + if let Some(label) = self.next_insn_label { + self.next_insn_label = None; + self.resolve_label(label, (self.insns.len() - 1) as BranchOffset); + } + } + + // Emit an instruction that will be put at the end of the program (after Transaction statement). + // This is useful for instructions that otherwise will be unnecessarily repeated in a loop. + // Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table. + // We could simply set it once before the SCAN started. + pub fn mark_last_insn_constant(&mut self) { + self.constant_insns.push(self.insns.pop().unwrap()); + } + + pub fn emit_constant_insns(&mut self) { + self.insns.append(&mut self.constant_insns); + } + + pub fn emit_insn_with_label_dependency(&mut self, insn: Insn, label: BranchOffset) { + self.insns.push(insn); + self.add_label_dependency(label, (self.insns.len() - 1) as BranchOffset); + } + + pub fn offset(&self) -> BranchOffset { + self.insns.len() as BranchOffset + } + + pub fn allocate_label(&mut self) -> BranchOffset { + self.next_free_label -= 1; + self.unresolved_labels.push(Vec::new()); + self.next_free_label + } + + // Effectively a GOTO without the need to emit an explicit GOTO instruction. + // Useful when you know you need to jump to "the next part", but the exact offset is unknowable + // at the time of emitting the instruction. + pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { + self.next_insn_label = Some(label); + } + + fn label_to_index(&self, label: BranchOffset) -> usize { + (label.abs() - 1) as usize + } + + pub fn add_label_dependency(&mut self, label: BranchOffset, insn_reference: BranchOffset) { + assert!(insn_reference >= 0); + assert!(label < 0); + let label_index = self.label_to_index(label); + assert!(label_index < self.unresolved_labels.len()); + let insn_reference = insn_reference as InsnReference; + let label_references = &mut self.unresolved_labels[label_index]; + label_references.push(insn_reference); + } + + pub fn defer_label_resolution(&mut self, label: BranchOffset, insn_reference: InsnReference) { + self.deferred_label_resolutions + .push((label, insn_reference)); + } + + pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { + assert!(label < 0); + assert!(to_offset >= 0); + let label_index = self.label_to_index(label); + assert!( + label_index < self.unresolved_labels.len(), + "Forbidden resolve of an unexistent label!" + ); + + let label_references = &mut self.unresolved_labels[label_index]; + for insn_reference in label_references.iter() { + let insn = &mut self.insns[*insn_reference]; + match insn { + Insn::Init { target_pc } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Eq { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Ne { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Lt { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Le { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Gt { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::Ge { + lhs: _lhs, + rhs: _rhs, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::If { + reg: _reg, + target_pc, + null_reg: _, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::IfNot { + reg: _reg, + target_pc, + null_reg: _, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::RewindAwait { + cursor_id: _cursor_id, + pc_if_empty, + } => { + assert!(*pc_if_empty < 0); + *pc_if_empty = to_offset; + } + Insn::Goto { target_pc } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::DecrJumpZero { + reg: _reg, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::SorterNext { + cursor_id: _cursor_id, + pc_if_next, + } => { + assert!(*pc_if_next < 0); + *pc_if_next = to_offset; + } + Insn::SorterSort { pc_if_empty, .. } => { + assert!(*pc_if_empty < 0); + *pc_if_empty = to_offset; + } + Insn::NotNull { + reg: _reg, + target_pc, + } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + Insn::IfPos { target_pc, .. } => { + assert!(*target_pc < 0); + *target_pc = to_offset; + } + _ => { + todo!("missing resolve_label for {:?}", insn); + } + } + } + label_references.clear(); + } + + // translate table to cursor id + pub fn resolve_cursor_id( + &self, + table_identifier: &str, + cursor_hint: Option, + ) -> CursorID { + if let Some(cursor_hint) = cursor_hint { + return cursor_hint; + } + self.cursor_ref + .iter() + .position(|(t_ident, _)| { + t_ident + .as_ref() + .is_some_and(|ident| ident == table_identifier) + }) + .unwrap() + } + + pub fn resolve_deferred_labels(&mut self) { + for i in 0..self.deferred_label_resolutions.len() { + let (label, insn_reference) = self.deferred_label_resolutions[i]; + self.resolve_label(label, insn_reference as BranchOffset); + } + self.deferred_label_resolutions.clear(); + } + + pub fn build(self) -> Program { + assert!( + self.deferred_label_resolutions.is_empty(), + "deferred_label_resolutions is not empty when build() is called, did you forget to call resolve_deferred_labels()?" + ); + assert!( + self.constant_insns.is_empty(), + "constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?" + ); + Program { + max_registers: self.next_free_register, + insns: self.insns, + cursor_ref: self.cursor_ref, + } + } +} \ No newline at end of file diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 68ddb5fff..68ee2e256 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1,3 +1,5 @@ +pub mod builder; + use crate::btree::BTreeCursor; use crate::function::{AggFunc, SingleRowFunc}; use crate::pager::Pager; @@ -259,300 +261,6 @@ pub enum Insn { // Index of insn in list of insns type InsnReference = usize; -pub struct ProgramBuilder { - next_free_register: usize, - next_free_label: BranchOffset, - next_free_cursor_id: usize, - insns: Vec, - // for temporarily storing instructions that will be put after Transaction opcode - constant_insns: Vec, - // Each label has a list of InsnReferences that must - // be resolved. Lists are indexed by: label.abs() - 1 - unresolved_labels: Vec>, - next_insn_label: Option, - // Cursors that are referenced by the program. Indexed by CursorID. - pub cursor_ref: Vec<(Option, Option
)>, - // List of deferred label resolutions. Each entry is a pair of (label, insn_reference). - deferred_label_resolutions: Vec<(BranchOffset, InsnReference)>, -} - -impl ProgramBuilder { - pub fn new() -> Self { - Self { - next_free_register: 1, - next_free_label: 0, - next_free_cursor_id: 0, - insns: Vec::new(), - unresolved_labels: Vec::new(), - next_insn_label: None, - cursor_ref: Vec::new(), - constant_insns: Vec::new(), - deferred_label_resolutions: Vec::new(), - } - } - - pub fn alloc_register(&mut self) -> usize { - let reg = self.next_free_register; - self.next_free_register += 1; - reg - } - - pub fn alloc_registers(&mut self, amount: usize) -> usize { - let reg = self.next_free_register; - self.next_free_register += amount; - reg - } - - pub fn next_free_register(&self) -> usize { - self.next_free_register - } - - pub fn alloc_cursor_id( - &mut self, - table_identifier: Option, - table: Option
, - ) -> usize { - let cursor = self.next_free_cursor_id; - self.next_free_cursor_id += 1; - self.cursor_ref.push((table_identifier, table)); - assert!(self.cursor_ref.len() == self.next_free_cursor_id); - cursor - } - - pub fn emit_insn(&mut self, insn: Insn) { - self.insns.push(insn); - if let Some(label) = self.next_insn_label { - self.next_insn_label = None; - self.resolve_label(label, (self.insns.len() - 1) as BranchOffset); - } - } - - // Emit an instruction that will be put at the end of the program (after Transaction statement). - // This is useful for instructions that otherwise will be unnecessarily repeated in a loop. - // Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table. - // We could simply set it once before the SCAN started. - pub fn mark_last_insn_constant(&mut self) { - self.constant_insns.push(self.insns.pop().unwrap()); - } - - pub fn emit_constant_insns(&mut self) { - self.insns.append(&mut self.constant_insns); - } - - pub fn emit_insn_with_label_dependency(&mut self, insn: Insn, label: BranchOffset) { - self.insns.push(insn); - self.add_label_dependency(label, (self.insns.len() - 1) as BranchOffset); - } - - pub fn offset(&self) -> BranchOffset { - self.insns.len() as BranchOffset - } - - pub fn allocate_label(&mut self) -> BranchOffset { - self.next_free_label -= 1; - self.unresolved_labels.push(Vec::new()); - self.next_free_label - } - - // Effectively a GOTO without the need to emit an explicit GOTO instruction. - // Useful when you know you need to jump to "the next part", but the exact offset is unknowable - // at the time of emitting the instruction. - pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { - self.next_insn_label = Some(label); - } - - fn label_to_index(&self, label: BranchOffset) -> usize { - (label.abs() - 1) as usize - } - - pub fn add_label_dependency(&mut self, label: BranchOffset, insn_reference: BranchOffset) { - assert!(insn_reference >= 0); - assert!(label < 0); - let label_index = self.label_to_index(label); - assert!(label_index < self.unresolved_labels.len()); - let insn_reference = insn_reference as InsnReference; - let label_references = &mut self.unresolved_labels[label_index]; - label_references.push(insn_reference); - } - - pub fn defer_label_resolution(&mut self, label: BranchOffset, insn_reference: InsnReference) { - self.deferred_label_resolutions - .push((label, insn_reference)); - } - - pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { - assert!(label < 0); - assert!(to_offset >= 0); - let label_index = self.label_to_index(label); - assert!( - label_index < self.unresolved_labels.len(), - "Forbidden resolve of an unexistent label!" - ); - - let label_references = &mut self.unresolved_labels[label_index]; - for insn_reference in label_references.iter() { - let insn = &mut self.insns[*insn_reference]; - match insn { - Insn::Init { target_pc } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Eq { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Ne { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Lt { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Le { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Gt { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::Ge { - lhs: _lhs, - rhs: _rhs, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::If { - reg: _reg, - target_pc, - null_reg: _, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::IfNot { - reg: _reg, - target_pc, - null_reg: _, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::RewindAwait { - cursor_id: _cursor_id, - pc_if_empty, - } => { - assert!(*pc_if_empty < 0); - *pc_if_empty = to_offset; - } - Insn::Goto { target_pc } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::DecrJumpZero { - reg: _reg, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::SorterNext { - cursor_id: _cursor_id, - pc_if_next, - } => { - assert!(*pc_if_next < 0); - *pc_if_next = to_offset; - } - Insn::SorterSort { pc_if_empty, .. } => { - assert!(*pc_if_empty < 0); - *pc_if_empty = to_offset; - } - Insn::NotNull { - reg: _reg, - target_pc, - } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - Insn::IfPos { target_pc, .. } => { - assert!(*target_pc < 0); - *target_pc = to_offset; - } - _ => { - todo!("missing resolve_label for {:?}", insn); - } - } - } - label_references.clear(); - } - - // translate table to cursor id - pub fn resolve_cursor_id( - &self, - table_identifier: &str, - cursor_hint: Option, - ) -> CursorID { - if let Some(cursor_hint) = cursor_hint { - return cursor_hint; - } - self.cursor_ref - .iter() - .position(|(t_ident, _)| { - t_ident - .as_ref() - .is_some_and(|ident| ident == table_identifier) - }) - .unwrap() - } - - pub fn resolve_deferred_labels(&mut self) { - for i in 0..self.deferred_label_resolutions.len() { - let (label, insn_reference) = self.deferred_label_resolutions[i]; - self.resolve_label(label, insn_reference as BranchOffset); - } - self.deferred_label_resolutions.clear(); - } - - pub fn build(self) -> Program { - assert!( - self.deferred_label_resolutions.is_empty(), - "deferred_label_resolutions is not empty when build() is called, did you forget to call resolve_deferred_labels()?" - ); - assert!( - self.constant_insns.is_empty(), - "constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?" - ); - Program { - max_registers: self.next_free_register, - insns: self.insns, - cursor_ref: self.cursor_ref, - } - } -} - pub enum StepResult<'a> { Done, IO,