Files
turso/core/translate/emitter.rs

361 lines
12 KiB
Rust

// This module contains code for emitting bytecode instructions for SQL query execution.
// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine.
use std::collections::HashMap;
use sqlite3_parser::ast::{self};
use crate::function::Func;
use crate::translate::plan::{DeletePlan, Plan, Search};
use crate::util::exprs_are_equivalent;
use crate::vdbe::builder::ProgramBuilder;
use crate::vdbe::{insn::Insn, BranchOffset};
use crate::{Result, SymbolTable};
use super::aggregation::emit_ungrouped_aggregation;
use super::group_by::{emit_group_by, init_group_by, GroupByMetadata};
use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels};
use super::order_by::{emit_order_by, init_order_by, SortMetadata};
use super::plan::Operation;
use super::plan::{SelectPlan, TableReference};
use super::subquery::emit_subqueries;
#[derive(Debug)]
pub struct Resolver<'a> {
pub symbol_table: &'a SymbolTable,
pub expr_to_reg_cache: Vec<(&'a ast::Expr, usize)>,
}
impl<'a> Resolver<'a> {
pub fn new(symbol_table: &'a SymbolTable) -> Self {
Self {
symbol_table,
expr_to_reg_cache: Vec::new(),
}
}
pub fn resolve_function(&self, func_name: &str, arg_count: usize) -> Option<Func> {
match Func::resolve_function(func_name, arg_count).ok() {
Some(func) => Some(func),
None => self
.symbol_table
.resolve_function(func_name, arg_count)
.map(|arg| Func::External(arg.clone())),
}
}
pub fn resolve_cached_expr_reg(&self, expr: &ast::Expr) -> Option<usize> {
self.expr_to_reg_cache
.iter()
.find(|(e, _)| exprs_are_equivalent(expr, e))
.map(|(_, reg)| *reg)
}
}
/// The TranslateCtx struct holds various information and labels used during bytecode generation.
/// It is used for maintaining state and control flow during the bytecode
/// generation process.
#[derive(Debug)]
pub struct TranslateCtx<'a> {
// A typical query plan is a nested loop. Each loop has its own LoopLabels (see the definition of LoopLabels for more details)
pub labels_main_loop: Vec<LoopLabels>,
// label for the instruction that jumps to the next phase of the query after the main loop
// we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.)
pub label_main_loop_end: Option<BranchOffset>,
// First register of the aggregation results
pub reg_agg_start: Option<usize>,
// First register of the result columns of the query
pub reg_result_cols_start: Option<usize>,
// The register holding the limit value, if any.
pub reg_limit: Option<usize>,
// The register holding the offset value, if any.
pub reg_offset: Option<usize>,
// The register holding the limit+offset value, if any.
pub reg_limit_offset_sum: Option<usize>,
// metadata for the group by operator
pub meta_group_by: Option<GroupByMetadata>,
// metadata for the order by operator
pub meta_sort: Option<SortMetadata>,
// mapping between Join operator id and associated metadata (for left joins only)
pub meta_left_joins: HashMap<usize, LeftJoinMetadata>,
// We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter.
// This vector holds the indexes of the result columns in the ORDER BY sorter.
pub result_column_indexes_in_orderby_sorter: HashMap<usize, usize>,
// We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys.
// This vector holds the indexes of the result columns that we need to skip.
pub result_columns_to_skip_in_orderby_sorter: Option<Vec<usize>>,
pub resolver: Resolver<'a>,
}
/// Used to distinguish database operations
#[allow(clippy::upper_case_acronyms, dead_code)]
#[derive(Debug, Clone)]
pub enum OperationMode {
SELECT,
INSERT,
UPDATE,
DELETE,
}
/// Initialize the program with basic setup and return initial metadata and labels
fn prologue<'a>(
program: &mut ProgramBuilder,
syms: &'a SymbolTable,
) -> Result<(TranslateCtx<'a>, BranchOffset, BranchOffset)> {
let init_label = program.allocate_label();
program.emit_insn(Insn::Init {
target_pc: init_label,
});
let start_offset = program.offset();
let t_ctx = TranslateCtx {
labels_main_loop: Vec::new(),
label_main_loop_end: None,
reg_agg_start: None,
reg_limit: None,
reg_offset: None,
reg_limit_offset_sum: None,
reg_result_cols_start: None,
meta_group_by: None,
meta_left_joins: HashMap::new(),
meta_sort: None,
result_column_indexes_in_orderby_sorter: HashMap::new(),
result_columns_to_skip_in_orderby_sorter: None,
resolver: Resolver::new(syms),
};
Ok((t_ctx, init_label, start_offset))
}
/// Clean up and finalize the program, resolving any remaining labels
/// Note that although these are the final instructions, typically an SQLite
/// query will jump to the Transaction instruction via init_label.
fn epilogue(
program: &mut ProgramBuilder,
init_label: BranchOffset,
start_offset: BranchOffset,
) -> Result<()> {
program.emit_insn(Insn::Halt {
err_code: 0,
description: String::new(),
});
program.resolve_label(init_label, program.offset());
program.emit_insn(Insn::Transaction { write: false });
program.emit_constant_insns();
program.emit_insn(Insn::Goto {
target_pc: start_offset,
});
Ok(())
}
/// Main entry point for emitting bytecode for a SQL query
/// Takes a query plan and generates the corresponding bytecode program
pub fn emit_program(program: &mut ProgramBuilder, plan: Plan, syms: &SymbolTable) -> Result<()> {
match plan {
Plan::Select(plan) => emit_program_for_select(program, plan, syms),
Plan::Delete(plan) => emit_program_for_delete(program, plan, syms),
}
}
fn emit_program_for_select(
program: &mut ProgramBuilder,
mut plan: SelectPlan,
syms: &SymbolTable,
) -> Result<()> {
let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?;
// Trivial exit on LIMIT 0
if let Some(limit) = plan.limit {
if limit == 0 {
epilogue(program, init_label, start_offset)?;
}
}
// Emit main parts of query
emit_query(program, &mut plan, &mut t_ctx)?;
// Finalize program
epilogue(program, init_label, start_offset)?;
program.columns = plan
.result_columns
.iter()
.map(|rc| rc.name.clone())
.collect::<Vec<_>>();
Ok(())
}
pub fn emit_query<'a>(
program: &'a mut ProgramBuilder,
plan: &'a mut SelectPlan,
t_ctx: &'a mut TranslateCtx<'a>,
) -> Result<usize> {
// Emit subqueries first so the results can be read in the main query loop.
emit_subqueries(program, t_ctx, &mut plan.table_references)?;
if t_ctx.reg_limit.is_none() {
t_ctx.reg_limit = plan.limit.map(|_| program.alloc_register());
}
if t_ctx.reg_offset.is_none() {
t_ctx.reg_offset = plan.offset.map(|_| program.alloc_register());
}
if t_ctx.reg_limit_offset_sum.is_none() {
t_ctx.reg_limit_offset_sum = plan.offset.map(|_| program.alloc_register());
}
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
// however an aggregation might still happen,
// e.g. SELECT COUNT(*) WHERE 0 returns a row with 0, not an empty result set
let after_main_loop_label = program.allocate_label();
t_ctx.label_main_loop_end = Some(after_main_loop_label);
if plan.contains_constant_false_condition {
program.emit_insn(Insn::Goto {
target_pc: after_main_loop_label,
});
}
// Allocate registers for result columns
t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len()));
// Initialize cursors and other resources needed for query execution
if let Some(ref mut order_by) = plan.order_by {
init_order_by(program, t_ctx, order_by)?;
}
if let Some(ref mut group_by) = plan.group_by {
init_group_by(program, t_ctx, group_by, &plan.aggregates)?;
}
init_loop(
program,
t_ctx,
&plan.table_references,
&OperationMode::SELECT,
)?;
// Set up main query execution loop
open_loop(program, t_ctx, &plan.table_references, &plan.where_clause)?;
// Process result columns and expressions in the inner loop
emit_loop(program, t_ctx, plan)?;
// Clean up and close the main execution loop
close_loop(program, t_ctx, &plan.table_references)?;
program.resolve_label(after_main_loop_label, program.offset());
let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition;
let order_by = plan.order_by.as_ref();
// Handle GROUP BY and aggregation processing
if plan.group_by.is_some() {
emit_group_by(program, t_ctx, plan)?;
} else if !plan.aggregates.is_empty() {
// Handle aggregation without GROUP BY
emit_ungrouped_aggregation(program, t_ctx, plan)?;
// Single row result for aggregates without GROUP BY, so ORDER BY not needed
order_by_necessary = false;
}
// Process ORDER BY results if needed
if order_by.is_some() && order_by_necessary {
emit_order_by(program, t_ctx, plan)?;
}
Ok(t_ctx.reg_result_cols_start.unwrap())
}
fn emit_program_for_delete(
program: &mut ProgramBuilder,
mut plan: DeletePlan,
syms: &SymbolTable,
) -> Result<()> {
let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?;
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
let after_main_loop_label = program.allocate_label();
if plan.contains_constant_false_condition {
program.emit_insn(Insn::Goto {
target_pc: after_main_loop_label,
});
}
// Initialize cursors and other resources needed for query execution
init_loop(
program,
&mut t_ctx,
&plan.table_references,
&OperationMode::DELETE,
)?;
// Set up main query execution loop
open_loop(
program,
&mut t_ctx,
&mut plan.table_references,
&plan.where_clause,
)?;
emit_delete_insns(program, &mut t_ctx, &plan.table_references, &plan.limit)?;
// Clean up and close the main execution loop
close_loop(program, &mut t_ctx, &plan.table_references)?;
program.resolve_label(after_main_loop_label, program.offset());
// Finalize program
epilogue(program, init_label, start_offset)?;
program.columns = plan
.result_columns
.iter()
.map(|rc| rc.name.clone())
.collect::<Vec<_>>();
Ok(())
}
fn emit_delete_insns(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
table_references: &[TableReference],
limit: &Option<isize>,
) -> Result<()> {
let table_reference = table_references.first().unwrap();
let cursor_id = match &table_reference.op {
Operation::Scan { .. } => program.resolve_cursor_id(&table_reference.identifier),
Operation::Search(search) => match search {
Search::RowidEq { .. } | Search::RowidSearch { .. } => {
program.resolve_cursor_id(&table_reference.identifier)
}
Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name),
},
_ => return Ok(()),
};
// Emit the instructions to delete the row
let key_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id,
dest: key_reg,
});
program.emit_insn(Insn::DeleteAsync { cursor_id });
program.emit_insn(Insn::DeleteAwait { cursor_id });
if let Some(limit) = limit {
let limit_reg = program.alloc_register();
program.emit_insn(Insn::Integer {
value: *limit as i64,
dest: limit_reg,
});
program.mark_last_insn_constant();
program.emit_insn(Insn::DecrJumpZero {
reg: limit_reg,
target_pc: t_ctx.label_main_loop_end.unwrap(),
})
}
Ok(())
}