mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-27 21:14:21 +01:00
361 lines
12 KiB
Rust
361 lines
12 KiB
Rust
// This module contains code for emitting bytecode instructions for SQL query execution.
|
|
// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine.
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use sqlite3_parser::ast::{self};
|
|
|
|
use crate::function::Func;
|
|
use crate::translate::plan::{DeletePlan, Plan, Search};
|
|
use crate::util::exprs_are_equivalent;
|
|
use crate::vdbe::builder::ProgramBuilder;
|
|
use crate::vdbe::{insn::Insn, BranchOffset};
|
|
use crate::{Result, SymbolTable};
|
|
|
|
use super::aggregation::emit_ungrouped_aggregation;
|
|
use super::group_by::{emit_group_by, init_group_by, GroupByMetadata};
|
|
use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels};
|
|
use super::order_by::{emit_order_by, init_order_by, SortMetadata};
|
|
use super::plan::Operation;
|
|
use super::plan::{SelectPlan, TableReference};
|
|
use super::subquery::emit_subqueries;
|
|
|
|
#[derive(Debug)]
|
|
pub struct Resolver<'a> {
|
|
pub symbol_table: &'a SymbolTable,
|
|
pub expr_to_reg_cache: Vec<(&'a ast::Expr, usize)>,
|
|
}
|
|
|
|
impl<'a> Resolver<'a> {
|
|
pub fn new(symbol_table: &'a SymbolTable) -> Self {
|
|
Self {
|
|
symbol_table,
|
|
expr_to_reg_cache: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn resolve_function(&self, func_name: &str, arg_count: usize) -> Option<Func> {
|
|
match Func::resolve_function(func_name, arg_count).ok() {
|
|
Some(func) => Some(func),
|
|
None => self
|
|
.symbol_table
|
|
.resolve_function(func_name, arg_count)
|
|
.map(|arg| Func::External(arg.clone())),
|
|
}
|
|
}
|
|
|
|
pub fn resolve_cached_expr_reg(&self, expr: &ast::Expr) -> Option<usize> {
|
|
self.expr_to_reg_cache
|
|
.iter()
|
|
.find(|(e, _)| exprs_are_equivalent(expr, e))
|
|
.map(|(_, reg)| *reg)
|
|
}
|
|
}
|
|
|
|
/// The TranslateCtx struct holds various information and labels used during bytecode generation.
|
|
/// It is used for maintaining state and control flow during the bytecode
|
|
/// generation process.
|
|
#[derive(Debug)]
|
|
pub struct TranslateCtx<'a> {
|
|
// A typical query plan is a nested loop. Each loop has its own LoopLabels (see the definition of LoopLabels for more details)
|
|
pub labels_main_loop: Vec<LoopLabels>,
|
|
// label for the instruction that jumps to the next phase of the query after the main loop
|
|
// we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.)
|
|
pub label_main_loop_end: Option<BranchOffset>,
|
|
// First register of the aggregation results
|
|
pub reg_agg_start: Option<usize>,
|
|
// First register of the result columns of the query
|
|
pub reg_result_cols_start: Option<usize>,
|
|
// The register holding the limit value, if any.
|
|
pub reg_limit: Option<usize>,
|
|
// The register holding the offset value, if any.
|
|
pub reg_offset: Option<usize>,
|
|
// The register holding the limit+offset value, if any.
|
|
pub reg_limit_offset_sum: Option<usize>,
|
|
// metadata for the group by operator
|
|
pub meta_group_by: Option<GroupByMetadata>,
|
|
// metadata for the order by operator
|
|
pub meta_sort: Option<SortMetadata>,
|
|
// mapping between Join operator id and associated metadata (for left joins only)
|
|
pub meta_left_joins: HashMap<usize, LeftJoinMetadata>,
|
|
// We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter.
|
|
// This vector holds the indexes of the result columns in the ORDER BY sorter.
|
|
pub result_column_indexes_in_orderby_sorter: HashMap<usize, usize>,
|
|
// We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys.
|
|
// This vector holds the indexes of the result columns that we need to skip.
|
|
pub result_columns_to_skip_in_orderby_sorter: Option<Vec<usize>>,
|
|
pub resolver: Resolver<'a>,
|
|
}
|
|
|
|
/// Used to distinguish database operations.
///
/// The enum carries no data, so it is `Copy` and comparable: callers can pass
/// it by value and test it with `==` instead of a `match`.
// The variant names deliberately mirror the SQL keywords (hence the acronym
// lint), and not every variant is constructed yet (hence `dead_code`).
#[allow(clippy::upper_case_acronyms, dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OperationMode {
    SELECT,
    INSERT,
    UPDATE,
    DELETE,
}
|
|
|
|
/// Initialize the program with basic setup and return initial metadata and labels
|
|
fn prologue<'a>(
|
|
program: &mut ProgramBuilder,
|
|
syms: &'a SymbolTable,
|
|
) -> Result<(TranslateCtx<'a>, BranchOffset, BranchOffset)> {
|
|
let init_label = program.allocate_label();
|
|
|
|
program.emit_insn(Insn::Init {
|
|
target_pc: init_label,
|
|
});
|
|
|
|
let start_offset = program.offset();
|
|
|
|
let t_ctx = TranslateCtx {
|
|
labels_main_loop: Vec::new(),
|
|
label_main_loop_end: None,
|
|
reg_agg_start: None,
|
|
reg_limit: None,
|
|
reg_offset: None,
|
|
reg_limit_offset_sum: None,
|
|
reg_result_cols_start: None,
|
|
meta_group_by: None,
|
|
meta_left_joins: HashMap::new(),
|
|
meta_sort: None,
|
|
result_column_indexes_in_orderby_sorter: HashMap::new(),
|
|
result_columns_to_skip_in_orderby_sorter: None,
|
|
resolver: Resolver::new(syms),
|
|
};
|
|
|
|
Ok((t_ctx, init_label, start_offset))
|
|
}
|
|
|
|
/// Clean up and finalize the program, resolving any remaining labels
|
|
/// Note that although these are the final instructions, typically an SQLite
|
|
/// query will jump to the Transaction instruction via init_label.
|
|
fn epilogue(
|
|
program: &mut ProgramBuilder,
|
|
init_label: BranchOffset,
|
|
start_offset: BranchOffset,
|
|
) -> Result<()> {
|
|
program.emit_insn(Insn::Halt {
|
|
err_code: 0,
|
|
description: String::new(),
|
|
});
|
|
|
|
program.resolve_label(init_label, program.offset());
|
|
program.emit_insn(Insn::Transaction { write: false });
|
|
|
|
program.emit_constant_insns();
|
|
program.emit_insn(Insn::Goto {
|
|
target_pc: start_offset,
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Main entry point for emitting bytecode for a SQL query
|
|
/// Takes a query plan and generates the corresponding bytecode program
|
|
pub fn emit_program(program: &mut ProgramBuilder, plan: Plan, syms: &SymbolTable) -> Result<()> {
|
|
match plan {
|
|
Plan::Select(plan) => emit_program_for_select(program, plan, syms),
|
|
Plan::Delete(plan) => emit_program_for_delete(program, plan, syms),
|
|
}
|
|
}
|
|
|
|
fn emit_program_for_select(
|
|
program: &mut ProgramBuilder,
|
|
mut plan: SelectPlan,
|
|
syms: &SymbolTable,
|
|
) -> Result<()> {
|
|
let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?;
|
|
|
|
// Trivial exit on LIMIT 0
|
|
if let Some(limit) = plan.limit {
|
|
if limit == 0 {
|
|
epilogue(program, init_label, start_offset)?;
|
|
}
|
|
}
|
|
|
|
// Emit main parts of query
|
|
emit_query(program, &mut plan, &mut t_ctx)?;
|
|
|
|
// Finalize program
|
|
epilogue(program, init_label, start_offset)?;
|
|
program.columns = plan
|
|
.result_columns
|
|
.iter()
|
|
.map(|rc| rc.name.clone())
|
|
.collect::<Vec<_>>();
|
|
Ok(())
|
|
}
|
|
|
|
pub fn emit_query<'a>(
|
|
program: &'a mut ProgramBuilder,
|
|
plan: &'a mut SelectPlan,
|
|
t_ctx: &'a mut TranslateCtx<'a>,
|
|
) -> Result<usize> {
|
|
// Emit subqueries first so the results can be read in the main query loop.
|
|
emit_subqueries(program, t_ctx, &mut plan.table_references)?;
|
|
|
|
if t_ctx.reg_limit.is_none() {
|
|
t_ctx.reg_limit = plan.limit.map(|_| program.alloc_register());
|
|
}
|
|
|
|
if t_ctx.reg_offset.is_none() {
|
|
t_ctx.reg_offset = plan.offset.map(|_| program.alloc_register());
|
|
}
|
|
|
|
if t_ctx.reg_limit_offset_sum.is_none() {
|
|
t_ctx.reg_limit_offset_sum = plan.offset.map(|_| program.alloc_register());
|
|
}
|
|
|
|
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
|
|
// however an aggregation might still happen,
|
|
// e.g. SELECT COUNT(*) WHERE 0 returns a row with 0, not an empty result set
|
|
let after_main_loop_label = program.allocate_label();
|
|
t_ctx.label_main_loop_end = Some(after_main_loop_label);
|
|
if plan.contains_constant_false_condition {
|
|
program.emit_insn(Insn::Goto {
|
|
target_pc: after_main_loop_label,
|
|
});
|
|
}
|
|
|
|
// Allocate registers for result columns
|
|
t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len()));
|
|
|
|
// Initialize cursors and other resources needed for query execution
|
|
if let Some(ref mut order_by) = plan.order_by {
|
|
init_order_by(program, t_ctx, order_by)?;
|
|
}
|
|
|
|
if let Some(ref mut group_by) = plan.group_by {
|
|
init_group_by(program, t_ctx, group_by, &plan.aggregates)?;
|
|
}
|
|
init_loop(
|
|
program,
|
|
t_ctx,
|
|
&plan.table_references,
|
|
&OperationMode::SELECT,
|
|
)?;
|
|
|
|
// Set up main query execution loop
|
|
open_loop(program, t_ctx, &plan.table_references, &plan.where_clause)?;
|
|
|
|
// Process result columns and expressions in the inner loop
|
|
emit_loop(program, t_ctx, plan)?;
|
|
|
|
// Clean up and close the main execution loop
|
|
close_loop(program, t_ctx, &plan.table_references)?;
|
|
|
|
program.resolve_label(after_main_loop_label, program.offset());
|
|
|
|
let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition;
|
|
let order_by = plan.order_by.as_ref();
|
|
// Handle GROUP BY and aggregation processing
|
|
if plan.group_by.is_some() {
|
|
emit_group_by(program, t_ctx, plan)?;
|
|
} else if !plan.aggregates.is_empty() {
|
|
// Handle aggregation without GROUP BY
|
|
emit_ungrouped_aggregation(program, t_ctx, plan)?;
|
|
// Single row result for aggregates without GROUP BY, so ORDER BY not needed
|
|
order_by_necessary = false;
|
|
}
|
|
|
|
// Process ORDER BY results if needed
|
|
if order_by.is_some() && order_by_necessary {
|
|
emit_order_by(program, t_ctx, plan)?;
|
|
}
|
|
|
|
Ok(t_ctx.reg_result_cols_start.unwrap())
|
|
}
|
|
|
|
fn emit_program_for_delete(
|
|
program: &mut ProgramBuilder,
|
|
mut plan: DeletePlan,
|
|
syms: &SymbolTable,
|
|
) -> Result<()> {
|
|
let (mut t_ctx, init_label, start_offset) = prologue(program, syms)?;
|
|
|
|
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
|
|
let after_main_loop_label = program.allocate_label();
|
|
if plan.contains_constant_false_condition {
|
|
program.emit_insn(Insn::Goto {
|
|
target_pc: after_main_loop_label,
|
|
});
|
|
}
|
|
|
|
// Initialize cursors and other resources needed for query execution
|
|
init_loop(
|
|
program,
|
|
&mut t_ctx,
|
|
&plan.table_references,
|
|
&OperationMode::DELETE,
|
|
)?;
|
|
|
|
// Set up main query execution loop
|
|
open_loop(
|
|
program,
|
|
&mut t_ctx,
|
|
&mut plan.table_references,
|
|
&plan.where_clause,
|
|
)?;
|
|
|
|
emit_delete_insns(program, &mut t_ctx, &plan.table_references, &plan.limit)?;
|
|
|
|
// Clean up and close the main execution loop
|
|
close_loop(program, &mut t_ctx, &plan.table_references)?;
|
|
|
|
program.resolve_label(after_main_loop_label, program.offset());
|
|
|
|
// Finalize program
|
|
epilogue(program, init_label, start_offset)?;
|
|
program.columns = plan
|
|
.result_columns
|
|
.iter()
|
|
.map(|rc| rc.name.clone())
|
|
.collect::<Vec<_>>();
|
|
Ok(())
|
|
}
|
|
|
|
fn emit_delete_insns(
|
|
program: &mut ProgramBuilder,
|
|
t_ctx: &mut TranslateCtx,
|
|
table_references: &[TableReference],
|
|
limit: &Option<isize>,
|
|
) -> Result<()> {
|
|
let table_reference = table_references.first().unwrap();
|
|
let cursor_id = match &table_reference.op {
|
|
Operation::Scan { .. } => program.resolve_cursor_id(&table_reference.identifier),
|
|
Operation::Search(search) => match search {
|
|
Search::RowidEq { .. } | Search::RowidSearch { .. } => {
|
|
program.resolve_cursor_id(&table_reference.identifier)
|
|
}
|
|
Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name),
|
|
},
|
|
_ => return Ok(()),
|
|
};
|
|
|
|
// Emit the instructions to delete the row
|
|
let key_reg = program.alloc_register();
|
|
program.emit_insn(Insn::RowId {
|
|
cursor_id,
|
|
dest: key_reg,
|
|
});
|
|
program.emit_insn(Insn::DeleteAsync { cursor_id });
|
|
program.emit_insn(Insn::DeleteAwait { cursor_id });
|
|
if let Some(limit) = limit {
|
|
let limit_reg = program.alloc_register();
|
|
program.emit_insn(Insn::Integer {
|
|
value: *limit as i64,
|
|
dest: limit_reg,
|
|
});
|
|
program.mark_last_insn_constant();
|
|
program.emit_insn(Insn::DecrJumpZero {
|
|
reg: limit_reg,
|
|
target_pc: t_ctx.label_main_loop_end.unwrap(),
|
|
})
|
|
}
|
|
|
|
Ok(())
|
|
}
|