diff --git a/core/function.rs b/core/function.rs index da246e719..5a436465e 100644 --- a/core/function.rs +++ b/core/function.rs @@ -10,6 +10,12 @@ pub struct ExternalFunc { pub func: ExtFunc, } +impl ExternalFunc { + pub fn is_deterministic(&self) -> bool { + false // external functions can be whatever so let's just default to false + } +} + #[derive(Debug, Clone)] pub enum ExtFunc { Scalar(ScalarFunction), @@ -98,6 +104,13 @@ pub enum JsonFunc { JsonQuote, } +#[cfg(feature = "json")] +impl JsonFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + #[cfg(feature = "json")] impl Display for JsonFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -145,6 +158,12 @@ pub enum VectorFunc { VectorDistanceCos, } +impl VectorFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + impl Display for VectorFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { @@ -198,6 +217,10 @@ impl PartialEq for AggFunc { } impl AggFunc { + pub fn is_deterministic(&self) -> bool { + false // consider aggregate functions nondeterministic since they depend on the number of rows, not only the input arguments + } + pub fn num_args(&self) -> usize { match self { Self::Avg => 1, @@ -297,6 +320,65 @@ pub enum ScalarFunc { Likelihood, } +impl ScalarFunc { + pub fn is_deterministic(&self) -> bool { + match self { + ScalarFunc::Cast => true, + ScalarFunc::Changes => false, // depends on DB state + ScalarFunc::Char => true, + ScalarFunc::Coalesce => true, + ScalarFunc::Concat => true, + ScalarFunc::ConcatWs => true, + ScalarFunc::Glob => true, + ScalarFunc::IfNull => true, + ScalarFunc::Iif => true, + ScalarFunc::Instr => true, + ScalarFunc::Like => true, + ScalarFunc::Abs => true, + ScalarFunc::Upper => true, + ScalarFunc::Lower => true, + ScalarFunc::Random => false, // duh + ScalarFunc::RandomBlob => false, // duh + ScalarFunc::Trim => true, + ScalarFunc::LTrim => true, + ScalarFunc::RTrim => true, + ScalarFunc::Round => true, + ScalarFunc::Length => true, + ScalarFunc::OctetLength => true, + ScalarFunc::Min => true, + ScalarFunc::Max => true, + ScalarFunc::Nullif => true, + ScalarFunc::Sign => true, + ScalarFunc::Substr => true, + ScalarFunc::Substring => true, + ScalarFunc::Soundex => true, + ScalarFunc::Date => false, + ScalarFunc::Time => false, + ScalarFunc::TotalChanges => false, + ScalarFunc::DateTime => false, + ScalarFunc::Typeof => true, + ScalarFunc::Unicode => true, + ScalarFunc::Quote => true, + ScalarFunc::SqliteVersion => true, + ScalarFunc::SqliteSourceId => true, + ScalarFunc::UnixEpoch => false, + ScalarFunc::JulianDay => false, + ScalarFunc::Hex => true, + ScalarFunc::Unhex => true, + ScalarFunc::ZeroBlob => true, + ScalarFunc::LastInsertRowid => false, + ScalarFunc::Replace => true, + #[cfg(feature = "fs")] + ScalarFunc::LoadExtension => true, + ScalarFunc::StrfTime => false, + ScalarFunc::Printf => false, + ScalarFunc::Likely => true, + ScalarFunc::TimeDiff => false, + ScalarFunc::Likelihood => true, + } + } +} + impl Display for ScalarFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { @@ -398,6 +480,9 @@ pub enum MathFuncArity { } impl MathFunc { + pub fn is_deterministic(&self) -> bool { + true + } pub fn arity(&self) -> MathFuncArity { match self { Self::Pi => MathFuncArity::Nullary, @@ -501,6 +586,17 @@ pub struct FuncCtx { } impl Func { + pub fn is_deterministic(&self) -> bool { + match self { + Self::Agg(agg_func) => agg_func.is_deterministic(), + Self::Scalar(scalar_func) => scalar_func.is_deterministic(), + Self::Math(math_func) => math_func.is_deterministic(), + Self::Vector(vector_func) => vector_func.is_deterministic(), + #[cfg(feature = "json")] + Self::Json(json_func) => json_func.is_deterministic(), + Self::External(external_func) => external_func.is_deterministic(), + } + } pub fn resolve_function(name: &str, arg_count: usize) -> Result { match name { "avg" => { diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5b12e4375..86283fa64 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -159,8 +159,7 @@ fn epilogue( err_code: 0, description: String::new(), }); - - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); match txn_mode { TransactionMode::Read => program.emit_insn(Insn::Transaction { write: false }), @@ -297,7 +296,7 @@ pub fn emit_query<'a>( condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } // Set up main query execution loop @@ -308,8 +307,7 @@ pub fn emit_query<'a>( // Clean up and close the main execution loop close_loop(program, t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition; let order_by = plan.order_by.as_ref(); @@ -379,8 +377,7 @@ fn emit_program_for_delete( // Clean up and close the main execution loop close_loop(program, &mut t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); // Finalize program epilogue(program, init_label, start_offset, TransactionMode::Write)?; @@ -516,8 +513,7 @@ fn emit_program_for_update( )?; emit_update_insns(&plan, &t_ctx, program)?; close_loop(program, &mut t_ctx, &plan.table_references)?; - - program.resolve_label(after_main_loop_label, program.offset()); + program.preassign_label_to_next_insn(after_main_loop_label); // Finalize program epilogue(program, init_label, start_offset, TransactionMode::Write)?; @@ -570,7 +566,7 @@ fn emit_update_insns( meta, &t_ctx.resolver, )?; - program.resolve_label(jump_target, program.offset()); + program.preassign_label_to_next_insn(jump_target); } let beg = program.alloc_registers( table_ref.table.columns().len() diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 53deb7e0f..79ccb1fe9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -13,6 +13,7 @@ use crate::vdbe::{ use crate::Result; use super::emitter::Resolver; +use super::optimizer::Optimizable; use super::plan::{Operation, TableReference}; #[derive(Debug, Clone, Copy)] @@ -205,7 +206,7 @@ pub fn translate_condition_expr( }, resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); translate_condition_expr( program, referenced_tables, @@ -230,7 +231,7 @@ pub fn translate_condition_expr( }, resolver, )?; - program.resolve_label(jump_target_when_false, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_false); translate_condition_expr( program, referenced_tables, @@ -254,8 +255,8 @@ pub fn translate_condition_expr( { let lhs_reg = program.alloc_register(); let rhs_reg = program.alloc_register(); - translate_and_mark(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; - translate_and_mark(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; match op { ast::Operator::Greater => { emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) @@ -410,7 +411,7 @@ pub fn translate_condition_expr( } if !condition_metadata.jump_if_condition_is_true { - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } ast::Expr::Like { not, .. } => { @@ -478,6 +479,38 @@ pub fn translate_condition_expr( Ok(()) } +/// Reason why [translate_expr_no_constant_opt()] was called. +#[derive(Debug)] +pub enum NoConstantOptReason { + /// The expression translation involves reusing register(s), + /// so hoisting those register assignments is not safe. + /// e.g. SELECT COALESCE(1, t.x, NULL) would overwrite 1 with NULL, which is invalid. + RegisterReuse, +} + +/// Translate an expression into bytecode via [translate_expr()], and forbid any constant values from being hoisted +/// into the beginning of the program. This is a good idea in most cases where +/// a register will end up being reused e.g. in a coroutine. +pub fn translate_expr_no_constant_opt( + program: &mut ProgramBuilder, + referenced_tables: Option<&[TableReference]>, + expr: &ast::Expr, + target_register: usize, + resolver: &Resolver, + deopt_reason: NoConstantOptReason, +) -> Result { + tracing::debug!( + "translate_expr_no_constant_opt: expr={:?}, deopt_reason={:?}", + expr, + deopt_reason + ); + let next_span_idx = program.constant_spans_next_idx(); + let translated = translate_expr(program, referenced_tables, expr, target_register, resolver)?; + program.constant_spans_invalidate_after(next_span_idx); + Ok(translated) +} + +/// Translate an expression into bytecode. pub fn translate_expr( program: &mut ProgramBuilder, referenced_tables: Option<&[TableReference]>, @@ -485,14 +518,29 @@ pub fn translate_expr( target_register: usize, resolver: &Resolver, ) -> Result { + let constant_span = if expr.is_constant(resolver) { + if !program.constant_span_is_open() { + Some(program.constant_span_start()) + } else { + None + } + } else { + program.constant_span_end_all(); + None + }; + if let Some(reg) = resolver.resolve_cached_expr_reg(expr) { program.emit_insn(Insn::Copy { src_reg: reg, dst_reg: target_register, amount: 0, }); + if let Some(span) = constant_span { + program.constant_span_end(span); + } return Ok(target_register); } + match expr { ast::Expr::Between { .. } => { unreachable!("expression should have been rewritten in optmizer") @@ -504,17 +552,17 @@ pub fn translate_expr( translate_expr(program, referenced_tables, e1, shared_reg, resolver)?; emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?; - return Ok(target_register); + Ok(target_register) + } else { + let e1_reg = program.alloc_registers(2); + let e2_reg = e1_reg + 1; + + translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; + translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; + + emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; + Ok(target_register) } - - let e1_reg = program.alloc_registers(2); - let e2_reg = e1_reg + 1; - - translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; - translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; - - emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; - Ok(target_register) } ast::Expr::Case { base, @@ -545,7 +593,14 @@ pub fn translate_expr( )?; }; for (when_expr, then_expr) in when_then_pairs { - translate_expr(program, referenced_tables, when_expr, expr_reg, resolver)?; + translate_expr_no_constant_opt( + program, + referenced_tables, + when_expr, + expr_reg, + resolver, + NoConstantOptReason::RegisterReuse, + )?; match base_reg { // CASE 1 WHEN 0 THEN 0 ELSE 1 becomes 1==0, Ne branch to next clause Some(base_reg) => program.emit_insn(Insn::Ne { @@ -563,12 +618,13 @@ pub fn translate_expr( }), }; // THEN... - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, then_expr, target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; program.emit_insn(Insn::Goto { target_pc: return_label, @@ -580,7 +636,14 @@ pub fn translate_expr( } match else_expr { Some(expr) => { - translate_expr(program, referenced_tables, expr, target_register, resolver)?; + translate_expr_no_constant_opt( + program, + referenced_tables, + expr, + target_register, + resolver, + NoConstantOptReason::RegisterReuse, + )?; } // If ELSE isn't specified, it means ELSE null. None => { @@ -590,7 +653,7 @@ pub fn translate_expr( }); } }; - program.resolve_label(return_label, program.offset()); + program.preassign_label_to_next_insn(return_label); Ok(target_register) } ast::Expr::Cast { expr, type_name } => { @@ -776,7 +839,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -904,12 +967,13 @@ pub fn translate_expr( // whenever a not null check succeeds, we jump to the end of the series let label_coalesce_end = program.allocate_label(); for (index, arg) in args.iter().enumerate() { - let reg = translate_expr( + let reg = translate_expr_no_constant_opt( program, referenced_tables, arg, target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; if index < args.len() - 1 { program.emit_insn(Insn::NotNull { @@ -991,12 +1055,13 @@ pub fn translate_expr( }; let temp_reg = program.alloc_register(); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[0], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; let before_copy_label = program.allocate_label(); program.emit_insn(Insn::NotNull { @@ -1004,12 +1069,13 @@ pub fn translate_expr( target_pc: before_copy_label, }); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[1], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; program.resolve_label(before_copy_label, program.offset()); program.emit_insn(Insn::Copy { @@ -1029,12 +1095,13 @@ pub fn translate_expr( ), }; let temp_reg = program.alloc_register(); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[0], temp_reg, resolver, + NoConstantOptReason::RegisterReuse, )?; let jump_target_when_false = program.allocate_label(); program.emit_insn(Insn::IfNot { @@ -1042,26 +1109,28 @@ pub fn translate_expr( target_pc: jump_target_when_false, jump_if_null: true, }); - translate_expr( + translate_expr_no_constant_opt( program, referenced_tables, &args[1], target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; let jump_target_result = program.allocate_label(); program.emit_insn(Insn::Goto { target_pc: jump_target_result, }); - program.resolve_label(jump_target_when_false, program.offset()); - translate_expr( + program.preassign_label_to_next_insn(jump_target_when_false); + translate_expr_no_constant_opt( program, referenced_tables, &args[2], target_register, resolver, + NoConstantOptReason::RegisterReuse, )?; - program.resolve_label(jump_target_result, program.offset()); + program.preassign_label_to_next_insn(jump_target_result); Ok(target_register) } ScalarFunc::Glob | ScalarFunc::Like => { @@ -1113,7 +1182,7 @@ pub fn translate_expr( | ScalarFunc::ZeroBlob => { let args = expect_arguments_exact!(args, 1, srf); let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1132,7 +1201,7 @@ pub fn translate_expr( ScalarFunc::LoadExtension => { let args = expect_arguments_exact!(args, 1, srf); let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1169,7 +1238,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1248,7 +1317,7 @@ pub fn translate_expr( crate::bail_parse_error!("hex function with no arguments",); }; let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1296,7 +1365,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1365,7 +1434,7 @@ pub fn translate_expr( let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1394,7 +1463,7 @@ pub fn translate_expr( }; let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1424,7 +1493,7 @@ pub fn translate_expr( }; let start_reg = program.alloc_registers(args.len()); for (i, arg) in args.iter().enumerate() { - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1577,7 +1646,7 @@ pub fn translate_expr( if let Some(args) = args { for (i, arg) in args.iter().enumerate() { // register containing result of each argument expression - translate_and_mark( + translate_expr( program, referenced_tables, arg, @@ -1614,7 +1683,7 @@ pub fn translate_expr( crate::bail_parse_error!("likely function with no arguments",); }; let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1665,7 +1734,7 @@ pub fn translate_expr( } let start_reg = program.alloc_register(); - translate_and_mark( + translate_expr( program, referenced_tables, &args[0], @@ -1701,13 +1770,7 @@ pub fn translate_expr( MathFuncArity::Unary => { let args = expect_arguments_exact!(args, 1, math_func); let start_reg = program.alloc_register(); - translate_and_mark( - program, - referenced_tables, - &args[0], - start_reg, - resolver, - )?; + translate_expr(program, referenced_tables, &args[0], start_reg, resolver)?; program.emit_insn(Insn::Function { constant_mask: 0, start_reg, @@ -2098,7 +2161,13 @@ pub fn translate_expr( }); Ok(target_register) } + }?; + + if let Some(span) = constant_span { + program.constant_span_end(span); } + + Ok(target_register) } fn emit_binary_insn( @@ -2367,17 +2436,11 @@ fn translate_like_base( let arg_count = if matches!(escape, Some(_)) { 3 } else { 2 }; let start_reg = program.alloc_registers(arg_count); let mut constant_mask = 0; - translate_and_mark(program, referenced_tables, lhs, start_reg + 1, resolver)?; + translate_expr(program, referenced_tables, lhs, start_reg + 1, resolver)?; let _ = translate_expr(program, referenced_tables, rhs, start_reg, resolver)?; if arg_count == 3 { if let Some(escape) = escape { - translate_and_mark( - program, - referenced_tables, - escape, - start_reg + 2, - resolver, - )?; + translate_expr(program, referenced_tables, escape, start_reg + 2, resolver)?; } } if matches!(rhs.as_ref(), ast::Expr::Literal(_)) { @@ -2482,20 +2545,6 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu } } -pub fn translate_and_mark( - program: &mut ProgramBuilder, - referenced_tables: Option<&[TableReference]>, - expr: &ast::Expr, - target_register: usize, - resolver: &Resolver, -) -> Result<()> { - translate_expr(program, referenced_tables, expr, target_register, resolver)?; - if matches!(expr, ast::Expr::Literal(_)) { - program.mark_last_insn_constant(); - } - Ok(()) -} - /// Sanitaizes a string literal by removing single quote at front and back /// and escaping double single quotes pub fn sanitize_string(input: &str) -> String { diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 68f732cbb..6eeee3fa3 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -275,16 +275,18 @@ pub fn emit_group_by<'a>( "start new group if comparison is not equal", ); // If we are at a new group, continue. If we are at the same group, jump to the aggregation step (i.e. accumulate more values into the aggregations) + let label_jump_after_comparison = program.allocate_label(); program.emit_insn(Insn::Jump { - target_pc_lt: program.offset().add(1u32), + target_pc_lt: label_jump_after_comparison, target_pc_eq: agg_step_label, - target_pc_gt: program.offset().add(1u32), + target_pc_gt: label_jump_after_comparison, }); program.add_comment( program.offset(), "check if ended group had data, and output if so", ); + program.resolve_label(label_jump_after_comparison, program.offset()); program.emit_insn(Insn::Gosub { target_pc: label_subrtn_acc_output, return_reg: reg_subrtn_acc_output_return_offset, @@ -364,8 +366,7 @@ pub fn emit_group_by<'a>( cursor_id: sort_cursor, pc_if_next: label_grouping_loop_start, }); - - program.resolve_label(label_grouping_loop_end, program.offset()); + program.preassign_label_to_next_insn(label_grouping_loop_end); program.add_comment(program.offset(), "emit row for final group"); program.emit_insn(Insn::Gosub { @@ -505,8 +506,7 @@ pub fn emit_group_by<'a>( program.emit_insn(Insn::Return { return_reg: reg_subrtn_acc_clear_return_offset, }); - - program.resolve_label(label_group_by_end, program.offset()); + program.preassign_label_to_next_insn(label_group_by_end); Ok(()) } diff --git a/core/translate/index.rs b/core/translate/index.rs index 55222e40f..b01fb2921 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -149,8 +149,7 @@ pub fn translate_create_index( cursor_id: table_cursor_id, pc_if_empty: loop_end_label, }); - - program.resolve_label(loop_start_label, program.offset()); + program.preassign_label_to_next_insn(loop_start_label); // Loop start: // Collect index values into start_reg..rowid_reg @@ -185,7 +184,7 @@ pub fn translate_create_index( cursor_id: table_cursor_id, pc_if_next: loop_start_label, }); - program.resolve_label(loop_end_label, program.offset()); + program.preassign_label_to_next_insn(loop_end_label); // Open the index btree we created for writing to insert the // newly sorted index records. @@ -202,7 +201,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, pc_if_empty: sorted_loop_end, }); - program.resolve_label(sorted_loop_start, program.offset()); + program.preassign_label_to_next_insn(sorted_loop_start); let sorted_record_reg = program.alloc_register(); program.emit_insn(Insn::SorterData { pseudo_cursor: pseudo_cursor_id, @@ -226,7 +225,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, pc_if_next: sorted_loop_start, }); - program.resolve_label(sorted_loop_end, program.offset()); + program.preassign_label_to_next_insn(sorted_loop_end); // End of the outer loop // @@ -248,7 +247,7 @@ pub fn translate_create_index( // Epilogue: program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); program.emit_goto(start_offset); diff --git a/core/translate/insert.rs b/core/translate/insert.rs index b17d19110..6887c43af 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -13,16 +13,15 @@ use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral}; use crate::vdbe::BranchOffset; use crate::{ schema::{Column, Schema}, - translate::expr::translate_expr, vdbe::{ builder::{CursorType, ProgramBuilder}, insn::Insn, }, - SymbolTable, }; -use crate::{Result, VirtualTable}; +use crate::{Result, SymbolTable, VirtualTable}; use super::emitter::Resolver; +use super::expr::{translate_expr_no_constant_opt, NoConstantOptReason}; #[allow(clippy::too_many_arguments)] pub fn translate_insert( @@ -144,12 +143,15 @@ pub fn translate_insert( if inserting_multiple_rows { let yield_reg = program.alloc_register(); let jump_on_definition_label = program.allocate_label(); + let start_offset_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { yield_reg, jump_on_definition: jump_on_definition_label, - start_offset: program.offset().add(1u32), + start_offset: start_offset_label, }); + program.resolve_label(start_offset_label, program.offset()); + for value in values { populate_column_registers( &mut program, @@ -166,7 +168,7 @@ pub fn translate_insert( }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); - program.resolve_label(jump_on_definition_label, program.offset()); + program.preassign_label_to_next_insn(jump_on_definition_label); program.emit_insn(Insn::OpenWrite { cursor_id, @@ -268,8 +270,7 @@ pub fn translate_insert( err_code: SQLITE_CONSTRAINT_PRIMARYKEY, description: format!("{}.{}", table_name.0, rowid_column_name), }); - - program.resolve_label(make_record_label, program.offset()); + program.preassign_label_to_next_insn(make_record_label); } match table.btree() { @@ -400,8 +401,8 @@ pub fn translate_insert( err_code: 0, description: String::new(), }); + program.preassign_label_to_next_insn(init_label); - program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Transaction { write: true }); program.emit_constant_insns(); program.emit_insn(Insn::Goto { @@ -603,18 +604,26 @@ fn populate_column_registers( } else { target_reg }; - translate_expr( + translate_expr_no_constant_opt( program, None, value.get(value_index).expect("value index out of bounds"), reg, resolver, + NoConstantOptReason::RegisterReuse, )?; if write_directly_to_rowid_reg { program.emit_insn(Insn::SoftNull { reg: target_reg }); } } else if let Some(default_expr) = mapping.default_value { - translate_expr(program, None, default_expr, target_reg, resolver)?; + translate_expr_no_constant_opt( + program, + None, + default_expr, + target_reg, + resolver, + NoConstantOptReason::RegisterReuse, + )?; } else { // Column was not specified as has no DEFAULT - use NULL if it is nullable, otherwise error // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. @@ -664,7 +673,14 @@ fn translate_virtual_table_insert( let value_registers_start = program.alloc_registers(values[0].len()); for (i, expr) in values[0].iter().enumerate() { - translate_expr(program, None, expr, value_registers_start + i, resolver)?; + translate_expr_no_constant_opt( + program, + None, + expr, + value_registers_start + i, + resolver, + NoConstantOptReason::RegisterReuse, + )?; } /* * * Inserts for virtual tables are done in a single step. @@ -718,12 +734,12 @@ fn translate_virtual_table_insert( }); let halt_label = program.allocate_label(); + program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); - program.resolve_label(halt_label, program.offset()); program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Goto { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index ff60bb305..a1cabc511 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -18,7 +18,10 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, emitter::{OperationMode, TranslateCtx}, - expr::{translate_condition_expr, translate_expr, ConditionMetadata}, + expr::{ + translate_condition_expr, translate_expr, translate_expr_no_constant_opt, + ConditionMetadata, NoConstantOptReason, + }, group_by::is_column_in_group_by, optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, @@ -238,7 +241,7 @@ pub fn open_loop( jump_on_definition: BranchOffset::Offset(0), start_offset: coroutine_implementation_start, }); - program.resolve_label(loop_start, program.offset()); + program.preassign_label_to_next_insn(loop_start); // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor, // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row. // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack, @@ -265,7 +268,7 @@ pub fn open_loop( condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } Operation::Scan { iter_dir, .. } => { @@ -284,6 +287,7 @@ pub fn open_loop( pc_if_empty: loop_end, }); } + program.preassign_label_to_next_insn(loop_start); } else if let Some(vtab) = table.virtual_table() { let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab .kind @@ -391,8 +395,8 @@ pub fn open_loop( idx_num: maybe_idx_int.unwrap_or(0) as usize, pc_if_empty: loop_end, }); + program.preassign_label_to_next_insn(loop_start); } - program.resolve_label(loop_start, program.offset()); if let Some(table_cursor_id) = table_cursor_id { if let Some(index_cursor_id) = index_cursor_id { @@ -419,7 +423,7 @@ pub fn open_loop( condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } Operation::Search(search) => { @@ -527,7 +531,7 @@ pub fn open_loop( condition_metadata, &t_ctx.resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } } @@ -815,6 +819,7 @@ pub fn close_loop( program.emit_insn(Insn::Goto { target_pc: loop_labels.loop_start, }); + program.preassign_label_to_next_insn(loop_labels.loop_end); } Operation::Scan { iter_dir, .. } => { program.resolve_label(loop_labels.next, program.offset()); @@ -844,6 +849,7 @@ pub fn close_loop( } other => unreachable!("Unsupported table reference type: {:?}", other), } + program.preassign_label_to_next_insn(loop_labels.loop_end); } Operation::Search(search) => { program.resolve_label(loop_labels.next, program.offset()); @@ -869,11 +875,10 @@ pub fn close_loop( }); } } + program.preassign_label_to_next_insn(loop_labels.loop_end); } } - program.resolve_label(loop_labels.loop_end, program.offset()); - // Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back // and emit a row with NULLs for the right table, and then jump back to the next row of the left table. if let Some(join_info) = table.join_info.as_ref() { @@ -913,7 +918,7 @@ pub fn close_loop( program.emit_insn(Insn::Goto { target_pc: lj_meta.label_match_flag_set_true, }); - program.resolve_label(label_when_right_table_notnull, program.offset()); + program.preassign_label_to_next_insn(label_when_right_table_notnull); } } } @@ -972,7 +977,14 @@ fn emit_seek( } } else { let expr = &seek_def.key[i].0; - translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?; + translate_expr_no_constant_opt( + program, + Some(tables), + &expr, + reg, + &t_ctx.resolver, + NoConstantOptReason::RegisterReuse, + )?; // If the seek key column is not verifiably non-NULL, we need check whether it is NULL, // and if so, jump to the loop end. // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL, @@ -1046,7 +1058,7 @@ fn emit_seek_termination( is_index: bool, ) -> Result<()> { let Some(termination) = seek_def.termination.as_ref() else { - program.resolve_label(loop_start, program.offset()); + program.preassign_label_to_next_insn(loop_start); return Ok(()); }; @@ -1081,16 +1093,17 @@ fn emit_seek_termination( // if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key. // if not, we just reuse what was emitted for the seek. } else if seek_len < termination.len { - translate_expr( + translate_expr_no_constant_opt( program, Some(tables), &seek_def.key[i].0, reg, &t_ctx.resolver, + NoConstantOptReason::RegisterReuse, )?; } } - program.resolve_label(loop_start, program.offset()); + program.preassign_label_to_next_insn(loop_start); let mut rowid_reg = None; if !is_index { rowid_reg = Some(program.alloc_register()); @@ -1177,11 +1190,12 @@ fn emit_autoindex( cursor_id: index_cursor_id, }); // Rewind source table + let label_ephemeral_build_loop_start = program.allocate_label(); program.emit_insn(Insn::Rewind { cursor_id: table_cursor_id, - pc_if_empty: label_ephemeral_build_end, + pc_if_empty: label_ephemeral_build_loop_start, }); - let offset_ephemeral_build_loop_start = program.offset(); + program.preassign_label_to_next_insn(label_ephemeral_build_loop_start); // Emit all columns from source table that are needed in the ephemeral index. // Also reserve a register for the rowid if the source table has rowids. let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize; @@ -1215,8 +1229,8 @@ fn emit_autoindex( }); program.emit_insn(Insn::Next { cursor_id: table_cursor_id, - pc_if_next: offset_ephemeral_build_loop_start, + pc_if_next: label_ephemeral_build_loop_start, }); - program.resolve_label(label_ephemeral_build_end, program.offset()); + program.preassign_label_to_next_insn(label_ephemeral_build_end); Ok(index_cursor_id) } diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dc9dccaa6..efb2015f0 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -11,6 +11,7 @@ use crate::{ }; use super::{ + emitter::Resolver, plan::{ DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm, @@ -479,7 +480,7 @@ fn rewrite_exprs_update(plan: &mut UpdatePlan) -> Result<()> { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ConstantPredicate { +pub enum AlwaysTrueOrFalse { AlwaysTrue, AlwaysFalse, } @@ -489,18 +490,20 @@ pub enum ConstantPredicate { Implemented for ast::Expr */ pub trait Optimizable { - // if the expression is a constant expression e.g. '1', returns the constant condition - fn check_constant(&self) -> Result>; + // if the expression is a constant expression that, when evaluated as a condition, is always true or false + // return a [ConstantPredicate]. + fn check_always_true_or_false(&self) -> Result>; fn is_always_true(&self) -> Result { Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantPredicate::AlwaysTrue)) + .check_always_true_or_false()? + .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysTrue)) } fn is_always_false(&self) -> Result { Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) + .check_always_true_or_false()? + .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysFalse)) } + fn is_constant(&self, resolver: &Resolver<'_>) -> bool; fn is_rowid_alias_of(&self, table_index: usize) -> bool; fn is_nonnull(&self, tables: &[TableReference]) -> bool; } @@ -599,22 +602,106 @@ impl Optimizable for ast::Expr { Expr::Variable(..) => false, } } - fn check_constant(&self) -> Result> { + /// Returns true if the expression is a constant i.e. does not depend on variables or columns etc. + fn is_constant(&self, resolver: &Resolver<'_>) -> bool { + match self { + Expr::Between { + lhs, start, end, .. + } => { + lhs.is_constant(resolver) + && start.is_constant(resolver) + && end.is_constant(resolver) + } + Expr::Binary(expr, _, expr1) => { + expr.is_constant(resolver) && expr1.is_constant(resolver) + } + Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + base.as_ref() + .map_or(true, |base| base.is_constant(resolver)) + && when_then_pairs.iter().all(|(when, then)| { + when.is_constant(resolver) && then.is_constant(resolver) + }) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_constant(resolver)) + } + Expr::Cast { expr, .. } => expr.is_constant(resolver), + Expr::Collate(expr, _) => expr.is_constant(resolver), + Expr::DoublyQualified(_, _, _) => { + panic!("DoublyQualified should have been rewritten as Column") + } + Expr::Exists(_) => false, + Expr::FunctionCall { args, name, .. } => { + let Some(func) = + resolver.resolve_function(&name.0, args.as_ref().map_or(0, |args| args.len())) + else { + return false; + }; + func.is_deterministic() + && args.as_ref().map_or(true, |args| { + args.iter().all(|arg| arg.is_constant(resolver)) + }) + } + Expr::FunctionCallStar { .. } => false, + Expr::Id(_) => panic!("Id should have been rewritten as Column"), + Expr::Column { .. } => false, + Expr::RowId { .. } => false, + Expr::InList { lhs, rhs, .. } => { + lhs.is_constant(resolver) + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_constant(resolver))) + } + Expr::InSelect { .. } => { + false // might be constant, too annoying to check subqueries etc. implement later + } + Expr::InTable { .. } => false, + Expr::IsNull(expr) => expr.is_constant(resolver), + Expr::Like { + lhs, rhs, escape, .. + } => { + lhs.is_constant(resolver) + && rhs.is_constant(resolver) + && escape + .as_ref() + .map_or(true, |escape| escape.is_constant(resolver)) + } + Expr::Literal(_) => true, + Expr::Name(_) => false, + Expr::NotNull(expr) => expr.is_constant(resolver), + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_constant(resolver)), + Expr::Qualified(_, _) => { + panic!("Qualified should have been rewritten as Column") + } + Expr::Raise(_, expr) => expr + .as_ref() + .map_or(true, |expr| expr.is_constant(resolver)), + Expr::Subquery(_) => false, + Expr::Unary(_, expr) => expr.is_constant(resolver), + Expr::Variable(_) => false, + } + } + /// Returns true if the expression is a constant expression that, when evaluated as a condition, is always true or false + fn check_always_true_or_false(&self) -> Result> { match self { Self::Literal(lit) => match lit { ast::Literal::Numeric(b) => { if let Ok(int_value) = b.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = b.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } @@ -624,35 +711,35 @@ impl Optimizable for ast::Expr { let without_quotes = s.trim_matches('\''); if let Ok(int_value) = without_quotes.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = without_quotes.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } - Ok(Some(ConstantPredicate::AlwaysFalse)) + Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)) } _ => Ok(None), }, Self::Unary(op, expr) => { if *op == ast::UnaryOperator::Not { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial.map(|t| match t { - ConstantPredicate::AlwaysTrue => ConstantPredicate::AlwaysFalse, - ConstantPredicate::AlwaysFalse => ConstantPredicate::AlwaysTrue, + AlwaysTrueOrFalse::AlwaysTrue => AlwaysTrueOrFalse::AlwaysFalse, + AlwaysTrueOrFalse::AlwaysFalse => AlwaysTrueOrFalse::AlwaysTrue, })); } if *op == ast::UnaryOperator::Negative { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial); } @@ -661,50 +748,50 @@ impl Optimizable for ast::Expr { Self::InList { lhs: _, not, rhs } => { if rhs.is_none() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } let rhs = rhs.as_ref().unwrap(); if rhs.is_empty() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } Ok(None) } Self::Binary(lhs, op, rhs) => { - let lhs_trivial = lhs.check_constant()?; - let rhs_trivial = rhs.check_constant()?; + let lhs_trivial = lhs.check_always_true_or_false()?; + let rhs_trivial = rhs.check_always_true_or_false()?; match op { ast::Operator::And => { - if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) - || rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) + || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) { - return Ok(Some(ConstantPredicate::AlwaysFalse)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)); } - if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) - && rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) + && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) { - return Ok(Some(ConstantPredicate::AlwaysTrue)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue)); } Ok(None) } ast::Operator::Or => { - if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) - || rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) + || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) { - return Ok(Some(ConstantPredicate::AlwaysTrue)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue)); } - if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) - && rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) + && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) { - return Ok(Some(ConstantPredicate::AlwaysFalse)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)); } Ok(None) diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 9793afdc9..ce9cbc4ac 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -124,8 +124,8 @@ pub fn emit_order_by( cursor_id: sort_cursor, pc_if_empty: sort_loop_end_label, }); + program.preassign_label_to_next_insn(sort_loop_start_label); - program.resolve_label(sort_loop_start_label, program.offset()); emit_offset(program, t_ctx, plan, sort_loop_next_label)?; program.emit_insn(Insn::SorterData { @@ -154,8 +154,7 @@ pub fn emit_order_by( cursor_id: sort_cursor, pc_if_next: sort_loop_start_label, }); - - program.resolve_label(sort_loop_end_label, program.offset()); + program.preassign_label_to_next_insn(sort_loop_end_label); Ok(()) } diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs index bd120ecb8..a3662c9c2 100644 --- a/core/translate/pragma.rs +++ b/core/translate/pragma.rs @@ -29,7 +29,7 @@ fn list_pragmas( } program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_constant_insns(); program.emit_goto(start_offset); } @@ -104,7 +104,7 @@ pub fn translate_pragma( }, }; program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(write); program.emit_constant_insns(); program.emit_goto(start_offset); diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 49e73cf1b..760de4e41 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -37,7 +37,7 @@ pub fn translate_create_table( let init_label = program.emit_init(); let start_offset = program.offset(); program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); program.emit_goto(start_offset); @@ -148,7 +148,7 @@ pub fn translate_create_table( // TODO: SqlExec program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); program.emit_goto(start_offset); @@ -448,7 +448,7 @@ pub fn translate_create_virtual_table( }); let init_label = program.emit_init(); program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); return Ok(program); @@ -519,7 +519,7 @@ pub fn translate_create_virtual_table( }); program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); program.emit_goto(start_offset); @@ -545,7 +545,7 @@ pub fn translate_drop_table( let init_label = program.emit_init(); let start_offset = program.offset(); program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); program.emit_goto(start_offset); @@ -583,14 +583,14 @@ pub fn translate_drop_table( // 1. Remove all entries from the schema table related to the table we are dropping, except for triggers // loop to beginning of schema table let end_metadata_label = program.allocate_label(); + let metadata_loop = program.allocate_label(); program.emit_insn(Insn::Rewind { cursor_id: sqlite_schema_cursor_id, pc_if_empty: end_metadata_label, }); + program.preassign_label_to_next_insn(metadata_loop); // start loop on schema table - let metadata_loop = program.allocate_label(); - program.resolve_label(metadata_loop, program.offset()); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, column: 2, @@ -627,7 +627,7 @@ pub fn translate_drop_table( cursor_id: sqlite_schema_cursor_id, pc_if_next: metadata_loop, }); - program.resolve_label(end_metadata_label, program.offset()); + program.preassign_label_to_next_insn(end_metadata_label); // end of loop on schema table // 2. Destroy the indices within a loop @@ -696,7 +696,7 @@ pub fn translate_drop_table( // end of the program program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_transaction(true); program.emit_constant_insns(); diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 71cb72348..17434edc2 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -52,7 +52,7 @@ pub fn emit_subquery<'a>( t_ctx: &mut TranslateCtx<'a>, ) -> Result { let yield_reg = program.alloc_register(); - let coroutine_implementation_start_offset = program.offset().add(1u32); + let coroutine_implementation_start_offset = program.allocate_label(); match &mut plan.query_type { SelectQueryType::Subquery { yield_reg: y, @@ -91,6 +91,7 @@ pub fn emit_subquery<'a>( jump_on_definition: subquery_body_end_label, start_offset: coroutine_implementation_start_offset, }); + program.preassign_label_to_next_insn(coroutine_implementation_start_offset); // Normally we mark each LIMIT value as a constant insn that is emitted only once, but in the case of a subquery, // we need to initialize it every time the subquery is run; otherwise subsequent runs of the subquery will already // have the LIMIT counter at 0, and will never return rows. @@ -103,6 +104,6 @@ pub fn emit_subquery<'a>( let result_column_start_reg = emit_query(program, plan, &mut metadata)?; program.resolve_label(end_coroutine_label, program.offset()); program.emit_insn(Insn::EndCoroutine { yield_reg }); - program.resolve_label(subquery_body_end_label, program.offset()); + program.preassign_label_to_next_insn(subquery_body_end_label); Ok(result_column_start_reg) } diff --git a/core/translate/transaction.rs b/core/translate/transaction.rs index 60e00e73b..11c0a8a10 100644 --- a/core/translate/transaction.rs +++ b/core/translate/transaction.rs @@ -33,7 +33,7 @@ pub fn translate_tx_begin( } } program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_goto(start_offset); Ok(program) } @@ -52,7 +52,7 @@ pub fn translate_tx_commit(_tx_name: Option) -> Result { rollback: false, }); program.emit_halt(); - program.resolve_label(init_label, program.offset()); + program.preassign_label_to_next_insn(init_label); program.emit_goto(start_offset); Ok(program) } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 66b2143bb..6be6fdad8 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -1,6 +1,6 @@ use std::{ cell::Cell, - collections::HashMap, + cmp::Ordering, rc::{Rc, Weak}, sync::Arc, }; @@ -16,24 +16,25 @@ use crate::{ Connection, VirtualTable, }; -use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program}; +use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, next_free_cursor_id: usize, - insns: Vec<(Insn, InsnFunction)>, - // for temporarily storing instructions that will be put after Transaction opcode - constant_insns: Vec<(Insn, InsnFunction)>, - // Vector of labels which must be assigned to next emitted instruction - next_insn_labels: Vec, + /// Instruction, the function to execute it with, and its original index in the vector. + insns: Vec<(Insn, InsnFunction, usize)>, + /// A span of instructions from (offset_start_inclusive, offset_end_exclusive), + /// that are deemed to be compile-time constant and can be hoisted out of loops + /// so that they get evaluated only once at the start of the program. + pub constant_spans: Vec<(usize, usize)>, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, /// A vector where index=label number, value=resolved offset. Resolved in build(). - label_to_resolved_offset: Vec>, + label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) - comments: Option>, + comments: Option>, pub parameters: Parameters, pub result_columns: Vec, pub table_references: Vec, @@ -82,13 +83,12 @@ impl ProgramBuilder { next_free_register: 1, next_free_cursor_id: 0, insns: Vec::with_capacity(opts.approx_num_insns), - next_insn_labels: Vec::with_capacity(2), cursor_ref: Vec::with_capacity(opts.num_cursors), - constant_insns: Vec::new(), + constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), seekrowid_emitted_bitmask: 0, comments: if opts.query_mode == QueryMode::Explain { - Some(HashMap::new()) + Some(Vec::new()) } else { None }, @@ -98,6 +98,56 @@ impl ProgramBuilder { } } + /// Start a new constant span. The next instruction to be emitted will be the first + /// instruction in the span. + pub fn constant_span_start(&mut self) -> usize { + let span = self.constant_spans.len(); + let start = self.insns.len(); + self.constant_spans.push((start, usize::MAX)); + span + } + + /// End the current constant span. The last instruction that was emitted is the last + /// instruction in the span. + pub fn constant_span_end(&mut self, span_idx: usize) { + let span = &mut self.constant_spans[span_idx]; + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + + /// End all constant spans that are currently open. This is used to handle edge cases + /// where we think a parent expression is constant, but we decide during the evaluation + /// of one of its children that it is not. + pub fn constant_span_end_all(&mut self) { + for span in self.constant_spans.iter_mut() { + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + } + + /// Check if there is a constant span that is currently open. + pub fn constant_span_is_open(&self) -> bool { + self.constant_spans + .last() + .map_or(false, |(_, end)| *end == usize::MAX) + } + + /// Get the index of the next constant span. + /// Used in [crate::translate::expr::translate_expr_no_constant_opt()] to invalidate + /// all constant spans after the given index. + pub fn constant_spans_next_idx(&self) -> usize { + self.constant_spans.len() + } + + /// Invalidate all constant spans after the given index. This is used when we want to + /// be sure that constant optimization is never used for translating a given expression. + /// See [crate::translate::expr::translate_expr_no_constant_opt()] for more details. + pub fn constant_spans_invalidate_after(&mut self, idx: usize) { + self.constant_spans.truncate(idx); + } + pub fn alloc_register(&mut self) -> usize { let reg = self.next_free_register; self.next_free_register += 1; @@ -123,12 +173,8 @@ impl ProgramBuilder { } pub fn emit_insn(&mut self, insn: Insn) { - for label in self.next_insn_labels.drain(..) { - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(self.insns.len() as InsnReference); - } let function = insn.to_function(); - self.insns.push((insn, function)); + self.insns.push((insn, function, self.insns.len())); } pub fn close_cursors(&mut self, cursors: &[CursorID]) { @@ -200,20 +246,69 @@ impl ProgramBuilder { pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) { if let Some(comments) = &mut self.comments { - comments.insert(insn_index.to_offset_int(), comment); + comments.push((insn_index.to_offset_int(), comment)); } } - // Emit an instruction that will be put at the end of the program (after Transaction statement). - // This is useful for instructions that otherwise will be unnecessarily repeated in a loop. - // Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table. - // We could simply set it once before the SCAN started. pub fn mark_last_insn_constant(&mut self) { - self.constant_insns.push(self.insns.pop().unwrap()); + if self.constant_span_is_open() { + // no need to mark this insn as constant as the surrounding parent expression is already constant + return; + } + + let prev = self.insns.len().saturating_sub(1); + self.constant_spans.push((prev, prev)); } pub fn emit_constant_insns(&mut self) { - self.insns.append(&mut self.constant_insns); + // move compile-time constant instructions to the end of the program, where they are executed once after Init jumps to it. + // any label_to_resolved_offset that points to an instruction within any moved constant span should be updated to point to the new location. + + // the instruction reordering can be done by sorting the insns, so that the ordering is: + // 1. if insn not in any constant span, it stays where it is + // 2. if insn is in a constant span, it is after other insns, except those that are in a later constant span + // 3. within a single constant span the order is preserver + self.insns.sort_by(|(_, _, index_a), (_, _, index_b)| { + let a_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_a && span.1 >= *index_a); + let b_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_b && span.1 >= *index_b); + if a_span.is_some() && b_span.is_some() { + a_span.unwrap().0.cmp(&b_span.unwrap().0) + } else if a_span.is_some() { + Ordering::Greater + } else if b_span.is_some() { + Ordering::Less + } else { + Ordering::Equal + } + }); + for resolved_offset in self.label_to_resolved_offset.iter_mut() { + if let Some((old_offset, target)) = resolved_offset { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .unwrap() as u32; + *resolved_offset = Some((new_offset, *target)); + } + } + + // Fix comments to refer to new locations + if let Some(comments) = &mut self.comments { + for (old_offset, _) in comments.iter_mut() { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .expect("comment must exist") as u32; + *old_offset = new_offset; + } + } } pub fn offset(&self) -> BranchOffset { @@ -226,18 +321,42 @@ impl ProgramBuilder { BranchOffset::Label(label_n as u32) } - // Effectively a GOTO without the need to emit an explicit GOTO instruction. - // Useful when you know you need to jump to "the next part", but the exact offset is unknowable - // at the time of emitting the instruction. + /// Resolve a label to whatever instruction follows the one that was + /// last emitted. + /// + /// Use this when your use case is: "the program should jump to whatever instruction + /// follows the one that was previously emitted", and you don't care exactly + /// which instruction that is. Examples include "the start of a loop", or + /// "after the loop ends". + /// + /// It is important to handle those cases this way, because the precise + /// instruction that follows any given instruction might change due to + /// reordering the emitted instructions. + #[inline] pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { - self.next_insn_labels.push(label); + assert!(label.is_label(), "BranchOffset {:?} is not a label", label); + self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn); } + /// Resolve a label to exactly the instruction that was last emitted. + /// + /// Use this when your use case is: "the program should jump to the exact instruction + /// that was last emitted", and you don't care WHERE exactly that ends up being + /// once the order of the bytecode of the program is finalized. Examples include + /// "jump to the Halt instruction", or "jump to the Next instruction of a loop". + #[inline] pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { + self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn); + } + + fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(to_offset.to_offset_int()); + let BranchOffset::Label(label_number) = label else { + unreachable!("Label is not a label"); + }; + self.label_to_resolved_offset[label_number as usize] = + Some((to_offset.to_offset_int(), target)); } /// Resolve unresolved labels to a specific offset in the instruction list. @@ -248,19 +367,25 @@ impl ProgramBuilder { pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { - let to_offset = self - .label_to_resolved_offset - .get(*label as usize) - .unwrap_or_else(|| { - panic!("Reference to undefined label in {}: {}", insn_name, label) - }); + let Some(Some((to_offset, target))) = + self.label_to_resolved_offset.get(*label as usize) + else { + panic!( + "Reference to undefined or unresolved label in {}: {}", + insn_name, label + ); + }; *pc = BranchOffset::Offset( to_offset - .unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)), + + if *target == JumpTarget::ExactlyThisInsn { + 0 + } else { + 1 + }, ); } }; - for (insn, _) in self.insns.iter_mut() { + for (insn, _, _) in self.insns.iter_mut() { match insn { Insn::Init { target_pc } => { resolve(target_pc, "Init"); @@ -375,9 +500,10 @@ impl ProgramBuilder { Insn::InitCoroutine { yield_reg: _, jump_on_definition, - start_offset: _, + start_offset, } => { resolve(jump_on_definition, "InitCoroutine"); + resolve(start_offset, "InitCoroutine"); } Insn::NotExists { cursor: _, @@ -470,15 +596,15 @@ impl ProgramBuilder { change_cnt_on: bool, ) -> Program { self.resolve_labels(); - assert!( - self.constant_insns.is_empty(), - "constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?" - ); self.parameters.list.dedup(); Program { max_registers: self.next_free_register, - insns: self.insns, + insns: self + .insns + .into_iter() + .map(|(insn, function, _)| (insn, function)) + .collect(), cursor_ref: self.cursor_ref, database_header, comments: self.comments, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 1d1ad0b77..45b23c538 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -60,6 +60,26 @@ use std::{ sync::Arc, }; +/// We use labels to indicate that we want to jump to whatever the instruction offset +/// will be at runtime, because the offset cannot always be determined when the jump +/// instruction is created. +/// +/// In some cases, we want to jump to EXACTLY a specific instruction. +/// - Example: a condition is not met, so we want to jump to wherever Halt is. +/// In other cases, we don't care what the exact instruction is, but we know that we +/// want to jump to whatever comes AFTER a certain instruction. +/// - Example: a Next instruction will want to jump to "whatever the start of the loop is", +/// but it doesn't care what instruction that is. +/// +/// The reason this distinction is important is that we might reorder instructions that are +/// constant at compile time, and when we do that, we need to change the offsets of any impacted +/// jump instructions, so the instruction that comes immediately after "next Insn" might have changed during the reordering. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum JumpTarget { + ExactlyThisInsn, + AfterThisInsn, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// Represents a target for a jump instruction. /// Stores 32-bit ints to keep the enum word-sized. @@ -95,15 +115,6 @@ impl BranchOffset { } } - /// Returns the label value. Panics if the branch offset is an offset or placeholder. - pub fn to_label_value(&self) -> u32 { - match self { - BranchOffset::Label(v) => *v, - BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"), - BranchOffset::Placeholder => unreachable!("Unresolved placeholder"), - } - } - /// Returns the branch offset as a signed integer. /// Used in explain output, where we don't want to panic in case we have an unresolved /// label or placeholder. @@ -121,6 +132,10 @@ impl BranchOffset { pub fn add>(self, n: N) -> BranchOffset { BranchOffset::Offset(self.to_offset_int() + n.into()) } + + pub fn sub>(self, n: N) -> BranchOffset { + BranchOffset::Offset(self.to_offset_int() - n.into()) + } } pub type CursorID = usize; @@ -352,7 +367,7 @@ pub struct Program { pub insns: Vec<(Insn, InsnFunction)>, pub cursor_ref: Vec<(Option, CursorType)>, pub database_header: Arc>, - pub comments: Option>, + pub comments: Option>, pub parameters: crate::parameters::Parameters, pub connection: Weak, pub n_change: Cell, @@ -542,10 +557,11 @@ fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) { addr, insn, String::new(), - program - .comments - .as_ref() - .and_then(|comments| comments.get(&{ addr }).copied()) + program.comments.as_ref().and_then(|comments| comments + .iter() + .find(|(offset, _)| *offset == addr) + .map(|(_, comment)| comment) + .copied()) ) ); } @@ -556,10 +572,13 @@ fn print_insn(program: &Program, addr: InsnReference, insn: &Insn, indent: Strin addr, insn, indent, - program - .comments - .as_ref() - .and_then(|comments| comments.get(&{ addr }).copied()), + program.comments.as_ref().and_then(|comments| { + comments + .iter() + .find(|(offset, _)| *offset == addr) + .map(|(_, comment)| comment) + .copied() + }), ); w.push_str(&s); }