diff --git a/Cargo.lock b/Cargo.lock index c44ea4996..6a024d184 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4424,7 +4424,7 @@ dependencies = [ [[package]] name = "turso_whopper" -version = "0.1.5" +version = "0.2.0-pre.1" dependencies = [ "anyhow", "clap", diff --git a/core/translate/compound_select.rs b/core/translate/compound_select.rs index f4d9dd0bf..a7387e17c 100644 --- a/core/translate/compound_select.rs +++ b/core/translate/compound_select.rs @@ -6,7 +6,7 @@ use crate::translate::result_row::try_fold_expr_to_i64; use crate::vdbe::builder::{CursorType, ProgramBuilder}; use crate::vdbe::insn::Insn; use crate::vdbe::BranchOffset; -use crate::SymbolTable; +use crate::{emit_explain, QueryMode, SymbolTable}; use std::sync::Arc; use tracing::instrument; use turso_parser::ast::{CompoundOperator, SortOrder}; @@ -98,6 +98,7 @@ pub fn emit_program_for_compound_select( _ => (None, None), }; + emit_explain!(program, true, "COMPOUND QUERY".to_owned()); emit_compound_select( program, plan, @@ -108,6 +109,7 @@ pub fn emit_program_for_compound_select( yield_reg, reg_result_cols_start, )?; + program.pop_current_parent_explain(); program.result_columns = right_plan.result_columns; program.table_references.extend(right_plan.table_references); @@ -145,6 +147,7 @@ fn emit_compound_select( syms, right_most.table_references.joined_tables().len(), ); + let is_leftmost_query = left.len() == 1; right_most_ctx.reg_result_cols_start = reg_result_cols_start; match left.pop() { Some((mut plan, operator)) => match operator { @@ -187,7 +190,18 @@ fn emit_compound_select( right_most.offset = offset; right_most_ctx.reg_offset = offset_reg; } + + emit_explain!( + program, + true, + if is_leftmost_query { + "LEFT-MOST SUBQUERY".to_owned() + } else { + "UNION ALL".to_owned() + } + ); emit_query(program, &mut right_most, &mut right_most_ctx)?; + program.pop_current_parent_explain(); program.preassign_label_to_next_insn(label_next_select); } CompoundOperator::Union => { @@ -229,7 +243,18 @@ fn emit_compound_select( index: dedupe_index.1.clone(), is_delete: false, }; + + emit_explain!( + program, + true, + if is_leftmost_query { + "LEFT-MOST SUBQUERY".to_owned() + } else { + "UNION USING TEMP B-TREE".to_owned() + } + ); emit_query(program, &mut right_most, &mut right_most_ctx)?; + program.pop_current_parent_explain(); if new_dedupe_index { read_deduplicated_union_or_except_rows( @@ -282,7 +307,17 @@ fn emit_compound_select( index: right_index, is_delete: false, }; + emit_explain!( + program, + true, + if is_leftmost_query { + "LEFT-MOST SUBQUERY".to_owned() + } else { + "INTERSECT USING TEMP B-TREE".to_owned() + } + ); emit_query(program, &mut right_most, &mut right_most_ctx)?; + program.pop_current_parent_explain(); read_intersect_rows( program, left_cursor_id, @@ -332,7 +367,17 @@ fn emit_compound_select( index: index.clone(), is_delete: true, }; + emit_explain!( + program, + true, + if is_leftmost_query { + "LEFT-MOST SUBQUERY".to_owned() + } else { + "EXCEPT USING TEMP B-TREE".to_owned() + } + ); emit_query(program, &mut right_most, &mut right_most_ctx)?; + program.pop_current_parent_explain(); if new_index { read_deduplicated_union_or_except_rows( program, cursor_id, &index, limit_ctx, offset_reg, yield_reg, diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index a4dc56691..3483b4c30 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -1,6 +1,7 @@ use turso_parser::ast::{self, SortOrder}; use crate::{ + emit_explain, schema::PseudoCursorType, translate::collate::CollationSeq, util::exprs_are_equivalent, @@ -8,7 +9,7 @@ use crate::{ builder::{CursorType, ProgramBuilder}, insn::Insn, }, - Result, + QueryMode, Result, }; use super::{ @@ -101,6 +102,10 @@ pub fn emit_order_by( let sorter_column_count = order_by.len() + remappings.iter().filter(|r| !r.deduplicated).count(); + // TODO: we need to know how many indices used for sorting + // to emit correct explain output. + emit_explain!(program, false, "USE TEMP B-TREE FOR ORDER BY".to_owned()); + let pseudo_cursor = program.alloc_cursor_id(CursorType::Pseudo(PseudoCursorType { column_count: sorter_column_count, })); diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 99d7d8568..e70d40019 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -1,13 +1,14 @@ use crate::{ + emit_explain, schema::Table, vdbe::{builder::ProgramBuilder, insn::Insn}, - Result, + QueryMode, Result, }; use super::{ emitter::{emit_query, Resolver, TranslateCtx}, main_loop::LoopLabels, - plan::{QueryDestination, SelectPlan, TableReferences}, + plan::{Operation, QueryDestination, Search, SelectPlan, TableReferences}, }; /// Emit the subqueries contained in the FROM clause. @@ -17,7 +18,45 @@ pub fn emit_subqueries( t_ctx: &mut TranslateCtx, tables: &mut TableReferences, ) -> Result<()> { + if tables.joined_tables().is_empty() { + emit_explain!(program, false, "SCAN CONSTANT ROW".to_owned()); + } + for table_reference in tables.joined_tables_mut() { + emit_explain!( + program, + true, + match &table_reference.op { + Operation::Scan { .. } => { + if table_reference.table.get_name() == table_reference.identifier { + format!("SCAN {}", table_reference.identifier) + } else { + format!( + "SCAN {} AS {}", + table_reference.table.get_name(), + table_reference.identifier + ) + } + } + Operation::Search(search) => match search { + Search::RowidEq { .. } | Search::Seek { index: None, .. } => { + format!( + "SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)", + table_reference.identifier + ) + } + Search::Seek { + index: Some(index), .. + } => { + format!( + "SEARCH {} USING INDEX {}", + table_reference.identifier, index.name + ) + } + }, + } + ); + if let Table::FromClauseSubquery(from_clause_subquery) = &mut table_reference.table { // Emit the subquery and get the start register of the result columns. let result_columns_start = @@ -27,6 +66,8 @@ pub fn emit_subqueries( // as if it were reading from a regular table. from_clause_subquery.result_columns_start_reg = Some(result_columns_start); } + + program.pop_current_parent_explain(); } Ok(()) } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 98fa3f526..2ade6b0f4 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -100,7 +100,7 @@ pub struct ProgramBuilder { // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) - comments: Option>, + comments: Vec<(InsnReference, &'static str)>, pub parameters: Parameters, pub result_columns: Vec, pub table_references: TableReferences, @@ -114,6 +114,10 @@ pub struct ProgramBuilder { // TODO: when we support multiple dbs, this should be a write mask to track which DBs need to be written txn_mode: TransactionMode, rollback: bool, + /// The mode in which the query is being executed. + query_mode: QueryMode, + /// Current parent explain address, if any. + current_parent_explain_idx: Option, } #[derive(Debug, Clone)] @@ -158,6 +162,18 @@ pub struct ProgramBuilderOpts { pub approx_num_labels: usize, } +/// Use this macro to emit an OP_Explain instruction. +/// Please use this macro instead of calling emit_explain() directly, +/// because we want to avoid allocating a String if we are not in explain mode. +#[macro_export] +macro_rules! emit_explain { + ($builder:expr, $push:expr, $detail:expr) => { + if let QueryMode::ExplainQueryPlan = $builder.get_query_mode() { + $builder.emit_explain($push, $detail); + } + }; +} + impl ProgramBuilder { pub fn new( query_mode: QueryMode, @@ -173,11 +189,7 @@ impl ProgramBuilder { constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), seekrowid_emitted_bitmask: 0, - comments: if let QueryMode::Explain | QueryMode::ExplainQueryPlan = query_mode { - Some(Vec::new()) - } else { - None - }, + comments: Vec::new(), parameters: Parameters::new(), result_columns: Vec::new(), table_references: TableReferences::new(vec![], vec![]), @@ -189,6 +201,8 @@ impl ProgramBuilder { capture_data_changes_mode, txn_mode: TransactionMode::None, rollback: false, + query_mode, + current_parent_explain_idx: None, } } @@ -378,8 +392,40 @@ impl ProgramBuilder { } pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) { - if let Some(comments) = &mut self.comments { - comments.push((insn_index.as_offset_int(), comment)); + if let QueryMode::Explain | QueryMode::ExplainQueryPlan = self.query_mode { + self.comments.push((insn_index.as_offset_int(), comment)); + } + } + + pub fn get_query_mode(&self) -> QueryMode { + self.query_mode + } + + /// use emit_explain macro instead, because we don't want to allocate + /// String if we are not in explain mode + pub fn emit_explain(&mut self, push: bool, detail: String) { + if let QueryMode::ExplainQueryPlan = self.query_mode { + self.emit_insn(Insn::Explain { + p1: self.insns.len(), + p2: self.current_parent_explain_idx, + detail: detail, + }); + if push { + self.current_parent_explain_idx = Some(self.insns.len() - 1); + } + } + } + + pub fn pop_current_parent_explain(&mut self) { + if let QueryMode::ExplainQueryPlan = self.query_mode { + if let Some(current) = self.current_parent_explain_idx { + let (Insn::Explain { p2, .. }, _, _) = &self.insns[current] else { + unreachable!("current_parent_explain_idx must point to an Explain insn"); + }; + self.current_parent_explain_idx = *p2; + } + } else { + debug_assert!(self.current_parent_explain_idx.is_none()) } } @@ -432,14 +478,44 @@ impl ProgramBuilder { } // Fix comments to refer to new locations - if let Some(comments) = &mut self.comments { - for (old_offset, _) in comments.iter_mut() { - let new_offset = self - .insns - .iter() - .position(|(_, _, index)| *old_offset == *index as u32) - .expect("comment must exist") as u32; - *old_offset = new_offset; + for (old_offset, _) in self.comments.iter_mut() { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .expect("comment must exist") as u32; + *old_offset = new_offset; + } + + if let QueryMode::ExplainQueryPlan = self.query_mode { + self.current_parent_explain_idx = + if let Some(old_parent) = self.current_parent_explain_idx { + self.insns + .iter() + .position(|(_, _, index)| old_parent == *index) + } else { + None + }; + + for i in 0..self.insns.len() { + let (Insn::Explain { p2, .. }, _, _) = &self.insns[i] else { + continue; + }; + + let new_p2 = if p2.is_some() { + self.insns + .iter() + .position(|(_, _, index)| *p2 == Some(*index)) + } else { + None + }; + + let (Insn::Explain { p1, p2, .. }, _, _) = &mut self.insns[i] else { + unreachable!(); + }; + + *p1 = i; + *p2 = new_p2; } } } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 47efb692c..9e764a395 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1723,6 +1723,15 @@ pub fn insn_to_row( 0, format!("if (r[{}] < 0) goto {}", reg, target_pc.as_debug_int()), ), + Insn::Explain { p1, p2, detail } => ( + "Explain", + *p1 as i32, + p2.as_ref().map(|p| *p).unwrap_or(0) as i32, + 0, + Value::build_text(detail.as_str()), + 0, + String::new(), + ), } } diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index ac0564c0c..d9b86a513 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1086,6 +1086,13 @@ pub enum Insn { reg: usize, target_pc: BranchOffset, }, + + // OP_Explain + Explain { + p1: usize, // P1: address of instruction + p2: Option, // P2: address of parent explain instruction + detail: String, // P4: detail text + }, } impl Insn { @@ -1224,6 +1231,7 @@ impl Insn { Insn::MaxPgcnt { .. } => execute::op_max_pgcnt, Insn::JournalMode { .. } => execute::op_journal_mode, Insn::IfNeg { .. } => execute::op_if_neg, + Insn::Explain { .. } => execute::op_noop, } } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 6f28fe745..ad0c0f962 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -447,7 +447,7 @@ pub struct Program { pub max_registers: usize, pub insns: Vec<(Insn, InsnFunction)>, pub cursor_ref: Vec<(Option, CursorType)>, - pub comments: Option>, + pub comments: Vec<(InsnReference, &'static str)>, pub parameters: crate::parameters::Parameters, pub connection: Arc, pub n_change: Cell, @@ -511,13 +511,11 @@ impl Program { let (opcode, p1, p2, p3, p4, p5, comment) = insn_to_row_with_comment( self, current_insn, - self.comments.as_ref().and_then(|comments| { - comments - .iter() - .find(|(offset, _)| *offset == state.pc) - .map(|(_, comment)| comment) - .copied() - }), + self.comments + .iter() + .find(|(offset, _)| *offset == state.pc) + .map(|(_, comment)| comment) + .copied(), ); state.registers[0] = Register::Value(Value::Integer(state.pc as i64)); @@ -903,11 +901,12 @@ fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) { addr, insn, String::new(), - program.comments.as_ref().and_then(|comments| comments + program + .comments .iter() .find(|(offset, _)| *offset == addr) .map(|(_, comment)| comment) - .copied()) + .copied() ) ); }