From 2066475e03a0fb3421bf8d75a3be6f42606805e3 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 31 Dec 2024 14:13:11 +0200 Subject: [PATCH 1/7] feat: subqueries in FROM clause --- core/lib.rs | 4 +- core/schema.rs | 12 + core/translate/delete.rs | 22 +- core/translate/emitter.rs | 555 ++++++++++++++++++++++++++++-------- core/translate/expr.rs | 87 ++++-- core/translate/optimizer.rs | 132 +++++++-- core/translate/plan.rs | 131 +++++++-- core/translate/planner.rs | 194 +++++++------ core/translate/select.rs | 48 +++- core/vdbe/mod.rs | 22 +- testing/all.test | 1 + testing/subquery.test | 194 +++++++++++++ 12 files changed, 1092 insertions(+), 310 deletions(-) create mode 100644 testing/subquery.test diff --git a/core/lib.rs b/core/lib.rs index 5ef48e742..2e0b3e301 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -298,8 +298,8 @@ impl Connection { Cmd::ExplainQueryPlan(stmt) => { match stmt { ast::Stmt::Select(select) => { - let plan = prepare_select_plan(&self.schema.borrow(), select)?; - let plan = optimize_plan(plan)?; + let mut plan = prepare_select_plan(&self.schema.borrow(), select)?; + optimize_plan(&mut plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/schema.rs b/core/schema.rs index b9f9bdccf..b69e18213 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -71,6 +71,14 @@ impl Table { } } + pub fn get_root_page(&self) -> usize { + match self { + Table::BTree(table) => table.root_page, + Table::Index(_) => unimplemented!(), + Table::Pseudo(_) => unimplemented!(), + } + } + pub fn get_name(&self) -> &str { match self { Self::BTree(table) => &table.name, @@ -211,6 +219,10 @@ impl PseudoTable { Self { columns: vec![] } } + pub fn new_with_columns(columns: Vec) -> Self { + Self { columns } + } + pub fn add_column(&mut self, name: &str, ty: Type, primary_key: bool) { self.columns.push(Column { name: normalize_ident(name), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index f655e8c3b..be7dd772a 100644 --- a/core/translate/delete.rs 
+++ b/core/translate/delete.rs @@ -1,6 +1,7 @@ +use crate::schema::Table; use crate::translate::emitter::emit_program; use crate::translate::optimizer::optimize_plan; -use crate::translate::plan::{BTreeTableReference, DeletePlan, Plan, SourceOperator}; +use crate::translate::plan::{DeletePlan, Plan, SourceOperator}; use crate::translate::planner::{parse_limit, parse_where}; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result, SymbolTable}; @@ -8,6 +9,8 @@ use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; +use super::plan::{TableReference, TableReferenceType}; + pub fn translate_delete( schema: &Schema, tbl_name: &QualifiedName, @@ -17,9 +20,9 @@ pub fn translate_delete( connection: Weak, syms: &SymbolTable, ) -> Result { - let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; - let optimized_plan = optimize_plan(delete_plan)?; - emit_program(database_header, optimized_plan, connection, syms) + let mut delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; + optimize_plan(&mut delete_plan)?; + emit_program(database_header, delete_plan, connection, syms) } pub fn prepare_delete_plan( @@ -33,15 +36,16 @@ pub fn prepare_delete_plan( None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), }; - let table_ref = BTreeTableReference { - table: table.clone(), + let btree_table_ref = TableReference { + table: Table::BTree(table.clone()), table_identifier: table.name.clone(), table_index: 0, + reference_type: TableReferenceType::BTreeTable, }; - let referenced_tables = vec![table_ref.clone()]; + let referenced_tables = vec![btree_table_ref.clone()]; // Parse the WHERE clause - let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + let resolved_where_clauses = parse_where(where_clause, &referenced_tables)?; // Parse the LIMIT clause let resolved_limit = 
limit.and_then(parse_limit); @@ -49,7 +53,7 @@ pub fn prepare_delete_plan( let plan = DeletePlan { source: SourceOperator::Scan { id: 0, - table_reference: table_ref.clone(), + table_reference: btree_table_ref, predicates: resolved_where_clauses.clone(), iter_dir: None, }, diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 2cf9dafe4..ad472c2d0 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -20,7 +20,9 @@ use super::expr::{ translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; -use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, SelectPlan}; +use super::plan::{ + Aggregate, Direction, GroupBy, SelectPlan, SelectQueryType, TableReference, TableReferenceType, +}; use super::plan::{ResultSetColumn, SourceOperator}; // Metadata for handling LEFT JOIN operations @@ -77,11 +79,17 @@ pub struct GroupByMetadata { /// generation process. #[derive(Debug)] pub struct Metadata { - // labels for the instructions that terminate the execution when a conditional check evaluates to false. typically jumps to Halt, but can also jump to AggFinal if a parent in the tree is an aggregation + // this stack is generically used for a "jump to the end of the current query phase" purpose in a FIFO manner. + // For example, in a nested loop join, each loop will have a label signifying the end of that particular loop. termination_label_stack: Vec, + // label for the instruction that jumps to the next phase of the query after the main loop + // we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.) + after_main_loop_label: Option, // labels for the instructions that jump to the next row in the current operator. 
// for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; // in a join with a scan and a seek, the seek will jump to the scan's Next instruction when the join condition is false. + // The difference between next_row_labels and termination_label_stack is that next_row_labels are used to jump to the next row in the + // current loop, whereas termination_label_stack is used to jump OUT of the current loop entirely. next_row_labels: HashMap, // labels for the instructions beginning the inner loop of a scan operator. scan_loop_body_labels: Vec, @@ -93,12 +101,16 @@ pub struct Metadata { left_joins: HashMap, // First register of the aggregation results pub aggregation_start_register: Option, + // First register of the result columns of the query + pub result_column_start_register: Option, // We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter. // This vector holds the indexes of the result columns in the ORDER BY sorter. pub result_column_indexes_in_orderby_sorter: HashMap, // We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys. // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, + // The register holding the limit value, if any. 
+ pub limit_reg: Option, } /// Used to distinguish database operations @@ -128,14 +140,17 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> let metadata = Metadata { termination_label_stack: vec![halt_label], + after_main_loop_label: None, group_by_metadata: None, left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], sort_metadata: None, aggregation_start_register: None, + result_column_start_register: None, result_column_indexes_in_orderby_sorter: HashMap::new(), result_columns_to_skip_in_orderby_sorter: None, + limit_reg: None, }; Ok((program, metadata, init_label, start_offset)) @@ -200,80 +215,228 @@ fn emit_program_for_select( } } + // Emit main parts of query + emit_query(&mut program, &mut plan, &mut metadata, syms)?; + + // Finalize program + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + +/// Emit the subqueries contained in the FROM clause. +/// This is done first so the results can be read in the main query loop. +fn emit_subqueries( + program: &mut ProgramBuilder, + referenced_tables: &mut [TableReference], + source: &mut SourceOperator, + syms: &SymbolTable, +) -> Result<()> { + match source { + SourceOperator::Subquery { + table_reference, + plan, + .. + } => { + // Emit the subquery and get the start register of the result columns. + let result_columns_start = emit_subquery(program, plan, syms)?; + // Set the result_columns_start_reg in the TableReference object. + // This is done so that translate_expr() can read the result columns of the subquery, + // as if it were reading from a regular table. + let table_ref = referenced_tables + .iter_mut() + .find(|t| t.table_identifier == table_reference.table_identifier) + .unwrap(); + if let TableReferenceType::Subquery { + result_columns_start_reg, + .. 
+ } = &mut table_ref.reference_type + { + *result_columns_start_reg = result_columns_start; + } else { + unreachable!("emit_subqueries called on non-subquery"); + } + Ok(()) + } + SourceOperator::Join { left, right, .. } => { + emit_subqueries(program, referenced_tables, left, syms)?; + emit_subqueries(program, referenced_tables, right, syms)?; + Ok(()) + } + _ => Ok(()), + } +} + +/// Emit a subquery and return the start register of the result columns. +/// This is done by emitting a coroutine that stores the result columns in sequential registers. +/// Each subquery in a FROM clause has its own separate SelectPlan which is wrapped in a coroutine. +/// +/// The resulting bytecode from a subquery is mostly exactly the same as a regular query, except: +/// - it ends in an EndCoroutine instead of a Halt. +/// - instead of emitting ResultRows, the coroutine yields to the main query loop. +/// - the first register of the result columns is returned to the parent query, +/// so that translate_expr() can read the result columns of the subquery, +/// as if it were reading from a regular table. +/// +/// Since a subquery has its own SelectPlan, it can contain nested subqueries, +/// which can contain even more nested subqueries, etc. +fn emit_subquery( + program: &mut ProgramBuilder, + plan: &mut SelectPlan, + syms: &SymbolTable, +) -> Result { + let yield_reg = program.alloc_register(); + let coroutine_implementation_start_offset = program.offset() + 1; + match &mut plan.query_type { + SelectQueryType::Subquery { + yield_reg: y, + coroutine_implementation_start, + } => { + // The parent query will use this register to jump to/from the subquery. + *y = yield_reg; + // The parent query will use this register to reinitialize the coroutine when it needs to run multiple times. 
+ *coroutine_implementation_start = coroutine_implementation_start_offset; + } + _ => unreachable!("emit_subquery called on non-subquery"), + } + let end_coroutine_label = program.allocate_label(); + let mut metadata = Metadata { + // A regular query ends in a Halt, whereas a subquery ends in an EndCoroutine. + termination_label_stack: vec![end_coroutine_label], + after_main_loop_label: None, + group_by_metadata: None, + left_joins: HashMap::new(), + next_row_labels: HashMap::new(), + scan_loop_body_labels: vec![], + sort_metadata: None, + aggregation_start_register: None, + result_column_start_register: None, + result_column_indexes_in_orderby_sorter: HashMap::new(), + result_columns_to_skip_in_orderby_sorter: None, + limit_reg: plan.limit.map(|_| program.alloc_register()), + }; + let subquery_body_end_label = program.allocate_label(); + program.emit_insn_with_label_dependency( + Insn::InitCoroutine { + yield_reg, + jump_on_definition: subquery_body_end_label, + start_offset: coroutine_implementation_start_offset, + }, + subquery_body_end_label, + ); + // Normally we mark each LIMIT value as a constant insn that is emitted only once, but in the case of a subquery, + // we need to initialize it every time the subquery is run; otherwise subsequent runs of the subquery will already + // have the LIMIT counter at 0, and will never return rows. 
+ if let Some(limit) = plan.limit { + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: metadata.limit_reg.unwrap(), + }); + } + let result_column_start_reg = emit_query(program, plan, &mut metadata, syms)?; + program.resolve_label(end_coroutine_label, program.offset()); + program.emit_insn(Insn::EndCoroutine { yield_reg }); + program.resolve_label(subquery_body_end_label, program.offset()); + Ok(result_column_start_reg) +} + +fn emit_query( + program: &mut ProgramBuilder, + plan: &mut SelectPlan, + metadata: &mut Metadata, + syms: &SymbolTable, +) -> Result { + // Emit subqueries first so the results can be read in the main query loop. + emit_subqueries(program, &mut plan.referenced_tables, &mut plan.source, syms)?; + + if metadata.limit_reg.is_none() { + metadata.limit_reg = plan.limit.map(|_| program.alloc_register()); + } + // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 // however an aggregation might still happen, // e.g. 
SELECT COUNT(*) WHERE 0 returns a row with 0, not an empty result set - let skip_loops_label = if plan.contains_constant_false_condition { - let skip_loops_label = program.allocate_label(); + let after_main_loop_label = program.allocate_label(); + metadata.after_main_loop_label = Some(after_main_loop_label); + if plan.contains_constant_false_condition { program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: skip_loops_label, + target_pc: after_main_loop_label, }, - skip_loops_label, + after_main_loop_label, ); - Some(skip_loops_label) - } else { - None - }; + } + + // Allocate registers for result columns + metadata.result_column_start_register = + Some(program.alloc_registers(plan.result_columns.len())); // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { - init_order_by(&mut program, order_by, &mut metadata)?; + let orderby_label = program.allocate_label(); + metadata.termination_label_stack.push(orderby_label); + init_order_by(program, order_by, metadata)?; } if let Some(ref mut group_by) = plan.group_by { - init_group_by(&mut program, group_by, &plan.aggregates, &mut metadata)?; + let output_groupby_row_label = program.allocate_label(); + metadata + .termination_label_stack + .push(output_groupby_row_label); + let groupby_end_label = program.allocate_label(); + metadata.termination_label_stack.push(groupby_end_label); + init_group_by(program, group_by, &plan.aggregates, metadata)?; + } else if !plan.aggregates.is_empty() { + let output_aggregation_row_label = program.allocate_label(); + metadata + .termination_label_stack + .push(output_aggregation_row_label); } - init_source( - &mut program, - &plan.source, - &mut metadata, - &OperationMode::SELECT, - )?; + init_source(program, &plan.source, metadata, &OperationMode::SELECT)?; // Set up main query execution loop open_loop( - &mut program, + program, &mut plan.source, &plan.referenced_tables, - &mut metadata, + metadata, syms, )?; // 
Process result columns and expressions in the inner loop - inner_loop_emit(&mut program, &mut plan, &mut metadata, syms)?; + inner_loop_emit(program, plan, metadata, syms)?; // Clean up and close the main execution loop - close_loop(&mut program, &plan.source, &mut metadata)?; + close_loop(program, &plan.source, metadata)?; - if let Some(skip_loops_label) = skip_loops_label { - program.resolve_label(skip_loops_label, program.offset()); - } + program.resolve_label(after_main_loop_label, program.offset()); let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition; // Handle GROUP BY and aggregation processing if let Some(ref mut group_by) = plan.group_by { group_by_emit( - &mut program, + program, &plan.result_columns, group_by, plan.order_by.as_ref(), &plan.aggregates, plan.limit, &plan.referenced_tables, - &mut metadata, + metadata, syms, + &plan.query_type, )?; } else if !plan.aggregates.is_empty() { // Handle aggregation without GROUP BY agg_without_group_by_emit( - &mut program, + program, &plan.referenced_tables, &plan.result_columns, &plan.aggregates, - &mut metadata, + metadata, syms, + &plan.query_type, )?; // Single row result for aggregates without GROUP BY, so ORDER BY not needed order_by_necessary = false; @@ -283,19 +446,17 @@ fn emit_program_for_select( if let Some(ref mut order_by) = plan.order_by { if order_by_necessary { order_by_emit( - &mut program, + program, order_by, &plan.result_columns, plan.limit, - &mut metadata, + metadata, + &plan.query_type, )?; } } - // Finalize program - epilogue(&mut program, &mut metadata, init_label, start_offset)?; - - Ok(program.build(database_header, connection)) + Ok(metadata.result_column_start_register.unwrap()) } fn emit_program_for_delete( @@ -307,18 +468,15 @@ fn emit_program_for_delete( let (mut program, mut metadata, init_label, start_offset) = prologue()?; // No rows will be read from source table loops if there is a constant false condition eg. 
WHERE 0 - let skip_loops_label = if plan.contains_constant_false_condition { - let skip_loops_label = program.allocate_label(); + let after_main_loop_label = program.allocate_label(); + if plan.contains_constant_false_condition { program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: skip_loops_label, + target_pc: after_main_loop_label, }, - skip_loops_label, + after_main_loop_label, ); - Some(skip_loops_label) - } else { - None - }; + } // Initialize cursors and other resources needed for query execution init_source( @@ -342,9 +500,7 @@ fn emit_program_for_delete( // Clean up and close the main execution loop close_loop(&mut program, &plan.source, &mut metadata)?; - if let Some(skip_loops_label) = skip_loops_label { - program.resolve_label(skip_loops_label, program.offset()); - } + program.resolve_label(after_main_loop_label, program.offset()); // Finalize program epilogue(&mut program, &mut metadata, init_label, start_offset)?; @@ -358,9 +514,6 @@ fn init_order_by( order_by: &[(ast::Expr, Direction)], metadata: &mut Metadata, ) -> Result<()> { - metadata - .termination_label_stack - .push(program.allocate_label()); let sort_cursor = program.alloc_cursor_id(None, None); metadata.sort_metadata = Some(SortMetadata { sort_cursor, @@ -385,8 +538,6 @@ fn init_group_by( aggregates: &[Aggregate], metadata: &mut Metadata, ) -> Result<()> { - let agg_final_label = program.allocate_label(); - metadata.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); let sort_cursor = program.alloc_cursor_id(None, None); @@ -468,6 +619,11 @@ fn init_source( mode: &OperationMode, ) -> Result<()> { match source { + SourceOperator::Subquery { id, .. 
} => { + let next_row_label = program.allocate_label(); + metadata.next_row_labels.insert(*id, next_row_label); + Ok(()) + } SourceOperator::Join { id, left, @@ -495,9 +651,9 @@ fn init_source( } => { let cursor_id = program.alloc_cursor_id( Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), + Some(table_reference.table.clone()), ); - let root_page = table_reference.table.root_page; + let root_page = table_reference.table.get_root_page(); let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); @@ -531,7 +687,7 @@ fn init_source( } => { let table_cursor_id = program.alloc_cursor_id( Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), + Some(table_reference.table.clone()), ); let next_row_label = program.allocate_label(); @@ -542,14 +698,14 @@ fn init_source( OperationMode::SELECT => { program.emit_insn(Insn::OpenReadAsync { cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, + root_page: table_reference.table.get_root_page(), }); program.emit_insn(Insn::OpenReadAwait {}); } OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, + root_page: table_reference.table.get_root_page(), }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -595,11 +751,82 @@ fn init_source( fn open_loop( program: &mut ProgramBuilder, source: &mut SourceOperator, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], metadata: &mut Metadata, syms: &SymbolTable, ) -> Result<()> { + metadata + .termination_label_stack + .push(program.allocate_label()); match source { + SourceOperator::Subquery { + id, + predicates, + plan, + .. 
+ } => { + let (yield_reg, coroutine_implementation_start) = match &plan.query_type { + SelectQueryType::Subquery { + yield_reg, + coroutine_implementation_start, + } => (*yield_reg, *coroutine_implementation_start), + _ => unreachable!("Subquery operator with non-subquery query type"), + }; + // In case the subquery is an inner loop, it needs to be reinitialized on each iteration of the outer loop. + program.emit_insn(Insn::InitCoroutine { + yield_reg, + jump_on_definition: 0, + start_offset: coroutine_implementation_start, + }); + let loop_body_start_label = program.allocate_label(); + metadata.scan_loop_body_labels.push(loop_body_start_label); + program.defer_label_resolution(loop_body_start_label, program.offset() as usize); + // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor, + // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row. + // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack, + // which in this case is the end of the Yield-Goto loop in the parent query. + let end_of_loop_label = *metadata.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::Yield { + yield_reg, + end_offset: end_of_loop_label, + }, + end_of_loop_label, + ); + + // In case we have predicates on the subquery results that evaluate to false, + // (e.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10) + // we jump to the Goto instruction below to move on to the next row from the subquery. + let jump_label = metadata + .next_row_labels + .get(id) + .expect("subquery has no next row label"); + + // These are predicates evaluated outside of the subquery, + // so they are translated here. + // E.g. 
SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + condition_metadata, + None, + syms, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + Ok(()) + } SourceOperator::Join { id, left, @@ -613,8 +840,7 @@ fn open_loop( let mut jump_target_when_false = *metadata .next_row_labels .get(&right.id()) - .or(metadata.next_row_labels.get(&left.id())) - .unwrap_or(metadata.termination_label_stack.last().unwrap()); + .expect("right side of join has no next row label"); if *outer { let lj_meta = metadata.left_joins.get(id).unwrap(); @@ -680,7 +906,9 @@ fn open_loop( program.emit_insn(Insn::RewindAsync { cursor_id }); } let scan_loop_body_label = program.allocate_label(); - let halt_label = metadata.termination_label_stack.last().unwrap(); + + // If the table this cursor is scanning is entirely empty, we exit this loop entirely. 
+ let end_of_loop_label = metadata.termination_label_stack.last().unwrap(); program.emit_insn_with_label_dependency( if iter_dir .as_ref() @@ -688,20 +916,23 @@ fn open_loop( { Insn::LastAwait { cursor_id, - pc_if_empty: *halt_label, + pc_if_empty: *end_of_loop_label, } } else { Insn::RewindAwait { cursor_id, - pc_if_empty: *halt_label, + pc_if_empty: *end_of_loop_label, } }, - *halt_label, + *end_of_loop_label, ); metadata.scan_loop_body_labels.push(scan_loop_body_label); program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); - let jump_label = metadata.next_row_labels.get(id).unwrap_or(halt_label); + let jump_label = metadata + .next_row_labels + .get(id) + .expect("scan has no next row label"); if let Some(preds) = predicates { for expr in preds { let jump_target_when_true = program.allocate_label(); @@ -772,6 +1003,8 @@ fn open_loop( } _ => unreachable!(), } + // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. + let end_of_loop_label = *metadata.termination_label_stack.last().unwrap(); program.emit_insn_with_label_dependency( match cmp_op { ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { @@ -779,7 +1012,7 @@ fn open_loop( cursor_id: index_cursor_id.unwrap_or(table_cursor_id), start_reg: cmp_reg, num_regs: 1, - target_pc: *metadata.termination_label_stack.last().unwrap(), + target_pc: end_of_loop_label, }, ast::Operator::Greater | ast::Operator::Less @@ -788,11 +1021,11 @@ fn open_loop( cursor_id: index_cursor_id.unwrap_or(table_cursor_id), start_reg: cmp_reg, num_regs: 1, - target_pc: *metadata.termination_label_stack.last().unwrap(), + target_pc: end_of_loop_label, }, _ => unreachable!(), }, - *metadata.termination_label_stack.last().unwrap(), + end_of_loop_label, ); if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { translate_expr( @@ -819,7 +1052,7 @@ fn open_loop( let abort_jump_target = *metadata .next_row_labels .get(id) - 
.unwrap_or(metadata.termination_label_stack.last().unwrap()); + .expect("search operator has no next row label"); match cmp_op { ast::Operator::Equals | ast::Operator::LessEquals => { if let Some(index_cursor_id) = index_cursor_id { @@ -938,7 +1171,7 @@ fn open_loop( /// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) /// - an ORDER BY sorter (when there is no GROUP BY, but there is an ORDER BY) /// - an AggStep (the columns are collected for aggregation, which is finished later) -/// - a ResultRow (there is none of the above, so the loop emits a result row directly) +/// - a QueryResult (there is none of the above, so the loop either emits a ResultRow, or if it's a subquery, yields to the parent query) pub enum InnerLoopEmitTarget<'a> { GroupBySorter { group_by: &'a GroupBy, @@ -948,7 +1181,8 @@ pub enum InnerLoopEmitTarget<'a> { order_by: &'a Vec<(ast::Expr, Direction)>, }, AggStep, - ResultRow { + QueryResult { + query_type: &'a SelectQueryType, limit: Option, }, } @@ -1007,7 +1241,10 @@ fn inner_loop_emit( &plan.result_columns, &plan.aggregates, metadata, - InnerLoopEmitTarget::ResultRow { limit: plan.limit }, + InnerLoopEmitTarget::QueryResult { + query_type: &plan.query_type, + limit: plan.limit, + }, &plan.referenced_tables, syms, ) @@ -1022,7 +1259,7 @@ fn inner_loop_source_emit( aggregates: &[Aggregate], metadata: &mut Metadata, emit_target: InnerLoopEmitTarget, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], syms: &SymbolTable, ) -> Result<()> { match emit_target { @@ -1087,8 +1324,6 @@ fn inner_loop_source_emit( Ok(()) } InnerLoopEmitTarget::AggStep => { - let agg_final_label = program.allocate_label(); - metadata.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(num_aggs); metadata.aggregation_start_register = Some(start_reg); @@ -1113,7 +1348,7 @@ fn inner_loop_source_emit( 
} Ok(()) } - InnerLoopEmitTarget::ResultRow { limit } => { + InnerLoopEmitTarget::QueryResult { query_type, limit } => { assert!( aggregates.is_empty(), "We should not get here with aggregates" @@ -1122,9 +1357,17 @@ fn inner_loop_source_emit( program, referenced_tables, result_columns, + metadata.result_column_start_register.unwrap(), None, - limit.map(|l| (l, *metadata.termination_label_stack.last().unwrap())), + limit.map(|l| { + ( + l, + metadata.limit_reg.unwrap(), + metadata.after_main_loop_label.unwrap(), + ) + }), syms, + query_type, )?; Ok(()) @@ -1141,6 +1384,25 @@ fn close_loop( metadata: &mut Metadata, ) -> Result<()> { match source { + SourceOperator::Subquery { id, .. } => { + program.resolve_label( + *metadata + .next_row_labels + .get(id) + .expect("subquery has no next row label"), + program.offset(), + ); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + // A subquery has no cursor to call NextAsync on, so it just emits a Goto + // to the Yield instruction, which in turn jumps back to the main loop of the subquery, + // so that the next row from the subquery can be read. 
+ program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: jump_label, + }, + jump_label, + ); + } SourceOperator::Join { id, left, @@ -1196,8 +1458,6 @@ fn close_loop( } close_loop(program, left, metadata)?; - - Ok(()) } SourceOperator::Scan { id, @@ -1237,7 +1497,6 @@ fn close_loop( jump_label, ); } - Ok(()) } SourceOperator::Search { id, @@ -1267,11 +1526,13 @@ fn close_loop( }, jump_label, ); - - Ok(()) } - SourceOperator::Nothing => Ok(()), - } + SourceOperator::Nothing => {} + }; + + let end_of_loop_label = metadata.termination_label_stack.pop().unwrap(); + program.resolve_label(end_of_loop_label, program.offset()); + Ok(()) } fn emit_delete_insns( @@ -1312,16 +1573,12 @@ fn emit_delete_insns( dest: limit_reg, }); program.mark_last_insn_constant(); - let jump_label_on_limit_reached = metadata - .termination_label_stack - .last() - .expect("termination_label_stack should not be empty."); program.emit_insn_with_label_dependency( Insn::DecrJumpZero { reg: limit_reg, - target_pc: *jump_label_on_limit_reached, + target_pc: metadata.after_main_loop_label.unwrap(), }, - *jump_label_on_limit_reached, + metadata.after_main_loop_label.unwrap(), ) } @@ -1339,9 +1596,10 @@ fn group_by_emit( order_by: Option<&Vec<(ast::Expr, Direction)>>, aggregates: &[Aggregate], limit: Option, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], metadata: &mut Metadata, syms: &SymbolTable, + query_type: &SelectQueryType, ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let grouping_done_label = program.allocate_label(); @@ -1360,7 +1618,6 @@ fn group_by_emit( sorter_key_register, .. } = *group_by_metadata; - let halt_label = *metadata.termination_label_stack.first().unwrap(); // all group by columns and all arguments of agg functions are in the sorter. 
// the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) @@ -1460,14 +1717,25 @@ fn group_by_emit( subroutine_accumulator_output_label, ); + let group_by_end_idx = { + assert!(metadata.termination_label_stack.len() >= 2); + // The reason we take the 2nd-to-last label on the stack is because the top of the stack jumps to + // the group by output row subroutine (i.e. emit row for a single group), whereas + // the 2nd-to-last label on the stack jumps to the end of the entire group by routine. + metadata.termination_label_stack.len() - 2 + }; + let group_by_end_label = *metadata + .termination_label_stack + .get(group_by_end_idx) + .unwrap(); program.add_comment(program.offset(), "check abort flag"); program.emit_insn_with_label_dependency( Insn::IfPos { reg: abort_flag_register, - target_pc: halt_label, + target_pc: group_by_end_label, decrement_by: 0, }, - metadata.termination_label_stack[0], + group_by_end_label, ); program.add_comment(program.offset(), "goto clear accumulator subroutine"); @@ -1549,13 +1817,11 @@ fn group_by_emit( ); program.add_comment(program.offset(), "group by finished"); - let termination_label = - metadata.termination_label_stack[metadata.termination_label_stack.len() - 2]; program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: termination_label, + target_pc: group_by_end_label, }, - termination_label, + group_by_end_label, ); program.emit_insn(Insn::Integer { value: 1, @@ -1571,14 +1837,14 @@ fn group_by_emit( ); program.add_comment(program.offset(), "output group by row subroutine start"); - let termination_label = *metadata.termination_label_stack.last().unwrap(); + let output_group_by_row_label = metadata.termination_label_stack.pop().unwrap(); program.emit_insn_with_label_dependency( Insn::IfPos { reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: termination_label, + target_pc: output_group_by_row_label, decrement_by: 0, }, - 
termination_label, + output_group_by_row_label, ); let group_by_end_without_emitting_row_label = program.allocate_label(); program.defer_label_resolution( @@ -1590,10 +1856,8 @@ fn group_by_emit( }); let agg_start_reg = metadata.aggregation_start_register.unwrap(); - program.resolve_label( - metadata.termination_label_stack.pop().unwrap(), - program.offset(), - ); + // Resolve the label for the start of the group by output row subroutine + program.resolve_label(output_group_by_row_label, program.offset()); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1638,9 +1902,17 @@ fn group_by_emit( program, referenced_tables, result_columns, + metadata.result_column_start_register.unwrap(), Some(&precomputed_exprs_to_register), - limit.map(|l| (l, *metadata.termination_label_stack.last().unwrap())), + limit.map(|l| { + ( + l, + metadata.limit_reg.unwrap(), + *metadata.termination_label_stack.last().unwrap(), + ) + }), syms, + query_type, )?; } Some(order_by) => { @@ -1680,6 +1952,13 @@ fn group_by_emit( return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, }); + assert!( + metadata.termination_label_stack.len() >= 2, + "termination_label_stack should have at least 2 elements" + ); + let group_by_end_label = metadata.termination_label_stack.pop().unwrap(); + program.resolve_label(group_by_end_label, program.offset()); + Ok(()) } @@ -1688,12 +1967,21 @@ fn group_by_emit( /// and we can now materialize the aggregate results. 
fn agg_without_group_by_emit( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], result_columns: &[ResultSetColumn], aggregates: &[Aggregate], metadata: &mut Metadata, syms: &SymbolTable, + query_type: &SelectQueryType, ) -> Result<()> { + // Resolve the label for the start of the aggregation phase + program.resolve_label( + metadata + .termination_label_stack + .pop() + .expect("termination_label_stack should not be empty"), + program.offset(), + ); let agg_start_reg = metadata.aggregation_start_register.unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; @@ -1716,9 +2004,11 @@ fn agg_without_group_by_emit( program, referenced_tables, result_columns, + metadata.result_column_start_register.unwrap(), Some(&precomputed_exprs_to_register), None, syms, + query_type, )?; Ok(()) @@ -1733,6 +2023,7 @@ fn order_by_emit( result_columns: &[ResultSetColumn], limit: Option, metadata: &mut Metadata, + query_type: &SelectQueryType, ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let sort_loop_end_label = program.allocate_label(); @@ -1804,7 +2095,7 @@ fn order_by_emit( // We emit the columns in SELECT order, not sorter order (sorter always has the sort keys first). // This is tracked in m.result_column_indexes_in_orderby_sorter. 
let cursor_id = pseudo_cursor; - let start_reg = program.alloc_registers(result_columns.len()); + let start_reg = metadata.result_column_start_register.unwrap(); for i in 0..result_columns.len() { let reg = start_reg + i; program.emit_insn(Insn::Column { @@ -1813,11 +2104,13 @@ fn order_by_emit( dest: reg, }); } + emit_result_row_and_limit( program, start_reg, result_columns.len(), - limit.map(|l| (l, sort_loop_end_label)), + limit.map(|l| (l, metadata.limit_reg.unwrap(), sort_loop_end_label)), + query_type, )?; program.emit_insn_with_label_dependency( @@ -1833,19 +2126,32 @@ fn order_by_emit( Ok(()) } -/// Emits the bytecode for: result row and limit. +/// Emits the bytecode for: +/// - result row (or if a subquery, yields to the parent query) +/// - limit fn emit_result_row_and_limit( program: &mut ProgramBuilder, start_reg: usize, result_columns_len: usize, - limit: Option<(usize, BranchOffset)>, + limit: Option<(usize, usize, BranchOffset)>, + query_type: &SelectQueryType, ) -> Result<()> { - program.emit_insn(Insn::ResultRow { - start_reg, - count: result_columns_len, - }); - if let Some((limit, jump_label_on_limit_reached)) = limit { - let limit_reg = program.alloc_register(); + match query_type { + SelectQueryType::TopLevel => { + program.emit_insn(Insn::ResultRow { + start_reg, + count: result_columns_len, + }); + } + SelectQueryType::Subquery { yield_reg, .. } => { + program.emit_insn(Insn::Yield { + yield_reg: *yield_reg, + end_offset: 0, + }); + } + } + + if let Some((limit, limit_reg, jump_label_on_limit_reached)) = limit { program.emit_insn(Insn::Integer { value: limit as i64, dest: limit_reg, @@ -1862,16 +2168,21 @@ fn emit_result_row_and_limit( Ok(()) } -/// Emits the bytecode for: all result columns, result row, and limit. 
+/// Emits the bytecode for: +/// - all result columns +/// - result row (or if a subquery, yields to the parent query) +/// - limit fn emit_select_result( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], result_columns: &[ResultSetColumn], + result_column_start_register: usize, precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, - limit: Option<(usize, BranchOffset)>, + limit: Option<(usize, usize, BranchOffset)>, syms: &SymbolTable, + query_type: &SelectQueryType, ) -> Result<()> { - let start_reg = program.alloc_registers(result_columns.len()); + let start_reg = result_column_start_register; for (i, rc) in result_columns.iter().enumerate() { let reg = start_reg + i; translate_expr( @@ -1883,7 +2194,7 @@ fn emit_select_result( syms, )?; } - emit_result_row_and_limit(program, start_reg, result_columns.len(), limit)?; + emit_result_row_and_limit(program, start_reg, result_columns.len(), limit, query_type)?; Ok(()) } @@ -1910,7 +2221,7 @@ fn sorter_insert( /// Emits the bytecode for inserting a row into an ORDER BY sorter. 
fn order_by_sorter_insert( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], order_by: &[(ast::Expr, Direction)], result_columns: &[ResultSetColumn], result_column_indexes_in_orderby_sorter: &mut HashMap, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6b12c24e6..bd8d9c94e 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -10,7 +10,7 @@ use crate::util::{exprs_are_equivalent, normalize_ident}; use crate::vdbe::{builder::ProgramBuilder, insn::Insn, BranchOffset}; use crate::{Result, SymbolTable}; -use super::plan::{Aggregate, BTreeTableReference}; +use super::plan::{Aggregate, TableReference, TableReferenceType}; #[derive(Default, Debug, Clone, Copy)] pub struct ConditionMetadata { @@ -21,7 +21,7 @@ pub struct ConditionMetadata { pub fn translate_condition_expr( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], expr: &ast::Expr, condition_metadata: ConditionMetadata, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, @@ -562,7 +562,7 @@ pub fn translate_condition_expr( pub fn translate_expr( program: &mut ProgramBuilder, - referenced_tables: Option<&[BTreeTableReference]>, + referenced_tables: Option<&[TableReference]>, expr: &ast::Expr, target_register: usize, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, @@ -1962,22 +1962,41 @@ pub fn translate_expr( is_rowid_alias, } => { let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); - if *is_rowid_alias { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { - cursor_id, - column: *column, - dest: target_register, - }); + match tbl_ref.reference_type { + // If we are reading a column from a table, we find the cursor that corresponds to + // the table and read the 
column from the cursor. + TableReferenceType::BTreeTable => { + let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); + if *is_rowid_alias { + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } else { + program.emit_insn(Insn::Column { + cursor_id, + column: *column, + dest: target_register, + }); + } + let column = tbl_ref.table.get_column_at(*column); + maybe_apply_affinity(column.ty, target_register, program); + Ok(target_register) + } + // If we are reading a column from a subquery, we instead copy the column from the + // subquery's result registers. + TableReferenceType::Subquery { + result_columns_start_reg, + .. + } => { + program.emit_insn(Insn::Copy { + src_reg: result_columns_start_reg + *column, + dst_reg: target_register, + amount: 0, + }); + Ok(target_register) + } } - let column = &tbl_ref.table.columns[*column]; - maybe_apply_affinity(column.ty, target_register, program); - Ok(target_register) } ast::Expr::InList { .. } => todo!(), ast::Expr::InSelect { .. 
} => todo!(), @@ -2170,7 +2189,7 @@ pub fn translate_expr( fn translate_variable_sized_function_parameter_list( program: &mut ProgramBuilder, args: &Option>, - referenced_tables: Option<&[BTreeTableReference]>, + referenced_tables: Option<&[TableReference]>, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, syms: &SymbolTable, ) -> Result { @@ -2223,7 +2242,7 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu pub fn translate_aggregation( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], agg: &Aggregate, target_register: usize, syms: &SymbolTable, @@ -2408,7 +2427,7 @@ pub fn translate_aggregation( pub fn translate_aggregation_groupby( program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], group_by_sorter_cursor_id: usize, cursor_index: usize, agg: &Aggregate, @@ -2585,3 +2604,29 @@ pub fn translate_aggregation_groupby( }; Ok(dest) } + +/// Get an appropriate name for an expression. +/// If the query provides an alias (e.g. `SELECT a AS b FROM t`), use that (e.g. `b`). +/// If the expression is a column from a table, use the column name (e.g. `a`). +/// Otherwise we just use a generic fallback name (e.g. `expr_`). +pub fn get_name( + maybe_alias: Option<&ast::As>, + expr: &ast::Expr, + referenced_tables: &[TableReference], + fallback: impl Fn() -> String, +) -> String { + let alias = maybe_alias.map(|a| match a { + ast::As::As(id) => id.0.clone(), + ast::As::Elided(id) => id.0.clone(), + }); + if let Some(alias) = alias { + return alias; + } + match expr { + ast::Expr::Column { table, column, .. 
} => { + let table_ref = referenced_tables.get(*table).unwrap(); + table_ref.table.get_column_at(*column).name.clone() + } + _ => fallback(), + } +} diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 217241f2a..2e0f142a9 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -5,14 +5,15 @@ use sqlite3_parser::ast; use crate::{schema::Index, Result}; use super::plan::{ - get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - DeletePlan, Direction, IterationDirection, Plan, Search, SelectPlan, SourceOperator, + get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, DeletePlan, Direction, + IterationDirection, Plan, Search, SelectPlan, SourceOperator, TableReference, + TableReferenceType, }; -pub fn optimize_plan(plan: Plan) -> Result { +pub fn optimize_plan(plan: &mut Plan) -> Result<()> { match plan { - Plan::Select(plan) => optimize_select_plan(plan).map(Plan::Select), - Plan::Delete(plan) => optimize_delete_plan(plan).map(Plan::Delete), + Plan::Select(plan) => optimize_select_plan(plan), + Plan::Delete(plan) => optimize_delete_plan(plan), } } @@ -21,13 +22,14 @@ pub fn optimize_plan(plan: Plan) -> Result { * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -fn optimize_select_plan(mut plan: SelectPlan) -> Result { +fn optimize_select_plan(plan: &mut SelectPlan) -> Result<()> { + optimize_subqueries(&mut plan.source)?; eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? 
{ plan.contains_constant_false_condition = true; - return Ok(plan); + return Ok(()); } push_predicates( @@ -49,16 +51,16 @@ fn optimize_select_plan(mut plan: SelectPlan) -> Result { &plan.available_indexes, )?; - Ok(plan) + Ok(()) } -fn optimize_delete_plan(mut plan: DeletePlan) -> Result { +fn optimize_delete_plan(plan: &mut DeletePlan) -> Result<()> { eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? { plan.contains_constant_false_condition = true; - return Ok(plan); + return Ok(()); } use_indexes( @@ -67,13 +69,28 @@ fn optimize_delete_plan(mut plan: DeletePlan) -> Result { &plan.available_indexes, )?; - Ok(plan) + Ok(()) +} + +fn optimize_subqueries(operator: &mut SourceOperator) -> Result<()> { + match operator { + SourceOperator::Subquery { plan, .. } => { + optimize_select_plan(&mut *plan)?; + Ok(()) + } + SourceOperator::Join { left, right, .. } => { + optimize_subqueries(left)?; + optimize_subqueries(right)?; + Ok(()) + } + _ => Ok(()), + } } fn _operator_is_already_ordered_by( operator: &mut SourceOperator, key: &mut ast::Expr, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &Vec>, ) -> Result { match operator { @@ -109,7 +126,7 @@ fn _operator_is_already_ordered_by( fn eliminate_unnecessary_orderby( operator: &mut SourceOperator, order_by: &mut Option>, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &Vec>, ) -> Result<()> { if order_by.is_none() { @@ -141,10 +158,11 @@ fn eliminate_unnecessary_orderby( */ fn use_indexes( operator: &mut SourceOperator, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &[Rc], ) -> Result<()> { match operator { + SourceOperator::Subquery { .. } => Ok(()), SourceOperator::Search { .. 
} => Ok(()), SourceOperator::Scan { table_reference, @@ -161,10 +179,7 @@ fn use_indexes( let f = fs[i].take_ownership(); let table_index = referenced_tables .iter() - .position(|t| { - Rc::ptr_eq(&t.table, &table_reference.table) - && t.table_identifier == table_reference.table_identifier - }) + .position(|t| t.table_identifier == table_reference.table_identifier) .unwrap(); match try_extract_index_search_expression( f, @@ -229,6 +244,7 @@ fn eliminate_constants( } } match operator { + SourceOperator::Subquery { .. } => Ok(ConstantConditionEliminationResult::Continue), SourceOperator::Join { left, right, @@ -334,7 +350,7 @@ fn eliminate_constants( fn push_predicates( operator: &mut SourceOperator, where_clause: &mut Option>, - referenced_tables: &Vec, + referenced_tables: &Vec, ) -> Result<()> { // First try to push down any predicates from the WHERE clause if let Some(predicates) = where_clause { @@ -357,6 +373,7 @@ } match operator { + SourceOperator::Subquery { .. } => Ok(()), SourceOperator::Join { left, right, @@ -424,9 +441,64 @@ fn push_predicate( operator: &mut SourceOperator, predicate: ast::Expr, - referenced_tables: &Vec, + referenced_tables: &Vec, ) -> Result> { match operator { + SourceOperator::Subquery { + predicates, + table_reference, + .. + } => { + // **TODO**: we are currently just evaluating the predicate after the subquery yields, + // and not trying to do anything more sophisticated. + // E.g. literally: SELECT * FROM (SELECT * FROM t1) sub WHERE sub.col = 'foo' + // + // It is possible, and not overly difficult, to determine that we can also push the + // predicate into the subquery coroutine itself before it yields. 
The above query would + // effectively become: SELECT * FROM (SELECT * FROM t1 WHERE col = 'foo') sub + // + // This matters more in cases where the subquery builds some kind of sorter/index in memory + // (or on disk) and in those cases pushing the predicate down to the coroutine will make the + // subquery produce less intermediate data. In cases where no intermediate data structures are + // built, it doesn't matter. + // + // Moreover, in many cases the subquery can even be completely eliminated, e.g. the above original + // query would become: SELECT * FROM t1 WHERE col = 'foo' without the subquery. + // **END TODO** + + // Find position of this subquery in referenced_tables array + let subquery_index = referenced_tables + .iter() + .position(|t| { + t.table_identifier == table_reference.table_identifier + && matches!(t.reference_type, TableReferenceType::Subquery { .. }) + }) + .unwrap(); + + // Get bitmask showing which tables this predicate references + let predicate_bitmask = + get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; + + // Each table has a bit position based on join order from left to right + // e.g. 
in SELECT * FROM t1 JOIN t2 JOIN t3 + // t1 is position 0 (001), t2 is position 1 (010), t3 is position 2 (100) + // To push a predicate to a given table, it can only reference that table and tables to its left + // Example: For table t2 at position 1 (bit 010): + // - Can push: 011 (t2 + t1), 001 (just t1), 010 (just t2) + // - Can't push: 110 (t2 + t3) + let next_table_on_the_right_in_join_bitmask = 1 << (subquery_index + 1); + if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { + return Ok(Some(predicate)); + } + + if predicates.is_none() { + predicates.replace(vec![predicate]); + } else { + predicates.as_mut().unwrap().push(predicate); + } + + Ok(None) + } SourceOperator::Scan { predicates, table_reference, @@ -435,7 +507,10 @@ fn push_predicate( // Find position of this table in referenced_tables array let table_index = referenced_tables .iter() - .position(|t| t.table_identifier == table_reference.table_identifier) + .position(|t| { + t.table_identifier == table_reference.table_identifier + && t.reference_type == TableReferenceType::BTreeTable + }) .unwrap(); // Get bitmask showing which tables this predicate references @@ -595,7 +670,7 @@ pub trait Optimizable { fn check_index_scan( &mut self, table_index: usize, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &[Rc], ) -> Result>; } @@ -614,7 +689,7 @@ impl Optimizable for ast::Expr { fn check_index_scan( &mut self, table_index: usize, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &[Rc], ) -> Result> { match self { @@ -623,12 +698,9 @@ impl Optimizable for ast::Expr { return Ok(None); } for (idx, index) in available_indexes.iter().enumerate() { - if index.table_name == referenced_tables[*table].table.name { - let column = referenced_tables[*table] - .table - .columns - .get(*column) - .unwrap(); + let table_ref = &referenced_tables[*table]; + if index.table_name == 
table_ref.table.get_name() { + let column = table_ref.table.get_column_at(*column); if index.columns.first().unwrap().name == column.name { return Ok(Some(idx)); } @@ -793,7 +865,7 @@ pub enum Either { pub fn try_extract_index_search_expression( expr: ast::Expr, table_index: usize, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], available_indexes: &[Rc], ) -> Result> { match expr { diff --git a/core/translate/plan.rs b/core/translate/plan.rs index e16676061..84fde1382 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -5,34 +5,51 @@ use std::{ rc::Rc, }; -use crate::translate::plan::Plan::{Delete, Select}; use crate::{ function::AggFunc, - schema::{BTreeTable, Column, Index}, + schema::{Column, Index, Table}, + vdbe::BranchOffset, Result, }; +use crate::{ + schema::{PseudoTable, Type}, + translate::plan::Plan::{Delete, Select}, +}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ResultSetColumn { pub expr: ast::Expr, + pub name: String, // TODO: encode which aggregates (e.g. index bitmask of plan.aggregates) are present in this column pub contains_aggregates: bool, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GroupBy { pub exprs: Vec, /// having clause split into a vec at 'AND' boundaries. pub having: Option>, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Plan { Select(SelectPlan), Delete(DeletePlan), } -#[derive(Debug)] +/// The type of the query, either top level or subquery +#[derive(Debug, Clone)] +pub enum SelectQueryType { + TopLevel, + Subquery { + /// The register that holds the program offset that handles jumping to/from the subquery. + yield_reg: usize, + /// The index of the first instruction in the bytecode that implements the subquery. + coroutine_implementation_start: BranchOffset, + }, +} + +#[derive(Debug, Clone)] pub struct SelectPlan { /// A tree of sources (tables). 
pub source: SourceOperator, @@ -49,15 +66,17 @@ pub struct SelectPlan { /// limit clause pub limit: Option, /// all the tables referenced in the query - pub referenced_tables: Vec, + pub referenced_tables: Vec, /// all the indexes available pub available_indexes: Vec>, /// query contains a constant condition that is always false pub contains_constant_false_condition: bool, + /// query type (top level or subquery) + pub query_type: SelectQueryType, } #[allow(dead_code)] -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DeletePlan { /// A tree of sources (tables). pub source: SourceOperator, @@ -70,7 +89,7 @@ pub struct DeletePlan { /// limit clause pub limit: Option, /// all the tables referenced in the query - pub referenced_tables: Vec, + pub referenced_tables: Vec, /// all the indexes available pub available_indexes: Vec>, /// query contains a constant condition that is always false @@ -94,11 +113,12 @@ pub enum IterationDirection { impl SourceOperator { pub fn select_star(&self, out_columns: &mut Vec) { - for (table_ref, col, idx) in self.select_star_helper() { + for (table_index, col, idx) in self.select_star_helper() { out_columns.push(ResultSetColumn { + name: col.name.clone(), expr: ast::Expr::Column { database: None, - table: table_ref.table_index, + table: table_index, column: idx, is_rowid_alias: col.is_rowid_alias, }, @@ -108,7 +128,7 @@ impl SourceOperator { } /// All this ceremony is required to deduplicate columns when joining with USING - fn select_star_helper(&self) -> Vec<(&BTreeTableReference, &Column, usize)> { + fn select_star_helper(&self) -> Vec<(usize, &Column, usize)> { match self { SourceOperator::Join { left, right, using, .. 
@@ -120,12 +140,12 @@ impl SourceOperator { if let Some(using_cols) = using { let right_columns = right.select_star_helper(); - for (table_ref, col, idx) in right_columns { + for (table_index, col, idx) in right_columns { if !using_cols .iter() .any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0)) { - columns.push((table_ref, col, idx)); + columns.push((table_index, col, idx)); } } } else { @@ -138,12 +158,14 @@ impl SourceOperator { } | SourceOperator::Search { table_reference, .. + } + | SourceOperator::Subquery { + table_reference, .. } => table_reference - .table - .columns + .columns() .iter() .enumerate() - .map(|(i, col)| (table_reference, col, i)) + .map(|(i, col)| (table_reference.table_index, col, i)) .collect(), SourceOperator::Nothing => Vec::new(), } @@ -178,7 +200,7 @@ pub enum SourceOperator { // assignments. for more detailed discussions, please refer to https://github.com/penberg/limbo/pull/376 Scan { id: usize, - table_reference: BTreeTableReference, + table_reference: TableReference, predicates: Option>, iter_dir: Option, }, @@ -187,21 +209,74 @@ pub enum SourceOperator { // (i.e. a primary key or a secondary index) Search { id: usize, - table_reference: BTreeTableReference, + table_reference: TableReference, search: Search, predicates: Option>, }, + Subquery { + id: usize, + table_reference: TableReference, + plan: Box, + predicates: Option>, + }, // Nothing operator // This operator is used to represent an empty query. // e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing. Nothing, } +/// The type of the table reference, either BTreeTable or Subquery +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum TableReferenceType { + /// A BTreeTable is a table that is stored on disk in a B-tree index. + BTreeTable, + /// A subquery. + Subquery { + /// The index of the first register in the query plan that contains the result columns of the subquery. 
+ result_columns_start_reg: usize, + }, +} + +/// A query plan has a list of TableReference objects, each of which represents a table or subquery. #[derive(Clone, Debug)] -pub struct BTreeTableReference { - pub table: Rc, +pub struct TableReference { + /// Table object, which contains metadata about the table, e.g. columns. + pub table: Table, + /// The name of the table as referred to in the query, either the literal name or an alias e.g. "users" or "u" pub table_identifier: String, + /// The index of this reference in the list of TableReference objects in the query plan + /// The reference at index 0 is the first table in the FROM clause, the reference at index 1 is the second table in the FROM clause, etc. + /// So, the index is relevant for determining when predicates (WHERE, ON filters etc.) should be evaluated. pub table_index: usize, + /// The type of the table reference, either BTreeTable or Subquery + pub reference_type: TableReferenceType, +} + +impl TableReference { + pub fn new_subquery(identifier: String, table_index: usize, plan: &SelectPlan) -> Self { + Self { + table: Table::Pseudo(Rc::new(PseudoTable::new_with_columns( + plan.result_columns + .iter() + .map(|rc| Column { + name: rc.name.clone(), + ty: Type::Text, // FIXME: infer proper type + is_rowid_alias: false, + primary_key: false, + }) + .collect(), + ))), + table_identifier: identifier.clone(), + table_index, + reference_type: TableReferenceType::Subquery { + result_columns_start_reg: 0, // Will be set in the bytecode emission phase + }, + } + } + + pub fn columns(&self) -> &[Column] { + self.table.columns() + } } /// An enum that represents a search operation that can be used to search for a row in a table using an index @@ -230,6 +305,7 @@ impl SourceOperator { SourceOperator::Join { id, .. } => *id, SourceOperator::Scan { id, .. } => *id, SourceOperator::Search { id, .. } => *id, + SourceOperator::Subquery { id, .. 
} => *id, SourceOperator::Nothing => unreachable!(), } } @@ -320,12 +396,13 @@ impl Display for SourceOperator { .. } => { let table_name = - if table_reference.table.name == table_reference.table_identifier { + if table_reference.table.get_name() == table_reference.table_identifier { table_reference.table_identifier.clone() } else { format!( "{} AS {}", - &table_reference.table.name, &table_reference.table_identifier + &table_reference.table.get_name(), + &table_reference.table_identifier ) }; let filter_string = filter.as_ref().map(|f| { @@ -365,6 +442,9 @@ impl Display for SourceOperator { } Ok(()) } + SourceOperator::Subquery { plan, .. } => { + fmt_operator(&plan.source, f, level + 1, last) + } SourceOperator::Nothing => Ok(()), } } @@ -382,7 +462,7 @@ impl Display for SourceOperator { then the return value will be (in bits): 110 */ pub fn get_table_ref_bitmask_for_operator<'a>( - tables: &'a Vec, + tables: &'a Vec, operator: &'a SourceOperator, ) -> Result { let mut table_refs_mask = 0; @@ -409,6 +489,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| t.table_identifier == table_reference.table_identifier) .unwrap(); } + SourceOperator::Subquery { .. 
} => {} SourceOperator::Nothing => {} } Ok(table_refs_mask) @@ -424,7 +505,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( */ #[allow(clippy::only_used_in_recursion)] pub fn get_table_ref_bitmask_for_ast_expr<'a>( - tables: &'a Vec, + tables: &'a Vec, predicate: &'a ast::Expr, ) -> Result { let mut table_refs_mask = 0; diff --git a/core/translate/planner.rs b/core/translate/planner.rs index a8fb34b14..191aebe33 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,8 +1,12 @@ -use super::plan::{Aggregate, BTreeTableReference, SourceOperator}; +use super::{ + plan::{Aggregate, Plan, SelectQueryType, SourceOperator, TableReference, TableReferenceType}, + select::prepare_select_plan, +}; use crate::{ function::Func, - schema::Schema, + schema::{Schema, Table}, util::{exprs_are_equivalent, normalize_ident}, + vdbe::BranchOffset, Result, }; use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, Limit}; @@ -88,7 +92,7 @@ pub fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) -> bool { pub fn bind_column_references( expr: &mut ast::Expr, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], ) -> Result<()> { match expr { ast::Expr::Id(id) => { @@ -101,15 +105,14 @@ pub fn bind_column_references( let normalized_id = normalize_ident(id.0.as_str()); for (tbl_idx, table) in referenced_tables.iter().enumerate() { let col_idx = table - .table - .columns + .columns() .iter() .position(|c| c.name.eq_ignore_ascii_case(&normalized_id)); if col_idx.is_some() { if match_result.is_some() { crate::bail_parse_error!("Column {} is ambiguous", id.0); } - let col = table.table.columns.get(col_idx.unwrap()).unwrap(); + let col = table.columns().get(col_idx.unwrap()).unwrap(); match_result = Some((tbl_idx, col_idx.unwrap(), col.primary_key)); } } @@ -137,16 +140,14 @@ pub fn bind_column_references( let tbl_idx = matching_tbl_idx.unwrap(); let normalized_id = normalize_ident(id.0.as_str()); let col_idx = 
referenced_tables[tbl_idx] - .table - .columns + .columns() .iter() .position(|c| c.name.eq_ignore_ascii_case(&normalized_id)); if col_idx.is_none() { crate::bail_parse_error!("Column {} not found", normalized_id); } let col = referenced_tables[tbl_idx] - .table - .columns + .columns() .get(col_idx.unwrap()) .unwrap(); *expr = ast::Expr::Column { @@ -206,8 +207,8 @@ pub fn bind_column_references( } Ok(()) } - // Column references cannot exist before binding - ast::Expr::Column { .. } => unreachable!(), + // Already bound earlier + ast::Expr::Column { .. } => Ok(()), ast::Expr::DoublyQualified(_, _, _) => todo!(), ast::Expr::Exists(_) => todo!(), ast::Expr::FunctionCallStar { .. } => Ok(()), @@ -253,18 +254,13 @@ pub fn bind_column_references( } } -pub fn parse_from( +fn parse_from_clause_table( schema: &Schema, - from: Option, + table: ast::SelectTable, operator_id_counter: &mut OperatorIdCounter, -) -> Result<(SourceOperator, Vec)> { - if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok((SourceOperator::Nothing, vec![])); - } - - let from = from.unwrap(); - - let first_table = match *from.select.unwrap() { + cur_table_index: usize, +) -> Result<(TableReference, SourceOperator)> { + match table { ast::SelectTable::Table(qualified_name, maybe_alias, _) => { let normalized_qualified_name = normalize_ident(qualified_name.name.0.as_str()); let Some(table) = schema.get_table(&normalized_qualified_name) else { @@ -276,29 +272,80 @@ pub fn parse_from( ast::As::Elided(id) => id, }) .map(|a| a.0); - - BTreeTableReference { - table: table.clone(), + let table_reference = TableReference { + table: Table::BTree(table.clone()), table_identifier: alias.unwrap_or(normalized_qualified_name), - table_index: 0, - } + table_index: cur_table_index, + reference_type: TableReferenceType::BTreeTable, + }; + Ok(( + table_reference.clone(), + SourceOperator::Scan { + table_reference, + predicates: None, + id: operator_id_counter.get_next_id(), + iter_dir: None, + }, + 
)) + } + ast::SelectTable::Select(subselect, maybe_alias) => { + let Plan::Select(mut subplan) = prepare_select_plan(schema, subselect)? else { + unreachable!(); + }; + subplan.query_type = SelectQueryType::Subquery { + yield_reg: usize::MAX, // will be set later in bytecode emission + coroutine_implementation_start: BranchOffset::MAX, // will be set later in bytecode emission + }; + let identifier = maybe_alias + .map(|a| match a { + ast::As::As(id) => id.0.clone(), + ast::As::Elided(id) => id.0.clone(), + }) + .unwrap_or(format!("subquery_{}", cur_table_index)); + let table_reference = + TableReference::new_subquery(identifier.clone(), cur_table_index, &subplan); + Ok(( + table_reference.clone(), + SourceOperator::Subquery { + id: operator_id_counter.get_next_id(), + table_reference, + plan: Box::new(subplan), + predicates: None, + }, + )) } _ => todo!(), - }; + } +} - let mut operator = SourceOperator::Scan { - table_reference: first_table.clone(), - predicates: None, - id: operator_id_counter.get_next_id(), - iter_dir: None, - }; +pub fn parse_from( + schema: &Schema, + mut from: Option, + operator_id_counter: &mut OperatorIdCounter, +) -> Result<(SourceOperator, Vec)> { + if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { + return Ok((SourceOperator::Nothing, vec![])); + } - let mut tables = vec![first_table]; + let mut table_index = 0; + let mut tables = vec![]; - let mut table_index = 1; - for join in from.joins.unwrap_or_default().into_iter() { - let (right, outer, using, predicates) = - parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; + let mut from_owned = std::mem::take(&mut from).unwrap(); + let select_owned = *std::mem::take(&mut from_owned.select).unwrap(); + let joins_owned = std::mem::take(&mut from_owned.joins).unwrap_or_default(); + let (table_reference, mut operator) = + parse_from_clause_table(schema, select_owned, operator_id_counter, table_index)?; + + tables.push(table_reference); + table_index += 1; + + for 
join in joins_owned.into_iter() { + let JoinParseResult { + source_operator: right, + is_outer_join: outer, + using, + predicates, + } = parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), @@ -315,7 +362,7 @@ pub fn parse_from( pub fn parse_where( where_clause: Option, - referenced_tables: &[BTreeTableReference], + referenced_tables: &[TableReference], ) -> Result>> { if let Some(where_expr) = where_clause { let mut predicates = vec![]; @@ -329,48 +376,32 @@ pub fn parse_where( } } +struct JoinParseResult { + source_operator: SourceOperator, + is_outer_join: bool, + using: Option, + predicates: Option>, +} + fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, operator_id_counter: &mut OperatorIdCounter, - tables: &mut Vec, + tables: &mut Vec, table_index: usize, -) -> Result<( - SourceOperator, - bool, - Option, - Option>, -)> { +) -> Result { let ast::JoinedSelectTable { - operator, + operator: join_operator, table, constraint, } = join; - let table = match table { - ast::SelectTable::Table(qualified_name, maybe_alias, _) => { - let normalized_name = normalize_ident(qualified_name.name.0.as_str()); - let Some(table) = schema.get_table(&normalized_name) else { - crate::bail_parse_error!("Table {} not found", normalized_name); - }; - let alias = maybe_alias - .map(|a| match a { - ast::As::As(id) => id, - ast::As::Elided(id) => id, - }) - .map(|a| a.0); - BTreeTableReference { - table: table.clone(), - table_identifier: alias.unwrap_or(normalized_name), - table_index, - } - } - _ => todo!(), - }; + let (table_reference, source_operator) = + parse_from_clause_table(schema, table, operator_id_counter, table_index)?; - tables.push(table.clone()); + tables.push(table_reference); - let (outer, natural) = match operator { + let (outer, natural) = match join_operator { ast::JoinOperator::TypedJoin(Some(join_type)) => { let is_outer = 
join_type.contains(JoinType::OUTER); let is_natural = join_type.contains(JoinType::NATURAL); @@ -391,13 +422,13 @@ fn parse_join( let left_tables = &tables[..table_index]; assert!(!left_tables.is_empty()); let right_table = &tables[table_index]; - let right_cols = &right_table.table.columns; + let right_cols = &right_table.columns(); let mut distinct_names = None; // TODO: O(n^2) maybe not great for large tables or big multiway joins for right_col in right_cols.iter() { let mut found_match = false; for left_table in left_tables.iter() { - for left_col in left_table.table.columns.iter() { + for left_col in left_table.columns().iter() { if left_col.name == right_col.name { if distinct_names.is_none() { distinct_names = @@ -447,8 +478,7 @@ fn parse_join( let mut left_col = None; for (left_table_idx, left_table) in left_tables.iter().enumerate() { left_col = left_table - .table - .columns + .columns() .iter() .enumerate() .find(|(_, col)| col.name == name_normalized) @@ -464,8 +494,7 @@ fn parse_join( ); } let right_col = right_table - .table - .columns + .columns() .iter() .enumerate() .find(|(_, col)| col.name == name_normalized); @@ -499,17 +528,12 @@ fn parse_join( } } - Ok(( - SourceOperator::Scan { - table_reference: table.clone(), - predicates: None, - id: operator_id_counter.get_next_id(), - iter_dir: None, - }, - outer, + Ok(JoinParseResult { + source_operator, + is_outer_join: outer, using, predicates, - )) + }) } pub fn parse_limit(limit: Limit) -> Option { diff --git a/core/translate/select.rs b/core/translate/select.rs index 5ad2d41d5..affbaa04b 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -2,6 +2,8 @@ use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; use super::emitter::emit_program; +use super::expr::get_name; +use super::plan::SelectQueryType; use crate::function::Func; use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::translate::optimizer::optimize_plan; @@ -23,15 +25,15 @@ pub fn translate_select( connection: 
Weak, syms: &SymbolTable, ) -> Result { - let select_plan = prepare_select_plan(schema, select)?; - let optimized_plan = optimize_plan(select_plan)?; - emit_program(database_header, optimized_plan, connection, syms) + let mut select_plan = prepare_select_plan(schema, select)?; + optimize_plan(&mut select_plan)?; + emit_program(database_header, select_plan, connection, syms) } pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result { match select.body.select { ast::OneSelect::Select { - columns, + mut columns, from, where_clause, group_by, @@ -58,13 +60,14 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result referenced_tables, available_indexes: schema.indexes.clone().into_values().flatten().collect(), contains_constant_false_condition: false, + query_type: SelectQueryType::TopLevel, }; // Parse the WHERE clause plan.where_clause = parse_where(where_clause, &plan.referenced_tables)?; let mut aggregate_expressions = Vec::new(); - for column in columns.clone() { + for (result_column_idx, column) in columns.iter_mut().enumerate() { match column { ast::ResultColumn::Star => { plan.source.select_star(&mut plan.result_columns); @@ -80,7 +83,7 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result crate::bail_parse_error!("Table {} not found", name.0); } let table_reference = referenced_table.unwrap(); - for (idx, col) in table_reference.table.columns.iter().enumerate() { + for (idx, col) in table_reference.columns().iter().enumerate() { plan.result_columns.push(ResultSetColumn { expr: ast::Expr::Column { database: None, // TODO: support different databases @@ -88,13 +91,14 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result column: idx, is_rowid_alias: col.is_rowid_alias, }, + name: col.name.clone(), contains_aggregates: false, }); } } - ast::ResultColumn::Expr(mut expr, _) => { - bind_column_references(&mut expr, &plan.referenced_tables)?; - match &expr { + 
ast::ResultColumn::Expr(ref mut expr, maybe_alias) => { + bind_column_references(expr, &plan.referenced_tables)?; + match expr { ast::Expr::FunctionCall { name, distinctness: _, @@ -119,6 +123,12 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result }; aggregate_expressions.push(agg.clone()); plan.result_columns.push(ResultSetColumn { + name: get_name( + maybe_alias.as_ref(), + &expr, + &plan.referenced_tables, + || format!("expr_{}", result_column_idx), + ), expr: expr.clone(), contains_aggregates: true, }); @@ -127,6 +137,12 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result let contains_aggregates = resolve_aggregates(&expr, &mut aggregate_expressions); plan.result_columns.push(ResultSetColumn { + name: get_name( + maybe_alias.as_ref(), + &expr, + &plan.referenced_tables, + || format!("expr_{}", result_column_idx), + ), expr: expr.clone(), contains_aggregates, }); @@ -151,6 +167,12 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result }; aggregate_expressions.push(agg.clone()); plan.result_columns.push(ResultSetColumn { + name: get_name( + maybe_alias.as_ref(), + &expr, + &plan.referenced_tables, + || format!("expr_{}", result_column_idx), + ), expr: expr.clone(), contains_aggregates: true, }); @@ -163,8 +185,14 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result } expr => { let contains_aggregates = - resolve_aggregates(expr, &mut aggregate_expressions); + resolve_aggregates(&expr, &mut aggregate_expressions); plan.result_columns.push(ResultSetColumn { + name: get_name( + maybe_alias.as_ref(), + &expr, + &plan.referenced_tables, + || format!("expr_{}", result_column_idx), + ), expr: expr.clone(), contains_aggregates, }); diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 20b5e39cb..8c879fe9f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -99,7 +99,7 @@ pub struct ProgramState { registers: Vec, last_compare: Option, deferred_seek: Option<(CursorID, 
CursorID)>, - ended_coroutine: bool, // flag to notify yield coroutine finished + ended_coroutine: HashMap, // flag to indicate that a coroutine has ended (key is the yield register) regex_cache: RegexCache, interrupted: bool, } @@ -115,7 +115,7 @@ impl ProgramState { registers, last_compare: None, deferred_seek: None, - ended_coroutine: false, + ended_coroutine: HashMap::new(), regex_cache: RegexCache::new(), interrupted: false, } @@ -179,6 +179,7 @@ impl Program { } let insn = &self.insns[state.pc as usize]; trace_insn(self, state.pc as InsnReference, insn); + // print_insn(self, state.pc as InsnReference, insn, "".to_string()); let mut cursors = state.cursors.borrow_mut(); match insn { Insn::Init { target_pc } => { @@ -2279,12 +2280,18 @@ impl Program { jump_on_definition, start_offset, } => { + assert!(*jump_on_definition >= 0); state.registers[*yield_reg] = OwnedValue::Integer(*start_offset); - state.pc = *jump_on_definition; + state.ended_coroutine.insert(*yield_reg, false); + state.pc = if *jump_on_definition == 0 { + state.pc + 1 + } else { + *jump_on_definition + }; } Insn::EndCoroutine { yield_reg } => { if let OwnedValue::Integer(pc) = state.registers[*yield_reg] { - state.ended_coroutine = true; + state.ended_coroutine.insert(*yield_reg, true); state.pc = pc - 1; // yield jump is always next to yield. 
Here we substract 1 to go back to yield instruction } else { unreachable!(); @@ -2295,7 +2302,7 @@ impl Program { end_offset, } => { if let OwnedValue::Integer(pc) = state.registers[*yield_reg] { - if state.ended_coroutine { + if *state.ended_coroutine.get(yield_reg).unwrap_or(&false) { state.pc = *end_offset; } else { // swap @@ -2303,7 +2310,10 @@ impl Program { (pc, OwnedValue::Integer(state.pc + 1)); } } else { - unreachable!(); + unreachable!( + "yield_reg {} contains non-integer value: {:?}", + *yield_reg, state.registers[*yield_reg] + ); } } Insn::InsertAsync { diff --git a/testing/all.test b/testing/all.test index 62fb8ee90..57e440351 100755 --- a/testing/all.test +++ b/testing/all.test @@ -17,4 +17,5 @@ source $testdir/pragma.test source $testdir/scalar-functions.test source $testdir/scalar-functions-datetime.test source $testdir/select.test +source $testdir/subquery.test source $testdir/where.test diff --git a/testing/subquery.test b/testing/subquery.test new file mode 100644 index 000000000..4754d1563 --- /dev/null +++ b/testing/subquery.test @@ -0,0 +1,194 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test subquery-inner-filter { + select sub.loud_hat from ( + select concat(name, '!!!') as loud_hat + from products where name = 'hat' + ) sub; +} {hat!!!} + +do_execsql_test subquery-outer-filter { + select sub.loud_hat from ( + select concat(name, '!!!') as loud_hat + from products + ) sub where sub.loud_hat = 'hat!!!' 
+} {hat!!!} + +do_execsql_test subquery-without-alias { + select loud_hat from ( + select concat(name, '!!!') as loud_hat + from products where name = 'hat' + ); +} {hat!!!} + +do_execsql_test subquery-no-alias-on-col { + select price from ( + select * from products where name = 'hat' + ) +} {79.0} + +do_execsql_test subquery-no-alias-on-col-named { + select price from ( + select price from products where name = 'hat' + ) +} {79.0} + +do_execsql_test subquery-select-star { + select * from ( + select price, price + 1.0, name from products where name = 'hat' + ) +} {79.0|80.0|hat} + +do_execsql_test subquery-select-table-star { + select sub.* from ( + select price, price + 1.0, name from products where name = 'hat' + ) sub +} {79.0|80.0|hat} + +do_execsql_test nested-subquery { + select sub.loudest_hat from ( + select upper(nested_sub.loud_hat) as loudest_hat from ( + select concat(name, '!!!') as loud_hat + from products where name = 'hat' + ) nested_sub + ) sub; +} {HAT!!!} + +do_execsql_test subquery-orderby-limit { + select upper(sub.loud_name) as loudest_name + from ( + select concat(name, '!!!') as loud_name + from products + order by name + limit 3 + ) sub; +} {ACCESSORIES!!! +BOOTS!!! 
+CAP!!!} + +do_execsql_test table-join-subquery { + select sub.product_name, p.name + from products p join ( + select name as product_name + from products + ) sub on p.name = sub.product_name where p.name = 'hat' +} {hat|hat} + +do_execsql_test subquery-join-table { + select sub.product_name, p.name + from ( + select name as product_name + from products + ) sub join products p on sub.product_name = p.name where sub.product_name = 'hat' +} {hat|hat} + +do_execsql_test subquery-join-subquery { + select sub1.sus_name, sub2.truthful_name + from ( + select name as sus_name + from products + where name = 'cap' + ) sub1 join ( + select concat('no ', name) as truthful_name + from products + where name = 'cap' + ) sub2; +} {"cap|no cap"} + +do_execsql_test select-star-table-subquery { + select * + from products p join ( + select name, price + from products + where name = 'hat' + ) sub on p.name = sub.name; +} {1|hat|79.0|hat|79.0} + +do_execsql_test select-star-subquery-table { + select * + from ( + select name, price + from products + where name = 'hat' + ) sub join products p on sub.name = p.name; +} {hat|79.0|1|hat|79.0} + +do_execsql_test select-star-subquery-subquery { + select * + from ( + select name, price + from products + where name = 'hat' + ) sub1 join ( + select price + from products + where name = 'hat' + ) sub2 on sub1.price = sub2.price; +} {hat|79.0|79.0} + + +do_execsql_test subquery-inner-grouping { + select is_jennifer, person_count + from ( + select first_name = 'Jennifer' as is_jennifer, count(1) as person_count from users + group by first_name = 'Jennifer' + ) order by person_count asc +} {1|151 +0|9849} + +do_execsql_test subquery-outer-grouping { + select is_jennifer, count(1) as person_count + from ( + select first_name = 'Jennifer' as is_jennifer from users + ) group by is_jennifer order by count(1) asc +} {1|151 +0|9849} + +do_execsql_test subquery-join-using-with-outer-limit { + SELECT p.name, sub.funny_name + FROM products p + JOIN ( + select 
id, concat(name, '-lol') as funny_name + from products + ) sub USING (id) + LIMIT 3; +} {"hat|hat-lol +cap|cap-lol +shirt|shirt-lol"} + +do_execsql_test subquery-join-using-with-inner-limit { + SELECT p.name, sub.funny_name + FROM products p + JOIN ( + select id, concat(name, '-lol') as funny_name + from products + limit 3 + ) sub USING (id); +} {"hat|hat-lol +cap|cap-lol +shirt|shirt-lol"} + +do_execsql_test subquery-join-using-with-both-limits { + SELECT p.name, sub.funny_name + FROM products p + JOIN ( + select id, concat(name, '-lol') as funny_name + from products + limit 3 + ) sub USING (id) + LIMIT 2; +} {"hat|hat-lol +cap|cap-lol"} + +do_execsql_test subquery-containing-join { + select foo, bar + from ( + select p.name as foo, u.first_name as bar + from products p join users u using (id) + ) limit 3; +} {hat|Jamie +cap|Cindy +shirt|Tommy} \ No newline at end of file From 80b9da95c0518a3bd0843a8b0bb5589646a889e1 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 07:40:51 +0200 Subject: [PATCH 2/7] replace termination_label_stack with much simpler LoopLabels --- core/translate/emitter.rs | 258 +++++++++++++----------------------- core/translate/optimizer.rs | 14 +- core/translate/plan.rs | 12 +- core/translate/planner.rs | 7 +- 4 files changed, 106 insertions(+), 185 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index ad472c2d0..06abeb53d 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -74,25 +74,30 @@ pub struct GroupByMetadata { pub group_exprs_comparison_register: usize, } +/// Jump labels for each loop in the query's main execution loop +#[derive(Debug, Clone, Copy)] +pub struct LoopLabels { + /// jump to the start of the loop body + loop_start: BranchOffset, + /// jump to the NextAsync instruction (or equivalent) + next: BranchOffset, + /// jump to the end of the loop, exiting it + loop_end: BranchOffset, +} + /// The Metadata struct holds various information and labels used during 
bytecode generation. /// It is used for maintaining state and control flow during the bytecode /// generation process. #[derive(Debug)] pub struct Metadata { - // this stack is generically used for a "jump to the end of the current query phase" purpose in a FIFO manner. - // For example, in a nested loop join, each loop will have a label signifying the end of that particular loop. - termination_label_stack: Vec, + // labels for the instructions that either: + // - jump to the start of the current loop. (e.g. a Next instruction jumps here) + // - jump to the Next instruction (or equivalent) in the current operator. (e.g. a condition evaluates to false, so the current row is skipped) + // - jump to the end of the current loop. (e.g. an index seek results in no key matching the seek condition, so execution will jump to the end of the loop) + loop_labels: HashMap, // label for the instruction that jumps to the next phase of the query after the main loop // we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.) after_main_loop_label: Option, - // labels for the instructions that jump to the next row in the current operator. - // for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; - // in a join with a scan and a seek, the seek will jump to the scan's Next instruction when the join condition is false. - // The difference between next_row_labels and termination_label_stack is that next_row_labels are used to jump to the next row in the - // current loop, whereas termination_label_stack is used to jump OUT of the current loop entirely. - next_row_labels: HashMap, - // labels for the instructions beginning the inner loop of a scan operator. 
- scan_loop_body_labels: Vec, // metadata for the group by operator group_by_metadata: Option, // metadata for the order by operator @@ -139,12 +144,10 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> let start_offset = program.offset(); let metadata = Metadata { - termination_label_stack: vec![halt_label], + loop_labels: HashMap::new(), after_main_loop_label: None, group_by_metadata: None, left_joins: HashMap::new(), - next_row_labels: HashMap::new(), - scan_loop_body_labels: vec![], sort_metadata: None, aggregation_start_register: None, result_column_start_register: None, @@ -161,12 +164,9 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> /// query will jump to the Transaction instruction via init_label. fn epilogue( program: &mut ProgramBuilder, - metadata: &mut Metadata, init_label: BranchOffset, start_offset: BranchOffset, ) -> Result<()> { - let halt_label = metadata.termination_label_stack.pop().unwrap(); - program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), @@ -210,7 +210,7 @@ fn emit_program_for_select( // Trivial exit on LIMIT 0 if let Some(limit) = plan.limit { if limit == 0 { - epilogue(&mut program, &mut metadata, init_label, start_offset)?; + epilogue(&mut program, init_label, start_offset)?; return Ok(program.build(database_header, connection)); } } @@ -219,7 +219,7 @@ fn emit_program_for_select( emit_query(&mut program, &mut plan, &mut metadata, syms)?; // Finalize program - epilogue(&mut program, &mut metadata, init_label, start_offset)?; + epilogue(&mut program, init_label, start_offset)?; Ok(program.build(database_header, connection)) } @@ -301,13 +301,10 @@ fn emit_subquery( } let end_coroutine_label = program.allocate_label(); let mut metadata = Metadata { - // A regular query ends in a Halt, whereas a subquery ends in an EndCoroutine. 
- termination_label_stack: vec![end_coroutine_label], + loop_labels: HashMap::new(), after_main_loop_label: None, group_by_metadata: None, left_joins: HashMap::new(), - next_row_labels: HashMap::new(), - scan_loop_body_labels: vec![], sort_metadata: None, aggregation_start_register: None, result_column_start_register: None, @@ -373,24 +370,11 @@ fn emit_query( // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { - let orderby_label = program.allocate_label(); - metadata.termination_label_stack.push(orderby_label); init_order_by(program, order_by, metadata)?; } if let Some(ref mut group_by) = plan.group_by { - let output_groupby_row_label = program.allocate_label(); - metadata - .termination_label_stack - .push(output_groupby_row_label); - let groupby_end_label = program.allocate_label(); - metadata.termination_label_stack.push(groupby_end_label); init_group_by(program, group_by, &plan.aggregates, metadata)?; - } else if !plan.aggregates.is_empty() { - let output_aggregation_row_label = program.allocate_label(); - metadata - .termination_label_stack - .push(output_aggregation_row_label); } init_source(program, &plan.source, metadata, &OperationMode::SELECT)?; @@ -503,7 +487,7 @@ fn emit_program_for_delete( program.resolve_label(after_main_loop_label, program.offset()); // Finalize program - epilogue(&mut program, &mut metadata, init_label, start_offset)?; + epilogue(&mut program, init_label, start_offset)?; Ok(program.build(database_header, connection)) } @@ -618,12 +602,16 @@ fn init_source( metadata: &mut Metadata, mode: &OperationMode, ) -> Result<()> { + let operator_id = source.id(); + let loop_labels = LoopLabels { + next: program.allocate_label(), + loop_start: program.allocate_label(), + loop_end: program.allocate_label(), + }; + metadata.loop_labels.insert(operator_id, loop_labels); + match source { - SourceOperator::Subquery { id, .. 
} => { - let next_row_label = program.allocate_label(); - metadata.next_row_labels.insert(*id, next_row_label); - Ok(()) - } + SourceOperator::Subquery { .. } => Ok(()), SourceOperator::Join { id, left, @@ -645,17 +633,13 @@ fn init_source( Ok(()) } SourceOperator::Scan { - id, - table_reference, - .. + table_reference, .. } => { let cursor_id = program.alloc_cursor_id( Some(table_reference.table_identifier.clone()), Some(table_reference.table.clone()), ); let root_page = table_reference.table.get_root_page(); - let next_row_label = program.allocate_label(); - metadata.next_row_labels.insert(*id, next_row_label); match mode { OperationMode::SELECT => { @@ -680,7 +664,6 @@ fn init_source( Ok(()) } SourceOperator::Search { - id, table_reference, search, .. @@ -690,10 +673,6 @@ fn init_source( Some(table_reference.table.clone()), ); - let next_row_label = program.allocate_label(); - - metadata.next_row_labels.insert(*id, next_row_label); - match mode { OperationMode::SELECT => { program.emit_insn(Insn::OpenReadAsync { @@ -741,7 +720,7 @@ fn init_source( Ok(()) } - SourceOperator::Nothing => Ok(()), + SourceOperator::Nothing { .. 
} => Ok(()), } } @@ -755,9 +734,6 @@ fn open_loop( metadata: &mut Metadata, syms: &SymbolTable, ) -> Result<()> { - metadata - .termination_label_stack - .push(program.allocate_label()); match source { SourceOperator::Subquery { id, @@ -778,30 +754,23 @@ fn open_loop( jump_on_definition: 0, start_offset: coroutine_implementation_start, }); - let loop_body_start_label = program.allocate_label(); - metadata.scan_loop_body_labels.push(loop_body_start_label); - program.defer_label_resolution(loop_body_start_label, program.offset() as usize); + let loop_labels = metadata + .loop_labels + .get(id) + .expect("subquery has no loop labels"); + program.defer_label_resolution(loop_labels.loop_start, program.offset() as usize); // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor, // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row. // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack, // which in this case is the end of the Yield-Goto loop in the parent query. - let end_of_loop_label = *metadata.termination_label_stack.last().unwrap(); program.emit_insn_with_label_dependency( Insn::Yield { yield_reg, - end_offset: end_of_loop_label, + end_offset: loop_labels.loop_end, }, - end_of_loop_label, + loop_labels.loop_end, ); - // In case we have predicates on the subquery results that evaluate to false, - // (e.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10) - // we jump to the Goto instruction below to move on to the next row from the subquery. - let jump_label = metadata - .next_row_labels - .get(id) - .expect("subquery has no next row label"); - // These are predicates evaluated outside of the subquery, // so they are translated here. // E.g. 
SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 @@ -811,7 +780,7 @@ fn open_loop( let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, - jump_target_when_false: *jump_label, + jump_target_when_false: loop_labels.next, }; translate_condition_expr( program, @@ -837,10 +806,12 @@ fn open_loop( } => { open_loop(program, left, referenced_tables, metadata, syms)?; - let mut jump_target_when_false = *metadata - .next_row_labels + let loop_labels = metadata + .loop_labels .get(&right.id()) - .expect("right side of join has no next row label"); + .expect("right side of join has no loop labels"); + + let mut jump_target_when_false = loop_labels.next; if *outer { let lj_meta = metadata.left_joins.get(id).unwrap(); @@ -850,9 +821,6 @@ fn open_loop( }); jump_target_when_false = lj_meta.check_match_flag_label; } - metadata - .next_row_labels - .insert(right.id(), jump_target_when_false); open_loop(program, right, referenced_tables, metadata, syms)?; @@ -905,10 +873,10 @@ fn open_loop( } else { program.emit_insn(Insn::RewindAsync { cursor_id }); } - let scan_loop_body_label = program.allocate_label(); - - // If the table this cursor is scanning is entirely empty, we exit this loop entirely. 
- let end_of_loop_label = metadata.termination_label_stack.last().unwrap(); + let loop_labels = metadata + .loop_labels + .get(id) + .expect("scan has no loop labels"); program.emit_insn_with_label_dependency( if iter_dir .as_ref() @@ -916,30 +884,25 @@ fn open_loop( { Insn::LastAwait { cursor_id, - pc_if_empty: *end_of_loop_label, + pc_if_empty: loop_labels.loop_end, } } else { Insn::RewindAwait { cursor_id, - pc_if_empty: *end_of_loop_label, + pc_if_empty: loop_labels.loop_end, } }, - *end_of_loop_label, + loop_labels.loop_end, ); - metadata.scan_loop_body_labels.push(scan_loop_body_label); - program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + program.defer_label_resolution(loop_labels.loop_start, program.offset() as usize); - let jump_label = metadata - .next_row_labels - .get(id) - .expect("scan has no next row label"); if let Some(preds) = predicates { for expr in preds { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, - jump_target_when_false: *jump_label, + jump_target_when_false: loop_labels.next, }; translate_condition_expr( program, @@ -963,6 +926,10 @@ fn open_loop( .. } => { let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + let loop_labels = metadata + .loop_labels + .get(id) + .expect("search has no loop labels"); // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. if !matches!(search, Search::RowidEq { .. 
}) { @@ -971,8 +938,6 @@ fn open_loop( } else { None }; - let scan_loop_body_label = program.allocate_label(); - metadata.scan_loop_body_labels.push(scan_loop_body_label); let cmp_reg = program.alloc_register(); let (cmp_expr, cmp_op) = match search { Search::IndexSearch { @@ -1004,7 +969,7 @@ fn open_loop( _ => unreachable!(), } // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. - let end_of_loop_label = *metadata.termination_label_stack.last().unwrap(); + let end_of_loop_label = loop_labels.loop_end; program.emit_insn_with_label_dependency( match cmp_op { ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { @@ -1038,7 +1003,7 @@ fn open_loop( )?; } - program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + program.defer_label_resolution(loop_labels.loop_start, program.offset() as usize); // TODO: We are currently only handling ascending indexes. // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. @@ -1049,10 +1014,7 @@ fn open_loop( // // For primary key searches we emit RowId and then compare it to the seek value. 
- let abort_jump_target = *metadata - .next_row_labels - .get(id) - .expect("search operator has no next row label"); + let abort_jump_target = loop_labels.next; match cmp_op { ast::Operator::Equals | ast::Operator::LessEquals => { if let Some(index_cursor_id) = index_cursor_id { @@ -1119,8 +1081,6 @@ fn open_loop( } } - let jump_label = metadata.next_row_labels.get(id).unwrap(); - if let Search::RowidEq { cmp_expr } = search { let src_reg = program.alloc_register(); translate_expr( @@ -1135,9 +1095,9 @@ fn open_loop( Insn::SeekRowid { cursor_id: table_cursor_id, src_reg, - target_pc: *jump_label, + target_pc: loop_labels.next, }, - *jump_label, + loop_labels.next, ); } if let Some(predicates) = predicates { @@ -1146,7 +1106,7 @@ fn open_loop( let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, - jump_target_when_false: *jump_label, + jump_target_when_false: loop_labels.next, }; translate_condition_expr( program, @@ -1162,7 +1122,7 @@ fn open_loop( Ok(()) } - SourceOperator::Nothing => Ok(()), + SourceOperator::Nothing { .. } => Ok(()), } } @@ -1383,24 +1343,21 @@ fn close_loop( source: &SourceOperator, metadata: &mut Metadata, ) -> Result<()> { + let loop_labels = *metadata + .loop_labels + .get(&source.id()) + .expect("source has no loop labels"); match source { - SourceOperator::Subquery { id, .. } => { - program.resolve_label( - *metadata - .next_row_labels - .get(id) - .expect("subquery has no next row label"), - program.offset(), - ); - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + SourceOperator::Subquery { .. } => { + program.resolve_label(loop_labels.next, program.offset()); // A subquery has no cursor to call NextAsync on, so it just emits a Goto // to the Yield instruction, which in turn jumps back to the main loop of the subquery, // so that the next row from the subquery can be read. 
program.emit_insn_with_label_dependency( Insn::Goto { - target_pc: jump_label, + target_pc: loop_labels.loop_start, }, - jump_label, + loop_labels.loop_start, ); } SourceOperator::Join { @@ -1466,7 +1423,7 @@ fn close_loop( .. } => { let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + program.resolve_label(loop_labels.next, program.offset()); if iter_dir .as_ref() .is_some_and(|dir| *dir == IterationDirection::Backwards) @@ -1475,8 +1432,6 @@ fn close_loop( } else { program.emit_insn(Insn::NextAsync { cursor_id }); } - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); - if iter_dir .as_ref() .is_some_and(|dir| *dir == IterationDirection::Backwards) @@ -1484,17 +1439,17 @@ fn close_loop( program.emit_insn_with_label_dependency( Insn::PrevAwait { cursor_id, - pc_if_next: jump_label, + pc_if_next: loop_labels.loop_start, }, - jump_label, + loop_labels.loop_start, ); } else { program.emit_insn_with_label_dependency( Insn::NextAwait { cursor_id, - pc_if_next: jump_label, + pc_if_next: loop_labels.loop_start, }, - jump_label, + loop_labels.loop_start, ); } } @@ -1504,7 +1459,7 @@ fn close_loop( search, .. } => { - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + program.resolve_label(loop_labels.next, program.offset()); if matches!(search, Search::RowidEq { .. }) { // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. 
return Ok(()); @@ -1518,20 +1473,18 @@ fn close_loop( }; program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); program.emit_insn_with_label_dependency( Insn::NextAwait { cursor_id, - pc_if_next: jump_label, + pc_if_next: loop_labels.loop_start, }, - jump_label, + loop_labels.loop_start, ); } - SourceOperator::Nothing => {} + SourceOperator::Nothing { .. } => {} }; - let end_of_loop_label = metadata.termination_label_stack.pop().unwrap(); - program.resolve_label(end_of_loop_label, program.offset()); + program.resolve_label(loop_labels.loop_end, program.offset()); Ok(()) } @@ -1603,6 +1556,8 @@ fn group_by_emit( ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let grouping_done_label = program.allocate_label(); + let group_by_output_row_label = program.allocate_label(); + let group_by_end_label = program.allocate_label(); let group_by_metadata = metadata.group_by_metadata.as_mut().unwrap(); let GroupByMetadata { @@ -1717,17 +1672,6 @@ fn group_by_emit( subroutine_accumulator_output_label, ); - let group_by_end_idx = { - assert!(metadata.termination_label_stack.len() >= 2); - // The reason we take the 2nd-to-last label on the stack is because the top of the stack jumps to - // the group by output row subroutine (i.e. emit row for a single group), whereas - // the 2nd-to-last label on the stack jumps to the end of the entire group by routine. 
- metadata.termination_label_stack.len() - 2 - }; - let group_by_end_label = *metadata - .termination_label_stack - .get(group_by_end_idx) - .unwrap(); program.add_comment(program.offset(), "check abort flag"); program.emit_insn_with_label_dependency( Insn::IfPos { @@ -1837,14 +1781,13 @@ fn group_by_emit( ); program.add_comment(program.offset(), "output group by row subroutine start"); - let output_group_by_row_label = metadata.termination_label_stack.pop().unwrap(); program.emit_insn_with_label_dependency( Insn::IfPos { reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: output_group_by_row_label, + target_pc: group_by_output_row_label, decrement_by: 0, }, - output_group_by_row_label, + group_by_output_row_label, ); let group_by_end_without_emitting_row_label = program.allocate_label(); program.defer_label_resolution( @@ -1857,7 +1800,7 @@ fn group_by_emit( let agg_start_reg = metadata.aggregation_start_register.unwrap(); // Resolve the label for the start of the group by output row subroutine - program.resolve_label(output_group_by_row_label, program.offset()); + program.resolve_label(group_by_output_row_label, program.offset()); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1904,13 +1847,7 @@ fn group_by_emit( result_columns, metadata.result_column_start_register.unwrap(), Some(&precomputed_exprs_to_register), - limit.map(|l| { - ( - l, - metadata.limit_reg.unwrap(), - *metadata.termination_label_stack.last().unwrap(), - ) - }), + limit.map(|l| (l, metadata.limit_reg.unwrap(), group_by_end_label)), syms, query_type, )?; @@ -1952,11 +1889,6 @@ fn group_by_emit( return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, }); - assert!( - metadata.termination_label_stack.len() >= 2, - "termination_label_stack should have at least 2 elements" - ); - let group_by_end_label = metadata.termination_label_stack.pop().unwrap(); 
program.resolve_label(group_by_end_label, program.offset()); Ok(()) @@ -1974,14 +1906,6 @@ fn agg_without_group_by_emit( syms: &SymbolTable, query_type: &SelectQueryType, ) -> Result<()> { - // Resolve the label for the start of the aggregation phase - program.resolve_label( - metadata - .termination_label_stack - .pop() - .expect("termination_label_stack should not be empty"), - program.offset(), - ); let agg_start_reg = metadata.aggregation_start_register.unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; @@ -2027,10 +1951,6 @@ fn order_by_emit( ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let sort_loop_end_label = program.allocate_label(); - program.resolve_label( - metadata.termination_label_stack.pop().unwrap(), - program.offset(), - ); let mut pseudo_columns = vec![]; for (i, _) in order_by.iter().enumerate() { pseudo_columns.push(Column { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 2e0f142a9..1e8dbcbca 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -211,7 +211,7 @@ fn use_indexes( use_indexes(right, referenced_tables, available_indexes)?; Ok(()) } - SourceOperator::Nothing => Ok(()), + SourceOperator::Nothing { .. } => Ok(()), } } @@ -335,7 +335,7 @@ fn eliminate_constants( Ok(ConstantConditionEliminationResult::Continue) } - SourceOperator::Nothing => Ok(ConstantConditionEliminationResult::Continue), + SourceOperator::Nothing { .. } => Ok(ConstantConditionEliminationResult::Continue), } } @@ -430,7 +430,7 @@ fn push_predicates( // Base cases - nowhere else to push to SourceOperator::Scan { .. } => Ok(()), SourceOperator::Search { .. } => Ok(()), - SourceOperator::Nothing => Ok(()), + SourceOperator::Nothing { .. } => Ok(()), } } @@ -585,7 +585,7 @@ fn push_predicate( Ok(None) } - SourceOperator::Nothing => Ok(Some(predicate)), + SourceOperator::Nothing { .. 
} => Ok(Some(predicate)), } } @@ -1018,9 +1018,3 @@ impl TakeOwnership for ast::Expr { std::mem::replace(self, ast::Expr::Literal(ast::Literal::Null)) } } - -impl TakeOwnership for SourceOperator { - fn take_ownership(&mut self) -> Self { - std::mem::replace(self, Self::Nothing) - } -} diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 84fde1382..b773d1eec 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -167,7 +167,7 @@ impl SourceOperator { .enumerate() .map(|(i, col)| (table_reference.table_index, col, i)) .collect(), - SourceOperator::Nothing => Vec::new(), + SourceOperator::Nothing { .. } => Vec::new(), } } } @@ -222,7 +222,9 @@ pub enum SourceOperator { // Nothing operator // This operator is used to represent an empty query. // e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing. - Nothing, + Nothing { + id: usize, + }, } /// The type of the table reference, either BTreeTable or Subquery @@ -306,7 +308,7 @@ impl SourceOperator { SourceOperator::Scan { id, .. } => *id, SourceOperator::Search { id, .. } => *id, SourceOperator::Subquery { id, .. } => *id, - SourceOperator::Nothing => unreachable!(), + SourceOperator::Nothing { id } => *id, } } } @@ -445,7 +447,7 @@ impl Display for SourceOperator { SourceOperator::Subquery { plan, .. } => { fmt_operator(&plan.source, f, level + 1, last) } - SourceOperator::Nothing => Ok(()), + SourceOperator::Nothing { .. } => Ok(()), } } writeln!(f, "QUERY PLAN")?; @@ -490,7 +492,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .unwrap(); } SourceOperator::Subquery { .. } => {} - SourceOperator::Nothing => {} + SourceOperator::Nothing { .. 
} => {} } Ok(table_refs_mask) } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 191aebe33..dc2843b6f 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -324,7 +324,12 @@ pub fn parse_from( operator_id_counter: &mut OperatorIdCounter, ) -> Result<(SourceOperator, Vec)> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok((SourceOperator::Nothing, vec![])); + return Ok(( + SourceOperator::Nothing { + id: operator_id_counter.get_next_id(), + }, + vec![], + )); } let mut table_index = 0; From 6633a3c66a9f1477322aeb906c2e9eae83d90e66 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 07:42:39 +0200 Subject: [PATCH 3/7] condense comment --- core/translate/emitter.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 06abeb53d..90c24517d 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -90,10 +90,7 @@ pub struct LoopLabels { /// generation process. #[derive(Debug)] pub struct Metadata { - // labels for the instructions that either: - // - jump to the start of the current loop. (e.g. a Next instruction jumps here) - // - jump to the Next instruction (or equivalent) in the current operator. (e.g. a condition evaluates to false, so the current row is skipped) - // - jump to the end of the current loop. (e.g. an index seek results in no key matching the seek condition, so execution will jump to the end of the loop) + // A typical query plan is a nested loop. Each loop has its own LoopLabels (see the definition of LoopLabels for more details) loop_labels: HashMap, // label for the instruction that jumps to the next phase of the query after the main loop // we don't know ahead of time what that is (GROUP BY, ORDER BY, etc.) 
From 2b5b54c44efdf4927bde9ad5da58d88e51abf8fd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 07:53:36 +0200 Subject: [PATCH 4/7] clippy --- core/lib.rs | 10 +++++----- core/translate/emitter.rs | 3 --- core/translate/select.rs | 10 +++++----- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 2e0b3e301..124c16b57 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -250,7 +250,7 @@ impl Connection { self.header.clone(), self.pager.clone(), Rc::downgrade(self), - &syms, + syms, )?); Ok(Statement::new(program, self.pager.clone())) } @@ -278,7 +278,7 @@ impl Connection { self.header.clone(), self.pager.clone(), Rc::downgrade(self), - &syms, + syms, )?); let stmt = Statement::new(program, self.pager.clone()); Ok(Some(Rows { stmt })) @@ -290,7 +290,7 @@ impl Connection { self.header.clone(), self.pager.clone(), Rc::downgrade(self), - &syms, + syms, )?; program.explain(); Ok(None) @@ -327,7 +327,7 @@ impl Connection { self.header.clone(), self.pager.clone(), Rc::downgrade(self), - &syms, + syms, )?; program.explain(); } @@ -339,7 +339,7 @@ impl Connection { self.header.clone(), self.pager.clone(), Rc::downgrade(self), - &syms, + syms, )?; let mut state = vdbe::ProgramState::new(program.max_registers); program.step(&mut state, self.pager.clone())?; diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 90c24517d..afea359cd 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -129,7 +129,6 @@ pub enum OperationMode { fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); - let halt_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::Init { @@ -1414,7 +1413,6 @@ fn close_loop( close_loop(program, left, metadata)?; } SourceOperator::Scan { - id, table_reference, iter_dir, .. 
@@ -1451,7 +1449,6 @@ fn close_loop( } } SourceOperator::Search { - id, table_reference, search, .. diff --git a/core/translate/select.rs b/core/translate/select.rs index affbaa04b..173ca544b 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -125,7 +125,7 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result plan.result_columns.push(ResultSetColumn { name: get_name( maybe_alias.as_ref(), - &expr, + expr, &plan.referenced_tables, || format!("expr_{}", result_column_idx), ), @@ -135,11 +135,11 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result } Ok(_) => { let contains_aggregates = - resolve_aggregates(&expr, &mut aggregate_expressions); + resolve_aggregates(expr, &mut aggregate_expressions); plan.result_columns.push(ResultSetColumn { name: get_name( maybe_alias.as_ref(), - &expr, + expr, &plan.referenced_tables, || format!("expr_{}", result_column_idx), ), @@ -169,7 +169,7 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result plan.result_columns.push(ResultSetColumn { name: get_name( maybe_alias.as_ref(), - &expr, + expr, &plan.referenced_tables, || format!("expr_{}", result_column_idx), ), @@ -189,7 +189,7 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result plan.result_columns.push(ResultSetColumn { name: get_name( maybe_alias.as_ref(), - &expr, + expr, &plan.referenced_tables, || format!("expr_{}", result_column_idx), ), From 3e5be21707664d845b5b3e9e899c54faa1b85ecd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 08:18:49 +0200 Subject: [PATCH 5/7] remove commented out code --- core/vdbe/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 8c879fe9f..675dea976 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -179,7 +179,6 @@ impl Program { } let insn = &self.insns[state.pc as usize]; trace_insn(self, state.pc as InsnReference, insn); - // print_insn(self, state.pc as 
InsnReference, insn, "".to_string()); let mut cursors = state.cursors.borrow_mut(); match insn { Insn::Init { target_pc } => { From 776ffc6131c77daca91333ad5ca23a7f741e8dd2 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 08:21:20 +0200 Subject: [PATCH 6/7] assert instead of fallback --- core/vdbe/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 675dea976..f95570f12 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2301,7 +2301,11 @@ impl Program { end_offset, } => { if let OwnedValue::Integer(pc) = state.registers[*yield_reg] { - if *state.ended_coroutine.get(yield_reg).unwrap_or(&false) { + if *state + .ended_coroutine + .get(yield_reg) + .expect("coroutine not initialized") + { state.pc = *end_offset; } else { // swap From df6c8c9dd15b602c4f42ed3e63d606a5de8f9610 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 1 Jan 2025 08:22:47 +0200 Subject: [PATCH 7/7] comment about yield instruction --- core/vdbe/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f95570f12..0813bbe35 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2308,7 +2308,8 @@ impl Program { { state.pc = *end_offset; } else { - // swap + // swap the program counter with the value in the yield register + // this is the mechanism that allows jumping back and forth between the coroutine and the caller (state.pc, state.registers[*yield_reg]) = (pc, OwnedValue::Integer(state.pc + 1)); }