From 58caf32fe2339729eb430563c9e5f52bf34e4e8b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 27 Oct 2025 14:34:53 +0200 Subject: [PATCH] Add plan_subqueries_from_where_clause() method and use it in Select planning --- core/translate/select.rs | 10 ++ core/translate/subquery.rs | 340 ++++++++++++++++++++++++++++++++++++- 2 files changed, 347 insertions(+), 3 deletions(-) diff --git a/core/translate/select.rs b/core/translate/select.rs index 8c6c57e84..2250e4e6a 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -13,6 +13,7 @@ use crate::translate::planner::{ break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where, resolve_window_and_aggregate_functions, }; +use crate::translate::subquery::plan_subqueries_from_where_clause; use crate::translate::window::plan_windows; use crate::util::normalize_ident; use crate::vdbe::builder::ProgramBuilderOpts; @@ -503,6 +504,15 @@ fn prepare_one_select_plan( )?; } + plan_subqueries_from_where_clause( + program, + &mut plan.non_from_clause_subqueries, + &mut plan.table_references, + resolver, + &mut plan.where_clause, + connection, + )?; + // Return the unoptimized query plan Ok(plan) } diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 0bf012560..edff3b877 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -1,8 +1,25 @@ +use std::sync::Arc; + +use turso_parser::ast::{self, SortOrder, SubqueryType}; + use crate::{ emit_explain, - schema::Table, - vdbe::{builder::ProgramBuilder, insn::Insn}, - QueryMode, Result, + schema::{Index, IndexColumn, Table}, + translate::{ + collate::get_collseq_from_expr, + expr::{unwrap_parens, walk_expr_mut, WalkControl}, + optimizer::optimize_select_plan, + plan::{ + ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryState, + WhereTerm, + }, + select::prepare_select_plan, + }, + vdbe::{ + builder::{CursorType, ProgramBuilder}, + insn::Insn, + }, + Connection, QueryMode, Result, }; use super::{ @@ -11,6 +28,323 @@ use super::{ plan::{Operation, QueryDestination, Search, SelectPlan, TableReferences}, }; +/// Compute query plans for subqueries in the WHERE clause. +/// The AST expression containing the subquery ([ast::Expr::Exists], [ast::Expr::Subquery], [ast::Expr::InSelect]) is replaced with a [ast::Expr::SubqueryResult] expression. +/// The [ast::Expr::SubqueryResult] expression contains the subquery ID, the left-hand side expression (only applicable to IN subqueries), the NOT IN flag (only applicable to IN subqueries), and the subquery type. +/// The computed plans are stored in the [NonFromClauseSubquery] structs on the [SelectPlan], and evaluated at the appropriate time during the translation of the main query. +/// The appropriate time is determined by whether the subquery is correlated or uncorrelated; +/// if it is uncorrelated, it can be evaluated as early as possible, but if it is correlated, it must be evaluated after all of its dependencies from the +/// outer query are 'in scope', i.e. their cursors are open and rewound. +pub fn plan_subqueries_from_where_clause( + program: &mut ProgramBuilder, + out_subqueries: &mut Vec, + referenced_tables: &mut TableReferences, + resolver: &Resolver, + where_terms: &mut [WhereTerm], + connection: &Arc, +) -> Result<()> { + // A WHERE clause subquery can reference columns from the outer query, + // including nested cases where a subquery inside a subquery references columns from its parent's parent + // and so on. + let get_outer_query_refs = |referenced_tables: &TableReferences| { + referenced_tables + .joined_tables() + .iter() + .map(|t| OuterQueryReference { + table: t.table.clone(), + identifier: t.identifier.clone(), + internal_id: t.internal_id, + col_used_mask: ColumnUsedMask::default(), + }) + .chain( + referenced_tables + .outer_query_refs() + .iter() + .map(|t| OuterQueryReference { + table: t.table.clone(), + identifier: t.identifier.clone(), + internal_id: t.internal_id, + col_used_mask: ColumnUsedMask::default(), + }), + ) + .collect::>() + }; + + // Walk the WHERE clause and replace subqueries with [ast::Expr::SubqueryResult] expressions. + for where_term in where_terms.iter_mut() { + walk_expr_mut( + &mut where_term.expr, + &mut |expr: &mut ast::Expr| -> Result { + match expr { + ast::Expr::Exists(_) => { + let subquery_id = program.table_reference_counter.next(); + let outer_query_refs = get_outer_query_refs(referenced_tables); + + let result_reg = program.alloc_register(); + let subquery_type = SubqueryType::Exists { result_reg }; + let result_expr = ast::Expr::SubqueryResult { + subquery_id, + lhs: None, + not_in: false, + query_type: subquery_type.clone(), + }; + let ast::Expr::Exists(subselect) = std::mem::replace(expr, result_expr) + else { + unreachable!(); + }; + + let plan = prepare_select_plan( + subselect, + resolver, + program, + &outer_query_refs, + QueryDestination::ExistsSubqueryResult { result_reg }, + connection, + )?; + let Plan::Select(mut plan) = plan else { + crate::bail_parse_error!( + "compound SELECT queries not supported yet in WHERE clause subqueries" + ); + }; + optimize_select_plan(&mut plan, resolver.schema)?; + // EXISTS subqueries are satisfied after at most 1 row has been returned. + plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric( + "1".to_string(), + )))); + let correlated = plan.is_correlated(); + out_subqueries.push(NonFromClauseSubquery { + internal_id: subquery_id, + query_type: subquery_type, + state: SubqueryState::Unevaluated { + plan: Some(Box::new(plan)), + }, + correlated, + }); + Ok(WalkControl::Continue) + } + ast::Expr::Subquery(_) => { + let subquery_id = program.table_reference_counter.next(); + let outer_query_refs = get_outer_query_refs(referenced_tables); + + let result_expr = ast::Expr::SubqueryResult { + subquery_id, + lhs: None, + not_in: false, + // Placeholder values because the number of columns returned is not known until the plan is prepared. + // These are replaced below after planning. + query_type: SubqueryType::RowValue { + result_reg_start: 0, + num_regs: 0, + }, + }; + let ast::Expr::Subquery(subselect) = std::mem::replace(expr, result_expr) + else { + unreachable!(); + }; + let plan = prepare_select_plan( + subselect, + resolver, + program, + &outer_query_refs, + QueryDestination::Unset, + connection, + )?; + let Plan::Select(mut plan) = plan else { + crate::bail_parse_error!( + "compound SELECT queries not supported yet in WHERE clause subqueries" + ); + }; + optimize_select_plan(&mut plan, resolver.schema)?; + let reg_count = plan.result_columns.len(); + let reg_start = program.alloc_registers(reg_count); + + plan.query_destination = QueryDestination::RowValueSubqueryResult { + result_reg_start: reg_start, + num_regs: reg_count, + }; + // RowValue subqueries are satisfied after at most 1 row has been returned, + // as they are used in comparisons with a scalar or a tuple of scalars like (x,y) = (SELECT ...) or x = (SELECT ...). + plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric( + "1".to_string(), + )))); + + let ast::Expr::SubqueryResult { + subquery_id, + lhs: None, + not_in: false, + query_type: + SubqueryType::RowValue { + result_reg_start, + num_regs, + }, + } = &mut *expr + else { + unreachable!(); + }; + *result_reg_start = reg_start; + *num_regs = reg_count; + + let correlated = plan.is_correlated(); + + out_subqueries.push(NonFromClauseSubquery { + internal_id: *subquery_id, + query_type: SubqueryType::RowValue { + result_reg_start: reg_start, + num_regs: reg_count, + }, + state: SubqueryState::Unevaluated { + plan: Some(Box::new(plan)), + }, + correlated, + }); + Ok(WalkControl::Continue) + } + ast::Expr::InSelect { .. } => { + let subquery_id = program.table_reference_counter.next(); + let outer_query_refs = get_outer_query_refs(referenced_tables); + + let ast::Expr::InSelect { lhs, not, rhs } = + std::mem::replace(expr, ast::Expr::Literal(ast::Literal::Null)) + else { + unreachable!(); + }; + let plan = prepare_select_plan( + rhs, + resolver, + program, + &outer_query_refs, + QueryDestination::Unset, + connection, + )?; + let Plan::Select(mut plan) = plan else { + crate::bail_parse_error!( + "compound SELECT queries not supported yet in WHERE clause subqueries" + ); + }; + optimize_select_plan(&mut plan, resolver.schema)?; + // e.g. (x,y) IN (SELECT ...) + // or x IN (SELECT ...) + let lhs_column_count = match unwrap_parens(lhs.as_ref())? { + ast::Expr::Parenthesized(exprs) => exprs.len(), + _ => 1, + }; + if lhs_column_count != plan.result_columns.len() { + crate::bail_parse_error!( + "lhs of IN subquery must have the same number of columns as the subquery" + ); + } + + let mut columns = plan + .result_columns + .iter() + .enumerate() + .map(|(i, c)| IndexColumn { + name: c.name(&plan.table_references).unwrap_or("").to_string(), + order: SortOrder::Asc, + pos_in_table: i, + collation: None, + default: None, + }) + .collect::>(); + + for (i, column) in columns.iter_mut().enumerate() { + column.collation = get_collseq_from_expr( + &plan.result_columns[i].expr, + &plan.table_references, + )?; + } + + let ephemeral_index = Arc::new(Index { + columns, + name: format!("ephemeral_index_where_sub_{subquery_id}"), + table_name: String::new(), + ephemeral: true, + has_rowid: false, + root_page: 0, + unique: false, + where_clause: None, + }); + + let cursor_id = program + .alloc_cursor_id(CursorType::BTreeIndex(ephemeral_index.clone())); + + plan.query_destination = QueryDestination::EphemeralIndex { + cursor_id, + index: ephemeral_index.clone(), + is_delete: false, + }; + + *expr = ast::Expr::SubqueryResult { + subquery_id, + lhs: Some(lhs), + not_in: not, + query_type: SubqueryType::In { cursor_id }, + }; + + let correlated = plan.is_correlated(); + + out_subqueries.push(NonFromClauseSubquery { + internal_id: subquery_id, + query_type: SubqueryType::In { cursor_id }, + state: SubqueryState::Unevaluated { + plan: Some(Box::new(plan)), + }, + correlated, + }); + Ok(WalkControl::Continue) + } + _ => Ok(WalkControl::Continue), + } + }, + )?; + } + + update_column_used_masks(referenced_tables, out_subqueries); + + Ok(()) +} + +/// We make decisions about when to evaluate expressions or whether to use covering indexes based on +/// which columns of a table have been referenced. +/// Since subquery nesting is arbitrarily deep, a reference to a column must propagate recursively +/// up to the parent. Example: +/// +/// SELECT * FROM t WHERE EXISTS (SELECT * FROM u WHERE EXISTS (SELECT * FROM v WHERE v.foo = t.foo)) +/// +/// In this case, t.foo is referenced in the innermost subquery, so the top level query must be notified +/// that t.foo has been used. +fn update_column_used_masks( + table_refs: &mut TableReferences, + subqueries: &mut [NonFromClauseSubquery], +) { + for subquery in subqueries.iter_mut() { + let SubqueryState::Unevaluated { plan } = &mut subquery.state else { + panic!("subquery has already been evaluated"); + }; + let Some(child_plan) = plan.as_mut() else { + panic!("subquery has no plan"); + }; + + for child_outer_query_ref in child_plan + .table_references + .outer_query_refs() + .iter() + .filter(|t| t.is_used()) + { + if let Some(joined_table) = + table_refs.find_joined_table_by_internal_id_mut(child_outer_query_ref.internal_id) + { + joined_table.col_used_mask |= &child_outer_query_ref.col_used_mask; + } + if let Some(outer_query_ref) = table_refs + .find_outer_query_ref_by_internal_id_mut(child_outer_query_ref.internal_id) + { + outer_query_ref.col_used_mask |= &child_outer_query_ref.col_used_mask; + } + } + } +} + /// Emit the subqueries contained in the FROM clause. /// This is done first so the results can be read in the main query loop. pub fn emit_subqueries(