From 718598eab82f2cdfd6a7e177abd40d50fd98858b Mon Sep 17 00:00:00 2001 From: Piotr Rzysko Date: Sat, 2 Aug 2025 06:50:23 +0200 Subject: [PATCH] Introduce scan type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Different scan parameters are required for different table types. Currently, index and iteration direction are only used by B-tree tables, while the remaining table types don’t require any parameters. Planning access to virtual tables, however, will require passing additional information from the planner, such as the virtual table index (distinct from a B-tree index) and the constraints that must be forwarded to the `filter` method. --- core/translate/delete.rs | 7 ++---- core/translate/emitter.rs | 7 +++--- core/translate/expr.rs | 10 ++------ core/translate/main_loop.rs | 37 ++++++++++++++++------------ core/translate/optimizer/join.rs | 32 +++++++++--------------- core/translate/optimizer/mod.rs | 24 +++++++++--------- core/translate/plan.rs | 42 +++++++++++++++++++++++--------- core/translate/planner.rs | 16 ++++-------- core/translate/update.rs | 23 ++++++++++------- 9 files changed, 104 insertions(+), 94 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 24490f455..fb2798510 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -8,7 +8,7 @@ use crate::{schema::Schema, Result, SymbolTable}; use std::sync::Arc; use turso_sqlite3_parser::ast::{Expr, Limit, QualifiedName, ResultColumn}; -use super::plan::{ColumnUsedMask, IterationDirection, JoinedTable, TableReferences}; +use super::plan::{ColumnUsedMask, JoinedTable, TableReferences}; #[allow(clippy::too_many_arguments)] pub fn translate_delete( @@ -84,13 +84,10 @@ pub fn prepare_delete_plan( let name = tbl_name.name.as_str().to_string(); let indexes = schema.get_indices(table.get_name()).to_vec(); let joined_tables = vec![JoinedTable { + op: Operation::default_scan_for(&table), table, identifier: name, internal_id: table_ref_counter.next(), - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, join_info: None, col_used_mask: ColumnUsedMask::default(), database_id: 0, diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 6a2d90661..de548d630 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -16,7 +16,7 @@ use super::main_loop::{ }; use super::order_by::{emit_order_by, init_order_by, SortMetadata}; use super::plan::{ - Distinctness, JoinOrderMember, Operation, SelectPlan, TableReferences, UpdatePlan, + Distinctness, JoinOrderMember, Operation, Scan, SelectPlan, TableReferences, UpdatePlan, }; use super::select::emit_simple_count; use super::subquery::emit_subqueries; @@ -786,7 +786,7 @@ fn emit_update_insns( let loop_labels = t_ctx.labels_main_loop.first().unwrap(); let cursor_id = program.resolve_cursor_id(&CursorKey::table(table_ref.internal_id)); let (index, is_virtual) = match &table_ref.op { - Operation::Scan { index, .. } => ( + Operation::Scan(Scan::BTreeTable { index, .. }) => ( index.as_ref().map(|index| { ( index.clone(), @@ -794,8 +794,9 @@ fn emit_update_insns( .resolve_cursor_id(&CursorKey::index(table_ref.internal_id, index.clone())), ) }), - table_ref.virtual_table().is_some(), + false, ), + Operation::Scan(_) => (None, table_ref.virtual_table().is_some()), Operation::Search(search) => match search { &Search::RowidEq { .. } | Search::Seek { index: None, .. } => (None, false), Search::Seek { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index e90a54fc5..ae5fd8bca 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -3459,10 +3459,7 @@ pub fn process_returning_clause( Vec, super::plan::TableReferences, )> { - use super::plan::{ - ColumnUsedMask, IterationDirection, JoinedTable, Operation, ResultSetColumn, - TableReferences, - }; + use super::plan::{ColumnUsedMask, JoinedTable, Operation, ResultSetColumn, TableReferences}; use super::planner::bind_column_references; let mut result_columns = vec![]; @@ -3477,10 +3474,7 @@ pub fn process_returning_clause( }, identifier: table_name.to_string(), internal_id, - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, + op: Operation::default_scan_for(table), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id: 0, diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index a98a4e658..f666022e2 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use crate::{ schema::{Affinity, Index, IndexColumn, Table}, translate::{ - plan::{DistinctCtx, Distinctness}, + plan::{DistinctCtx, Distinctness, Scan}, result_row::emit_select_result, }, types::SeekOp, @@ -203,7 +203,7 @@ pub fn init_loop( } let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?; match &table.op { - Operation::Scan { index, .. } => match (mode, &table.table) { + Operation::Scan(Scan::BTreeTable { index, .. }) => match (mode, &table.table) { (OperationMode::SELECT, Table::BTree(btree)) => { let root_page = btree.root_page; if let Some(cursor_id) = table_cursor_id { @@ -274,7 +274,10 @@ pub fn init_loop( }); } } - (_, Table::Virtual(tbl)) => { + _ => {} + }, + Operation::Scan(Scan::VirtualTable) => { + if let Table::Virtual(tbl) = &table.table { let is_write = matches!( mode, OperationMode::INSERT | OperationMode::UPDATE | OperationMode::DELETE @@ -286,8 +289,8 @@ pub fn init_loop( program.emit_insn(Insn::VOpen { cursor_id }); } } - _ => {} - }, + } + Operation::Scan(_) => {} Operation::Search(search) => { match mode { OperationMode::SELECT => { @@ -431,9 +434,9 @@ pub fn open_loop( let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; match &table.op { - Operation::Scan { iter_dir, .. } => { - match &table.table { - Table::BTree(_) => { + Operation::Scan(scan) => { + match (scan, &table.table) { + (Scan::BTreeTable { iter_dir, .. }, Table::BTree(_)) => { let iteration_cursor_id = temp_cursor_id.unwrap_or_else(|| { index_cursor_id.unwrap_or_else(|| { table_cursor_id.expect( @@ -454,7 +457,7 @@ pub fn open_loop( } program.preassign_label_to_next_insn(loop_start); } - Table::Virtual(vtab) => { + (Scan::VirtualTable, Table::Virtual(vtab)) => { let (start_reg, count, maybe_idx_str, maybe_idx_int) = { // Virtual‑table modules can receive constraints via xBestIndex. // They return information with which to pass to VFilter operation. @@ -556,7 +559,7 @@ pub fn open_loop( }); program.preassign_label_to_next_insn(loop_start); } - Table::FromClauseSubquery(from_clause_subquery) => { + (Scan::Subquery, Table::FromClauseSubquery(from_clause_subquery)) => { let (yield_reg, coroutine_implementation_start) = match &from_clause_subquery.plan.query_destination { QueryDestination::CoroutineYield { @@ -581,6 +584,10 @@ pub fn open_loop( end_offset: loop_end, }); } + _ => unreachable!( + "{:?} scan cannot be used with {:?} table", + scan, table.table + ), } if let Some(table_cursor_id) = table_cursor_id { @@ -1059,10 +1066,10 @@ pub fn close_loop( let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; match &table.op { - Operation::Scan { iter_dir, .. } => { + Operation::Scan(scan) => { program.resolve_label(loop_labels.next, program.offset()); - match &table.table { - Table::BTree(_) => { + match scan { + Scan::BTreeTable { iter_dir, .. } => { let iteration_cursor_id = temp_cursor_id.unwrap_or_else(|| { index_cursor_id.unwrap_or_else(|| { table_cursor_id.expect( @@ -1082,14 +1089,14 @@ pub fn close_loop( }); } } - Table::Virtual(_) => { + Scan::VirtualTable => { program.emit_insn(Insn::VNext { cursor_id: table_cursor_id .expect("Virtual tables do not support covering indexes"), pc_if_next: loop_labels.loop_start, }); } - Table::FromClauseSubquery(_) => { + Scan::Subquery => { // A subquery has no cursor to call Next on, so it just emits a Goto // to the Yield instruction, which in turn jumps back to the main loop of the subquery, // so that the next row from the subquery can be read. diff --git a/core/translate/optimizer/join.rs b/core/translate/optimizer/join.rs index c7c800c8b..a51ea55be 100644 --- a/core/translate/optimizer/join.rs +++ b/core/translate/optimizer/join.rs @@ -1309,13 +1309,11 @@ mod tests { let mut available_indexes = HashMap::new(); available_indexes.insert("t1".to_string(), vec![index]); + let table = Table::BTree(table); joined_tables.push(JoinedTable { - table: Table::BTree(table), + op: Operation::default_scan_for(&table), + table, internal_id: table_id_counter.next(), - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, identifier: "t1".to_string(), join_info: None, col_used_mask: ColumnUsedMask::default(), @@ -1402,13 +1400,11 @@ mod tests { }); available_indexes.insert("t1".to_string(), vec![index]); + let table = Table::BTree(table); joined_tables.push(JoinedTable { - table: Table::BTree(table), + op: Operation::default_scan_for(&table), + table, internal_id: table_id_counter.next(), - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, identifier: "t1".to_string(), join_info: None, col_used_mask: ColumnUsedMask::default(), @@ -1515,13 +1511,11 @@ mod tests { }); available_indexes.insert("t1".to_string(), vec![index]); + let table = Table::BTree(table); joined_tables.push(JoinedTable { - table: Table::BTree(table), + op: Operation::default_scan_for(&table), + table, internal_id: table_id_counter.next(), - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, identifier: "t1".to_string(), join_info: None, col_used_mask: ColumnUsedMask::default(), @@ -1660,12 +1654,10 @@ mod tests { internal_id: TableInternalId, ) -> JoinedTable { let name = table.name.clone(); + let table = Table::BTree(table); JoinedTable { - table: Table::BTree(table), - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, + op: Operation::default_scan_for(&table), + table, identifier: name, internal_id, join_info, diff --git a/core/translate/optimizer/mod.rs b/core/translate/optimizer/mod.rs index b47f5e933..8b7b2241e 100644 --- a/core/translate/optimizer/mod.rs +++ b/core/translate/optimizer/mod.rs @@ -14,7 +14,7 @@ use crate::{ schema::{Index, IndexColumn, Schema, Table}, translate::{ expr::is_double_quoted_identifier, expr::walk_expr_mut, - optimizer::access_method::AccessMethodParams, plan::TerminationKey, + optimizer::access_method::AccessMethodParams, plan::Scan, plan::TerminationKey, }, types::SeekOp, Result, @@ -260,10 +260,10 @@ fn optimize_table_access( }; if !try_to_build_ephemeral_index { - joined_tables[table_idx].op = Operation::Scan { + joined_tables[table_idx].op = Operation::Scan(Scan::BTreeTable { iter_dir: *iter_dir, index: index.clone(), - }; + }); continue; } // This branch means we have a full table scan for a non-outermost table. @@ -272,10 +272,10 @@ fn optimize_table_access( .iter() .find(|c| c.table_id == join_order_member.table_id); let Some(table_constraints) = table_constraints else { - joined_tables[table_idx].op = Operation::Scan { + joined_tables[table_idx].op = Operation::Scan(Scan::BTreeTable { iter_dir: *iter_dir, index: index.clone(), - }; + }); continue; }; let temp_constraint_refs = (0..table_constraints.constraints.len()) @@ -291,10 +291,10 @@ fn optimize_table_access( &best_join_order[..=i], ); if usable_constraint_refs.is_empty() { - joined_tables[table_idx].op = Operation::Scan { + joined_tables[table_idx].op = Operation::Scan(Scan::BTreeTable { iter_dir: *iter_dir, index: index.clone(), - }; + }); continue; } let ephemeral_index = ephemeral_index_build( @@ -359,11 +359,11 @@ fn optimize_table_access( }; } } - AccessMethodParams::VirtualTable | AccessMethodParams::Subquery => { - joined_tables[table_idx].op = Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }; + AccessMethodParams::VirtualTable => { + joined_tables[table_idx].op = Operation::Scan(Scan::VirtualTable); + } + AccessMethodParams::Subquery => { + joined_tables[table_idx].op = Operation::Scan(Scan::Subquery); } } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 31b66c497..490f55bae 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -863,12 +863,7 @@ impl ColumnUsedMask { pub enum Operation { // Scan operation // This operation is used to scan a table. - // The iter_dir is used to indicate the direction of the iterator. - Scan { - iter_dir: IterationDirection, - /// The index that we are using to scan the table, if any. - index: Option>, - }, + Scan(Scan), // Search operation // This operation is used to search for a row in a table using an index // (i.e. a primary key or a secondary index) @@ -876,9 +871,21 @@ pub enum Operation { } impl Operation { + pub fn default_scan_for(table: &Table) -> Self { + match table { + Table::BTree(_) => Operation::Scan(Scan::BTreeTable { + iter_dir: IterationDirection::Forwards, + index: None, + }), + Table::Virtual(_) => Operation::Scan(Scan::VirtualTable), + Table::FromClauseSubquery(_) => Operation::Scan(Scan::Subquery), + } + } + pub fn index(&self) -> Option<&Arc> { match self { - Operation::Scan { index, .. } => index.as_ref(), + Operation::Scan(Scan::BTreeTable { index, .. }) => index.as_ref(), + Operation::Scan(_) => None, Operation::Search(Search::RowidEq { .. }) => None, Operation::Search(Search::Seek { index, .. }) => index.as_ref(), } @@ -931,10 +938,7 @@ impl JoinedTable { result_columns_start_reg: None, }); Self { - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, + op: Operation::default_scan_for(&table), table, identifier, internal_id, @@ -1110,6 +1114,22 @@ pub struct TerminationKey { pub op: SeekOp, } +/// Represents the type of table scan performed during query execution. +#[derive(Clone, Debug)] +pub enum Scan { + /// A scan of a B-tree–backed table, optionally using an index, and with an iteration direction. + BTreeTable { + /// The iter_dir is used to indicate the direction of the iterator. + iter_dir: IterationDirection, + /// The index that we are using to scan the table, if any. + index: Option>, + }, + /// A scan of a virtual table, delegated to the table’s `filter` and related methods. + VirtualTable, + /// A scan of a subquery in the `FROM` clause. + Subquery, +} + /// An enum that represents a search operation that can be used to search for a row in a table using an index /// (i.e. a primary key or a secondary index) #[allow(clippy::enum_variant_names)] diff --git a/core/translate/planner.rs b/core/translate/planner.rs index f72963234..e22d70fe9 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -4,9 +4,9 @@ use std::sync::Arc; use super::{ expr::walk_expr, plan::{ - Aggregate, ColumnUsedMask, Distinctness, EvalAt, IterationDirection, JoinInfo, - JoinOrderMember, JoinedTable, Operation, OuterQueryReference, Plan, QueryDestination, - ResultSetColumn, TableReferences, WhereTerm, + Aggregate, ColumnUsedMask, Distinctness, EvalAt, JoinInfo, JoinOrderMember, JoinedTable, + Operation, OuterQueryReference, Plan, QueryDestination, ResultSetColumn, TableReferences, + WhereTerm, }, select::prepare_select_plan, SymbolTable, @@ -444,10 +444,7 @@ fn parse_table( )); }; table_references.add_joined_table(JoinedTable { - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, + op: Operation::default_scan_for(&tbl_ref), table: tbl_ref, identifier: alias.unwrap_or(normalized_qualified_name), internal_id, @@ -470,10 +467,7 @@ fn parse_table( { if matches!(outer_ref.table, Table::FromClauseSubquery(_)) { table_references.add_joined_table(JoinedTable { - op: Operation::Scan { - iter_dir: IterationDirection::Forwards, - index: None, - }, + op: Operation::default_scan_for(&outer_ref.table), table: outer_ref.table.clone(), identifier: outer_ref.identifier.clone(), internal_id: table_ref_counter.next(), diff --git a/core/translate/update.rs b/core/translate/update.rs index 285446160..26e89985c 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use crate::schema::{BTreeTable, Column, Type}; use crate::translate::optimizer::optimize_select_plan; -use crate::translate::plan::{Operation, QueryDestination, Search, SelectPlan}; +use crate::translate::plan::{Operation, QueryDestination, Scan, Search, SelectPlan}; use crate::vdbe::builder::CursorType; use crate::{ bail_parse_error, @@ -138,10 +138,7 @@ pub fn prepare_update_plan( }, identifier: table_name.as_str().to_string(), internal_id: program.table_reference_counter.next(), - op: Operation::Scan { - iter_dir, - index: None, - }, + op: build_scan_op(&table, iter_dir), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id: 0, @@ -235,10 +232,7 @@ pub fn prepare_update_plan( }, identifier: table_name.as_str().to_string(), internal_id, - op: Operation::Scan { - iter_dir, - index: None, - }, + op: build_scan_op(&table, iter_dir), join_info: None, col_used_mask: ColumnUsedMask::default(), database_id: 0, @@ -369,3 +363,14 @@ pub fn prepare_update_plan( ephemeral_plan, })) } + +fn build_scan_op(table: &Table, iter_dir: IterationDirection) -> Operation { + match table { + Table::BTree(_) => Operation::Scan(Scan::BTreeTable { + iter_dir, + index: None, + }), + Table::Virtual(_) => Operation::Scan(Scan::VirtualTable), + _ => unreachable!(), + } +}