use std::{ cmp::Ordering, collections::{HashMap, VecDeque}, sync::Arc, }; use crate::{ schema::{Column, Index}, translate::{ collate::get_collseq_from_expr, expr::as_binary_components, plan::{JoinOrderMember, TableReferences, WhereTerm}, planner::{table_mask_from_expr, TableMask}, }, util::exprs_are_equivalent, Result, }; use turso_ext::{ConstraintInfo, ConstraintOp}; use turso_parser::ast::{self, SortOrder, TableInternalId}; use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE; /// Represents a single condition derived from a `WHERE` clause term /// that constrains a specific column of a table. /// /// Constraints are precomputed for each table involved in a query. They are used /// during query optimization to estimate the cost of different access paths (e.g., using an index) /// and to determine the optimal join order. A constraint can only be applied if all tables /// referenced in its expression (other than the constrained table itself) are already /// available in the current join context, i.e. on the left side in the join order /// relative to the table. #[derive(Debug, Clone)] /// pub struct Constraint { /// The position of the original `WHERE` clause term this constraint derives from, /// and which side of the [ast::Expr::Binary] comparison contains the expression /// that constrains the column. /// E.g. in SELECT * FROM t WHERE t.x = 10, the constraint is (0, BinaryExprSide::Rhs) /// because the RHS '10' is the constraining expression. /// /// This is tracked so we can: /// /// 1. Extract the constraining expression for use in an index seek key, and /// 2. Remove the relevant binary expression from the WHERE clause, if used as an index seek key. pub where_clause_pos: (usize, BinaryExprSide), /// The comparison operator (e.g., `=`, `>`, `<`) used in the constraint. pub operator: ast::Operator, /// The zero-based index of the constrained column within the table's schema. pub table_col_pos: usize, /// A bitmask representing the set of tables that appear on the *constraining* side /// of the comparison expression. For example, in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x, /// the lhs_mask contains t2 and t3. Thus, this constraint can only be used if t2 and t3 /// have already been joined (i.e. are on the left side of the join order relative to t1). pub lhs_mask: TableMask, /// An estimated selectivity factor (0.0 to 1.0) indicating the fraction of rows /// expected to satisfy this constraint. Used for cost and cardinality estimation. pub selectivity: f64, /// Whether the constraint is usable for an index seek. /// This is explicitly set to false if the constraint has a different collation than the constrained column. pub usable: bool, } #[derive(Debug, Clone, Copy, PartialEq)] pub enum BinaryExprSide { Lhs, Rhs, } impl Constraint { /// Get the constraining expression and operator, e.g. ('>=', '2+3') from 't.x >= 2+3' pub fn get_constraining_expr(&self, where_clause: &[WhereTerm]) -> (ast::Operator, ast::Expr) { let (idx, side) = self.where_clause_pos; let where_term = &where_clause[idx]; let Ok(Some((lhs, _, rhs))) = as_binary_components(&where_term.expr) else { panic!("Expected a valid binary expression"); }; if side == BinaryExprSide::Lhs { (self.operator, lhs.clone()) } else { (self.operator, rhs.clone()) } } pub fn get_constraining_expr_ref<'a>(&self, where_clause: &'a [WhereTerm]) -> &'a ast::Expr { let (idx, side) = self.where_clause_pos; let where_term = &where_clause[idx]; let Ok(Some((lhs, _, rhs))) = as_binary_components(&where_term.expr) else { panic!("Expected a valid binary expression"); }; if side == BinaryExprSide::Lhs { lhs } else { rhs } } } #[derive(Debug, Clone)] /// A reference to a [Constraint] in a [TableConstraints]. /// /// This is used to track which constraints may be used as an index seek key. pub struct ConstraintRef { /// The position of the constraint in the [TableConstraints::constraints] vector. pub constraint_vec_pos: usize, /// The position of the constrained column in the index. Always 0 for rowid indices. pub index_col_pos: usize, /// The sort order of the constrained column in the index. Always ascending for rowid indices. pub sort_order: SortOrder, } /// A collection of [ConstraintRef]s for a given index, or if index is None, for the table's rowid index. /// For example, given a table `T (x,y,z)` with an index `T_I (y desc,z)`, take the following query: /// ```sql /// SELECT * FROM T WHERE y = 10 AND z = 20; /// ``` /// /// This will produce the following [ConstraintUseCandidate]: /// /// ConstraintUseCandidate { /// index: Some(T_I) /// refs: [ /// ConstraintRef { /// constraint_vec_pos: 0, // y = 10 /// index_col_pos: 0, // y /// sort_order: SortOrder::Desc, /// }, /// ConstraintRef { /// constraint_vec_pos: 1, // z = 20 /// index_col_pos: 1, // z /// sort_order: SortOrder::Asc, /// }, /// ], /// } /// #[derive(Debug)] pub struct ConstraintUseCandidate { /// The index that may be used to satisfy the constraints. If none, the table's rowid index is used. pub index: Option>, /// References to the constraints that may be used as an access path for the index. /// Refs are sorted by [ConstraintRef::index_col_pos] pub refs: Vec, } #[derive(Debug)] /// A collection of [Constraint]s and their potential [ConstraintUseCandidate]s for a given table. pub struct TableConstraints { /// The internal ID of the [TableReference] that these constraints are for. pub table_id: TableInternalId, /// The constraints for the table, i.e. any [WhereTerm]s that reference columns from this table. pub constraints: Vec, /// Candidates for indexes that may use the constraints to perform a lookup. pub candidates: Vec, } /// In lieu of statistics, we estimate that an equality filter will reduce the output set to 1% of its size. const SELECTIVITY_EQ: f64 = 0.01; /// In lieu of statistics, we estimate that a range filter will reduce the output set to 40% of its size. const SELECTIVITY_RANGE: f64 = 0.4; /// In lieu of statistics, we estimate that other filters will reduce the output set to 90% of its size. const SELECTIVITY_OTHER: f64 = 0.9; const SELECTIVITY_UNIQUE_EQUALITY: f64 = 1.0 / ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64; /// Estimate the selectivity of a constraint based on the operator and the column type. fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 { match op { ast::Operator::Equals => { if column.is_rowid_alias || column.primary_key { SELECTIVITY_UNIQUE_EQUALITY } else { SELECTIVITY_EQ } } ast::Operator::Greater => SELECTIVITY_RANGE, ast::Operator::GreaterEquals => SELECTIVITY_RANGE, ast::Operator::Less => SELECTIVITY_RANGE, ast::Operator::LessEquals => SELECTIVITY_RANGE, _ => SELECTIVITY_OTHER, } } /// Precompute all potentially usable [Constraints] from a WHERE clause. /// The resulting list of [TableConstraints] is then used to evaluate the best access methods for various join orders. /// /// This method do not perform much filtering of constraints and delegate this tasks to the consumers of the method /// Consumers must inspect [TableConstraints] and its candidates and pick best constraints for optimized access pub fn constraints_from_where_clause( where_clause: &[WhereTerm], table_references: &TableReferences, available_indexes: &HashMap>>, ) -> Result> { let mut constraints = Vec::new(); // For each table, collect all the Constraints and all potential index candidates that may use them. for table_reference in table_references.joined_tables() { let rowid_alias_column = table_reference .columns() .iter() .position(|c| c.is_rowid_alias); let mut cs = TableConstraints { table_id: table_reference.internal_id, constraints: Vec::new(), candidates: available_indexes .get(table_reference.table.get_name()) .map_or(Vec::new(), |indexes| { indexes .iter() .map(|index| ConstraintUseCandidate { index: Some(index.clone()), refs: Vec::new(), }) .collect() }), }; // Add a candidate for the rowid index, which is always available when the table has a rowid alias. cs.candidates.push(ConstraintUseCandidate { index: None, refs: Vec::new(), }); for (i, term) in where_clause.iter().enumerate() { let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else { continue; }; // Constraints originating from a LEFT JOIN must always be evaluated in that join's RHS table's loop, // regardless of which tables the constraint references. if let Some(outer_join_tbl) = term.from_outer_join { if outer_join_tbl != table_reference.internal_id { continue; } } // If either the LHS or RHS of the constraint is a column from the table, add the constraint. match lhs { ast::Expr::Column { table, column, .. } => { if *table == table_reference.internal_id { let table_column = &table_reference.table.columns()[*column]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Rhs), operator, table_col_pos: *column, lhs_mask: table_mask_from_expr(rhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), usable: true, }); } } ast::Expr::RowId { table, .. } => { // A rowid alias column must exist for the 'rowid' keyword to be considered a valid reference. // This should be a parse error at an earlier stage of the query compilation, but nevertheless, // we check it here. if *table == table_reference.internal_id && rowid_alias_column.is_some() { let table_column = &table_reference.table.columns()[rowid_alias_column.unwrap()]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Rhs), operator, table_col_pos: rowid_alias_column.unwrap(), lhs_mask: table_mask_from_expr(rhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), usable: true, }); } } _ => {} }; match rhs { ast::Expr::Column { table, column, .. } => { if *table == table_reference.internal_id { let table_column = &table_reference.table.columns()[*column]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Lhs), operator: opposite_cmp_op(operator), table_col_pos: *column, lhs_mask: table_mask_from_expr(lhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), usable: true, }); } } ast::Expr::RowId { table, .. } => { if *table == table_reference.internal_id && rowid_alias_column.is_some() { let table_column = &table_reference.table.columns()[rowid_alias_column.unwrap()]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Lhs), operator: opposite_cmp_op(operator), table_col_pos: rowid_alias_column.unwrap(), lhs_mask: table_mask_from_expr(lhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), usable: true, }); } } _ => {} }; } // sort equalities first so that index keys will be properly constructed. // see e.g.: https://www.solarwinds.com/blog/the-left-prefix-index-rule cs.constraints.sort_by(|a, b| { if a.operator == ast::Operator::Equals { Ordering::Less } else if b.operator == ast::Operator::Equals { Ordering::Greater } else { Ordering::Equal } }); // For each constraint we found, add a reference to it for each index that may be able to use it. for (i, constraint) in cs.constraints.iter_mut().enumerate() { let constrained_column = &table_reference.table.columns()[constraint.table_col_pos]; let column_collation = constrained_column.collation.unwrap_or_default(); let constraining_expr = constraint.get_constraining_expr_ref(where_clause); // Index seek keys must use the same collation as the constrained column. match get_collseq_from_expr(constraining_expr, table_references)? { Some(collation) if collation != column_collation => { constraint.usable = false; continue; } _ => {} } if rowid_alias_column == Some(constraint.table_col_pos) { let rowid_candidate = cs .candidates .iter_mut() .find_map(|candidate| { if candidate.index.is_none() { Some(candidate) } else { None } }) .unwrap(); rowid_candidate.refs.push(ConstraintRef { constraint_vec_pos: i, index_col_pos: 0, sort_order: SortOrder::Asc, }); } for index in available_indexes .get(table_reference.table.get_name()) .unwrap_or(&VecDeque::new()) { if let Some(position_in_index) = index.column_table_pos_to_index_pos(constraint.table_col_pos) { if let Some(index_candidate) = cs.candidates.iter_mut().find_map(|candidate| { if candidate.index.as_ref().is_some_and(|i| { Arc::ptr_eq(index, i) && can_use_partial_index(index, where_clause) }) { Some(candidate) } else { None } }) { index_candidate.refs.push(ConstraintRef { constraint_vec_pos: i, index_col_pos: position_in_index, sort_order: index.columns[position_in_index].order, }); } } } } for candidate in cs.candidates.iter_mut() { // Sort by index_col_pos, ascending -- index columns must be consumed in contiguous order. candidate.refs.sort_by_key(|cref| cref.index_col_pos); } cs.candidates.retain(|c| { if let Some(idx) = &c.index { if idx.where_clause.is_some() && c.refs.is_empty() { // prevent a partial index from even being considered as a scan driver. return false; } } true }); constraints.push(cs); } Ok(constraints) } #[derive(Clone, Debug)] /// A reference to a [Constraint]s in a [TableConstraints] for single column. /// /// This is specialized version of [ConstraintRef] which specifically holds range-like constraints: /// - x = 10 (eq is set) /// - x >= 10, x > 10 (lower_bound is set) /// - x <= 10, x < 10 (upper_bound is set) /// - x > 10 AND x < 20 (both lower_bound and upper_bound are set) /// /// eq, lower_bound and upper_bound holds None or position of the constraint in the [Constraint] array pub struct RangeConstraintRef { /// position of the column in the table definition pub table_col_pos: usize, /// position of the column in the index definition pub index_col_pos: usize, /// sort order for the column in the index definition pub sort_order: SortOrder, /// equality constraint pub eq: Option, /// lower bound constraint (either > or >=) pub lower_bound: Option, /// upper bound constraint (either < or <=) pub upper_bound: Option, } #[derive(Debug, Clone)] /// Represent seek range which can be used in query planning to emit range scan over table or index pub struct SeekRangeConstraint { pub sort_order: SortOrder, pub eq: Option<(ast::Operator, ast::Expr)>, pub lower_bound: Option<(ast::Operator, ast::Expr)>, pub upper_bound: Option<(ast::Operator, ast::Expr)>, } impl SeekRangeConstraint { pub fn new_eq(sort_order: SortOrder, eq: (ast::Operator, ast::Expr)) -> Self { Self { sort_order, eq: Some(eq), lower_bound: None, upper_bound: None, } } pub fn new_range( sort_order: SortOrder, lower_bound: Option<(ast::Operator, ast::Expr)>, upper_bound: Option<(ast::Operator, ast::Expr)>, ) -> Self { assert!(lower_bound.is_some() || upper_bound.is_some()); Self { sort_order, eq: None, lower_bound, upper_bound, } } } impl RangeConstraintRef { /// Convert the [RangeConstraintRef] to a [SeekRangeConstraint] usable in a [crate::translate::plan::SeekDef::key]. pub fn as_seek_range_constraint( &self, constraints: &[Constraint], where_clause: &[WhereTerm], ) -> SeekRangeConstraint { if let Some(eq) = self.eq { return SeekRangeConstraint::new_eq( self.sort_order, constraints[eq].get_constraining_expr(where_clause), ); } SeekRangeConstraint::new_range( self.sort_order, self.lower_bound .map(|x| constraints[x].get_constraining_expr(where_clause)), self.upper_bound .map(|x| constraints[x].get_constraining_expr(where_clause)), ) } } /// Find which [Constraint]s are usable for a given join order. /// Returns a slice of the references to the constraints that are usable. /// A constraint is considered usable for a given table if all of the other tables referenced by the constraint /// are on the left side in the join order relative to the table. pub fn usable_constraints_for_join_order<'a>( constraints: &'a [Constraint], refs: &'a [ConstraintRef], join_order: &[JoinOrderMember], ) -> Vec { debug_assert!(refs.is_sorted_by_key(|x| x.index_col_pos)); let table_idx = join_order.last().unwrap().original_idx; let lhs_mask = TableMask::from_table_number_iter( join_order .iter() .take(join_order.len() - 1) .map(|j| j.original_idx), ); let mut usable: Vec = Vec::new(); let mut current_required_column_pos = 0; for cref in refs.iter() { let constraint = &constraints[cref.constraint_vec_pos]; let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx); if other_side_refers_to_self { break; } let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask); if !all_required_tables_are_on_left_side { break; } if Some(cref.index_col_pos) == usable.last().map(|x| x.index_col_pos) { // Two constraints on the same index column can be combined into a single range constraint. assert_eq!(cref.sort_order, usable.last().unwrap().sort_order); assert_eq!(cref.index_col_pos, usable.last().unwrap().index_col_pos); assert_eq!( constraints[cref.constraint_vec_pos].table_col_pos, usable.last().unwrap().table_col_pos ); // if we already have eq constraint - we must not add anything to it // otherwise, we can incorrectly consume filters which will not be used in the access path if usable.last().unwrap().eq.is_some() { continue; } match constraints[cref.constraint_vec_pos].operator { ast::Operator::Greater | ast::Operator::GreaterEquals => { usable.last_mut().unwrap().lower_bound = Some(cref.constraint_vec_pos); } ast::Operator::Less | ast::Operator::LessEquals => { usable.last_mut().unwrap().upper_bound = Some(cref.constraint_vec_pos); } _ => {} } continue; } if cref.index_col_pos != current_required_column_pos { // Index columns must be consumed contiguously in the order they appear in the index. break; } if usable.last().is_some_and(|x| x.eq.is_none()) { // Usable index key must have 0-n equalities and then a maximum of 1 range constraint with one or both bounds set. // If we already have a range constraint before this one, we must not add anything to it break; } let operator = constraints[cref.constraint_vec_pos].operator; let table_col_pos = constraints[cref.constraint_vec_pos].table_col_pos; if operator == ast::Operator::Equals && usable .last() .is_some_and(|x| x.table_col_pos == table_col_pos) { // If we already have an equality constraint for this column, we can't use it again continue; } let constraint_group = match operator { ast::Operator::Equals => RangeConstraintRef { table_col_pos, index_col_pos: cref.index_col_pos, sort_order: cref.sort_order, eq: Some(cref.constraint_vec_pos), lower_bound: None, upper_bound: None, }, ast::Operator::Greater | ast::Operator::GreaterEquals => RangeConstraintRef { table_col_pos, index_col_pos: cref.index_col_pos, sort_order: cref.sort_order, eq: None, lower_bound: Some(cref.constraint_vec_pos), upper_bound: None, }, ast::Operator::Less | ast::Operator::LessEquals => RangeConstraintRef { table_col_pos, index_col_pos: cref.index_col_pos, sort_order: cref.sort_order, eq: None, lower_bound: None, upper_bound: Some(cref.constraint_vec_pos), }, _ => continue, }; usable.push(constraint_group); current_required_column_pos += 1; } usable } fn can_use_partial_index(index: &Index, query_where_clause: &[WhereTerm]) -> bool { let Some(index_where) = &index.where_clause else { // Full index, always usable return true; }; // Check if query WHERE contains the exact same predicate for term in query_where_clause { if exprs_are_equivalent(&term.expr, index_where.as_ref()) { return true; } } // TODO: do better to determine if we should use partial index false } pub fn convert_to_vtab_constraint( constraints: &[Constraint], join_order: &[JoinOrderMember], ) -> Vec { let table_idx = join_order.last().unwrap().original_idx; let lhs_mask = TableMask::from_table_number_iter( join_order .iter() .take(join_order.len() - 1) .map(|j| j.original_idx), ); constraints .iter() .enumerate() .filter_map(|(i, constraint)| { let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx); if other_side_refers_to_self { return None; } let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask); to_ext_constraint_op(&constraint.operator).map(|op| ConstraintInfo { column_index: constraint.table_col_pos as u32, op, usable: all_required_tables_are_on_left_side, index: i, }) }) .collect() } fn to_ext_constraint_op(op: &ast::Operator) -> Option { match op { ast::Operator::Equals => Some(ConstraintOp::Eq), ast::Operator::Less => Some(ConstraintOp::Lt), ast::Operator::LessEquals => Some(ConstraintOp::Le), ast::Operator::Greater => Some(ConstraintOp::Gt), ast::Operator::GreaterEquals => Some(ConstraintOp::Ge), ast::Operator::NotEquals => Some(ConstraintOp::Ne), _ => None, } } fn opposite_cmp_op(op: ast::Operator) -> ast::Operator { match op { ast::Operator::Equals => ast::Operator::Equals, ast::Operator::Greater => ast::Operator::Less, ast::Operator::GreaterEquals => ast::Operator::LessEquals, ast::Operator::Less => ast::Operator::Greater, ast::Operator::LessEquals => ast::Operator::GreaterEquals, _ => panic!("unexpected operator: {op:?}"), } }