use std::{cmp::Ordering, collections::HashMap, sync::Arc}; use crate::{ schema::{Column, Index}, translate::{ expr::as_binary_components, plan::{JoinOrderMember, TableReferences, WhereTerm}, planner::{table_mask_from_expr, TableMask}, }, Result, }; use turso_ext::{ConstraintInfo, ConstraintOp}; use turso_sqlite3_parser::ast::{self, SortOrder, TableInternalId}; use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE; /// Represents a single condition derived from a `WHERE` clause term /// that constrains a specific column of a table. /// /// Constraints are precomputed for each table involved in a query. They are used /// during query optimization to estimate the cost of different access paths (e.g., using an index) /// and to determine the optimal join order. A constraint can only be applied if all tables /// referenced in its expression (other than the constrained table itself) are already /// available in the current join context, i.e. on the left side in the join order /// relative to the table. #[derive(Debug, Clone)] /// pub struct Constraint { /// The position of the original `WHERE` clause term this constraint derives from, /// and which side of the [ast::Expr::Binary] comparison contains the expression /// that constrains the column. /// E.g. in SELECT * FROM t WHERE t.x = 10, the constraint is (0, BinaryExprSide::Rhs) /// because the RHS '10' is the constraining expression. /// /// This is tracked so we can: /// /// 1. Extract the constraining expression for use in an index seek key, and /// 2. Remove the relevant binary expression from the WHERE clause, if used as an index seek key. pub where_clause_pos: (usize, BinaryExprSide), /// The comparison operator (e.g., `=`, `>`, `<`) used in the constraint. pub operator: ast::Operator, /// The zero-based index of the constrained column within the table's schema. pub table_col_pos: usize, /// A bitmask representing the set of tables that appear on the *constraining* side /// of the comparison expression. For example, in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x, /// the lhs_mask contains t2 and t3. Thus, this constraint can only be used if t2 and t3 /// have already been joined (i.e. are on the left side of the join order relative to t1). pub lhs_mask: TableMask, /// An estimated selectivity factor (0.0 to 1.0) indicating the fraction of rows /// expected to satisfy this constraint. Used for cost and cardinality estimation. pub selectivity: f64, } #[derive(Debug, Clone, Copy, PartialEq)] pub enum BinaryExprSide { Lhs, Rhs, } impl Constraint { /// Get the constraining expression, e.g. '2+3' from 't.x = 2+3' pub fn get_constraining_expr(&self, where_clause: &[WhereTerm]) -> ast::Expr { let (idx, side) = self.where_clause_pos; let where_term = &where_clause[idx]; let Ok(Some((lhs, _, rhs))) = as_binary_components(&where_term.expr) else { panic!("Expected a valid binary expression"); }; if side == BinaryExprSide::Lhs { lhs.clone() } else { rhs.clone() } } } #[derive(Debug, Clone)] /// A reference to a [Constraint] in a [TableConstraints]. /// /// This is used to track which constraints may be used as an index seek key. pub struct ConstraintRef { /// The position of the constraint in the [TableConstraints::constraints] vector. pub constraint_vec_pos: usize, /// The position of the constrained column in the index. Always 0 for rowid indices. pub index_col_pos: usize, /// The sort order of the constrained column in the index. Always ascending for rowid indices. pub sort_order: SortOrder, } impl ConstraintRef { /// Convert the constraint to a column usable in a [crate::translate::plan::SeekDef::key]. pub fn as_seek_key_column( &self, constraints: &[Constraint], where_clause: &[WhereTerm], ) -> (ast::Expr, SortOrder) { let constraint = &constraints[self.constraint_vec_pos]; let constraining_expr = constraint.get_constraining_expr(where_clause); (constraining_expr, self.sort_order) } } /// A collection of [ConstraintRef]s for a given index, or if index is None, for the table's rowid index. /// For example, given a table `T (x,y,z)` with an index `T_I (y desc,z)`, take the following query: /// ```sql /// SELECT * FROM T WHERE y = 10 AND z = 20; /// ``` /// /// This will produce the following [ConstraintUseCandidate]: /// /// ConstraintUseCandidate { /// index: Some(T_I) /// refs: [ /// ConstraintRef { /// constraint_vec_pos: 0, // y = 10 /// index_col_pos: 0, // y /// sort_order: SortOrder::Desc, /// }, /// ConstraintRef { /// constraint_vec_pos: 1, // z = 20 /// index_col_pos: 1, // z /// sort_order: SortOrder::Asc, /// }, /// ], /// } /// #[derive(Debug)] pub struct ConstraintUseCandidate { /// The index that may be used to satisfy the constraints. If none, the table's rowid index is used. pub index: Option>, /// References to the constraints that may be used as an access path for the index. pub refs: Vec, } #[derive(Debug)] /// A collection of [Constraint]s and their potential [ConstraintUseCandidate]s for a given table. pub struct TableConstraints { /// The internal ID of the [TableReference] that these constraints are for. pub table_id: TableInternalId, /// The constraints for the table, i.e. any [WhereTerm]s that reference columns from this table. pub constraints: Vec, /// Candidates for indexes that may use the constraints to perform a lookup. pub candidates: Vec, } /// In lieu of statistics, we estimate that an equality filter will reduce the output set to 1% of its size. const SELECTIVITY_EQ: f64 = 0.01; /// In lieu of statistics, we estimate that a range filter will reduce the output set to 40% of its size. const SELECTIVITY_RANGE: f64 = 0.4; /// In lieu of statistics, we estimate that other filters will reduce the output set to 90% of its size. const SELECTIVITY_OTHER: f64 = 0.9; const SELECTIVITY_UNIQUE_EQUALITY: f64 = 1.0 / ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64; /// Estimate the selectivity of a constraint based on the operator and the column type. fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 { match op { ast::Operator::Equals => { if column.is_rowid_alias || column.primary_key { SELECTIVITY_UNIQUE_EQUALITY } else { SELECTIVITY_EQ } } ast::Operator::Greater => SELECTIVITY_RANGE, ast::Operator::GreaterEquals => SELECTIVITY_RANGE, ast::Operator::Less => SELECTIVITY_RANGE, ast::Operator::LessEquals => SELECTIVITY_RANGE, _ => SELECTIVITY_OTHER, } } /// Precompute all potentially usable [Constraints] from a WHERE clause. /// The resulting list of [TableConstraints] is then used to evaluate the best access methods for various join orders. pub fn constraints_from_where_clause( where_clause: &[WhereTerm], table_references: &TableReferences, available_indexes: &HashMap>>, ) -> Result> { let mut constraints = Vec::new(); // For each table, collect all the Constraints and all potential index candidates that may use them. for table_reference in table_references.joined_tables() { let rowid_alias_column = table_reference .columns() .iter() .position(|c| c.is_rowid_alias); let mut cs = TableConstraints { table_id: table_reference.internal_id, constraints: Vec::new(), candidates: available_indexes .get(table_reference.table.get_name()) .map_or(Vec::new(), |indexes| { indexes .iter() .map(|index| ConstraintUseCandidate { index: Some(index.clone()), refs: Vec::new(), }) .collect() }), }; // Add a candidate for the rowid index, which is always available when the table has a rowid alias. cs.candidates.push(ConstraintUseCandidate { index: None, refs: Vec::new(), }); for (i, term) in where_clause.iter().enumerate() { let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else { continue; }; // Constraints originating from a LEFT JOIN must always be evaluated in that join's RHS table's loop, // regardless of which tables the constraint references. if let Some(outer_join_tbl) = term.from_outer_join { if outer_join_tbl != table_reference.internal_id { continue; } } // If either the LHS or RHS of the constraint is a column from the table, add the constraint. match lhs { ast::Expr::Column { table, column, .. } => { if *table == table_reference.internal_id { let table_column = &table_reference.table.columns()[*column]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Rhs), operator, table_col_pos: *column, lhs_mask: table_mask_from_expr(rhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), }); } } ast::Expr::RowId { table, .. } => { // A rowid alias column must exist for the 'rowid' keyword to be considered a valid reference. // This should be a parse error at an earlier stage of the query compilation, but nevertheless, // we check it here. if *table == table_reference.internal_id && rowid_alias_column.is_some() { let table_column = &table_reference.table.columns()[rowid_alias_column.unwrap()]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Rhs), operator, table_col_pos: rowid_alias_column.unwrap(), lhs_mask: table_mask_from_expr(rhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), }); } } _ => {} }; match rhs { ast::Expr::Column { table, column, .. } => { if *table == table_reference.internal_id { let table_column = &table_reference.table.columns()[*column]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Lhs), operator: opposite_cmp_op(operator), table_col_pos: *column, lhs_mask: table_mask_from_expr(lhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), }); } } ast::Expr::RowId { table, .. } => { if *table == table_reference.internal_id && rowid_alias_column.is_some() { let table_column = &table_reference.table.columns()[rowid_alias_column.unwrap()]; cs.constraints.push(Constraint { where_clause_pos: (i, BinaryExprSide::Lhs), operator: opposite_cmp_op(operator), table_col_pos: rowid_alias_column.unwrap(), lhs_mask: table_mask_from_expr(lhs, table_references)?, selectivity: estimate_selectivity(table_column, operator), }); } } _ => {} }; } // sort equalities first so that index keys will be properly constructed. // see e.g.: https://www.solarwinds.com/blog/the-left-prefix-index-rule cs.constraints.sort_by(|a, b| { if a.operator == ast::Operator::Equals { Ordering::Less } else if b.operator == ast::Operator::Equals { Ordering::Greater } else { Ordering::Equal } }); // For each constraint we found, add a reference to it for each index that may be able to use it. for (i, constraint) in cs.constraints.iter().enumerate() { if rowid_alias_column == Some(constraint.table_col_pos) { let rowid_candidate = cs .candidates .iter_mut() .find_map(|candidate| { if candidate.index.is_none() { Some(candidate) } else { None } }) .unwrap(); rowid_candidate.refs.push(ConstraintRef { constraint_vec_pos: i, index_col_pos: 0, sort_order: SortOrder::Asc, }); } for index in available_indexes .get(table_reference.table.get_name()) .unwrap_or(&Vec::new()) { if let Some(position_in_index) = index.column_table_pos_to_index_pos(constraint.table_col_pos) { let index_candidate = cs .candidates .iter_mut() .find_map(|candidate| { if candidate .index .as_ref() .is_some_and(|i| Arc::ptr_eq(index, i)) { Some(candidate) } else { None } }) .unwrap(); index_candidate.refs.push(ConstraintRef { constraint_vec_pos: i, index_col_pos: position_in_index, sort_order: index.columns[position_in_index].order, }); } } } for candidate in cs.candidates.iter_mut() { // Sort by index_col_pos, ascending -- index columns must be consumed in contiguous order. candidate.refs.sort_by_key(|cref| cref.index_col_pos); // Deduplicate by position, keeping first occurrence (which will be equality if one exists, since the constraints vec is sorted that way) candidate.refs.dedup_by_key(|cref| cref.index_col_pos); // Truncate at first gap in positions -- again, index columns must be consumed in contiguous order. let contiguous_len = candidate .refs .iter() .enumerate() .take_while(|(i, cref)| cref.index_col_pos == *i) .count(); candidate.refs.truncate(contiguous_len); // Truncate after the first inequality, since the left-prefix rule of indexes requires that all constraints but the last one must be equalities; // again see: https://www.solarwinds.com/blog/the-left-prefix-index-rule if let Some(first_inequality) = candidate.refs.iter().position(|cref| { cs.constraints[cref.constraint_vec_pos].operator != ast::Operator::Equals }) { candidate.refs.truncate(first_inequality + 1); } } constraints.push(cs); } Ok(constraints) } /// Find which [Constraint]s are usable for a given join order. /// Returns a slice of the references to the constraints that are usable. /// A constraint is considered usable for a given table if all of the other tables referenced by the constraint /// are on the left side in the join order relative to the table. pub fn usable_constraints_for_join_order<'a>( constraints: &'a [Constraint], refs: &'a [ConstraintRef], join_order: &[JoinOrderMember], ) -> &'a [ConstraintRef] { let table_idx = join_order.last().unwrap().original_idx; let mut usable_until = 0; for cref in refs.iter() { let constraint = &constraints[cref.constraint_vec_pos]; let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx); if other_side_refers_to_self { break; } let lhs_mask = TableMask::from_table_number_iter( join_order .iter() .take(join_order.len() - 1) .map(|j| j.original_idx), ); let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask); if !all_required_tables_are_on_left_side { break; } usable_until += 1; } &refs[..usable_until] } pub fn convert_to_vtab_constraint( constraints: &[Constraint], join_order: &[JoinOrderMember], ) -> Vec { let table_idx = join_order.last().unwrap().original_idx; let lhs_mask = TableMask::from_table_number_iter( join_order .iter() .take(join_order.len() - 1) .map(|j| j.original_idx), ); constraints .iter() .enumerate() .filter_map(|(i, constraint)| { let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx); if other_side_refers_to_self { return None; } let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask); to_ext_constraint_op(&constraint.operator).map(|op| ConstraintInfo { column_index: constraint.table_col_pos as u32, op, usable: all_required_tables_are_on_left_side, index: i, }) }) .collect() } fn to_ext_constraint_op(op: &ast::Operator) -> Option { match op { ast::Operator::Equals => Some(ConstraintOp::Eq), ast::Operator::Less => Some(ConstraintOp::Lt), ast::Operator::LessEquals => Some(ConstraintOp::Le), ast::Operator::Greater => Some(ConstraintOp::Gt), ast::Operator::GreaterEquals => Some(ConstraintOp::Ge), ast::Operator::NotEquals => Some(ConstraintOp::Ne), _ => None, } } fn opposite_cmp_op(op: ast::Operator) -> ast::Operator { match op { ast::Operator::Equals => ast::Operator::Equals, ast::Operator::Greater => ast::Operator::Less, ast::Operator::GreaterEquals => ast::Operator::LessEquals, ast::Operator::Less => ast::Operator::Greater, ast::Operator::LessEquals => ast::Operator::GreaterEquals, _ => panic!("unexpected operator: {op:?}"), } }