mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00
The side of the binary expression no longer needs to be stored in `ConstraintInfo`, since the optimizer now guarantees that it is always on the right. As a result, only the index of the corresponding constraint needs to be preserved.
458 lines
19 KiB
Rust
458 lines
19 KiB
Rust
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
|
|
|
|
use crate::{
|
|
schema::{Column, Index},
|
|
translate::{
|
|
expr::as_binary_components,
|
|
plan::{JoinOrderMember, TableReferences, WhereTerm},
|
|
planner::{table_mask_from_expr, TableMask},
|
|
},
|
|
Result,
|
|
};
|
|
use turso_ext::{ConstraintInfo, ConstraintOp};
|
|
use turso_sqlite3_parser::ast::{self, SortOrder, TableInternalId};
|
|
|
|
use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
|
|
|
/// Represents a single condition derived from a `WHERE` clause term
|
|
/// that constrains a specific column of a table.
|
|
///
|
|
/// Constraints are precomputed for each table involved in a query. They are used
|
|
/// during query optimization to estimate the cost of different access paths (e.g., using an index)
|
|
/// and to determine the optimal join order. A constraint can only be applied if all tables
|
|
/// referenced in its expression (other than the constrained table itself) are already
|
|
/// available in the current join context, i.e. on the left side in the join order
|
|
/// relative to the table.
|
|
#[derive(Debug, Clone)]
|
|
///
|
|
pub struct Constraint {
|
|
/// The position of the original `WHERE` clause term this constraint derives from,
|
|
/// and which side of the [ast::Expr::Binary] comparison contains the expression
|
|
/// that constrains the column.
|
|
/// E.g. in SELECT * FROM t WHERE t.x = 10, the constraint is (0, BinaryExprSide::Rhs)
|
|
/// because the RHS '10' is the constraining expression.
|
|
///
|
|
/// This is tracked so we can:
|
|
///
|
|
/// 1. Extract the constraining expression for use in an index seek key, and
|
|
/// 2. Remove the relevant binary expression from the WHERE clause, if used as an index seek key.
|
|
pub where_clause_pos: (usize, BinaryExprSide),
|
|
/// The comparison operator (e.g., `=`, `>`, `<`) used in the constraint.
|
|
pub operator: ast::Operator,
|
|
/// The zero-based index of the constrained column within the table's schema.
|
|
pub table_col_pos: usize,
|
|
/// A bitmask representing the set of tables that appear on the *constraining* side
|
|
/// of the comparison expression. For example, in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x,
|
|
/// the lhs_mask contains t2 and t3. Thus, this constraint can only be used if t2 and t3
|
|
/// have already been joined (i.e. are on the left side of the join order relative to t1).
|
|
pub lhs_mask: TableMask,
|
|
/// An estimated selectivity factor (0.0 to 1.0) indicating the fraction of rows
|
|
/// expected to satisfy this constraint. Used for cost and cardinality estimation.
|
|
pub selectivity: f64,
|
|
}
|
|
|
|
/// Identifies one operand of a binary comparison expression.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum BinaryExprSide {
    /// The left-hand operand.
    Lhs,
    /// The right-hand operand.
    Rhs,
}
|
|
|
|
impl Constraint {
|
|
/// Get the constraining expression, e.g. '2+3' from 't.x = 2+3'
|
|
pub fn get_constraining_expr(&self, where_clause: &[WhereTerm]) -> ast::Expr {
|
|
let (idx, side) = self.where_clause_pos;
|
|
let where_term = &where_clause[idx];
|
|
let Ok(Some((lhs, _, rhs))) = as_binary_components(&where_term.expr) else {
|
|
panic!("Expected a valid binary expression");
|
|
};
|
|
if side == BinaryExprSide::Lhs {
|
|
lhs.clone()
|
|
} else {
|
|
rhs.clone()
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
/// A reference to a [Constraint] in a [TableConstraints].
|
|
///
|
|
/// This is used to track which constraints may be used as an index seek key.
|
|
pub struct ConstraintRef {
|
|
/// The position of the constraint in the [TableConstraints::constraints] vector.
|
|
pub constraint_vec_pos: usize,
|
|
/// The position of the constrained column in the index. Always 0 for rowid indices.
|
|
pub index_col_pos: usize,
|
|
/// The sort order of the constrained column in the index. Always ascending for rowid indices.
|
|
pub sort_order: SortOrder,
|
|
}
|
|
|
|
impl ConstraintRef {
|
|
/// Convert the constraint to a column usable in a [crate::translate::plan::SeekDef::key].
|
|
pub fn as_seek_key_column(
|
|
&self,
|
|
constraints: &[Constraint],
|
|
where_clause: &[WhereTerm],
|
|
) -> (ast::Expr, SortOrder) {
|
|
let constraint = &constraints[self.constraint_vec_pos];
|
|
let constraining_expr = constraint.get_constraining_expr(where_clause);
|
|
(constraining_expr, self.sort_order)
|
|
}
|
|
}
|
|
|
|
/// A collection of [ConstraintRef]s for a given index, or if index is None, for the table's rowid index.
|
|
/// For example, given a table `T (x,y,z)` with an index `T_I (y desc,z)`, take the following query:
|
|
/// ```sql
|
|
/// SELECT * FROM T WHERE y = 10 AND z = 20;
|
|
/// ```
|
|
///
|
|
/// This will produce the following [ConstraintUseCandidate]:
|
|
///
|
|
/// ConstraintUseCandidate {
|
|
/// index: Some(T_I)
|
|
/// refs: [
|
|
/// ConstraintRef {
|
|
/// constraint_vec_pos: 0, // y = 10
|
|
/// index_col_pos: 0, // y
|
|
/// sort_order: SortOrder::Desc,
|
|
/// },
|
|
/// ConstraintRef {
|
|
/// constraint_vec_pos: 1, // z = 20
|
|
/// index_col_pos: 1, // z
|
|
/// sort_order: SortOrder::Asc,
|
|
/// },
|
|
/// ],
|
|
/// }
|
|
///
|
|
#[derive(Debug)]
|
|
pub struct ConstraintUseCandidate {
|
|
/// The index that may be used to satisfy the constraints. If none, the table's rowid index is used.
|
|
pub index: Option<Arc<Index>>,
|
|
/// References to the constraints that may be used as an access path for the index.
|
|
pub refs: Vec<ConstraintRef>,
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
/// A collection of [Constraint]s and their potential [ConstraintUseCandidate]s for a given table.
|
|
pub struct TableConstraints {
|
|
/// The internal ID of the [TableReference] that these constraints are for.
|
|
pub table_id: TableInternalId,
|
|
/// The constraints for the table, i.e. any [WhereTerm]s that reference columns from this table.
|
|
pub constraints: Vec<Constraint>,
|
|
/// Candidates for indexes that may use the constraints to perform a lookup.
|
|
pub candidates: Vec<ConstraintUseCandidate>,
|
|
}
|
|
|
|
/// In lieu of statistics, we estimate that an equality filter will reduce the output set to 1% of its size.
|
|
const SELECTIVITY_EQ: f64 = 0.01;
|
|
/// In lieu of statistics, we estimate that a range filter will reduce the output set to 40% of its size.
|
|
const SELECTIVITY_RANGE: f64 = 0.4;
|
|
/// In lieu of statistics, we estimate that other filters will reduce the output set to 90% of its size.
|
|
const SELECTIVITY_OTHER: f64 = 0.9;
|
|
|
|
const SELECTIVITY_UNIQUE_EQUALITY: f64 = 1.0 / ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64;
|
|
|
|
/// Estimate the selectivity of a constraint based on the operator and the column type.
|
|
fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 {
|
|
match op {
|
|
ast::Operator::Equals => {
|
|
if column.is_rowid_alias || column.primary_key {
|
|
SELECTIVITY_UNIQUE_EQUALITY
|
|
} else {
|
|
SELECTIVITY_EQ
|
|
}
|
|
}
|
|
ast::Operator::Greater => SELECTIVITY_RANGE,
|
|
ast::Operator::GreaterEquals => SELECTIVITY_RANGE,
|
|
ast::Operator::Less => SELECTIVITY_RANGE,
|
|
ast::Operator::LessEquals => SELECTIVITY_RANGE,
|
|
_ => SELECTIVITY_OTHER,
|
|
}
|
|
}
|
|
|
|
/// Precompute all potentially usable [Constraints] from a WHERE clause.
|
|
/// The resulting list of [TableConstraints] is then used to evaluate the best access methods for various join orders.
|
|
pub fn constraints_from_where_clause(
|
|
where_clause: &[WhereTerm],
|
|
table_references: &TableReferences,
|
|
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
|
|
) -> Result<Vec<TableConstraints>> {
|
|
let mut constraints = Vec::new();
|
|
|
|
// For each table, collect all the Constraints and all potential index candidates that may use them.
|
|
for table_reference in table_references.joined_tables() {
|
|
let rowid_alias_column = table_reference
|
|
.columns()
|
|
.iter()
|
|
.position(|c| c.is_rowid_alias);
|
|
|
|
let mut cs = TableConstraints {
|
|
table_id: table_reference.internal_id,
|
|
constraints: Vec::new(),
|
|
candidates: available_indexes
|
|
.get(table_reference.table.get_name())
|
|
.map_or(Vec::new(), |indexes| {
|
|
indexes
|
|
.iter()
|
|
.map(|index| ConstraintUseCandidate {
|
|
index: Some(index.clone()),
|
|
refs: Vec::new(),
|
|
})
|
|
.collect()
|
|
}),
|
|
};
|
|
// Add a candidate for the rowid index, which is always available when the table has a rowid alias.
|
|
cs.candidates.push(ConstraintUseCandidate {
|
|
index: None,
|
|
refs: Vec::new(),
|
|
});
|
|
|
|
for (i, term) in where_clause.iter().enumerate() {
|
|
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
|
|
continue;
|
|
};
|
|
|
|
// Constraints originating from a LEFT JOIN must always be evaluated in that join's RHS table's loop,
|
|
// regardless of which tables the constraint references.
|
|
if let Some(outer_join_tbl) = term.from_outer_join {
|
|
if outer_join_tbl != table_reference.internal_id {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// If either the LHS or RHS of the constraint is a column from the table, add the constraint.
|
|
match lhs {
|
|
ast::Expr::Column { table, column, .. } => {
|
|
if *table == table_reference.internal_id {
|
|
let table_column = &table_reference.table.columns()[*column];
|
|
cs.constraints.push(Constraint {
|
|
where_clause_pos: (i, BinaryExprSide::Rhs),
|
|
operator,
|
|
table_col_pos: *column,
|
|
lhs_mask: table_mask_from_expr(rhs, table_references)?,
|
|
selectivity: estimate_selectivity(table_column, operator),
|
|
});
|
|
}
|
|
}
|
|
ast::Expr::RowId { table, .. } => {
|
|
// A rowid alias column must exist for the 'rowid' keyword to be considered a valid reference.
|
|
// This should be a parse error at an earlier stage of the query compilation, but nevertheless,
|
|
// we check it here.
|
|
if *table == table_reference.internal_id && rowid_alias_column.is_some() {
|
|
let table_column =
|
|
&table_reference.table.columns()[rowid_alias_column.unwrap()];
|
|
cs.constraints.push(Constraint {
|
|
where_clause_pos: (i, BinaryExprSide::Rhs),
|
|
operator,
|
|
table_col_pos: rowid_alias_column.unwrap(),
|
|
lhs_mask: table_mask_from_expr(rhs, table_references)?,
|
|
selectivity: estimate_selectivity(table_column, operator),
|
|
});
|
|
}
|
|
}
|
|
_ => {}
|
|
};
|
|
match rhs {
|
|
ast::Expr::Column { table, column, .. } => {
|
|
if *table == table_reference.internal_id {
|
|
let table_column = &table_reference.table.columns()[*column];
|
|
cs.constraints.push(Constraint {
|
|
where_clause_pos: (i, BinaryExprSide::Lhs),
|
|
operator: opposite_cmp_op(operator),
|
|
table_col_pos: *column,
|
|
lhs_mask: table_mask_from_expr(lhs, table_references)?,
|
|
selectivity: estimate_selectivity(table_column, operator),
|
|
});
|
|
}
|
|
}
|
|
ast::Expr::RowId { table, .. } => {
|
|
if *table == table_reference.internal_id && rowid_alias_column.is_some() {
|
|
let table_column =
|
|
&table_reference.table.columns()[rowid_alias_column.unwrap()];
|
|
cs.constraints.push(Constraint {
|
|
where_clause_pos: (i, BinaryExprSide::Lhs),
|
|
operator: opposite_cmp_op(operator),
|
|
table_col_pos: rowid_alias_column.unwrap(),
|
|
lhs_mask: table_mask_from_expr(lhs, table_references)?,
|
|
selectivity: estimate_selectivity(table_column, operator),
|
|
});
|
|
}
|
|
}
|
|
_ => {}
|
|
};
|
|
}
|
|
// sort equalities first so that index keys will be properly constructed.
|
|
// see e.g.: https://www.solarwinds.com/blog/the-left-prefix-index-rule
|
|
cs.constraints.sort_by(|a, b| {
|
|
if a.operator == ast::Operator::Equals {
|
|
Ordering::Less
|
|
} else if b.operator == ast::Operator::Equals {
|
|
Ordering::Greater
|
|
} else {
|
|
Ordering::Equal
|
|
}
|
|
});
|
|
|
|
// For each constraint we found, add a reference to it for each index that may be able to use it.
|
|
for (i, constraint) in cs.constraints.iter().enumerate() {
|
|
if rowid_alias_column == Some(constraint.table_col_pos) {
|
|
let rowid_candidate = cs
|
|
.candidates
|
|
.iter_mut()
|
|
.find_map(|candidate| {
|
|
if candidate.index.is_none() {
|
|
Some(candidate)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.unwrap();
|
|
rowid_candidate.refs.push(ConstraintRef {
|
|
constraint_vec_pos: i,
|
|
index_col_pos: 0,
|
|
sort_order: SortOrder::Asc,
|
|
});
|
|
}
|
|
for index in available_indexes
|
|
.get(table_reference.table.get_name())
|
|
.unwrap_or(&Vec::new())
|
|
{
|
|
if let Some(position_in_index) =
|
|
index.column_table_pos_to_index_pos(constraint.table_col_pos)
|
|
{
|
|
let index_candidate = cs
|
|
.candidates
|
|
.iter_mut()
|
|
.find_map(|candidate| {
|
|
if candidate
|
|
.index
|
|
.as_ref()
|
|
.is_some_and(|i| Arc::ptr_eq(index, i))
|
|
{
|
|
Some(candidate)
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
.unwrap();
|
|
index_candidate.refs.push(ConstraintRef {
|
|
constraint_vec_pos: i,
|
|
index_col_pos: position_in_index,
|
|
sort_order: index.columns[position_in_index].order,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
for candidate in cs.candidates.iter_mut() {
|
|
// Sort by index_col_pos, ascending -- index columns must be consumed in contiguous order.
|
|
candidate.refs.sort_by_key(|cref| cref.index_col_pos);
|
|
// Deduplicate by position, keeping first occurrence (which will be equality if one exists, since the constraints vec is sorted that way)
|
|
candidate.refs.dedup_by_key(|cref| cref.index_col_pos);
|
|
// Truncate at first gap in positions -- again, index columns must be consumed in contiguous order.
|
|
let contiguous_len = candidate
|
|
.refs
|
|
.iter()
|
|
.enumerate()
|
|
.take_while(|(i, cref)| cref.index_col_pos == *i)
|
|
.count();
|
|
candidate.refs.truncate(contiguous_len);
|
|
|
|
// Truncate after the first inequality, since the left-prefix rule of indexes requires that all constraints but the last one must be equalities;
|
|
// again see: https://www.solarwinds.com/blog/the-left-prefix-index-rule
|
|
if let Some(first_inequality) = candidate.refs.iter().position(|cref| {
|
|
cs.constraints[cref.constraint_vec_pos].operator != ast::Operator::Equals
|
|
}) {
|
|
candidate.refs.truncate(first_inequality + 1);
|
|
}
|
|
}
|
|
constraints.push(cs);
|
|
}
|
|
|
|
Ok(constraints)
|
|
}
|
|
|
|
/// Find which [Constraint]s are usable for a given join order.
|
|
/// Returns a slice of the references to the constraints that are usable.
|
|
/// A constraint is considered usable for a given table if all of the other tables referenced by the constraint
|
|
/// are on the left side in the join order relative to the table.
|
|
pub fn usable_constraints_for_join_order<'a>(
|
|
constraints: &'a [Constraint],
|
|
refs: &'a [ConstraintRef],
|
|
join_order: &[JoinOrderMember],
|
|
) -> &'a [ConstraintRef] {
|
|
let table_idx = join_order.last().unwrap().original_idx;
|
|
let mut usable_until = 0;
|
|
for cref in refs.iter() {
|
|
let constraint = &constraints[cref.constraint_vec_pos];
|
|
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx);
|
|
if other_side_refers_to_self {
|
|
break;
|
|
}
|
|
let lhs_mask = TableMask::from_table_number_iter(
|
|
join_order
|
|
.iter()
|
|
.take(join_order.len() - 1)
|
|
.map(|j| j.original_idx),
|
|
);
|
|
let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask);
|
|
if !all_required_tables_are_on_left_side {
|
|
break;
|
|
}
|
|
usable_until += 1;
|
|
}
|
|
&refs[..usable_until]
|
|
}
|
|
|
|
pub fn convert_to_vtab_constraint(
|
|
constraints: &[Constraint],
|
|
join_order: &[JoinOrderMember],
|
|
) -> Vec<ConstraintInfo> {
|
|
let table_idx = join_order.last().unwrap().original_idx;
|
|
let lhs_mask = TableMask::from_table_number_iter(
|
|
join_order
|
|
.iter()
|
|
.take(join_order.len() - 1)
|
|
.map(|j| j.original_idx),
|
|
);
|
|
constraints
|
|
.iter()
|
|
.enumerate()
|
|
.filter_map(|(i, constraint)| {
|
|
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx);
|
|
if other_side_refers_to_self {
|
|
return None;
|
|
}
|
|
let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask);
|
|
to_ext_constraint_op(&constraint.operator).map(|op| ConstraintInfo {
|
|
column_index: constraint.table_col_pos as u32,
|
|
op,
|
|
usable: all_required_tables_are_on_left_side,
|
|
index: i,
|
|
})
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
fn to_ext_constraint_op(op: &ast::Operator) -> Option<ConstraintOp> {
|
|
match op {
|
|
ast::Operator::Equals => Some(ConstraintOp::Eq),
|
|
ast::Operator::Less => Some(ConstraintOp::Lt),
|
|
ast::Operator::LessEquals => Some(ConstraintOp::Le),
|
|
ast::Operator::Greater => Some(ConstraintOp::Gt),
|
|
ast::Operator::GreaterEquals => Some(ConstraintOp::Ge),
|
|
ast::Operator::NotEquals => Some(ConstraintOp::Ne),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Returns the comparison operator obtained by swapping the two operands, so that
/// `a <op> b` is equivalent to `b <result> a`. Equality is its own mirror image.
///
/// # Panics
///
/// Panics for any operator other than `=`, `>`, `>=`, `<` and `<=`.
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
    type Op = ast::Operator;
    match op {
        Op::Equals => Op::Equals,
        Op::Less => Op::Greater,
        Op::LessEquals => Op::GreaterEquals,
        Op::Greater => Op::Less,
        Op::GreaterEquals => Op::LessEquals,
        _ => panic!("unexpected operator: {op:?}"),
    }
}
|