mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-12 03:34:20 +01:00
Refactor constraints so that WHERE clause is not needed in join reordering phase
This commit is contained in:
@@ -9,7 +9,7 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
constraints::{usable_constraints_for_join_order, Constraint, ConstraintLookup, Constraints},
|
||||
constraints::{usable_constraints_for_join_order, ConstraintRef, TableConstraints},
|
||||
cost::{estimate_cost_for_scan_or_seek, Cost, IndexInfo},
|
||||
order::OrderTarget,
|
||||
};
|
||||
@@ -31,39 +31,38 @@ impl<'a> AccessMethod<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_constraints(&mut self, lookup: &ConstraintLookup, constraints: &'a [Constraint]) {
|
||||
let index = match lookup {
|
||||
ConstraintLookup::Index(index) => Some(index),
|
||||
ConstraintLookup::Rowid => None,
|
||||
ConstraintLookup::EphemeralIndex => panic!("set_constraints called with Lookup::None"),
|
||||
};
|
||||
match (&mut self.kind, constraints.is_empty()) {
|
||||
pub fn set_constraint_refs(
|
||||
&mut self,
|
||||
new_index: Option<Arc<Index>>,
|
||||
new_constraint_refs: &'a [ConstraintRef],
|
||||
) {
|
||||
match (&mut self.kind, new_constraint_refs.is_empty()) {
|
||||
(
|
||||
AccessMethodKind::Search {
|
||||
constraints,
|
||||
index: i,
|
||||
constraint_refs,
|
||||
index,
|
||||
..
|
||||
},
|
||||
false,
|
||||
) => {
|
||||
*constraints = constraints;
|
||||
*i = index.cloned();
|
||||
*constraint_refs = new_constraint_refs;
|
||||
*index = new_index;
|
||||
}
|
||||
(AccessMethodKind::Search { iter_dir, .. }, true) => {
|
||||
self.kind = AccessMethodKind::Scan {
|
||||
index: index.cloned(),
|
||||
index: new_index,
|
||||
iter_dir: *iter_dir,
|
||||
};
|
||||
}
|
||||
(AccessMethodKind::Scan { iter_dir, .. }, false) => {
|
||||
self.kind = AccessMethodKind::Search {
|
||||
index: index.cloned(),
|
||||
index: new_index,
|
||||
iter_dir: *iter_dir,
|
||||
constraints,
|
||||
constraint_refs: new_constraint_refs,
|
||||
};
|
||||
}
|
||||
(AccessMethodKind::Scan { index: i, .. }, true) => {
|
||||
*i = index.cloned();
|
||||
(AccessMethodKind::Scan { index, .. }, true) => {
|
||||
*index = new_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -81,7 +80,7 @@ pub enum AccessMethodKind<'a> {
|
||||
Search {
|
||||
index: Option<Arc<Index>>,
|
||||
iter_dir: IterationDirection,
|
||||
constraints: &'a [Constraint],
|
||||
constraint_refs: &'a [ConstraintRef],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -90,12 +89,12 @@ pub enum AccessMethodKind<'a> {
|
||||
pub fn find_best_access_method_for_join_order<'a>(
|
||||
table_index: usize,
|
||||
table_reference: &TableReference,
|
||||
constraints: &'a [Constraints],
|
||||
table_constraints: &'a TableConstraints,
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
input_cardinality: f64,
|
||||
) -> Result<AccessMethod<'a>> {
|
||||
let cost_of_full_table_scan = estimate_cost_for_scan_or_seek(None, &[], input_cardinality);
|
||||
let cost_of_full_table_scan = estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality);
|
||||
let mut best_access_method = AccessMethod {
|
||||
cost: cost_of_full_table_scan,
|
||||
kind: AccessMethodKind::Scan {
|
||||
@@ -107,28 +106,29 @@ pub fn find_best_access_method_for_join_order<'a>(
|
||||
.columns()
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
for csmap in constraints
|
||||
.iter()
|
||||
.filter(|csmap| csmap.table_no == table_index)
|
||||
{
|
||||
let index_info = match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => IndexInfo {
|
||||
for usage in table_constraints.candidates.iter() {
|
||||
let index_info = match usage.index.as_ref() {
|
||||
Some(index) => IndexInfo {
|
||||
unique: index.unique,
|
||||
covering: table_reference.index_is_covering(index),
|
||||
column_count: index.columns.len(),
|
||||
},
|
||||
ConstraintLookup::Rowid => IndexInfo {
|
||||
None => IndexInfo {
|
||||
unique: true, // rowids are always unique
|
||||
covering: false,
|
||||
column_count: 1,
|
||||
},
|
||||
ConstraintLookup::EphemeralIndex => continue,
|
||||
};
|
||||
let usable_constraints =
|
||||
usable_constraints_for_join_order(&csmap.constraints, table_index, join_order);
|
||||
let usable_constraint_refs = usable_constraints_for_join_order(
|
||||
&table_constraints.constraints,
|
||||
&usage.refs,
|
||||
table_index,
|
||||
join_order,
|
||||
);
|
||||
let cost = estimate_cost_for_scan_or_seek(
|
||||
Some(index_info),
|
||||
&usable_constraints,
|
||||
&table_constraints.constraints,
|
||||
&usable_constraint_refs,
|
||||
input_cardinality,
|
||||
);
|
||||
|
||||
@@ -138,14 +138,11 @@ pub fn find_best_access_method_for_join_order<'a>(
|
||||
for i in 0..order_target.0.len().min(index_info.column_count) {
|
||||
let correct_table = order_target.0[i].table_no == table_index;
|
||||
let correct_column = {
|
||||
match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => {
|
||||
index.columns[i].pos_in_table == order_target.0[i].column_no
|
||||
}
|
||||
ConstraintLookup::Rowid => {
|
||||
match &usage.index {
|
||||
Some(index) => index.columns[i].pos_in_table == order_target.0[i].column_no,
|
||||
None => {
|
||||
rowid_column_idx.map_or(false, |idx| idx == order_target.0[i].column_no)
|
||||
}
|
||||
ConstraintLookup::EphemeralIndex => unreachable!(),
|
||||
}
|
||||
};
|
||||
if !correct_table || !correct_column {
|
||||
@@ -154,12 +151,9 @@ pub fn find_best_access_method_for_join_order<'a>(
|
||||
break;
|
||||
}
|
||||
let correct_order = {
|
||||
match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => {
|
||||
order_target.0[i].order == index.columns[i].order
|
||||
}
|
||||
ConstraintLookup::Rowid => order_target.0[i].order == SortOrder::Asc,
|
||||
ConstraintLookup::EphemeralIndex => unreachable!(),
|
||||
match &usage.index {
|
||||
Some(index) => order_target.0[i].order == index.columns[i].order,
|
||||
None => order_target.0[i].order == SortOrder::Asc,
|
||||
}
|
||||
};
|
||||
if correct_order {
|
||||
@@ -178,7 +172,7 @@ pub fn find_best_access_method_for_join_order<'a>(
|
||||
};
|
||||
if cost < best_access_method.cost + order_satisfiability_bonus {
|
||||
best_access_method.cost = cost;
|
||||
best_access_method.set_constraints(&csmap.lookup, &usable_constraints);
|
||||
best_access_method.set_constraint_refs(usage.index.clone(), &usable_constraint_refs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
schema::Index,
|
||||
schema::{Column, Index},
|
||||
translate::{
|
||||
expr::{as_binary_components, unwrap_parens},
|
||||
expr::as_binary_components,
|
||||
plan::{JoinOrderMember, TableReference, WhereTerm},
|
||||
planner::{table_mask_from_expr, TableMask},
|
||||
},
|
||||
Result,
|
||||
};
|
||||
use limbo_sqlite3_parser::ast::{self, SortOrder};
|
||||
|
||||
use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Constraint {
|
||||
/// The position of the constraint in the WHERE clause, e.g. in SELECT * FROM t WHERE true AND t.x = 10, the position is (1, BinaryExprSide::Rhs),
|
||||
@@ -17,37 +20,42 @@ pub struct Constraint {
|
||||
pub where_clause_pos: (usize, BinaryExprSide),
|
||||
/// The operator of the constraint, e.g. =, >, <
|
||||
pub operator: ast::Operator,
|
||||
/// The position of the index column in the index, e.g. if the index is (a,b,c) and the constraint is on b, then index_column_pos is 1.
|
||||
/// For Rowid constraints this is always 0.
|
||||
pub index_col_pos: usize,
|
||||
/// The position of the constrained column in the table.
|
||||
pub table_col_pos: usize,
|
||||
/// The sort order of the index column, ASC or DESC. For Rowid constraints this is always ASC.
|
||||
pub sort_order: SortOrder,
|
||||
/// Bitmask of tables that are required to be on the left side of the constrained table,
|
||||
/// e.g. in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x, the lhs_mask contains t2 and t3.
|
||||
pub lhs_mask: TableMask,
|
||||
/// The selectivity of the constraint, i.e. the fraction of rows that will match the constraint.
|
||||
pub selectivity: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Lookup denotes how a given set of [Constraint]s can be used to access a table.
|
||||
///
|
||||
/// Lookup::Index(index) means that the constraints can be used to access the table using the given index.
|
||||
/// Lookup::Rowid means that the constraints can be used to access the table using the table's rowid column.
|
||||
/// Lookup::EphemeralIndex means that the constraints are not useful for accessing the table,
|
||||
/// but an ephemeral index can be built ad-hoc to use them.
|
||||
pub enum ConstraintLookup {
|
||||
Index(Arc<Index>),
|
||||
Rowid,
|
||||
EphemeralIndex,
|
||||
/// A reference to a [Constraint] in a [TableConstraints].
|
||||
pub struct ConstraintRef {
|
||||
/// The position of the constraint in the [TableConstraints::constraints] vector.
|
||||
pub constraint_vec_pos: usize,
|
||||
/// The position of the constrained column in the index. Always 0 for rowid indices.
|
||||
pub index_col_pos: usize,
|
||||
/// The sort order of the constrained column in the index. Always ascending for rowid indices.
|
||||
pub sort_order: SortOrder,
|
||||
}
|
||||
#[derive(Debug, Clone)]
|
||||
/// A collection of [ConstraintRef]s for a given index, or if index is None, for the table's rowid index.
|
||||
pub struct ConstraintUseCandidate {
|
||||
/// The index that may be used to satisfy the constraints. If none, the table's rowid index is used.
|
||||
pub index: Option<Arc<Index>>,
|
||||
/// References to the constraints that may be used as an access path for the index.
|
||||
pub refs: Vec<ConstraintRef>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// A collection of [Constraint]s for a given (table, index) pair.
|
||||
pub struct Constraints {
|
||||
pub lookup: ConstraintLookup,
|
||||
/// A collection of [Constraint]s and their potential [ConstraintUseCandidate]s for a given table.
|
||||
pub struct TableConstraints {
|
||||
pub table_no: usize,
|
||||
/// The constraints for the table, i.e. any [WhereTerm]s that reference columns from this table.
|
||||
pub constraints: Vec<Constraint>,
|
||||
/// Candidates for indexes that may use the constraints to perform a lookup.
|
||||
pub candidates: Vec<ConstraintUseCandidate>,
|
||||
}
|
||||
|
||||
/// Helper enum for [Constraint] to indicate which side of a binary comparison expression is being compared to the index column.
|
||||
@@ -60,13 +68,40 @@ pub enum BinaryExprSide {
|
||||
Rhs,
|
||||
}
|
||||
|
||||
/// In lieu of statistics, we estimate that an equality filter will reduce the output set to 1% of its size.
|
||||
const SELECTIVITY_EQ: f64 = 0.01;
|
||||
/// In lieu of statistics, we estimate that a range filter will reduce the output set to 40% of its size.
|
||||
const SELECTIVITY_RANGE: f64 = 0.4;
|
||||
/// In lieu of statistics, we estimate that other filters will reduce the output set to 90% of its size.
|
||||
const SELECTIVITY_OTHER: f64 = 0.9;
|
||||
|
||||
/// In lieu of statistics, we estimate that an equality filter on a rowid alias or primary key
/// column matches one row out of the hardcoded per-table row estimate.
const SELECTIVITY_UNIQUE_EQUALITY: f64 = 1.0 / ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64;
|
||||
|
||||
/// Estimate the selectivity of a constraint based on the operator and the column type.
|
||||
fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 {
|
||||
match op {
|
||||
ast::Operator::Equals => {
|
||||
if column.is_rowid_alias || column.primary_key {
|
||||
SELECTIVITY_UNIQUE_EQUALITY
|
||||
} else {
|
||||
SELECTIVITY_EQ
|
||||
}
|
||||
}
|
||||
ast::Operator::Greater => SELECTIVITY_RANGE,
|
||||
ast::Operator::GreaterEquals => SELECTIVITY_RANGE,
|
||||
ast::Operator::Less => SELECTIVITY_RANGE,
|
||||
ast::Operator::LessEquals => SELECTIVITY_RANGE,
|
||||
_ => SELECTIVITY_OTHER,
|
||||
}
|
||||
}
|
||||
|
||||
/// Precompute all potentially usable [TableConstraints] from a WHERE clause.
|
||||
/// The resulting list of [Constraints] is then used to evaluate the best access methods for various join orders.
|
||||
/// The resulting list of [TableConstraints] is then used to evaluate the best access methods for various join orders.
|
||||
pub fn constraints_from_where_clause(
|
||||
where_clause: &[WhereTerm],
|
||||
table_references: &[TableReference],
|
||||
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
|
||||
) -> Result<Vec<Constraints>> {
|
||||
) -> Result<Vec<TableConstraints>> {
|
||||
let mut constraints = Vec::new();
|
||||
for (table_no, table_reference) in table_references.iter().enumerate() {
|
||||
let rowid_alias_column = table_reference
|
||||
@@ -74,16 +109,26 @@ pub fn constraints_from_where_clause(
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
|
||||
let mut cs = Constraints {
|
||||
lookup: ConstraintLookup::Rowid,
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
};
|
||||
let mut cs_ephemeral = Constraints {
|
||||
lookup: ConstraintLookup::EphemeralIndex,
|
||||
let mut cs = TableConstraints {
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
candidates: available_indexes
|
||||
.get(table_reference.table.get_name())
|
||||
.map_or(Vec::new(), |indexes| {
|
||||
indexes
|
||||
.iter()
|
||||
.map(|index| ConstraintUseCandidate {
|
||||
index: Some(index.clone()),
|
||||
refs: Vec::new(),
|
||||
})
|
||||
.collect()
|
||||
}),
|
||||
};
|
||||
cs.candidates.push(ConstraintUseCandidate {
|
||||
index: None,
|
||||
refs: Vec::new(),
|
||||
});
|
||||
|
||||
for (i, term) in where_clause.iter().enumerate() {
|
||||
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
|
||||
continue;
|
||||
@@ -96,36 +141,26 @@ pub fn constraints_from_where_clause(
|
||||
match lhs {
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
if *table == table_no {
|
||||
if rowid_alias_column.map_or(false, |idx| *column == idx) {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
} else {
|
||||
cs_ephemeral.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: *column,
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
}
|
||||
let table_column = &table_reference.table.columns()[*column];
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
table_col_pos: *column,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
selectivity: estimate_selectivity(table_column, operator),
|
||||
});
|
||||
}
|
||||
}
|
||||
ast::Expr::RowId { table, .. } => {
|
||||
if *table == table_no && rowid_alias_column.is_some() {
|
||||
let table_column =
|
||||
&table_reference.table.columns()[rowid_alias_column.unwrap()];
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
selectivity: estimate_selectivity(table_column, operator),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -134,59 +169,34 @@ pub fn constraints_from_where_clause(
|
||||
match rhs {
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
if *table == table_no {
|
||||
if rowid_alias_column.map_or(false, |idx| *column == idx) {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
} else {
|
||||
cs_ephemeral.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: *column,
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
}
|
||||
let table_column = &table_reference.table.columns()[*column];
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
table_col_pos: *column,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
selectivity: estimate_selectivity(table_column, operator),
|
||||
});
|
||||
}
|
||||
}
|
||||
ast::Expr::RowId { table, .. } => {
|
||||
if *table == table_no && rowid_alias_column.is_some() {
|
||||
let table_column =
|
||||
&table_reference.table.columns()[rowid_alias_column.unwrap()];
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
selectivity: estimate_selectivity(table_column, operator),
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
// First sort by position, with equalities first within each position
|
||||
cs.constraints.sort_by(|a, b| {
|
||||
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
|
||||
if pos_cmp == Ordering::Equal {
|
||||
// If same position, sort equalities first
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
} else {
|
||||
pos_cmp
|
||||
}
|
||||
});
|
||||
cs_ephemeral.constraints.sort_by(|a, b| {
|
||||
// sort equalities first so that index keys will be properly constructed
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
@@ -196,145 +206,96 @@ pub fn constraints_from_where_clause(
|
||||
}
|
||||
});
|
||||
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
|
||||
cs.constraints.dedup_by_key(|c| c.index_col_pos);
|
||||
|
||||
// Truncate at first gap in positions
|
||||
let mut last_pos = 0;
|
||||
let mut i = 0;
|
||||
for constraint in cs.constraints.iter() {
|
||||
if constraint.index_col_pos != last_pos {
|
||||
if constraint.index_col_pos != last_pos + 1 {
|
||||
break;
|
||||
}
|
||||
last_pos = constraint.index_col_pos;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
cs.constraints.truncate(i);
|
||||
|
||||
// Truncate after the first inequality
|
||||
if let Some(first_inequality) = cs
|
||||
.constraints
|
||||
.iter()
|
||||
.position(|c| c.operator != ast::Operator::Equals)
|
||||
{
|
||||
cs.constraints.truncate(first_inequality + 1);
|
||||
}
|
||||
if rowid_alias_column.is_some() {
|
||||
constraints.push(cs);
|
||||
}
|
||||
constraints.push(cs_ephemeral);
|
||||
|
||||
let indexes = available_indexes.get(table_reference.table.get_name());
|
||||
if let Some(indexes) = indexes {
|
||||
for index in indexes {
|
||||
let mut cs = Constraints {
|
||||
lookup: ConstraintLookup::Index(index.clone()),
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
};
|
||||
for (i, term) in where_clause.iter().enumerate() {
|
||||
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
|
||||
continue;
|
||||
};
|
||||
if let Some(outer_join_tbl) = term.from_outer_join {
|
||||
if outer_join_tbl != table_no {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(position_in_index) =
|
||||
get_column_position_in_index(lhs, table_no, index)?
|
||||
{
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: position_in_index,
|
||||
table_col_pos: {
|
||||
let ast::Expr::Column { column, .. } = unwrap_parens(lhs)? else {
|
||||
crate::bail_parse_error!("expected column in index constraint");
|
||||
};
|
||||
*column
|
||||
},
|
||||
sort_order: index.columns[position_in_index].order,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
}
|
||||
if let Some(position_in_index) =
|
||||
get_column_position_in_index(rhs, table_no, index)?
|
||||
{
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: position_in_index,
|
||||
table_col_pos: {
|
||||
let ast::Expr::Column { column, .. } = unwrap_parens(rhs)? else {
|
||||
crate::bail_parse_error!("expected column in index constraint");
|
||||
};
|
||||
*column
|
||||
},
|
||||
sort_order: index.columns[position_in_index].order,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
// First sort by position, with equalities first within each position
|
||||
cs.constraints.sort_by(|a, b| {
|
||||
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
|
||||
if pos_cmp == Ordering::Equal {
|
||||
// If same position, sort equalities first
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
Ordering::Greater
|
||||
for (i, constraint) in cs.constraints.iter().enumerate() {
|
||||
if rowid_alias_column.map_or(false, |idx| constraint.table_col_pos == idx) {
|
||||
let rowid_usage = cs
|
||||
.candidates
|
||||
.iter_mut()
|
||||
.find_map(|usage| {
|
||||
if usage.index.is_none() {
|
||||
Some(usage)
|
||||
} else {
|
||||
Ordering::Equal
|
||||
None
|
||||
}
|
||||
} else {
|
||||
pos_cmp
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
rowid_usage.refs.push(ConstraintRef {
|
||||
constraint_vec_pos: i,
|
||||
index_col_pos: 0,
|
||||
sort_order: SortOrder::Asc,
|
||||
});
|
||||
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
|
||||
cs.constraints.dedup_by_key(|c| c.index_col_pos);
|
||||
|
||||
// Truncate at first gap in positions
|
||||
let mut last_pos = 0;
|
||||
let mut i = 0;
|
||||
for constraint in cs.constraints.iter() {
|
||||
if constraint.index_col_pos != last_pos {
|
||||
if constraint.index_col_pos != last_pos + 1 {
|
||||
break;
|
||||
}
|
||||
last_pos = constraint.index_col_pos;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
cs.constraints.truncate(i);
|
||||
|
||||
// Truncate after the first inequality
|
||||
if let Some(first_inequality) = cs
|
||||
.constraints
|
||||
.iter()
|
||||
.position(|c| c.operator != ast::Operator::Equals)
|
||||
}
|
||||
for index in available_indexes
|
||||
.get(table_reference.table.get_name())
|
||||
.unwrap_or(&Vec::new())
|
||||
{
|
||||
if let Some(position_in_index) =
|
||||
index.column_table_pos_to_index_pos(constraint.table_col_pos)
|
||||
{
|
||||
cs.constraints.truncate(first_inequality + 1);
|
||||
let index_usage = cs
|
||||
.candidates
|
||||
.iter_mut()
|
||||
.find_map(|usage| {
|
||||
if usage
|
||||
.index
|
||||
.as_ref()
|
||||
.map_or(false, |i| Arc::ptr_eq(index, i))
|
||||
{
|
||||
Some(usage)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
index_usage.refs.push(ConstraintRef {
|
||||
constraint_vec_pos: i,
|
||||
index_col_pos: position_in_index,
|
||||
sort_order: index.columns[position_in_index].order,
|
||||
});
|
||||
}
|
||||
constraints.push(cs);
|
||||
}
|
||||
}
|
||||
|
||||
for usage in cs.candidates.iter_mut() {
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists, since the constraints vec is sorted that way)
|
||||
usage.refs.dedup_by_key(|uref| uref.index_col_pos);
|
||||
|
||||
// Truncate at first gap in positions
|
||||
let mut last_pos = 0;
|
||||
let mut i = 0;
|
||||
for uref in usage.refs.iter() {
|
||||
if uref.index_col_pos != last_pos {
|
||||
if uref.index_col_pos != last_pos + 1 {
|
||||
break;
|
||||
}
|
||||
last_pos = uref.index_col_pos;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
usage.refs.truncate(i);
|
||||
|
||||
// Truncate after the first inequality, since the left-prefix rule of indexes requires that all constraints but the last one must be equalities
|
||||
if let Some(first_inequality) = usage.refs.iter().position(|uref| {
|
||||
cs.constraints[uref.constraint_vec_pos].operator != ast::Operator::Equals
|
||||
}) {
|
||||
usage.refs.truncate(first_inequality + 1);
|
||||
}
|
||||
}
|
||||
constraints.push(cs);
|
||||
}
|
||||
|
||||
Ok(constraints)
|
||||
}
|
||||
|
||||
pub fn usable_constraints_for_join_order<'a>(
|
||||
cs: &'a [Constraint],
|
||||
constraints: &'a [Constraint],
|
||||
refs: &'a [ConstraintRef],
|
||||
table_index: usize,
|
||||
join_order: &[JoinOrderMember],
|
||||
) -> &'a [Constraint] {
|
||||
) -> &'a [ConstraintRef] {
|
||||
let mut usable_until = 0;
|
||||
for constraint in cs.iter() {
|
||||
for uref in refs.iter() {
|
||||
let constraint = &constraints[uref.constraint_vec_pos];
|
||||
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_index);
|
||||
if other_side_refers_to_self {
|
||||
break;
|
||||
@@ -351,23 +312,7 @@ pub fn usable_constraints_for_join_order<'a>(
|
||||
}
|
||||
usable_until += 1;
|
||||
}
|
||||
&cs[..usable_until]
|
||||
}
|
||||
|
||||
/// Get the position of a column in an index
|
||||
/// For example, if there is an index on table T(x,y) then y's position in the index is 1.
|
||||
fn get_column_position_in_index(
|
||||
expr: &ast::Expr,
|
||||
table_index: usize,
|
||||
index: &Arc<Index>,
|
||||
) -> Result<Option<usize>> {
|
||||
let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
if *table != table_index {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(index.column_table_pos_to_index_pos(*column))
|
||||
&refs[..usable_until]
|
||||
}
|
||||
|
||||
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use limbo_sqlite3_parser::ast;
|
||||
|
||||
use super::constraints::Constraint;
|
||||
use super::constraints::{Constraint, ConstraintRef};
|
||||
|
||||
/// A simple newtype wrapper over a f64 that represents the cost of an operation.
|
||||
///
|
||||
@@ -45,6 +45,7 @@ pub fn estimate_page_io_cost(rowcount: f64) -> Cost {
|
||||
pub fn estimate_cost_for_scan_or_seek(
|
||||
index_info: Option<IndexInfo>,
|
||||
constraints: &[Constraint],
|
||||
usable_constraint_refs: &[ConstraintRef],
|
||||
input_cardinality: f64,
|
||||
) -> Cost {
|
||||
let Some(index_info) = index_info else {
|
||||
@@ -53,15 +54,15 @@ pub fn estimate_cost_for_scan_or_seek(
|
||||
);
|
||||
};
|
||||
|
||||
let final_constraint_is_range = constraints
|
||||
.last()
|
||||
.map_or(false, |c| c.operator != ast::Operator::Equals);
|
||||
let final_constraint_is_range = usable_constraint_refs.last().map_or(false, |c| {
|
||||
constraints[c.constraint_vec_pos].operator != ast::Operator::Equals
|
||||
});
|
||||
let equalities_count = constraints
|
||||
.iter()
|
||||
.take(if final_constraint_is_range {
|
||||
constraints.len() - 1
|
||||
usable_constraint_refs.len() - 1
|
||||
} else {
|
||||
constraints.len()
|
||||
usable_constraint_refs.len()
|
||||
})
|
||||
.count() as f64;
|
||||
|
||||
|
||||
@@ -1,20 +1,17 @@
|
||||
use std::{cell::RefCell, collections::HashMap};
|
||||
|
||||
use limbo_sqlite3_parser::ast;
|
||||
|
||||
use crate::{
|
||||
translate::{
|
||||
expr::as_binary_components,
|
||||
optimizer::{cost::Cost, order::plan_satisfies_order_target},
|
||||
plan::{EvalAt, JoinOrderMember, TableReference, WhereTerm},
|
||||
planner::{determine_where_to_eval_expr, TableMask},
|
||||
plan::{JoinOrderMember, TableReference},
|
||||
planner::TableMask,
|
||||
},
|
||||
Result,
|
||||
};
|
||||
|
||||
use super::{
|
||||
access_method::{find_best_access_method_for_join_order, AccessMethod},
|
||||
constraints::Constraints,
|
||||
constraints::TableConstraints,
|
||||
cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE,
|
||||
order::OrderTarget,
|
||||
};
|
||||
@@ -32,20 +29,12 @@ pub struct JoinN {
|
||||
pub cost: Cost,
|
||||
}
|
||||
|
||||
/// In lieu of statistics, we estimate that an equality filter will reduce the output set to 1% of its size.
|
||||
const SELECTIVITY_EQ: f64 = 0.01;
|
||||
/// In lieu of statistics, we estimate that a range filter will reduce the output set to 40% of its size.
|
||||
const SELECTIVITY_RANGE: f64 = 0.4;
|
||||
/// In lieu of statistics, we estimate that other filters will reduce the output set to 90% of its size.
|
||||
const SELECTIVITY_OTHER: f64 = 0.9;
|
||||
|
||||
/// Join n-1 tables with the n'th table.
|
||||
pub fn join_lhs_and_rhs<'a>(
|
||||
lhs: Option<&JoinN>,
|
||||
rhs_table_number: usize,
|
||||
rhs_table_reference: &TableReference,
|
||||
where_clause: &Vec<WhereTerm>,
|
||||
constraints: &'a [Constraints],
|
||||
constraints: &'a TableConstraints,
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
@@ -77,78 +66,15 @@ pub fn join_lhs_and_rhs<'a>(
|
||||
let mut best_access_methods = lhs.map_or(vec![], |l| l.best_access_methods.clone());
|
||||
best_access_methods.push(access_methods_arena.borrow().len() - 1);
|
||||
|
||||
// Estimate based on the WHERE clause terms how much the different filters will reduce the output set.
|
||||
let output_cardinality_multiplier = where_clause
|
||||
let lhs_mask = lhs.map_or(TableMask::new(), |l| {
|
||||
TableMask::from_iter(l.table_numbers.iter().cloned())
|
||||
});
|
||||
// Output cardinality is reduced by the product of the selectivities of the constraints that can be used with this join order.
|
||||
let output_cardinality_multiplier = constraints
|
||||
.constraints
|
||||
.iter()
|
||||
.filter_map(|term| {
|
||||
// Skip terms that are not binary comparisons
|
||||
let Ok(Some((lhs, op, rhs))) = as_binary_components(&term.expr) else {
|
||||
return None;
|
||||
};
|
||||
// Skip terms that cannot be evaluated at this table's loop level
|
||||
if !term.should_eval_at_loop(join_order.len() - 1, join_order) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// If both lhs and rhs refer to columns from this table, we can't use this constraint
|
||||
// because we can't use the index to satisfy the condition.
|
||||
// Examples:
|
||||
// - WHERE t.x > t.y
|
||||
// - WHERE t.x + 1 > t.y - 5
|
||||
// - WHERE t.x = (t.x)
|
||||
let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs, join_order) else {
|
||||
return None;
|
||||
};
|
||||
let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs, join_order) else {
|
||||
return None;
|
||||
};
|
||||
if eval_at_left == EvalAt::Loop(join_order.len() - 1)
|
||||
&& eval_at_right == EvalAt::Loop(join_order.len() - 1)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((lhs, op, rhs))
|
||||
})
|
||||
.filter_map(|(lhs, op, rhs)| {
|
||||
// Skip terms where neither lhs nor rhs refer to columns from this table
|
||||
if let ast::Expr::Column { table, column, .. } = lhs {
|
||||
if *table != rhs_table_number {
|
||||
None
|
||||
} else {
|
||||
let columns = rhs_table_reference.columns();
|
||||
Some((&columns[*column], op))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.or_else(|| {
|
||||
if let ast::Expr::Column { table, column, .. } = rhs {
|
||||
if *table != rhs_table_number {
|
||||
None
|
||||
} else {
|
||||
let columns = rhs_table_reference.columns();
|
||||
Some((&columns[*column], op))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.map(|(column, op)| match op {
|
||||
ast::Operator::Equals => {
|
||||
if column.is_rowid_alias || column.primary_key {
|
||||
1.0 / ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64
|
||||
} else {
|
||||
SELECTIVITY_EQ
|
||||
}
|
||||
}
|
||||
ast::Operator::Greater => SELECTIVITY_RANGE,
|
||||
ast::Operator::GreaterEquals => SELECTIVITY_RANGE,
|
||||
ast::Operator::Less => SELECTIVITY_RANGE,
|
||||
ast::Operator::LessEquals => SELECTIVITY_RANGE,
|
||||
_ => SELECTIVITY_OTHER,
|
||||
})
|
||||
.filter(|c| lhs_mask.contains_all(&c.lhs_mask))
|
||||
.map(|c| c.selectivity)
|
||||
.product::<f64>();
|
||||
|
||||
// Produce a number of rows estimated to be returned when this table is filtered by the WHERE clause.
|
||||
@@ -180,9 +106,8 @@ pub struct BestJoinOrderResult {
|
||||
/// Returns the best [JoinN] if one exists, otherwise returns None.
|
||||
pub fn compute_best_join_order<'a>(
|
||||
table_references: &[TableReference],
|
||||
where_clause: &Vec<WhereTerm>,
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
constraints: &'a [Constraints],
|
||||
constraints: &'a [TableConstraints],
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
) -> Result<Option<BestJoinOrderResult>> {
|
||||
// Skip work if we have no tables to consider.
|
||||
@@ -195,7 +120,6 @@ pub fn compute_best_join_order<'a>(
|
||||
// Compute naive left-to-right plan to use as pruning threshold
|
||||
let naive_plan = compute_naive_left_deep_plan(
|
||||
table_references,
|
||||
where_clause,
|
||||
maybe_order_target,
|
||||
access_methods_arena,
|
||||
&constraints,
|
||||
@@ -265,8 +189,7 @@ pub fn compute_best_join_order<'a>(
|
||||
None,
|
||||
i,
|
||||
table_ref,
|
||||
where_clause,
|
||||
&constraints,
|
||||
&constraints[i],
|
||||
&join_order,
|
||||
maybe_order_target,
|
||||
access_methods_arena,
|
||||
@@ -381,8 +304,7 @@ pub fn compute_best_join_order<'a>(
|
||||
Some(lhs),
|
||||
rhs_idx,
|
||||
&table_references[rhs_idx],
|
||||
where_clause,
|
||||
&constraints,
|
||||
&constraints[rhs_idx],
|
||||
&join_order,
|
||||
maybe_order_target,
|
||||
access_methods_arena,
|
||||
@@ -464,10 +386,9 @@ pub fn compute_best_join_order<'a>(
|
||||
/// permutations if they exceed this cost during enumeration.
|
||||
pub fn compute_naive_left_deep_plan<'a>(
|
||||
table_references: &[TableReference],
|
||||
where_clause: &Vec<WhereTerm>,
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
constraints: &'a [Constraints],
|
||||
constraints: &'a [TableConstraints],
|
||||
) -> Result<JoinN> {
|
||||
let n = table_references.len();
|
||||
assert!(n > 0);
|
||||
@@ -486,8 +407,7 @@ pub fn compute_naive_left_deep_plan<'a>(
|
||||
None,
|
||||
0,
|
||||
&table_references[0],
|
||||
where_clause,
|
||||
constraints,
|
||||
&constraints[0],
|
||||
&join_order[..1],
|
||||
maybe_order_target,
|
||||
access_methods_arena,
|
||||
@@ -499,8 +419,7 @@ pub fn compute_naive_left_deep_plan<'a>(
|
||||
Some(&best_plan),
|
||||
i,
|
||||
&table_references[i],
|
||||
where_clause,
|
||||
constraints,
|
||||
&constraints[i],
|
||||
&join_order[..i + 1],
|
||||
maybe_order_target,
|
||||
access_methods_arena,
|
||||
@@ -561,7 +480,7 @@ fn generate_join_bitmasks(table_number_max_exclusive: usize, how_many: usize) ->
|
||||
mod tests {
|
||||
use std::{rc::Rc, sync::Arc};
|
||||
|
||||
use limbo_sqlite3_parser::ast::{Expr, Operator, SortOrder};
|
||||
use limbo_sqlite3_parser::ast::{self, Expr, Operator, SortOrder};
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
@@ -571,7 +490,7 @@ mod tests {
|
||||
access_method::AccessMethodKind,
|
||||
constraints::{constraints_from_where_clause, BinaryExprSide},
|
||||
},
|
||||
plan::{ColumnUsedMask, IterationDirection, JoinInfo, Operation},
|
||||
plan::{ColumnUsedMask, IterationDirection, JoinInfo, Operation, WhereTerm},
|
||||
planner::TableMask,
|
||||
},
|
||||
};
|
||||
@@ -595,15 +514,14 @@ mod tests {
|
||||
let where_clause = vec![];
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
let result = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -619,7 +537,7 @@ mod tests {
|
||||
let where_clause = vec![];
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
@@ -627,9 +545,8 @@ mod tests {
|
||||
// expecting best_best_plan() not to do any work due to empty where clause.
|
||||
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -656,7 +573,7 @@ mod tests {
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let available_indexes = HashMap::new();
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
@@ -664,9 +581,8 @@ mod tests {
|
||||
// expecting a RowidEq access method because id is a rowid alias.
|
||||
let result = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -679,9 +595,9 @@ mod tests {
|
||||
AccessMethodKind::Search {
|
||||
index: None,
|
||||
iter_dir,
|
||||
constraints,
|
||||
constraint_refs,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].where_clause_pos == (0, BinaryExprSide::Rhs),
|
||||
if *iter_dir == IterationDirection::Forwards && constraint_refs.len() == 1 && table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos].where_clause_pos == (0, BinaryExprSide::Rhs),
|
||||
),
|
||||
"expected rowid eq access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[0]].kind
|
||||
@@ -719,16 +635,15 @@ mod tests {
|
||||
});
|
||||
available_indexes.insert("test_table".to_string(), vec![index]);
|
||||
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
// SELECT * FROM test_table WHERE id = 42
|
||||
// expecting an IndexScan access method because id is a primary key with an index
|
||||
let result = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -741,9 +656,9 @@ mod tests {
|
||||
AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraints,
|
||||
constraint_refs,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.is_empty() && index.name == "sqlite_autoindex_test_table_1"
|
||||
if *iter_dir == IterationDirection::Forwards && constraint_refs.len() == 1 && table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos].lhs_mask.is_empty() && index.name == "sqlite_autoindex_test_table_1"
|
||||
),
|
||||
"expected index search access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[0]].kind
|
||||
@@ -767,6 +682,9 @@ mod tests {
|
||||
),
|
||||
];
|
||||
|
||||
const TABLE1: usize = 0;
|
||||
const TABLE2: usize = 1;
|
||||
|
||||
let mut available_indexes = HashMap::new();
|
||||
// Index on the outer table (table1)
|
||||
let index1 = Arc::new(Index {
|
||||
@@ -786,21 +704,20 @@ mod tests {
|
||||
// SELECT * FROM table1 JOIN table2 WHERE table1.id = table2.id
|
||||
// expecting table2 to be chosen first due to the index on table1.id
|
||||
let where_clause = vec![_create_binary_expr(
|
||||
_create_column_expr(0, 0, false), // table1.id
|
||||
_create_column_expr(TABLE1, 0, false), // table1.id
|
||||
ast::Operator::Equals,
|
||||
_create_column_expr(1, 0, false), // table2.id
|
||||
_create_column_expr(TABLE2, 0, false), // table2.id
|
||||
)];
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
let result = compute_best_join_order(
|
||||
&mut table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -822,9 +739,9 @@ mod tests {
|
||||
AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraints,
|
||||
constraint_refs,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].where_clause_pos == (0, BinaryExprSide::Rhs) && index.name == "index1",
|
||||
if *iter_dir == IterationDirection::Forwards && constraint_refs.len() == 1 && table_constraints[TABLE1].constraints[constraint_refs[0].constraint_vec_pos].where_clause_pos == (0, BinaryExprSide::Rhs) && index.name == "index1",
|
||||
),
|
||||
"expected Search access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[1]].kind
|
||||
@@ -960,15 +877,14 @@ mod tests {
|
||||
];
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
let result = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -981,46 +897,98 @@ mod tests {
|
||||
vec![TABLE_NO_CUSTOMERS, TABLE_NO_ORDERS, TABLE_NO_ORDER_ITEMS]
|
||||
);
|
||||
|
||||
let AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraint_refs,
|
||||
} = &access_methods_arena.borrow()[best_plan.best_access_methods[0]].kind
|
||||
else {
|
||||
panic!("expected Search access method with index for first table");
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
index.name, "sqlite_autoindex_customers_1",
|
||||
"wrong index name"
|
||||
);
|
||||
assert_eq!(
|
||||
*iter_dir,
|
||||
IterationDirection::Forwards,
|
||||
"wrong iteration direction"
|
||||
);
|
||||
assert_eq!(
|
||||
constraint_refs.len(),
|
||||
1,
|
||||
"wrong number of constraint references"
|
||||
);
|
||||
assert!(
|
||||
matches!(
|
||||
&access_methods_arena.borrow()[best_plan.best_access_methods[0]].kind,
|
||||
AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraints,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.is_empty() && index.name == "sqlite_autoindex_customers_1",
|
||||
),
|
||||
"expected Search access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[0]].kind
|
||||
table_constraints[TABLE_NO_CUSTOMERS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
.is_empty(),
|
||||
"wrong lhs mask: {:?}",
|
||||
table_constraints[TABLE_NO_CUSTOMERS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
);
|
||||
|
||||
let AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraint_refs,
|
||||
} = &access_methods_arena.borrow()[best_plan.best_access_methods[1]].kind
|
||||
else {
|
||||
panic!("expected Search access method with index for second table");
|
||||
};
|
||||
|
||||
assert_eq!(index.name, "orders_customer_id_idx", "wrong index name");
|
||||
assert_eq!(
|
||||
*iter_dir,
|
||||
IterationDirection::Forwards,
|
||||
"wrong iteration direction"
|
||||
);
|
||||
assert_eq!(
|
||||
constraint_refs.len(),
|
||||
1,
|
||||
"wrong number of constraint references"
|
||||
);
|
||||
assert!(
|
||||
matches!(
|
||||
&access_methods_arena.borrow()[best_plan.best_access_methods[1]].kind,
|
||||
AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraints,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.contains_table(TABLE_NO_CUSTOMERS) && index.name == "orders_customer_id_idx",
|
||||
),
|
||||
"expected Search access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[1]].kind
|
||||
table_constraints[TABLE_NO_ORDERS].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
.contains_table(TABLE_NO_CUSTOMERS),
|
||||
"wrong lhs mask: {:?}",
|
||||
table_constraints[TABLE_NO_ORDERS].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
);
|
||||
|
||||
let AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraint_refs,
|
||||
} = &access_methods_arena.borrow()[best_plan.best_access_methods[2]].kind
|
||||
else {
|
||||
panic!("expected Search access method with index for third table");
|
||||
};
|
||||
|
||||
assert_eq!(index.name, "order_items_order_id_idx", "wrong index name");
|
||||
assert_eq!(
|
||||
*iter_dir,
|
||||
IterationDirection::Forwards,
|
||||
"wrong iteration direction"
|
||||
);
|
||||
assert_eq!(
|
||||
constraint_refs.len(),
|
||||
1,
|
||||
"wrong number of constraint references"
|
||||
);
|
||||
assert!(
|
||||
matches!(
|
||||
&access_methods_arena.borrow()[best_plan.best_access_methods[2]].kind,
|
||||
AccessMethodKind::Search {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
constraints,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.contains_table(TABLE_NO_ORDERS) && index.name == "order_items_order_id_idx",
|
||||
),
|
||||
"expected Search access method, got {:?}",
|
||||
access_methods_arena.borrow()[best_plan.best_access_methods[2]].kind
|
||||
table_constraints[TABLE_NO_ORDER_ITEMS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
.contains_table(TABLE_NO_ORDERS),
|
||||
"wrong lhs mask: {:?}",
|
||||
table_constraints[TABLE_NO_ORDER_ITEMS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1081,15 +1049,14 @@ mod tests {
|
||||
|
||||
let available_indexes = HashMap::new();
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
|
||||
&mut table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -1181,15 +1148,14 @@ mod tests {
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let available_indexes = HashMap::new();
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
let result = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1214,20 +1180,33 @@ mod tests {
|
||||
"First table (fact) should use table scan due to column filter"
|
||||
);
|
||||
|
||||
for i in 1..best_plan.table_numbers.len() {
|
||||
for (i, table_number) in best_plan.table_numbers.iter().enumerate().skip(1) {
|
||||
let AccessMethodKind::Search {
|
||||
index: None,
|
||||
iter_dir,
|
||||
constraint_refs,
|
||||
} = &access_methods_arena.borrow()[best_plan.best_access_methods[i]].kind
|
||||
else {
|
||||
panic!("expected Search access method for table {}", table_number);
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
*iter_dir,
|
||||
IterationDirection::Forwards,
|
||||
"wrong iteration direction"
|
||||
);
|
||||
assert_eq!(
|
||||
constraint_refs.len(),
|
||||
1,
|
||||
"wrong number of constraint references"
|
||||
);
|
||||
assert!(
|
||||
matches!(
|
||||
&access_methods_arena.borrow()[best_plan.best_access_methods[i]].kind,
|
||||
AccessMethodKind::Search {
|
||||
index: None,
|
||||
iter_dir,
|
||||
constraints,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.contains_table(FACT_TABLE_IDX)
|
||||
),
|
||||
"Table {} should use Search access method, got {:?}",
|
||||
i + 1,
|
||||
&access_methods_arena.borrow()[best_plan.best_access_methods[i]].kind
|
||||
table_constraints[*table_number].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
.contains_table(FACT_TABLE_IDX),
|
||||
"wrong lhs mask: {:?}",
|
||||
table_constraints[*table_number].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.lhs_mask
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1268,16 +1247,15 @@ mod tests {
|
||||
}
|
||||
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let constraints =
|
||||
let table_constraints =
|
||||
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
|
||||
.unwrap();
|
||||
|
||||
// Run the optimizer
|
||||
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
|
||||
&table_references,
|
||||
&where_clause,
|
||||
None,
|
||||
&constraints,
|
||||
&table_constraints,
|
||||
&access_methods_arena,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -1312,9 +1290,9 @@ mod tests {
|
||||
AccessMethodKind::Search {
|
||||
index: None,
|
||||
iter_dir,
|
||||
constraints,
|
||||
constraint_refs,
|
||||
}
|
||||
if *iter_dir == IterationDirection::Forwards && constraints.len() == 1 && constraints[0].lhs_mask.contains_table(i-1)
|
||||
if *iter_dir == IterationDirection::Forwards && constraint_refs.len() == 1 && table_constraints[i].constraints[constraint_refs[0].constraint_vec_pos].lhs_mask.contains_table(i-1)
|
||||
),
|
||||
"Table {} should use Search access method, got {:?}",
|
||||
i + 1,
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::{cell::RefCell, cmp::Ordering, collections::HashMap, sync::Arc};
|
||||
use access_method::AccessMethodKind;
|
||||
use constraints::{
|
||||
constraints_from_where_clause, usable_constraints_for_join_order, BinaryExprSide, Constraint,
|
||||
ConstraintLookup,
|
||||
ConstraintRef,
|
||||
};
|
||||
use cost::Cost;
|
||||
use join::{compute_best_join_order, BestJoinOrderResult};
|
||||
@@ -128,13 +128,12 @@ fn use_indexes(
|
||||
) -> Result<Option<Vec<JoinOrderMember>>> {
|
||||
let access_methods_arena = RefCell::new(Vec::new());
|
||||
let maybe_order_target = compute_order_target(order_by, group_by.as_mut());
|
||||
let constraints =
|
||||
let constraints_per_table =
|
||||
constraints_from_where_clause(where_clause, table_references, available_indexes)?;
|
||||
let Some(best_join_order_result) = compute_best_join_order(
|
||||
table_references,
|
||||
where_clause,
|
||||
maybe_order_target.as_ref(),
|
||||
&constraints,
|
||||
&constraints_per_table,
|
||||
&access_methods_arena,
|
||||
)?
|
||||
else {
|
||||
@@ -222,29 +221,38 @@ fn use_indexes(
|
||||
Operation::Scan { iter_dir, index }
|
||||
} else {
|
||||
// Try to construct ephemeral index since it's going to be better than a scan for non-outermost tables.
|
||||
let unindexable_constraints = constraints.iter().find(|c| {
|
||||
c.table_no == table_number
|
||||
&& matches!(c.lookup, ConstraintLookup::EphemeralIndex)
|
||||
});
|
||||
if let Some(unindexable) = unindexable_constraints {
|
||||
let usable_constraints = usable_constraints_for_join_order(
|
||||
&unindexable.constraints,
|
||||
let table_constraints = constraints_per_table
|
||||
.iter()
|
||||
.find(|c| c.table_no == table_number);
|
||||
if let Some(table_constraints) = table_constraints {
|
||||
let temp_constraint_refs = (0..table_constraints.constraints.len())
|
||||
.map(|i| ConstraintRef {
|
||||
constraint_vec_pos: i,
|
||||
index_col_pos: table_constraints.constraints[i].table_col_pos,
|
||||
sort_order: SortOrder::Asc,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let usable_constraint_refs = usable_constraints_for_join_order(
|
||||
&table_constraints.constraints,
|
||||
&temp_constraint_refs,
|
||||
table_number,
|
||||
&best_join_order[..=i],
|
||||
);
|
||||
if usable_constraints.is_empty() {
|
||||
if usable_constraint_refs.is_empty() {
|
||||
Operation::Scan { iter_dir, index }
|
||||
} else {
|
||||
let ephemeral_index = ephemeral_index_build(
|
||||
&table_references[table_number],
|
||||
table_number,
|
||||
&usable_constraints,
|
||||
&table_constraints.constraints,
|
||||
&usable_constraint_refs,
|
||||
);
|
||||
let ephemeral_index = Arc::new(ephemeral_index);
|
||||
Operation::Search(Search::Seek {
|
||||
index: Some(ephemeral_index),
|
||||
seek_def: build_seek_def_from_constraints(
|
||||
usable_constraints,
|
||||
&table_constraints.constraints,
|
||||
&usable_constraint_refs,
|
||||
iter_dir,
|
||||
where_clause,
|
||||
)?,
|
||||
@@ -257,32 +265,37 @@ fn use_indexes(
|
||||
}
|
||||
AccessMethodKind::Search {
|
||||
index,
|
||||
constraints,
|
||||
constraint_refs,
|
||||
iter_dir,
|
||||
} => {
|
||||
assert!(!constraints.is_empty());
|
||||
for constraint in constraints.iter() {
|
||||
assert!(!constraint_refs.is_empty());
|
||||
for uref in constraint_refs.iter() {
|
||||
let constraint =
|
||||
&constraints_per_table[table_number].constraints[uref.constraint_vec_pos];
|
||||
to_remove_from_where_clause.push(constraint.where_clause_pos.0);
|
||||
}
|
||||
if let Some(index) = index {
|
||||
Operation::Search(Search::Seek {
|
||||
index: Some(index),
|
||||
seek_def: build_seek_def_from_constraints(
|
||||
constraints,
|
||||
&constraints_per_table[table_number].constraints,
|
||||
&constraint_refs,
|
||||
iter_dir,
|
||||
where_clause,
|
||||
)?,
|
||||
})
|
||||
} else {
|
||||
assert!(
|
||||
constraints.len() == 1,
|
||||
constraint_refs.len() == 1,
|
||||
"expected exactly one constraint for rowid seek, got {:?}",
|
||||
constraints
|
||||
constraint_refs
|
||||
);
|
||||
match constraints[0].operator {
|
||||
let constraint = &constraints_per_table[table_number].constraints
|
||||
[constraint_refs[0].constraint_vec_pos];
|
||||
match constraint.operator {
|
||||
ast::Operator::Equals => Operation::Search(Search::RowidEq {
|
||||
cmp_expr: {
|
||||
let (idx, side) = constraints[0].where_clause_pos;
|
||||
let (idx, side) = constraint.where_clause_pos;
|
||||
let ast::Expr::Binary(lhs, _, rhs) =
|
||||
unwrap_parens(&where_clause[idx].expr)?
|
||||
else {
|
||||
@@ -301,7 +314,8 @@ fn use_indexes(
|
||||
_ => Operation::Search(Search::Seek {
|
||||
index: None,
|
||||
seek_def: build_seek_def_from_constraints(
|
||||
constraints,
|
||||
&constraints_per_table[table_number].constraints,
|
||||
&constraint_refs,
|
||||
iter_dir,
|
||||
where_clause,
|
||||
)?,
|
||||
@@ -726,6 +740,7 @@ fn ephemeral_index_build(
|
||||
table_reference: &TableReference,
|
||||
table_index: usize,
|
||||
constraints: &[Constraint],
|
||||
constraint_refs: &[ConstraintRef],
|
||||
) -> Index {
|
||||
let mut ephemeral_columns: Vec<IndexColumn> = table_reference
|
||||
.columns()
|
||||
@@ -741,14 +756,14 @@ fn ephemeral_index_build(
|
||||
.collect();
|
||||
// sort so that constraints first, then rest in whatever order they were in in the table
|
||||
ephemeral_columns.sort_by(|a, b| {
|
||||
let a_constraint = constraints
|
||||
let a_constraint = constraint_refs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.table_col_pos == a.pos_in_table);
|
||||
let b_constraint = constraints
|
||||
.find(|(_, c)| constraints[c.constraint_vec_pos].table_col_pos == a.pos_in_table);
|
||||
let b_constraint = constraint_refs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.table_col_pos == b.pos_in_table);
|
||||
.find(|(_, c)| constraints[c.constraint_vec_pos].table_col_pos == b.pos_in_table);
|
||||
match (a_constraint, b_constraint) {
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
@@ -775,20 +790,22 @@ fn ephemeral_index_build(
|
||||
/// Build a [SeekDef] for a given list of [Constraint]s
|
||||
pub fn build_seek_def_from_constraints(
|
||||
constraints: &[Constraint],
|
||||
constraint_refs: &[ConstraintRef],
|
||||
iter_dir: IterationDirection,
|
||||
where_clause: &[WhereTerm],
|
||||
) -> Result<SeekDef> {
|
||||
assert!(
|
||||
!constraints.is_empty(),
|
||||
"cannot build seek def from empty list of constraints"
|
||||
!constraint_refs.is_empty(),
|
||||
"cannot build seek def from empty list of constraint refs"
|
||||
);
|
||||
// Extract the key values and operators
|
||||
let mut key = Vec::with_capacity(constraints.len());
|
||||
let mut key = Vec::with_capacity(constraint_refs.len());
|
||||
|
||||
for constraint in constraints {
|
||||
for uref in constraint_refs {
|
||||
// Extract the other expression from the binary WhereTerm (i.e. the one being compared to the index column)
|
||||
let (idx, side) = constraint.where_clause_pos;
|
||||
let where_term = &where_clause[idx];
|
||||
let constraint = &constraints[uref.constraint_vec_pos];
|
||||
let (where_idx, side) = constraint.where_clause_pos;
|
||||
let where_term = &where_clause[where_idx];
|
||||
let ast::Expr::Binary(lhs, _, rhs) = unwrap_parens(where_term.expr.clone())? else {
|
||||
crate::bail_parse_error!("expected binary expression");
|
||||
};
|
||||
@@ -797,12 +814,12 @@ pub fn build_seek_def_from_constraints(
|
||||
} else {
|
||||
*rhs
|
||||
};
|
||||
key.push((cmp_expr, constraint.sort_order));
|
||||
key.push((cmp_expr, uref.sort_order));
|
||||
}
|
||||
|
||||
// We know all but potentially the last term is an equality, so we can use the operator of the last term
|
||||
// to form the SeekOp
|
||||
let op = constraints.last().unwrap().operator;
|
||||
let op = constraints[constraint_refs.last().unwrap().constraint_vec_pos].operator;
|
||||
|
||||
let seek_def = build_seek_def(op, iter_dir, key)?;
|
||||
Ok(seek_def)
|
||||
|
||||
Reference in New Issue
Block a user