mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-23 00:45:37 +01:00
optimizer module split
This commit is contained in:
@@ -2579,3 +2579,64 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu
|
||||
/// Strips the delimiters surrounding a quoted SQL string literal and collapses every
/// escaped quote (`''`) into a single `'`.
///
/// The first and the last byte of `input` are treated as the delimiters and dropped.
/// When `input` is too short to carry both delimiters (fewer than two bytes), or a
/// delimiter position does not fall on a UTF-8 character boundary, an empty string is
/// returned instead of panicking.
pub fn sanitize_string(input: &str) -> String {
    // `str::get` yields `None` for out-of-range, inverted or non-boundary ranges,
    // which covers the empty and single-byte inputs that used to panic.
    input
        .get(1..input.len().saturating_sub(1))
        .unwrap_or("")
        .replace("''", "'")
}
|
||||
|
||||
pub fn as_binary_components(
|
||||
expr: &ast::Expr,
|
||||
) -> Result<Option<(&ast::Expr, ast::Operator, &ast::Expr)>> {
|
||||
match unwrap_parens(expr)? {
|
||||
ast::Expr::Binary(lhs, operator, rhs)
|
||||
if matches!(
|
||||
operator,
|
||||
ast::Operator::Equals
|
||||
| ast::Operator::Greater
|
||||
| ast::Operator::Less
|
||||
| ast::Operator::GreaterEquals
|
||||
| ast::Operator::LessEquals
|
||||
) =>
|
||||
{
|
||||
Ok(Some((lhs.as_ref(), *operator, rhs.as_ref())))
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively unwrap parentheses from an expression
|
||||
/// e.g. (((t.x > 5))) -> t.x > 5
|
||||
pub fn unwrap_parens<T>(expr: T) -> Result<T>
|
||||
where
|
||||
T: UnwrapParens,
|
||||
{
|
||||
expr.unwrap_parens()
|
||||
}
|
||||
|
||||
pub trait UnwrapParens {
|
||||
fn unwrap_parens(self) -> Result<Self>
|
||||
where
|
||||
Self: Sized;
|
||||
}
|
||||
|
||||
impl UnwrapParens for &ast::Expr {
|
||||
fn unwrap_parens(self) -> Result<Self> {
|
||||
match self {
|
||||
ast::Expr::Column { .. } => Ok(self),
|
||||
ast::Expr::Parenthesized(exprs) => match exprs.len() {
|
||||
1 => unwrap_parens(exprs.first().unwrap()),
|
||||
_ => crate::bail_parse_error!("expected single expression in parentheses"),
|
||||
},
|
||||
_ => Ok(self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UnwrapParens for ast::Expr {
|
||||
fn unwrap_parens(self) -> Result<Self> {
|
||||
match self {
|
||||
ast::Expr::Column { .. } => Ok(self),
|
||||
ast::Expr::Parenthesized(mut exprs) => match exprs.len() {
|
||||
1 => unwrap_parens(exprs.pop().unwrap()),
|
||||
_ => crate::bail_parse_error!("expected single expression in parentheses"),
|
||||
},
|
||||
_ => Ok(self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
229
core/translate/optimizer/access_method.rs
Normal file
229
core/translate/optimizer/access_method.rs
Normal file
@@ -0,0 +1,229 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use limbo_sqlite3_parser::ast::SortOrder;
|
||||
|
||||
use crate::{
|
||||
schema::Index,
|
||||
translate::plan::{IterationDirection, JoinOrderMember, TableReference},
|
||||
Result,
|
||||
};
|
||||
|
||||
use super::{
|
||||
constraints::{usable_constraints_for_join_order, Constraint, ConstraintLookup, Constraints},
|
||||
cost::{estimate_cost_for_scan_or_seek, Cost, IndexInfo},
|
||||
order::OrderTarget,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Represents a way to access a table.
|
||||
pub struct AccessMethod<'a> {
|
||||
/// The estimated number of page fetches.
|
||||
/// We are ignoring CPU cost for now.
|
||||
pub cost: Cost,
|
||||
pub kind: AccessMethodKind<'a>,
|
||||
}
|
||||
|
||||
impl<'a> AccessMethod<'a> {
|
||||
pub fn set_iter_dir(&mut self, new_dir: IterationDirection) {
|
||||
match &mut self.kind {
|
||||
AccessMethodKind::Scan { iter_dir, .. } => *iter_dir = new_dir,
|
||||
AccessMethodKind::Search { iter_dir, .. } => *iter_dir = new_dir,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_constraints(&mut self, lookup: &ConstraintLookup, constraints: &'a [Constraint]) {
|
||||
let index = match lookup {
|
||||
ConstraintLookup::Index(index) => Some(index),
|
||||
ConstraintLookup::Rowid => None,
|
||||
ConstraintLookup::EphemeralIndex => panic!("set_constraints called with Lookup::None"),
|
||||
};
|
||||
match (&mut self.kind, constraints.is_empty()) {
|
||||
(
|
||||
AccessMethodKind::Search {
|
||||
constraints,
|
||||
index: i,
|
||||
..
|
||||
},
|
||||
false,
|
||||
) => {
|
||||
*constraints = constraints;
|
||||
*i = index.cloned();
|
||||
}
|
||||
(AccessMethodKind::Search { iter_dir, .. }, true) => {
|
||||
self.kind = AccessMethodKind::Scan {
|
||||
index: index.cloned(),
|
||||
iter_dir: *iter_dir,
|
||||
};
|
||||
}
|
||||
(AccessMethodKind::Scan { iter_dir, .. }, false) => {
|
||||
self.kind = AccessMethodKind::Search {
|
||||
index: index.cloned(),
|
||||
iter_dir: *iter_dir,
|
||||
constraints,
|
||||
};
|
||||
}
|
||||
(AccessMethodKind::Scan { index: i, .. }, true) => {
|
||||
*i = index.cloned();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Represents the kind of access method.
|
||||
pub enum AccessMethodKind<'a> {
|
||||
/// A full scan, which can be an index scan or a table scan.
|
||||
Scan {
|
||||
index: Option<Arc<Index>>,
|
||||
iter_dir: IterationDirection,
|
||||
},
|
||||
/// A search, which can be an index seek or a rowid-based search.
|
||||
Search {
|
||||
index: Option<Arc<Index>>,
|
||||
iter_dir: IterationDirection,
|
||||
constraints: &'a [Constraint],
|
||||
},
|
||||
}
|
||||
|
||||
/// Return the best [AccessMethod] for a given join order.
|
||||
/// table_index and table_reference refer to the rightmost table in the join order.
|
||||
pub fn find_best_access_method_for_join_order<'a>(
|
||||
table_index: usize,
|
||||
table_reference: &TableReference,
|
||||
constraints: &'a [Constraints],
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
input_cardinality: f64,
|
||||
) -> Result<AccessMethod<'a>> {
|
||||
let cost_of_full_table_scan = estimate_cost_for_scan_or_seek(None, &[], input_cardinality);
|
||||
let mut best_access_method = AccessMethod {
|
||||
cost: cost_of_full_table_scan,
|
||||
kind: AccessMethodKind::Scan {
|
||||
index: None,
|
||||
iter_dir: IterationDirection::Forwards,
|
||||
},
|
||||
};
|
||||
let rowid_column_idx = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
for csmap in constraints
|
||||
.iter()
|
||||
.filter(|csmap| csmap.table_no == table_index)
|
||||
{
|
||||
let index_info = match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => IndexInfo {
|
||||
unique: index.unique,
|
||||
covering: table_reference.index_is_covering(index),
|
||||
column_count: index.columns.len(),
|
||||
},
|
||||
ConstraintLookup::Rowid => IndexInfo {
|
||||
unique: true, // rowids are always unique
|
||||
covering: false,
|
||||
column_count: 1,
|
||||
},
|
||||
ConstraintLookup::EphemeralIndex => continue,
|
||||
};
|
||||
let usable_constraints =
|
||||
usable_constraints_for_join_order(&csmap.constraints, table_index, join_order);
|
||||
let cost = estimate_cost_for_scan_or_seek(
|
||||
Some(index_info),
|
||||
&usable_constraints,
|
||||
input_cardinality,
|
||||
);
|
||||
|
||||
let order_satisfiability_bonus = if let Some(order_target) = maybe_order_target {
|
||||
let mut all_same_direction = true;
|
||||
let mut all_opposite_direction = true;
|
||||
for i in 0..order_target.0.len().min(index_info.column_count) {
|
||||
let correct_table = order_target.0[i].table_no == table_index;
|
||||
let correct_column = {
|
||||
match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => {
|
||||
index.columns[i].pos_in_table == order_target.0[i].column_no
|
||||
}
|
||||
ConstraintLookup::Rowid => {
|
||||
rowid_column_idx.map_or(false, |idx| idx == order_target.0[i].column_no)
|
||||
}
|
||||
ConstraintLookup::EphemeralIndex => unreachable!(),
|
||||
}
|
||||
};
|
||||
if !correct_table || !correct_column {
|
||||
all_same_direction = false;
|
||||
all_opposite_direction = false;
|
||||
break;
|
||||
}
|
||||
let correct_order = {
|
||||
match &csmap.lookup {
|
||||
ConstraintLookup::Index(index) => {
|
||||
order_target.0[i].order == index.columns[i].order
|
||||
}
|
||||
ConstraintLookup::Rowid => order_target.0[i].order == SortOrder::Asc,
|
||||
ConstraintLookup::EphemeralIndex => unreachable!(),
|
||||
}
|
||||
};
|
||||
if correct_order {
|
||||
all_opposite_direction = false;
|
||||
} else {
|
||||
all_same_direction = false;
|
||||
}
|
||||
}
|
||||
if all_same_direction || all_opposite_direction {
|
||||
Cost(1.0)
|
||||
} else {
|
||||
Cost(0.0)
|
||||
}
|
||||
} else {
|
||||
Cost(0.0)
|
||||
};
|
||||
if cost < best_access_method.cost + order_satisfiability_bonus {
|
||||
best_access_method.cost = cost;
|
||||
best_access_method.set_constraints(&csmap.lookup, &usable_constraints);
|
||||
}
|
||||
}
|
||||
|
||||
let iter_dir = if let Some(order_target) = maybe_order_target {
|
||||
// if index columns match the order target columns in the exact reverse directions, then we should use IterationDirection::Backwards
|
||||
let index = match &best_access_method.kind {
|
||||
AccessMethodKind::Scan { index, .. } => index.as_ref(),
|
||||
AccessMethodKind::Search { index, .. } => index.as_ref(),
|
||||
};
|
||||
let mut should_use_backwards = true;
|
||||
let num_cols = index.map_or(1, |i| i.columns.len());
|
||||
for i in 0..order_target.0.len().min(num_cols) {
|
||||
let correct_table = order_target.0[i].table_no == table_index;
|
||||
let correct_column = {
|
||||
match index {
|
||||
Some(index) => index.columns[i].pos_in_table == order_target.0[i].column_no,
|
||||
None => {
|
||||
rowid_column_idx.map_or(false, |idx| idx == order_target.0[i].column_no)
|
||||
}
|
||||
}
|
||||
};
|
||||
if !correct_table || !correct_column {
|
||||
should_use_backwards = false;
|
||||
break;
|
||||
}
|
||||
let correct_order = {
|
||||
match index {
|
||||
Some(index) => order_target.0[i].order == index.columns[i].order,
|
||||
None => order_target.0[i].order == SortOrder::Asc,
|
||||
}
|
||||
};
|
||||
if correct_order {
|
||||
should_use_backwards = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if should_use_backwards {
|
||||
IterationDirection::Backwards
|
||||
} else {
|
||||
IterationDirection::Forwards
|
||||
}
|
||||
} else {
|
||||
IterationDirection::Forwards
|
||||
};
|
||||
best_access_method.set_iter_dir(iter_dir);
|
||||
|
||||
Ok(best_access_method)
|
||||
}
|
||||
382
core/translate/optimizer/constraints.rs
Normal file
382
core/translate/optimizer/constraints.rs
Normal file
@@ -0,0 +1,382 @@
|
||||
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
|
||||
|
||||
use crate::{
|
||||
schema::Index,
|
||||
translate::{
|
||||
expr::{as_binary_components, unwrap_parens},
|
||||
plan::{JoinOrderMember, TableReference, WhereTerm},
|
||||
planner::{table_mask_from_expr, TableMask},
|
||||
},
|
||||
Result,
|
||||
};
|
||||
use limbo_sqlite3_parser::ast::{self, SortOrder};
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Constraint {
|
||||
/// The position of the constraint in the WHERE clause, e.g. in SELECT * FROM t WHERE true AND t.x = 10, the position is (1, BinaryExprSide::Rhs),
|
||||
/// since the RHS '10' is the constraining expression and it's part of the second term in the WHERE clause.
|
||||
pub where_clause_pos: (usize, BinaryExprSide),
|
||||
/// The operator of the constraint, e.g. =, >, <
|
||||
pub operator: ast::Operator,
|
||||
/// The position of the index column in the index, e.g. if the index is (a,b,c) and the constraint is on b, then index_column_pos is 1.
|
||||
/// For Rowid constraints this is always 0.
|
||||
pub index_col_pos: usize,
|
||||
/// The position of the constrained column in the table.
|
||||
pub table_col_pos: usize,
|
||||
/// The sort order of the index column, ASC or DESC. For Rowid constraints this is always ASC.
|
||||
pub sort_order: SortOrder,
|
||||
/// Bitmask of tables that are required to be on the left side of the constrained table,
|
||||
/// e.g. in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x, the lhs_mask contains t2 and t3.
|
||||
pub lhs_mask: TableMask,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Lookup denotes how a given set of [Constraint]s can be used to access a table.
|
||||
///
|
||||
/// Lookup::Index(index) means that the constraints can be used to access the table using the given index.
|
||||
/// Lookup::Rowid means that the constraints can be used to access the table using the table's rowid column.
|
||||
/// Lookup::EphemeralIndex means that the constraints are not useful for accessing the table,
|
||||
/// but an ephemeral index can be built ad-hoc to use them.
|
||||
pub enum ConstraintLookup {
|
||||
Index(Arc<Index>),
|
||||
Rowid,
|
||||
EphemeralIndex,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
/// A collection of [Constraint]s for a given (table, index) pair.
|
||||
pub struct Constraints {
|
||||
pub lookup: ConstraintLookup,
|
||||
pub table_no: usize,
|
||||
pub constraints: Vec<Constraint>,
|
||||
}
|
||||
|
||||
/// Tells which side of a binary comparison holds the expression that is compared
/// against the constrained column of a [Constraint].
///
/// For example, if the where clause is `WHERE x = 10` and there's an index on `x`,
/// the [Constraint] for the term `x = 10` carries [BinaryExprSide::Rhs],
/// because the right hand side expression `10` is being compared to the index column `x`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryExprSide {
    /// The constraining expression is the left operand.
    Lhs,
    /// The constraining expression is the right operand.
    Rhs,
}
|
||||
|
||||
/// Precompute all potentially usable [Constraints] from a WHERE clause.
|
||||
/// The resulting list of [Constraints] is then used to evaluate the best access methods for various join orders.
|
||||
pub fn constraints_from_where_clause(
|
||||
where_clause: &[WhereTerm],
|
||||
table_references: &[TableReference],
|
||||
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
|
||||
) -> Result<Vec<Constraints>> {
|
||||
let mut constraints = Vec::new();
|
||||
for (table_no, table_reference) in table_references.iter().enumerate() {
|
||||
let rowid_alias_column = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
|
||||
let mut cs = Constraints {
|
||||
lookup: ConstraintLookup::Rowid,
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
};
|
||||
let mut cs_ephemeral = Constraints {
|
||||
lookup: ConstraintLookup::EphemeralIndex,
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
};
|
||||
for (i, term) in where_clause.iter().enumerate() {
|
||||
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
|
||||
continue;
|
||||
};
|
||||
if let Some(outer_join_tbl) = term.from_outer_join {
|
||||
if outer_join_tbl != table_no {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
match lhs {
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
if *table == table_no {
|
||||
if rowid_alias_column.map_or(false, |idx| *column == idx) {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
} else {
|
||||
cs_ephemeral.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: *column,
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
ast::Expr::RowId { table, .. } => {
|
||||
if *table == table_no && rowid_alias_column.is_some() {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
match rhs {
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
if *table == table_no {
|
||||
if rowid_alias_column.map_or(false, |idx| *column == idx) {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
} else {
|
||||
cs_ephemeral.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: *column,
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
ast::Expr::RowId { table, .. } => {
|
||||
if *table == table_no && rowid_alias_column.is_some() {
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: 0,
|
||||
table_col_pos: rowid_alias_column.unwrap(),
|
||||
sort_order: SortOrder::Asc,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
// First sort by position, with equalities first within each position
|
||||
cs.constraints.sort_by(|a, b| {
|
||||
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
|
||||
if pos_cmp == Ordering::Equal {
|
||||
// If same position, sort equalities first
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
} else {
|
||||
pos_cmp
|
||||
}
|
||||
});
|
||||
cs_ephemeral.constraints.sort_by(|a, b| {
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
});
|
||||
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
|
||||
cs.constraints.dedup_by_key(|c| c.index_col_pos);
|
||||
|
||||
// Truncate at first gap in positions
|
||||
let mut last_pos = 0;
|
||||
let mut i = 0;
|
||||
for constraint in cs.constraints.iter() {
|
||||
if constraint.index_col_pos != last_pos {
|
||||
if constraint.index_col_pos != last_pos + 1 {
|
||||
break;
|
||||
}
|
||||
last_pos = constraint.index_col_pos;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
cs.constraints.truncate(i);
|
||||
|
||||
// Truncate after the first inequality
|
||||
if let Some(first_inequality) = cs
|
||||
.constraints
|
||||
.iter()
|
||||
.position(|c| c.operator != ast::Operator::Equals)
|
||||
{
|
||||
cs.constraints.truncate(first_inequality + 1);
|
||||
}
|
||||
if rowid_alias_column.is_some() {
|
||||
constraints.push(cs);
|
||||
}
|
||||
constraints.push(cs_ephemeral);
|
||||
|
||||
let indexes = available_indexes.get(table_reference.table.get_name());
|
||||
if let Some(indexes) = indexes {
|
||||
for index in indexes {
|
||||
let mut cs = Constraints {
|
||||
lookup: ConstraintLookup::Index(index.clone()),
|
||||
table_no,
|
||||
constraints: Vec::new(),
|
||||
};
|
||||
for (i, term) in where_clause.iter().enumerate() {
|
||||
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
|
||||
continue;
|
||||
};
|
||||
if let Some(outer_join_tbl) = term.from_outer_join {
|
||||
if outer_join_tbl != table_no {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(position_in_index) =
|
||||
get_column_position_in_index(lhs, table_no, index)?
|
||||
{
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Rhs),
|
||||
operator,
|
||||
index_col_pos: position_in_index,
|
||||
table_col_pos: {
|
||||
let ast::Expr::Column { column, .. } = unwrap_parens(lhs)? else {
|
||||
crate::bail_parse_error!("expected column in index constraint");
|
||||
};
|
||||
*column
|
||||
},
|
||||
sort_order: index.columns[position_in_index].order,
|
||||
lhs_mask: table_mask_from_expr(rhs)?,
|
||||
});
|
||||
}
|
||||
if let Some(position_in_index) =
|
||||
get_column_position_in_index(rhs, table_no, index)?
|
||||
{
|
||||
cs.constraints.push(Constraint {
|
||||
where_clause_pos: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(operator),
|
||||
index_col_pos: position_in_index,
|
||||
table_col_pos: {
|
||||
let ast::Expr::Column { column, .. } = unwrap_parens(rhs)? else {
|
||||
crate::bail_parse_error!("expected column in index constraint");
|
||||
};
|
||||
*column
|
||||
},
|
||||
sort_order: index.columns[position_in_index].order,
|
||||
lhs_mask: table_mask_from_expr(lhs)?,
|
||||
});
|
||||
}
|
||||
}
|
||||
// First sort by position, with equalities first within each position
|
||||
cs.constraints.sort_by(|a, b| {
|
||||
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
|
||||
if pos_cmp == Ordering::Equal {
|
||||
// If same position, sort equalities first
|
||||
if a.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else if b.operator == ast::Operator::Equals {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
} else {
|
||||
pos_cmp
|
||||
}
|
||||
});
|
||||
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
|
||||
cs.constraints.dedup_by_key(|c| c.index_col_pos);
|
||||
|
||||
// Truncate at first gap in positions
|
||||
let mut last_pos = 0;
|
||||
let mut i = 0;
|
||||
for constraint in cs.constraints.iter() {
|
||||
if constraint.index_col_pos != last_pos {
|
||||
if constraint.index_col_pos != last_pos + 1 {
|
||||
break;
|
||||
}
|
||||
last_pos = constraint.index_col_pos;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
cs.constraints.truncate(i);
|
||||
|
||||
// Truncate after the first inequality
|
||||
if let Some(first_inequality) = cs
|
||||
.constraints
|
||||
.iter()
|
||||
.position(|c| c.operator != ast::Operator::Equals)
|
||||
{
|
||||
cs.constraints.truncate(first_inequality + 1);
|
||||
}
|
||||
constraints.push(cs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(constraints)
|
||||
}
|
||||
|
||||
pub fn usable_constraints_for_join_order<'a>(
|
||||
cs: &'a [Constraint],
|
||||
table_index: usize,
|
||||
join_order: &[JoinOrderMember],
|
||||
) -> &'a [Constraint] {
|
||||
let mut usable_until = 0;
|
||||
for constraint in cs.iter() {
|
||||
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_index);
|
||||
if other_side_refers_to_self {
|
||||
break;
|
||||
}
|
||||
let lhs_mask = TableMask::from_iter(
|
||||
join_order
|
||||
.iter()
|
||||
.take(join_order.len() - 1)
|
||||
.map(|j| j.table_no),
|
||||
);
|
||||
let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask);
|
||||
if !all_required_tables_are_on_left_side {
|
||||
break;
|
||||
}
|
||||
usable_until += 1;
|
||||
}
|
||||
&cs[..usable_until]
|
||||
}
|
||||
|
||||
/// Get the position of a column in an index
|
||||
/// For example, if there is an index on table T(x,y) then y's position in the index is 1.
|
||||
fn get_column_position_in_index(
|
||||
expr: &ast::Expr,
|
||||
table_index: usize,
|
||||
index: &Arc<Index>,
|
||||
) -> Result<Option<usize>> {
|
||||
let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else {
|
||||
return Ok(None);
|
||||
};
|
||||
if *table != table_index {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(index.column_table_pos_to_index_pos(*column))
|
||||
}
|
||||
|
||||
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
||||
match op {
|
||||
ast::Operator::Equals => ast::Operator::Equals,
|
||||
ast::Operator::Greater => ast::Operator::Less,
|
||||
ast::Operator::GreaterEquals => ast::Operator::LessEquals,
|
||||
ast::Operator::Less => ast::Operator::Greater,
|
||||
ast::Operator::LessEquals => ast::Operator::GreaterEquals,
|
||||
_ => panic!("unexpected operator: {:?}", op),
|
||||
}
|
||||
}
|
||||
103
core/translate/optimizer/cost.rs
Normal file
103
core/translate/optimizer/cost.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use limbo_sqlite3_parser::ast;
|
||||
|
||||
use super::constraints::Constraint;
|
||||
|
||||
/// Newtype around an `f64` representing the cost of an operation.
///
/// This is used to estimate the cost of scans, seeks, and joins.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct Cost(pub f64);

/// Costs add up component-wise.
impl std::ops::Add for Cost {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        let Cost(left) = self;
        let Cost(right) = rhs;
        Cost(left + right)
    }
}

/// Gives read access to the wrapped `f64`.
impl std::ops::Deref for Cost {
    type Target = f64;

    fn deref(&self) -> &Self::Target {
        let Cost(inner) = self;
        inner
    }
}
|
||||
|
||||
/// Properties of a lookup key (index or rowid) that feed the cost estimate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndexInfo {
    /// Whether the key is unique.
    pub unique: bool,
    /// Number of columns in the key.
    pub column_count: usize,
    /// Whether the index is a covering index for the query.
    pub covering: bool,
}
|
||||
|
||||
pub const ESTIMATED_HARDCODED_ROWS_PER_TABLE: usize = 1000000;
|
||||
pub const ESTIMATED_HARDCODED_ROWS_PER_PAGE: usize = 50; // roughly 80 bytes per 4096 byte page
|
||||
|
||||
pub fn estimate_page_io_cost(rowcount: f64) -> Cost {
|
||||
Cost((rowcount as f64 / ESTIMATED_HARDCODED_ROWS_PER_PAGE as f64).ceil())
|
||||
}
|
||||
|
||||
/// Estimate the cost of a scan or seek operation.
|
||||
///
|
||||
/// This is a very simple model that estimates the number of pages read
|
||||
/// based on the number of rows read, ignoring any CPU costs.
|
||||
pub fn estimate_cost_for_scan_or_seek(
|
||||
index_info: Option<IndexInfo>,
|
||||
constraints: &[Constraint],
|
||||
input_cardinality: f64,
|
||||
) -> Cost {
|
||||
let Some(index_info) = index_info else {
|
||||
return estimate_page_io_cost(
|
||||
input_cardinality * ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64,
|
||||
);
|
||||
};
|
||||
|
||||
let final_constraint_is_range = constraints
|
||||
.last()
|
||||
.map_or(false, |c| c.operator != ast::Operator::Equals);
|
||||
let equalities_count = constraints
|
||||
.iter()
|
||||
.take(if final_constraint_is_range {
|
||||
constraints.len() - 1
|
||||
} else {
|
||||
constraints.len()
|
||||
})
|
||||
.count() as f64;
|
||||
|
||||
let cost_multiplier = match (
|
||||
index_info.unique,
|
||||
index_info.column_count as f64,
|
||||
equalities_count,
|
||||
) {
|
||||
// no equalities: let's assume range query selectivity is 0.4. if final constraint is not range and there are no equalities, it means full table scan incoming
|
||||
(_, _, 0.0) => {
|
||||
if final_constraint_is_range {
|
||||
0.4
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
// on an unique index if we have equalities across all index columns, assume very high selectivity
|
||||
(true, index_cols, eq_count) if eq_count == index_cols => 0.01,
|
||||
(false, index_cols, eq_count) if eq_count == index_cols => 0.1,
|
||||
// some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4
|
||||
(_, _, eq_count) => {
|
||||
let mut multiplier = 1.0;
|
||||
for _ in 0..(eq_count as usize) {
|
||||
multiplier *= 0.1;
|
||||
}
|
||||
multiplier * if final_constraint_is_range { 4.0 } else { 1.0 }
|
||||
}
|
||||
};
|
||||
|
||||
// little bonus for covering indexes
|
||||
let covering_multiplier = if index_info.covering { 0.9 } else { 1.0 };
|
||||
|
||||
estimate_page_io_cost(
|
||||
cost_multiplier
|
||||
* ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64
|
||||
* input_cardinality
|
||||
* covering_multiplier,
|
||||
)
|
||||
}
|
||||
1412
core/translate/optimizer/join.rs
Normal file
1412
core/translate/optimizer/join.rs
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
254
core/translate/optimizer/order.rs
Normal file
254
core/translate/optimizer/order.rs
Normal file
@@ -0,0 +1,254 @@
|
||||
use std::cell::RefCell;
|
||||
|
||||
use limbo_sqlite3_parser::ast::{self, SortOrder};
|
||||
|
||||
use crate::{
|
||||
translate::plan::{GroupBy, IterationDirection, TableReference},
|
||||
util::exprs_are_equivalent,
|
||||
};
|
||||
|
||||
use super::{
|
||||
access_method::{AccessMethod, AccessMethodKind},
|
||||
join::JoinN,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct ColumnOrder {
|
||||
pub table_no: usize,
|
||||
pub column_no: usize,
|
||||
pub order: SortOrder,
|
||||
}
|
||||
|
||||
/// Names the sorting step(s) an [OrderTarget] was derived from.
#[derive(Debug, Clone, PartialEq)]
pub enum EliminatesSort {
    /// The target was derived from the GROUP BY only.
    GroupBy,
    /// The target was derived from the ORDER BY only.
    OrderBy,
    /// The target was derived from GROUP BY columns combined with ORDER BY directions.
    GroupByAndOrderBy,
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct OrderTarget(pub Vec<ColumnOrder>, pub EliminatesSort);
|
||||
|
||||
impl OrderTarget {
|
||||
fn maybe_from_iterator<'a>(
|
||||
list: impl Iterator<Item = (&'a ast::Expr, SortOrder)> + Clone,
|
||||
eliminates_sort: EliminatesSort,
|
||||
) -> Option<Self> {
|
||||
if list.clone().count() == 0 {
|
||||
return None;
|
||||
}
|
||||
if list
|
||||
.clone()
|
||||
.any(|(expr, _)| !matches!(expr, ast::Expr::Column { .. }))
|
||||
{
|
||||
return None;
|
||||
}
|
||||
Some(OrderTarget(
|
||||
list.map(|(expr, order)| {
|
||||
let ast::Expr::Column { table, column, .. } = expr else {
|
||||
unreachable!();
|
||||
};
|
||||
ColumnOrder {
|
||||
table_no: *table,
|
||||
column_no: *column,
|
||||
order,
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
eliminates_sort,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute an [OrderTarget] for the join optimizer to use.
|
||||
/// Ideally, a join order is both efficient in joining the tables
|
||||
/// but also returns the results in an order that minimizes the amount of
|
||||
/// sorting that needs to be done later (either in GROUP BY, ORDER BY, or both).
|
||||
///
|
||||
/// TODO: this does not currently handle the case where we definitely cannot eliminate
|
||||
/// the ORDER BY sorter, but we could still eliminate the GROUP BY sorter.
|
||||
pub fn compute_order_target(
|
||||
order_by: &Option<Vec<(ast::Expr, SortOrder)>>,
|
||||
group_by: Option<&mut GroupBy>,
|
||||
) -> Option<OrderTarget> {
|
||||
match (order_by, group_by) {
|
||||
// No ordering demands - we don't care what order the joined result rows are in
|
||||
(None, None) => None,
|
||||
// Only ORDER BY - we would like the joined result rows to be in the order specified by the ORDER BY
|
||||
(Some(order_by), None) => OrderTarget::maybe_from_iterator(
|
||||
order_by.iter().map(|(expr, order)| (expr, *order)),
|
||||
EliminatesSort::OrderBy,
|
||||
),
|
||||
// Only GROUP BY - we would like the joined result rows to be in the order specified by the GROUP BY
|
||||
(None, Some(group_by)) => OrderTarget::maybe_from_iterator(
|
||||
group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)),
|
||||
EliminatesSort::GroupBy,
|
||||
),
|
||||
// Both ORDER BY and GROUP BY:
|
||||
// If the GROUP BY does not contain all the expressions in the ORDER BY,
|
||||
// then we must separately sort the result rows for ORDER BY anyway.
|
||||
// However, in that case we can use the GROUP BY expressions as the target order for the join,
|
||||
// so that we don't have to sort twice.
|
||||
//
|
||||
// If the GROUP BY contains all the expressions in the ORDER BY,
|
||||
// then we again can use the GROUP BY expressions as the target order for the join;
|
||||
// however in this case we must take the ASC/DESC from ORDER BY into account.
|
||||
(Some(order_by), Some(group_by)) => {
|
||||
// Does the group by contain all expressions in the order by?
|
||||
let group_by_contains_all = group_by.exprs.iter().all(|expr| {
|
||||
order_by
|
||||
.iter()
|
||||
.any(|(order_by_expr, _)| exprs_are_equivalent(expr, order_by_expr))
|
||||
});
|
||||
// If not, let's try to target an ordering that matches the group by -- we don't care about ASC/DESC
|
||||
if !group_by_contains_all {
|
||||
return OrderTarget::maybe_from_iterator(
|
||||
group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)),
|
||||
EliminatesSort::GroupBy,
|
||||
);
|
||||
}
|
||||
// If yes, let's try to target an ordering that matches the GROUP BY columns,
|
||||
// but the ORDER BY orderings. First, we need to reorder the GROUP BY columns to match the ORDER BY columns.
|
||||
group_by.exprs.sort_by_key(|expr| {
|
||||
order_by
|
||||
.iter()
|
||||
.position(|(order_by_expr, _)| exprs_are_equivalent(expr, order_by_expr))
|
||||
.map_or(usize::MAX, |i| i)
|
||||
});
|
||||
// Iterate over GROUP BY, but take the ORDER BY orderings into account.
|
||||
OrderTarget::maybe_from_iterator(
|
||||
group_by
|
||||
.exprs
|
||||
.iter()
|
||||
.zip(
|
||||
order_by
|
||||
.iter()
|
||||
.map(|(_, dir)| dir)
|
||||
.chain(std::iter::repeat(&SortOrder::Asc)),
|
||||
)
|
||||
.map(|(expr, dir)| (expr, *dir)),
|
||||
EliminatesSort::GroupByAndOrderBy,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the plan's row iteration order matches the [OrderTarget]'s column order
|
||||
pub fn plan_satisfies_order_target(
|
||||
plan: &JoinN,
|
||||
access_methods_arena: &RefCell<Vec<AccessMethod>>,
|
||||
table_references: &[TableReference],
|
||||
order_target: &OrderTarget,
|
||||
) -> bool {
|
||||
let mut target_col_idx = 0;
|
||||
for (i, table_no) in plan.table_numbers.iter().enumerate() {
|
||||
let table_ref = &table_references[*table_no];
|
||||
// Check if this table has an access method that provides ordering
|
||||
let access_method = &access_methods_arena.borrow()[plan.best_access_methods[i]];
|
||||
match &access_method.kind {
|
||||
AccessMethodKind::Scan {
|
||||
index: None,
|
||||
iter_dir,
|
||||
} => {
|
||||
let rowid_alias_col = table_ref
|
||||
.table
|
||||
.columns()
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
let Some(rowid_alias_col) = rowid_alias_col else {
|
||||
return false;
|
||||
};
|
||||
let target_col = &order_target.0[target_col_idx];
|
||||
let order_matches = if *iter_dir == IterationDirection::Forwards {
|
||||
target_col.order == SortOrder::Asc
|
||||
} else {
|
||||
target_col.order == SortOrder::Desc
|
||||
};
|
||||
if target_col.table_no != *table_no
|
||||
|| target_col.column_no != rowid_alias_col
|
||||
|| !order_matches
|
||||
{
|
||||
return false;
|
||||
}
|
||||
target_col_idx += 1;
|
||||
if target_col_idx == order_target.0.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
AccessMethodKind::Scan {
|
||||
index: Some(index),
|
||||
iter_dir,
|
||||
} => {
|
||||
// The index columns must match the order target columns for this table
|
||||
for index_col in index.columns.iter() {
|
||||
let target_col = &order_target.0[target_col_idx];
|
||||
let order_matches = if *iter_dir == IterationDirection::Forwards {
|
||||
target_col.order == index_col.order
|
||||
} else {
|
||||
target_col.order != index_col.order
|
||||
};
|
||||
if target_col.table_no != *table_no
|
||||
|| target_col.column_no != index_col.pos_in_table
|
||||
|| !order_matches
|
||||
{
|
||||
return false;
|
||||
}
|
||||
target_col_idx += 1;
|
||||
if target_col_idx == order_target.0.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
AccessMethodKind::Search {
|
||||
index, iter_dir, ..
|
||||
} => {
|
||||
if let Some(index) = index {
|
||||
for index_col in index.columns.iter() {
|
||||
let target_col = &order_target.0[target_col_idx];
|
||||
let order_matches = if *iter_dir == IterationDirection::Forwards {
|
||||
target_col.order == index_col.order
|
||||
} else {
|
||||
target_col.order != index_col.order
|
||||
};
|
||||
if target_col.table_no != *table_no
|
||||
|| target_col.column_no != index_col.pos_in_table
|
||||
|| !order_matches
|
||||
{
|
||||
return false;
|
||||
}
|
||||
target_col_idx += 1;
|
||||
if target_col_idx == order_target.0.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let rowid_alias_col = table_ref
|
||||
.table
|
||||
.columns()
|
||||
.iter()
|
||||
.position(|c| c.is_rowid_alias);
|
||||
let Some(rowid_alias_col) = rowid_alias_col else {
|
||||
return false;
|
||||
};
|
||||
let target_col = &order_target.0[target_col_idx];
|
||||
let order_matches = if *iter_dir == IterationDirection::Forwards {
|
||||
target_col.order == SortOrder::Asc
|
||||
} else {
|
||||
target_col.order == SortOrder::Desc
|
||||
};
|
||||
if target_col.table_no != *table_no
|
||||
|| target_col.column_no != rowid_alias_col
|
||||
|| !order_matches
|
||||
{
|
||||
return false;
|
||||
}
|
||||
target_col_idx += 1;
|
||||
if target_col_idx == order_target.0.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
Reference in New Issue
Block a user