optimizer module split

This commit is contained in:
Jussi Saurio
2025-05-10 01:18:35 +03:00
parent ec45a92bac
commit bd875e3876
7 changed files with 2458 additions and 2377 deletions

View File

@@ -2579,3 +2579,64 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu
/// Strip the surrounding quote characters from a SQL string literal and
/// collapse every doubled single quote (`''`) into a single one.
///
/// The first and last byte of `input` are dropped unconditionally; they are
/// not checked to actually be quote characters.
///
/// # Panics
///
/// Panics if `input` is shorter than two bytes, or if its first or last
/// character is wider than one byte, because the delimiters are removed by
/// byte-slicing.
pub fn sanitize_string(input: &str) -> String {
    // `str::replace` already yields an owned `String`, so no further copy is needed.
    input[1..input.len() - 1].replace("''", "'")
}
/// Split a comparison expression into `(lhs, operator, rhs)`.
///
/// Enclosing parentheses are removed first. `Ok(Some(..))` is returned only for
/// a binary expression whose operator is one of `=`, `>`, `<`, `>=` or `<=`;
/// every other expression yields `Ok(None)`.
///
/// # Errors
///
/// Propagates any error produced while unwrapping parentheses.
pub fn as_binary_components(
    expr: &ast::Expr,
) -> Result<Option<(&ast::Expr, ast::Operator, &ast::Expr)>> {
    let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(expr)? else {
        return Ok(None);
    };
    let is_supported_comparison = matches!(
        operator,
        ast::Operator::Equals
            | ast::Operator::Greater
            | ast::Operator::Less
            | ast::Operator::GreaterEquals
            | ast::Operator::LessEquals
    );
    if !is_supported_comparison {
        return Ok(None);
    }
    Ok(Some((lhs.as_ref(), *operator, rhs.as_ref())))
}
/// Strip every layer of enclosing parentheses from an expression,
/// e.g. `(((t.x > 5)))` becomes `t.x > 5`.
///
/// This is a free-function front end for [UnwrapParens::unwrap_parens], so it
/// accepts any type implementing that trait and returns whatever that
/// implementation returns (including its error).
pub fn unwrap_parens<T: UnwrapParens>(expr: T) -> Result<T> {
    T::unwrap_parens(expr)
}
/// Types from which enclosing parentheses can be stripped.
///
/// Implementations consume `self` and hand back a value of the same type, so
/// the trait can be implemented for both owned and borrowed expressions.
pub trait UnwrapParens {
/// Remove enclosing parentheses from `self`, returning the inner value
/// or an error if the parentheses cannot be unwrapped.
fn unwrap_parens(self) -> Result<Self>
where
Self: Sized;
}
impl UnwrapParens for &ast::Expr {
    /// Walk inwards through nested parenthesized expressions and return a
    /// reference to the innermost non-parenthesized expression.
    ///
    /// Fails with a parse error as soon as a parenthesized group does not hold
    /// exactly one expression.
    fn unwrap_parens(self) -> Result<Self> {
        let mut current = self;
        while let ast::Expr::Parenthesized(inner) = current {
            match inner.first() {
                Some(only) if inner.len() == 1 => current = only,
                _ => crate::bail_parse_error!("expected single expression in parentheses"),
            }
        }
        Ok(current)
    }
}
impl UnwrapParens for ast::Expr {
    /// Peel nested parenthesized expressions off an owned expression and return
    /// the innermost non-parenthesized expression by value.
    ///
    /// Fails with a parse error as soon as a parenthesized group does not hold
    /// exactly one expression.
    fn unwrap_parens(self) -> Result<Self> {
        let mut current = self;
        while let ast::Expr::Parenthesized(mut inner) = current {
            current = match inner.len() {
                1 => inner.pop().unwrap(),
                _ => crate::bail_parse_error!("expected single expression in parentheses"),
            };
        }
        Ok(current)
    }
}

View File

@@ -0,0 +1,229 @@
use std::sync::Arc;
use limbo_sqlite3_parser::ast::SortOrder;
use crate::{
schema::Index,
translate::plan::{IterationDirection, JoinOrderMember, TableReference},
Result,
};
use super::{
constraints::{usable_constraints_for_join_order, Constraint, ConstraintLookup, Constraints},
cost::{estimate_cost_for_scan_or_seek, Cost, IndexInfo},
order::OrderTarget,
};
#[derive(Debug, Clone)]
/// Represents a way to access a table.
///
/// Pairs an estimated cost with the concrete strategy (scan or search) that the
/// cost belongs to.
pub struct AccessMethod<'a> {
/// The estimated number of page fetches.
/// We are ignoring CPU cost for now.
pub cost: Cost,
/// The concrete strategy: a full scan or a constrained search, see [AccessMethodKind].
pub kind: AccessMethodKind<'a>,
}
impl<'a> AccessMethod<'a> {
    /// Overwrite the iteration direction, regardless of whether this is a scan or a search.
    pub fn set_iter_dir(&mut self, new_dir: IterationDirection) {
        match &mut self.kind {
            AccessMethodKind::Scan { iter_dir, .. } => *iter_dir = new_dir,
            AccessMethodKind::Search { iter_dir, .. } => *iter_dir = new_dir,
        }
    }

    /// Point this access method at the given lookup and constraint set.
    ///
    /// * `lookup` selects the index to use: [ConstraintLookup::Index] uses that index,
    ///   [ConstraintLookup::Rowid] uses no index.
    /// * `constraints` decides the kind: a non-empty slice makes this a
    ///   [AccessMethodKind::Search] over those constraints, an empty slice makes it a
    ///   [AccessMethodKind::Scan]. The current iteration direction is kept in every case.
    ///
    /// # Panics
    ///
    /// Panics if `lookup` is [ConstraintLookup::EphemeralIndex].
    pub fn set_constraints(&mut self, lookup: &ConstraintLookup, constraints: &'a [Constraint]) {
        let index = match lookup {
            ConstraintLookup::Index(index) => Some(index),
            ConstraintLookup::Rowid => None,
            ConstraintLookup::EphemeralIndex => {
                panic!("set_constraints called with ConstraintLookup::EphemeralIndex")
            }
        };
        match (&mut self.kind, constraints.is_empty()) {
            (
                AccessMethodKind::Search {
                    // Bound under distinct names: binding the field as `constraints` would
                    // shadow the parameter and turn the assignment below into a no-op,
                    // leaving the previous lookup's constraints attached to the new index.
                    constraints: current_constraints,
                    index: current_index,
                    ..
                },
                false,
            ) => {
                *current_constraints = constraints;
                *current_index = index.cloned();
            }
            (AccessMethodKind::Search { iter_dir, .. }, true) => {
                // No constraints left to search with: degrade to a scan.
                self.kind = AccessMethodKind::Scan {
                    index: index.cloned(),
                    iter_dir: *iter_dir,
                };
            }
            (AccessMethodKind::Scan { iter_dir, .. }, false) => {
                // Constraints became available: upgrade to a search.
                self.kind = AccessMethodKind::Search {
                    index: index.cloned(),
                    iter_dir: *iter_dir,
                    constraints,
                };
            }
            (AccessMethodKind::Scan { index: current_index, .. }, true) => {
                *current_index = index.cloned();
            }
        }
    }
}
#[derive(Debug, Clone)]
/// Represents the kind of access method.
pub enum AccessMethodKind<'a> {
/// A full scan, which can be an index scan or a table scan.
Scan {
/// The index to scan, or `None` for a table scan.
index: Option<Arc<Index>>,
/// The direction in which rows are iterated.
iter_dir: IterationDirection,
},
/// A search, which can be an index seek or a rowid-based search.
Search {
/// The index to seek in, or `None` for a rowid-based search.
index: Option<Arc<Index>>,
/// The direction in which rows are iterated.
iter_dir: IterationDirection,
/// The constraints used for the search; borrowed for `'a` rather than owned.
constraints: &'a [Constraint],
},
}
/// Return the best [AccessMethod] for a given join order.
/// table_index and table_reference refer to the rightmost table in the join order.
///
/// The search starts from an index-less forward scan as the baseline and then
/// considers every entry of `constraints` whose `table_no` equals `table_index`,
/// skipping [ConstraintLookup::EphemeralIndex] entries. A candidate replaces the
/// current best when its estimated cost is lower than the current best cost plus
/// an ordering bonus. Finally, the iteration direction is set to backwards when
/// the chosen access path matches the columns of `maybe_order_target` but in the
/// opposite sort order, and to forwards otherwise.
pub fn find_best_access_method_for_join_order<'a>(
table_index: usize,
table_reference: &TableReference,
constraints: &'a [Constraints],
join_order: &[JoinOrderMember],
maybe_order_target: Option<&OrderTarget>,
input_cardinality: f64,
) -> Result<AccessMethod<'a>> {
// Baseline: the cost of reading the table with no index and no constraints.
let cost_of_full_table_scan = estimate_cost_for_scan_or_seek(None, &[], input_cardinality);
let mut best_access_method = AccessMethod {
cost: cost_of_full_table_scan,
kind: AccessMethodKind::Scan {
index: None,
iter_dir: IterationDirection::Forwards,
},
};
// Position of the rowid alias column in the table's column list, if there is one.
let rowid_column_idx = table_reference
.columns()
.iter()
.position(|c| c.is_rowid_alias);
for csmap in constraints
.iter()
.filter(|csmap| csmap.table_no == table_index)
{
let index_info = match &csmap.lookup {
ConstraintLookup::Index(index) => IndexInfo {
unique: index.unique,
covering: table_reference.index_is_covering(index),
column_count: index.columns.len(),
},
ConstraintLookup::Rowid => IndexInfo {
unique: true, // rowids are always unique
covering: false,
column_count: 1,
},
// Ephemeral-index candidates are not costed by this function.
ConstraintLookup::EphemeralIndex => continue,
};
// Only the leading constraints that this particular join order can evaluate are costed.
let usable_constraints =
usable_constraints_for_join_order(&csmap.constraints, table_index, join_order);
let cost = estimate_cost_for_scan_or_seek(
Some(index_info),
&usable_constraints,
input_cardinality,
);
// Cost(1.0) when the leading columns of this lookup line up with the order target,
// either all in the requested direction or all in the opposite one; Cost(0.0) otherwise.
let order_satisfiability_bonus = if let Some(order_target) = maybe_order_target {
let mut all_same_direction = true;
let mut all_opposite_direction = true;
// Compare only as many columns as both the order target and the lookup provide.
for i in 0..order_target.0.len().min(index_info.column_count) {
let correct_table = order_target.0[i].table_no == table_index;
let correct_column = {
match &csmap.lookup {
ConstraintLookup::Index(index) => {
index.columns[i].pos_in_table == order_target.0[i].column_no
}
ConstraintLookup::Rowid => {
rowid_column_idx.map_or(false, |idx| idx == order_target.0[i].column_no)
}
ConstraintLookup::EphemeralIndex => unreachable!(),
}
};
// A column mismatch disqualifies the lookup from the bonus entirely.
if !correct_table || !correct_column {
all_same_direction = false;
all_opposite_direction = false;
break;
}
let correct_order = {
match &csmap.lookup {
ConstraintLookup::Index(index) => {
order_target.0[i].order == index.columns[i].order
}
ConstraintLookup::Rowid => order_target.0[i].order == SortOrder::Asc,
ConstraintLookup::EphemeralIndex => unreachable!(),
}
};
if correct_order {
all_opposite_direction = false;
} else {
all_same_direction = false;
}
}
if all_same_direction || all_opposite_direction {
Cost(1.0)
} else {
Cost(0.0)
}
} else {
Cost(0.0)
};
// The bonus is added to the incumbent's cost, so an order-satisfying candidate is accepted
// even when it costs slightly more (by less than the bonus) than the current best.
// The cost stored for the winner is its raw cost, without the bonus.
if cost < best_access_method.cost + order_satisfiability_bonus {
best_access_method.cost = cost;
best_access_method.set_constraints(&csmap.lookup, &usable_constraints);
}
}
let iter_dir = if let Some(order_target) = maybe_order_target {
// if index columns match the order target columns in the exact reverse directions, then we should use IterationDirection::Backwards
let index = match &best_access_method.kind {
AccessMethodKind::Scan { index, .. } => index.as_ref(),
AccessMethodKind::Search { index, .. } => index.as_ref(),
};
let mut should_use_backwards = true;
// Without an index, the single ordering column considered is the rowid alias.
let num_cols = index.map_or(1, |i| i.columns.len());
for i in 0..order_target.0.len().min(num_cols) {
let correct_table = order_target.0[i].table_no == table_index;
let correct_column = {
match index {
Some(index) => index.columns[i].pos_in_table == order_target.0[i].column_no,
None => {
rowid_column_idx.map_or(false, |idx| idx == order_target.0[i].column_no)
}
}
};
if !correct_table || !correct_column {
should_use_backwards = false;
break;
}
let correct_order = {
match index {
Some(index) => order_target.0[i].order == index.columns[i].order,
None => order_target.0[i].order == SortOrder::Asc,
}
};
// A single column that already has the requested order rules out backwards iteration.
if correct_order {
should_use_backwards = false;
break;
}
}
if should_use_backwards {
IterationDirection::Backwards
} else {
IterationDirection::Forwards
}
} else {
IterationDirection::Forwards
};
best_access_method.set_iter_dir(iter_dir);
Ok(best_access_method)
}

View File

@@ -0,0 +1,382 @@
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
use crate::{
schema::Index,
translate::{
expr::{as_binary_components, unwrap_parens},
plan::{JoinOrderMember, TableReference, WhereTerm},
planner::{table_mask_from_expr, TableMask},
},
Result,
};
use limbo_sqlite3_parser::ast::{self, SortOrder};
#[derive(Debug, Clone)]
/// A single WHERE-clause comparison that constrains one column of one table,
/// described relative to the lookup (index or rowid) it may be used with.
pub struct Constraint {
/// The position of the constraint in the WHERE clause, e.g. in SELECT * FROM t WHERE true AND t.x = 10, the position is (1, BinaryExprSide::Rhs),
/// since the RHS '10' is the constraining expression and it's part of the second term in the WHERE clause.
pub where_clause_pos: (usize, BinaryExprSide),
/// The operator of the constraint, e.g. =, >, <
pub operator: ast::Operator,
/// The position of the index column in the index, e.g. if the index is (a,b,c) and the constraint is on b, then index_col_pos is 1.
/// For Rowid constraints this is always 0.
pub index_col_pos: usize,
/// The position of the constrained column in the table.
pub table_col_pos: usize,
/// The sort order of the index column, ASC or DESC. For Rowid constraints this is always ASC.
pub sort_order: SortOrder,
/// Bitmask of tables that are required to be on the left side of the constrained table,
/// e.g. in SELECT * FROM t1,t2,t3 WHERE t1.x = t2.x + t3.x, the lhs_mask contains t2 and t3.
pub lhs_mask: TableMask,
}
#[derive(Debug, Clone)]
/// ConstraintLookup denotes how a given set of [Constraint]s can be used to access a table.
///
/// ConstraintLookup::Index(index) means that the constraints can be used to access the table using the given index.
/// ConstraintLookup::Rowid means that the constraints can be used to access the table using the table's rowid column.
/// ConstraintLookup::EphemeralIndex means that the constraints are not useful for accessing the table,
/// but an ephemeral index can be built ad-hoc to use them.
pub enum ConstraintLookup {
/// Access through the given existing index.
Index(Arc<Index>),
/// Access through the table's rowid.
Rowid,
/// No existing access path; an ephemeral index could be built for these constraints.
EphemeralIndex,
}
#[derive(Debug)]
/// A collection of [Constraint]s for a given (table, index) pair.
pub struct Constraints {
/// How the constraints in this collection can be used to access the table.
pub lookup: ConstraintLookup,
/// The table these constraints apply to, as a position in the list of table references.
pub table_no: usize,
/// The constraints themselves.
pub constraints: Vec<Constraint>,
}
/// Helper enum for [Constraint] to indicate which side of a binary comparison expression is being compared to the index column.
/// For example, if the where clause is "WHERE x = 10" and there's an index on x,
/// the [Constraint] for the where clause term "x = 10" will have a [BinaryExprSide::Rhs]
/// because the right hand side expression "10" is being compared to the index column "x".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryExprSide {
/// The left-hand side is the constraining expression (the constrained column is on the right).
Lhs,
/// The right-hand side is the constraining expression (the constrained column is on the left).
Rhs,
}
/// Precompute all potentially usable [Constraints] from a WHERE clause.
/// The resulting list of [Constraints] is then used to evaluate the best access methods for various join orders.
pub fn constraints_from_where_clause(
where_clause: &[WhereTerm],
table_references: &[TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
) -> Result<Vec<Constraints>> {
let mut constraints = Vec::new();
for (table_no, table_reference) in table_references.iter().enumerate() {
let rowid_alias_column = table_reference
.columns()
.iter()
.position(|c| c.is_rowid_alias);
let mut cs = Constraints {
lookup: ConstraintLookup::Rowid,
table_no,
constraints: Vec::new(),
};
let mut cs_ephemeral = Constraints {
lookup: ConstraintLookup::EphemeralIndex,
table_no,
constraints: Vec::new(),
};
for (i, term) in where_clause.iter().enumerate() {
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
continue;
};
if let Some(outer_join_tbl) = term.from_outer_join {
if outer_join_tbl != table_no {
continue;
}
}
match lhs {
ast::Expr::Column { table, column, .. } => {
if *table == table_no {
if rowid_alias_column.map_or(false, |idx| *column == idx) {
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
index_col_pos: 0,
table_col_pos: rowid_alias_column.unwrap(),
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(rhs)?,
});
} else {
cs_ephemeral.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
index_col_pos: 0,
table_col_pos: *column,
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(rhs)?,
});
}
}
}
ast::Expr::RowId { table, .. } => {
if *table == table_no && rowid_alias_column.is_some() {
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
index_col_pos: 0,
table_col_pos: rowid_alias_column.unwrap(),
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(rhs)?,
});
}
}
_ => {}
};
match rhs {
ast::Expr::Column { table, column, .. } => {
if *table == table_no {
if rowid_alias_column.map_or(false, |idx| *column == idx) {
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
index_col_pos: 0,
table_col_pos: rowid_alias_column.unwrap(),
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(lhs)?,
});
} else {
cs_ephemeral.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
index_col_pos: 0,
table_col_pos: *column,
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(lhs)?,
});
}
}
}
ast::Expr::RowId { table, .. } => {
if *table == table_no && rowid_alias_column.is_some() {
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
index_col_pos: 0,
table_col_pos: rowid_alias_column.unwrap(),
sort_order: SortOrder::Asc,
lhs_mask: table_mask_from_expr(lhs)?,
});
}
}
_ => {}
};
}
// First sort by position, with equalities first within each position
cs.constraints.sort_by(|a, b| {
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
if pos_cmp == Ordering::Equal {
// If same position, sort equalities first
if a.operator == ast::Operator::Equals {
Ordering::Less
} else if b.operator == ast::Operator::Equals {
Ordering::Greater
} else {
Ordering::Equal
}
} else {
pos_cmp
}
});
cs_ephemeral.constraints.sort_by(|a, b| {
if a.operator == ast::Operator::Equals {
Ordering::Less
} else if b.operator == ast::Operator::Equals {
Ordering::Greater
} else {
Ordering::Equal
}
});
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
cs.constraints.dedup_by_key(|c| c.index_col_pos);
// Truncate at first gap in positions
let mut last_pos = 0;
let mut i = 0;
for constraint in cs.constraints.iter() {
if constraint.index_col_pos != last_pos {
if constraint.index_col_pos != last_pos + 1 {
break;
}
last_pos = constraint.index_col_pos;
}
i += 1;
}
cs.constraints.truncate(i);
// Truncate after the first inequality
if let Some(first_inequality) = cs
.constraints
.iter()
.position(|c| c.operator != ast::Operator::Equals)
{
cs.constraints.truncate(first_inequality + 1);
}
if rowid_alias_column.is_some() {
constraints.push(cs);
}
constraints.push(cs_ephemeral);
let indexes = available_indexes.get(table_reference.table.get_name());
if let Some(indexes) = indexes {
for index in indexes {
let mut cs = Constraints {
lookup: ConstraintLookup::Index(index.clone()),
table_no,
constraints: Vec::new(),
};
for (i, term) in where_clause.iter().enumerate() {
let Some((lhs, operator, rhs)) = as_binary_components(&term.expr)? else {
continue;
};
if let Some(outer_join_tbl) = term.from_outer_join {
if outer_join_tbl != table_no {
continue;
}
}
if let Some(position_in_index) =
get_column_position_in_index(lhs, table_no, index)?
{
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
index_col_pos: position_in_index,
table_col_pos: {
let ast::Expr::Column { column, .. } = unwrap_parens(lhs)? else {
crate::bail_parse_error!("expected column in index constraint");
};
*column
},
sort_order: index.columns[position_in_index].order,
lhs_mask: table_mask_from_expr(rhs)?,
});
}
if let Some(position_in_index) =
get_column_position_in_index(rhs, table_no, index)?
{
cs.constraints.push(Constraint {
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
index_col_pos: position_in_index,
table_col_pos: {
let ast::Expr::Column { column, .. } = unwrap_parens(rhs)? else {
crate::bail_parse_error!("expected column in index constraint");
};
*column
},
sort_order: index.columns[position_in_index].order,
lhs_mask: table_mask_from_expr(lhs)?,
});
}
}
// First sort by position, with equalities first within each position
cs.constraints.sort_by(|a, b| {
let pos_cmp = a.index_col_pos.cmp(&b.index_col_pos);
if pos_cmp == Ordering::Equal {
// If same position, sort equalities first
if a.operator == ast::Operator::Equals {
Ordering::Less
} else if b.operator == ast::Operator::Equals {
Ordering::Greater
} else {
Ordering::Equal
}
} else {
pos_cmp
}
});
// Deduplicate by position, keeping first occurrence (which will be equality if one exists)
cs.constraints.dedup_by_key(|c| c.index_col_pos);
// Truncate at first gap in positions
let mut last_pos = 0;
let mut i = 0;
for constraint in cs.constraints.iter() {
if constraint.index_col_pos != last_pos {
if constraint.index_col_pos != last_pos + 1 {
break;
}
last_pos = constraint.index_col_pos;
}
i += 1;
}
cs.constraints.truncate(i);
// Truncate after the first inequality
if let Some(first_inequality) = cs
.constraints
.iter()
.position(|c| c.operator != ast::Operator::Equals)
{
cs.constraints.truncate(first_inequality + 1);
}
constraints.push(cs);
}
}
}
Ok(constraints)
}
/// Return the leading constraints of `cs` that can be used when `table_index` is the
/// rightmost table of `join_order`.
///
/// A constraint is usable when its constraining expression does not refer to the
/// constrained table itself and every table it requires appears in `join_order` before
/// the last member. The result stops at the first unusable constraint, so it is always
/// a prefix of `cs`.
pub fn usable_constraints_for_join_order<'a>(
    cs: &'a [Constraint],
    table_index: usize,
    join_order: &[JoinOrderMember],
) -> &'a [Constraint] {
    // The set of tables to the left of the rightmost table is the same for every
    // constraint, so build it once. `saturating_sub` keeps an empty join order from
    // underflowing.
    let tables_on_left = TableMask::from_iter(
        join_order
            .iter()
            .take(join_order.len().saturating_sub(1))
            .map(|j| j.table_no),
    );
    let usable_until = cs
        .iter()
        .take_while(|constraint| {
            let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_index);
            !other_side_refers_to_self && tables_on_left.contains_all(&constraint.lhs_mask)
        })
        .count();
    &cs[..usable_until]
}
/// Look up where a column expression sits inside an index.
/// For example, if there is an index on table T(x,y) then y's position in the index is 1.
///
/// Yields `Ok(None)` when `expr` (after removing parentheses) is not a column reference,
/// when it refers to a table other than `table_index`, or when the index does not
/// contain the column.
fn get_column_position_in_index(
    expr: &ast::Expr,
    table_index: usize,
    index: &Arc<Index>,
) -> Result<Option<usize>> {
    match unwrap_parens(expr)? {
        ast::Expr::Column { table, column, .. } if *table == table_index => {
            Ok(index.column_table_pos_to_index_pos(*column))
        }
        _ => Ok(None),
    }
}
/// Mirror a comparison operator, i.e. return the operator that keeps the comparison
/// equivalent once its two operands are swapped (`a < b` becomes `b > a`).
///
/// # Panics
///
/// Panics for any operator other than `=`, `<`, `<=`, `>` and `>=`.
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
    use ast::Operator::{Equals, Greater, GreaterEquals, Less, LessEquals};
    match op {
        // Equality is symmetric.
        Equals => Equals,
        Less => Greater,
        LessEquals => GreaterEquals,
        Greater => Less,
        GreaterEquals => LessEquals,
        _ => panic!("unexpected operator: {:?}", op),
    }
}

View File

@@ -0,0 +1,103 @@
use limbo_sqlite3_parser::ast;
use super::constraints::Constraint;
/// Newtype around an `f64` expressing how expensive an operation is estimated to be.
///
/// Costs are produced for scans, seeks and joins; they can be added together,
/// compared, and dereferenced to the underlying `f64`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct Cost(pub f64);

impl std::ops::Add for Cost {
    type Output = Cost;

    /// Add the two underlying values.
    fn add(self, rhs: Self) -> Self::Output {
        let Cost(left) = self;
        let Cost(right) = rhs;
        Cost(left + right)
    }
}

impl std::ops::Deref for Cost {
    type Target = f64;

    /// Borrow the underlying value.
    fn deref(&self) -> &Self::Target {
        let Cost(inner) = self;
        inner
    }
}
/// The properties of an index (or of the rowid) that the cost model takes into account.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndexInfo {
/// Whether the index is unique.
pub unique: bool,
/// The number of columns in the index.
pub column_count: usize,
/// Whether the index is covering; covering indexes receive a small cost discount.
pub covering: bool,
}
pub const ESTIMATED_HARDCODED_ROWS_PER_TABLE: usize = 1000000;
pub const ESTIMATED_HARDCODED_ROWS_PER_PAGE: usize = 50; // roughly 80 bytes per 4096 byte page
/// Convert a row count into an estimated number of page reads, rounding up to whole
/// pages and assuming [ESTIMATED_HARDCODED_ROWS_PER_PAGE] rows per page.
pub fn estimate_page_io_cost(rowcount: f64) -> Cost {
    let rows_per_page = ESTIMATED_HARDCODED_ROWS_PER_PAGE as f64;
    let pages = rowcount / rows_per_page;
    Cost(pages.ceil())
}
/// Estimate the cost of a scan or seek operation.
///
/// This is a very simple model that estimates the number of pages read
/// based on the number of rows read, ignoring any CPU costs.
///
/// * `index_info` - properties of the index (or rowid) being used; `None` means a plain
///   table scan, which is costed as reading every row.
/// * `constraints` - the constraints used for the seek. All but possibly the last one are
///   treated as equalities; the last one is treated as a range when its operator is not `=`.
/// * `input_cardinality` - multiplier for how many times the operation is performed.
///
/// Selectivity assumptions: 0.1 per equality, 0.4 for a trailing range, and 0.01 (unique
/// index) or 0.1 (non-unique index) when equalities cover every index column. Covering
/// indexes get an additional 0.9 multiplier.
pub fn estimate_cost_for_scan_or_seek(
    index_info: Option<IndexInfo>,
    constraints: &[Constraint],
    input_cardinality: f64,
) -> Cost {
    const EQUALITY_SELECTIVITY: f64 = 0.1;
    const RANGE_SELECTIVITY: f64 = 0.4;

    let Some(index_info) = index_info else {
        return estimate_page_io_cost(
            input_cardinality * ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64,
        );
    };
    let final_constraint_is_range = constraints
        .last()
        .map_or(false, |c| c.operator != ast::Operator::Equals);
    // Every constraint except a trailing range is an equality. A trailing range implies at
    // least one constraint, so the subtraction cannot underflow.
    let equalities_count = constraints.len() - usize::from(final_constraint_is_range);
    let range_multiplier = if final_constraint_is_range {
        RANGE_SELECTIVITY
    } else {
        1.0
    };
    let cost_multiplier = if equalities_count == 0 {
        // no equalities: either a pure range query, or a full scan of the index
        range_multiplier
    } else if equalities_count == index_info.column_count {
        // equalities across all index columns: assume very high selectivity on a unique index
        if index_info.unique {
            0.01
        } else {
            0.1
        }
    } else {
        // some equalities, optionally followed by a range. The range must shrink the
        // estimate (selectivity 0.4), not inflate it.
        let equalities_multiplier =
            (0..equalities_count).fold(1.0_f64, |acc, _| acc * EQUALITY_SELECTIVITY);
        equalities_multiplier * range_multiplier
    };
    // little bonus for covering indexes
    let covering_multiplier = if index_info.covering { 0.9 } else { 1.0 };
    estimate_page_io_cost(
        cost_multiplier
            * ESTIMATED_HARDCODED_ROWS_PER_TABLE as f64
            * input_cardinality
            * covering_multiplier,
    )
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,254 @@
use std::cell::RefCell;
use limbo_sqlite3_parser::ast::{self, SortOrder};
use crate::{
translate::plan::{GroupBy, IterationDirection, TableReference},
util::exprs_are_equivalent,
};
use super::{
access_method::{AccessMethod, AccessMethodKind},
join::JoinN,
};
/// One column of a desired row ordering: which column of which table, and in which direction.
#[derive(Debug, PartialEq, Clone)]
pub struct ColumnOrder {
/// The table the column belongs to, as a position in the list of table references.
pub table_no: usize,
/// The position of the column within its table.
pub column_no: usize,
/// The requested sort direction for this column.
pub order: SortOrder,
}
/// Records which clause(s) an [OrderTarget] was derived from.
/// NOTE(review): presumably this tells the optimizer which sorting step can be skipped
/// when a plan satisfies the target — confirm against the join optimizer.
#[derive(Debug, PartialEq, Clone)]
pub enum EliminatesSort {
/// The target was derived from the GROUP BY expressions only.
GroupBy,
/// The target was derived from the ORDER BY expressions only.
OrderBy,
/// The target follows the GROUP BY columns while honoring the ORDER BY directions.
GroupByAndOrderBy,
}
/// A desired ordering of the joined result rows.
/// Field `0` lists the target columns from most to least significant;
/// field `1` records which clause(s) the target was derived from.
#[derive(Debug, PartialEq, Clone)]
pub struct OrderTarget(pub Vec<ColumnOrder>, pub EliminatesSort);
impl OrderTarget {
    /// Build an [OrderTarget] from a sequence of `(expression, sort order)` pairs.
    ///
    /// Returns `None` when the sequence is empty or when any expression is something
    /// other than a plain column reference.
    fn maybe_from_iterator<'a>(
        list: impl Iterator<Item = (&'a ast::Expr, SortOrder)> + Clone,
        eliminates_sort: EliminatesSort,
    ) -> Option<Self> {
        let mut columns = Vec::new();
        for (expr, order) in list {
            // A single non-column expression makes the whole target unusable.
            let ast::Expr::Column { table, column, .. } = expr else {
                return None;
            };
            columns.push(ColumnOrder {
                table_no: *table,
                column_no: *column,
                order,
            });
        }
        if columns.is_empty() {
            None
        } else {
            Some(OrderTarget(columns, eliminates_sort))
        }
    }
}
/// Compute an [OrderTarget] for the join optimizer to use.
/// Ideally, a join order is both efficient in joining the tables
/// but also returns the results in an order that minimizes the amount of
/// sorting that needs to be done later (either in GROUP BY, ORDER BY, or both).
///
/// Returns `None` when there are no ordering demands, or when the relevant expressions
/// are not all plain column references.
///
/// When both clauses are present and every ORDER BY expression also appears in the
/// GROUP BY, `group_by.exprs` is reordered in place so that its leading expressions
/// follow the ORDER BY.
///
/// TODO: this does not currently handle the case where we definitely cannot eliminate
/// the ORDER BY sorter, but we could still eliminate the GROUP BY sorter.
pub fn compute_order_target(
    order_by: &Option<Vec<(ast::Expr, SortOrder)>>,
    group_by: Option<&mut GroupBy>,
) -> Option<OrderTarget> {
    match (order_by, group_by) {
        // No ordering demands - we don't care what order the joined result rows are in
        (None, None) => None,
        // Only ORDER BY - we would like the joined result rows to be in the order specified by the ORDER BY
        (Some(order_by), None) => OrderTarget::maybe_from_iterator(
            order_by.iter().map(|(expr, order)| (expr, *order)),
            EliminatesSort::OrderBy,
        ),
        // Only GROUP BY - we would like the joined result rows to be in the order specified by the GROUP BY
        (None, Some(group_by)) => OrderTarget::maybe_from_iterator(
            group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)),
            EliminatesSort::GroupBy,
        ),
        // Both ORDER BY and GROUP BY:
        // If the GROUP BY does not contain all the expressions in the ORDER BY,
        // then we must separately sort the result rows for ORDER BY anyway.
        // However, in that case we can use the GROUP BY expressions as the target order for the join,
        // so that we don't have to sort twice.
        //
        // If the GROUP BY contains all the expressions in the ORDER BY,
        // then we again can use the GROUP BY expressions as the target order for the join;
        // however in this case we must take the ASC/DESC from ORDER BY into account.
        (Some(order_by), Some(group_by)) => {
            // Does the group by contain all expressions in the order by?
            // The check must run over the ORDER BY expressions: the reordering and the
            // direction pairing below are only valid when each of them has a GROUP BY
            // counterpart. Checking the opposite containment would pair a GROUP BY column
            // with the direction of an unrelated ORDER BY expression.
            let group_by_contains_all = order_by.iter().all(|(order_by_expr, _)| {
                group_by
                    .exprs
                    .iter()
                    .any(|expr| exprs_are_equivalent(expr, order_by_expr))
            });
            // If not, let's try to target an ordering that matches the group by -- we don't care about ASC/DESC
            if !group_by_contains_all {
                return OrderTarget::maybe_from_iterator(
                    group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)),
                    EliminatesSort::GroupBy,
                );
            }
            // If yes, let's try to target an ordering that matches the GROUP BY columns,
            // but the ORDER BY orderings. First, we need to reorder the GROUP BY columns to match the ORDER BY columns.
            // GROUP BY expressions that do not occur in the ORDER BY sort last (stable sort keeps their relative order).
            group_by.exprs.sort_by_key(|expr| {
                order_by
                    .iter()
                    .position(|(order_by_expr, _)| exprs_are_equivalent(expr, order_by_expr))
                    .unwrap_or(usize::MAX)
            });
            // Iterate over GROUP BY, but take the ORDER BY orderings into account.
            // GROUP BY expressions beyond the ORDER BY list default to ascending.
            OrderTarget::maybe_from_iterator(
                group_by
                    .exprs
                    .iter()
                    .zip(
                        order_by
                            .iter()
                            .map(|(_, dir)| dir)
                            .chain(std::iter::repeat(&SortOrder::Asc)),
                    )
                    .map(|(expr, dir)| (expr, *dir)),
                EliminatesSort::GroupByAndOrderBy,
            )
        }
    }
}
/// Check if the plan's row iteration order matches the [OrderTarget]'s column order
///
/// Tables are visited in plan order. Each table contributes the columns of the index
/// its access method uses, or its rowid alias column when no index is used, and these
/// are compared one by one against the target columns (table, column and direction,
/// with the direction flipped for backwards iteration). The function returns `true`
/// as soon as every target column has been matched, and `false` on the first mismatch
/// or when the plan runs out of columns first.
pub fn plan_satisfies_order_target(
    plan: &JoinN,
    access_methods_arena: &RefCell<Vec<AccessMethod>>,
    table_references: &[TableReference],
    order_target: &OrderTarget,
) -> bool {
    let target_columns = &order_target.0;
    let mut next_target = 0;
    for (i, table_no) in plan.table_numbers.iter().enumerate() {
        let table_ref = &table_references[*table_no];
        let arena = access_methods_arena.borrow();
        let access_method = &arena[plan.best_access_methods[i]];
        // Scans and searches provide ordering the same way; only the index and the
        // iteration direction matter here.
        let (index, iter_dir) = match &access_method.kind {
            AccessMethodKind::Scan { index, iter_dir }
            | AccessMethodKind::Search {
                index, iter_dir, ..
            } => (index, iter_dir),
        };
        let forwards = *iter_dir == IterationDirection::Forwards;
        match index {
            Some(index) => {
                // The index columns must match the order target columns for this table
                for index_col in index.columns.iter() {
                    let target_col = &target_columns[next_target];
                    let same_order = target_col.order == index_col.order;
                    let order_matches = same_order == forwards;
                    let same_column = target_col.table_no == *table_no
                        && target_col.column_no == index_col.pos_in_table;
                    if !same_column || !order_matches {
                        return false;
                    }
                    next_target += 1;
                    if next_target == target_columns.len() {
                        return true;
                    }
                }
            }
            None => {
                // Without an index, rows come out in rowid order.
                let Some(rowid_alias_col) = table_ref
                    .table
                    .columns()
                    .iter()
                    .position(|c| c.is_rowid_alias)
                else {
                    return false;
                };
                let target_col = &target_columns[next_target];
                let provided_order = if forwards {
                    SortOrder::Asc
                } else {
                    SortOrder::Desc
                };
                let order_matches = target_col.order == provided_order;
                let same_column =
                    target_col.table_no == *table_no && target_col.column_no == rowid_alias_col;
                if !same_column || !order_matches {
                    return false;
                }
                next_target += 1;
                if next_target == target_columns.len() {
                    return true;
                }
            }
        }
    }
    false
}