optimizer: remove ORDER BY if index can be used to satisfy the order

This commit is contained in:
Jussi Saurio
2025-04-08 10:33:01 +03:00
parent a706b7160a
commit 024c63f808

View File

@@ -1,6 +1,6 @@
use std::{collections::HashMap, sync::Arc};
use limbo_sqlite3_parser::ast;
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
use crate::{
schema::{Index, Schema},
@@ -9,8 +9,8 @@ use crate::{
};
use super::plan::{
DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference,
UpdatePlan, WhereTerm,
DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SelectPlan,
TableReference, UpdatePlan, WhereTerm,
};
pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> {
@@ -40,10 +40,10 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&plan.group_by,
)?;
eliminate_unnecessary_orderby(plan, schema)?;
eliminate_orderby_like_groupby(plan)?;
Ok(())
@@ -62,6 +62,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&None,
)?;
Ok(())
@@ -79,6 +81,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> {
&mut plan.table_references,
&schema.indexes,
&mut plan.where_clause,
&mut plan.order_by,
&None,
)?;
Ok(())
}
@@ -93,33 +97,6 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
Ok(())
}
/// Reports whether the access method chosen for the first table already
/// agrees with the ordering expression `key`.
///
/// Only the first entry of `table_references` is inspected; an empty slice
/// yields `Ok(false)`.
///
/// * Table scans and rowid-based searches: the answer is whatever
///   `key.is_rowid_alias_of(0)` returns.
/// * Index searches: the answer is `true` only when the index reported by
///   `key.check_index_scan(0, ..)` is the very same `Arc` (pointer identity)
///   as the index the search already uses.
/// * Any other operation: `false`.
///
/// Errors raised by `check_index_scan` are propagated to the caller.
fn query_is_already_ordered_by(
    table_references: &[TableReference],
    key: &mut ast::Expr,
    available_indexes: &HashMap<String, Vec<Arc<Index>>>,
) -> Result<bool> {
    let Some(outermost) = table_references.first() else {
        return Ok(false);
    };
    let ordered = match &outermost.op {
        Operation::Scan { .. } => key.is_rowid_alias_of(0),
        // The inner match stays free of a catch-all so that a new `Search`
        // variant forces this function to be revisited.
        Operation::Search(search) => match search {
            Search::RowidEq { .. } | Search::RowidSearch { .. } => key.is_rowid_alias_of(0),
            Search::IndexSearch { index, .. } => key
                .check_index_scan(0, outermost, available_indexes)?
                // Compare by pointer: it must be the identical index object.
                .map_or(false, |candidate| Arc::ptr_eq(index, &candidate)),
        },
        _ => false,
    };
    Ok(ordered)
}
fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
if plan.order_by.is_none() | plan.group_by.is_none() {
return Ok(());
@@ -185,36 +162,117 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
Ok(())
}
fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
if plan.order_by.is_none() {
fn eliminate_unnecessary_orderby(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
let Some(order) = order_by else {
return Ok(());
}
if plan.table_references.is_empty() {
};
let Some(first_table_reference) = table_references.first_mut() else {
return Ok(());
}
};
let Some(btree_table) = first_table_reference.btree() else {
return Ok(());
};
// If a GROUP BY clause is present, we can't rely on already-ordered columns, because GROUP BY reorders the data.
// This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping.
// If the ORDER BY clause merely duplicates the GROUP BY, that case is handled later in fn eliminate_orderby_like_groupby.
if plan.group_by.is_some() {
if group_by.is_some() {
return Ok(());
}
let Operation::Scan {
index, iter_dir, ..
} = &mut first_table_reference.op
else {
return Ok(());
};
assert!(
index.is_none(),
"Nothing shouldve transformed the scan to use an index yet"
);
// Special case: if ordering by just the rowid, we can remove the ORDER BY clause
if order.len() == 1 && order[0].0.is_rowid_alias_of(0) {
*iter_dir = match order[0].1 {
Direction::Ascending => IterationDirection::Forwards,
Direction::Descending => IterationDirection::Backwards,
};
*order_by = None;
return Ok(());
}
let o = plan.order_by.as_mut().unwrap();
// Find the best matching index for the ORDER BY columns
let table_name = &btree_table.name;
let mut best_index = (None, 0);
if o.len() != 1 {
// TODO: handle multiple order by keys
return Ok(());
for (_, indexes) in available_indexes.iter() {
for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) {
let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| {
if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) {
let col_idx_in_table = btree_table
.columns
.iter()
.position(|tc| tc.name.as_ref() == Some(&c.name));
matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx)
} else {
false
}
}).count();
if matching_columns > best_index.1 {
best_index = (Some(index_candidate), matching_columns);
}
}
}
let (key, direction) = o.first_mut().unwrap();
let Some(matching_index) = best_index.0 else {
return Ok(());
};
let match_count = best_index.1;
let already_ordered =
query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?;
// If we found a matching index, use it for scanning
*index = Some(matching_index.clone());
// If the order by direction matches the index direction, we can iterate the index in forwards order.
// If they don't, we must iterate the index in backwards order.
let index_direction = &matching_index.columns.first().as_ref().unwrap().order;
*iter_dir = match (index_direction, order[0].1) {
(SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => {
IterationDirection::Forwards
}
(SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => {
IterationDirection::Backwards
}
};
if already_ordered {
push_scan_direction(&mut plan.table_references[0], direction);
plan.order_by = None;
// If the index covers all ORDER BY columns, and one of the following applies:
// - the ORDER BY directions exactly match the index orderings,
// - the ORDER BY directions are the exact opposite of the index orderings,
// we can remove the ORDER BY clause.
if match_count == order.len() {
let full_match = {
let mut all_match_forward = true;
let mut all_match_reverse = true;
for (i, (_, direction)) in order.iter().enumerate() {
match (&matching_index.columns[i].order, direction) {
(SortOrder::Asc, Direction::Ascending)
| (SortOrder::Desc, Direction::Descending) => {
all_match_reverse = false;
}
(SortOrder::Asc, Direction::Descending)
| (SortOrder::Desc, Direction::Ascending) => {
all_match_forward = false;
}
}
}
all_match_forward || all_match_reverse
};
if full_match {
*order_by = None;
}
}
Ok(())
@@ -222,24 +280,25 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu
/**
* Use indexes where possible.
* Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others.
* This is just because we are WIP.
*
* When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector.
* If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation.
* We put it there simply because it makes it a bit easier to track during translation.
*
* In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause.
*/
fn use_indexes(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
where_clause: &mut Vec<WhereTerm>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
if where_clause.is_empty() {
return Ok(());
}
// Try to use indexes for eliminating ORDER BY clauses
eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?;
// Try to use indexes for WHERE conditions
'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() {
if let Operation::Scan { .. } = &mut table_reference.op {
if let Operation::Scan { iter_dir, .. } = &table_reference.op {
let mut i = 0;
while i < where_clause.len() {
let cond = where_clause.get_mut(i).unwrap();
@@ -248,6 +307,7 @@ fn use_indexes(
table_index,
table_reference,
available_indexes,
iter_dir.clone(),
)? {
where_clause.remove(i);
table_reference.op = Operation::Search(index_search);
@@ -296,20 +356,6 @@ fn eliminate_constant_conditions(
Ok(ConstantConditionEliminationResult::Continue)
}
/// Records the iteration direction implied by `direction` on a table scan.
///
/// Does nothing when `table.op` is not an `Operation::Scan`, or when the scan
/// already has an iteration direction set; an existing choice is never
/// overwritten.
fn push_scan_direction(table: &mut TableReference, direction: &Direction) {
    let Operation::Scan { iter_dir, .. } = &mut table.op else {
        return;
    };
    if iter_dir.is_some() {
        return;
    }
    *iter_dir = Some(match direction {
        Direction::Ascending => IterationDirection::Forwards,
        Direction::Descending => IterationDirection::Backwards,
    });
}
fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> {
for rc in plan.result_columns.iter_mut() {
rewrite_expr(&mut rc.expr)?;
@@ -611,6 +657,7 @@ pub fn try_extract_index_search_expression(
table_index: usize,
table_reference: &TableReference,
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
iter_dir: IterationDirection,
) -> Result<Option<Search>> {
if !cond.should_eval_at_loop(table_index) {
return Ok(None);
@@ -641,6 +688,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -671,6 +719,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -695,6 +744,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}
@@ -719,6 +769,7 @@ pub fn try_extract_index_search_expression(
from_outer_join: cond.from_outer_join,
eval_at: cond.eval_at,
},
iter_dir,
}));
}
_ => {}