diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 5321e0fa0..dd3455449 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, sync::Arc}; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ schema::{Index, Schema}, @@ -9,8 +9,8 @@ use crate::{ }; use super::plan::{ - DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference, - UpdatePlan, WhereTerm, + DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SelectPlan, + TableReference, UpdatePlan, WhereTerm, }; pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { @@ -40,10 +40,10 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &plan.group_by, )?; - eliminate_unnecessary_orderby(plan, schema)?; - eliminate_orderby_like_groupby(plan)?; Ok(()) @@ -62,6 +62,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) @@ -79,6 +81,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) } @@ -93,33 +97,6 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { Ok(()) } -fn query_is_already_ordered_by( - table_references: &[TableReference], - key: &mut ast::Expr, - available_indexes: &HashMap>>, -) -> Result { - let first_table = table_references.first(); - if first_table.is_none() { - return Ok(false); - } - let table_reference = first_table.unwrap(); - match &table_reference.op { - Operation::Scan { .. 
} => Ok(key.is_rowid_alias_of(0)), - Operation::Search(search) => match search { - Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)), - Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)), - Search::IndexSearch { index, .. } => { - let index_rc = key.check_index_scan(0, table_reference, available_indexes)?; - let index_is_the_same = index_rc - .map(|irc| Arc::ptr_eq(index, &irc)) - .unwrap_or(false); - Ok(index_is_the_same) - } - }, - _ => Ok(false), - } -} - fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { if plan.order_by.is_none() | plan.group_by.is_none() { return Ok(()); @@ -185,36 +162,117 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { Ok(()) } -fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { - if plan.order_by.is_none() { +fn eliminate_unnecessary_orderby( + table_references: &mut [TableReference], + available_indexes: &HashMap>>, + order_by: &mut Option>, + group_by: &Option, +) -> Result<()> { + let Some(order) = order_by else { return Ok(()); - } - if plan.table_references.is_empty() { + }; + let Some(first_table_reference) = table_references.first_mut() else { return Ok(()); - } - + }; + let Some(btree_table) = first_table_reference.btree() else { + return Ok(()); + }; // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby - if plan.group_by.is_some() { + if group_by.is_some() { + return Ok(()); + } + let Operation::Scan { + index, iter_dir, .. 
+ } = &mut first_table_reference.op + else { + return Ok(()); + }; + + assert!( + index.is_none(), + "Nothing shouldve transformed the scan to use an index yet" + ); + + // Special case: if ordering by just the rowid, we can remove the ORDER BY clause + if order.len() == 1 && order[0].0.is_rowid_alias_of(0) { + *iter_dir = match order[0].1 { + Direction::Ascending => IterationDirection::Forwards, + Direction::Descending => IterationDirection::Backwards, + }; + *order_by = None; return Ok(()); } - let o = plan.order_by.as_mut().unwrap(); + // Find the best matching index for the ORDER BY columns + let table_name = &btree_table.name; + let mut best_index = (None, 0); - if o.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); + for (_, indexes) in available_indexes.iter() { + for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) { + let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| { + if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) { + let col_idx_in_table = btree_table + .columns + .iter() + .position(|tc| tc.name.as_ref() == Some(&c.name)); + matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx) + } else { + false + } + }).count(); + + if matching_columns > best_index.1 { + best_index = (Some(index_candidate), matching_columns); + } + } } - let (key, direction) = o.first_mut().unwrap(); + let Some(matching_index) = best_index.0 else { + return Ok(()); + }; + let match_count = best_index.1; - let already_ordered = - query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?; + // If we found a matching index, use it for scanning + *index = Some(matching_index.clone()); + // If the order by direction matches the index direction, we can iterate the index in forwards order. + // If they don't, we must iterate the index in backwards order. 
+ let index_direction = &matching_index.columns.first().as_ref().unwrap().order; + *iter_dir = match (index_direction, order[0].1) { + (SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => { + IterationDirection::Forwards + } + (SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => { + IterationDirection::Backwards + } + }; - if already_ordered { - push_scan_direction(&mut plan.table_references[0], direction); - plan.order_by = None; + // If the index covers all ORDER BY columns, and one of the following applies: + // - the ORDER BY directions exactly match the index orderings, + // - the ORDER BY directions are the exact opposite of the index orderings, + // we can remove the ORDER BY clause. + if match_count == order.len() { + let full_match = { + let mut all_match_forward = true; + let mut all_match_reverse = true; + for (i, (_, direction)) in order.iter().enumerate() { + match (&matching_index.columns[i].order, direction) { + (SortOrder::Asc, Direction::Ascending) + | (SortOrder::Desc, Direction::Descending) => { + all_match_reverse = false; + } + (SortOrder::Asc, Direction::Descending) + | (SortOrder::Desc, Direction::Ascending) => { + all_match_forward = false; + } + } + } + all_match_forward || all_match_reverse + }; + if full_match { + *order_by = None; + } } Ok(()) @@ -222,24 +280,25 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu /** * Use indexes where possible. - * Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others. - * This is just because we are WIP. * * When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector. * If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation. 
* We put it there simply because it makes it a bit easier to track during translation. + * + * In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause. */ fn use_indexes( table_references: &mut [TableReference], available_indexes: &HashMap>>, where_clause: &mut Vec, + order_by: &mut Option>, + group_by: &Option, ) -> Result<()> { - if where_clause.is_empty() { - return Ok(()); - } - + // Try to use indexes for eliminating ORDER BY clauses + eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + // Try to use indexes for WHERE conditions 'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() { - if let Operation::Scan { .. } = &mut table_reference.op { + if let Operation::Scan { iter_dir, .. } = &table_reference.op { let mut i = 0; while i < where_clause.len() { let cond = where_clause.get_mut(i).unwrap(); @@ -248,6 +307,7 @@ fn use_indexes( table_index, table_reference, available_indexes, + iter_dir.clone(), )? { where_clause.remove(i); table_reference.op = Operation::Search(index_search); @@ -296,20 +356,6 @@ fn eliminate_constant_conditions( Ok(ConstantConditionEliminationResult::Continue) } -fn push_scan_direction(table: &mut TableReference, direction: &Direction) { - if let Operation::Scan { - ref mut iter_dir, .. 
- } = table.op - { - if iter_dir.is_none() { - match direction { - Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), - Direction::Descending => *iter_dir = Some(IterationDirection::Backwards), - } - } - } -} - fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> { for rc in plan.result_columns.iter_mut() { rewrite_expr(&mut rc.expr)?; @@ -611,6 +657,7 @@ pub fn try_extract_index_search_expression( table_index: usize, table_reference: &TableReference, available_indexes: &HashMap>>, + iter_dir: IterationDirection, ) -> Result> { if !cond.should_eval_at_loop(table_index) { return Ok(None); @@ -641,6 +688,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -671,6 +719,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -695,6 +744,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -719,6 +769,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {}