use std::cell::RefCell; use turso_sqlite3_parser::ast::{self, SortOrder, TableInternalId}; use crate::{ translate::optimizer::access_method::AccessMethodParams, translate::plan::{GroupBy, IterationDirection, JoinedTable}, util::exprs_are_equivalent, }; use super::{access_method::AccessMethod, join::JoinN}; #[derive(Debug, PartialEq, Clone)] /// A convenience struct for representing a (table_no, column_no, [SortOrder]) tuple. pub struct ColumnOrder { pub table_id: TableInternalId, pub column_no: usize, pub order: SortOrder, } #[derive(Debug, PartialEq, Clone)] /// If an [OrderTarget] is satisfied, then [EliminatesSort] describes which part of the query no longer requires sorting. pub enum EliminatesSortBy { Group, Order, GroupByAndOrder, } #[derive(Debug, PartialEq, Clone)] /// An [OrderTarget] is considered in join optimization and index selection, /// so that if a given join ordering and its access methods satisfy the [OrderTarget], /// then the join ordering and its access methods are preferred, all other things being equal. pub struct OrderTarget(pub Vec, pub EliminatesSortBy); impl OrderTarget { fn maybe_from_iterator<'a>( list: impl Iterator + Clone, eliminates_sort: EliminatesSortBy, ) -> Option { if list.clone().count() == 0 { return None; } if list .clone() .any(|(expr, _)| !matches!(expr, ast::Expr::Column { .. })) { return None; } Some(OrderTarget( list.map(|(expr, order)| { let ast::Expr::Column { table, column, .. } = expr else { unreachable!(); }; ColumnOrder { table_id: *table, column_no: *column, order, } }) .collect(), eliminates_sort, )) } } /// Compute an [OrderTarget] for the join optimizer to use. /// Ideally, a join order is both efficient in joining the tables /// but also returns the results in an order that minimizes the amount of /// sorting that needs to be done later (either in GROUP BY, ORDER BY, or both). /// /// TODO: this does not currently handle the case where we definitely cannot eliminate /// the ORDER BY sorter, but we could still eliminate the GROUP BY sorter. pub fn compute_order_target( order_by_opt: &mut Option>, group_by_opt: Option<&mut GroupBy>, ) -> Option { match (&order_by_opt, group_by_opt) { // No ordering demands - we don't care what order the joined result rows are in (None, None) => None, // Only ORDER BY - we would like the joined result rows to be in the order specified by the ORDER BY (Some(order_by), None) => OrderTarget::maybe_from_iterator( order_by.iter().map(|(expr, order)| (expr, *order)), EliminatesSortBy::Order, ), // Only GROUP BY - we would like the joined result rows to be in the order specified by the GROUP BY (None, Some(group_by)) => OrderTarget::maybe_from_iterator( group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)), EliminatesSortBy::Group, ), // Both ORDER BY and GROUP BY: // If the GROUP BY does not contain all the expressions in the ORDER BY, // then we must separately sort the result rows for ORDER BY anyway. // However, in that case we can use the GROUP BY expressions as the target order for the join, // so that we don't have to sort twice. // // If the GROUP BY contains all the expressions in the ORDER BY, // then we again can use the GROUP BY expressions as the target order for the join; // however in this case we must take the ASC/DESC from ORDER BY into account. (Some(order_by), Some(group_by)) => { // Does the group by contain all expressions in the order by? let group_by_contains_all = order_by.iter().all(|(expr, _)| { group_by .exprs .iter() .any(|group_by_expr| exprs_are_equivalent(expr, group_by_expr)) }); // If not, let's try to target an ordering that matches the group by -- we don't care about ASC/DESC if !group_by_contains_all { return OrderTarget::maybe_from_iterator( group_by.exprs.iter().map(|expr| (expr, SortOrder::Asc)), EliminatesSortBy::Group, ); } // If yes, let's try to target an ordering that matches the GROUP BY columns, // but the ORDER BY orderings. First, we need to reorder the GROUP BY columns to match the ORDER BY columns. group_by.exprs.sort_by_key(|expr| { order_by .iter() .position(|(order_by_expr, _)| exprs_are_equivalent(expr, order_by_expr)) .map_or(usize::MAX, |i| i) }); // Now, regardless of whether we can eventually eliminate the sorting entirely in the optimizer, // we know that we don't need ORDER BY sorting anyway, because the GROUP BY will sort the result since // it contains all the necessary columns required for the ORDER BY, and the GROUP BY columns are now in the correct order. // First, however, we need to make sure the GROUP BY sorter's column sort directions match the ORDER BY requirements. assert!(group_by.exprs.len() >= order_by.len()); for (i, (_, order_by_dir)) in order_by.iter().enumerate() { group_by .sort_order .as_mut() .expect("GROUP BY should have a sort order before optimization is run")[i] = *order_by_dir; } // Now we can remove the ORDER BY from the query. order_by_opt.take(); OrderTarget::maybe_from_iterator( group_by .exprs .iter() .zip( group_by .sort_order .as_ref() .expect("GROUP BY should have a sort order before optimization is run") .iter(), ) .map(|(expr, dir)| (expr, *dir)), EliminatesSortBy::GroupByAndOrder, ) } } } /// Check if the plan's row iteration order matches the [OrderTarget]'s column order. /// If yes, and this plan is selected, then a sort operation can be eliminated. pub fn plan_satisfies_order_target( plan: &JoinN, access_methods_arena: &RefCell>, joined_tables: &[JoinedTable], order_target: &OrderTarget, ) -> bool { let mut target_col_idx = 0; let num_cols_in_order_target = order_target.0.len(); for (table_index, access_method_index) in plan.data.iter() { let target_col = &order_target.0[target_col_idx]; let table_ref = &joined_tables[*table_index]; let correct_table = target_col.table_id == table_ref.internal_id; if !correct_table { return false; } // Check if this table has an access method that provides the right ordering. let access_method = &access_methods_arena.borrow()[*access_method_index]; match &access_method.params { AccessMethodParams::BTreeTable { iter_dir, index, .. } => { match index { None => { // No index, so the next required column must be the rowid alias column. let rowid_alias_col = table_ref .table .columns() .iter() .position(|c| c.is_rowid_alias); let Some(rowid_alias_col) = rowid_alias_col else { return false; }; let correct_column = target_col.column_no == rowid_alias_col; if !correct_column { return false; } // Btree table rows are always in ascending order of rowid. let correct_order = if *iter_dir == IterationDirection::Forwards { target_col.order == SortOrder::Asc } else { target_col.order == SortOrder::Desc }; if !correct_order { return false; } target_col_idx += 1; // All order columns matched. if target_col_idx == num_cols_in_order_target { return true; } } Some(index) => { // All of the index columns must match the next required columns in the order target. for index_col in index.columns.iter() { let target_col = &order_target.0[target_col_idx]; let correct_column = target_col.column_no == index_col.pos_in_table; if !correct_column { return false; } let correct_order = if *iter_dir == IterationDirection::Forwards { target_col.order == index_col.order } else { target_col.order != index_col.order }; if !correct_order { return false; } target_col_idx += 1; // All order columns matched. if target_col_idx == num_cols_in_order_target { return true; } } } } } AccessMethodParams::VirtualTable { .. } => return false, AccessMethodParams::Subquery => return false, } } false }