diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 6eeee3fa3..70b33dee1 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -1,11 +1,10 @@ use std::rc::Rc; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ function::AggFunc, schema::{Column, PseudoTable}, - types::{OwnedValue, Record}, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -74,15 +73,10 @@ pub fn init_group_by( let label_subrtn_acc_clear = program.allocate_label(); - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.exprs.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: non_aggregate_count + plan.aggregates.len(), - order: Record::new(order), + order: (0..group_by.exprs.len()).map(|_| SortOrder::Asc).collect(), }); program.add_comment(program.offset(), "clear group by abort flag"); diff --git a/core/translate/index.rs b/core/translate/index.rs index b01fb2921..063344bd8 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -3,13 +3,11 @@ use std::sync::Arc; use crate::{ schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema}, storage::pager::CreateBTreeFlags, - types::Record, util::normalize_ident, vdbe::{ builder::{CursorType, ProgramBuilder, QueryMode}, insn::{IdxInsertFlags, Insn, RegisterOrLiteral}, }, - OwnedValue, }; use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn}; @@ -114,21 +112,12 @@ pub fn translate_create_index( ); // determine the order of the columns in the index for the sorter - let order = idx - .columns - .iter() - .map(|c| { - OwnedValue::Integer(match c.order { - SortOrder::Asc => 0, - SortOrder::Desc => 1, - }) - }) - .collect(); + let order = idx.columns.iter().map(|c| c.order.clone()).collect(); // open the sorter and the pseudo table program.emit_insn(Insn::SorterOpen { cursor_id: sorter_cursor_id, columns: columns.len(), - order: Record::new(order), + order, }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index efb2015f0..8520f3e9d 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -13,8 +13,8 @@ use crate::{ use super::{ emitter::Resolver, plan::{ - DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, - SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm, + DeletePlan, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey, + SelectPlan, TableReference, UpdatePlan, WhereTerm, }, planner::determine_where_to_eval_expr, }; @@ -112,59 +112,36 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { } let order_by_clauses = plan.order_by.as_mut().unwrap(); + // TODO: let's make the group by sorter aware of the order by directions so we dont need to skip + // descending terms. + if order_by_clauses + .iter() + .any(|(_, dir)| matches!(dir, SortOrder::Desc)) + { + return Ok(()); + } let group_by_clauses = plan.group_by.as_mut().unwrap(); - - let mut group_by_insert_position = 0; - let mut order_index = 0; - - // This function optimizes query execution by eliminating duplicate expressions between ORDER BY and GROUP BY clauses - // When the same column appears in both clauses, we can avoid redundant sorting operations - // The function reorders GROUP BY expressions and removes redundant ORDER BY expressions to ensure consistent ordering - while order_index < order_by_clauses.len() { - let (order_expr, direction) = &order_by_clauses[order_index]; - - // Skip descending orders as they require separate sorting - if matches!(direction, Direction::Descending) { - order_index += 1; - continue; - } - - // Check if the current ORDER BY expression matches any expression in the GROUP BY clause - if let Some(group_expr_position) = group_by_clauses + // all order by terms must be in the group by clause for order by to be eliminated + if !order_by_clauses.iter().all(|(o_expr, _)| { + group_by_clauses .exprs .iter() - .position(|expr| exprs_are_equivalent(expr, order_expr)) - { - // If we found a matching expression in GROUP BY, we need to ensure it's in the correct position - // to preserve the ordering specified by ORDER BY clauses - - // Move the matching GROUP BY expression to the current insertion position - // This effectively "bubbles up" the expression to maintain proper ordering - if group_expr_position != group_by_insert_position { - let mut current_position = group_expr_position; - - // Swap expressions to move the matching one to the correct position - while current_position > group_by_insert_position { - group_by_clauses - .exprs - .swap(current_position, current_position - 1); - current_position -= 1; - } - } - - group_by_insert_position += 1; - - // Remove this expression from ORDER BY since it's now handled by GROUP BY - order_by_clauses.remove(order_index); - // Note: We don't increment order_index here because removal shifts all elements - } else { - // If not found in GROUP BY, move to next ORDER BY expression - order_index += 1; - } - } - if order_by_clauses.is_empty() { - plan.order_by = None + .any(|g_expr| exprs_are_equivalent(g_expr, o_expr)) + }) { + return Ok(()); } + + // reorder group by terms so that they match the order by terms + // this way the group by sorter will effectively do the order by sorter's job and + // we can remove the order by clause + group_by_clauses.exprs.sort_by_key(|g_expr| { + order_by_clauses + .iter() + .position(|(o_expr, _)| exprs_are_equivalent(o_expr, g_expr)) + .unwrap_or(usize::MAX) + }); + + plan.order_by = None; Ok(()) } @@ -173,7 +150,7 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { fn eliminate_unnecessary_orderby( table_references: &mut [TableReference], available_indexes: &HashMap>>, - order_by: &mut Option>, + order_by: &mut Option>, group_by: &Option, ) -> Result { let Some(order) = order_by else { @@ -206,8 +183,8 @@ fn eliminate_unnecessary_orderby( // Special case: if ordering by just the rowid, we can remove the ORDER BY clause if order.len() == 1 && order[0].0.is_rowid_alias_of(0) { *iter_dir = match order[0].1 { - Direction::Ascending => IterationDirection::Forwards, - Direction::Descending => IterationDirection::Backwards, + SortOrder::Asc => IterationDirection::Forwards, + SortOrder::Desc => IterationDirection::Backwards, }; *order_by = None; return Ok(true); @@ -248,10 +225,10 @@ fn eliminate_unnecessary_orderby( // If they don't, we must iterate the index in backwards order. let index_direction = &matching_index.columns.first().as_ref().unwrap().order; *iter_dir = match (index_direction, order[0].1) { - (SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => { + (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => { IterationDirection::Forwards } - (SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => { + (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => { IterationDirection::Backwards } }; @@ -266,12 +243,10 @@ fn eliminate_unnecessary_orderby( let mut all_match_reverse = true; for (i, (_, direction)) in order.iter().enumerate() { match (&matching_index.columns[i].order, direction) { - (SortOrder::Asc, Direction::Ascending) - | (SortOrder::Desc, Direction::Descending) => { + (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => { all_match_reverse = false; } - (SortOrder::Asc, Direction::Descending) - | (SortOrder::Desc, Direction::Ascending) => { + (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => { all_match_forward = false; } } @@ -299,7 +274,7 @@ fn use_indexes( table_references: &mut [TableReference], available_indexes: &HashMap>>, where_clause: &mut Vec, - order_by: &mut Option>, + order_by: &mut Option>, group_by: &Option, ) -> Result<()> { // Try to use indexes for eliminating ORDER BY clauses diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index ce9cbc4ac..0a43e6683 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -1,10 +1,9 @@ use std::rc::Rc; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ schema::{Column, PseudoTable}, - types::{OwnedValue, Record}, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -16,7 +15,7 @@ use crate::{ use super::{ emitter::TranslateCtx, expr::translate_expr, - plan::{Direction, ResultSetColumn, SelectPlan}, + plan::{ResultSetColumn, SelectPlan}, result_row::{emit_offset, emit_result_row_and_limit}, }; @@ -33,21 +32,17 @@ pub struct SortMetadata { pub fn init_order_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - order_by: &[(ast::Expr, Direction)], + order_by: &[(ast::Expr, SortOrder)], ) -> Result<()> { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); t_ctx.meta_sort = Some(SortMetadata { sort_cursor, reg_sorter_data: program.alloc_register(), }); - let mut order = Vec::new(); - for (_, direction) in order_by.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), - order: Record::new(order), + order: order_by.iter().map(|(_, direction)| *direction).collect(), }); Ok(()) } @@ -257,7 +252,7 @@ pub fn sorter_insert( /// /// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) pub fn order_by_deduplicate_result_columns( - order_by: &[(ast::Expr, Direction)], + order_by: &[(ast::Expr, SortOrder)], result_columns: &[ResultSetColumn], ) -> Option> { let mut result_column_remapping: Option> = None; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 2baf59d32..079e99908 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -267,7 +267,7 @@ pub struct SelectPlan { /// group by clause pub group_by: Option, /// order by clause - pub order_by: Option>, + pub order_by: Option>, /// all the aggregates collected from the result columns, order by, and (TODO) having clauses pub aggregates: Vec, /// limit clause @@ -290,7 +290,7 @@ pub struct DeletePlan { /// where clause split into a vec at 'AND' boundaries. pub where_clause: Vec, /// order by clause - pub order_by: Option>, + pub order_by: Option>, /// limit clause pub limit: Option, /// offset clause @@ -308,7 +308,7 @@ pub struct UpdatePlan { // (colum index, new value) pairs pub set_clauses: Vec<(usize, ast::Expr)>, pub where_clause: Vec, - pub order_by: Option>, + pub order_by: Option>, pub limit: Option, pub offset: Option, // TODO: optional RETURNING clause @@ -681,21 +681,6 @@ pub enum Search { }, } -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum Direction { - Ascending, - Descending, -} - -impl Display for Direction { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - Direction::Ascending => write!(f, "ASC"), - Direction::Descending => write!(f, "DESC"), - } - } -} - #[derive(Clone, Debug, PartialEq)] pub struct Aggregate { pub func: AggFunc, @@ -873,7 +858,16 @@ impl fmt::Display for UpdatePlan { if let Some(order_by) = &self.order_by { writeln!(f, "ORDER BY:")?; for (expr, dir) in order_by { - writeln!(f, " - {} {}", expr, dir)?; + writeln!( + f, + " - {} {}", + expr, + if *dir == SortOrder::Asc { + "ASC" + } else { + "DESC" + } + )?; } } if let Some(limit) = self.limit { diff --git a/core/translate/select.rs b/core/translate/select.rs index 3972bdc85..b1eb613bb 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -3,7 +3,7 @@ use super::plan::{select_star, Operation, Search, SelectQueryType}; use super::planner::Scope; use crate::function::{AggFunc, ExtFunc, Func}; use crate::translate::optimizer::optimize_plan; -use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan}; +use crate::translate::plan::{Aggregate, GroupBy, Plan, ResultSetColumn, SelectPlan}; use crate::translate::planner::{ bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where, resolve_aggregates, @@ -368,13 +368,7 @@ pub fn prepare_select_plan<'a>( )?; resolve_aggregates(&o.expr, &mut plan.aggregates); - key.push(( - o.expr, - o.order.map_or(Direction::Ascending, |o| match o { - ast::SortOrder::Asc => Direction::Ascending, - ast::SortOrder::Desc => Direction::Descending, - }), - )); + key.push((o.expr, o.order.unwrap_or(ast::SortOrder::Asc))); } plan.order_by = Some(key); } diff --git a/core/translate/update.rs b/core/translate/update.rs index 3e36583dc..308a0cec1 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -11,8 +11,7 @@ use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; use super::emitter::emit_program; use super::optimizer::optimize_plan; use super::plan::{ - ColumnUsedMask, Direction, IterationDirection, Plan, ResultSetColumn, TableReference, - UpdatePlan, + ColumnUsedMask, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, }; use super::planner::bind_column_references; use super::planner::{parse_limit, parse_where}; @@ -155,17 +154,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< let order_by = body.order_by.as_ref().map(|order| { order .iter() - .map(|o| { - ( - o.expr.clone(), - o.order - .map(|s| match s { - SortOrder::Asc => Direction::Ascending, - SortOrder::Desc => Direction::Descending, - }) - .unwrap_or(Direction::Ascending), - ) - }) + .map(|o| (o.expr.clone(), o.order.unwrap_or(SortOrder::Asc))) .collect() }); // Parse the WHERE clause diff --git a/core/types.rs b/core/types.rs index 6ed66cc0c..3d2faa4f8 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1079,6 +1079,14 @@ impl IndexKeySortOrder { IndexKeySortOrder(spec) } + pub fn from_list(order: &[SortOrder]) -> Self { + let mut spec = 0; + for (i, order) in order.iter().enumerate() { + spec |= ((*order == SortOrder::Desc) as u64) << i; + } + IndexKeySortOrder(spec) + } + pub fn default() -> Self { Self(0) } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 948f0a5b8..6aba7eed4 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -2698,14 +2698,6 @@ pub fn op_sorter_open( else { unreachable!("unexpected Insn {:?}", insn) }; - let order = order - .get_values() - .iter() - .map(|v| match v { - OwnedValue::Integer(i) => *i == 0, - _ => unreachable!(), - }) - .collect(); let cursor = Sorter::new(order); let mut cursors = state.cursors.borrow_mut(); cursors diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index fbdeaa6c2..3df247cd0 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,3 +1,5 @@ +use limbo_sqlite3_parser::ast::SortOrder; + use crate::vdbe::{builder::CursorType, insn::RegisterOrLiteral}; use super::{Insn, InsnReference, OwnedValue, Program}; @@ -876,17 +878,10 @@ pub fn insn_to_str( } => { let _p4 = String::new(); let to_print: Vec = order - .get_values() .iter() .map(|v| match v { - OwnedValue::Integer(i) => { - if *i == 0 { - "B".to_string() - } else { - "-B".to_string() - } - } - _ => unreachable!(), + SortOrder::Asc => "B".to_string(), + SortOrder::Desc => "-B".to_string(), }) .collect(); ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index fc64f59d1..34a9f680e 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -7,9 +7,9 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag use crate::{ schema::BTreeTable, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, - types::Record, }; use limbo_macros::Description; +use limbo_sqlite3_parser::ast::SortOrder; /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values. #[derive(Clone, Copy, Debug, Default)] @@ -586,9 +586,9 @@ pub enum Insn { /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Record, // P4. 0 if ASC and 1 if DESC + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec, // P4. }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 584a29271..d758f91f5 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,18 +1,21 @@ -use crate::types::ImmutableRecord; -use std::cmp::Ordering; +use limbo_sqlite3_parser::ast::SortOrder; + +use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; pub struct Sorter { records: Vec, current: Option, - order: Vec, + order: IndexKeySortOrder, + key_len: usize, } impl Sorter { - pub fn new(order: Vec) -> Self { + pub fn new(order: &[SortOrder]) -> Self { Self { records: Vec::new(), current: None, - order, + key_len: order.len(), + order: IndexKeySortOrder::from_list(order), } } pub fn is_empty(&self) -> bool { @@ -26,24 +29,11 @@ impl Sorter { // We do the sorting here since this is what is called by the SorterSort instruction pub fn sort(&mut self) { self.records.sort_by(|a, b| { - let cmp_by_idx = |idx: usize, ascending: bool| { - let a = &a.get_value(idx); - let b = &b.get_value(idx); - if ascending { - a.cmp(b) - } else { - b.cmp(a) - } - }; - - let mut cmp_ret = Ordering::Equal; - for (idx, &is_asc) in self.order.iter().enumerate() { - cmp_ret = cmp_by_idx(idx, is_asc); - if cmp_ret != Ordering::Equal { - break; - } - } - cmp_ret + compare_immutable( + &a.values[..self.key_len], + &b.values[..self.key_len], + self.order, + ) }); self.records.reverse(); self.next() diff --git a/testing/groupby.test b/testing/groupby.test index 9fd6e51bf..9fce2e83e 100644 --- a/testing/groupby.test +++ b/testing/groupby.test @@ -185,3 +185,10 @@ William|111} do_execsql_test group_by_column_number { select u.first_name, count(1) from users u group by 1 limit 1; } {Aaron|41} + +# There was a regression where we incorrectly removed SOME order by terms and left others in place, which is invalid and results in wrong rows being returned. +do_execsql_test groupby_orderby_removal_regression_test { + select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3; +} {1|Foster|1 +2|Salazar|1 +3|Perry|1}