diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 449185872..9e22c9a16 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -490,12 +490,25 @@ pub fn group_by_process_single_group( GroupByRowSource::MainLoop { start_reg_src, .. } => *start_reg_src, }; + let mut compare_key_info = group_by + .exprs + .iter() + .map(|_| KeyInfo { + sort_order: SortOrder::Asc, + collation: CollationSeq::default(), + }) + .collect::>(); + for i in 0..group_by.exprs.len() { + let maybe_collation = get_collseq_from_expr(&group_by.exprs[i], &plan.table_references)?; + compare_key_info[i].collation = maybe_collation.unwrap_or_default(); + } + // Compare the group by columns to the previous group by columns to see if we are at a new group or not program.emit_insn(Insn::Compare { start_reg_a: registers.reg_group_exprs_cmp, start_reg_b: groups_start_reg, count: group_by.exprs.len(), - collation: program.curr_collation(), + key_info: compare_key_info, }); program.add_comment( diff --git a/core/translate/window.rs b/core/translate/window.rs index 3ca52778a..8467b1030 100644 --- a/core/translate/window.rs +++ b/core/translate/window.rs @@ -1,5 +1,6 @@ use crate::schema::{BTreeTable, Table}; use crate::translate::aggregation::{translate_aggregation_step, AggArgumentSource}; +use crate::translate::collate::{get_collseq_from_expr, CollationSeq}; use crate::translate::emitter::{Resolver, TranslateCtx}; use crate::translate::expr::{walk_expr, walk_expr_mut, WalkControl}; use crate::translate::order_by::order_by_sorter_insert; @@ -9,10 +10,12 @@ use crate::translate::plan::{ }; use crate::translate::planner::resolve_window_and_aggregate_functions; use crate::translate::result_row::emit_select_result; +use crate::types::KeyInfo; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::{CursorType, ProgramBuilder, TableRefIdCounter}; use crate::vdbe::insn::{InsertFlags, Insn}; use crate::vdbe::{BranchOffset, CursorID}; +use crate::Result; use std::mem; use std::sync::Arc; use turso_parser::ast::Name; @@ -220,7 +223,7 @@ fn prepare_window_subquery( inner_plan, None, subquery_id, - ); + )?; // Verify that the subquery has the expected database ID. // This is required to ensure that assumptions in `rewrite_terminal_expr` are valid. @@ -657,9 +660,9 @@ pub fn emit_window_loop_source( let window = plan.window.as_ref().expect("missing window"); emit_load_order_by_columns(program, window, registers); - emit_flush_buffer_if_new_partition(program, labels, registers, window); + emit_flush_buffer_if_new_partition(program, labels, registers, window, plan)?; emit_reset_state_if_new_partition(program, registers, window); - emit_flush_buffer_if_not_peer(program, labels, registers, window); + emit_flush_buffer_if_not_peer(program, labels, registers, window, plan)?; emit_insert_row_into_buffer( program, registers, @@ -677,7 +680,8 @@ fn emit_flush_buffer_if_new_partition( labels: &WindowLabels, registers: &WindowRegisters, window: &Window, -) { + plan: &SelectPlan, +) -> Result<()> { if let Some(reg_partition_start) = registers.partition_start { let same_partition_label = program.allocate_label(); let new_partition_label = program.allocate_label(); @@ -692,11 +696,22 @@ fn emit_flush_buffer_if_new_partition( program.offset(), "compare partition keys to detect new partition", ); + let mut compare_key_info = (0..window.partition_by.len()) + .map(|_| KeyInfo { + sort_order: SortOrder::Asc, + collation: CollationSeq::default(), + }) + .collect::>(); + for i in 0..window.partition_by.len() { + let maybe_collation = + get_collseq_from_expr(&window.partition_by[i], &plan.table_references)?; + compare_key_info[i].collation = maybe_collation.unwrap_or_default(); + } program.emit_insn(Insn::Compare { start_reg_a: registers.src_columns_start, start_reg_b: reg_partition_start, count: partition_by_len, - collation: program.curr_collation(), + key_info: compare_key_info, }); program.emit_insn(Insn::Jump { target_pc_lt: new_partition_label, @@ -723,6 +738,8 @@ fn emit_flush_buffer_if_new_partition( program.resolve_label(same_partition_label, program.offset()); } + + Ok(()) } fn emit_reset_state_if_new_partition( @@ -768,7 +785,8 @@ fn emit_flush_buffer_if_not_peer( labels: &WindowLabels, registers: &WindowRegisters, window: &Window, -) { + plan: &SelectPlan, +) -> Result<()> { if let Some(reg_new_order_by_columns_start) = registers.new_order_by_columns_start { let label_peer = program.allocate_label(); let label_not_peer = program.allocate_label(); @@ -778,11 +796,22 @@ fn emit_flush_buffer_if_not_peer( .expect("prev_order_by_columns_start must exist"); program.add_comment(program.offset(), "compare ORDER BY columns to detect peer"); + let mut compare_key_info = (0..window.order_by.len()) + .map(|_| KeyInfo { + sort_order: SortOrder::Asc, + collation: CollationSeq::default(), + }) + .collect::>(); + for i in 0..window.order_by.len() { + let maybe_collation = + get_collseq_from_expr(&window.order_by[i].0, &plan.table_references)?; + compare_key_info[i].collation = maybe_collation.unwrap_or_default(); + } program.emit_insn(Insn::Compare { start_reg_a: reg_prev_order_by_columns_start, start_reg_b: reg_new_order_by_columns_start, count: order_by_len, - collation: program.curr_collation(), + key_info: compare_key_info, }); program.emit_insn(Insn::Jump { target_pc_lt: label_not_peer, @@ -804,6 +833,8 @@ fn emit_flush_buffer_if_not_peer( program.resolve_label(label_peer, program.offset()); } + + Ok(()) } fn emit_load_order_by_columns( diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index cd8cd6b3a..fa8a9f54b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -73,7 +73,7 @@ use super::{ }; use parking_lot::RwLock; use rand::{thread_rng, Rng}; -use turso_parser::ast::{self, Name}; +use turso_parser::ast::{self, Name, SortOrder}; use turso_parser::parser::Parser; use super::{ @@ -523,14 +523,13 @@ pub fn op_compare( start_reg_a, start_reg_b, count, - collation, + key_info, }, insn ); let start_reg_a = *start_reg_a; let start_reg_b = *start_reg_b; let count = *count; - let collation = collation.unwrap_or_default(); if start_reg_a + count > start_reg_b { return Err(LimboError::InternalError( @@ -538,21 +537,30 @@ pub fn op_compare( )); } - let mut cmp = None; + let mut cmp = std::cmp::Ordering::Equal; for i in 0..count { + // TODO (https://github.com/tursodatabase/turso/issues/2304): this logic is almost the same as compare_immutable() + // but that one works on RefValue and this works on Value. There are tons of cases like this where we could reuse + // functionality if we had a trait that both RefValue and Value implement. let a = state.registers[start_reg_a + i].get_value(); let b = state.registers[start_reg_b + i].get_value(); + let column_order = key_info[i].sort_order; + let collation = key_info[i].collation; cmp = match (a, b) { (Value::Text(left), Value::Text(right)) => { - Some(collation.compare_strings(left.as_str(), right.as_str())) + collation.compare_strings(left.as_str(), right.as_str()) } - _ => Some(a.cmp(b)), + _ => a.partial_cmp(b).unwrap(), }; - if cmp != Some(std::cmp::Ordering::Equal) { + if !cmp.is_eq() { + cmp = match column_order { + SortOrder::Asc => cmp, + SortOrder::Desc => cmp.reverse(), + }; break; } } - state.last_compare = cmp; + state.last_compare = Some(cmp); state.pc += 1; Ok(InsnFunctionStepResult::Step) } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce9bd4f4f..63972590c 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -151,13 +151,13 @@ pub fn insn_to_row( start_reg_a, start_reg_b, count, - collation, + key_info, } => ( "Compare", *start_reg_a as i32, *start_reg_b as i32, *count as i32, - Value::build_text(format!("k({count}, {})", collation.unwrap_or_default())), + Value::build_text(format!("k({count}, {})", key_info.iter().map(|k| k.collation.to_string()).collect::>().join(", "))), 0, format!( "r[{}..{}]==r[{}..{}]", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 5da5729ba..bf50ef2b2 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -8,6 +8,7 @@ use crate::{ schema::{Affinity, BTreeTable, Column, Index}, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, translate::{collate::CollationSeq, emitter::TransactionMode}, + types::KeyInfo, Value, }; use strum::EnumCount; @@ -217,7 +218,7 @@ pub enum Insn { start_reg_a: usize, start_reg_b: usize, count: usize, - collation: Option, + key_info: Vec, }, /// Place the result of rhs bitwise AND lhs in third register. BitAnd {