Collate: fix Insn::Compare to use collation seq of each compared column

This commit is contained in:
Jussi Saurio
2025-10-02 21:28:51 +03:00
parent edd4651b97
commit 8e2e557da4
5 changed files with 72 additions and 19 deletions

View File

@@ -490,12 +490,25 @@ pub fn group_by_process_single_group(
GroupByRowSource::MainLoop { start_reg_src, .. } => *start_reg_src,
};
let mut compare_key_info = group_by
.exprs
.iter()
.map(|_| KeyInfo {
sort_order: SortOrder::Asc,
collation: CollationSeq::default(),
})
.collect::<Vec<_>>();
for i in 0..group_by.exprs.len() {
let maybe_collation = get_collseq_from_expr(&group_by.exprs[i], &plan.table_references)?;
compare_key_info[i].collation = maybe_collation.unwrap_or_default();
}
// Compare the group by columns to the previous group by columns to see if we are at a new group or not
program.emit_insn(Insn::Compare {
start_reg_a: registers.reg_group_exprs_cmp,
start_reg_b: groups_start_reg,
count: group_by.exprs.len(),
collation: program.curr_collation(),
key_info: compare_key_info,
});
program.add_comment(

View File

@@ -1,5 +1,6 @@
use crate::schema::{BTreeTable, Table};
use crate::translate::aggregation::{translate_aggregation_step, AggArgumentSource};
use crate::translate::collate::{get_collseq_from_expr, CollationSeq};
use crate::translate::emitter::{Resolver, TranslateCtx};
use crate::translate::expr::{walk_expr, walk_expr_mut, WalkControl};
use crate::translate::order_by::order_by_sorter_insert;
@@ -9,10 +10,12 @@ use crate::translate::plan::{
};
use crate::translate::planner::resolve_window_and_aggregate_functions;
use crate::translate::result_row::emit_select_result;
use crate::types::KeyInfo;
use crate::util::exprs_are_equivalent;
use crate::vdbe::builder::{CursorType, ProgramBuilder, TableRefIdCounter};
use crate::vdbe::insn::{InsertFlags, Insn};
use crate::vdbe::{BranchOffset, CursorID};
use crate::Result;
use std::mem;
use std::sync::Arc;
use turso_parser::ast::Name;
@@ -220,7 +223,7 @@ fn prepare_window_subquery(
inner_plan,
None,
subquery_id,
);
)?;
// Verify that the subquery has the expected database ID.
// This is required to ensure that assumptions in `rewrite_terminal_expr` are valid.
@@ -657,9 +660,9 @@ pub fn emit_window_loop_source(
let window = plan.window.as_ref().expect("missing window");
emit_load_order_by_columns(program, window, registers);
emit_flush_buffer_if_new_partition(program, labels, registers, window);
emit_flush_buffer_if_new_partition(program, labels, registers, window, plan)?;
emit_reset_state_if_new_partition(program, registers, window);
emit_flush_buffer_if_not_peer(program, labels, registers, window);
emit_flush_buffer_if_not_peer(program, labels, registers, window, plan)?;
emit_insert_row_into_buffer(
program,
registers,
@@ -677,7 +680,8 @@ fn emit_flush_buffer_if_new_partition(
labels: &WindowLabels,
registers: &WindowRegisters,
window: &Window,
) {
plan: &SelectPlan,
) -> Result<()> {
if let Some(reg_partition_start) = registers.partition_start {
let same_partition_label = program.allocate_label();
let new_partition_label = program.allocate_label();
@@ -692,11 +696,22 @@ fn emit_flush_buffer_if_new_partition(
program.offset(),
"compare partition keys to detect new partition",
);
let mut compare_key_info = (0..window.partition_by.len())
.map(|_| KeyInfo {
sort_order: SortOrder::Asc,
collation: CollationSeq::default(),
})
.collect::<Vec<_>>();
for i in 0..window.partition_by.len() {
let maybe_collation =
get_collseq_from_expr(&window.partition_by[i], &plan.table_references)?;
compare_key_info[i].collation = maybe_collation.unwrap_or_default();
}
program.emit_insn(Insn::Compare {
start_reg_a: registers.src_columns_start,
start_reg_b: reg_partition_start,
count: partition_by_len,
collation: program.curr_collation(),
key_info: compare_key_info,
});
program.emit_insn(Insn::Jump {
target_pc_lt: new_partition_label,
@@ -723,6 +738,8 @@ fn emit_flush_buffer_if_new_partition(
program.resolve_label(same_partition_label, program.offset());
}
Ok(())
}
fn emit_reset_state_if_new_partition(
@@ -768,7 +785,8 @@ fn emit_flush_buffer_if_not_peer(
labels: &WindowLabels,
registers: &WindowRegisters,
window: &Window,
) {
plan: &SelectPlan,
) -> Result<()> {
if let Some(reg_new_order_by_columns_start) = registers.new_order_by_columns_start {
let label_peer = program.allocate_label();
let label_not_peer = program.allocate_label();
@@ -778,11 +796,22 @@ fn emit_flush_buffer_if_not_peer(
.expect("prev_order_by_columns_start must exist");
program.add_comment(program.offset(), "compare ORDER BY columns to detect peer");
let mut compare_key_info = (0..window.order_by.len())
.map(|_| KeyInfo {
sort_order: SortOrder::Asc,
collation: CollationSeq::default(),
})
.collect::<Vec<_>>();
for i in 0..window.order_by.len() {
let maybe_collation =
get_collseq_from_expr(&window.order_by[i].0, &plan.table_references)?;
compare_key_info[i].collation = maybe_collation.unwrap_or_default();
}
program.emit_insn(Insn::Compare {
start_reg_a: reg_prev_order_by_columns_start,
start_reg_b: reg_new_order_by_columns_start,
count: order_by_len,
collation: program.curr_collation(),
key_info: compare_key_info,
});
program.emit_insn(Insn::Jump {
target_pc_lt: label_not_peer,
@@ -804,6 +833,8 @@ fn emit_flush_buffer_if_not_peer(
program.resolve_label(label_peer, program.offset());
}
Ok(())
}
fn emit_load_order_by_columns(

View File

@@ -73,7 +73,7 @@ use super::{
};
use parking_lot::RwLock;
use rand::{thread_rng, Rng};
use turso_parser::ast::{self, Name};
use turso_parser::ast::{self, Name, SortOrder};
use turso_parser::parser::Parser;
use super::{
@@ -523,14 +523,13 @@ pub fn op_compare(
start_reg_a,
start_reg_b,
count,
collation,
key_info,
},
insn
);
let start_reg_a = *start_reg_a;
let start_reg_b = *start_reg_b;
let count = *count;
let collation = collation.unwrap_or_default();
if start_reg_a + count > start_reg_b {
return Err(LimboError::InternalError(
@@ -538,21 +537,30 @@ pub fn op_compare(
));
}
let mut cmp = None;
let mut cmp = std::cmp::Ordering::Equal;
for i in 0..count {
// TODO (https://github.com/tursodatabase/turso/issues/2304): this logic is almost the same as compare_immutable()
// but that one works on RefValue and this works on Value. There are tons of cases like this where we could reuse
// functionality if we had a trait that both RefValue and Value implement.
let a = state.registers[start_reg_a + i].get_value();
let b = state.registers[start_reg_b + i].get_value();
let column_order = key_info[i].sort_order;
let collation = key_info[i].collation;
cmp = match (a, b) {
(Value::Text(left), Value::Text(right)) => {
Some(collation.compare_strings(left.as_str(), right.as_str()))
collation.compare_strings(left.as_str(), right.as_str())
}
_ => Some(a.cmp(b)),
_ => a.partial_cmp(b).unwrap(),
};
if cmp != Some(std::cmp::Ordering::Equal) {
if !cmp.is_eq() {
cmp = match column_order {
SortOrder::Asc => cmp,
SortOrder::Desc => cmp.reverse(),
};
break;
}
}
state.last_compare = cmp;
state.last_compare = Some(cmp);
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
}

View File

@@ -151,13 +151,13 @@ pub fn insn_to_row(
start_reg_a,
start_reg_b,
count,
collation,
key_info,
} => (
"Compare",
*start_reg_a as i32,
*start_reg_b as i32,
*count as i32,
Value::build_text(format!("k({count}, {})", collation.unwrap_or_default())),
Value::build_text(format!("k({count}, {})", key_info.iter().map(|k| k.collation.to_string()).collect::<Vec<_>>().join(", "))),
0,
format!(
"r[{}..{}]==r[{}..{}]",

View File

@@ -8,6 +8,7 @@ use crate::{
schema::{Affinity, BTreeTable, Column, Index},
storage::{pager::CreateBTreeFlags, wal::CheckpointMode},
translate::{collate::CollationSeq, emitter::TransactionMode},
types::KeyInfo,
Value,
};
use strum::EnumCount;
@@ -217,7 +218,7 @@ pub enum Insn {
start_reg_a: usize,
start_reg_b: usize,
count: usize,
collation: Option<CollationSeq>,
key_info: Vec<KeyInfo>,
},
/// Place the result of rhs bitwise AND lhs in third register.
BitAnd {