mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-30 13:24:22 +01:00
Merge 'Coll seq' from Glauber Costa
Implement the CollSeq vdbe opcode. Reviewed-by: Preston Thorpe <preston@turso.tech> Closes #2454
This commit is contained in:
@@ -431,7 +431,7 @@ Modifiers:
|
||||
| Checkpoint | Yes | |
|
||||
| Clear | No | |
|
||||
| Close | Yes | |
|
||||
| CollSeq | No | |
|
||||
| CollSeq | Yes | |
|
||||
| Column | Yes | |
|
||||
| Compare | Yes | |
|
||||
| Concat | Yes | |
|
||||
|
||||
@@ -2,6 +2,7 @@ use turso_sqlite3_parser::ast;
|
||||
|
||||
use crate::{
|
||||
function::AggFunc,
|
||||
translate::collate::CollationSeq,
|
||||
vdbe::{
|
||||
builder::ProgramBuilder,
|
||||
insn::{IdxInsertFlags, Insn},
|
||||
@@ -60,6 +61,37 @@ pub fn emit_ungrouped_aggregation<'a>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_collseq_if_needed(
|
||||
program: &mut ProgramBuilder,
|
||||
referenced_tables: &TableReferences,
|
||||
expr: &ast::Expr,
|
||||
) {
|
||||
// Check if this is a column expression with explicit COLLATE clause
|
||||
if let ast::Expr::Collate(_, collation_name) = expr {
|
||||
if let Ok(collation) = CollationSeq::new(collation_name) {
|
||||
program.emit_insn(Insn::CollSeq {
|
||||
reg: None,
|
||||
collation,
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If no explicit collation, check if this is a column with table-defined collation
|
||||
if let ast::Expr::Column { table, column, .. } = expr {
|
||||
if let Some(table_ref) = referenced_tables.find_table_by_internal_id(*table) {
|
||||
if let Some(table_column) = table_ref.get_column_at(*column) {
|
||||
if let Some(collation) = &table_column.collation {
|
||||
program.emit_insn(Insn::CollSeq {
|
||||
reg: None,
|
||||
collation: *collation,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits the bytecode for handling duplicates in a distinct aggregate.
|
||||
/// This is used in both GROUP BY and non-GROUP BY aggregations to jump over
|
||||
/// the AggStep that would otherwise accumulate the same value multiple times.
|
||||
@@ -196,6 +228,7 @@ pub fn translate_aggregation_step(
|
||||
let expr_reg = program.alloc_register();
|
||||
let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, resolver)?;
|
||||
handle_distinct(program, agg, expr_reg);
|
||||
emit_collseq_if_needed(program, referenced_tables, expr);
|
||||
program.emit_insn(Insn::AggStep {
|
||||
acc_reg: target_register,
|
||||
col: expr_reg,
|
||||
@@ -212,6 +245,7 @@ pub fn translate_aggregation_step(
|
||||
let expr_reg = program.alloc_register();
|
||||
let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, resolver)?;
|
||||
handle_distinct(program, agg, expr_reg);
|
||||
emit_collseq_if_needed(program, referenced_tables, expr);
|
||||
program.emit_insn(Insn::AggStep {
|
||||
acc_reg: target_register,
|
||||
col: expr_reg,
|
||||
|
||||
@@ -117,6 +117,25 @@ pub type InsnFunction = fn(
|
||||
Option<&Arc<MvStore>>,
|
||||
) -> Result<InsnFunctionStepResult>;
|
||||
|
||||
/// Compare two values using the specified collation for text values.
|
||||
/// Non-text values are compared using their natural ordering.
|
||||
fn compare_with_collation(
|
||||
lhs: &Value,
|
||||
rhs: &Value,
|
||||
collation: Option<CollationSeq>,
|
||||
) -> std::cmp::Ordering {
|
||||
match (lhs, rhs) {
|
||||
(Value::Text(lhs_text), Value::Text(rhs_text)) => {
|
||||
if let Some(coll) = collation {
|
||||
coll.compare_strings(lhs_text.as_str(), rhs_text.as_str())
|
||||
} else {
|
||||
lhs.cmp(rhs)
|
||||
}
|
||||
}
|
||||
_ => lhs.cmp(rhs),
|
||||
}
|
||||
}
|
||||
|
||||
pub enum InsnFunctionStepResult {
|
||||
Done,
|
||||
IO,
|
||||
@@ -3364,7 +3383,13 @@ pub fn op_agg_step(
|
||||
};
|
||||
|
||||
let new_value = col.get_owned_value();
|
||||
if *new_value != Value::Null && acc.as_ref().is_none_or(|acc| new_value > acc) {
|
||||
if *new_value != Value::Null
|
||||
&& acc.as_ref().is_none_or(|acc| {
|
||||
use std::cmp::Ordering;
|
||||
compare_with_collation(new_value, acc, state.current_collation)
|
||||
== Ordering::Greater
|
||||
})
|
||||
{
|
||||
*acc = Some(new_value.clone());
|
||||
}
|
||||
}
|
||||
@@ -3382,7 +3407,13 @@ pub fn op_agg_step(
|
||||
|
||||
let new_value = col.get_owned_value();
|
||||
|
||||
if *new_value != Value::Null && acc.as_ref().is_none_or(|acc| new_value < acc) {
|
||||
if *new_value != Value::Null
|
||||
&& acc.as_ref().is_none_or(|acc| {
|
||||
use std::cmp::Ordering;
|
||||
compare_with_collation(new_value, acc, state.current_collation)
|
||||
== Ordering::Less
|
||||
})
|
||||
{
|
||||
*acc = Some(new_value.clone());
|
||||
}
|
||||
}
|
||||
@@ -6083,6 +6114,29 @@ pub fn op_is_null(
|
||||
Ok(InsnFunctionStepResult::Step)
|
||||
}
|
||||
|
||||
pub fn op_coll_seq(
|
||||
_program: &Program,
|
||||
state: &mut ProgramState,
|
||||
insn: &Insn,
|
||||
_pager: &Rc<Pager>,
|
||||
_mv_store: Option<&Arc<MvStore>>,
|
||||
) -> Result<InsnFunctionStepResult> {
|
||||
let Insn::CollSeq { reg, collation } = insn else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
};
|
||||
|
||||
// Set the current collation sequence for use by subsequent functions
|
||||
state.current_collation = Some(*collation);
|
||||
|
||||
// If P1 is not zero, initialize that register to 0
|
||||
if let Some(reg_idx) = reg {
|
||||
state.registers[*reg_idx] = Register::Value(Value::Integer(0));
|
||||
}
|
||||
|
||||
state.pc += 1;
|
||||
Ok(InsnFunctionStepResult::Step)
|
||||
}
|
||||
|
||||
pub fn op_page_count(
|
||||
program: &Program,
|
||||
state: &mut ProgramState,
|
||||
|
||||
@@ -1636,6 +1636,15 @@ pub fn insn_to_str(
|
||||
0,
|
||||
format!("rename_table({from}, {to})"),
|
||||
),
|
||||
Insn::CollSeq { reg, collation } => (
|
||||
"CollSeq",
|
||||
reg.unwrap_or(0) as i32,
|
||||
0,
|
||||
0,
|
||||
Value::build_text(collation.to_string().as_str()),
|
||||
0,
|
||||
format!("collation={collation}"),
|
||||
),
|
||||
};
|
||||
format!(
|
||||
"{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",
|
||||
|
||||
@@ -864,6 +864,22 @@ pub enum Insn {
|
||||
/// Jump to this PC if the register is null (P2).
|
||||
target_pc: BranchOffset,
|
||||
},
|
||||
|
||||
/// Set the collation sequence for the next function call.
|
||||
/// P4 is a pointer to a CollationSeq. If the next call to a user function
|
||||
/// or aggregate calls sqlite3GetFuncCollSeq(), this collation sequence will
|
||||
/// be returned. This is used by the built-in min(), max() and nullif()
|
||||
/// functions.
|
||||
///
|
||||
/// If P1 is not zero, then it is a register that a subsequent min() or
|
||||
/// max() aggregate will set to 1 if the current row is not the minimum or
|
||||
/// maximum. The P1 register is initialized to 0 by this instruction.
|
||||
CollSeq {
|
||||
/// Optional register to initialize to 0 (P1).
|
||||
reg: Option<usize>,
|
||||
/// The collation sequence to set (P4).
|
||||
collation: CollationSeq,
|
||||
},
|
||||
ParseSchema {
|
||||
db: usize,
|
||||
where_clause: Option<String>,
|
||||
@@ -1121,6 +1137,7 @@ impl Insn {
|
||||
Insn::DropTable { .. } => execute::op_drop_table,
|
||||
Insn::Close { .. } => execute::op_close,
|
||||
Insn::IsNull { .. } => execute::op_is_null,
|
||||
Insn::CollSeq { .. } => execute::op_coll_seq,
|
||||
Insn::ParseSchema { .. } => execute::op_parse_schema,
|
||||
Insn::ShiftRight { .. } => execute::op_shift_right,
|
||||
Insn::ShiftLeft { .. } => execute::op_shift_left,
|
||||
|
||||
@@ -29,6 +29,7 @@ use crate::{
|
||||
function::{AggFunc, FuncCtx},
|
||||
state_machine::StateTransition,
|
||||
storage::sqlite3_ondisk::SmallVec,
|
||||
translate::collate::CollationSeq,
|
||||
translate::plan::TableReferences,
|
||||
types::{IOResult, RawSlice, TextRef},
|
||||
vdbe::execute::{
|
||||
@@ -260,6 +261,8 @@ pub struct ProgramState {
|
||||
op_insert_state: OpInsertState,
|
||||
op_no_conflict_state: OpNoConflictState,
|
||||
seek_state: OpSeekState,
|
||||
/// Current collation sequence set by OP_CollSeq instruction
|
||||
current_collation: Option<CollationSeq>,
|
||||
}
|
||||
|
||||
impl ProgramState {
|
||||
@@ -291,6 +294,7 @@ impl ProgramState {
|
||||
op_insert_state: OpInsertState::Insert,
|
||||
op_no_conflict_state: OpNoConflictState::Start,
|
||||
seek_state: OpSeekState::Start,
|
||||
current_collation: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,6 +334,7 @@ impl ProgramState {
|
||||
self.regex_cache.like.clear();
|
||||
self.interrupted = false;
|
||||
self.parameters.clear();
|
||||
self.current_collation = None;
|
||||
#[cfg(feature = "json")]
|
||||
self.json_cache.clear()
|
||||
}
|
||||
|
||||
@@ -50,3 +50,27 @@ do_execsql_test_in_memory_any_error collate_unique_constraint {
|
||||
CREATE TABLE t (a TEXT COLLATE NOCASE PRIMARY KEY);
|
||||
INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl');
|
||||
}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} collate_aggregation_default_binary {
|
||||
create table fruits(name collate binary);
|
||||
insert into fruits(name) values ('Apple') ,('banana') ,('CHERRY');
|
||||
select max(name) from fruits;
|
||||
} {banana}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} collate_aggregation_default_nocase {
|
||||
create table fruits(name collate nocase);
|
||||
insert into fruits(name) values ('Apple') ,('banana') ,('CHERRY');
|
||||
select max(name) from fruits;
|
||||
} {CHERRY}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} collate_aggregation_explicit_binary {
|
||||
create table fruits(name collate nocase);
|
||||
insert into fruits(name) values ('Apple') ,('banana') ,('CHERRY');
|
||||
select max(name collate binary) from fruits;
|
||||
} {banana}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} collate_aggregation_explicit_nocase {
|
||||
create table fruits(name collate binary);
|
||||
insert into fruits(name) values ('Apple') ,('banana') ,('CHERRY');
|
||||
select max(name collate nocase) from fruits;
|
||||
} {CHERRY}
|
||||
|
||||
Reference in New Issue
Block a user