From b5b1010e7c98928fdf61c210456c4ba9722744eb Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 18 Apr 2025 23:01:41 -0300 Subject: [PATCH] set binary collation as default --- core/translate/collate.rs | 5 +- core/translate/expr.rs | 18 +++++++ core/translate/main_loop.rs | 5 ++ core/translate/schema.rs | 4 ++ core/vdbe/execute.rs | 102 +++++++++++++++++++++++++++--------- core/vdbe/insn.rs | 7 +++ 6 files changed, 116 insertions(+), 25 deletions(-) diff --git a/core/translate/collate.rs b/core/translate/collate.rs index bd6d1be4f..0e3e4877f 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use tracing::Level; + // TODO: in the future allow user to define collation sequences // Will have to meddle with ffi for this #[derive(Debug, Eq, PartialEq, strum_macros::Display, Default)] @@ -18,7 +20,8 @@ pub enum CollationSeq { } impl CollationSeq { - fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + tracing::event!(Level::DEBUG, collate = %self, lhs, rhs); match self { CollationSeq::Binary => Self::binary_cmp(lhs, rhs), CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 8dae73fba..22204105f 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -16,6 +16,8 @@ use crate::vdbe::{ }; use crate::{Result, Value}; +use super::collate::CollationSeq; + #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, @@ -53,6 +55,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); } else { $program.emit_insn(Insn::$op_false { @@ -60,6 +63,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } }}; @@ -80,6 +84,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }); } else { $program.emit_insn(Insn::$op_false { @@ -87,6 +92,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }); } }}; @@ -370,6 +376,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: jump_target_when_true, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); } else { // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. @@ -378,6 +385,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } } @@ -399,6 +407,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. @@ -609,6 +618,7 @@ pub fn translate_expr( target_pc: next_case_label, // A NULL result is considered untrue when evaluating WHEN terms. flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }), // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause None => program.emit_insn(Insn::IfNot { @@ -2216,6 +2226,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2232,6 +2243,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2248,6 +2260,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2264,6 +2277,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2280,6 +2294,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2296,6 +2311,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2389,6 +2405,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2403,6 +2420,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }, target_register, if_true_label, diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 17f439450..ed7620056 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,6 +17,7 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, + collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1151,24 +1152,28 @@ fn emit_seek_termination( rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::GT) => program.emit_insn(Insn::Gt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::LE) => program.emit_insn(Insn::Le { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::LT) => program.emit_insn(Insn::Lt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (_, SeekOp::EQ) => { panic!("An index termination condition is never EQ") diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 1f95f89d5..5fbf9d110 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -21,6 +21,8 @@ use crate::{bail_parse_error, Result}; use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable}; +use super::collate::CollationSeq; + #[derive(Debug, Clone, Copy)] pub enum ParseSchema { None, @@ -719,6 +721,7 @@ pub fn translate_drop_table( rhs: table_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, @@ -730,6 +733,7 @@ pub fn translate_drop_table( rhs: table_type, target_pc: next_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); program.emit_insn(Insn::RowId { cursor_id: sqlite_schema_cursor_id, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 736ea260d..64c2cc567 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -525,6 +525,7 @@ pub fn op_eq( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -537,8 +538,8 @@ pub fn op_eq( let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -547,8 +548,16 @@ pub fn op_eq( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_eq() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs == *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -570,6 +579,7 @@ pub fn op_ne( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -582,8 +592,8 @@ pub fn op_ne( let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -592,8 +602,16 @@ pub fn op_ne( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ne() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs != *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -615,6 +633,7 @@ pub fn op_lt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -625,8 +644,8 @@ pub fn op_lt( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -635,8 +654,16 @@ pub fn op_lt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() < *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_lt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs < *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -658,6 +685,7 @@ pub fn op_le( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -668,8 +696,8 @@ pub fn op_le( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -678,8 +706,16 @@ pub fn op_le( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() <= *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_le() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs <= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -701,6 +737,7 @@ pub fn op_gt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -711,8 +748,8 @@ pub fn op_gt( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -721,8 +758,16 @@ pub fn op_gt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() > *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_gt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs > *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -744,6 +789,7 @@ pub fn op_ge( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -754,8 +800,8 @@ pub fn op_ge( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -764,8 +810,16 @@ pub fn op_ge( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() >= *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ge() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs >= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 876ab7930..4ff14ee16 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -8,6 +8,7 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag use crate::{ schema::{BTreeTable, Index}, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, + translate::collate::CollationSeq, }; use limbo_macros::Description; use limbo_sqlite3_parser::ast::SortOrder; @@ -218,6 +219,7 @@ pub enum Insn { /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true. /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if they are not equal. Ne { @@ -228,6 +230,7 @@ pub enum Insn { /// /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { @@ -236,6 +239,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. Le { @@ -244,6 +248,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { @@ -252,6 +257,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { @@ -260,6 +266,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0) If {