set binary collation as default

This commit is contained in:
pedrocarlo
2025-04-18 23:01:41 -03:00
parent 510c70e919
commit b5b1010e7c
6 changed files with 116 additions and 25 deletions

View File

@@ -1,5 +1,7 @@
use std::cmp::Ordering;
use tracing::Level;
// TODO: in the future allow user to define collation sequences
// Will have to meddle with ffi for this
#[derive(Debug, Eq, PartialEq, strum_macros::Display, Default)]
@@ -18,7 +20,8 @@ pub enum CollationSeq {
}
impl CollationSeq {
fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering {
pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering {
tracing::event!(Level::DEBUG, collate = %self, lhs, rhs);
match self {
CollationSeq::Binary => Self::binary_cmp(lhs, rhs),
CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs),

View File

@@ -16,6 +16,8 @@ use crate::vdbe::{
};
use crate::{Result, Value};
use super::collate::CollationSeq;
#[derive(Debug, Clone, Copy)]
pub struct ConditionMetadata {
pub jump_if_condition_is_true: bool,
@@ -53,6 +55,7 @@ macro_rules! emit_cmp_insn {
rhs: $rhs,
target_pc: $cond.jump_target_when_true,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
});
} else {
$program.emit_insn(Insn::$op_false {
@@ -60,6 +63,7 @@ macro_rules! emit_cmp_insn {
rhs: $rhs,
target_pc: $cond.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: CollationSeq::default(),
});
}
}};
@@ -80,6 +84,7 @@ macro_rules! emit_cmp_null_insn {
rhs: $rhs,
target_pc: $cond.jump_target_when_true,
flags: CmpInsFlags::default().null_eq(),
collation: CollationSeq::default(),
});
} else {
$program.emit_insn(Insn::$op_false {
@@ -87,6 +92,7 @@ macro_rules! emit_cmp_null_insn {
rhs: $rhs,
target_pc: $cond.jump_target_when_false,
flags: CmpInsFlags::default().null_eq(),
collation: CollationSeq::default(),
});
}
}};
@@ -370,6 +376,7 @@ pub fn translate_condition_expr(
rhs: rhs_reg,
target_pc: jump_target_when_true,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
});
} else {
// If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
@@ -378,6 +385,7 @@ pub fn translate_condition_expr(
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: CollationSeq::default(),
});
}
}
@@ -399,6 +407,7 @@ pub fn translate_condition_expr(
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: CollationSeq::default(),
});
}
// If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
@@ -609,6 +618,7 @@ pub fn translate_expr(
target_pc: next_case_label,
// A NULL result is considered untrue when evaluating WHEN terms.
flags: CmpInsFlags::default().jump_if_null(),
collation: CollationSeq::default(),
}),
// CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause
None => program.emit_insn(Insn::IfNot {
@@ -2216,6 +2226,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2232,6 +2243,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2248,6 +2260,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2264,6 +2277,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2280,6 +2294,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2296,6 +2311,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2389,6 +2405,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default().null_eq(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,
@@ -2403,6 +2420,7 @@ fn emit_binary_insn(
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default().null_eq(),
collation: CollationSeq::default(),
},
target_register,
if_true_label,

View File

@@ -17,6 +17,7 @@ use crate::{
use super::{
aggregation::translate_aggregation_step,
collate::CollationSeq,
emitter::{OperationMode, TranslateCtx},
expr::{
translate_condition_expr, translate_expr, translate_expr_no_constant_opt,
@@ -1151,24 +1152,28 @@ fn emit_seek_termination(
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
}),
(false, SeekOp::GT) => program.emit_insn(Insn::Gt {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
}),
(false, SeekOp::LE) => program.emit_insn(Insn::Le {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
}),
(false, SeekOp::LT) => program.emit_insn(Insn::Lt {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
}),
(_, SeekOp::EQ) => {
panic!("An index termination condition is never EQ")

View File

@@ -21,6 +21,8 @@ use crate::{bail_parse_error, Result};
use limbo_ext::VTabKind;
use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable};
use super::collate::CollationSeq;
#[derive(Debug, Clone, Copy)]
pub enum ParseSchema {
None,
@@ -719,6 +721,7 @@ pub fn translate_drop_table(
rhs: table_reg,
target_pc: next_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
});
program.emit_insn(Insn::Column {
cursor_id: sqlite_schema_cursor_id,
@@ -730,6 +733,7 @@ pub fn translate_drop_table(
rhs: table_type,
target_pc: next_label,
flags: CmpInsFlags::default(),
collation: CollationSeq::default(),
});
program.emit_insn(Insn::RowId {
cursor_id: sqlite_schema_cursor_id,

View File

@@ -525,6 +525,7 @@ pub fn op_eq(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -537,8 +538,8 @@ pub fn op_eq(
let nulleq = flags.has_nulleq();
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if (nulleq && cond) || (!nulleq && jump_if_null) {
@@ -547,8 +548,16 @@ pub fn op_eq(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_eq() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs == *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
@@ -570,6 +579,7 @@ pub fn op_ne(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -582,8 +592,8 @@ pub fn op_ne(
let nulleq = flags.has_nulleq();
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if (nulleq && cond) || (!nulleq && jump_if_null) {
@@ -592,8 +602,16 @@ pub fn op_ne(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_ne() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs != *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
@@ -615,6 +633,7 @@ pub fn op_lt(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -625,8 +644,8 @@ pub fn op_lt(
let target_pc = *target_pc;
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if jump_if_null {
@@ -635,8 +654,16 @@ pub fn op_lt(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() < *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_lt() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs < *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
@@ -658,6 +685,7 @@ pub fn op_le(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -668,8 +696,8 @@ pub fn op_le(
let target_pc = *target_pc;
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if jump_if_null {
@@ -678,8 +706,16 @@ pub fn op_le(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() <= *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_le() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs <= *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
@@ -701,6 +737,7 @@ pub fn op_gt(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -711,8 +748,8 @@ pub fn op_gt(
let target_pc = *target_pc;
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if jump_if_null {
@@ -721,8 +758,16 @@ pub fn op_gt(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() > *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_gt() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs > *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
@@ -744,6 +789,7 @@ pub fn op_ge(
rhs,
target_pc,
flags,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -754,8 +800,8 @@ pub fn op_ge(
let target_pc = *target_pc;
let jump_if_null = flags.has_jump_if_null();
match (
&state.registers[lhs].get_owned_value(),
&state.registers[rhs].get_owned_value(),
state.registers[lhs].get_owned_value(),
state.registers[rhs].get_owned_value(),
) {
(_, Value::Null) | (Value::Null, _) => {
if jump_if_null {
@@ -764,8 +810,16 @@ pub fn op_ge(
state.pc += 1;
}
}
_ => {
if *state.registers[lhs].get_owned_value() >= *state.registers[rhs].get_owned_value() {
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => {
let order = collation.compare_strings(lhs.as_str(), rhs.as_str());
if order.is_ge() {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;
}
}
(lhs, rhs) => {
if *lhs >= *rhs {
state.pc = target_pc.to_offset_int();
} else {
state.pc += 1;

View File

@@ -8,6 +8,7 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag
use crate::{
schema::{BTreeTable, Index},
storage::{pager::CreateBTreeFlags, wal::CheckpointMode},
translate::collate::CollationSeq,
};
use limbo_macros::Description;
use limbo_sqlite3_parser::ast::SortOrder;
@@ -218,6 +219,7 @@ pub enum Insn {
/// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true.
/// This flag indicates that if either is null we should still jump.
flags: CmpInsFlags,
collation: CollationSeq,
},
/// Compare two registers and jump to the given PC if they are not equal.
Ne {
@@ -228,6 +230,7 @@ pub enum Insn {
///
/// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic.
flags: CmpInsFlags,
collation: CollationSeq,
},
/// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side.
Lt {
@@ -236,6 +239,7 @@ pub enum Insn {
target_pc: BranchOffset,
/// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
flags: CmpInsFlags,
collation: CollationSeq,
},
// Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side.
Le {
@@ -244,6 +248,7 @@ pub enum Insn {
target_pc: BranchOffset,
/// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
flags: CmpInsFlags,
collation: CollationSeq,
},
/// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side.
Gt {
@@ -252,6 +257,7 @@ pub enum Insn {
target_pc: BranchOffset,
/// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
flags: CmpInsFlags,
collation: CollationSeq,
},
/// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side.
Ge {
@@ -260,6 +266,7 @@ pub enum Insn {
target_pc: BranchOffset,
/// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic.
flags: CmpInsFlags,
collation: CollationSeq,
},
/// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0)
If {