From 510c70e9194c0a27f00aaeff84329f2e158dc405 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 18 Apr 2025 17:26:14 -0300 Subject: [PATCH 01/17] Create CollationSeq enum and functions. Move strum to workspace dependency to avoid version mismatch with Parser --- Cargo.lock | 2 ++ Cargo.toml | 2 ++ core/Cargo.toml | 4 ++- core/translate/collate.rs | 42 ++++++++++++++++++++++++++++++ core/translate/mod.rs | 1 + vendored/sqlite3-parser/Cargo.toml | 4 +-- 6 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 core/translate/collate.rs diff --git a/Cargo.lock b/Cargo.lock index 9838d600f..7a027407a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1841,10 +1841,12 @@ dependencies = [ "ryu", "sorted-vec", "strum", + "strum_macros", "tempfile", "test-log", "thiserror 1.0.69", "tracing", + "uncased", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 774de264a..31794d093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,8 @@ limbo_series = { path = "extensions/series", version = "0.0.20" } limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.20" } limbo_time = { path = "extensions/time", version = "0.0.20" } limbo_uuid = { path = "extensions/uuid", version = "0.0.20" } +strum = { version = "0.26", features = ["derive"] } +strum_macros = "0.26" [profile.release] debug = "line-tables-only" diff --git a/core/Cargo.toml b/core/Cargo.toml index 485f94b12..27807f3dd 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -69,11 +69,13 @@ limbo_ipaddr = { workspace = true, optional = true, features = ["static"] } limbo_completion = { workspace = true, optional = true, features = ["static"] } limbo_ext_tests = { workspace = true, optional = true, features = ["static"] } miette = "7.6.0" -strum = "0.26" +strum = { workspace = true } parking_lot = "0.12.3" crossbeam-skiplist = "0.1.3" tracing = "0.1.41" ryu = "1.0.19" +uncased = "0.9.10" +strum_macros = {workspace = true } bitflags = "2.9.0" [build-dependencies] diff --git a/core/translate/collate.rs b/core/translate/collate.rs new file mode 100644 index 000000000..bd6d1be4f --- /dev/null +++ b/core/translate/collate.rs @@ -0,0 +1,42 @@ +use std::cmp::Ordering; + +// TODO: in the future allow user to define collation sequences +// Will have to meddle with ffi for this +#[derive(Debug, Eq, PartialEq, strum_macros::Display, Default)] +#[strum(ascii_case_insensitive)] +/// **Pre defined collation sequences**\ +/// Collating functions only matter when comparing string values. +/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). +pub enum CollationSeq { + /// Standard String compare + #[default] + Binary, + /// Ascii case insensitive + NoCase, + /// Same as Binary but with trimmed whitespace + Rtrim, +} + +impl CollationSeq { + fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + match self { + CollationSeq::Binary => Self::binary_cmp(lhs, rhs), + CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), + CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), + } + } + + fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.cmp(rhs) + } + + fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { + let nocase_lhs = uncased::UncasedStr::new(lhs); + let nocase_rhs = uncased::UncasedStr::new(rhs); + nocase_lhs.cmp(nocase_rhs) + } + + fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.trim().cmp(rhs.trim()) + } +} diff --git a/core/translate/mod.rs b/core/translate/mod.rs index fa0b6d343..073df226c 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -8,6 +8,7 @@ //! will read rows from the database and filter them according to a WHERE clause. pub(crate) mod aggregation; +pub(crate) mod collate; pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; diff --git a/vendored/sqlite3-parser/Cargo.toml b/vendored/sqlite3-parser/Cargo.toml index fc0f1bf8e..4701cdeba 100644 --- a/vendored/sqlite3-parser/Cargo.toml +++ b/vendored/sqlite3-parser/Cargo.toml @@ -32,8 +32,8 @@ bitflags = "2.0" uncased = "0.9.10" indexmap = "2.0" miette = "7.4.0" -strum = { version = "0.26", features = ["derive"] } -strum_macros = "0.26" +strum = { workspace = true } +strum_macros = {workspace = true } [dev-dependencies] env_logger = { version = "0.11", default-features = false } From b5b1010e7c98928fdf61c210456c4ba9722744eb Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 18 Apr 2025 23:01:41 -0300 Subject: [PATCH 02/17] set binary collation as default --- core/translate/collate.rs | 5 +- core/translate/expr.rs | 18 +++++++ core/translate/main_loop.rs | 5 ++ core/translate/schema.rs | 4 ++ core/vdbe/execute.rs | 102 +++++++++++++++++++++++++++--------- core/vdbe/insn.rs | 7 +++ 6 files changed, 116 insertions(+), 25 deletions(-) diff --git a/core/translate/collate.rs b/core/translate/collate.rs index bd6d1be4f..0e3e4877f 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -1,5 +1,7 @@ use std::cmp::Ordering; +use tracing::Level; + // TODO: in the future allow user to define collation sequences // Will have to meddle with ffi for this #[derive(Debug, Eq, PartialEq, strum_macros::Display, Default)] @@ -18,7 +20,8 @@ pub enum CollationSeq { } impl CollationSeq { - fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + tracing::event!(Level::DEBUG, collate = %self, lhs, rhs); match self { CollationSeq::Binary => Self::binary_cmp(lhs, rhs), CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 8dae73fba..22204105f 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -16,6 +16,8 @@ use crate::vdbe::{ }; use crate::{Result, Value}; +use super::collate::CollationSeq; + #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, @@ -53,6 +55,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); } else { $program.emit_insn(Insn::$op_false { @@ -60,6 +63,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } }}; @@ -80,6 +84,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }); } else { $program.emit_insn(Insn::$op_false { @@ -87,6 +92,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }); } }}; @@ -370,6 +376,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: jump_target_when_true, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); } else { // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. @@ -378,6 +385,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } } @@ -399,6 +407,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }); } // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. @@ -609,6 +618,7 @@ pub fn translate_expr( target_pc: next_case_label, // A NULL result is considered untrue when evaluating WHEN terms. flags: CmpInsFlags::default().jump_if_null(), + collation: CollationSeq::default(), }), // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause None => program.emit_insn(Insn::IfNot { @@ -2216,6 +2226,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2232,6 +2243,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2248,6 +2260,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2264,6 +2277,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2280,6 +2294,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2296,6 +2311,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2389,6 +2405,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }, target_register, if_true_label, @@ -2403,6 +2420,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: CollationSeq::default(), }, target_register, if_true_label, diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 17f439450..ed7620056 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,6 +17,7 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, + collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1151,24 +1152,28 @@ fn emit_seek_termination( rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::GT) => program.emit_insn(Insn::Gt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::LE) => program.emit_insn(Insn::Le { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (false, SeekOp::LT) => program.emit_insn(Insn::Lt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }), (_, SeekOp::EQ) => { panic!("An index termination condition is never EQ") diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 1f95f89d5..5fbf9d110 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -21,6 +21,8 @@ use crate::{bail_parse_error, Result}; use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable}; +use super::collate::CollationSeq; + #[derive(Debug, Clone, Copy)] pub enum ParseSchema { None, @@ -719,6 +721,7 @@ pub fn translate_drop_table( rhs: table_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, @@ -730,6 +733,7 @@ pub fn translate_drop_table( rhs: table_type, target_pc: next_label, flags: CmpInsFlags::default(), + collation: CollationSeq::default(), }); program.emit_insn(Insn::RowId { cursor_id: sqlite_schema_cursor_id, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 736ea260d..64c2cc567 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -525,6 +525,7 @@ pub fn op_eq( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -537,8 +538,8 @@ pub fn op_eq( let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -547,8 +548,16 @@ pub fn op_eq( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_eq() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs == *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -570,6 +579,7 @@ pub fn op_ne( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -582,8 +592,8 @@ pub fn op_ne( let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -592,8 +602,16 @@ pub fn op_ne( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ne() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs != *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -615,6 +633,7 @@ pub fn op_lt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -625,8 +644,8 @@ pub fn op_lt( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -635,8 +654,16 @@ pub fn op_lt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() < *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_lt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs < *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -658,6 +685,7 @@ pub fn op_le( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -668,8 +696,8 @@ pub fn op_le( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -678,8 +706,16 @@ pub fn op_le( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() <= *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_le() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs <= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -701,6 +737,7 @@ pub fn op_gt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -711,8 +748,8 @@ pub fn op_gt( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -721,8 +758,16 @@ pub fn op_gt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() > *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_gt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs > *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -744,6 +789,7 @@ pub fn op_ge( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -754,8 +800,8 @@ pub fn op_ge( let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -764,8 +810,16 @@ pub fn op_ge( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() >= *state.registers[rhs].get_owned_value() { + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ge() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs >= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 876ab7930..4ff14ee16 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -8,6 +8,7 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag use crate::{ schema::{BTreeTable, Index}, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, + translate::collate::CollationSeq, }; use limbo_macros::Description; use limbo_sqlite3_parser::ast::SortOrder; @@ -218,6 +219,7 @@ pub enum Insn { /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true. /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if they are not equal. Ne { @@ -228,6 +230,7 @@ pub enum Insn { /// /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { @@ -236,6 +239,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. Le { @@ -244,6 +248,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { @@ -252,6 +257,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { @@ -260,6 +266,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: CollationSeq, }, /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0) If { From d0a63429a61b57608b21f35aea31538de2028396 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sat, 19 Apr 2025 02:47:59 -0300 Subject: [PATCH 03/17] Naive implementation of collate for queries. Not implemented for column constraints --- core/translate/collate.rs | 4 ++- core/translate/emitter.rs | 2 ++ core/translate/expr.rs | 61 ++++++++++++++++++++++++++----------- core/translate/index.rs | 2 ++ core/translate/main_loop.rs | 9 +++--- core/translate/schema.rs | 6 ++-- core/vdbe/builder.rs | 24 ++++++++++++++- core/vdbe/execute.rs | 18 +++++++---- core/vdbe/explain.rs | 18 +++++++---- core/vdbe/insn.rs | 12 ++++---- testing/all.test | 1 + testing/collate.test | 30 ++++++++++++++++++ 12 files changed, 141 insertions(+), 46 deletions(-) create mode 100755 testing/collate.test diff --git a/core/translate/collate.rs b/core/translate/collate.rs index 0e3e4877f..4613ff7c4 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -4,7 +4,9 @@ use tracing::Level; // TODO: in the future allow user to define collation sequences // Will have to meddle with ffi for this -#[derive(Debug, Eq, PartialEq, strum_macros::Display, Default)] +#[derive( + Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default, +)] #[strum(ascii_case_insensitive)] /// **Pre defined collation sequences**\ /// Collating functions only matter when comparing string values. diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 770695833..10d8d6605 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -914,6 +914,7 @@ fn emit_update_insns( rhs: idx_rowid_reg, target_pc: constraint_check, flags: CmpInsFlags::default(), // TODO: not sure what type of comparison flag is needed + collation: program.curr_collation(), }); program.emit_insn(Insn::Halt { @@ -943,6 +944,7 @@ fn emit_update_insns( rhs: beg, target_pc: record_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::NotExists { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 22204105f..def53c7a7 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,3 +1,5 @@ +use std::str::FromStr; + use limbo_sqlite3_parser::ast::{self, UnaryOperator}; use super::emitter::Resolver; @@ -55,7 +57,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: $program.curr_collation(), }); } else { $program.emit_insn(Insn::$op_false { @@ -63,7 +65,7 @@ macro_rules! emit_cmp_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), - collation: CollationSeq::default(), + collation: $program.curr_collation(), }); } }}; @@ -84,7 +86,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_true, flags: CmpInsFlags::default().null_eq(), - collation: CollationSeq::default(), + collation: $program.curr_collation(), }); } else { $program.emit_insn(Insn::$op_false { @@ -92,7 +94,7 @@ macro_rules! emit_cmp_null_insn { rhs: $rhs, target_pc: $cond.jump_target_when_false, flags: CmpInsFlags::default().null_eq(), - collation: CollationSeq::default(), + collation: $program.curr_collation(), }); } }}; @@ -376,7 +378,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: jump_target_when_true, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }); } else { // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. @@ -385,7 +387,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }); } } @@ -407,7 +409,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }); } // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. @@ -561,15 +563,31 @@ pub fn translate_expr( translate_expr(program, referenced_tables, e1, shared_reg, resolver)?; emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?; + program.reset_collation(); Ok(target_register) } else { let e1_reg = program.alloc_registers(2); let e2_reg = e1_reg + 1; translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; + let left_collation = program.curr_collation(); + program.reset_collation(); + translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; + let right_collation = program.curr_collation(); + program.reset_collation(); + + let collation = { + match (left_collation, right_collation) { + (Some(left), _) => Some(left), + (None, Some(right)) => Some(right), + _ => None, + } + }; + program.set_collation(collation); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; + program.reset_collation(); Ok(target_register) } } @@ -618,7 +636,7 @@ pub fn translate_expr( target_pc: next_case_label, // A NULL result is considered untrue when evaluating WHEN terms. flags: CmpInsFlags::default().jump_if_null(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }), // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause None => program.emit_insn(Insn::IfNot { @@ -689,7 +707,16 @@ pub fn translate_expr( }); Ok(target_register) } - ast::Expr::Collate(_, _) => todo!(), + ast::Expr::Collate(expr, collation) => { + // First translate inner expr, then set the curr collation. If we set curr collation before, + // it may be overwritten later by inner translate. + translate_expr(program, referenced_tables, expr, target_register, resolver)?; + let collation = CollationSeq::from_str(collation).map_err(|_| { + crate::LimboError::ParseError(format!("no such collation sequence: {}", collation)) + })?; + program.set_collation(Some(collation)); + Ok(target_register) + } ast::Expr::DoublyQualified(_, _, _) => todo!(), ast::Expr::Exists(_) => todo!(), ast::Expr::FunctionCall { @@ -2226,7 +2253,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2243,7 +2270,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2260,7 +2287,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2277,7 +2304,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2294,7 +2321,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2311,7 +2338,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2405,7 +2432,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2420,7 +2447,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }, target_register, if_true_label, diff --git a/core/translate/index.rs b/core/translate/index.rs index 30e96f897..bb8390fca 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -405,6 +405,7 @@ pub fn translate_drop_index( rhs: dest_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); // read type of table @@ -420,6 +421,7 @@ pub fn translate_drop_index( rhs: dest_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::RowId { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index ed7620056..59168509e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,7 +17,6 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, - collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1152,28 +1151,28 @@ fn emit_seek_termination( rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }), (false, SeekOp::GT) => program.emit_insn(Insn::Gt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }), (false, SeekOp::LE) => program.emit_insn(Insn::Le { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }), (false, SeekOp::LT) => program.emit_insn(Insn::Lt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }), (_, SeekOp::EQ) => { panic!("An index termination condition is never EQ") diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 5fbf9d110..3f5fb47b7 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -21,8 +21,6 @@ use crate::{bail_parse_error, Result}; use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable}; -use super::collate::CollationSeq; - #[derive(Debug, Clone, Copy)] pub enum ParseSchema { None, @@ -721,7 +719,7 @@ pub fn translate_drop_table( rhs: table_reg, target_pc: next_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, @@ -733,7 +731,7 @@ pub fn translate_drop_table( rhs: table_type, target_pc: next_label, flags: CmpInsFlags::default(), - collation: CollationSeq::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::RowId { cursor_id: sqlite_schema_cursor_id, diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 6be6fdad8..78d6970a9 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -12,7 +12,10 @@ use crate::{ parameters::Parameters, schema::{BTreeTable, Index, PseudoTable}, storage::sqlite3_ondisk::DatabaseHeader, - translate::plan::{ResultSetColumn, TableReference}, + translate::{ + collate::CollationSeq, + plan::{ResultSetColumn, TableReference}, + }, Connection, VirtualTable, }; @@ -38,6 +41,8 @@ pub struct ProgramBuilder { pub parameters: Parameters, pub result_columns: Vec, pub table_references: Vec, + /// Stack of collation definitions encountered + collation: Option, } #[derive(Debug, Clone)] @@ -95,6 +100,7 @@ impl ProgramBuilder { parameters: Parameters::new(), result_columns: Vec::new(), table_references: Vec::new(), + collation: None, } } @@ -589,6 +595,22 @@ impl ProgramBuilder { .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier)) } + pub fn set_collation(&mut self, c: Option) { + self.collation = c; + } + + pub fn curr_collation(&self) -> Option { + self.collation + } + + pub fn reset_collation(&mut self) { + self.collation = None; + } + + // pub fn pop_collation(&mut self) -> Option { + // self.collations.pop() + // } + pub fn build( mut self, database_header: Arc>, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 64c2cc567..c0d912e0c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -537,6 +537,7 @@ pub fn op_eq( let cond = *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value(); let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -548,7 +549,7 @@ pub fn op_eq( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_eq() { state.pc = target_pc.to_offset_int(); @@ -591,6 +592,7 @@ pub fn op_ne( let cond = *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value(); let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -602,7 +604,7 @@ pub fn op_ne( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_ne() { state.pc = target_pc.to_offset_int(); @@ -643,6 +645,7 @@ pub fn op_lt( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -654,7 +657,7 @@ pub fn op_lt( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_lt() { state.pc = target_pc.to_offset_int(); @@ -695,6 +698,7 @@ pub fn op_le( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -706,7 +710,7 @@ pub fn op_le( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_le() { state.pc = target_pc.to_offset_int(); @@ -747,6 +751,7 @@ pub fn op_gt( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -758,7 +763,7 @@ pub fn op_gt( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_gt() { state.pc = target_pc.to_offset_int(); @@ -799,6 +804,7 @@ pub fn op_ge( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( state.registers[lhs].get_owned_value(), state.registers[rhs].get_owned_value(), @@ -810,7 +816,7 @@ pub fn op_ge( state.pc += 1; } } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => { + (Value::Text(lhs), Value::Text(rhs)) => { let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); if order.is_ge() { state.pc = target_pc.to_offset_int(); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 2ee29e178..0fe6184db 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -211,13 +211,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Eq", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]==r[{}] goto {}", @@ -230,13 +231,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Ne", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]!=r[{}] goto {}", @@ -249,13 +251,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Lt", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("if r[{}] ( "Le", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]<=r[{}] goto {}", @@ -282,13 +286,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Gt", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("if r[{}]>r[{}] goto {}", lhs, rhs, target_pc.to_debug_int()), ), @@ -296,13 +301,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Ge", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]>=r[{}] goto {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 4ff14ee16..d94ec521c 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -219,7 +219,7 @@ pub enum Insn { /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true. /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, /// Compare two registers and jump to the given PC if they are not equal. Ne { @@ -230,7 +230,7 @@ pub enum Insn { /// /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { @@ -239,7 +239,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. Le { @@ -248,7 +248,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { @@ -257,7 +257,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { @@ -266,7 +266,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, - collation: CollationSeq, + collation: Option, }, /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0) If { diff --git a/testing/all.test b/testing/all.test index 1f9ab8051..dc5b3264f 100755 --- a/testing/all.test +++ b/testing/all.test @@ -34,3 +34,4 @@ source $testdir/boolean.test source $testdir/literal.test source $testdir/null.test source $testdir/create_table.test +source $testdir/collate.test diff --git a/testing/collate.test b/testing/collate.test new file mode 100755 index 000000000..dc65b2e7d --- /dev/null +++ b/testing/collate.test @@ -0,0 +1,30 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# SIMPLE SMOKE TESTS THAT DO NOT DEPEND ON SPECIFIC DATABASE ROWS + +do_execsql_test collate-nocase { + SELECT 'hat' == 'hAt' COLLATE NOCASE; +} {1} + +do_execsql_test collate-binary-1 { + SELECT 'hat' == 'hAt' COLLATE BINARY; +} {0} + +do_execsql_test collate-binary-2 { + SELECT 'hat' == 'hat' COLLATE BINARY; +} {1} + +do_execsql_test collate-rtrim-1 { + SELECT 'hat' == ' hAt ' COLLATE RTRIM; +} {0} + +do_execsql_test collate-rtrim-2 { + SELECT 'hat' == ' hat ' COLLATE RTRIM; +} {1} + +do_execsql_test collate-2 { + SELECT 'hat' == 'hat' COLLATE NOCASE; +} {1} From f8854f180a25f048c497d869ae3eac273fb1270b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 00:14:23 -0300 Subject: [PATCH 04/17] Added collation to create table columns --- core/schema.rs | 15 +++++ core/translate/collate.rs | 8 ++- core/translate/expr.rs | 16 ++--- core/translate/group_by.rs | 1 + core/translate/order_by.rs | 2 + core/translate/plan.rs | 1 + core/util.rs | 134 +++++++++++++++++++++---------------- testing/collate.test | 8 ++- 8 files changed, 115 insertions(+), 70 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 210830b81..b04a31918 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,3 +1,4 @@ +use crate::translate::collate::CollationSeq; use crate::{util::normalize_ident, Result}; use crate::{LimboError, VirtualTable}; use core::fmt; @@ -261,6 +262,7 @@ impl PseudoTable { notnull: false, default: None, unique: false, + collation: None, }); } pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { @@ -418,6 +420,7 @@ fn create_table( let mut notnull = false; let mut order = SortOrder::Asc; let mut unique = false; + let mut collation = None; for c_def in &col_def.constraints { match &c_def.constraint { limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { @@ -442,6 +445,10 @@ fn create_table( } unique = true; } + limbo_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => { + collation = Some(CollationSeq::new(collation_name.0.as_str())?); + } + // Collate _ => {} } } @@ -464,6 +471,7 @@ fn create_table( notnull, default, unique, + collation, }); } if options.contains(TableOptions::WITHOUT_ROWID) { @@ -534,6 +542,7 @@ pub struct Column { pub notnull: bool, pub default: Option, pub unique: bool, + pub collation: Option, } impl Column { @@ -737,6 +746,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("name".to_string()), @@ -747,6 +757,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("tbl_name".to_string()), @@ -757,6 +768,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("rootpage".to_string()), @@ -767,6 +779,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("sql".to_string()), @@ -777,6 +790,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, ], unique_sets: None, @@ -1406,6 +1420,7 @@ mod tests { notnull: false, default: None, unique: false, + collation: None, }], unique_sets: None, }; diff --git a/core/translate/collate.rs b/core/translate/collate.rs index 4613ff7c4..42faa3d34 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -1,4 +1,4 @@ -use std::cmp::Ordering; +use std::{cmp::Ordering, str::FromStr as _}; use tracing::Level; @@ -22,6 +22,12 @@ pub enum CollationSeq { } impl CollationSeq { + pub fn new(collation: &str) -> crate::Result { + CollationSeq::from_str(collation).map_err(|_| { + crate::LimboError::ParseError(format!("no such collation sequence: {}", collation)) + }) + } + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { tracing::event!(Level::DEBUG, collate = %self, lhs, rhs); match self { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index def53c7a7..95292ccba 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,5 +1,3 @@ -use std::str::FromStr; - use limbo_sqlite3_parser::ast::{self, UnaryOperator}; use super::emitter::Resolver; @@ -711,9 +709,7 @@ pub fn translate_expr( // First translate inner expr, then set the curr collation. If we set curr collation before, // it may be overwritten later by inner translate. translate_expr(program, referenced_tables, expr, target_register, resolver)?; - let collation = CollationSeq::from_str(collation).map_err(|_| { - crate::LimboError::ParseError(format!("no such collation sequence: {}", collation)) - })?; + let collation = CollationSeq::new(collation)?; program.set_collation(Some(collation)); Ok(target_register) } @@ -1876,6 +1872,11 @@ pub fn translate_expr( let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); let index = table_reference.op.index(); let use_covering_index = table_reference.utilizes_covering_index(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + program.set_collation(table_column.collation); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. @@ -1926,10 +1927,7 @@ pub fn translate_expr( dest: target_register, }); } - let Some(column) = table_reference.table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); + maybe_apply_affinity(table_column.ty, target_register, program); Ok(target_register) } Table::Virtual(_) => { diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 9bb10a57e..7785c4d2c 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -217,6 +217,7 @@ pub fn group_by_create_pseudo_table( notnull: false, default: None, unique: false, + collation: None, }) .collect::>(); diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 488a8cd82..2105d7da0 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -73,6 +73,7 @@ pub fn emit_order_by( notnull: false, default: None, unique: false, + collation: None, }); } for i in 0..result_columns.len() { @@ -92,6 +93,7 @@ pub fn emit_order_by( notnull: false, default: None, unique: false, + collation: None, }); } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 58efbe7a2..48c768f1d 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -610,6 +610,7 @@ impl TableReference { notnull: false, default: None, unique: false, + collation: None, }) .collect(), ))); diff --git a/core/util.rs b/core/util.rs index 53e74fabe..40a2c0902 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,6 +1,7 @@ use crate::{ function::Func, schema::{self, Column, Schema, Type}, + translate::collate::CollationSeq, types::{Value, ValueType}, LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, }; @@ -499,66 +500,83 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res return None; } } - let column = Column { - name: Some(name.0.clone()), - ty: match column_def.col_type { - Some(ref data_type) => { - // https://www.sqlite.org/datatype3.html - let type_name = data_type.name.as_str().to_uppercase(); - if type_name.contains("INT") { - Type::Integer - } else if type_name.contains("CHAR") - || type_name.contains("CLOB") - || type_name.contains("TEXT") - { - Type::Text - } else if type_name.contains("BLOB") || type_name.is_empty() { - Type::Blob - } else if type_name.contains("REAL") - || type_name.contains("FLOA") - || type_name.contains("DOUB") - { - Type::Real - } else { - Type::Numeric + let column = + Column { + name: Some(name.0.clone()), + ty: match column_def.col_type { + Some(ref data_type) => { + // https://www.sqlite.org/datatype3.html + let type_name = data_type.name.as_str().to_uppercase(); + if type_name.contains("INT") { + Type::Integer + } else if type_name.contains("CHAR") + || type_name.contains("CLOB") + || type_name.contains("TEXT") + { + Type::Text + } else if type_name.contains("BLOB") || type_name.is_empty() { + Type::Blob + } else if type_name.contains("REAL") + || type_name.contains("FLOA") + || type_name.contains("DOUB") + { + Type::Real + } else { + Type::Numeric + } } - } - None => Type::Null, - }, - default: column_def - .constraints - .iter() - .find_map(|c| match &c.constraint { - limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => { - Some(val.clone()) - } - _ => None, + None => Type::Null, + }, + default: column_def + .constraints + .iter() + .find_map(|c| match &c.constraint { + limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => { + Some(val.clone()) + } + _ => None, + }), + notnull: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } + ) }), - notnull: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } - ) - }), - ty_str: column_def - .col_type - .clone() - .map(|t| t.name.to_string()) - .unwrap_or_default(), - primary_key: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } - ) - }), - is_rowid_alias: false, - unique: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..) - ) - }), - }; + ty_str: column_def + .col_type + .clone() + .map(|t| t.name.to_string()) + .unwrap_or_default(), + primary_key: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } + ) + }), + is_rowid_alias: false, + unique: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..) + ) + }), + collation: column_def + .constraints + .iter() + .find_map(|c| match &c.constraint { + // TODO: see if this should be the correct behavior + // currently there cannot be any user defined collation sequences. + // But in the future, when a user defines a collation sequence, creates a table with it, + // then closes the db and opens it again. This may panic here if the collation seq is not registered + // before reading the columns + limbo_sqlite3_parser::ast::ColumnConstraint::Collate { + collation_name, + } => Some(CollationSeq::new(collation_name.0.as_str()).expect( + "collation should have been set correctly in create table", + )), + _ => None, + }), + }; Some(column) }) .collect::>()) diff --git a/testing/collate.test b/testing/collate.test index dc65b2e7d..291fbb334 100755 --- a/testing/collate.test +++ b/testing/collate.test @@ -25,6 +25,10 @@ do_execsql_test collate-rtrim-2 { SELECT 'hat' == ' hat ' COLLATE RTRIM; } {1} -do_execsql_test collate-2 { - SELECT 'hat' == 'hat' COLLATE NOCASE; +do_execsql_test collate-left-precedence { + SELECT 'hat' COLLATE BINARY == 'hAt' COLLATE NOCASE; +} {0} + +do_execsql_test collate-left-precedence-2 { + SELECT 'hat' COLLATE NOCASE == 'hAt' COLLATE BINARY; } {1} From a818b6924c33cf5314c4b099c28f0b94d8b96954 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 01:46:26 -0300 Subject: [PATCH 05/17] Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary. --- Makefile | 6 +- core/translate/expr.rs | 139 ++++++++--------------------------- core/vdbe/builder.rs | 14 ++-- core/vdbe/explain.rs | 1 + testing/cli_tests/collate.py | 105 ++++++++++++++++++++++++++ testing/pyproject.toml | 5 ++ 6 files changed, 154 insertions(+), 116 deletions(-) create mode 100644 testing/cli_tests/collate.py diff --git a/Makefile b/Makefile index 846867a0c..db12b90ff 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ uv-sync: uv sync --all-packages .PHONE: uv-sync -test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint +test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint test-collate .PHONY: test test-extensions: limbo uv-sync @@ -100,6 +100,10 @@ test-update: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update .PHONY: test-update +test-collate: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-collate +.PHONY: test-collate + test-constraint: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-constraint .PHONY: test-constraint diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 95292ccba..fe64e3a2d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -40,63 +40,6 @@ fn emit_cond_jump(program: &mut ProgramBuilder, cond_meta: ConditionMetadata, re }); } } -macro_rules! emit_cmp_insn { - ( - $program:expr, - $cond:expr, - $op_true:ident, - $op_false:ident, - $lhs:expr, - $rhs:expr - ) => {{ - if $cond.jump_if_condition_is_true { - $program.emit_insn(Insn::$op_true { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_true, - flags: CmpInsFlags::default(), - collation: $program.curr_collation(), - }); - } else { - $program.emit_insn(Insn::$op_false { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_false, - flags: CmpInsFlags::default().jump_if_null(), - collation: $program.curr_collation(), - }); - } - }}; -} - -macro_rules! emit_cmp_null_insn { - ( - $program:expr, - $cond:expr, - $op_true:ident, - $op_false:ident, - $lhs:expr, - $rhs:expr - ) => {{ - if $cond.jump_if_condition_is_true { - $program.emit_insn(Insn::$op_true { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_true, - flags: CmpInsFlags::default().null_eq(), - collation: $program.curr_collation(), - }); - } else { - $program.emit_insn(Insn::$op_false { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_false, - flags: CmpInsFlags::default().null_eq(), - collation: $program.curr_collation(), - }); - } - }}; -} macro_rules! expect_arguments_exact { ( @@ -246,51 +189,6 @@ pub fn translate_condition_expr( resolver, )?; } - ast::Expr::Binary(lhs, op, rhs) - if matches!( - op, - ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - | ast::Operator::Equals - | ast::Operator::NotEquals - | ast::Operator::Is - | ast::Operator::IsNot - ) => - { - let lhs_reg = program.alloc_register(); - let rhs_reg = program.alloc_register(); - translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; - translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; - match op { - ast::Operator::Greater => { - emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) - } - ast::Operator::GreaterEquals => { - emit_cmp_insn!(program, condition_metadata, Ge, Lt, lhs_reg, rhs_reg) - } - ast::Operator::Less => { - emit_cmp_insn!(program, condition_metadata, Lt, Ge, lhs_reg, rhs_reg) - } - ast::Operator::LessEquals => { - emit_cmp_insn!(program, condition_metadata, Le, Gt, lhs_reg, rhs_reg) - } - ast::Operator::Equals => { - emit_cmp_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg) - } - ast::Operator::NotEquals => { - emit_cmp_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg) - } - ast::Operator::Is => { - emit_cmp_null_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg) - } - ast::Operator::IsNot => { - emit_cmp_null_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg) - } - _ => unreachable!(), - } - } ast::Expr::Binary(_, _, _) => { let result_reg = program.alloc_register(); translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?; @@ -568,21 +466,41 @@ pub fn translate_expr( let e2_reg = e1_reg + 1; translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; - let left_collation = program.curr_collation(); + let left_collation = program.curr_collation_ctx(); program.reset_collation(); translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; - let right_collation = program.curr_collation(); + let right_collation = program.curr_collation_ctx(); program.reset_collation(); - let collation = { + /* + * The rules for determining which collating function to use for a binary comparison + * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: + * + * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, + * then the explicit collating function is used for comparison, + * with precedence to the collating function of the left operand. + * + * 2. If either operand is a column, then the collating function of that column is used + * with precedence to the left operand. For the purposes of the previous sentence, + * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. + * + * 3. Otherwise, the BINARY collating function is used for comparison. + */ + let collation_ctx = { match (left_collation, right_collation) { - (Some(left), _) => Some(left), - (None, Some(right)) => Some(right), + (Some((c_left, true)), _) => Some((c_left, true)), + (Some((c_left, from_collate_left)), Some((_, false))) => { + Some((c_left, from_collate_left)) + } + (Some((_, false)), Some((c_right, true))) => Some((c_right, true)), + (None, Some((c_right, from_collate_right))) => { + Some((c_right, from_collate_right)) + } _ => None, } }; - program.set_collation(collation); + program.set_collation(collation_ctx); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; program.reset_collation(); @@ -710,7 +628,7 @@ pub fn translate_expr( // it may be overwritten later by inner translate. translate_expr(program, referenced_tables, expr, target_register, resolver)?; let collation = CollationSeq::new(collation)?; - program.set_collation(Some(collation)); + program.set_collation(Some((collation, true))); Ok(target_register) } ast::Expr::DoublyQualified(_, _, _) => todo!(), @@ -1876,7 +1794,8 @@ pub fn translate_expr( let Some(table_column) = table_reference.table.get_column_at(*column) else { crate::bail_parse_error!("column index out of bounds"); }; - program.set_collation(table_column.collation); + // Counter intuitive but a column always needs to have a collation + program.set_collation(Some((table_column.collation.unwrap_or_default(), false))); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 78d6970a9..7aad796ba 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -41,8 +41,8 @@ pub struct ProgramBuilder { pub parameters: Parameters, pub result_columns: Vec, pub table_references: Vec, - /// Stack of collation definitions encountered - collation: Option, + /// Curr collation sequence. Bool indicates whether it was set by a COLLATE expr + collation: Option<(CollationSeq, bool)>, } #[derive(Debug, Clone)] @@ -595,12 +595,16 @@ impl ProgramBuilder { .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier)) } - pub fn set_collation(&mut self, c: Option) { - self.collation = c; + pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) { + self.collation = c + } + + pub fn curr_collation_ctx(&self) -> Option<(CollationSeq, bool)> { + self.collation } pub fn curr_collation(&self) -> Option { - self.collation + self.collation.map(|c| c.0) } pub fn reset_collation(&mut self) { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 0fe6184db..a39ef5482 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -887,6 +887,7 @@ pub fn insn_to_str( order, } => { let _p4 = String::new(); + // TODO: to_print should also print the Collation Sequence let to_print: Vec = order .iter() .map(|v| match v { diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py new file mode 100644 index 000000000..784b397a2 --- /dev/null +++ b/testing/cli_tests/collate.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +import os +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console + + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +class CollateTest(BaseModel): + name: str + db_schema: str = """CREATE TABLE t1( + x INTEGER PRIMARY KEY, + a, /* collating sequence BINARY */ + b COLLATE BINARY, /* collating sequence BINARY */ + c COLLATE RTRIM, /* collating sequence RTRIM */ + d COLLATE NOCASE /* collating sequence NOCASE */ + );""" + db_path: str = "testing/collate.db" + + def init_db(self): + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.execute_dot(f".open {self.db_path}") + stmt = [self.db_schema] + stmt = stmt + [ + "INSERT INTO t1 VALUES(1,'abc','abc', 'abc ','abc');", + "INSERT INTO t1 VALUES(2,'abc','abc', 'abc', 'ABC');", + "INSERT INTO t1 VALUES(3,'abc','abc', 'abc ', 'Abc');", + "INSERT INTO t1 VALUES(4,'abc','abc ','ABC', 'abc');", + ] + stmt.append("SELECT count(*) FROM t1;") + + sqlite.run_test( + "Init Collate Db in Sqlite", + "".join(stmt), + f"{4}", + ) + + def run(self, limbo: TestLimboShell): + limbo.execute_dot(f".open {self.db_path}") + + limbo.run_test( + "Text comparison a=b is performed using the BINARY collating sequence", + "SELECT x FROM t1 WHERE a = b ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) + + limbo.run_test( + "Text comparison a=b is performed using the RTRIM collating sequence", + "SELECT x FROM t1 WHERE a = b COLLATE RTRIM ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3, 4])), + ) + + limbo.run_test( + "Text comparison d=a is performed using the NOCASE collating sequence", + "SELECT x FROM t1 WHERE d = a ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3, 4])), + ) + + limbo.run_test( + "Text comparison a=d is performed using the BINARY collating sequence.", + "SELECT x FROM t1 WHERE a = d ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 4])), + ) + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + # Test from using examples from Section 7.2 + # https://sqlite.org/datatype3.html#collation + test = CollateTest(name="Smoke collate tests") + console.info(test) + + db_path = test.db_path + try: + test.init_db() + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + test.run(limbo) + + # test.test_compat() + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/pyproject.toml b/testing/pyproject.toml index 0aed7b99b..f3ba79a1d 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -15,8 +15,13 @@ test-shell = "cli_tests.cli_test_cases:main" test-extensions = "cli_tests.extensions:main" test-update = "cli_tests.update:main" test-memory = "cli_tests.memory:main" +<<<<<<< HEAD bench-vfs = "cli_tests.vfs_bench:main" test-constraint = "cli_tests.constraint:main" +||||||| parent of b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) +======= +test-collate = "cli_tests.collate:main" +>>>>>>> b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) [tool.uv] package = true From bba9689674d113f4cb0eed3999e607c7e5e960ce Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 02:02:07 -0300 Subject: [PATCH 06/17] Fixed matching bug for defining collation context to use --- core/translate/expr.rs | 59 ++++++++++++++++++------------------ testing/cli_tests/collate.py | 12 ++++++++ 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fe64e3a2d..24fba1800 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -466,41 +466,42 @@ pub fn translate_expr( let e2_reg = e1_reg + 1; translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; - let left_collation = program.curr_collation_ctx(); + let left_collation_ctx = program.curr_collation_ctx(); program.reset_collation(); translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; - let right_collation = program.curr_collation_ctx(); + let right_collation_ctx = program.curr_collation_ctx(); program.reset_collation(); - /* - * The rules for determining which collating function to use for a binary comparison - * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: - * - * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, - * then the explicit collating function is used for comparison, - * with precedence to the collating function of the left operand. - * - * 2. If either operand is a column, then the collating function of that column is used - * with precedence to the left operand. For the purposes of the previous sentence, - * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. - * - * 3. Otherwise, the BINARY collating function is used for comparison. - */ - let collation_ctx = { - match (left_collation, right_collation) { - (Some((c_left, true)), _) => Some((c_left, true)), - (Some((c_left, from_collate_left)), Some((_, false))) => { - Some((c_left, from_collate_left)) - } - (Some((_, false)), Some((c_right, true))) => Some((c_right, true)), - (None, Some((c_right, from_collate_right))) => { - Some((c_right, from_collate_right)) - } - _ => None, + /* + * The rules for determining which collating function to use for a binary comparison + * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: + * + * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, + * then the explicit collating function is used for comparison, + * with precedence to the collating function of the left operand. + * + * 2. If either operand is a column, then the collating function of that column is used + * with precedence to the left operand. For the purposes of the previous sentence, + * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. + * + * 3. Otherwise, the BINARY collating function is used for comparison. + */ + let collation_ctx = { + match (left_collation_ctx, right_collation_ctx) { + (Some((c_left, true)), _) => Some((c_left, true)), + (_, Some((c_right, true))) => Some((c_right, true)), + (Some((c_left, from_collate_left)), None) => Some((c_left, from_collate_left)), + (None, Some((c_right, from_collate_right))) => { + Some((c_right, from_collate_right)) } - }; - program.set_collation(collation_ctx); + (Some((c_left, from_collate_left)), Some((_, false))) => { + Some((c_left, from_collate_left)) + } + _ => None, + } + }; + program.set_collation(collation_ctx); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; program.reset_collation(); diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py index 784b397a2..3dd5daadd 100644 --- a/testing/cli_tests/collate.py +++ b/testing/cli_tests/collate.py @@ -67,6 +67,18 @@ class CollateTest(BaseModel): "SELECT x FROM t1 WHERE a = d ORDER BY x;", "\n".join(map(lambda x: str(x), [1, 4])), ) + + limbo.run_test( + "Text comparison 'abc'=c is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 WHERE 'abc' = c ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) + + limbo.run_test( + "Text comparison c='abc' is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 WHERE c = 'abc' ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) def cleanup(db_fullpath: str): From 0df6c87f07b45320bfb4fa35c410710a99615e9d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 02:11:33 -0300 Subject: [PATCH 07/17] Fixed Group By collation --- core/translate/group_by.rs | 1 + core/translate/optimizer/join.rs | 1 + core/vdbe/execute.rs | 9 ++++++++- core/vdbe/explain.rs | 3 ++- core/vdbe/insn.rs | 1 + testing/cli_tests/collate.py | 10 ++++++++-- 6 files changed, 21 insertions(+), 4 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 7785c4d2c..d7ac98c29 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -462,6 +462,7 @@ pub fn group_by_process_single_group( start_reg_a: registers.reg_group_exprs_cmp, start_reg_b: groups_start_reg, count: group_by.exprs.len(), + collation: program.curr_collation(), }); program.add_comment( diff --git a/core/translate/optimizer/join.rs b/core/translate/optimizer/join.rs index c10fcc4fa..3a2679b7c 100644 --- a/core/translate/optimizer/join.rs +++ b/core/translate/optimizer/join.rs @@ -1205,6 +1205,7 @@ mod tests { notnull: false, default: None, unique: false, + collation: None, } } fn _create_column_of_type(name: &str, ty: Type) -> Column { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index c0d912e0c..2c3bcd2d4 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -365,6 +365,7 @@ pub fn op_compare( start_reg_a, start_reg_b, count, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -372,6 +373,7 @@ pub fn op_compare( let start_reg_a = *start_reg_a; let start_reg_b = *start_reg_b; let count = *count; + let collation = collation.unwrap_or_default(); if start_reg_a + count > start_reg_b { return Err(LimboError::InternalError( @@ -383,7 +385,12 @@ pub fn op_compare( for i in 0..count { let a = state.registers[start_reg_a + i].get_owned_value(); let b = state.registers[start_reg_b + i].get_owned_value(); - cmp = Some(a.cmp(b)); + cmp = match (a, b) { + (Value::Text(left), Value::Text(right)) => { + Some(collation.compare_strings(left.as_str(), right.as_str())) + } + _ => Some(a.cmp(b)), + }; if cmp != Some(std::cmp::Ordering::Equal) { break; } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index a39ef5482..a2ef91d01 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -141,12 +141,13 @@ pub fn insn_to_str( start_reg_a, start_reg_b, count, + collation, } => ( "Compare", *start_reg_a as i32, *start_reg_b as i32, *count as i32, - Value::build_text(""), + Value::build_text(&format!("k({count}, {})", collation.unwrap_or_default())), 0, format!( "r[{}..{}]==r[{}..{}]", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index d94ec521c..156abfcd6 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -153,6 +153,7 @@ pub enum Insn { start_reg_a: usize, start_reg_b: usize, count: usize, + collation: Option, }, /// Place the result of rhs bitwise AND lhs in third register. BitAnd { diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py index 3dd5daadd..69fa0d574 100644 --- a/testing/cli_tests/collate.py +++ b/testing/cli_tests/collate.py @@ -67,19 +67,25 @@ class CollateTest(BaseModel): "SELECT x FROM t1 WHERE a = d ORDER BY x;", "\n".join(map(lambda x: str(x), [1, 4])), ) - + limbo.run_test( "Text comparison 'abc'=c is performed using the RTRIM collating sequence.", "SELECT x FROM t1 WHERE 'abc' = c ORDER BY x;", "\n".join(map(lambda x: str(x), [1, 2, 3])), ) - + limbo.run_test( "Text comparison c='abc' is performed using the RTRIM collating sequence.", "SELECT x FROM t1 WHERE c = 'abc' ORDER BY x;", "\n".join(map(lambda x: str(x), [1, 2, 3])), ) + limbo.run_test( + " Grouping is performed using the NOCASE collating sequence (Values 'abc', 'ABC', and 'Abc' are placed in the same group).", + "SELECT count(*) FROM t1 GROUP BY d ORDER BY 1;", + "\n".join(map(lambda x: str(x), [4])), + ) + def cleanup(db_fullpath: str): wal_path = f"{db_fullpath}-wal" From bf1fe9e0b3fe89ec5afc109a03b4ee47cf9732b6 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 03:53:44 -0300 Subject: [PATCH 08/17] Actually fixed group by and order by collation --- core/translate/emitter.rs | 2 +- core/translate/group_by.rs | 32 ++++++++++++++++++++++++++++++++ core/translate/index.rs | 1 + core/translate/order_by.rs | 35 ++++++++++++++++++++++++++++++++++- core/types.rs | 16 +++++++++++++++- core/vdbe/execute.rs | 3 ++- core/vdbe/explain.rs | 15 +++++++++++---- core/vdbe/insn.rs | 1 + core/vdbe/sorter.rs | 10 ++++++++-- testing/cli_tests/collate.py | 14 +++++++++++++- 10 files changed, 118 insertions(+), 11 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 10d8d6605..38a2410bb 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -275,7 +275,7 @@ pub fn emit_query<'a>( // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { - init_order_by(program, t_ctx, order_by)?; + init_order_by(program, t_ctx, order_by, &plan.table_references)?; } if let Some(ref group_by) = plan.group_by { diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index d7ac98c29..055a7a208 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -5,6 +5,7 @@ use limbo_sqlite3_parser::ast; use crate::{ function::AggFunc, schema::{Column, PseudoTable}, + translate::collate::CollationSeq, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -117,10 +118,41 @@ pub fn init_group_by( let row_source = if let Some(sort_order) = group_by.sort_order.as_ref() { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); let sorter_column_count = plan.group_by_sorter_column_count(); + // Should work the same way as Order By + /* + * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator, + * in which case the specified collating function is used for sorting. + * Otherwise, if the expression sorted by an ORDER BY clause is a column, + * then the collating sequence of the column is used to determine sort order. + * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. + */ + let mut collation = None; + for expr in group_by.exprs.iter() { + match expr { + ast::Expr::Collate(_, collation_name) => { + collation = Some(CollationSeq::new(collation_name)?); + break; + } + ast::Expr::Column { table, column, .. } => { + let table_reference = plan.table_references.get(*table).unwrap(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + + if table_column.collation.is_some() { + collation = table_column.collation; + break; + } + } + _ => {} + }; + } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: sorter_column_count, order: sort_order.clone(), + collation, }); let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count); GroupByRowSource::Sorter { diff --git a/core/translate/index.rs b/core/translate/index.rs index bb8390fca..84b6bf0eb 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -120,6 +120,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, columns: columns.len(), order, + collation: program.curr_collation(), }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 2105d7da0..4f1707958 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -4,6 +4,7 @@ use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ schema::{Column, PseudoTable}, + translate::collate::CollationSeq, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -15,7 +16,7 @@ use crate::{ use super::{ emitter::{Resolver, TranslateCtx}, expr::translate_expr, - plan::{ResultSetColumn, SelectPlan}, + plan::{ResultSetColumn, SelectPlan, TableReference}, result_row::{emit_offset, emit_result_row_and_limit}, }; @@ -33,16 +34,48 @@ pub fn init_order_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, order_by: &[(ast::Expr, SortOrder)], + referenced_tables: &[TableReference], ) -> Result<()> { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); t_ctx.meta_sort = Some(SortMetadata { sort_cursor, reg_sorter_data: program.alloc_register(), }); + + /* + * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator, + * in which case the specified collating function is used for sorting. + * Otherwise, if the expression sorted by an ORDER BY clause is a column, + * then the collating sequence of the column is used to determine sort order. + * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. + */ + let mut collation = None; + for (expr, _) in order_by.iter() { + match expr { + ast::Expr::Collate(_, collation_name) => { + collation = Some(CollationSeq::new(collation_name)?); + break; + } + ast::Expr::Column { table, column, .. } => { + let table_reference = referenced_tables.get(*table).unwrap(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + + if table_column.collation.is_some() { + collation = table_column.collation; + break; + } + } + _ => {} + }; + } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), order: order_by.iter().map(|(_, direction)| *direction).collect(), + collation, }); Ok(()) } diff --git a/core/types.rs b/core/types.rs index a277d3c80..2b7718870 100644 --- a/core/types.rs +++ b/core/types.rs @@ -7,6 +7,7 @@ use crate::pseudo::PseudoCursor; use crate::schema::Index; use crate::storage::btree::BTreeCursor; use crate::storage::sqlite3_ondisk::write_varint; +use crate::translate::collate::CollationSeq; use crate::translate::plan::IterationDirection; use crate::vdbe::sorter::Sorter; use crate::vdbe::{Register, VTabOpaqueCursor}; @@ -1103,11 +1104,24 @@ pub fn compare_immutable( l: &[RefValue], r: &[RefValue], index_key_sort_order: IndexKeySortOrder, + collation: CollationSeq, ) -> std::cmp::Ordering { assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); - let cmp = l.partial_cmp(r).unwrap(); + let mut l = l; + let mut r = r; + if !matches!(column_order, SortOrder::Asc) { + let tmp = l; + l = r; + r = tmp; + } + let cmp = match (l, r) { + (RefValue::Text(left), RefValue::Text(right)) => { + collation.compare_strings(left.as_str(), right.as_str()) + } + _ => l.partial_cmp(r).unwrap(), + }; if !cmp.is_eq() { return match column_order { SortOrder::Asc => cmp, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 2c3bcd2d4..91389e70c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -2765,11 +2765,12 @@ pub fn op_sorter_open( cursor_id, columns: _, order, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - let cursor = Sorter::new(order); + let cursor = Sorter::new(order, collation.unwrap_or_default()); let mut cursors = state.cursors.borrow_mut(); cursors .get_mut(*cursor_id) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index a2ef91d01..9da401778 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -886,14 +886,21 @@ pub fn insn_to_str( cursor_id, columns, order, + collation, } => { let _p4 = String::new(); - // TODO: to_print should also print the Collation Sequence let to_print: Vec = order .iter() - .map(|v| match v { - SortOrder::Asc => "B".to_string(), - SortOrder::Desc => "-B".to_string(), + .enumerate() + .map(|(idx, v)| { + if idx == 0 { + collation.unwrap_or_default().to_string() + } else { + match v { + SortOrder::Asc => "B".to_string(), + SortOrder::Desc => "-B".to_string(), + } + } }) .collect(); ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 156abfcd6..c61de1c26 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -608,6 +608,7 @@ pub enum Insn { cursor_id: CursorID, // P1 columns: usize, // P2 order: Vec, // P4. + collation: Option, }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index d758f91f5..d4127a798 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,21 +1,26 @@ use limbo_sqlite3_parser::ast::SortOrder; -use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; +use crate::{ + translate::collate::CollationSeq, + types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}, +}; pub struct Sorter { records: Vec, current: Option, order: IndexKeySortOrder, key_len: usize, + collation: CollationSeq, } impl Sorter { - pub fn new(order: &[SortOrder]) -> Self { + pub fn new(order: &[SortOrder], collation: CollationSeq) -> Self { Self { records: Vec::new(), current: None, key_len: order.len(), order: IndexKeySortOrder::from_list(order), + collation, } } pub fn is_empty(&self) -> bool { @@ -33,6 +38,7 @@ impl Sorter { &a.values[..self.key_len], &b.values[..self.key_len], self.order, + self.collation, ) }); self.records.reverse(); diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py index 69fa0d574..4c176cb09 100644 --- a/testing/cli_tests/collate.py +++ b/testing/cli_tests/collate.py @@ -81,11 +81,23 @@ class CollateTest(BaseModel): ) limbo.run_test( - " Grouping is performed using the NOCASE collating sequence (Values 'abc', 'ABC', and 'Abc' are placed in the same group).", + "Grouping is performed using the NOCASE collating sequence (Values 'abc', 'ABC', and 'Abc' are placed in the same group).", "SELECT count(*) FROM t1 GROUP BY d ORDER BY 1;", "\n".join(map(lambda x: str(x), [4])), ) + limbo.run_test( + "Grouping is performed using the BINARY collating sequence. 'abc' and 'ABC' and 'Abc' form different groups", + "SELECT count(*) FROM t1 GROUP BY (d || '') ORDER BY 1;", + "\n".join(map(lambda x: str(x), [1, 1, 2])), + ) + + limbo.run_test( + "Sorting or column c is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 ORDER BY c, x;", + "\n".join(map(lambda x: str(x), [4, 1, 2, 3])), + ) + def cleanup(db_fullpath: str): wal_path = f"{db_fullpath}-wal" From 6d7a73fd60961882615b3a9d304c874270738751 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 03:56:36 -0300 Subject: [PATCH 09/17] More tests --- COMPAT.md | 2 +- testing/cli_tests/collate.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 799411193..962271493 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -193,7 +193,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | ... OVER (...) | No | Is incorrectly ignored | | (expr) | Yes | | | CAST (expr AS type) | Yes | | -| COLLATE | No | | +| COLLATE | Partial | Custom Collations not supported | | (NOT) LIKE | Yes | | | (NOT) GLOB | Yes | | | (NOT) REGEXP | No | | diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py index 4c176cb09..8a88d8943 100644 --- a/testing/cli_tests/collate.py +++ b/testing/cli_tests/collate.py @@ -91,13 +91,25 @@ class CollateTest(BaseModel): "SELECT count(*) FROM t1 GROUP BY (d || '') ORDER BY 1;", "\n".join(map(lambda x: str(x), [1, 1, 2])), ) - + limbo.run_test( "Sorting or column c is performed using the RTRIM collating sequence.", "SELECT x FROM t1 ORDER BY c, x;", "\n".join(map(lambda x: str(x), [4, 1, 2, 3])), ) + limbo.run_test( + "Sorting of (c||'') is performed using the BINARY collating sequence.", + "SELECT x FROM t1 ORDER BY (c||''), x;", + "\n".join(map(lambda x: str(x), [4, 2, 3, 1])), + ) + + limbo.run_test( + "Sorting of column c is performed using the NOCASE collating sequence.", + "SELECT x FROM t1 ORDER BY c COLLATE NOCASE, x;", + "\n".join(map(lambda x: str(x), [2, 4, 3, 1])), + ) + def cleanup(db_fullpath: str): wal_path = f"{db_fullpath}-wal" From cc86c789d6132a66e96110720b115ffecf863c26 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 04:28:56 -0300 Subject: [PATCH 10/17] Correct Rtrim --- core/translate/collate.rs | 2 +- testing/collate.test | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core/translate/collate.rs b/core/translate/collate.rs index 42faa3d34..81c076330 100644 --- a/core/translate/collate.rs +++ b/core/translate/collate.rs @@ -48,6 +48,6 @@ impl CollationSeq { } fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { - lhs.trim().cmp(rhs.trim()) + lhs.trim_end().cmp(rhs.trim_end()) } } diff --git a/testing/collate.test b/testing/collate.test index 291fbb334..094e855ac 100755 --- a/testing/collate.test +++ b/testing/collate.test @@ -18,13 +18,21 @@ do_execsql_test collate-binary-2 { } {1} do_execsql_test collate-rtrim-1 { - SELECT 'hat' == ' hAt ' COLLATE RTRIM; + SELECT 'hat' == 'hAt ' COLLATE RTRIM; } {0} do_execsql_test collate-rtrim-2 { - SELECT 'hat' == ' hat ' COLLATE RTRIM; + SELECT 'hat' == 'hat ' COLLATE RTRIM; } {1} +do_execsql_test collate-rtrim-3 { + SELECT 'hat' == ' hAt ' COLLATE RTRIM; +} {0} + +do_execsql_test collate-rtrim-4 { + SELECT 'hat' == ' hat ' COLLATE RTRIM; +} {0} + do_execsql_test collate-left-precedence { SELECT 'hat' COLLATE BINARY == 'hAt' COLLATE NOCASE; } {0} From 5bd47d7462d85da4445fea14d2bc7aa312e15e31 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 14:38:01 -0300 Subject: [PATCH 11/17] post rebase adjustments to accomodate new instructions that were created before the merge conflicts --- core/storage/btree.rs | 10 ++++++- core/translate/expr.rs | 58 +++++++++++++++++++------------------ core/translate/main_loop.rs | 5 ++++ core/types.rs | 7 ----- core/vdbe/execute.rs | 32 +++++++++++++++++--- core/vdbe/explain.rs | 6 +++- core/vdbe/insn.rs | 4 +++ testing/pyproject.toml | 4 --- 8 files changed, 81 insertions(+), 45 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a94c8c0f2..92fbd707c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7,7 +7,7 @@ use crate::{ TableLeafCell, }, }, - translate::plan::IterationDirection, + translate::{collate::CollationSeq, plan::IterationDirection}, types::IndexKeySortOrder, MvCursor, }; @@ -610,6 +610,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -668,6 +669,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1248,6 +1250,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1308,6 +1311,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1588,6 +1592,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1912,6 +1917,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); let found = match seek_op { SeekOp::GT => cmp.is_gt(), @@ -2079,6 +2085,7 @@ impl BTreeCursor { .unwrap() .get_values(), self.index_key_sort_order, + CollationSeq::Binary, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3725,6 +3732,7 @@ impl BTreeCursor { key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); match order { Ordering::Less | Ordering::Equal => { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 24fba1800..64c5a29ed 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -473,35 +473,37 @@ pub fn translate_expr( let right_collation_ctx = program.curr_collation_ctx(); program.reset_collation(); - /* - * The rules for determining which collating function to use for a binary comparison - * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: - * - * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, - * then the explicit collating function is used for comparison, - * with precedence to the collating function of the left operand. - * - * 2. If either operand is a column, then the collating function of that column is used - * with precedence to the left operand. For the purposes of the previous sentence, - * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. - * - * 3. Otherwise, the BINARY collating function is used for comparison. - */ - let collation_ctx = { - match (left_collation_ctx, right_collation_ctx) { - (Some((c_left, true)), _) => Some((c_left, true)), - (_, Some((c_right, true))) => Some((c_right, true)), - (Some((c_left, from_collate_left)), None) => Some((c_left, from_collate_left)), - (None, Some((c_right, from_collate_right))) => { - Some((c_right, from_collate_right)) + /* + * The rules for determining which collating function to use for a binary comparison + * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: + * + * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, + * then the explicit collating function is used for comparison, + * with precedence to the collating function of the left operand. + * + * 2. If either operand is a column, then the collating function of that column is used + * with precedence to the left operand. For the purposes of the previous sentence, + * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. + * + * 3. Otherwise, the BINARY collating function is used for comparison. + */ + let collation_ctx = { + match (left_collation_ctx, right_collation_ctx) { + (Some((c_left, true)), _) => Some((c_left, true)), + (_, Some((c_right, true))) => Some((c_right, true)), + (Some((c_left, from_collate_left)), None) => { + Some((c_left, from_collate_left)) + } + (None, Some((c_right, from_collate_right))) => { + Some((c_right, from_collate_right)) + } + (Some((c_left, from_collate_left)), Some((_, false))) => { + Some((c_left, from_collate_left)) + } + _ => None, } - (Some((c_left, from_collate_left)), Some((_, false))) => { - Some((c_left, from_collate_left)) - } - _ => None, - } - }; - program.set_collation(collation_ctx); + }; + program.set_collation(collation_ctx); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; program.reset_collation(); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 59168509e..9df9da7f5 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,6 +17,7 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, + collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1127,24 +1128,28 @@ fn emit_seek_termination( start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (false, SeekOp::GE) => program.emit_insn(Insn::Ge { lhs: rowid_reg.unwrap(), diff --git a/core/types.rs b/core/types.rs index 2b7718870..3c6b4926e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1109,13 +1109,6 @@ pub fn compare_immutable( assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); - let mut l = l; - let mut r = r; - if !matches!(column_order, SortOrder::Asc) { - let tmp = l; - l = r; - r = tmp; - } let cmp = match (l, r) { (RefValue::Text(left), RefValue::Text(right)) => { collation.compare_strings(left.as_str(), right.as_str()) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 91389e70c..b55a5c768 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -2141,6 +2141,7 @@ pub fn op_idx_ge( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2155,7 +2156,12 @@ pub fn op_idx_ge( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2200,6 +2206,7 @@ pub fn op_idx_le( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2214,7 +2221,12 @@ pub fn op_idx_le( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2241,6 +2253,7 @@ pub fn op_idx_gt( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2255,7 +2268,12 @@ pub fn op_idx_gt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2282,6 +2300,7 @@ pub fn op_idx_lt( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2296,7 +2315,12 @@ pub fn op_idx_lt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_lt() { target_pc.to_offset_int() } else { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 9da401778..ca455c81a 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -817,24 +817,28 @@ pub fn insn_to_str( start_reg, num_regs, target_pc, + collation, } | Insn::IdxGE { cursor_id, start_reg, num_regs, target_pc, + collation, } | Insn::IdxLE { cursor_id, start_reg, num_regs, target_pc, + collation, } | Insn::IdxLT { cursor_id, start_reg, num_regs, target_pc, + collation, } => ( match insn { Insn::IdxGT { .. } => "IdxGT", @@ -846,7 +850,7 @@ pub fn insn_to_str( *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index c61de1c26..2a2f2b765 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -556,6 +556,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -565,6 +566,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -574,6 +576,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -583,6 +586,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// Decrement the given register and jump to the given PC if the result is zero. diff --git a/testing/pyproject.toml b/testing/pyproject.toml index f3ba79a1d..21ba0e8e6 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -15,13 +15,9 @@ test-shell = "cli_tests.cli_test_cases:main" test-extensions = "cli_tests.extensions:main" test-update = "cli_tests.update:main" test-memory = "cli_tests.memory:main" -<<<<<<< HEAD bench-vfs = "cli_tests.vfs_bench:main" test-constraint = "cli_tests.constraint:main" -||||||| parent of b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) -======= test-collate = "cli_tests.collate:main" ->>>>>>> b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) [tool.uv] package = true From f28ce2b75727314e5334da4db4e418d37e9f32f3 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 18:41:14 -0300 Subject: [PATCH 12/17] add collations to btree cursor --- core/storage/btree.rs | 49 +++++++++++++++++++---------- core/vdbe/execute.rs | 59 ++++++++++++++++++++++++++++++----- tests/integration/fuzz/mod.rs | 1 + 3 files changed, 84 insertions(+), 25 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 92fbd707c..e7a8879c8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -407,6 +407,10 @@ pub struct BTreeCursor { context: Option, /// Store whether the Cursor is in a valid state. Meaning if it is pointing to a valid cell index or not valid_state: CursorValidState, + /// Colations for Index Btree constraint checks + /// Contains the Collation Seq for the whole Table + /// This Vec should be empty for Table Btree + collations: Vec, } impl BTreeCursor { @@ -414,6 +418,7 @@ impl BTreeCursor { mv_cursor: Option>>, pager: Rc, root_page: usize, + collations: Vec, ) -> Self { Self { mv_cursor, @@ -435,17 +440,27 @@ impl BTreeCursor { count: 0, context: None, valid_state: CursorValidState::Valid, + collations, } } + pub fn new_table( + mv_cursor: Option>>, + pager: Rc, + root_page: usize, + ) -> Self { + Self::new(mv_cursor, pager, root_page, Vec::new()) + } + pub fn new_index( mv_cursor: Option>>, pager: Rc, root_page: usize, index: &Index, + collations: Vec, ) -> Self { let index_key_sort_order = IndexKeySortOrder::from_index(index); - let mut cursor = Self::new(mv_cursor, pager, root_page); + let mut cursor = Self::new(mv_cursor, pager, root_page, collations); cursor.index_key_sort_order = index_key_sort_order; cursor } @@ -5953,7 +5968,7 @@ mod tests { } fn validate_btree(pager: Rc, page_idx: usize) -> (usize, bool) { - let cursor = BTreeCursor::new(None, pager.clone(), page_idx); + let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx); let page = pager.read_page(page_idx).unwrap(); let page = page.get(); let contents = page.contents.as_ref().unwrap(); @@ -6043,7 +6058,7 @@ mod tests { } fn format_btree(pager: Rc, page_idx: usize, depth: usize) -> String { - let cursor = BTreeCursor::new(None, pager.clone(), page_idx); + let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx); let page = pager.read_page(page_idx).unwrap(); let page = page.get(); let contents = page.contents.as_ref().unwrap(); @@ -6173,7 +6188,7 @@ mod tests { .as_slice(), ] { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); for (key, size) in sequence.iter() { run_until_done( || { @@ -6240,7 +6255,7 @@ mod tests { tracing::info!("super seed: {}", seed); for _ in 0..attempts { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let mut keys = SortedVec::new(); tracing::info!("seed: {}", seed); for insert_id in 0..inserts { @@ -6347,7 +6362,7 @@ mod tests { let (pager, _) = empty_btree(); let index_root_page = pager.btree_create(&CreateBTreeFlags::new_index()); let index_root_page = index_root_page as usize; - let mut cursor = BTreeCursor::new(None, pager.clone(), index_root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), index_root_page); let mut keys = SortedVec::new(); tracing::info!("seed: {}", seed); for _ in 0..inserts { @@ -6582,7 +6597,7 @@ mod tests { #[ignore] pub fn test_clear_overflow_pages() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let mut cursor = BTreeCursor::new(None, pager.clone(), 1); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1); let max_local = payload_overflow_threshold_max(PageType::TableLeaf, 4096); let usable_size = cursor.usable_space(); @@ -6681,7 +6696,7 @@ mod tests { #[test] pub fn test_clear_overflow_pages_no_overflow() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let mut cursor = BTreeCursor::new(None, pager.clone(), 1); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1); let small_payload = vec![b'A'; 10]; @@ -6725,7 +6740,7 @@ mod tests { fn test_btree_destroy() -> Result<()> { let initial_size = 3; let (pager, db_header) = setup_test_env(initial_size); - let mut cursor = BTreeCursor::new(None, pager.clone(), 2); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 2); assert_eq!( db_header.lock().database_size, initial_size, @@ -7382,7 +7397,7 @@ mod tests { let (pager, root_page) = empty_btree(); let mut keys = Vec::new(); for i in 0..10000 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Integer(i))]); tracing::trace!("before insert {}", i); @@ -7409,7 +7424,7 @@ mod tests { format_btree(pager.clone(), root_page, 0) ); for key in keys.iter() { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(*key); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(exists, "key not found {}", key); @@ -7458,7 +7473,7 @@ mod tests { // Insert 10,000 records in to the BTree. for i in 1..=10000 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text( Text::new("hello world"), ))]); @@ -7486,7 +7501,7 @@ mod tests { // Delete records with 500 <= key <= 3500 for i in 500..=3500 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let seek_key = SeekKey::TableRowId(i as u64); let found = run_until_done(|| cursor.seek(seek_key.clone(), SeekOp::EQ), pager.deref()) @@ -7503,7 +7518,7 @@ mod tests { continue; } - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(i); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(exists, "Key {} should exist but doesn't", i); @@ -7511,7 +7526,7 @@ mod tests { // Verify the deleted records don't exist. for i in 500..=3500 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(i); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(!exists, "Deleted key {} still exists", i); @@ -7533,7 +7548,7 @@ mod tests { let (pager, root_page) = empty_btree(); for i in 0..iterations { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text(Text { value: huge_texts[i].as_bytes().to_vec(), @@ -7562,7 +7577,7 @@ mod tests { format_btree(pager.clone(), root_page, 0) ); } - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); cursor.move_to_root(); for i in 0..iterations { let rowid = run_until_done(|| cursor.get_next_record(None), pager.deref()).unwrap(); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index b55a5c768..dd4680930 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -927,15 +927,36 @@ pub fn op_open_read( let mut cursors = state.cursors.borrow_mut(); match cursor_type { CursorType::BTreeTable(_) => { - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); + let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), *root_page); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } CursorType::BTreeIndex(index) => { - let cursor = - BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref()); + let table = program.table_references.iter().find_map(|table_ref| { + table_ref.btree().and_then(|table| { + if table.name == index.table_name { + Some(table) + } else { + None + } + }) + }); + let collations = table.map_or(Vec::new(), |table| { + table + .columns + .iter() + .map(|column| column.collation.unwrap_or_default()) + .collect::>() + }); + let cursor = BTreeCursor::new_index( + mv_cursor, + pager.clone(), + *root_page, + index.as_ref(), + collations, + ); cursors .get_mut(*cursor_id) .unwrap() @@ -4220,14 +4241,35 @@ pub fn op_open_write( None => None, }; if let Some(index) = maybe_index { - let cursor = - BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref()); + let table = program.table_references.iter().find_map(|table_ref| { + table_ref.btree().and_then(|table| { + if table.name == index.table_name { + Some(table) + } else { + None + } + }) + }); + let collations = table.map_or(Vec::new(), |table| { + table + .columns + .iter() + .map(|column| column.collation.unwrap_or_default()) + .collect::>() + }); + let cursor = BTreeCursor::new_index( + mv_cursor, + pager.clone(), + root_page as usize, + index.as_ref(), + collations, + ); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } else { - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); + let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), root_page as usize); cursors .get_mut(*cursor_id) .unwrap() @@ -4297,7 +4339,8 @@ pub fn op_destroy( if *is_temp == 1 { todo!("temp databases not implemented yet."); } - let mut cursor = BTreeCursor::new(None, pager.clone(), *root); + // TODO not sure if should be BTreeCursor::new_table or BTreeCursor::new_index here or neither and just pass an emtpy vec + let mut cursor = BTreeCursor::new(None, pager.clone(), *root, Vec::new()); cursor.btree_destroy()?; state.pc += 1; Ok(InsnFunctionStepResult::Step) @@ -4671,7 +4714,7 @@ pub fn op_open_ephemeral( } None => None, }; - let mut cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize); + let mut cursor = BTreeCursor::new_table(mv_cursor, pager, root_page as usize); cursor.rewind()?; // Will never return io let mut cursors: std::cell::RefMut<'_, Vec>> = state.cursors.borrow_mut(); diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 0af2992af..a981ce4a1 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1220,6 +1220,7 @@ mod tests { ); let query = format!("INSERT INTO t VALUES ({}, {}, {})", x, y, z); log::info!("insert: {}", query); + dbg!(&query); assert_eq!( limbo_exec_rows(&db, &limbo_conn, &query), sqlite_exec_rows(&sqlite_conn, &query), From 4a3119786e0706c9ce8a5faa71bad4fd8b488a94 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 20:42:26 -0300 Subject: [PATCH 13/17] refactor BtreeCursor and Sorter to accept Vec of collations --- core/schema.rs | 4 ++++ core/storage/btree.rs | 20 +++++++++++-------- core/translate/group_by.rs | 24 +++++++++++------------ core/translate/index.rs | 2 +- core/translate/main_loop.rs | 5 ----- core/translate/order_by.rs | 24 +++++++++-------------- core/types.rs | 3 ++- core/vdbe/execute.rs | 38 +++++++++++++++++++------------------ core/vdbe/explain.rs | 25 +++++++++++------------- core/vdbe/insn.rs | 12 ++++-------- core/vdbe/sorter.rs | 8 ++++---- 11 files changed, 78 insertions(+), 87 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index b04a31918..7407333dc 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -236,6 +236,10 @@ impl BTreeTable { sql.push_str(");\n"); sql } + + pub fn column_collations(&self) -> Vec> { + self.columns.iter().map(|column| column.collation).collect() + } } #[derive(Debug, Default)] diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e7a8879c8..3d3154d1d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -625,7 +625,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -684,7 +684,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1265,7 +1265,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1326,7 +1326,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1607,7 +1607,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1932,7 +1932,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); let found = match seek_op { SeekOp::GT => cmp.is_gt(), @@ -2100,7 +2100,7 @@ impl BTreeCursor { .unwrap() .get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3747,7 +3747,7 @@ impl BTreeCursor { key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); match order { Ordering::Less | Ordering::Equal => { @@ -4778,6 +4778,10 @@ impl BTreeCursor { } } } + + pub fn collations(&self) -> &[CollationSeq] { + &self.collations + } } #[cfg(debug_assertions)] diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 055a7a208..25afebd8e 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -126,12 +126,12 @@ pub fn init_group_by( * then the collating sequence of the column is used to determine sort order. * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. */ - let mut collation = None; - for expr in group_by.exprs.iter() { - match expr { + let collations = group_by + .exprs + .iter() + .map(|expr| match expr { ast::Expr::Collate(_, collation_name) => { - collation = Some(CollationSeq::new(collation_name)?); - break; + CollationSeq::new(collation_name).map(Some) } ast::Expr::Column { table, column, .. } => { let table_reference = plan.table_references.get(*table).unwrap(); @@ -140,19 +140,17 @@ pub fn init_group_by( crate::bail_parse_error!("column index out of bounds"); }; - if table_column.collation.is_some() { - collation = table_column.collation; - break; - } + Ok(table_column.collation) } - _ => {} - }; - } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; + program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: sorter_column_count, order: sort_order.clone(), - collation, + collations, }); let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count); GroupByRowSource::Sorter { diff --git a/core/translate/index.rs b/core/translate/index.rs index 84b6bf0eb..4f9838204 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -120,7 +120,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, columns: columns.len(), order, - collation: program.curr_collation(), + collations: tbl.column_collations(), }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9df9da7f5..59168509e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,7 +17,6 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, - collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1128,28 +1127,24 @@ fn emit_seek_termination( start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (false, SeekOp::GE) => program.emit_insn(Insn::Ge { lhs: rowid_reg.unwrap(), diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 4f1707958..f1c6a0b13 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -49,13 +49,10 @@ pub fn init_order_by( * then the collating sequence of the column is used to determine sort order. * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. */ - let mut collation = None; - for (expr, _) in order_by.iter() { - match expr { - ast::Expr::Collate(_, collation_name) => { - collation = Some(CollationSeq::new(collation_name)?); - break; - } + let collations = order_by + .iter() + .map(|(expr, _)| match expr { + ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some), ast::Expr::Column { table, column, .. } => { let table_reference = referenced_tables.get(*table).unwrap(); @@ -63,19 +60,16 @@ pub fn init_order_by( crate::bail_parse_error!("column index out of bounds"); }; - if table_column.collation.is_some() { - collation = table_column.collation; - break; - } + Ok(table_column.collation) } - _ => {} - }; - } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), order: order_by.iter().map(|(_, direction)| *direction).collect(), - collation, + collations, }); Ok(()) } diff --git a/core/types.rs b/core/types.rs index 3c6b4926e..5021a09cb 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1104,11 +1104,12 @@ pub fn compare_immutable( l: &[RefValue], r: &[RefValue], index_key_sort_order: IndexKeySortOrder, - collation: CollationSeq, + collations: &[CollationSeq], ) -> std::cmp::Ordering { assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); + let collation = collations.get(i).copied().unwrap_or_default(); let cmp = match (l, r) { (RefValue::Text(left), RefValue::Text(right)) => { collation.compare_strings(left.as_str(), right.as_str()) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index dd4680930..8a1530445 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -945,10 +945,10 @@ pub fn op_open_read( }); let collations = table.map_or(Vec::new(), |table| { table - .columns - .iter() - .map(|column| column.collation.unwrap_or_default()) - .collect::>() + .column_collations() + .into_iter() + .map(|c| c.unwrap_or_default()) + .collect() }); let cursor = BTreeCursor::new_index( mv_cursor, @@ -2162,7 +2162,6 @@ pub fn op_idx_ge( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2181,7 +2180,7 @@ pub fn op_idx_ge( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_ge() { target_pc.to_offset_int() @@ -2227,7 +2226,6 @@ pub fn op_idx_le( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2246,7 +2244,7 @@ pub fn op_idx_le( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_le() { target_pc.to_offset_int() @@ -2274,7 +2272,6 @@ pub fn op_idx_gt( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2293,7 +2290,7 @@ pub fn op_idx_gt( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_gt() { target_pc.to_offset_int() @@ -2321,7 +2318,6 @@ pub fn op_idx_lt( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2340,7 +2336,7 @@ pub fn op_idx_lt( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_lt() { target_pc.to_offset_int() @@ -2810,12 +2806,18 @@ pub fn op_sorter_open( cursor_id, columns: _, order, - collation, + collations, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - let cursor = Sorter::new(order, collation.unwrap_or_default()); + let cursor = Sorter::new( + order, + collations + .iter() + .map(|collation| collation.unwrap_or_default()) + .collect(), + ); let mut cursors = state.cursors.borrow_mut(); cursors .get_mut(*cursor_id) @@ -4252,10 +4254,10 @@ pub fn op_open_write( }); let collations = table.map_or(Vec::new(), |table| { table - .columns - .iter() - .map(|column| column.collation.unwrap_or_default()) - .collect::>() + .column_collations() + .into_iter() + .map(|c| c.unwrap_or_default()) + .collect() }); let cursor = BTreeCursor::new_index( mv_cursor, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ca455c81a..e899e5a9b 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -817,28 +817,24 @@ pub fn insn_to_str( start_reg, num_regs, target_pc, - collation, } | Insn::IdxGE { cursor_id, start_reg, num_regs, target_pc, - collation, } | Insn::IdxLE { cursor_id, start_reg, num_regs, target_pc, - collation, } | Insn::IdxLT { cursor_id, start_reg, num_regs, target_pc, - collation, } => ( match insn { Insn::IdxGT { .. } => "IdxGT", @@ -850,7 +846,7 @@ pub fn insn_to_str( *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, - Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), + Value::build_text(""), 0, format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), @@ -890,20 +886,21 @@ pub fn insn_to_str( cursor_id, columns, order, - collation, + collations, } => { let _p4 = String::new(); let to_print: Vec = order .iter() - .enumerate() - .map(|(idx, v)| { - if idx == 0 { - collation.unwrap_or_default().to_string() + .zip(collations.iter()) + .map(|(v, collation)| { + let sign = match v { + SortOrder::Asc => "", + SortOrder::Desc => "-", + }; + if collation.is_some() { + format!("{sign}{}", collation.unwrap()) } else { - match v { - SortOrder::Asc => "B".to_string(), - SortOrder::Desc => "-B".to_string(), - } + format!("{sign}B") } }) .collect(); diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 2a2f2b765..7cac9f202 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -556,7 +556,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -566,7 +565,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -576,7 +574,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -586,7 +583,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// Decrement the given register and jump to the given PC if the result is zero. @@ -609,10 +605,10 @@ pub enum Insn { /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Vec, // P4. - collation: Option, + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec, // P4. + collations: Vec>, // The only reason for using Option is so the explain message is the same as in SQLite }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index d4127a798..0036ed05e 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -10,17 +10,17 @@ pub struct Sorter { current: Option, order: IndexKeySortOrder, key_len: usize, - collation: CollationSeq, + collations: Vec, } impl Sorter { - pub fn new(order: &[SortOrder], collation: CollationSeq) -> Self { + pub fn new(order: &[SortOrder], collations: Vec) -> Self { Self { records: Vec::new(), current: None, key_len: order.len(), order: IndexKeySortOrder::from_list(order), - collation, + collations, } } pub fn is_empty(&self) -> bool { @@ -38,7 +38,7 @@ impl Sorter { &a.values[..self.key_len], &b.values[..self.key_len], self.order, - self.collation, + &self.collations, ) }); self.records.reverse(); From 5b15d6aa3255d9c8debd2e8e85331b44d8327760 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 21:35:23 -0300 Subject: [PATCH 14/17] Get the table correctly from the connection instead of table_references + test to confirm unique constraint --- core/vdbe/builder.rs | 4 ---- core/vdbe/execute.rs | 28 ++++++++++------------------ testing/collate.test | 23 ++++++++++++++--------- testing/tester.tcl | 10 ++++++++++ 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 7aad796ba..0d8dea8d0 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -611,10 +611,6 @@ impl ProgramBuilder { self.collation = None; } - // pub fn pop_collation(&mut self) -> Option { - // self.collations.pop() - // } - pub fn build( mut self, database_header: Arc>, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 8a1530445..a05a9f731 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -934,15 +934,11 @@ pub fn op_open_read( .replace(Cursor::new_btree(cursor)); } CursorType::BTreeIndex(index) => { - let table = program.table_references.iter().find_map(|table_ref| { - table_ref.btree().and_then(|table| { - if table.name == index.table_name { - Some(table) - } else { - None - } - }) - }); + let conn = program.connection.upgrade().unwrap(); + let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?; + let table = schema + .get_table(&index.table_name) + .map_or(None, |table| table.btree()); let collations = table.map_or(Vec::new(), |table| { table .column_collations() @@ -4243,15 +4239,11 @@ pub fn op_open_write( None => None, }; if let Some(index) = maybe_index { - let table = program.table_references.iter().find_map(|table_ref| { - table_ref.btree().and_then(|table| { - if table.name == index.table_name { - Some(table) - } else { - None - } - }) - }); + let conn = program.connection.upgrade().unwrap(); + let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?; + let table = schema + .get_table(&index.table_name) + .map_or(None, |table| table.btree()); let collations = table.map_or(Vec::new(), |table| { table .column_collations() diff --git a/testing/collate.test b/testing/collate.test index 094e855ac..f50670727 100755 --- a/testing/collate.test +++ b/testing/collate.test @@ -5,38 +5,43 @@ source $testdir/tester.tcl # SIMPLE SMOKE TESTS THAT DO NOT DEPEND ON SPECIFIC DATABASE ROWS -do_execsql_test collate-nocase { +do_execsql_test collate_nocase { SELECT 'hat' == 'hAt' COLLATE NOCASE; } {1} -do_execsql_test collate-binary-1 { +do_execsql_test collate_binary_1 { SELECT 'hat' == 'hAt' COLLATE BINARY; } {0} -do_execsql_test collate-binary-2 { +do_execsql_test collate_binary_2 { SELECT 'hat' == 'hat' COLLATE BINARY; } {1} -do_execsql_test collate-rtrim-1 { +do_execsql_test collate_rtrim_1 { SELECT 'hat' == 'hAt ' COLLATE RTRIM; } {0} -do_execsql_test collate-rtrim-2 { +do_execsql_test collate_rtrim_2 { SELECT 'hat' == 'hat ' COLLATE RTRIM; } {1} -do_execsql_test collate-rtrim-3 { +do_execsql_test collate_rtrim_3 { SELECT 'hat' == ' hAt ' COLLATE RTRIM; } {0} -do_execsql_test collate-rtrim-4 { +do_execsql_test collate_rtrim_4 { SELECT 'hat' == ' hat ' COLLATE RTRIM; } {0} -do_execsql_test collate-left-precedence { +do_execsql_test collate_left_precedence { SELECT 'hat' COLLATE BINARY == 'hAt' COLLATE NOCASE; } {0} -do_execsql_test collate-left-precedence-2 { +do_execsql_test collate_left_precedence_2 { SELECT 'hat' COLLATE NOCASE == 'hAt' COLLATE BINARY; } {1} + +do_execsql_test_in_memory_error_content collate_unique_constraint { + CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); + INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); +} {Runtime error: UNIQUE constraint failed: t.a (19)} diff --git a/testing/tester.tcl b/testing/tester.tcl index d739b2a39..490a0f394 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -226,3 +226,13 @@ proc do_execsql_test_in_memory_any_error {test_name sql_statements} { set combined_sql [string trim $sql_statements] run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql } + +proc do_execsql_test_in_memory_error_content {test_name sql_statements expected_error_text} { + test_put "Running error content test" in-memory $test_name + + # Use ":memory:" special filename for in-memory database + set db_name ":memory:" + + set combined_sql [string trim $sql_statements] + run_test_expecting_error_content $::sqlite_exec $db_name $combined_sql $expected_error_text +} From 819fd0f4968ed5acb3e68685f3f62e6ef7e3f371 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 21:48:20 -0300 Subject: [PATCH 15/17] use any error method instead, as limbo and sqlite error message differ slightly --- testing/collate.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/collate.test b/testing/collate.test index f50670727..5760a82d1 100755 --- a/testing/collate.test +++ b/testing/collate.test @@ -41,7 +41,7 @@ do_execsql_test collate_left_precedence_2 { SELECT 'hat' COLLATE NOCASE == 'hAt' COLLATE BINARY; } {1} -do_execsql_test_in_memory_error_content collate_unique_constraint { +do_execsql_test_in_memory_any_error collate_unique_constraint { CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); -} {Runtime error: UNIQUE constraint failed: t.a (19)} +} From 22b6b88f681a7d722d2eb68aef343189f0804000 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 19 May 2025 11:41:42 -0300 Subject: [PATCH 16/17] fix rebase type errors --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3d3154d1d..22ffacc28 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7592,7 +7592,7 @@ mod tests { #[test] pub fn test_read_write_payload_with_offset() { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]); let offset = 2; // blobs data starts at offset 2 let initial_text = "hello world"; let initial_blob = initial_text.as_bytes().to_vec(); @@ -7668,7 +7668,7 @@ mod tests { #[test] pub fn test_read_write_payload_with_overflow_page() { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]); let mut large_blob = vec![b'A'; 40960 - 11]; // insert large blob. 40960 = 10 page long. let hello_world = b"hello world"; large_blob.extend_from_slice(hello_world); From 52533cab402f727f409601e923e64566cf0ef1e7 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 19 May 2025 12:47:17 -0300 Subject: [PATCH 17/17] only pass collations for index in cursor + adhere to order of columns in index --- core/storage/btree.rs | 2 +- core/vdbe/execute.rs | 30 ++++++++++++++++++++++-------- testing/collate.test | 5 +++++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 22ffacc28..06c18a0cd 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -408,7 +408,7 @@ pub struct BTreeCursor { /// Store whether the Cursor is in a valid state. Meaning if it is pointing to a valid cell index or not valid_state: CursorValidState, /// Colations for Index Btree constraint checks - /// Contains the Collation Seq for the whole Table + /// Contains the Collation Seq for the whole Index /// This Vec should be empty for Table Btree collations: Vec, } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index a05a9f731..c3e7f328c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -940,10 +940,17 @@ pub fn op_open_read( .get_table(&index.table_name) .map_or(None, |table| table.btree()); let collations = table.map_or(Vec::new(), |table| { - table - .column_collations() - .into_iter() - .map(|c| c.unwrap_or_default()) + index + .columns + .iter() + .map(|c| { + table + .columns + .get(c.pos_in_table) + .unwrap() + .collation + .unwrap_or_default() + }) .collect() }); let cursor = BTreeCursor::new_index( @@ -4245,10 +4252,17 @@ pub fn op_open_write( .get_table(&index.table_name) .map_or(None, |table| table.btree()); let collations = table.map_or(Vec::new(), |table| { - table - .column_collations() - .into_iter() - .map(|c| c.unwrap_or_default()) + index + .columns + .iter() + .map(|c| { + table + .columns + .get(c.pos_in_table) + .unwrap() + .collation + .unwrap_or_default() + }) .collect() }); let cursor = BTreeCursor::new_index( diff --git a/testing/collate.test b/testing/collate.test index 5760a82d1..468297d82 100755 --- a/testing/collate.test +++ b/testing/collate.test @@ -45,3 +45,8 @@ do_execsql_test_in_memory_any_error collate_unique_constraint { CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); } + +do_execsql_test_in_memory_any_error collate_unique_constraint { + CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); + INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); +}