diff --git a/COMPAT.md b/COMPAT.md index 799411193..962271493 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -193,7 +193,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | ... OVER (...) | No | Is incorrectly ignored | | (expr) | Yes | | | CAST (expr AS type) | Yes | | -| COLLATE | No | | +| COLLATE | Partial | Custom Collations not supported | | (NOT) LIKE | Yes | | | (NOT) GLOB | Yes | | | (NOT) REGEXP | No | | diff --git a/Cargo.lock b/Cargo.lock index 9838d600f..7a027407a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1841,10 +1841,12 @@ dependencies = [ "ryu", "sorted-vec", "strum", + "strum_macros", "tempfile", "test-log", "thiserror 1.0.69", "tracing", + "uncased", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 774de264a..31794d093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,8 @@ limbo_series = { path = "extensions/series", version = "0.0.20" } limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.20" } limbo_time = { path = "extensions/time", version = "0.0.20" } limbo_uuid = { path = "extensions/uuid", version = "0.0.20" } +strum = { version = "0.26", features = ["derive"] } +strum_macros = "0.26" [profile.release] debug = "line-tables-only" diff --git a/Makefile b/Makefile index 846867a0c..db12b90ff 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ uv-sync: uv sync --all-packages .PHONE: uv-sync -test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint +test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint test-collate .PHONY: test test-extensions: limbo uv-sync @@ -100,6 +100,10 @@ test-update: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update .PHONY: test-update +test-collate: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-collate +.PHONY: test-collate + test-constraint: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-constraint .PHONY: test-constraint diff --git a/core/Cargo.toml b/core/Cargo.toml index 485f94b12..27807f3dd 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -69,11 +69,13 @@ limbo_ipaddr = { workspace = true, optional = true, features = ["static"] } limbo_completion = { workspace = true, optional = true, features = ["static"] } limbo_ext_tests = { workspace = true, optional = true, features = ["static"] } miette = "7.6.0" -strum = "0.26" +strum = { workspace = true } parking_lot = "0.12.3" crossbeam-skiplist = "0.1.3" tracing = "0.1.41" ryu = "1.0.19" +uncased = "0.9.10" +strum_macros = {workspace = true } bitflags = "2.9.0" [build-dependencies] diff --git a/core/schema.rs b/core/schema.rs index 210830b81..7407333dc 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,3 +1,4 @@ +use crate::translate::collate::CollationSeq; use crate::{util::normalize_ident, Result}; use crate::{LimboError, VirtualTable}; use core::fmt; @@ -235,6 +236,10 @@ impl BTreeTable { sql.push_str(");\n"); sql } + + pub fn column_collations(&self) -> Vec> { + self.columns.iter().map(|column| column.collation).collect() + } } #[derive(Debug, Default)] @@ -261,6 +266,7 @@ impl PseudoTable { notnull: false, default: None, unique: false, + collation: None, }); } pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { @@ -418,6 +424,7 @@ fn create_table( let mut notnull = false; let mut order = SortOrder::Asc; let mut unique = false; + let mut collation = None; for c_def in &col_def.constraints { match &c_def.constraint { limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { @@ -442,6 +449,10 @@ fn create_table( } unique = true; } + limbo_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => { + collation = Some(CollationSeq::new(collation_name.0.as_str())?); + } + // Collate _ => {} } } @@ -464,6 +475,7 @@ fn create_table( notnull, default, unique, + collation, }); } if options.contains(TableOptions::WITHOUT_ROWID) { @@ -534,6 +546,7 @@ pub struct Column { pub notnull: bool, pub default: Option, pub unique: bool, + pub collation: Option, } impl Column { @@ -737,6 +750,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("name".to_string()), @@ -747,6 +761,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("tbl_name".to_string()), @@ -757,6 +772,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("rootpage".to_string()), @@ -767,6 +783,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, Column { name: Some("sql".to_string()), @@ -777,6 +794,7 @@ pub fn sqlite_schema_table() -> BTreeTable { notnull: false, default: None, unique: false, + collation: None, }, ], unique_sets: None, @@ -1406,6 +1424,7 @@ mod tests { notnull: false, default: None, unique: false, + collation: None, }], unique_sets: None, }; diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a94c8c0f2..06c18a0cd 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7,7 +7,7 @@ use crate::{ TableLeafCell, }, }, - translate::plan::IterationDirection, + translate::{collate::CollationSeq, plan::IterationDirection}, types::IndexKeySortOrder, MvCursor, }; @@ -407,6 +407,10 @@ pub struct BTreeCursor { context: Option, /// Store whether the Cursor is in a valid state. Meaning if it is pointing to a valid cell index or not valid_state: CursorValidState, + /// Colations for Index Btree constraint checks + /// Contains the Collation Seq for the whole Index + /// This Vec should be empty for Table Btree + collations: Vec, } impl BTreeCursor { @@ -414,6 +418,7 @@ impl BTreeCursor { mv_cursor: Option>>, pager: Rc, root_page: usize, + collations: Vec, ) -> Self { Self { mv_cursor, @@ -435,17 +440,27 @@ impl BTreeCursor { count: 0, context: None, valid_state: CursorValidState::Valid, + collations, } } + pub fn new_table( + mv_cursor: Option>>, + pager: Rc, + root_page: usize, + ) -> Self { + Self::new(mv_cursor, pager, root_page, Vec::new()) + } + pub fn new_index( mv_cursor: Option>>, pager: Rc, root_page: usize, index: &Index, + collations: Vec, ) -> Self { let index_key_sort_order = IndexKeySortOrder::from_index(index); - let mut cursor = Self::new(mv_cursor, pager, root_page); + let mut cursor = Self::new(mv_cursor, pager, root_page, collations); cursor.index_key_sort_order = index_key_sort_order; cursor } @@ -610,6 +625,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + &self.collations, ); order }; @@ -668,6 +684,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + &self.collations, ); order }; @@ -1248,6 +1265,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + &self.collations, ); order }; @@ -1308,6 +1326,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + &self.collations, ); order }; @@ -1588,6 +1607,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, index_key.get_values(), self.index_key_sort_order, + &self.collations, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1912,6 +1932,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, + &self.collations, ); let found = match seek_op { SeekOp::GT => cmp.is_gt(), @@ -2079,6 +2100,7 @@ impl BTreeCursor { .unwrap() .get_values(), self.index_key_sort_order, + &self.collations, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3725,6 +3747,7 @@ impl BTreeCursor { key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), self.index_key_sort_order, + &self.collations, ); match order { Ordering::Less | Ordering::Equal => { @@ -4755,6 +4778,10 @@ impl BTreeCursor { } } } + + pub fn collations(&self) -> &[CollationSeq] { + &self.collations + } } #[cfg(debug_assertions)] @@ -5945,7 +5972,7 @@ mod tests { } fn validate_btree(pager: Rc, page_idx: usize) -> (usize, bool) { - let cursor = BTreeCursor::new(None, pager.clone(), page_idx); + let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx); let page = pager.read_page(page_idx).unwrap(); let page = page.get(); let contents = page.contents.as_ref().unwrap(); @@ -6035,7 +6062,7 @@ mod tests { } fn format_btree(pager: Rc, page_idx: usize, depth: usize) -> String { - let cursor = BTreeCursor::new(None, pager.clone(), page_idx); + let cursor = BTreeCursor::new_table(None, pager.clone(), page_idx); let page = pager.read_page(page_idx).unwrap(); let page = page.get(); let contents = page.contents.as_ref().unwrap(); @@ -6165,7 +6192,7 @@ mod tests { .as_slice(), ] { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); for (key, size) in sequence.iter() { run_until_done( || { @@ -6232,7 +6259,7 @@ mod tests { tracing::info!("super seed: {}", seed); for _ in 0..attempts { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let mut keys = SortedVec::new(); tracing::info!("seed: {}", seed); for insert_id in 0..inserts { @@ -6339,7 +6366,7 @@ mod tests { let (pager, _) = empty_btree(); let index_root_page = pager.btree_create(&CreateBTreeFlags::new_index()); let index_root_page = index_root_page as usize; - let mut cursor = BTreeCursor::new(None, pager.clone(), index_root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), index_root_page); let mut keys = SortedVec::new(); tracing::info!("seed: {}", seed); for _ in 0..inserts { @@ -6574,7 +6601,7 @@ mod tests { #[ignore] pub fn test_clear_overflow_pages() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let mut cursor = BTreeCursor::new(None, pager.clone(), 1); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1); let max_local = payload_overflow_threshold_max(PageType::TableLeaf, 4096); let usable_size = cursor.usable_space(); @@ -6673,7 +6700,7 @@ mod tests { #[test] pub fn test_clear_overflow_pages_no_overflow() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let mut cursor = BTreeCursor::new(None, pager.clone(), 1); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 1); let small_payload = vec![b'A'; 10]; @@ -6717,7 +6744,7 @@ mod tests { fn test_btree_destroy() -> Result<()> { let initial_size = 3; let (pager, db_header) = setup_test_env(initial_size); - let mut cursor = BTreeCursor::new(None, pager.clone(), 2); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), 2); assert_eq!( db_header.lock().database_size, initial_size, @@ -7374,7 +7401,7 @@ mod tests { let (pager, root_page) = empty_btree(); let mut keys = Vec::new(); for i in 0..10000 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Integer(i))]); tracing::trace!("before insert {}", i); @@ -7401,7 +7428,7 @@ mod tests { format_btree(pager.clone(), root_page, 0) ); for key in keys.iter() { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(*key); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(exists, "key not found {}", key); @@ -7450,7 +7477,7 @@ mod tests { // Insert 10,000 records in to the BTree. for i in 1..=10000 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text( Text::new("hello world"), ))]); @@ -7478,7 +7505,7 @@ mod tests { // Delete records with 500 <= key <= 3500 for i in 500..=3500 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let seek_key = SeekKey::TableRowId(i as u64); let found = run_until_done(|| cursor.seek(seek_key.clone(), SeekOp::EQ), pager.deref()) @@ -7495,7 +7522,7 @@ mod tests { continue; } - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(i); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(exists, "Key {} should exist but doesn't", i); @@ -7503,7 +7530,7 @@ mod tests { // Verify the deleted records don't exist. for i in 500..=3500 { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); let key = Value::Integer(i); let exists = run_until_done(|| cursor.exists(&key), pager.deref()).unwrap(); assert!(!exists, "Deleted key {} still exists", i); @@ -7525,7 +7552,7 @@ mod tests { let (pager, root_page) = empty_btree(); for i in 0..iterations { - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let value = ImmutableRecord::from_registers(&[Register::Value(Value::Text(Text { value: huge_texts[i].as_bytes().to_vec(), @@ -7554,7 +7581,7 @@ mod tests { format_btree(pager.clone(), root_page, 0) ); } - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page); cursor.move_to_root(); for i in 0..iterations { let rowid = run_until_done(|| cursor.get_next_record(None), pager.deref()).unwrap(); @@ -7565,7 +7592,7 @@ mod tests { #[test] pub fn test_read_write_payload_with_offset() { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]); let offset = 2; // blobs data starts at offset 2 let initial_text = "hello world"; let initial_blob = initial_text.as_bytes().to_vec(); @@ -7641,7 +7668,7 @@ mod tests { #[test] pub fn test_read_write_payload_with_overflow_page() { let (pager, root_page) = empty_btree(); - let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); + let mut cursor = BTreeCursor::new(None, pager.clone(), root_page, vec![]); let mut large_blob = vec![b'A'; 40960 - 11]; // insert large blob. 40960 = 10 page long. let hello_world = b"hello world"; large_blob.extend_from_slice(hello_world); diff --git a/core/translate/collate.rs b/core/translate/collate.rs new file mode 100644 index 000000000..81c076330 --- /dev/null +++ b/core/translate/collate.rs @@ -0,0 +1,53 @@ +use std::{cmp::Ordering, str::FromStr as _}; + +use tracing::Level; + +// TODO: in the future allow user to define collation sequences +// Will have to meddle with ffi for this +#[derive( + Debug, Clone, Copy, Eq, PartialEq, strum_macros::Display, strum_macros::EnumString, Default, +)] +#[strum(ascii_case_insensitive)] +/// **Pre defined collation sequences**\ +/// Collating functions only matter when comparing string values. +/// Numeric values are always compared numerically, and BLOBs are always compared byte-by-byte using memcmp(). +pub enum CollationSeq { + /// Standard String compare + #[default] + Binary, + /// Ascii case insensitive + NoCase, + /// Same as Binary but with trimmed whitespace + Rtrim, +} + +impl CollationSeq { + pub fn new(collation: &str) -> crate::Result { + CollationSeq::from_str(collation).map_err(|_| { + crate::LimboError::ParseError(format!("no such collation sequence: {}", collation)) + }) + } + + pub fn compare_strings(&self, lhs: &str, rhs: &str) -> Ordering { + tracing::event!(Level::DEBUG, collate = %self, lhs, rhs); + match self { + CollationSeq::Binary => Self::binary_cmp(lhs, rhs), + CollationSeq::NoCase => Self::nocase_cmp(lhs, rhs), + CollationSeq::Rtrim => Self::rtrim_cmp(lhs, rhs), + } + } + + fn binary_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.cmp(rhs) + } + + fn nocase_cmp(lhs: &str, rhs: &str) -> Ordering { + let nocase_lhs = uncased::UncasedStr::new(lhs); + let nocase_rhs = uncased::UncasedStr::new(rhs); + nocase_lhs.cmp(nocase_rhs) + } + + fn rtrim_cmp(lhs: &str, rhs: &str) -> Ordering { + lhs.trim_end().cmp(rhs.trim_end()) + } +} diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 770695833..38a2410bb 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -275,7 +275,7 @@ pub fn emit_query<'a>( // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { - init_order_by(program, t_ctx, order_by)?; + init_order_by(program, t_ctx, order_by, &plan.table_references)?; } if let Some(ref group_by) = plan.group_by { @@ -914,6 +914,7 @@ fn emit_update_insns( rhs: idx_rowid_reg, target_pc: constraint_check, flags: CmpInsFlags::default(), // TODO: not sure what type of comparison flag is needed + collation: program.curr_collation(), }); program.emit_insn(Insn::Halt { @@ -943,6 +944,7 @@ fn emit_update_insns( rhs: beg, target_pc: record_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::NotExists { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 8dae73fba..64c5a29ed 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -16,6 +16,8 @@ use crate::vdbe::{ }; use crate::{Result, Value}; +use super::collate::CollationSeq; + #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, @@ -38,59 +40,6 @@ fn emit_cond_jump(program: &mut ProgramBuilder, cond_meta: ConditionMetadata, re }); } } -macro_rules! emit_cmp_insn { - ( - $program:expr, - $cond:expr, - $op_true:ident, - $op_false:ident, - $lhs:expr, - $rhs:expr - ) => {{ - if $cond.jump_if_condition_is_true { - $program.emit_insn(Insn::$op_true { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_true, - flags: CmpInsFlags::default(), - }); - } else { - $program.emit_insn(Insn::$op_false { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_false, - flags: CmpInsFlags::default().jump_if_null(), - }); - } - }}; -} - -macro_rules! emit_cmp_null_insn { - ( - $program:expr, - $cond:expr, - $op_true:ident, - $op_false:ident, - $lhs:expr, - $rhs:expr - ) => {{ - if $cond.jump_if_condition_is_true { - $program.emit_insn(Insn::$op_true { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_true, - flags: CmpInsFlags::default().null_eq(), - }); - } else { - $program.emit_insn(Insn::$op_false { - lhs: $lhs, - rhs: $rhs, - target_pc: $cond.jump_target_when_false, - flags: CmpInsFlags::default().null_eq(), - }); - } - }}; -} macro_rules! expect_arguments_exact { ( @@ -240,51 +189,6 @@ pub fn translate_condition_expr( resolver, )?; } - ast::Expr::Binary(lhs, op, rhs) - if matches!( - op, - ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - | ast::Operator::Equals - | ast::Operator::NotEquals - | ast::Operator::Is - | ast::Operator::IsNot - ) => - { - let lhs_reg = program.alloc_register(); - let rhs_reg = program.alloc_register(); - translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; - translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; - match op { - ast::Operator::Greater => { - emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) - } - ast::Operator::GreaterEquals => { - emit_cmp_insn!(program, condition_metadata, Ge, Lt, lhs_reg, rhs_reg) - } - ast::Operator::Less => { - emit_cmp_insn!(program, condition_metadata, Lt, Ge, lhs_reg, rhs_reg) - } - ast::Operator::LessEquals => { - emit_cmp_insn!(program, condition_metadata, Le, Gt, lhs_reg, rhs_reg) - } - ast::Operator::Equals => { - emit_cmp_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg) - } - ast::Operator::NotEquals => { - emit_cmp_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg) - } - ast::Operator::Is => { - emit_cmp_null_insn!(program, condition_metadata, Eq, Ne, lhs_reg, rhs_reg) - } - ast::Operator::IsNot => { - emit_cmp_null_insn!(program, condition_metadata, Ne, Eq, lhs_reg, rhs_reg) - } - _ => unreachable!(), - } - } ast::Expr::Binary(_, _, _) => { let result_reg = program.alloc_register(); translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?; @@ -370,6 +274,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: jump_target_when_true, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); } else { // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. @@ -378,6 +283,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: program.curr_collation(), }); } } @@ -399,6 +305,7 @@ pub fn translate_condition_expr( rhs: rhs_reg, target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default().jump_if_null(), + collation: program.curr_collation(), }); } // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. @@ -552,15 +459,54 @@ pub fn translate_expr( translate_expr(program, referenced_tables, e1, shared_reg, resolver)?; emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?; + program.reset_collation(); Ok(target_register) } else { let e1_reg = program.alloc_registers(2); let e2_reg = e1_reg + 1; translate_expr(program, referenced_tables, e1, e1_reg, resolver)?; + let left_collation_ctx = program.curr_collation_ctx(); + program.reset_collation(); + translate_expr(program, referenced_tables, e2, e2_reg, resolver)?; + let right_collation_ctx = program.curr_collation_ctx(); + program.reset_collation(); + + /* + * The rules for determining which collating function to use for a binary comparison + * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: + * + * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, + * then the explicit collating function is used for comparison, + * with precedence to the collating function of the left operand. + * + * 2. If either operand is a column, then the collating function of that column is used + * with precedence to the left operand. For the purposes of the previous sentence, + * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. + * + * 3. Otherwise, the BINARY collating function is used for comparison. + */ + let collation_ctx = { + match (left_collation_ctx, right_collation_ctx) { + (Some((c_left, true)), _) => Some((c_left, true)), + (_, Some((c_right, true))) => Some((c_right, true)), + (Some((c_left, from_collate_left)), None) => { + Some((c_left, from_collate_left)) + } + (None, Some((c_right, from_collate_right))) => { + Some((c_right, from_collate_right)) + } + (Some((c_left, from_collate_left)), Some((_, false))) => { + Some((c_left, from_collate_left)) + } + _ => None, + } + }; + program.set_collation(collation_ctx); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; + program.reset_collation(); Ok(target_register) } } @@ -609,6 +555,7 @@ pub fn translate_expr( target_pc: next_case_label, // A NULL result is considered untrue when evaluating WHEN terms. flags: CmpInsFlags::default().jump_if_null(), + collation: program.curr_collation(), }), // CASE WHEN 0 THEN 0 ELSE 1 becomes ifnot 0 branch to next clause None => program.emit_insn(Insn::IfNot { @@ -679,7 +626,14 @@ pub fn translate_expr( }); Ok(target_register) } - ast::Expr::Collate(_, _) => todo!(), + ast::Expr::Collate(expr, collation) => { + // First translate inner expr, then set the curr collation. If we set curr collation before, + // it may be overwritten later by inner translate. + translate_expr(program, referenced_tables, expr, target_register, resolver)?; + let collation = CollationSeq::new(collation)?; + program.set_collation(Some((collation, true))); + Ok(target_register) + } ast::Expr::DoublyQualified(_, _, _) => todo!(), ast::Expr::Exists(_) => todo!(), ast::Expr::FunctionCall { @@ -1839,6 +1793,12 @@ pub fn translate_expr( let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); let index = table_reference.op.index(); let use_covering_index = table_reference.utilizes_covering_index(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + // Counter intuitive but a column always needs to have a collation + program.set_collation(Some((table_column.collation.unwrap_or_default(), false))); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. @@ -1889,10 +1849,7 @@ pub fn translate_expr( dest: target_register, }); } - let Some(column) = table_reference.table.get_column_at(*column) else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); + maybe_apply_affinity(table_column.ty, target_register, program); Ok(target_register) } Table::Virtual(_) => { @@ -2216,6 +2173,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2232,6 +2190,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2248,6 +2207,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2264,6 +2224,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2280,6 +2241,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2296,6 +2258,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2389,6 +2352,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: program.curr_collation(), }, target_register, if_true_label, @@ -2403,6 +2367,7 @@ fn emit_binary_insn( rhs, target_pc: if_true_label, flags: CmpInsFlags::default().null_eq(), + collation: program.curr_collation(), }, target_register, if_true_label, diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 9bb10a57e..25afebd8e 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -5,6 +5,7 @@ use limbo_sqlite3_parser::ast; use crate::{ function::AggFunc, schema::{Column, PseudoTable}, + translate::collate::CollationSeq, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -117,10 +118,39 @@ pub fn init_group_by( let row_source = if let Some(sort_order) = group_by.sort_order.as_ref() { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); let sorter_column_count = plan.group_by_sorter_column_count(); + // Should work the same way as Order By + /* + * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator, + * in which case the specified collating function is used for sorting. + * Otherwise, if the expression sorted by an ORDER BY clause is a column, + * then the collating sequence of the column is used to determine sort order. + * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. + */ + let collations = group_by + .exprs + .iter() + .map(|expr| match expr { + ast::Expr::Collate(_, collation_name) => { + CollationSeq::new(collation_name).map(Some) + } + ast::Expr::Column { table, column, .. } => { + let table_reference = plan.table_references.get(*table).unwrap(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + + Ok(table_column.collation) + } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; + program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: sorter_column_count, order: sort_order.clone(), + collations, }); let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count); GroupByRowSource::Sorter { @@ -217,6 +247,7 @@ pub fn group_by_create_pseudo_table( notnull: false, default: None, unique: false, + collation: None, }) .collect::>(); @@ -461,6 +492,7 @@ pub fn group_by_process_single_group( start_reg_a: registers.reg_group_exprs_cmp, start_reg_b: groups_start_reg, count: group_by.exprs.len(), + collation: program.curr_collation(), }); program.add_comment( diff --git a/core/translate/index.rs b/core/translate/index.rs index 30e96f897..4f9838204 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -120,6 +120,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, columns: columns.len(), order, + collations: tbl.column_collations(), }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { @@ -405,6 +406,7 @@ pub fn translate_drop_index( rhs: dest_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); // read type of table @@ -420,6 +422,7 @@ pub fn translate_drop_index( rhs: dest_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::RowId { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 17f439450..59168509e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1151,24 +1151,28 @@ fn emit_seek_termination( rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }), (false, SeekOp::GT) => program.emit_insn(Insn::Gt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }), (false, SeekOp::LE) => program.emit_insn(Insn::Le { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }), (false, SeekOp::LT) => program.emit_insn(Insn::Lt { lhs: rowid_reg.unwrap(), rhs: start_reg, target_pc: loop_end, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }), (_, SeekOp::EQ) => { panic!("An index termination condition is never EQ") diff --git a/core/translate/mod.rs b/core/translate/mod.rs index fa0b6d343..073df226c 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -8,6 +8,7 @@ //! will read rows from the database and filter them according to a WHERE clause. pub(crate) mod aggregation; +pub(crate) mod collate; pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; diff --git a/core/translate/optimizer/join.rs b/core/translate/optimizer/join.rs index c10fcc4fa..3a2679b7c 100644 --- a/core/translate/optimizer/join.rs +++ b/core/translate/optimizer/join.rs @@ -1205,6 +1205,7 @@ mod tests { notnull: false, default: None, unique: false, + collation: None, } } fn _create_column_of_type(name: &str, ty: Type) -> Column { diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 488a8cd82..f1c6a0b13 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -4,6 +4,7 @@ use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ schema::{Column, PseudoTable}, + translate::collate::CollationSeq, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -15,7 +16,7 @@ use crate::{ use super::{ emitter::{Resolver, TranslateCtx}, expr::translate_expr, - plan::{ResultSetColumn, SelectPlan}, + plan::{ResultSetColumn, SelectPlan, TableReference}, result_row::{emit_offset, emit_result_row_and_limit}, }; @@ -33,16 +34,42 @@ pub fn init_order_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, order_by: &[(ast::Expr, SortOrder)], + referenced_tables: &[TableReference], ) -> Result<()> { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); t_ctx.meta_sort = Some(SortMetadata { sort_cursor, reg_sorter_data: program.alloc_register(), }); + + /* + * Terms of the ORDER BY clause that is part of a SELECT statement may be assigned a collating sequence using the COLLATE operator, + * in which case the specified collating function is used for sorting. + * Otherwise, if the expression sorted by an ORDER BY clause is a column, + * then the collating sequence of the column is used to determine sort order. + * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. + */ + let collations = order_by + .iter() + .map(|(expr, _)| match expr { + ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some), + ast::Expr::Column { table, column, .. } => { + let table_reference = referenced_tables.get(*table).unwrap(); + + let Some(table_column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + + Ok(table_column.collation) + } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), order: order_by.iter().map(|(_, direction)| *direction).collect(), + collations, }); Ok(()) } @@ -73,6 +100,7 @@ pub fn emit_order_by( notnull: false, default: None, unique: false, + collation: None, }); } for i in 0..result_columns.len() { @@ -92,6 +120,7 @@ pub fn emit_order_by( notnull: false, default: None, unique: false, + collation: None, }); } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 58efbe7a2..48c768f1d 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -610,6 +610,7 @@ impl TableReference { notnull: false, default: None, unique: false, + collation: None, }) .collect(), ))); diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 1f95f89d5..3f5fb47b7 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -719,6 +719,7 @@ pub fn translate_drop_table( rhs: table_reg, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, @@ -730,6 +731,7 @@ pub fn translate_drop_table( rhs: table_type, target_pc: next_label, flags: CmpInsFlags::default(), + collation: program.curr_collation(), }); program.emit_insn(Insn::RowId { cursor_id: sqlite_schema_cursor_id, diff --git a/core/types.rs b/core/types.rs index a277d3c80..5021a09cb 100644 --- a/core/types.rs +++ b/core/types.rs @@ -7,6 +7,7 @@ use crate::pseudo::PseudoCursor; use crate::schema::Index; use crate::storage::btree::BTreeCursor; use crate::storage::sqlite3_ondisk::write_varint; +use crate::translate::collate::CollationSeq; use crate::translate::plan::IterationDirection; use crate::vdbe::sorter::Sorter; use crate::vdbe::{Register, VTabOpaqueCursor}; @@ -1103,11 +1104,18 @@ pub fn compare_immutable( l: &[RefValue], r: &[RefValue], index_key_sort_order: IndexKeySortOrder, + collations: &[CollationSeq], ) -> std::cmp::Ordering { assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); - let cmp = l.partial_cmp(r).unwrap(); + let collation = collations.get(i).copied().unwrap_or_default(); + let cmp = match (l, r) { + (RefValue::Text(left), RefValue::Text(right)) => { + collation.compare_strings(left.as_str(), right.as_str()) + } + _ => l.partial_cmp(r).unwrap(), + }; if !cmp.is_eq() { return match column_order { SortOrder::Asc => cmp, diff --git a/core/util.rs b/core/util.rs index 53e74fabe..40a2c0902 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,6 +1,7 @@ use crate::{ function::Func, schema::{self, Column, Schema, Type}, + translate::collate::CollationSeq, types::{Value, ValueType}, LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, }; @@ -499,66 +500,83 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res return None; } } - let column = Column { - name: Some(name.0.clone()), - ty: match column_def.col_type { - Some(ref data_type) => { - // https://www.sqlite.org/datatype3.html - let type_name = data_type.name.as_str().to_uppercase(); - if type_name.contains("INT") { - Type::Integer - } else if type_name.contains("CHAR") - || type_name.contains("CLOB") - || type_name.contains("TEXT") - { - Type::Text - } else if type_name.contains("BLOB") || type_name.is_empty() { - Type::Blob - } else if type_name.contains("REAL") - || type_name.contains("FLOA") - || type_name.contains("DOUB") - { - Type::Real - } else { - Type::Numeric + let column = + Column { + name: Some(name.0.clone()), + ty: match column_def.col_type { + Some(ref data_type) => { + // https://www.sqlite.org/datatype3.html + let type_name = data_type.name.as_str().to_uppercase(); + if type_name.contains("INT") { + Type::Integer + } else if type_name.contains("CHAR") + || type_name.contains("CLOB") + || type_name.contains("TEXT") + { + Type::Text + } else if type_name.contains("BLOB") || type_name.is_empty() { + Type::Blob + } else if type_name.contains("REAL") + || type_name.contains("FLOA") + || type_name.contains("DOUB") + { + Type::Real + } else { + Type::Numeric + } } - } - None => Type::Null, - }, - default: column_def - .constraints - .iter() - .find_map(|c| match &c.constraint { - limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => { - Some(val.clone()) - } - _ => None, + None => Type::Null, + }, + default: column_def + .constraints + .iter() + .find_map(|c| match &c.constraint { + limbo_sqlite3_parser::ast::ColumnConstraint::Default(val) => { + Some(val.clone()) + } + _ => None, + }), + notnull: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } + ) }), - notnull: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } - ) - }), - ty_str: column_def - .col_type - .clone() - .map(|t| t.name.to_string()) - .unwrap_or_default(), - primary_key: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } - ) - }), - is_rowid_alias: false, - unique: column_def.constraints.iter().any(|c| { - matches!( - c.constraint, - limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..) - ) - }), - }; + ty_str: column_def + .col_type + .clone() + .map(|t| t.name.to_string()) + .unwrap_or_default(), + primary_key: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } + ) + }), + is_rowid_alias: false, + unique: column_def.constraints.iter().any(|c| { + matches!( + c.constraint, + limbo_sqlite3_parser::ast::ColumnConstraint::Unique(..) + ) + }), + collation: column_def + .constraints + .iter() + .find_map(|c| match &c.constraint { + // TODO: see if this should be the correct behavior + // currently there cannot be any user defined collation sequences. + // But in the future, when a user defines a collation sequence, creates a table with it, + // then closes the db and opens it again. This may panic here if the collation seq is not registered + // before reading the columns + limbo_sqlite3_parser::ast::ColumnConstraint::Collate { + collation_name, + } => Some(CollationSeq::new(collation_name.0.as_str()).expect( + "collation should have been set correctly in create table", + )), + _ => None, + }), + }; Some(column) }) .collect::>()) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 6be6fdad8..0d8dea8d0 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -12,7 +12,10 @@ use crate::{ parameters::Parameters, schema::{BTreeTable, Index, PseudoTable}, storage::sqlite3_ondisk::DatabaseHeader, - translate::plan::{ResultSetColumn, TableReference}, + translate::{ + collate::CollationSeq, + plan::{ResultSetColumn, TableReference}, + }, Connection, VirtualTable, }; @@ -38,6 +41,8 @@ pub struct ProgramBuilder { pub parameters: Parameters, pub result_columns: Vec, pub table_references: Vec, + /// Curr collation sequence. Bool indicates whether it was set by a COLLATE expr + collation: Option<(CollationSeq, bool)>, } #[derive(Debug, Clone)] @@ -95,6 +100,7 @@ impl ProgramBuilder { parameters: Parameters::new(), result_columns: Vec::new(), table_references: Vec::new(), + collation: None, } } @@ -589,6 +595,22 @@ impl ProgramBuilder { .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier)) } + pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) { + self.collation = c + } + + pub fn curr_collation_ctx(&self) -> Option<(CollationSeq, bool)> { + self.collation + } + + pub fn curr_collation(&self) -> Option { + self.collation.map(|c| c.0) + } + + pub fn reset_collation(&mut self) { + self.collation = None; + } + pub fn build( mut self, database_header: Arc>, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 736ea260d..c3e7f328c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -365,6 +365,7 @@ pub fn op_compare( start_reg_a, start_reg_b, count, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -372,6 +373,7 @@ pub fn op_compare( let start_reg_a = *start_reg_a; let start_reg_b = *start_reg_b; let count = *count; + let collation = collation.unwrap_or_default(); if start_reg_a + count > start_reg_b { return Err(LimboError::InternalError( @@ -383,7 +385,12 @@ pub fn op_compare( for i in 0..count { let a = state.registers[start_reg_a + i].get_owned_value(); let b = state.registers[start_reg_b + i].get_owned_value(); - cmp = Some(a.cmp(b)); + cmp = match (a, b) { + (Value::Text(left), Value::Text(right)) => { + Some(collation.compare_strings(left.as_str(), right.as_str())) + } + _ => Some(a.cmp(b)), + }; if cmp != Some(std::cmp::Ordering::Equal) { break; } @@ -525,6 +532,7 @@ pub fn op_eq( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -536,9 +544,10 @@ pub fn op_eq( let cond = *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value(); let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -547,8 +556,16 @@ pub fn op_eq( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() == *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_eq() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs == *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -570,6 +587,7 @@ pub fn op_ne( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -581,9 +599,10 @@ pub fn op_ne( let cond = *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value(); let nulleq = flags.has_nulleq(); let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if (nulleq && cond) || (!nulleq && jump_if_null) { @@ -592,8 +611,16 @@ pub fn op_ne( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() != *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ne() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs != *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -615,6 +642,7 @@ pub fn op_lt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -624,9 +652,10 @@ pub fn op_lt( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -635,8 +664,16 @@ pub fn op_lt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() < *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_lt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs < *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -658,6 +695,7 @@ pub fn op_le( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -667,9 +705,10 @@ pub fn op_le( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -678,8 +717,16 @@ pub fn op_le( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() <= *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_le() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs <= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -701,6 +748,7 @@ pub fn op_gt( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -710,9 +758,10 @@ pub fn op_gt( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -721,8 +770,16 @@ pub fn op_gt( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() > *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_gt() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs > *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -744,6 +801,7 @@ pub fn op_ge( rhs, target_pc, flags, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -753,9 +811,10 @@ pub fn op_ge( let rhs = *rhs; let target_pc = *target_pc; let jump_if_null = flags.has_jump_if_null(); + let collation = collation.unwrap_or_default(); match ( - &state.registers[lhs].get_owned_value(), - &state.registers[rhs].get_owned_value(), + state.registers[lhs].get_owned_value(), + state.registers[rhs].get_owned_value(), ) { (_, Value::Null) | (Value::Null, _) => { if jump_if_null { @@ -764,8 +823,16 @@ pub fn op_ge( state.pc += 1; } } - _ => { - if *state.registers[lhs].get_owned_value() >= *state.registers[rhs].get_owned_value() { + (Value::Text(lhs), Value::Text(rhs)) => { + let order = collation.compare_strings(lhs.as_str(), rhs.as_str()); + if order.is_ge() { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } + } + (lhs, rhs) => { + if *lhs >= *rhs { state.pc = target_pc.to_offset_int(); } else { state.pc += 1; @@ -860,15 +927,39 @@ pub fn op_open_read( let mut cursors = state.cursors.borrow_mut(); match cursor_type { CursorType::BTreeTable(_) => { - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); + let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), *root_page); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } CursorType::BTreeIndex(index) => { - let cursor = - BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref()); + let conn = program.connection.upgrade().unwrap(); + let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?; + let table = schema + .get_table(&index.table_name) + .map_or(None, |table| table.btree()); + let collations = table.map_or(Vec::new(), |table| { + index + .columns + .iter() + .map(|c| { + table + .columns + .get(c.pos_in_table) + .unwrap() + .collation + .unwrap_or_default() + }) + .collect() + }); + let cursor = BTreeCursor::new_index( + mv_cursor, + pager.clone(), + *root_page, + index.as_ref(), + collations, + ); cursors .get_mut(*cursor_id) .unwrap() @@ -2088,7 +2179,12 @@ pub fn op_idx_ge( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + cursor.collations(), + ); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2147,7 +2243,12 @@ pub fn op_idx_le( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + cursor.collations(), + ); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2188,7 +2289,12 @@ pub fn op_idx_gt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + cursor.collations(), + ); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2229,7 +2335,12 @@ pub fn op_idx_lt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + cursor.collations(), + ); if ord.is_lt() { target_pc.to_offset_int() } else { @@ -2698,11 +2809,18 @@ pub fn op_sorter_open( cursor_id, columns: _, order, + collations, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - let cursor = Sorter::new(order); + let cursor = Sorter::new( + order, + collations + .iter() + .map(|collation| collation.unwrap_or_default()) + .collect(), + ); let mut cursors = state.cursors.borrow_mut(); cursors .get_mut(*cursor_id) @@ -4128,14 +4246,38 @@ pub fn op_open_write( None => None, }; if let Some(index) = maybe_index { - let cursor = - BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref()); + let conn = program.connection.upgrade().unwrap(); + let schema = conn.schema.try_read().ok_or(LimboError::SchemaLocked)?; + let table = schema + .get_table(&index.table_name) + .map_or(None, |table| table.btree()); + let collations = table.map_or(Vec::new(), |table| { + index + .columns + .iter() + .map(|c| { + table + .columns + .get(c.pos_in_table) + .unwrap() + .collation + .unwrap_or_default() + }) + .collect() + }); + let cursor = BTreeCursor::new_index( + mv_cursor, + pager.clone(), + root_page as usize, + index.as_ref(), + collations, + ); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } else { - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); + let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), root_page as usize); cursors .get_mut(*cursor_id) .unwrap() @@ -4205,7 +4347,8 @@ pub fn op_destroy( if *is_temp == 1 { todo!("temp databases not implemented yet."); } - let mut cursor = BTreeCursor::new(None, pager.clone(), *root); + // TODO not sure if should be BTreeCursor::new_table or BTreeCursor::new_index here or neither and just pass an emtpy vec + let mut cursor = BTreeCursor::new(None, pager.clone(), *root, Vec::new()); cursor.btree_destroy()?; state.pc += 1; Ok(InsnFunctionStepResult::Step) @@ -4579,7 +4722,7 @@ pub fn op_open_ephemeral( } None => None, }; - let mut cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize); + let mut cursor = BTreeCursor::new_table(mv_cursor, pager, root_page as usize); cursor.rewind()?; // Will never return io let mut cursors: std::cell::RefMut<'_, Vec>> = state.cursors.borrow_mut(); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 2ee29e178..e899e5a9b 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -141,12 +141,13 @@ pub fn insn_to_str( start_reg_a, start_reg_b, count, + collation, } => ( "Compare", *start_reg_a as i32, *start_reg_b as i32, *count as i32, - Value::build_text(""), + Value::build_text(&format!("k({count}, {})", collation.unwrap_or_default())), 0, format!( "r[{}..{}]==r[{}..{}]", @@ -211,13 +212,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Eq", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]==r[{}] goto {}", @@ -230,13 +232,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Ne", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]!=r[{}] goto {}", @@ -249,13 +252,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Lt", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("if r[{}] ( "Le", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]<=r[{}] goto {}", @@ -282,13 +287,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Gt", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("if r[{}]>r[{}] goto {}", lhs, rhs, target_pc.to_debug_int()), ), @@ -296,13 +302,14 @@ pub fn insn_to_str( lhs, rhs, target_pc, + collation, .. } => ( "Ge", *lhs as i32, *rhs as i32, target_pc.to_debug_int(), - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!( "if r[{}]>=r[{}] goto {}", @@ -879,13 +886,22 @@ pub fn insn_to_str( cursor_id, columns, order, + collations, } => { let _p4 = String::new(); let to_print: Vec = order .iter() - .map(|v| match v { - SortOrder::Asc => "B".to_string(), - SortOrder::Desc => "-B".to_string(), + .zip(collations.iter()) + .map(|(v, collation)| { + let sign = match v { + SortOrder::Asc => "", + SortOrder::Desc => "-", + }; + if collation.is_some() { + format!("{sign}{}", collation.unwrap()) + } else { + format!("{sign}B") + } }) .collect(); ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 876ab7930..7cac9f202 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -8,6 +8,7 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag use crate::{ schema::{BTreeTable, Index}, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, + translate::collate::CollationSeq, }; use limbo_macros::Description; use limbo_sqlite3_parser::ast::SortOrder; @@ -152,6 +153,7 @@ pub enum Insn { start_reg_a: usize, start_reg_b: usize, count: usize, + collation: Option, }, /// Place the result of rhs bitwise AND lhs in third register. BitAnd { @@ -218,6 +220,7 @@ pub enum Insn { /// Without the jump_if_null flag it would not jump because the logical comparison "id != NULL" is never true. /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, + collation: Option, }, /// Compare two registers and jump to the given PC if they are not equal. Ne { @@ -228,6 +231,7 @@ pub enum Insn { /// /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { @@ -236,6 +240,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: Option, }, // Compare two registers and jump to the given PC if the left-hand side is less than or equal to the right-hand side. Le { @@ -244,6 +249,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { @@ -252,6 +258,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: Option, }, /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { @@ -260,6 +267,7 @@ pub enum Insn { target_pc: BranchOffset, /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, + collation: Option, }, /// Jump to target_pc if r\[reg\] != 0 or (r\[reg\] == NULL && r\[jump_if_null\] != 0) If { @@ -597,9 +605,10 @@ pub enum Insn { /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Vec, // P4. + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec, // P4. + collations: Vec>, // The only reason for using Option is so the explain message is the same as in SQLite }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index d758f91f5..0036ed05e 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,21 +1,26 @@ use limbo_sqlite3_parser::ast::SortOrder; -use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; +use crate::{ + translate::collate::CollationSeq, + types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}, +}; pub struct Sorter { records: Vec, current: Option, order: IndexKeySortOrder, key_len: usize, + collations: Vec, } impl Sorter { - pub fn new(order: &[SortOrder]) -> Self { + pub fn new(order: &[SortOrder], collations: Vec) -> Self { Self { records: Vec::new(), current: None, key_len: order.len(), order: IndexKeySortOrder::from_list(order), + collations, } } pub fn is_empty(&self) -> bool { @@ -33,6 +38,7 @@ impl Sorter { &a.values[..self.key_len], &b.values[..self.key_len], self.order, + &self.collations, ) }); self.records.reverse(); diff --git a/testing/all.test b/testing/all.test index 1f9ab8051..dc5b3264f 100755 --- a/testing/all.test +++ b/testing/all.test @@ -34,3 +34,4 @@ source $testdir/boolean.test source $testdir/literal.test source $testdir/null.test source $testdir/create_table.test +source $testdir/collate.test diff --git a/testing/cli_tests/collate.py b/testing/cli_tests/collate.py new file mode 100644 index 000000000..8a88d8943 --- /dev/null +++ b/testing/cli_tests/collate.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +import os +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console + + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +class CollateTest(BaseModel): + name: str + db_schema: str = """CREATE TABLE t1( + x INTEGER PRIMARY KEY, + a, /* collating sequence BINARY */ + b COLLATE BINARY, /* collating sequence BINARY */ + c COLLATE RTRIM, /* collating sequence RTRIM */ + d COLLATE NOCASE /* collating sequence NOCASE */ + );""" + db_path: str = "testing/collate.db" + + def init_db(self): + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.execute_dot(f".open {self.db_path}") + stmt = [self.db_schema] + stmt = stmt + [ + "INSERT INTO t1 VALUES(1,'abc','abc', 'abc ','abc');", + "INSERT INTO t1 VALUES(2,'abc','abc', 'abc', 'ABC');", + "INSERT INTO t1 VALUES(3,'abc','abc', 'abc ', 'Abc');", + "INSERT INTO t1 VALUES(4,'abc','abc ','ABC', 'abc');", + ] + stmt.append("SELECT count(*) FROM t1;") + + sqlite.run_test( + "Init Collate Db in Sqlite", + "".join(stmt), + f"{4}", + ) + + def run(self, limbo: TestLimboShell): + limbo.execute_dot(f".open {self.db_path}") + + limbo.run_test( + "Text comparison a=b is performed using the BINARY collating sequence", + "SELECT x FROM t1 WHERE a = b ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) + + limbo.run_test( + "Text comparison a=b is performed using the RTRIM collating sequence", + "SELECT x FROM t1 WHERE a = b COLLATE RTRIM ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3, 4])), + ) + + limbo.run_test( + "Text comparison d=a is performed using the NOCASE collating sequence", + "SELECT x FROM t1 WHERE d = a ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3, 4])), + ) + + limbo.run_test( + "Text comparison a=d is performed using the BINARY collating sequence.", + "SELECT x FROM t1 WHERE a = d ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 4])), + ) + + limbo.run_test( + "Text comparison 'abc'=c is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 WHERE 'abc' = c ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) + + limbo.run_test( + "Text comparison c='abc' is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 WHERE c = 'abc' ORDER BY x;", + "\n".join(map(lambda x: str(x), [1, 2, 3])), + ) + + limbo.run_test( + "Grouping is performed using the NOCASE collating sequence (Values 'abc', 'ABC', and 'Abc' are placed in the same group).", + "SELECT count(*) FROM t1 GROUP BY d ORDER BY 1;", + "\n".join(map(lambda x: str(x), [4])), + ) + + limbo.run_test( + "Grouping is performed using the BINARY collating sequence. 'abc' and 'ABC' and 'Abc' form different groups", + "SELECT count(*) FROM t1 GROUP BY (d || '') ORDER BY 1;", + "\n".join(map(lambda x: str(x), [1, 1, 2])), + ) + + limbo.run_test( + "Sorting or column c is performed using the RTRIM collating sequence.", + "SELECT x FROM t1 ORDER BY c, x;", + "\n".join(map(lambda x: str(x), [4, 1, 2, 3])), + ) + + limbo.run_test( + "Sorting of (c||'') is performed using the BINARY collating sequence.", + "SELECT x FROM t1 ORDER BY (c||''), x;", + "\n".join(map(lambda x: str(x), [4, 2, 3, 1])), + ) + + limbo.run_test( + "Sorting of column c is performed using the NOCASE collating sequence.", + "SELECT x FROM t1 ORDER BY c COLLATE NOCASE, x;", + "\n".join(map(lambda x: str(x), [2, 4, 3, 1])), + ) + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + # Test from using examples from Section 7.2 + # https://sqlite.org/datatype3.html#collation + test = CollateTest(name="Smoke collate tests") + console.info(test) + + db_path = test.db_path + try: + test.init_db() + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + test.run(limbo) + + # test.test_compat() + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/collate.test b/testing/collate.test new file mode 100755 index 000000000..468297d82 --- /dev/null +++ b/testing/collate.test @@ -0,0 +1,52 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# SIMPLE SMOKE TESTS THAT DO NOT DEPEND ON SPECIFIC DATABASE ROWS + +do_execsql_test collate_nocase { + SELECT 'hat' == 'hAt' COLLATE NOCASE; +} {1} + +do_execsql_test collate_binary_1 { + SELECT 'hat' == 'hAt' COLLATE BINARY; +} {0} + +do_execsql_test collate_binary_2 { + SELECT 'hat' == 'hat' COLLATE BINARY; +} {1} + +do_execsql_test collate_rtrim_1 { + SELECT 'hat' == 'hAt ' COLLATE RTRIM; +} {0} + +do_execsql_test collate_rtrim_2 { + SELECT 'hat' == 'hat ' COLLATE RTRIM; +} {1} + +do_execsql_test collate_rtrim_3 { + SELECT 'hat' == ' hAt ' COLLATE RTRIM; +} {0} + +do_execsql_test collate_rtrim_4 { + SELECT 'hat' == ' hat ' COLLATE RTRIM; +} {0} + +do_execsql_test collate_left_precedence { + SELECT 'hat' COLLATE BINARY == 'hAt' COLLATE NOCASE; +} {0} + +do_execsql_test collate_left_precedence_2 { + SELECT 'hat' COLLATE NOCASE == 'hAt' COLLATE BINARY; +} {1} + +do_execsql_test_in_memory_any_error collate_unique_constraint { + CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); + INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); +} + +do_execsql_test_in_memory_any_error collate_unique_constraint { + CREATE TABLE t(a TEXT COLLATE NOCASE PRIMARY KEY); + INSERT INTO t VALUES ('lol'), ('LOL'), ('lOl'); +} diff --git a/testing/pyproject.toml b/testing/pyproject.toml index 0aed7b99b..21ba0e8e6 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -17,6 +17,7 @@ test-update = "cli_tests.update:main" test-memory = "cli_tests.memory:main" bench-vfs = "cli_tests.vfs_bench:main" test-constraint = "cli_tests.constraint:main" +test-collate = "cli_tests.collate:main" [tool.uv] package = true diff --git a/testing/tester.tcl b/testing/tester.tcl index d739b2a39..490a0f394 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -226,3 +226,13 @@ proc do_execsql_test_in_memory_any_error {test_name sql_statements} { set combined_sql [string trim $sql_statements] run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql } + +proc do_execsql_test_in_memory_error_content {test_name sql_statements expected_error_text} { + test_put "Running error content test" in-memory $test_name + + # Use ":memory:" special filename for in-memory database + set db_name ":memory:" + + set combined_sql [string trim $sql_statements] + run_test_expecting_error_content $::sqlite_exec $db_name $combined_sql $expected_error_text +} diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 0af2992af..a981ce4a1 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1220,6 +1220,7 @@ mod tests { ); let query = format!("INSERT INTO t VALUES ({}, {}, {})", x, y, z); log::info!("insert: {}", query); + dbg!(&query); assert_eq!( limbo_exec_rows(&db, &limbo_conn, &query), sqlite_exec_rows(&sqlite_conn, &query), diff --git a/vendored/sqlite3-parser/Cargo.toml b/vendored/sqlite3-parser/Cargo.toml index fc0f1bf8e..4701cdeba 100644 --- a/vendored/sqlite3-parser/Cargo.toml +++ b/vendored/sqlite3-parser/Cargo.toml @@ -32,8 +32,8 @@ bitflags = "2.0" uncased = "0.9.10" indexmap = "2.0" miette = "7.4.0" -strum = { version = "0.26", features = ["derive"] } -strum_macros = "0.26" +strum = { workspace = true } +strum_macros = {workspace = true } [dev-dependencies] env_logger = { version = "0.11", default-features = false }