From 5bd47d7462d85da4445fea14d2bc7aa312e15e31 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 14:38:01 -0300 Subject: [PATCH] post rebase adjustments to accomodate new instructions that were created before the merge conflicts --- core/storage/btree.rs | 10 ++++++- core/translate/expr.rs | 58 +++++++++++++++++++------------------ core/translate/main_loop.rs | 5 ++++ core/types.rs | 7 ----- core/vdbe/execute.rs | 32 +++++++++++++++++--- core/vdbe/explain.rs | 6 +++- core/vdbe/insn.rs | 4 +++ testing/pyproject.toml | 4 --- 8 files changed, 81 insertions(+), 45 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a94c8c0f2..92fbd707c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7,7 +7,7 @@ use crate::{ TableLeafCell, }, }, - translate::plan::IterationDirection, + translate::{collate::CollationSeq, plan::IterationDirection}, types::IndexKeySortOrder, MvCursor, }; @@ -610,6 +610,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -668,6 +669,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1248,6 +1250,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1308,6 +1311,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); order }; @@ -1588,6 +1592,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, index_key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1912,6 +1917,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); let found = match seek_op { SeekOp::GT => cmp.is_gt(), @@ -2079,6 +2085,7 @@ impl BTreeCursor { .unwrap() .get_values(), self.index_key_sort_order, + CollationSeq::Binary, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3725,6 +3732,7 @@ impl BTreeCursor { key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), self.index_key_sort_order, + CollationSeq::Binary, ); match order { Ordering::Less | Ordering::Equal => { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 24fba1800..64c5a29ed 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -473,35 +473,37 @@ pub fn translate_expr( let right_collation_ctx = program.curr_collation_ctx(); program.reset_collation(); - /* - * The rules for determining which collating function to use for a binary comparison - * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: - * - * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, - * then the explicit collating function is used for comparison, - * with precedence to the collating function of the left operand. - * - * 2. If either operand is a column, then the collating function of that column is used - * with precedence to the left operand. For the purposes of the previous sentence, - * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. - * - * 3. Otherwise, the BINARY collating function is used for comparison. - */ - let collation_ctx = { - match (left_collation_ctx, right_collation_ctx) { - (Some((c_left, true)), _) => Some((c_left, true)), - (_, Some((c_right, true))) => Some((c_right, true)), - (Some((c_left, from_collate_left)), None) => Some((c_left, from_collate_left)), - (None, Some((c_right, from_collate_right))) => { - Some((c_right, from_collate_right)) + /* + * The rules for determining which collating function to use for a binary comparison + * operator (=, <, >, <=, >=, !=, IS, and IS NOT) are as follows: + * + * 1. If either operand has an explicit collating function assignment using the postfix COLLATE operator, + * then the explicit collating function is used for comparison, + * with precedence to the collating function of the left operand. + * + * 2. If either operand is a column, then the collating function of that column is used + * with precedence to the left operand. For the purposes of the previous sentence, + * a column name preceded by one or more unary "+" operators and/or CAST operators is still considered a column name. + * + * 3. Otherwise, the BINARY collating function is used for comparison. + */ + let collation_ctx = { + match (left_collation_ctx, right_collation_ctx) { + (Some((c_left, true)), _) => Some((c_left, true)), + (_, Some((c_right, true))) => Some((c_right, true)), + (Some((c_left, from_collate_left)), None) => { + Some((c_left, from_collate_left)) + } + (None, Some((c_right, from_collate_right))) => { + Some((c_right, from_collate_right)) + } + (Some((c_left, from_collate_left)), Some((_, false))) => { + Some((c_left, from_collate_left)) + } + _ => None, } - (Some((c_left, from_collate_left)), Some((_, false))) => { - Some((c_left, from_collate_left)) - } - _ => None, - } - }; - program.set_collation(collation_ctx); + }; + program.set_collation(collation_ctx); emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?; program.reset_collation(); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 59168509e..9df9da7f5 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,6 +17,7 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, + collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1127,24 +1128,28 @@ fn emit_seek_termination( start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, + collation: Some(CollationSeq::Binary), }), (false, SeekOp::GE) => program.emit_insn(Insn::Ge { lhs: rowid_reg.unwrap(), diff --git a/core/types.rs b/core/types.rs index 2b7718870..3c6b4926e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1109,13 +1109,6 @@ pub fn compare_immutable( assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); - let mut l = l; - let mut r = r; - if !matches!(column_order, SortOrder::Asc) { - let tmp = l; - l = r; - r = tmp; - } let cmp = match (l, r) { (RefValue::Text(left), RefValue::Text(right)) => { collation.compare_strings(left.as_str(), right.as_str()) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 91389e70c..b55a5c768 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -2141,6 +2141,7 @@ pub fn op_idx_ge( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2155,7 +2156,12 @@ pub fn op_idx_ge( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2200,6 +2206,7 @@ pub fn op_idx_le( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2214,7 +2221,12 @@ pub fn op_idx_le( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2241,6 +2253,7 @@ pub fn op_idx_gt( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2255,7 +2268,12 @@ pub fn op_idx_gt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2282,6 +2300,7 @@ pub fn op_idx_lt( start_reg, num_regs, target_pc, + collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2296,7 +2315,12 @@ pub fn op_idx_lt( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); + let ord = compare_immutable( + &idx_values, + &record_values, + cursor.index_key_sort_order, + collation.unwrap_or_default(), + ); if ord.is_lt() { target_pc.to_offset_int() } else { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 9da401778..ca455c81a 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -817,24 +817,28 @@ pub fn insn_to_str( start_reg, num_regs, target_pc, + collation, } | Insn::IdxGE { cursor_id, start_reg, num_regs, target_pc, + collation, } | Insn::IdxLE { cursor_id, start_reg, num_regs, target_pc, + collation, } | Insn::IdxLT { cursor_id, start_reg, num_regs, target_pc, + collation, } => ( match insn { Insn::IdxGT { .. } => "IdxGT", @@ -846,7 +850,7 @@ pub fn insn_to_str( *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, - Value::build_text(""), + Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), 0, format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index c61de1c26..2a2f2b765 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -556,6 +556,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -565,6 +566,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -574,6 +576,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -583,6 +586,7 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, + collation: Option, }, /// Decrement the given register and jump to the given PC if the result is zero. diff --git a/testing/pyproject.toml b/testing/pyproject.toml index f3ba79a1d..21ba0e8e6 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -15,13 +15,9 @@ test-shell = "cli_tests.cli_test_cases:main" test-extensions = "cli_tests.extensions:main" test-update = "cli_tests.update:main" test-memory = "cli_tests.memory:main" -<<<<<<< HEAD bench-vfs = "cli_tests.vfs_bench:main" test-constraint = "cli_tests.constraint:main" -||||||| parent of b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) -======= test-collate = "cli_tests.collate:main" ->>>>>>> b9e03a19 (Removed repeated binary expression translation. Adjusted the set_collation to capture additional context of whether it was set by a Collate expression or not. Added some tests to prove those modifications were necessary.) [tool.uv] package = true