From 4a3119786e0706c9ce8a5faa71bad4fd8b488a94 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 16 May 2025 20:42:26 -0300 Subject: [PATCH] refactor BtreeCursor and Sorter to accept Vec of collations --- core/schema.rs | 4 ++++ core/storage/btree.rs | 20 +++++++++++-------- core/translate/group_by.rs | 24 +++++++++++------------ core/translate/index.rs | 2 +- core/translate/main_loop.rs | 5 ----- core/translate/order_by.rs | 24 +++++++++-------------- core/types.rs | 3 ++- core/vdbe/execute.rs | 38 +++++++++++++++++++------------------ core/vdbe/explain.rs | 25 +++++++++++------------- core/vdbe/insn.rs | 12 ++++-------- core/vdbe/sorter.rs | 8 ++++---- 11 files changed, 78 insertions(+), 87 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index b04a31918..7407333dc 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -236,6 +236,10 @@ impl BTreeTable { sql.push_str(");\n"); sql } + + pub fn column_collations(&self) -> Vec> { + self.columns.iter().map(|column| column.collation).collect() + } } #[derive(Debug, Default)] diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e7a8879c8..3d3154d1d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -625,7 +625,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -684,7 +684,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1265,7 +1265,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1326,7 +1326,7 @@ impl BTreeCursor { record_slice_same_num_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); order }; @@ -1607,7 +1607,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, index_key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. @@ -1932,7 +1932,7 @@ impl BTreeCursor { record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); let found = match seek_op { SeekOp::GT => cmp.is_gt(), @@ -2100,7 +2100,7 @@ impl BTreeCursor { .unwrap() .get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3747,7 +3747,7 @@ impl BTreeCursor { key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), self.index_key_sort_order, - CollationSeq::Binary, + &self.collations, ); match order { Ordering::Less | Ordering::Equal => { @@ -4778,6 +4778,10 @@ impl BTreeCursor { } } } + + pub fn collations(&self) -> &[CollationSeq] { + &self.collations + } } #[cfg(debug_assertions)] diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 055a7a208..25afebd8e 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -126,12 +126,12 @@ pub fn init_group_by( * then the collating sequence of the column is used to determine sort order. * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. */ - let mut collation = None; - for expr in group_by.exprs.iter() { - match expr { + let collations = group_by + .exprs + .iter() + .map(|expr| match expr { ast::Expr::Collate(_, collation_name) => { - collation = Some(CollationSeq::new(collation_name)?); - break; + CollationSeq::new(collation_name).map(Some) } ast::Expr::Column { table, column, .. } => { let table_reference = plan.table_references.get(*table).unwrap(); @@ -140,19 +140,17 @@ pub fn init_group_by( crate::bail_parse_error!("column index out of bounds"); }; - if table_column.collation.is_some() { - collation = table_column.collation; - break; - } + Ok(table_column.collation) } - _ => {} - }; - } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; + program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: sorter_column_count, order: sort_order.clone(), - collation, + collations, }); let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count); GroupByRowSource::Sorter { diff --git a/core/translate/index.rs b/core/translate/index.rs index 84b6bf0eb..4f9838204 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -120,7 +120,7 @@ pub fn translate_create_index( cursor_id: sorter_cursor_id, columns: columns.len(), order, - collation: program.curr_collation(), + collations: tbl.column_collations(), }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9df9da7f5..59168509e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -17,7 +17,6 @@ use crate::{ use super::{ aggregation::translate_aggregation_step, - collate::CollationSeq, emitter::{OperationMode, TranslateCtx}, expr::{ translate_condition_expr, translate_expr, translate_expr_no_constant_opt, @@ -1128,28 +1127,24 @@ fn emit_seek_termination( start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { cursor_id: seek_cursor_id, start_reg, num_regs, target_pc: loop_end, - collation: Some(CollationSeq::Binary), }), (false, SeekOp::GE) => program.emit_insn(Insn::Ge { lhs: rowid_reg.unwrap(), diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 4f1707958..f1c6a0b13 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -49,13 +49,10 @@ pub fn init_order_by( * then the collating sequence of the column is used to determine sort order. * If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used. */ - let mut collation = None; - for (expr, _) in order_by.iter() { - match expr { - ast::Expr::Collate(_, collation_name) => { - collation = Some(CollationSeq::new(collation_name)?); - break; - } + let collations = order_by + .iter() + .map(|(expr, _)| match expr { + ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some), ast::Expr::Column { table, column, .. } => { let table_reference = referenced_tables.get(*table).unwrap(); @@ -63,19 +60,16 @@ pub fn init_order_by( crate::bail_parse_error!("column index out of bounds"); }; - if table_column.collation.is_some() { - collation = table_column.collation; - break; - } + Ok(table_column.collation) } - _ => {} - }; - } + _ => Ok(Some(CollationSeq::default())), + }) + .collect::>>()?; program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), order: order_by.iter().map(|(_, direction)| *direction).collect(), - collation, + collations, }); Ok(()) } diff --git a/core/types.rs b/core/types.rs index 3c6b4926e..5021a09cb 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1104,11 +1104,12 @@ pub fn compare_immutable( l: &[RefValue], r: &[RefValue], index_key_sort_order: IndexKeySortOrder, - collation: CollationSeq, + collations: &[CollationSeq], ) -> std::cmp::Ordering { assert_eq!(l.len(), r.len()); for (i, (l, r)) in l.iter().zip(r).enumerate() { let column_order = index_key_sort_order.get_sort_order_for_col(i); + let collation = collations.get(i).copied().unwrap_or_default(); let cmp = match (l, r) { (RefValue::Text(left), RefValue::Text(right)) => { collation.compare_strings(left.as_str(), right.as_str()) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index dd4680930..8a1530445 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -945,10 +945,10 @@ pub fn op_open_read( }); let collations = table.map_or(Vec::new(), |table| { table - .columns - .iter() - .map(|column| column.collation.unwrap_or_default()) - .collect::>() + .column_collations() + .into_iter() + .map(|c| c.unwrap_or_default()) + .collect() }); let cursor = BTreeCursor::new_index( mv_cursor, @@ -2162,7 +2162,6 @@ pub fn op_idx_ge( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2181,7 +2180,7 @@ pub fn op_idx_ge( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_ge() { target_pc.to_offset_int() @@ -2227,7 +2226,6 @@ pub fn op_idx_le( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2246,7 +2244,7 @@ pub fn op_idx_le( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_le() { target_pc.to_offset_int() @@ -2274,7 +2272,6 @@ pub fn op_idx_gt( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2293,7 +2290,7 @@ pub fn op_idx_gt( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_gt() { target_pc.to_offset_int() @@ -2321,7 +2318,6 @@ pub fn op_idx_lt( start_reg, num_regs, target_pc, - collation, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -2340,7 +2336,7 @@ pub fn op_idx_lt( &idx_values, &record_values, cursor.index_key_sort_order, - collation.unwrap_or_default(), + cursor.collations(), ); if ord.is_lt() { target_pc.to_offset_int() @@ -2810,12 +2806,18 @@ pub fn op_sorter_open( cursor_id, columns: _, order, - collation, + collations, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - let cursor = Sorter::new(order, collation.unwrap_or_default()); + let cursor = Sorter::new( + order, + collations + .iter() + .map(|collation| collation.unwrap_or_default()) + .collect(), + ); let mut cursors = state.cursors.borrow_mut(); cursors .get_mut(*cursor_id) @@ -4252,10 +4254,10 @@ pub fn op_open_write( }); let collations = table.map_or(Vec::new(), |table| { table - .columns - .iter() - .map(|column| column.collation.unwrap_or_default()) - .collect::>() + .column_collations() + .into_iter() + .map(|c| c.unwrap_or_default()) + .collect() }); let cursor = BTreeCursor::new_index( mv_cursor, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ca455c81a..e899e5a9b 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -817,28 +817,24 @@ pub fn insn_to_str( start_reg, num_regs, target_pc, - collation, } | Insn::IdxGE { cursor_id, start_reg, num_regs, target_pc, - collation, } | Insn::IdxLE { cursor_id, start_reg, num_regs, target_pc, - collation, } | Insn::IdxLT { cursor_id, start_reg, num_regs, target_pc, - collation, } => ( match insn { Insn::IdxGT { .. } => "IdxGT", @@ -850,7 +846,7 @@ pub fn insn_to_str( *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, - Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())), + Value::build_text(""), 0, format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), @@ -890,20 +886,21 @@ pub fn insn_to_str( cursor_id, columns, order, - collation, + collations, } => { let _p4 = String::new(); let to_print: Vec = order .iter() - .enumerate() - .map(|(idx, v)| { - if idx == 0 { - collation.unwrap_or_default().to_string() + .zip(collations.iter()) + .map(|(v, collation)| { + let sign = match v { + SortOrder::Asc => "", + SortOrder::Desc => "-", + }; + if collation.is_some() { + format!("{sign}{}", collation.unwrap()) } else { - match v { - SortOrder::Asc => "B".to_string(), - SortOrder::Desc => "-B".to_string(), - } + format!("{sign}B") } }) .collect(); diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 2a2f2b765..7cac9f202 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -556,7 +556,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -566,7 +565,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -576,7 +574,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. @@ -586,7 +583,6 @@ pub enum Insn { start_reg: usize, num_regs: usize, target_pc: BranchOffset, - collation: Option, }, /// Decrement the given register and jump to the given PC if the result is zero. @@ -609,10 +605,10 @@ pub enum Insn { /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Vec, // P4. - collation: Option, + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec, // P4. + collations: Vec>, // The only reason for using Option is so the explain message is the same as in SQLite }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index d4127a798..0036ed05e 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -10,17 +10,17 @@ pub struct Sorter { current: Option, order: IndexKeySortOrder, key_len: usize, - collation: CollationSeq, + collations: Vec, } impl Sorter { - pub fn new(order: &[SortOrder], collation: CollationSeq) -> Self { + pub fn new(order: &[SortOrder], collations: Vec) -> Self { Self { records: Vec::new(), current: None, key_len: order.len(), order: IndexKeySortOrder::from_list(order), - collation, + collations, } } pub fn is_empty(&self) -> bool { @@ -38,7 +38,7 @@ impl Sorter { &a.values[..self.key_len], &b.values[..self.key_len], self.order, - self.collation, + &self.collations, ) }); self.records.reverse();