refactor BtreeCursor and Sorter to accept Vec of collations

This commit is contained in:
pedrocarlo
2025-05-16 20:42:26 -03:00
parent f28ce2b757
commit 4a3119786e
11 changed files with 78 additions and 87 deletions

View File

@@ -236,6 +236,10 @@ impl BTreeTable {
sql.push_str(");\n");
sql
}
pub fn column_collations(&self) -> Vec<Option<CollationSeq>> {
self.columns.iter().map(|column| column.collation).collect()
}
}
#[derive(Debug, Default)]

View File

@@ -625,7 +625,7 @@ impl BTreeCursor {
record_slice_same_num_cols,
index_key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
order
};
@@ -684,7 +684,7 @@ impl BTreeCursor {
record_slice_same_num_cols,
index_key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
order
};
@@ -1265,7 +1265,7 @@ impl BTreeCursor {
record_slice_same_num_cols,
index_key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
order
};
@@ -1326,7 +1326,7 @@ impl BTreeCursor {
record_slice_same_num_cols,
index_key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
order
};
@@ -1607,7 +1607,7 @@ impl BTreeCursor {
record_slice_equal_number_of_cols,
index_key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
// in sqlite btrees left child pages have <= keys.
// in general, in forwards iteration we want to find the first key that matches the seek condition.
@@ -1932,7 +1932,7 @@ impl BTreeCursor {
record_slice_equal_number_of_cols,
key.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
let found = match seek_op {
SeekOp::GT => cmp.is_gt(),
@@ -2100,7 +2100,7 @@ impl BTreeCursor {
.unwrap()
.get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
) == Ordering::Equal {
tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
@@ -3747,7 +3747,7 @@ impl BTreeCursor {
key.to_index_key_values(),
self.get_immutable_record().as_ref().unwrap().get_values(),
self.index_key_sort_order,
CollationSeq::Binary,
&self.collations,
);
match order {
Ordering::Less | Ordering::Equal => {
@@ -4778,6 +4778,10 @@ impl BTreeCursor {
}
}
}
pub fn collations(&self) -> &[CollationSeq] {
&self.collations
}
}
#[cfg(debug_assertions)]

View File

@@ -126,12 +126,12 @@ pub fn init_group_by(
* then the collating sequence of the column is used to determine sort order.
* If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
*/
let mut collation = None;
for expr in group_by.exprs.iter() {
match expr {
let collations = group_by
.exprs
.iter()
.map(|expr| match expr {
ast::Expr::Collate(_, collation_name) => {
collation = Some(CollationSeq::new(collation_name)?);
break;
CollationSeq::new(collation_name).map(Some)
}
ast::Expr::Column { table, column, .. } => {
let table_reference = plan.table_references.get(*table).unwrap();
@@ -140,19 +140,17 @@ pub fn init_group_by(
crate::bail_parse_error!("column index out of bounds");
};
if table_column.collation.is_some() {
collation = table_column.collation;
break;
}
Ok(table_column.collation)
}
_ => {}
};
}
_ => Ok(Some(CollationSeq::default())),
})
.collect::<Result<Vec<_>>>()?;
program.emit_insn(Insn::SorterOpen {
cursor_id: sort_cursor,
columns: sorter_column_count,
order: sort_order.clone(),
collation,
collations,
});
let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count);
GroupByRowSource::Sorter {

View File

@@ -120,7 +120,7 @@ pub fn translate_create_index(
cursor_id: sorter_cursor_id,
columns: columns.len(),
order,
collation: program.curr_collation(),
collations: tbl.column_collations(),
});
let content_reg = program.alloc_register();
program.emit_insn(Insn::OpenPseudo {

View File

@@ -17,7 +17,6 @@ use crate::{
use super::{
aggregation::translate_aggregation_step,
collate::CollationSeq,
emitter::{OperationMode, TranslateCtx},
expr::{
translate_condition_expr, translate_expr, translate_expr_no_constant_opt,
@@ -1128,28 +1127,24 @@ fn emit_seek_termination(
start_reg,
num_regs,
target_pc: loop_end,
collation: Some(CollationSeq::Binary),
}),
(true, SeekOp::GT) => program.emit_insn(Insn::IdxGT {
cursor_id: seek_cursor_id,
start_reg,
num_regs,
target_pc: loop_end,
collation: Some(CollationSeq::Binary),
}),
(true, SeekOp::LE) => program.emit_insn(Insn::IdxLE {
cursor_id: seek_cursor_id,
start_reg,
num_regs,
target_pc: loop_end,
collation: Some(CollationSeq::Binary),
}),
(true, SeekOp::LT) => program.emit_insn(Insn::IdxLT {
cursor_id: seek_cursor_id,
start_reg,
num_regs,
target_pc: loop_end,
collation: Some(CollationSeq::Binary),
}),
(false, SeekOp::GE) => program.emit_insn(Insn::Ge {
lhs: rowid_reg.unwrap(),

View File

@@ -49,13 +49,10 @@ pub fn init_order_by(
* then the collating sequence of the column is used to determine sort order.
* If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
*/
let mut collation = None;
for (expr, _) in order_by.iter() {
match expr {
ast::Expr::Collate(_, collation_name) => {
collation = Some(CollationSeq::new(collation_name)?);
break;
}
let collations = order_by
.iter()
.map(|(expr, _)| match expr {
ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some),
ast::Expr::Column { table, column, .. } => {
let table_reference = referenced_tables.get(*table).unwrap();
@@ -63,19 +60,16 @@ pub fn init_order_by(
crate::bail_parse_error!("column index out of bounds");
};
if table_column.collation.is_some() {
collation = table_column.collation;
break;
}
Ok(table_column.collation)
}
_ => {}
};
}
_ => Ok(Some(CollationSeq::default())),
})
.collect::<Result<Vec<_>>>()?;
program.emit_insn(Insn::SorterOpen {
cursor_id: sort_cursor,
columns: order_by.len(),
order: order_by.iter().map(|(_, direction)| *direction).collect(),
collation,
collations,
});
Ok(())
}

View File

@@ -1104,11 +1104,12 @@ pub fn compare_immutable(
l: &[RefValue],
r: &[RefValue],
index_key_sort_order: IndexKeySortOrder,
collation: CollationSeq,
collations: &[CollationSeq],
) -> std::cmp::Ordering {
assert_eq!(l.len(), r.len());
for (i, (l, r)) in l.iter().zip(r).enumerate() {
let column_order = index_key_sort_order.get_sort_order_for_col(i);
let collation = collations.get(i).copied().unwrap_or_default();
let cmp = match (l, r) {
(RefValue::Text(left), RefValue::Text(right)) => {
collation.compare_strings(left.as_str(), right.as_str())

View File

@@ -945,10 +945,10 @@ pub fn op_open_read(
});
let collations = table.map_or(Vec::new(), |table| {
table
.columns
.iter()
.map(|column| column.collation.unwrap_or_default())
.collect::<Vec<_>>()
.column_collations()
.into_iter()
.map(|c| c.unwrap_or_default())
.collect()
});
let cursor = BTreeCursor::new_index(
mv_cursor,
@@ -2162,7 +2162,6 @@ pub fn op_idx_ge(
start_reg,
num_regs,
target_pc,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -2181,7 +2180,7 @@ pub fn op_idx_ge(
&idx_values,
&record_values,
cursor.index_key_sort_order,
collation.unwrap_or_default(),
cursor.collations(),
);
if ord.is_ge() {
target_pc.to_offset_int()
@@ -2227,7 +2226,6 @@ pub fn op_idx_le(
start_reg,
num_regs,
target_pc,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -2246,7 +2244,7 @@ pub fn op_idx_le(
&idx_values,
&record_values,
cursor.index_key_sort_order,
collation.unwrap_or_default(),
cursor.collations(),
);
if ord.is_le() {
target_pc.to_offset_int()
@@ -2274,7 +2272,6 @@ pub fn op_idx_gt(
start_reg,
num_regs,
target_pc,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -2293,7 +2290,7 @@ pub fn op_idx_gt(
&idx_values,
&record_values,
cursor.index_key_sort_order,
collation.unwrap_or_default(),
cursor.collations(),
);
if ord.is_gt() {
target_pc.to_offset_int()
@@ -2321,7 +2318,6 @@ pub fn op_idx_lt(
start_reg,
num_regs,
target_pc,
collation,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -2340,7 +2336,7 @@ pub fn op_idx_lt(
&idx_values,
&record_values,
cursor.index_key_sort_order,
collation.unwrap_or_default(),
cursor.collations(),
);
if ord.is_lt() {
target_pc.to_offset_int()
@@ -2810,12 +2806,18 @@ pub fn op_sorter_open(
cursor_id,
columns: _,
order,
collation,
collations,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
};
let cursor = Sorter::new(order, collation.unwrap_or_default());
let cursor = Sorter::new(
order,
collations
.iter()
.map(|collation| collation.unwrap_or_default())
.collect(),
);
let mut cursors = state.cursors.borrow_mut();
cursors
.get_mut(*cursor_id)
@@ -4252,10 +4254,10 @@ pub fn op_open_write(
});
let collations = table.map_or(Vec::new(), |table| {
table
.columns
.iter()
.map(|column| column.collation.unwrap_or_default())
.collect::<Vec<_>>()
.column_collations()
.into_iter()
.map(|c| c.unwrap_or_default())
.collect()
});
let cursor = BTreeCursor::new_index(
mv_cursor,

View File

@@ -817,28 +817,24 @@ pub fn insn_to_str(
start_reg,
num_regs,
target_pc,
collation,
}
| Insn::IdxGE {
cursor_id,
start_reg,
num_regs,
target_pc,
collation,
}
| Insn::IdxLE {
cursor_id,
start_reg,
num_regs,
target_pc,
collation,
}
| Insn::IdxLT {
cursor_id,
start_reg,
num_regs,
target_pc,
collation,
} => (
match insn {
Insn::IdxGT { .. } => "IdxGT",
@@ -850,7 +846,7 @@ pub fn insn_to_str(
*cursor_id as i32,
target_pc.to_debug_int(),
*start_reg as i32,
Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
Value::build_text(""),
0,
format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1),
),
@@ -890,20 +886,21 @@ pub fn insn_to_str(
cursor_id,
columns,
order,
collation,
collations,
} => {
let _p4 = String::new();
let to_print: Vec<String> = order
.iter()
.enumerate()
.map(|(idx, v)| {
if idx == 0 {
collation.unwrap_or_default().to_string()
.zip(collations.iter())
.map(|(v, collation)| {
let sign = match v {
SortOrder::Asc => "",
SortOrder::Desc => "-",
};
if collation.is_some() {
format!("{sign}{}", collation.unwrap())
} else {
match v {
SortOrder::Asc => "B".to_string(),
SortOrder::Desc => "-B".to_string(),
}
format!("{sign}B")
}
})
.collect();

View File

@@ -556,7 +556,6 @@ pub enum Insn {
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
collation: Option<CollationSeq>,
},
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
@@ -566,7 +565,6 @@ pub enum Insn {
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
collation: Option<CollationSeq>,
},
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
@@ -576,7 +574,6 @@ pub enum Insn {
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
collation: Option<CollationSeq>,
},
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
@@ -586,7 +583,6 @@ pub enum Insn {
start_reg: usize,
num_regs: usize,
target_pc: BranchOffset,
collation: Option<CollationSeq>,
},
/// Decrement the given register and jump to the given PC if the result is zero.
@@ -609,10 +605,10 @@ pub enum Insn {
/// Open a sorter.
SorterOpen {
cursor_id: CursorID, // P1
columns: usize, // P2
order: Vec<SortOrder>, // P4.
collation: Option<CollationSeq>,
cursor_id: CursorID, // P1
columns: usize, // P2
order: Vec<SortOrder>, // P4.
collations: Vec<Option<CollationSeq>>, // The only reason for using Option<CollationSeq> is so the explain message is the same as in SQLite
},
/// Insert a row into the sorter.

View File

@@ -10,17 +10,17 @@ pub struct Sorter {
current: Option<ImmutableRecord>,
order: IndexKeySortOrder,
key_len: usize,
collation: CollationSeq,
collations: Vec<CollationSeq>,
}
impl Sorter {
pub fn new(order: &[SortOrder], collation: CollationSeq) -> Self {
pub fn new(order: &[SortOrder], collations: Vec<CollationSeq>) -> Self {
Self {
records: Vec::new(),
current: None,
key_len: order.len(),
order: IndexKeySortOrder::from_list(order),
collation,
collations,
}
}
pub fn is_empty(&self) -> bool {
@@ -38,7 +38,7 @@ impl Sorter {
&a.values[..self.key_len],
&b.values[..self.key_len],
self.order,
self.collation,
&self.collations,
)
});
self.records.reverse();