mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-23 17:05:36 +01:00
Refactor BTreeCursor and Sorter to accept a Vec of collations
This commit is contained in:
@@ -236,6 +236,10 @@ impl BTreeTable {
|
||||
sql.push_str(");\n");
|
||||
sql
|
||||
}
|
||||
|
||||
/// Returns the collation of every column of this table, in column order.
///
/// Each entry is copied as stored on the column, so a column without a
/// collation yields `None`; no default is substituted here.
pub fn column_collations(&self) -> Vec<Option<CollationSeq>> {
    let mut collations = Vec::new();
    for col in self.columns.iter() {
        collations.push(col.collation);
    }
    collations
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
|
||||
@@ -625,7 +625,7 @@ impl BTreeCursor {
|
||||
record_slice_same_num_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
order
|
||||
};
|
||||
@@ -684,7 +684,7 @@ impl BTreeCursor {
|
||||
record_slice_same_num_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
order
|
||||
};
|
||||
@@ -1265,7 +1265,7 @@ impl BTreeCursor {
|
||||
record_slice_same_num_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
order
|
||||
};
|
||||
@@ -1326,7 +1326,7 @@ impl BTreeCursor {
|
||||
record_slice_same_num_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
order
|
||||
};
|
||||
@@ -1607,7 +1607,7 @@ impl BTreeCursor {
|
||||
record_slice_equal_number_of_cols,
|
||||
index_key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
// in sqlite btrees left child pages have <= keys.
|
||||
// in general, in forwards iteration we want to find the first key that matches the seek condition.
|
||||
@@ -1932,7 +1932,7 @@ impl BTreeCursor {
|
||||
record_slice_equal_number_of_cols,
|
||||
key.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
let found = match seek_op {
|
||||
SeekOp::GT => cmp.is_gt(),
|
||||
@@ -2100,7 +2100,7 @@ impl BTreeCursor {
|
||||
.unwrap()
|
||||
.get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
) == Ordering::Equal {
|
||||
|
||||
tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting");
|
||||
@@ -3747,7 +3747,7 @@ impl BTreeCursor {
|
||||
key.to_index_key_values(),
|
||||
self.get_immutable_record().as_ref().unwrap().get_values(),
|
||||
self.index_key_sort_order,
|
||||
CollationSeq::Binary,
|
||||
&self.collations,
|
||||
);
|
||||
match order {
|
||||
Ordering::Less | Ordering::Equal => {
|
||||
@@ -4778,6 +4778,10 @@ impl BTreeCursor {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrows the collation sequences held by this cursor as a slice.
pub fn collations(&self) -> &[CollationSeq] {
    // Explicit full-range slice instead of relying on deref coercion.
    &self.collations[..]
}
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -126,12 +126,12 @@ pub fn init_group_by(
|
||||
* then the collating sequence of the column is used to determine sort order.
|
||||
* If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
|
||||
*/
|
||||
let mut collation = None;
|
||||
for expr in group_by.exprs.iter() {
|
||||
match expr {
|
||||
let collations = group_by
|
||||
.exprs
|
||||
.iter()
|
||||
.map(|expr| match expr {
|
||||
ast::Expr::Collate(_, collation_name) => {
|
||||
collation = Some(CollationSeq::new(collation_name)?);
|
||||
break;
|
||||
CollationSeq::new(collation_name).map(Some)
|
||||
}
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
let table_reference = plan.table_references.get(*table).unwrap();
|
||||
@@ -140,19 +140,17 @@ pub fn init_group_by(
|
||||
crate::bail_parse_error!("column index out of bounds");
|
||||
};
|
||||
|
||||
if table_column.collation.is_some() {
|
||||
collation = table_column.collation;
|
||||
break;
|
||||
}
|
||||
Ok(table_column.collation)
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
_ => Ok(Some(CollationSeq::default())),
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
program.emit_insn(Insn::SorterOpen {
|
||||
cursor_id: sort_cursor,
|
||||
columns: sorter_column_count,
|
||||
order: sort_order.clone(),
|
||||
collation,
|
||||
collations,
|
||||
});
|
||||
let pseudo_cursor = group_by_create_pseudo_table(program, sorter_column_count);
|
||||
GroupByRowSource::Sorter {
|
||||
|
||||
@@ -120,7 +120,7 @@ pub fn translate_create_index(
|
||||
cursor_id: sorter_cursor_id,
|
||||
columns: columns.len(),
|
||||
order,
|
||||
collation: program.curr_collation(),
|
||||
collations: tbl.column_collations(),
|
||||
});
|
||||
let content_reg = program.alloc_register();
|
||||
program.emit_insn(Insn::OpenPseudo {
|
||||
|
||||
@@ -17,7 +17,6 @@ use crate::{
|
||||
|
||||
use super::{
|
||||
aggregation::translate_aggregation_step,
|
||||
collate::CollationSeq,
|
||||
emitter::{OperationMode, TranslateCtx},
|
||||
expr::{
|
||||
translate_condition_expr, translate_expr, translate_expr_no_constant_opt,
|
||||
@@ -1128,28 +1127,24 @@ fn emit_seek_termination(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc: loop_end,
|
||||
collation: Some(CollationSeq::Binary),
|
||||
}),
|
||||
(true, SeekOp::GT) => program.emit_insn(Insn::IdxGT {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc: loop_end,
|
||||
collation: Some(CollationSeq::Binary),
|
||||
}),
|
||||
(true, SeekOp::LE) => program.emit_insn(Insn::IdxLE {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc: loop_end,
|
||||
collation: Some(CollationSeq::Binary),
|
||||
}),
|
||||
(true, SeekOp::LT) => program.emit_insn(Insn::IdxLT {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc: loop_end,
|
||||
collation: Some(CollationSeq::Binary),
|
||||
}),
|
||||
(false, SeekOp::GE) => program.emit_insn(Insn::Ge {
|
||||
lhs: rowid_reg.unwrap(),
|
||||
|
||||
@@ -49,13 +49,10 @@ pub fn init_order_by(
|
||||
* then the collating sequence of the column is used to determine sort order.
|
||||
* If the expression is not a column and has no COLLATE clause, then the BINARY collating sequence is used.
|
||||
*/
|
||||
let mut collation = None;
|
||||
for (expr, _) in order_by.iter() {
|
||||
match expr {
|
||||
ast::Expr::Collate(_, collation_name) => {
|
||||
collation = Some(CollationSeq::new(collation_name)?);
|
||||
break;
|
||||
}
|
||||
let collations = order_by
|
||||
.iter()
|
||||
.map(|(expr, _)| match expr {
|
||||
ast::Expr::Collate(_, collation_name) => CollationSeq::new(collation_name).map(Some),
|
||||
ast::Expr::Column { table, column, .. } => {
|
||||
let table_reference = referenced_tables.get(*table).unwrap();
|
||||
|
||||
@@ -63,19 +60,16 @@ pub fn init_order_by(
|
||||
crate::bail_parse_error!("column index out of bounds");
|
||||
};
|
||||
|
||||
if table_column.collation.is_some() {
|
||||
collation = table_column.collation;
|
||||
break;
|
||||
}
|
||||
Ok(table_column.collation)
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
_ => Ok(Some(CollationSeq::default())),
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
program.emit_insn(Insn::SorterOpen {
|
||||
cursor_id: sort_cursor,
|
||||
columns: order_by.len(),
|
||||
order: order_by.iter().map(|(_, direction)| *direction).collect(),
|
||||
collation,
|
||||
collations,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1104,11 +1104,12 @@ pub fn compare_immutable(
|
||||
l: &[RefValue],
|
||||
r: &[RefValue],
|
||||
index_key_sort_order: IndexKeySortOrder,
|
||||
collation: CollationSeq,
|
||||
collations: &[CollationSeq],
|
||||
) -> std::cmp::Ordering {
|
||||
assert_eq!(l.len(), r.len());
|
||||
for (i, (l, r)) in l.iter().zip(r).enumerate() {
|
||||
let column_order = index_key_sort_order.get_sort_order_for_col(i);
|
||||
let collation = collations.get(i).copied().unwrap_or_default();
|
||||
let cmp = match (l, r) {
|
||||
(RefValue::Text(left), RefValue::Text(right)) => {
|
||||
collation.compare_strings(left.as_str(), right.as_str())
|
||||
|
||||
@@ -945,10 +945,10 @@ pub fn op_open_read(
|
||||
});
|
||||
let collations = table.map_or(Vec::new(), |table| {
|
||||
table
|
||||
.columns
|
||||
.iter()
|
||||
.map(|column| column.collation.unwrap_or_default())
|
||||
.collect::<Vec<_>>()
|
||||
.column_collations()
|
||||
.into_iter()
|
||||
.map(|c| c.unwrap_or_default())
|
||||
.collect()
|
||||
});
|
||||
let cursor = BTreeCursor::new_index(
|
||||
mv_cursor,
|
||||
@@ -2162,7 +2162,6 @@ pub fn op_idx_ge(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
@@ -2181,7 +2180,7 @@ pub fn op_idx_ge(
|
||||
&idx_values,
|
||||
&record_values,
|
||||
cursor.index_key_sort_order,
|
||||
collation.unwrap_or_default(),
|
||||
cursor.collations(),
|
||||
);
|
||||
if ord.is_ge() {
|
||||
target_pc.to_offset_int()
|
||||
@@ -2227,7 +2226,6 @@ pub fn op_idx_le(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
@@ -2246,7 +2244,7 @@ pub fn op_idx_le(
|
||||
&idx_values,
|
||||
&record_values,
|
||||
cursor.index_key_sort_order,
|
||||
collation.unwrap_or_default(),
|
||||
cursor.collations(),
|
||||
);
|
||||
if ord.is_le() {
|
||||
target_pc.to_offset_int()
|
||||
@@ -2274,7 +2272,6 @@ pub fn op_idx_gt(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
@@ -2293,7 +2290,7 @@ pub fn op_idx_gt(
|
||||
&idx_values,
|
||||
&record_values,
|
||||
cursor.index_key_sort_order,
|
||||
collation.unwrap_or_default(),
|
||||
cursor.collations(),
|
||||
);
|
||||
if ord.is_gt() {
|
||||
target_pc.to_offset_int()
|
||||
@@ -2321,7 +2318,6 @@ pub fn op_idx_lt(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
@@ -2340,7 +2336,7 @@ pub fn op_idx_lt(
|
||||
&idx_values,
|
||||
&record_values,
|
||||
cursor.index_key_sort_order,
|
||||
collation.unwrap_or_default(),
|
||||
cursor.collations(),
|
||||
);
|
||||
if ord.is_lt() {
|
||||
target_pc.to_offset_int()
|
||||
@@ -2810,12 +2806,18 @@ pub fn op_sorter_open(
|
||||
cursor_id,
|
||||
columns: _,
|
||||
order,
|
||||
collation,
|
||||
collations,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
};
|
||||
let cursor = Sorter::new(order, collation.unwrap_or_default());
|
||||
let cursor = Sorter::new(
|
||||
order,
|
||||
collations
|
||||
.iter()
|
||||
.map(|collation| collation.unwrap_or_default())
|
||||
.collect(),
|
||||
);
|
||||
let mut cursors = state.cursors.borrow_mut();
|
||||
cursors
|
||||
.get_mut(*cursor_id)
|
||||
@@ -4252,10 +4254,10 @@ pub fn op_open_write(
|
||||
});
|
||||
let collations = table.map_or(Vec::new(), |table| {
|
||||
table
|
||||
.columns
|
||||
.iter()
|
||||
.map(|column| column.collation.unwrap_or_default())
|
||||
.collect::<Vec<_>>()
|
||||
.column_collations()
|
||||
.into_iter()
|
||||
.map(|c| c.unwrap_or_default())
|
||||
.collect()
|
||||
});
|
||||
let cursor = BTreeCursor::new_index(
|
||||
mv_cursor,
|
||||
|
||||
@@ -817,28 +817,24 @@ pub fn insn_to_str(
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
}
|
||||
| Insn::IdxGE {
|
||||
cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
}
|
||||
| Insn::IdxLE {
|
||||
cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
}
|
||||
| Insn::IdxLT {
|
||||
cursor_id,
|
||||
start_reg,
|
||||
num_regs,
|
||||
target_pc,
|
||||
collation,
|
||||
} => (
|
||||
match insn {
|
||||
Insn::IdxGT { .. } => "IdxGT",
|
||||
@@ -850,7 +846,7 @@ pub fn insn_to_str(
|
||||
*cursor_id as i32,
|
||||
target_pc.to_debug_int(),
|
||||
*start_reg as i32,
|
||||
Value::build_text(&collation.map_or("".to_string(), |c| c.to_string())),
|
||||
Value::build_text(""),
|
||||
0,
|
||||
format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1),
|
||||
),
|
||||
@@ -890,20 +886,21 @@ pub fn insn_to_str(
|
||||
cursor_id,
|
||||
columns,
|
||||
order,
|
||||
collation,
|
||||
collations,
|
||||
} => {
|
||||
let _p4 = String::new();
|
||||
let to_print: Vec<String> = order
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, v)| {
|
||||
if idx == 0 {
|
||||
collation.unwrap_or_default().to_string()
|
||||
.zip(collations.iter())
|
||||
.map(|(v, collation)| {
|
||||
let sign = match v {
|
||||
SortOrder::Asc => "",
|
||||
SortOrder::Desc => "-",
|
||||
};
|
||||
if collation.is_some() {
|
||||
format!("{sign}{}", collation.unwrap())
|
||||
} else {
|
||||
match v {
|
||||
SortOrder::Asc => "B".to_string(),
|
||||
SortOrder::Desc => "-B".to_string(),
|
||||
}
|
||||
format!("{sign}B")
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -556,7 +556,6 @@ pub enum Insn {
|
||||
start_reg: usize,
|
||||
num_regs: usize,
|
||||
target_pc: BranchOffset,
|
||||
collation: Option<CollationSeq>,
|
||||
},
|
||||
|
||||
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
|
||||
@@ -566,7 +565,6 @@ pub enum Insn {
|
||||
start_reg: usize,
|
||||
num_regs: usize,
|
||||
target_pc: BranchOffset,
|
||||
collation: Option<CollationSeq>,
|
||||
},
|
||||
|
||||
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
|
||||
@@ -576,7 +574,6 @@ pub enum Insn {
|
||||
start_reg: usize,
|
||||
num_regs: usize,
|
||||
target_pc: BranchOffset,
|
||||
collation: Option<CollationSeq>,
|
||||
},
|
||||
|
||||
/// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
|
||||
@@ -586,7 +583,6 @@ pub enum Insn {
|
||||
start_reg: usize,
|
||||
num_regs: usize,
|
||||
target_pc: BranchOffset,
|
||||
collation: Option<CollationSeq>,
|
||||
},
|
||||
|
||||
/// Decrement the given register and jump to the given PC if the result is zero.
|
||||
@@ -609,10 +605,10 @@ pub enum Insn {
|
||||
|
||||
/// Open a sorter.
|
||||
SorterOpen {
|
||||
cursor_id: CursorID, // P1
|
||||
columns: usize, // P2
|
||||
order: Vec<SortOrder>, // P4.
|
||||
collation: Option<CollationSeq>,
|
||||
cursor_id: CursorID, // P1
|
||||
columns: usize, // P2
|
||||
order: Vec<SortOrder>, // P4.
|
||||
collations: Vec<Option<CollationSeq>>, // The only reason for using Option<CollationSeq> is so the explain message is the same as in SQLite
|
||||
},
|
||||
|
||||
/// Insert a row into the sorter.
|
||||
|
||||
@@ -10,17 +10,17 @@ pub struct Sorter {
|
||||
current: Option<ImmutableRecord>,
|
||||
order: IndexKeySortOrder,
|
||||
key_len: usize,
|
||||
collation: CollationSeq,
|
||||
collations: Vec<CollationSeq>,
|
||||
}
|
||||
|
||||
impl Sorter {
|
||||
pub fn new(order: &[SortOrder], collation: CollationSeq) -> Self {
|
||||
pub fn new(order: &[SortOrder], collations: Vec<CollationSeq>) -> Self {
|
||||
Self {
|
||||
records: Vec::new(),
|
||||
current: None,
|
||||
key_len: order.len(),
|
||||
order: IndexKeySortOrder::from_list(order),
|
||||
collation,
|
||||
collations,
|
||||
}
|
||||
}
|
||||
pub fn is_empty(&self) -> bool {
|
||||
@@ -38,7 +38,7 @@ impl Sorter {
|
||||
&a.values[..self.key_len],
|
||||
&b.values[..self.key_len],
|
||||
self.order,
|
||||
self.collation,
|
||||
&self.collations,
|
||||
)
|
||||
});
|
||||
self.records.reverse();
|
||||
|
||||
Reference in New Issue
Block a user