mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-10 10:44:22 +01:00
Optimize range scans
This commit is contained in:
committed by
Jussi Saurio
parent
acb3c97fea
commit
4313f57ecb
@@ -23,7 +23,7 @@ use crate::{
|
||||
schema::{Affinity, Index, IndexColumn, Table},
|
||||
translate::{
|
||||
emitter::prepare_cdc_if_necessary,
|
||||
plan::{DistinctCtx, Distinctness, Scan},
|
||||
plan::{DistinctCtx, Distinctness, Scan, SeekKeyComponent},
|
||||
result_row::emit_select_result,
|
||||
},
|
||||
types::SeekOp,
|
||||
@@ -606,7 +606,10 @@ pub fn open_loop(
|
||||
);
|
||||
};
|
||||
|
||||
let start_reg = program.alloc_registers(seek_def.key.len());
|
||||
let max_registers = seek_def
|
||||
.size(&seek_def.start)
|
||||
.max(seek_def.size(&seek_def.end));
|
||||
let start_reg = program.alloc_registers(max_registers);
|
||||
emit_seek(
|
||||
program,
|
||||
table_references,
|
||||
@@ -1146,7 +1149,8 @@ fn emit_seek(
|
||||
seek_index: Option<&Arc<Index>>,
|
||||
) -> Result<()> {
|
||||
let is_index = seek_index.is_some();
|
||||
let Some(seek) = seek_def.seek.as_ref() else {
|
||||
if seek_def.prefix.is_empty() && matches!(seek_def.start.last_component, SeekKeyComponent::None)
|
||||
{
|
||||
// If there is no seek key, we start from the first or last row of the index,
|
||||
// depending on the iteration direction.
|
||||
//
|
||||
@@ -1196,43 +1200,34 @@ fn emit_seek(
|
||||
};
|
||||
// We allocated registers for the full index key, but our seek key might not use the full index key.
|
||||
// See [crate::translate::optimizer::build_seek_def] for more details about in which cases we do and don't use the full index key.
|
||||
for i in 0..seek_def.key.len() {
|
||||
for (i, key) in seek_def.iter(&seek_def.start).enumerate() {
|
||||
let reg = start_reg + i;
|
||||
if i >= seek.len {
|
||||
if seek.null_pad {
|
||||
program.emit_insn(Insn::Null {
|
||||
dest: reg,
|
||||
dest_end: None,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
let expr = &seek_def.key[i].0;
|
||||
translate_expr_no_constant_opt(
|
||||
program,
|
||||
Some(tables),
|
||||
expr,
|
||||
reg,
|
||||
&t_ctx.resolver,
|
||||
NoConstantOptReason::RegisterReuse,
|
||||
)?;
|
||||
// If the seek key column is not verifiably non-NULL, we need check whether it is NULL,
|
||||
// and if so, jump to the loop end.
|
||||
// This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL,
|
||||
// which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons.
|
||||
if !expr.is_nonnull(tables) {
|
||||
program.emit_insn(Insn::IsNull {
|
||||
match key {
|
||||
SeekKeyComponent::Expr(expr) => {
|
||||
translate_expr_no_constant_opt(
|
||||
program,
|
||||
Some(tables),
|
||||
expr,
|
||||
reg,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
&t_ctx.resolver,
|
||||
NoConstantOptReason::RegisterReuse,
|
||||
)?;
|
||||
// If the seek key column is not verifiably non-NULL, we need to check whether it is NULL,
|
||||
// and if so, jump to the loop end.
|
||||
// This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL,
|
||||
// which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons.
|
||||
if !expr.is_nonnull(tables) {
|
||||
program.emit_insn(Insn::IsNull {
|
||||
reg,
|
||||
target_pc: loop_end,
|
||||
});
|
||||
}
|
||||
}
|
||||
SeekKeyComponent::None => unreachable!("None component is not possible in iterator"),
|
||||
}
|
||||
}
|
||||
let num_regs = if seek.null_pad {
|
||||
seek_def.key.len()
|
||||
} else {
|
||||
seek.len
|
||||
};
|
||||
match seek.op {
|
||||
let num_regs = seek_def.size(&seek_def.start);
|
||||
match seek_def.start.op {
|
||||
SeekOp::GE { eq_only } => program.emit_insn(Insn::SeekGE {
|
||||
is_index,
|
||||
cursor_id: seek_cursor_id,
|
||||
@@ -1289,7 +1284,7 @@ fn emit_seek_termination(
|
||||
seek_index: Option<&Arc<Index>>,
|
||||
) -> Result<()> {
|
||||
let is_index = seek_index.is_some();
|
||||
let Some(termination) = seek_def.termination.as_ref() else {
|
||||
if seek_def.prefix.is_empty() && matches!(seek_def.end.last_component, SeekKeyComponent::None) {
|
||||
program.preassign_label_to_next_insn(loop_start);
|
||||
// If we will encounter NULLs in the index at the end of iteration (Forward + Desc OR Backward + Asc)
|
||||
// then, we must explicitly stop before them as seek always has some bound condition over indexed column (e.g. c < ?, c >= ?, ...)
|
||||
@@ -1320,46 +1315,23 @@ fn emit_seek_termination(
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// How many non-NULL values were used for seeking.
|
||||
let seek_len = seek_def.seek.as_ref().map_or(0, |seek| seek.len);
|
||||
// For all index key values apart from the last one, we are guaranteed to use the same values
|
||||
// as these values were emitted from the common prefix, so we don't need to emit them again.
|
||||
|
||||
// How many values will be used for the termination condition.
|
||||
let num_regs = if termination.null_pad {
|
||||
seek_def.key.len()
|
||||
} else {
|
||||
termination.len
|
||||
};
|
||||
for i in 0..seek_def.key.len() {
|
||||
let reg = start_reg + i;
|
||||
let is_last = i == seek_def.key.len() - 1;
|
||||
|
||||
// For all index key values apart from the last one, we are guaranteed to use the same values
|
||||
// as were used for the seek, so we don't need to emit them again.
|
||||
if i < seek_len && !is_last {
|
||||
continue;
|
||||
}
|
||||
// For the last index key value, we need to emit a NULL if the termination condition is NULL-padded.
|
||||
// See [SeekKey::null_pad] and [crate::translate::optimizer::build_seek_def] for why this is the case.
|
||||
if i >= termination.len && !termination.null_pad {
|
||||
continue;
|
||||
}
|
||||
if is_last && termination.null_pad {
|
||||
program.emit_insn(Insn::Null {
|
||||
dest: reg,
|
||||
dest_end: None,
|
||||
});
|
||||
// if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key.
|
||||
// if not, we just reuse what was emitted for the seek.
|
||||
} else if seek_len < termination.len {
|
||||
let num_regs = seek_def.size(&seek_def.end);
|
||||
let last_reg = start_reg + seek_def.prefix.len();
|
||||
match &seek_def.end.last_component {
|
||||
SeekKeyComponent::Expr(expr) => {
|
||||
translate_expr_no_constant_opt(
|
||||
program,
|
||||
Some(tables),
|
||||
&seek_def.key[i].0,
|
||||
reg,
|
||||
expr,
|
||||
last_reg,
|
||||
&t_ctx.resolver,
|
||||
NoConstantOptReason::RegisterReuse,
|
||||
)?;
|
||||
}
|
||||
SeekKeyComponent::None => {}
|
||||
}
|
||||
program.preassign_label_to_next_insn(loop_start);
|
||||
let mut rowid_reg = None;
|
||||
@@ -1385,7 +1357,7 @@ fn emit_seek_termination(
|
||||
Some(Affinity::Numeric)
|
||||
};
|
||||
}
|
||||
match (is_index, termination.op) {
|
||||
match (is_index, seek_def.end.op) {
|
||||
(true, SeekOp::GE { .. }) => program.emit_insn(Insn::IdxGE {
|
||||
cursor_id: seek_cursor_id,
|
||||
start_reg,
|
||||
|
||||
@@ -3,7 +3,9 @@ use std::sync::Arc;
|
||||
use turso_ext::{ConstraintInfo, ConstraintUsage, ResultCode};
|
||||
use turso_parser::ast::SortOrder;
|
||||
|
||||
use crate::translate::optimizer::constraints::{convert_to_vtab_constraint, Constraint};
|
||||
use crate::translate::optimizer::constraints::{
|
||||
convert_to_vtab_constraint, Constraint, RangeConstraintRef,
|
||||
};
|
||||
use crate::{
|
||||
schema::{Index, Table},
|
||||
translate::plan::{IterationDirection, JoinOrderMember, JoinedTable},
|
||||
@@ -12,24 +14,24 @@ use crate::{
|
||||
};
|
||||
|
||||
use super::{
|
||||
constraints::{usable_constraints_for_join_order, ConstraintRef, TableConstraints},
|
||||
constraints::{usable_constraints_for_join_order, TableConstraints},
|
||||
cost::{estimate_cost_for_scan_or_seek, Cost, IndexInfo},
|
||||
order::OrderTarget,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Represents a way to access a table.
|
||||
pub struct AccessMethod<'a> {
|
||||
pub struct AccessMethod {
|
||||
/// The estimated number of page fetches.
|
||||
/// We are ignoring CPU cost for now.
|
||||
pub cost: Cost,
|
||||
/// Table-type specific access method details.
|
||||
pub params: AccessMethodParams<'a>,
|
||||
pub params: AccessMethodParams,
|
||||
}
|
||||
|
||||
/// Table‑specific details of how an [`AccessMethod`] operates.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum AccessMethodParams<'a> {
|
||||
pub enum AccessMethodParams {
|
||||
BTreeTable {
|
||||
/// The direction of iteration for the access method.
|
||||
/// Typically this is backwards only if it helps satisfy an [OrderTarget].
|
||||
@@ -39,7 +41,7 @@ pub enum AccessMethodParams<'a> {
|
||||
/// The constraint references that are being used, if any.
|
||||
/// An empty list of constraint refs means a scan (full table or index);
|
||||
/// a non-empty list means a search.
|
||||
constraint_refs: &'a [ConstraintRef],
|
||||
constraint_refs: Vec<RangeConstraintRef>,
|
||||
},
|
||||
VirtualTable {
|
||||
/// Index identifier returned by the table's `best_index` method.
|
||||
@@ -57,13 +59,13 @@ pub enum AccessMethodParams<'a> {
|
||||
}
|
||||
|
||||
/// Return the best [AccessMethod] for a given join order.
|
||||
pub fn find_best_access_method_for_join_order<'a>(
|
||||
pub fn find_best_access_method_for_join_order(
|
||||
rhs_table: &JoinedTable,
|
||||
rhs_constraints: &'a TableConstraints,
|
||||
rhs_constraints: &TableConstraints,
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
input_cardinality: f64,
|
||||
) -> Result<Option<AccessMethod<'a>>> {
|
||||
) -> Result<Option<AccessMethod>> {
|
||||
match &rhs_table.table {
|
||||
Table::BTree(_) => find_best_access_method_for_btree(
|
||||
rhs_table,
|
||||
@@ -85,19 +87,19 @@ pub fn find_best_access_method_for_join_order<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
fn find_best_access_method_for_btree<'a>(
|
||||
fn find_best_access_method_for_btree(
|
||||
rhs_table: &JoinedTable,
|
||||
rhs_constraints: &'a TableConstraints,
|
||||
rhs_constraints: &TableConstraints,
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
input_cardinality: f64,
|
||||
) -> Result<Option<AccessMethod<'a>>> {
|
||||
) -> Result<Option<AccessMethod>> {
|
||||
let table_no = join_order.last().unwrap().table_id;
|
||||
let mut best_cost = estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality);
|
||||
let mut best_params = AccessMethodParams::BTreeTable {
|
||||
iter_dir: IterationDirection::Forwards,
|
||||
index: None,
|
||||
constraint_refs: &[],
|
||||
constraint_refs: vec![],
|
||||
};
|
||||
let rowid_column_idx = rhs_table.columns().iter().position(|c| c.is_rowid_alias);
|
||||
|
||||
@@ -123,7 +125,7 @@ fn find_best_access_method_for_btree<'a>(
|
||||
let cost = estimate_cost_for_scan_or_seek(
|
||||
Some(index_info),
|
||||
&rhs_constraints.constraints,
|
||||
usable_constraint_refs,
|
||||
&usable_constraint_refs,
|
||||
input_cardinality,
|
||||
);
|
||||
|
||||
@@ -192,12 +194,12 @@ fn find_best_access_method_for_btree<'a>(
|
||||
}))
|
||||
}
|
||||
|
||||
fn find_best_access_method_for_vtab<'a>(
|
||||
fn find_best_access_method_for_vtab(
|
||||
vtab: &VirtualTable,
|
||||
constraints: &[Constraint],
|
||||
join_order: &[JoinOrderMember],
|
||||
input_cardinality: f64,
|
||||
) -> Result<Option<AccessMethod<'a>>> {
|
||||
) -> Result<Option<AccessMethod>> {
|
||||
let vtab_constraints = convert_to_vtab_constraint(constraints, join_order);
|
||||
|
||||
// TODO: get proper order_by information to pass to the vtab.
|
||||
|
||||
@@ -67,17 +67,17 @@ pub enum BinaryExprSide {
|
||||
}
|
||||
|
||||
impl Constraint {
|
||||
/// Get the constraining expression, e.g. '2+3' from 't.x = 2+3'
|
||||
pub fn get_constraining_expr(&self, where_clause: &[WhereTerm]) -> ast::Expr {
|
||||
/// Get the constraining expression and operator, e.g. ('>=', '2+3') from 't.x >= 2+3'
|
||||
pub fn get_constraining_expr(&self, where_clause: &[WhereTerm]) -> (ast::Operator, ast::Expr) {
|
||||
let (idx, side) = self.where_clause_pos;
|
||||
let where_term = &where_clause[idx];
|
||||
let Ok(Some((lhs, _, rhs))) = as_binary_components(&where_term.expr) else {
|
||||
panic!("Expected a valid binary expression");
|
||||
};
|
||||
if side == BinaryExprSide::Lhs {
|
||||
lhs.clone()
|
||||
(self.operator, lhs.clone())
|
||||
} else {
|
||||
rhs.clone()
|
||||
(self.operator, rhs.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,19 +108,6 @@ pub struct ConstraintRef {
|
||||
pub sort_order: SortOrder,
|
||||
}
|
||||
|
||||
impl ConstraintRef {
|
||||
/// Convert the constraint to a column usable in a [crate::translate::plan::SeekDef::key].
|
||||
pub fn as_seek_key_column(
|
||||
&self,
|
||||
constraints: &[Constraint],
|
||||
where_clause: &[WhereTerm],
|
||||
) -> (ast::Expr, SortOrder) {
|
||||
let constraint = &constraints[self.constraint_vec_pos];
|
||||
let constraining_expr = constraint.get_constraining_expr(where_clause);
|
||||
(constraining_expr, self.sort_order)
|
||||
}
|
||||
}
|
||||
|
||||
/// A collection of [ConstraintRef]s for a given index, or if index is None, for the table's rowid index.
|
||||
/// For example, given a table `T (x,y,z)` with an index `T_I (y desc,z)`, take the following query:
|
||||
/// ```sql
|
||||
@@ -150,6 +137,7 @@ pub struct ConstraintUseCandidate {
|
||||
/// The index that may be used to satisfy the constraints. If none, the table's rowid index is used.
|
||||
pub index: Option<Arc<Index>>,
|
||||
/// References to the constraints that may be used as an access path for the index.
|
||||
/// Refs are sorted by [ConstraintRef::index_col_pos]
|
||||
pub refs: Vec<ConstraintRef>,
|
||||
}
|
||||
|
||||
@@ -193,6 +181,9 @@ fn estimate_selectivity(column: &Column, op: ast::Operator) -> f64 {
|
||||
|
||||
/// Precompute all potentially usable [Constraints] from a WHERE clause.
|
||||
/// The resulting list of [TableConstraints] is then used to evaluate the best access methods for various join orders.
|
||||
///
|
||||
/// This method does not perform much filtering of constraints and delegates this task to the consumers of the method.
|
||||
/// Consumers must inspect [TableConstraints] and its candidates and pick the best constraints for optimized access.
|
||||
pub fn constraints_from_where_clause(
|
||||
where_clause: &[WhereTerm],
|
||||
table_references: &TableReferences,
|
||||
@@ -379,24 +370,6 @@ pub fn constraints_from_where_clause(
|
||||
for candidate in cs.candidates.iter_mut() {
|
||||
// Sort by index_col_pos, ascending -- index columns must be consumed in contiguous order.
|
||||
candidate.refs.sort_by_key(|cref| cref.index_col_pos);
|
||||
// Deduplicate by position, keeping first occurrence (which will be equality if one exists, since the constraints vec is sorted that way)
|
||||
candidate.refs.dedup_by_key(|cref| cref.index_col_pos);
|
||||
// Truncate at first gap in positions -- again, index columns must be consumed in contiguous order.
|
||||
let contiguous_len = candidate
|
||||
.refs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.take_while(|(i, cref)| cref.index_col_pos == *i)
|
||||
.count();
|
||||
candidate.refs.truncate(contiguous_len);
|
||||
|
||||
// Truncate after the first inequality, since the left-prefix rule of indexes requires that all constraints but the last one must be equalities;
|
||||
// again see: https://www.solarwinds.com/blog/the-left-prefix-index-rule
|
||||
if let Some(first_inequality) = candidate.refs.iter().position(|cref| {
|
||||
cs.constraints[cref.constraint_vec_pos].operator != ast::Operator::Equals
|
||||
}) {
|
||||
candidate.refs.truncate(first_inequality + 1);
|
||||
}
|
||||
}
|
||||
cs.candidates.retain(|c| {
|
||||
if let Some(idx) = &c.index {
|
||||
@@ -413,6 +386,87 @@ pub fn constraints_from_where_clause(
|
||||
Ok(constraints)
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
/// References to the [Constraint]s in a [TableConstraints] that apply to a single column.
|
||||
///
|
||||
/// This is a specialized version of [ConstraintRef] which specifically holds range-like constraints:
|
||||
/// - x = 10 (eq is set)
|
||||
/// - x >= 10, x > 10 (lower_bound is set)
|
||||
/// - x <= 10, x < 10 (upper_bound is set)
|
||||
/// - x > 10 AND x < 20 (both lower_bound and upper_bound are set)
|
||||
///
|
||||
/// eq, lower_bound and upper_bound each hold either None or the position of the constraint in the [Constraint] array
|
||||
pub struct RangeConstraintRef {
|
||||
/// Position of the column in the table definition.
|
||||
pub table_col_pos: usize,
|
||||
/// Position of the column in the index definition.
|
||||
pub index_col_pos: usize,
|
||||
/// Sort order for the column in the index definition.
|
||||
pub sort_order: SortOrder,
|
||||
/// Equality constraint (position in the [Constraint] array), if any.
|
||||
pub eq: Option<usize>,
|
||||
/// Lower bound constraint (either > or >=), if any.
|
||||
pub lower_bound: Option<usize>,
|
||||
/// Upper bound constraint (either < or <=), if any.
|
||||
pub upper_bound: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Represents a seek range which can be used in query planning to emit a range scan over a table or an index.
///
/// Each populated field carries the `(operator, expression)` pair of the constraint it was built from.
/// Values are created through [SeekRangeConstraint::new_eq] (only `eq` is set) or
/// [SeekRangeConstraint::new_range] (`eq` is None and at least one bound is set).
|
||||
pub struct SeekRangeConstraint {
|
||||
/// Sort order that accompanies the constraint (copied from the originating [RangeConstraintRef]).
pub sort_order: SortOrder,
|
||||
/// Equality constraint: operator and constraining expression.
pub eq: Option<(ast::Operator, ast::Expr)>,
|
||||
/// Lower bound constraint: operator and constraining expression.
pub lower_bound: Option<(ast::Operator, ast::Expr)>,
|
||||
/// Upper bound constraint: operator and constraining expression.
pub upper_bound: Option<(ast::Operator, ast::Expr)>,
|
||||
}
|
||||
|
||||
impl SeekRangeConstraint {
|
||||
pub fn new_eq(sort_order: SortOrder, eq: (ast::Operator, ast::Expr)) -> Self {
|
||||
Self {
|
||||
sort_order,
|
||||
eq: Some(eq),
|
||||
lower_bound: None,
|
||||
upper_bound: None,
|
||||
}
|
||||
}
|
||||
pub fn new_range(
|
||||
sort_order: SortOrder,
|
||||
lower_bound: Option<(ast::Operator, ast::Expr)>,
|
||||
upper_bound: Option<(ast::Operator, ast::Expr)>,
|
||||
) -> Self {
|
||||
assert!(lower_bound.is_some() || upper_bound.is_some());
|
||||
Self {
|
||||
sort_order,
|
||||
eq: None,
|
||||
lower_bound,
|
||||
upper_bound,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeConstraintRef {
|
||||
/// Convert the [RangeConstraintRef] to a [SeekRangeConstraint] usable in a [crate::translate::plan::SeekDef::key].
|
||||
pub fn as_seek_range_constraint(
|
||||
&self,
|
||||
constraints: &[Constraint],
|
||||
where_clause: &[WhereTerm],
|
||||
) -> SeekRangeConstraint {
|
||||
if let Some(eq) = self.eq {
|
||||
return SeekRangeConstraint::new_eq(
|
||||
self.sort_order,
|
||||
constraints[eq].get_constraining_expr(where_clause),
|
||||
);
|
||||
}
|
||||
SeekRangeConstraint::new_range(
|
||||
self.sort_order,
|
||||
self.lower_bound
|
||||
.map(|x| constraints[x].get_constraining_expr(where_clause)),
|
||||
self.upper_bound
|
||||
.map(|x| constraints[x].get_constraining_expr(where_clause)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Find which [Constraint]s are usable for a given join order.
|
||||
/// Returns the usable constraints, merged per index column into [RangeConstraintRef]s.
|
||||
/// A constraint is considered usable for a given table if all of the other tables referenced by the constraint
|
||||
@@ -421,28 +475,88 @@ pub fn usable_constraints_for_join_order<'a>(
|
||||
constraints: &'a [Constraint],
|
||||
refs: &'a [ConstraintRef],
|
||||
join_order: &[JoinOrderMember],
|
||||
) -> &'a [ConstraintRef] {
|
||||
) -> Vec<RangeConstraintRef> {
|
||||
debug_assert!(refs.is_sorted_by_key(|x| x.index_col_pos));
|
||||
|
||||
let table_idx = join_order.last().unwrap().original_idx;
|
||||
let mut usable_until = 0;
|
||||
let lhs_mask = TableMask::from_table_number_iter(
|
||||
join_order
|
||||
.iter()
|
||||
.take(join_order.len() - 1)
|
||||
.map(|j| j.original_idx),
|
||||
);
|
||||
let mut usable: Vec<RangeConstraintRef> = Vec::new();
|
||||
let mut last_column_pos = 0;
|
||||
for cref in refs.iter() {
|
||||
let constraint = &constraints[cref.constraint_vec_pos];
|
||||
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx);
|
||||
if other_side_refers_to_self {
|
||||
break;
|
||||
}
|
||||
let lhs_mask = TableMask::from_table_number_iter(
|
||||
join_order
|
||||
.iter()
|
||||
.take(join_order.len() - 1)
|
||||
.map(|j| j.original_idx),
|
||||
);
|
||||
let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask);
|
||||
if !all_required_tables_are_on_left_side {
|
||||
break;
|
||||
}
|
||||
usable_until += 1;
|
||||
if Some(cref.index_col_pos) == usable.last().map(|x| x.index_col_pos) {
|
||||
assert_eq!(cref.sort_order, usable.last().unwrap().sort_order);
|
||||
assert_eq!(cref.index_col_pos, usable.last().unwrap().index_col_pos);
|
||||
assert_eq!(
|
||||
constraints[cref.constraint_vec_pos].table_col_pos,
|
||||
usable.last().unwrap().table_col_pos
|
||||
);
|
||||
// if we already have eq constraint - we must not add anything to it
|
||||
// otherwise, we can incorrectly consume filters which will not be used in the access path
|
||||
if usable.last().unwrap().eq.is_some() {
|
||||
continue;
|
||||
}
|
||||
match constraints[cref.constraint_vec_pos].operator {
|
||||
ast::Operator::Greater | ast::Operator::GreaterEquals => {
|
||||
usable.last_mut().unwrap().lower_bound = Some(cref.constraint_vec_pos);
|
||||
}
|
||||
ast::Operator::Less | ast::Operator::LessEquals => {
|
||||
usable.last_mut().unwrap().upper_bound = Some(cref.constraint_vec_pos);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if cref.index_col_pos != last_column_pos {
|
||||
break;
|
||||
}
|
||||
if usable.last().is_some_and(|x| x.eq.is_none()) {
|
||||
break;
|
||||
}
|
||||
let constraint_group = match constraints[cref.constraint_vec_pos].operator {
|
||||
ast::Operator::Equals => RangeConstraintRef {
|
||||
table_col_pos: constraints[cref.constraint_vec_pos].table_col_pos,
|
||||
index_col_pos: cref.index_col_pos,
|
||||
sort_order: cref.sort_order,
|
||||
eq: Some(cref.constraint_vec_pos),
|
||||
lower_bound: None,
|
||||
upper_bound: None,
|
||||
},
|
||||
ast::Operator::Greater | ast::Operator::GreaterEquals => RangeConstraintRef {
|
||||
table_col_pos: constraints[cref.constraint_vec_pos].table_col_pos,
|
||||
index_col_pos: cref.index_col_pos,
|
||||
sort_order: cref.sort_order,
|
||||
eq: None,
|
||||
lower_bound: Some(cref.constraint_vec_pos),
|
||||
upper_bound: None,
|
||||
},
|
||||
ast::Operator::Less | ast::Operator::LessEquals => RangeConstraintRef {
|
||||
table_col_pos: constraints[cref.constraint_vec_pos].table_col_pos,
|
||||
index_col_pos: cref.index_col_pos,
|
||||
sort_order: cref.sort_order,
|
||||
eq: None,
|
||||
lower_bound: None,
|
||||
upper_bound: Some(cref.constraint_vec_pos),
|
||||
},
|
||||
_ => continue,
|
||||
};
|
||||
usable.push(constraint_group);
|
||||
last_column_pos += 1;
|
||||
}
|
||||
&refs[..usable_until]
|
||||
usable
|
||||
}
|
||||
|
||||
fn can_use_partial_index(index: &Index, query_where_clause: &[WhereTerm]) -> bool {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use super::constraints::{Constraint, ConstraintRef};
|
||||
use crate::translate::optimizer::constraints::RangeConstraintRef;
|
||||
|
||||
use super::constraints::Constraint;
|
||||
|
||||
/// A simple newtype wrapper over a f64 that represents the cost of an operation.
|
||||
///
|
||||
@@ -43,7 +45,7 @@ pub fn estimate_page_io_cost(rowcount: f64) -> Cost {
|
||||
pub fn estimate_cost_for_scan_or_seek(
|
||||
index_info: Option<IndexInfo>,
|
||||
constraints: &[Constraint],
|
||||
usable_constraint_refs: &[ConstraintRef],
|
||||
usable_constraint_refs: &[RangeConstraintRef],
|
||||
input_cardinality: f64,
|
||||
) -> Cost {
|
||||
let Some(index_info) = index_info else {
|
||||
@@ -55,8 +57,18 @@ pub fn estimate_cost_for_scan_or_seek(
|
||||
let selectivity_multiplier: f64 = usable_constraint_refs
|
||||
.iter()
|
||||
.map(|cref| {
|
||||
let constraint = &constraints[cref.constraint_vec_pos];
|
||||
constraint.selectivity
|
||||
if let Some(eq) = cref.eq {
|
||||
let constraint = &constraints[eq];
|
||||
return constraint.selectivity;
|
||||
}
|
||||
let mut selectivity = 1.0;
|
||||
if let Some(lower_bound) = cref.lower_bound {
|
||||
selectivity *= constraints[lower_bound].selectivity;
|
||||
}
|
||||
if let Some(upper_bound) = cref.upper_bound {
|
||||
selectivity *= constraints[upper_bound].selectivity;
|
||||
}
|
||||
selectivity
|
||||
})
|
||||
.product();
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ pub fn join_lhs_and_rhs<'a>(
|
||||
rhs_constraints: &'a TableConstraints,
|
||||
join_order: &[JoinOrderMember],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod>>,
|
||||
cost_upper_bound: Cost,
|
||||
) -> Result<Option<JoinN>> {
|
||||
// The input cardinality for this join is the output cardinality of the previous join.
|
||||
@@ -125,7 +125,7 @@ pub fn compute_best_join_order<'a>(
|
||||
joined_tables: &[JoinedTable],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
constraints: &'a [TableConstraints],
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod>>,
|
||||
) -> Result<Option<BestJoinOrderResult>> {
|
||||
// Skip work if we have no tables to consider.
|
||||
if joined_tables.is_empty() {
|
||||
@@ -403,7 +403,7 @@ pub fn compute_best_join_order<'a>(
|
||||
pub fn compute_naive_left_deep_plan<'a>(
|
||||
joined_tables: &[JoinedTable],
|
||||
maybe_order_target: Option<&OrderTarget>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod<'a>>>,
|
||||
access_methods_arena: &'a RefCell<Vec<AccessMethod>>,
|
||||
constraints: &'a [TableConstraints],
|
||||
) -> Result<Option<JoinN>> {
|
||||
let n = joined_tables.len();
|
||||
@@ -509,9 +509,9 @@ mod tests {
|
||||
use crate::{
|
||||
schema::{BTreeTable, Column, Index, IndexColumn, Table, Type},
|
||||
translate::{
|
||||
optimizer::access_method::AccessMethodParams,
|
||||
optimizer::constraints::{
|
||||
constraints_from_where_clause, BinaryExprSide, ConstraintRef,
|
||||
optimizer::{
|
||||
access_method::AccessMethodParams,
|
||||
constraints::{constraints_from_where_clause, BinaryExprSide, RangeConstraintRef},
|
||||
},
|
||||
plan::{
|
||||
ColumnUsedMask, IterationDirection, JoinInfo, Operation, TableReferences, WhereTerm,
|
||||
@@ -632,8 +632,7 @@ mod tests {
|
||||
assert!(iter_dir == IterationDirection::Forwards);
|
||||
assert!(constraint_refs.len() == 1);
|
||||
assert!(
|
||||
table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.where_clause_pos
|
||||
table_constraints[0].constraints[constraint_refs[0].eq.unwrap()].where_clause_pos
|
||||
== (0, BinaryExprSide::Rhs)
|
||||
);
|
||||
}
|
||||
@@ -701,8 +700,7 @@ mod tests {
|
||||
assert!(index.as_ref().unwrap().name == "sqlite_autoindex_test_table_1");
|
||||
assert!(constraint_refs.len() == 1);
|
||||
assert!(
|
||||
table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.where_clause_pos
|
||||
table_constraints[0].constraints[constraint_refs[0].eq.unwrap()].where_clause_pos
|
||||
== (0, BinaryExprSide::Rhs)
|
||||
);
|
||||
}
|
||||
@@ -784,8 +782,7 @@ mod tests {
|
||||
assert!(index.as_ref().unwrap().name == "index1");
|
||||
assert!(constraint_refs.len() == 1);
|
||||
assert!(
|
||||
table_constraints[TABLE1].constraints[constraint_refs[0].constraint_vec_pos]
|
||||
.where_clause_pos
|
||||
table_constraints[TABLE1].constraints[constraint_refs[0].eq.unwrap()].where_clause_pos
|
||||
== (0, BinaryExprSide::Rhs)
|
||||
);
|
||||
}
|
||||
@@ -960,8 +957,8 @@ mod tests {
|
||||
assert!(iter_dir == IterationDirection::Forwards);
|
||||
assert!(index.as_ref().unwrap().name == "sqlite_autoindex_customers_1");
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint = &table_constraints[TABLE_NO_CUSTOMERS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint =
|
||||
&table_constraints[TABLE_NO_CUSTOMERS].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.lhs_mask.is_empty());
|
||||
|
||||
let access_method = &access_methods_arena.borrow()[best_plan.data[1].1];
|
||||
@@ -970,7 +967,7 @@ mod tests {
|
||||
assert!(index.as_ref().unwrap().name == "orders_customer_id_idx");
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint =
|
||||
&table_constraints[TABLE_NO_ORDERS].constraints[constraint_refs[0].constraint_vec_pos];
|
||||
&table_constraints[TABLE_NO_ORDERS].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.lhs_mask.contains_table(TABLE_NO_CUSTOMERS));
|
||||
|
||||
let access_method = &access_methods_arena.borrow()[best_plan.data[2].1];
|
||||
@@ -978,8 +975,8 @@ mod tests {
|
||||
assert!(iter_dir == IterationDirection::Forwards);
|
||||
assert!(index.as_ref().unwrap().name == "order_items_order_id_idx");
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint = &table_constraints[TABLE_NO_ORDER_ITEMS].constraints
|
||||
[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint =
|
||||
&table_constraints[TABLE_NO_ORDER_ITEMS].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.lhs_mask.contains_table(TABLE_NO_ORDERS));
|
||||
}
|
||||
|
||||
@@ -1187,8 +1184,8 @@ mod tests {
|
||||
assert!(iter_dir == IterationDirection::Forwards);
|
||||
assert!(index.is_none());
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint = &table_constraints[*table_number].constraints
|
||||
[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint =
|
||||
&table_constraints[*table_number].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.lhs_mask.contains_table(FACT_TABLE_IDX));
|
||||
assert!(constraint.operator == ast::Operator::Equals);
|
||||
}
|
||||
@@ -1280,7 +1277,7 @@ mod tests {
|
||||
assert!(iter_dir == IterationDirection::Forwards);
|
||||
assert!(index.is_none());
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint = &table_constraints.constraints[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint = &table_constraints.constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.lhs_mask.contains_table(i - 1));
|
||||
assert!(constraint.operator == ast::Operator::Equals);
|
||||
}
|
||||
@@ -1481,7 +1478,7 @@ mod tests {
|
||||
let (_, index, constraint_refs) = _as_btree(access_method);
|
||||
assert!(index.as_ref().is_some_and(|i| i.name == "idx1"));
|
||||
assert!(constraint_refs.len() == 1);
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.operator == ast::Operator::Equals);
|
||||
assert!(constraint.table_col_pos == 0); // c1
|
||||
}
|
||||
@@ -1608,10 +1605,10 @@ mod tests {
|
||||
let (_, index, constraint_refs) = _as_btree(access_method);
|
||||
assert!(index.as_ref().is_some_and(|i| i.name == "idx1"));
|
||||
assert!(constraint_refs.len() == 2);
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[0].constraint_vec_pos];
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[0].eq.unwrap()];
|
||||
assert!(constraint.operator == ast::Operator::Equals);
|
||||
assert!(constraint.table_col_pos == 0); // c1
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[1].constraint_vec_pos];
|
||||
let constraint = &table_constraints[0].constraints[constraint_refs[1].lower_bound.unwrap()];
|
||||
assert!(constraint.operator == ast::Operator::Greater);
|
||||
assert!(constraint.table_col_pos == 1); // c2
|
||||
}
|
||||
@@ -1711,9 +1708,13 @@ mod tests {
|
||||
Expr::Literal(ast::Literal::Numeric(value.to_string()))
|
||||
}
|
||||
|
||||
fn _as_btree<'a>(
|
||||
access_method: &AccessMethod<'a>,
|
||||
) -> (IterationDirection, Option<Arc<Index>>, &'a [ConstraintRef]) {
|
||||
fn _as_btree(
|
||||
access_method: &AccessMethod,
|
||||
) -> (
|
||||
IterationDirection,
|
||||
Option<Arc<Index>>,
|
||||
&'_ [RangeConstraintRef],
|
||||
) {
|
||||
match &access_method.params {
|
||||
AccessMethodParams::BTreeTable {
|
||||
iter_dir,
|
||||
|
||||
@@ -18,8 +18,11 @@ use turso_parser::ast::{self, Expr, SortOrder};
|
||||
use crate::{
|
||||
schema::{Index, IndexColumn, Schema, Table},
|
||||
translate::{
|
||||
optimizer::access_method::AccessMethodParams, optimizer::constraints::TableConstraints,
|
||||
plan::Scan, plan::TerminationKey,
|
||||
optimizer::{
|
||||
access_method::AccessMethodParams,
|
||||
constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints},
|
||||
},
|
||||
plan::{Scan, SeekKeyComponent},
|
||||
},
|
||||
types::SeekOp,
|
||||
LimboError, Result,
|
||||
@@ -343,13 +346,15 @@ fn optimize_table_access(
|
||||
.filter(|c| c.usable)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let temp_constraint_refs = (0..usable_constraints.len())
|
||||
let mut temp_constraint_refs = (0..usable_constraints.len())
|
||||
.map(|i| ConstraintRef {
|
||||
constraint_vec_pos: i,
|
||||
index_col_pos: usable_constraints[i].table_col_pos,
|
||||
index_col_pos: i,
|
||||
sort_order: SortOrder::Asc,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
temp_constraint_refs.sort_by_key(|x| x.index_col_pos);
|
||||
|
||||
let usable_constraint_refs = usable_constraints_for_join_order(
|
||||
&usable_constraints,
|
||||
&temp_constraint_refs,
|
||||
@@ -362,17 +367,14 @@ fn optimize_table_access(
|
||||
});
|
||||
continue;
|
||||
}
|
||||
let ephemeral_index = ephemeral_index_build(
|
||||
&joined_tables[table_idx],
|
||||
&usable_constraints,
|
||||
usable_constraint_refs,
|
||||
);
|
||||
let ephemeral_index =
|
||||
ephemeral_index_build(&joined_tables[table_idx], &usable_constraint_refs);
|
||||
let ephemeral_index = Arc::new(ephemeral_index);
|
||||
joined_tables[table_idx].op = Operation::Search(Search::Seek {
|
||||
index: Some(ephemeral_index),
|
||||
seek_def: build_seek_def_from_constraints(
|
||||
&usable_constraints,
|
||||
usable_constraint_refs,
|
||||
&table_constraints.constraints,
|
||||
&usable_constraint_refs,
|
||||
*iter_dir,
|
||||
where_clause,
|
||||
)?,
|
||||
@@ -383,25 +385,29 @@ fn optimize_table_access(
|
||||
.as_ref()
|
||||
.is_some_and(|join_info| join_info.outer);
|
||||
for cref in constraint_refs.iter() {
|
||||
let constraint =
|
||||
&constraints_per_table[table_idx].constraints[cref.constraint_vec_pos];
|
||||
let where_term = &mut where_clause[constraint.where_clause_pos.0];
|
||||
assert!(
|
||||
!where_term.consumed,
|
||||
"trying to consume a where clause term twice: {where_term:?}",
|
||||
);
|
||||
if is_outer_join && where_term.from_outer_join.is_none() {
|
||||
// Don't consume WHERE terms from outer joins if the where term is not part of the outer join condition. Consider:
|
||||
// - SELECT * FROM t1 LEFT JOIN t2 ON false WHERE t2.id = 5
|
||||
// - there is no row in t2 where t2.id = 5
|
||||
// This should never produce any rows with null columns for t2 (because NULL != 5), but if we consume 't2.id = 5' to use it as a seek key,
|
||||
// this will cause a null row to be emitted for EVERY row of t1.
|
||||
// Note: in most cases like this, the LEFT JOIN could just be converted into an INNER JOIN (because e.g. t2.id=5 statically excludes any null rows),
|
||||
// but that optimization should not be done here - it should be done before the join order optimization happens.
|
||||
continue;
|
||||
for constraint_vec_pos in &[cref.eq, cref.lower_bound, cref.upper_bound] {
|
||||
let Some(constraint_vec_pos) = constraint_vec_pos else {
|
||||
continue;
|
||||
};
|
||||
let constraint =
|
||||
&constraints_per_table[table_idx].constraints[*constraint_vec_pos];
|
||||
let where_term = &mut where_clause[constraint.where_clause_pos.0];
|
||||
assert!(
|
||||
!where_term.consumed,
|
||||
"trying to consume a where clause term twice: {where_term:?}",
|
||||
);
|
||||
if is_outer_join && where_term.from_outer_join.is_none() {
|
||||
// Don't consume WHERE terms from outer joins if the where term is not part of the outer join condition. Consider:
|
||||
// - SELECT * FROM t1 LEFT JOIN t2 ON false WHERE t2.id = 5
|
||||
// - there is no row in t2 where t2.id = 5
|
||||
// This should never produce any rows with null columns for t2 (because NULL != 5), but if we consume 't2.id = 5' to use it as a seek key,
|
||||
// this will cause a null row to be emitted for EVERY row of t1.
|
||||
// Note: in most cases like this, the LEFT JOIN could just be converted into an INNER JOIN (because e.g. t2.id=5 statically excludes any null rows),
|
||||
// but that optimization should not be done here - it should be done before the join order optimization happens.
|
||||
continue;
|
||||
}
|
||||
where_term.consumed = true;
|
||||
}
|
||||
|
||||
where_clause[constraint.where_clause_pos.0].consumed = true;
|
||||
}
|
||||
if let Some(index) = &index {
|
||||
joined_tables[table_idx].op = Operation::Search(Search::Seek {
|
||||
@@ -419,13 +425,14 @@ fn optimize_table_access(
|
||||
constraint_refs.len() == 1,
|
||||
"expected exactly one constraint for rowid seek, got {constraint_refs:?}"
|
||||
);
|
||||
let constraint = &constraints_per_table[table_idx].constraints
|
||||
[constraint_refs[0].constraint_vec_pos];
|
||||
joined_tables[table_idx].op = match constraint.operator {
|
||||
ast::Operator::Equals => Operation::Search(Search::RowidEq {
|
||||
cmp_expr: constraint.get_constraining_expr(where_clause),
|
||||
}),
|
||||
_ => Operation::Search(Search::Seek {
|
||||
joined_tables[table_idx].op = if let Some(eq) = constraint_refs[0].eq {
|
||||
Operation::Search(Search::RowidEq {
|
||||
cmp_expr: constraints_per_table[table_idx].constraints[eq]
|
||||
.get_constraining_expr(where_clause)
|
||||
.1,
|
||||
})
|
||||
} else {
|
||||
Operation::Search(Search::Seek {
|
||||
index: None,
|
||||
seek_def: build_seek_def_from_constraints(
|
||||
&constraints_per_table[table_idx].constraints,
|
||||
@@ -433,7 +440,7 @@ fn optimize_table_access(
|
||||
*iter_dir,
|
||||
where_clause,
|
||||
)?,
|
||||
}),
|
||||
})
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -505,7 +512,7 @@ fn build_vtab_scan_op(
|
||||
if usage.omit {
|
||||
where_clause[constraint.where_clause_pos.0].consumed = true;
|
||||
}
|
||||
let expr = constraint.get_constraining_expr(where_clause);
|
||||
let (_, expr) = constraint.get_constraining_expr(where_clause);
|
||||
constraints[zero_based_argv_index] = Some(expr);
|
||||
arg_count += 1;
|
||||
}
|
||||
@@ -864,8 +871,7 @@ impl Optimizable for ast::Expr {
|
||||
|
||||
fn ephemeral_index_build(
|
||||
table_reference: &JoinedTable,
|
||||
constraints: &[Constraint],
|
||||
constraint_refs: &[ConstraintRef],
|
||||
constraint_refs: &[RangeConstraintRef],
|
||||
) -> Index {
|
||||
let mut ephemeral_columns: Vec<IndexColumn> = table_reference
|
||||
.columns()
|
||||
@@ -886,11 +892,11 @@ fn ephemeral_index_build(
|
||||
let a_constraint = constraint_refs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| constraints[c.constraint_vec_pos].table_col_pos == a.pos_in_table);
|
||||
.find(|(_, c)| c.table_col_pos == a.pos_in_table);
|
||||
let b_constraint = constraint_refs
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| constraints[c.constraint_vec_pos].table_col_pos == b.pos_in_table);
|
||||
.find(|(_, c)| c.table_col_pos == b.pos_in_table);
|
||||
match (a_constraint, b_constraint) {
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
@@ -922,7 +928,7 @@ fn ephemeral_index_build(
|
||||
/// Build a [SeekDef] for a given list of [Constraint]s
|
||||
pub fn build_seek_def_from_constraints(
|
||||
constraints: &[Constraint],
|
||||
constraint_refs: &[ConstraintRef],
|
||||
constraint_refs: &[RangeConstraintRef],
|
||||
iter_dir: IterationDirection,
|
||||
where_clause: &[WhereTerm],
|
||||
) -> Result<SeekDef> {
|
||||
@@ -933,472 +939,294 @@ pub fn build_seek_def_from_constraints(
|
||||
// Extract the key values and operators
|
||||
let key = constraint_refs
|
||||
.iter()
|
||||
.map(|cref| cref.as_seek_key_column(constraints, where_clause))
|
||||
.map(|cref| cref.as_seek_range_constraint(constraints, where_clause))
|
||||
.collect();
|
||||
|
||||
// We know all but potentially the last term is an equality, so we can use the operator of the last term
|
||||
// to form the SeekOp
|
||||
let op = constraints[constraint_refs.last().unwrap().constraint_vec_pos].operator;
|
||||
|
||||
let seek_def = build_seek_def(op, iter_dir, key)?;
|
||||
let seek_def = build_seek_def(iter_dir, key)?;
|
||||
Ok(seek_def)
|
||||
}
|
||||
|
||||
/// Build a [SeekDef] for a given comparison operator and index key.
|
||||
/// Build a [SeekDef] for a given [SeekRangeConstraint] and [IterationDirection].
|
||||
/// To be usable as a seek key, all but potentially the last term must be equalities.
|
||||
/// The last term can be a nonequality.
|
||||
/// The comparison operator referred to by `op` is the operator of the last term.
|
||||
/// The last term can be a nonequality, i.e. a range that may be unbounded on one side.
|
||||
///
|
||||
/// There are two parts to the seek definition:
|
||||
/// 1. The [SeekKey], which specifies the key that we will use to seek to the first row that matches the index key.
|
||||
/// 2. The [TerminationKey], which specifies the key that we will use to terminate the index scan that follows the seek.
|
||||
/// 1. start [SeekKey], which specifies the key that we will use to seek to the first row that matches the index key.
|
||||
/// 2. end [SeekKey], which specifies the key that we will use to terminate the index scan that follows the seek.
|
||||
///
|
||||
/// There are some nuances to how, and which parts of, the index key can be used in the [SeekKey] and [TerminationKey],
|
||||
/// There are some nuances to how, and which parts of, the index key can be used in the start and end [SeekKey]s,
|
||||
/// depending on the operator and iteration order. This function explains those nuances inline when dealing with
|
||||
/// each case.
|
||||
///
|
||||
/// But to illustrate the general idea, consider the following examples:
|
||||
///
|
||||
/// 1. For example, having two conditions like (x>10 AND y>20) cannot be used as a valid [SeekKey] GT(x:10, y:20)
|
||||
/// because the first row greater than (x:10, y:20) might be (x:10, y:21), which does not satisfy the where clause.
|
||||
/// because the first row greater than (x:10, y:20) might be (x:11, y:19), which does not satisfy the where clause.
|
||||
/// In this case, only GT(x:10) must be used as the [SeekKey], and rows with y <= 20 must be filtered as a regular condition expression for each value of x.
|
||||
///
|
||||
/// 2. In contrast, having (x=10 AND y>20) forms a valid index key GT(x:10, y:20) because after the seek, we can simply terminate as soon as x > 10,
|
||||
/// i.e. use GT(x:10, y:20) as the [SeekKey] and GT(x:10) as the [TerminationKey].
|
||||
/// i.e. use GT(x:10, y:20) as the start [SeekKey] and GT(x:10) as the end.
|
||||
///
|
||||
/// The preceding examples are for an ascending index. The logic is similar for descending indexes, but an important distinction is that
|
||||
/// since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc.
|
||||
/// So when you see e.g. a SeekOp::GT below for a descending index, it actually means that we are seeking the first row where the index key is LESS than the seek key.
|
||||
///
|
||||
fn build_seek_def(
|
||||
op: ast::Operator,
|
||||
iter_dir: IterationDirection,
|
||||
key: Vec<(ast::Expr, SortOrder)>,
|
||||
mut key: Vec<SeekRangeConstraint>,
|
||||
) -> Result<SeekDef> {
|
||||
let key_len = key.len();
|
||||
let sort_order_of_last_key = key.last().unwrap().1;
|
||||
assert!(!key.is_empty());
|
||||
let last = key.last().unwrap();
|
||||
|
||||
// If we are searching for an exact key, emit the definition immediately, using the full key as the prefix.
|
||||
if last.eq.is_some() {
|
||||
let (start_op, end_op) = match iter_dir {
|
||||
IterationDirection::Forwards => (SeekOp::GE { eq_only: true }, SeekOp::GT),
|
||||
IterationDirection::Backwards => (SeekOp::LE { eq_only: true }, SeekOp::LT),
|
||||
};
|
||||
return Ok(SeekDef {
|
||||
prefix: key,
|
||||
iter_dir,
|
||||
start: SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: start_op,
|
||||
},
|
||||
end: SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: end_op,
|
||||
},
|
||||
});
|
||||
}
|
||||
assert!(last.lower_bound.is_some() || last.upper_bound.is_some());
|
||||
|
||||
// pop last key as we will do some form of range search
|
||||
let last = key.pop().unwrap();
|
||||
|
||||
// after that all key components must be equality constraints
|
||||
debug_assert!(key.iter().all(|k| k.eq.is_some()));
|
||||
|
||||
// For the commented examples below, keep in mind that since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc.
|
||||
// Also keep in mind that index keys are compared based on the number of columns given, so for example:
|
||||
// - if key is GT(x:10), then (x=10, y=usize::MAX) is not GT because only X is compared. (x=11, y=<any>) is GT.
|
||||
// - if key is GT(x:10, y:20), then (x=10, y=21) is GT because both X and Y are compared.
|
||||
// - if key is GT(x:10, y:NULL), then (x=10, y=0) is GT because NULL is always LT in index key comparisons.
|
||||
Ok(match (iter_dir, op) {
|
||||
// Forwards, EQ:
|
||||
// Example: (x=10 AND y=20)
|
||||
// Seek key: start from the first GE(x:10, y:20)
|
||||
// Termination key: end at the first GT(x:10, y:20)
|
||||
// Ascending vs descending doesn't matter because all the comparisons are equalities.
|
||||
(IterationDirection::Forwards, ast::Operator::Equals) => SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: Some(SeekKey {
|
||||
len: key_len,
|
||||
null_pad: false,
|
||||
op: SeekOp::GE { eq_only: true },
|
||||
}),
|
||||
termination: Some(TerminationKey {
|
||||
len: key_len,
|
||||
null_pad: false,
|
||||
op: SeekOp::GT,
|
||||
}),
|
||||
},
|
||||
// Forwards, GT:
|
||||
// Ascending index example: (x=10 AND y>20)
|
||||
// Seek key: start from the first GT(x:10, y:20), e.g. (x=10, y=21)
|
||||
// Termination key: end at the first GT(x:10), e.g. (x=11, y=0)
|
||||
//
|
||||
// Descending index example: (x=10 AND y>20)
|
||||
// Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10)
|
||||
// Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20)
|
||||
(IterationDirection::Forwards, ast::Operator::Greater) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(key_len, key_len - 1, SeekOp::GT, SeekOp::GT)
|
||||
} else {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::LE { eq_only: false }.reverse(),
|
||||
SeekOp::LE { eq_only: false }.reverse(),
|
||||
)
|
||||
};
|
||||
Ok(match iter_dir {
|
||||
IterationDirection::Forwards => {
|
||||
let (start, end) = match last.sort_order {
|
||||
SortOrder::Asc => {
|
||||
let start = match last.lower_bound {
|
||||
// Forwards, Asc, GT: (x=10 AND y>20)
|
||||
// Start key: start from the first GT(x:10, y:20)
|
||||
Some((ast::Operator::Greater, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
// Forwards, Asc, GE: (x=10 AND y>=20)
|
||||
// Start key: start from the first GE(x:10, y:20)
|
||||
Some((ast::Operator::GreaterEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
// Forwards, Asc, None, (x=10 AND y<30)
|
||||
// Start key: start from the first GE(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
let end = match last.upper_bound {
|
||||
// Forwards, Asc, LT, (x=10 AND y<30)
|
||||
// End key: end at first GE(x:10, y:30)
|
||||
Some((ast::Operator::Less, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
// Forwards, Asc, LE, (x=10 AND y<=30)
|
||||
// End key: end at first GT(x:10, y:30)
|
||||
Some((ast::Operator::LessEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
// Forwards, Asc, None, (x=10 AND y>20)
|
||||
// End key: end at first GT(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
(start, end)
|
||||
}
|
||||
SortOrder::Desc => {
|
||||
let start = match last.upper_bound {
|
||||
// Forwards, Desc, LT: (x=10 AND y<30)
|
||||
// Start key: start from the first GT(x:10, y:30)
|
||||
Some((ast::Operator::Less, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
// Forwards, Desc, LE: (x=10 AND y<=30)
|
||||
// Start key: start from the first GE(x:10, y:30)
|
||||
Some((ast::Operator::LessEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
// Forwards, Desc, None: (x=10 AND y>20)
|
||||
// Start key: start from the first GE(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
let end = match last.lower_bound {
|
||||
// Forwards, Desc, GT, (x=10 AND y>20)
|
||||
// End key: end at first GE(x:10, y:20)
|
||||
Some((ast::Operator::Greater, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GE { eq_only: false },
|
||||
},
|
||||
// Forwards, Desc, GE, (x=10 AND y>=20)
|
||||
// End key: end at first GT(x:10, y:20)
|
||||
Some((ast::Operator::GreaterEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
// Forwards, Desc, None, (x=10 AND y<30)
|
||||
// End key: end at first GT(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::GT,
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
(start, end)
|
||||
}
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
prefix: key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
start,
|
||||
end,
|
||||
}
|
||||
}
|
||||
// Forwards, GE:
|
||||
// Ascending index example: (x=10 AND y>=20)
|
||||
// Seek key: start from the first GE(x:10, y:20), e.g. (x=10, y=20)
|
||||
// Termination key: end at the first GT(x:10), e.g. (x=11, y=0)
|
||||
//
|
||||
// Descending index example: (x=10 AND y>=20)
|
||||
// Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10)
|
||||
// Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20)
|
||||
(IterationDirection::Forwards, ast::Operator::GreaterEquals) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::GE { eq_only: false },
|
||||
SeekOp::GT,
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::LE { eq_only: false }.reverse(),
|
||||
SeekOp::LT.reverse(),
|
||||
)
|
||||
};
|
||||
IterationDirection::Backwards => {
|
||||
let (start, end) = match last.sort_order {
|
||||
SortOrder::Asc => {
|
||||
let start = match last.upper_bound {
|
||||
// Backwards, Asc, LT: (x=10 AND y<30)
|
||||
// Start key: start from the first LT(x:10, y:30)
|
||||
Some((ast::Operator::Less, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
// Backwards, Asc, LE: (x=10 AND y<=30)
|
||||
// Start key: start from the first LE(x:10, y:30)
|
||||
Some((ast::Operator::LessEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
// Backwards, Asc, None: (x=10 AND y>20)
|
||||
// Start key: start from the first LE(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op)
|
||||
}
|
||||
};
|
||||
let end = match last.lower_bound {
|
||||
// Backwards, Asc, GT, (x=10 AND y>20)
|
||||
// End key: end at first LE(x:10, y:20)
|
||||
Some((ast::Operator::Greater, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
// Backwards, Asc, GE, (x=10 AND y>=20)
|
||||
// End key: end at first LT(x:10, y:20)
|
||||
Some((ast::Operator::GreaterEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
// Backwards, Asc, None, (x=10 AND y<30)
|
||||
// End key: end at first LT(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
(start, end)
|
||||
}
|
||||
SortOrder::Desc => {
|
||||
let start = match last.lower_bound {
|
||||
// Backwards, Desc, GT: (x=10 AND y>20)
|
||||
// Start key: start from the first LT(x:10, y:20)
|
||||
Some((ast::Operator::Greater, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
// Backwards, Desc, GE: (x=10 AND y>=20)
|
||||
// Start key: start from the first LE(x:10, y:20)
|
||||
Some((ast::Operator::GreaterEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
// Backwards, Desc, None: (x=10 AND y<30)
|
||||
// Start key: start from the first LE(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
let end = match last.upper_bound {
|
||||
// Backwards, Desc, LT, (x=10 AND y<30)
|
||||
// End key: end at first LE(x:10, y:30)
|
||||
Some((ast::Operator::Less, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LE { eq_only: false },
|
||||
},
|
||||
// Backwards, Desc, LE, (x=10 AND y<=30)
|
||||
// End key: end at first LT(x:10, y:30)
|
||||
Some((ast::Operator::LessEquals, bound)) => SeekKey {
|
||||
last_component: SeekKeyComponent::Expr(bound),
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
// Backwards, Desc, None, (x=10 AND y>20)
|
||||
// End key: end at first LT(x:10)
|
||||
None => SeekKey {
|
||||
last_component: SeekKeyComponent::None,
|
||||
op: SeekOp::LT,
|
||||
},
|
||||
Some((op, _)) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
};
|
||||
(start, end)
|
||||
}
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
prefix: key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
start,
|
||||
end,
|
||||
}
|
||||
}
|
||||
// Forwards, LT:
|
||||
// Ascending index example: (x=10 AND y<20)
|
||||
// Seek key: start from the first GT(x:10, y: NULL), e.g. (x=10, y=0)
|
||||
// Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20)
|
||||
//
|
||||
// Descending index example: (x=10 AND y<20)
|
||||
// Seek key: start from the first LT(x:10, y:20), e.g. (x=10, y=19) so reversed -> GT(x:10, y:20)
|
||||
// Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT)
|
||||
(IterationDirection::Forwards, ast::Operator::Less) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::GT,
|
||||
SeekOp::GE { eq_only: false },
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::GT,
|
||||
SeekOp::GE { eq_only: false },
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Asc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Desc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
// Forwards, LE:
|
||||
// Ascending index example: (x=10 AND y<=20)
|
||||
// Seek key: start from the first GE(x:10, y:NULL), e.g. (x=10, y=0)
|
||||
// Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21)
|
||||
//
|
||||
// Descending index example: (x=10 AND y<=20)
|
||||
// Seek key: start from the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20)
|
||||
// Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT)
|
||||
(IterationDirection::Forwards, ast::Operator::LessEquals) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(key_len - 1, key_len, SeekOp::GT, SeekOp::GT)
|
||||
} else {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::LE { eq_only: false }.reverse(),
|
||||
SeekOp::LE { eq_only: false }.reverse(),
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Asc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Desc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
// Backwards, EQ:
|
||||
// Example: (x=10 AND y=20)
|
||||
// Seek key: start from the last LE(x:10, y:20)
|
||||
// Termination key: end at the first LT(x:10, y:20)
|
||||
// Ascending vs descending doesn't matter because all the comparisons are equalities.
|
||||
(IterationDirection::Backwards, ast::Operator::Equals) => SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: Some(SeekKey {
|
||||
len: key_len,
|
||||
op: SeekOp::LE { eq_only: true },
|
||||
null_pad: false,
|
||||
}),
|
||||
termination: Some(TerminationKey {
|
||||
len: key_len,
|
||||
op: SeekOp::LT,
|
||||
null_pad: false,
|
||||
}),
|
||||
},
|
||||
// Backwards, LT:
|
||||
// Ascending index example: (x=10 AND y<20)
|
||||
// Seek key: start from the last LT(x:10, y:20), e.g. (x=10, y=19)
|
||||
// Termination key: end at the first LE(x:10, NULL), e.g. (x=9, y=usize::MAX)
|
||||
//
|
||||
// Descending index example: (x=10 AND y<20)
|
||||
// Seek key: start from the last GT(x:10, y:NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL)
|
||||
// Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20)
|
||||
(IterationDirection::Backwards, ast::Operator::Less) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::LT,
|
||||
SeekOp::LE { eq_only: false },
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::GT.reverse(),
|
||||
SeekOp::GE { eq_only: false }.reverse(),
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Desc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Asc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
// Backwards, LE:
|
||||
// Ascending index example: (x=10 AND y<=20)
|
||||
// Seek key: start from the last LE(x:10, y:20), e.g. (x=10, y=20)
|
||||
// Termination key: end at the first LT(x:10, NULL), e.g. (x=9, y=usize::MAX)
|
||||
//
|
||||
// Descending index example: (x=10 AND y<=20)
|
||||
// Seek key: start from the last GT(x:10, NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL)
|
||||
// Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20)
|
||||
(IterationDirection::Backwards, ast::Operator::LessEquals) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::LE { eq_only: false },
|
||||
SeekOp::LE { eq_only: false },
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::GT.reverse(),
|
||||
SeekOp::GT.reverse(),
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Desc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: sort_order_of_last_key == SortOrder::Asc,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
// Backwards, GT:
|
||||
// Ascending index example: (x=10 AND y>20)
|
||||
// Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX)
|
||||
// Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20)
|
||||
//
|
||||
// Descending index example: (x=10 AND y>20)
|
||||
// Seek key: start from the last GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20)
|
||||
// Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10)
|
||||
(IterationDirection::Backwards, ast::Operator::Greater) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::LE { eq_only: false },
|
||||
SeekOp::LE { eq_only: false },
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::GT.reverse(),
|
||||
SeekOp::GT.reverse(),
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
// Backwards, GE:
|
||||
// Ascending index example: (x=10 AND y>=20)
|
||||
// Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX)
|
||||
// Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19)
|
||||
//
|
||||
// Descending index example: (x=10 AND y>=20)
|
||||
// Seek key: start from the last GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20)
|
||||
// Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10)
|
||||
(IterationDirection::Backwards, ast::Operator::GreaterEquals) => {
|
||||
let (seek_key_len, termination_key_len, seek_op, termination_op) =
|
||||
if sort_order_of_last_key == SortOrder::Asc {
|
||||
(
|
||||
key_len - 1,
|
||||
key_len,
|
||||
SeekOp::LE { eq_only: false },
|
||||
SeekOp::LT,
|
||||
)
|
||||
} else {
|
||||
(
|
||||
key_len,
|
||||
key_len - 1,
|
||||
SeekOp::GE { eq_only: false }.reverse(),
|
||||
SeekOp::GT.reverse(),
|
||||
)
|
||||
};
|
||||
SeekDef {
|
||||
key,
|
||||
iter_dir,
|
||||
seek: if seek_key_len > 0 {
|
||||
Some(SeekKey {
|
||||
len: seek_key_len,
|
||||
op: seek_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
termination: if termination_key_len > 0 {
|
||||
Some(TerminationKey {
|
||||
len: termination_key_len,
|
||||
op: termination_op,
|
||||
null_pad: false,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
(_, op) => {
|
||||
crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ use turso_parser::ast::{self, FrameBound, FrameClause, FrameExclude, FrameMode,
|
||||
use crate::{
|
||||
function::AggFunc,
|
||||
schema::{BTreeTable, Column, FromClauseSubquery, Index, Schema, Table},
|
||||
translate::collate::get_collseq_from_expr,
|
||||
translate::{collate::get_collseq_from_expr, optimizer::constraints::SeekRangeConstraint},
|
||||
vdbe::{
|
||||
builder::{CursorKey, CursorType, ProgramBuilder},
|
||||
insn::{IdxInsertFlags, Insn},
|
||||
@@ -1004,54 +1004,91 @@ impl JoinedTable {
|
||||
/// A definition of a rowid/index search.
|
||||
///
|
||||
/// [SeekKey] is the condition that is used to seek to a specific row in a table/index.
|
||||
/// [TerminationKey] is the condition that is used to terminate the search after a seek.
|
||||
/// [SeekKey] is also used to represent the termination condition of a range scan.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SeekDef {
|
||||
/// The key to use when seeking and when terminating the scan that follows the seek.
|
||||
/// Common prefix of the key which is shared between start/end fields
|
||||
/// For example, given:
|
||||
/// - CREATE INDEX i ON t (x, y desc)
|
||||
/// - SELECT * FROM t WHERE x = 1 AND y >= 30
|
||||
///
|
||||
/// The key is [(1, ASC), (30, DESC)]
|
||||
pub key: Vec<(ast::Expr, SortOrder)>,
|
||||
/// Then, prefix=[(eq=1, ASC)], start=Some((ge, Expr(30))), end=Some((gt, Sentinel))
|
||||
pub prefix: Vec<SeekRangeConstraint>,
|
||||
/// The condition to use when seeking. See [SeekKey] for more details.
|
||||
pub seek: Option<SeekKey>,
|
||||
/// The condition to use when terminating the scan that follows the seek. See [TerminationKey] for more details.
|
||||
pub termination: Option<TerminationKey>,
|
||||
pub start: SeekKey,
|
||||
/// The condition to use when terminating the scan that follows the seek. See [SeekKey] for more details.
|
||||
pub end: SeekKey,
|
||||
/// The direction of the scan that follows the seek.
|
||||
pub iter_dir: IterationDirection,
|
||||
}
|
||||
|
||||
pub struct SeekDefKeyIterator<'a> {
|
||||
seek_def: &'a SeekDef,
|
||||
seek_key: &'a SeekKey,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for SeekDefKeyIterator<'a> {
|
||||
type Item = SeekKeyComponent<&'a ast::Expr>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let result = if self.pos < self.seek_def.prefix.len() {
|
||||
Some(SeekKeyComponent::Expr(
|
||||
&self.seek_def.prefix[self.pos].eq.as_ref().unwrap().1,
|
||||
))
|
||||
} else if self.pos == self.seek_def.prefix.len() {
|
||||
match &self.seek_key.last_component {
|
||||
SeekKeyComponent::Expr(expr) => Some(SeekKeyComponent::Expr(expr)),
|
||||
SeekKeyComponent::None => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.pos += 1;
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl SeekDef {
|
||||
/// returns amount of values in the given seek key
|
||||
/// - so, for SELECT * FROM t WHERE x = 10 AND y = 20 AND y >= 30 there will be 3 values (10, 20, 30)
|
||||
pub fn size(&self, key: &SeekKey) -> usize {
|
||||
self.prefix.len()
|
||||
+ match key.last_component {
|
||||
SeekKeyComponent::Expr(_) => 1,
|
||||
SeekKeyComponent::None => 0,
|
||||
}
|
||||
}
|
||||
/// iterate over value expressions in the given seek key
|
||||
pub fn iter<'a>(&'a self, key: &'a SeekKey) -> SeekDefKeyIterator<'a> {
|
||||
SeekDefKeyIterator {
|
||||
seek_def: self,
|
||||
seek_key: key,
|
||||
pos: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// [SeekKeyComponent] represents the optional last component of a [SeekKey].
|
||||
///
|
||||
/// It is a dedicated enum rather than `Option<E>` because an earlier version had a third, `Sentinel`, variant.
|
||||
/// That variant is no longer needed: the last component is now either a user-provided expression or omitted entirely.
|
||||
/// The separate enum is kept anyway, since it costs nothing and leaves room for further variants.
|
||||
///
|
||||
/// The type parameter `E` allows both the owned form `SeekKeyComponent<ast::Expr>` and the borrowed form `SeekKeyComponent<&ast::Expr>`.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum SeekKeyComponent<E> {
|
||||
/// The key ends with this expression.
Expr(E),
|
||||
/// The key has no last component.
None,
|
||||
}
|
||||
|
||||
/// A condition to use when seeking.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SeekKey {
|
||||
/// How many columns from [SeekDef::key] are used in seeking.
|
||||
pub len: usize,
|
||||
/// Whether to NULL pad the last column of the seek key to match the length of [SeekDef::key].
|
||||
/// The reason it is done is that sometimes our full index key is not used in seeking,
|
||||
/// but we want to find the lowest value that matches the non-null prefix of the key.
|
||||
/// For example, given:
|
||||
/// - CREATE INDEX i ON t (x, y)
|
||||
/// - SELECT * FROM t WHERE x = 1 AND y < 30
|
||||
///
|
||||
/// We want to seek to the first row where x = 1, and then iterate forwards.
|
||||
/// In this case, the seek key is GT(1, NULL) since NULL is always LT in index key comparisons.
|
||||
/// We can't use just GT(1) because in index key comparisons, only the given number of columns are compared,
|
||||
/// so this means any index keys with (x=1) will compare equal, e.g. (x=1, y=usize::MAX) will compare equal to the seek key (x:1)
|
||||
pub null_pad: bool,
|
||||
/// The comparison operator to use when seeking.
|
||||
pub op: SeekOp,
|
||||
}
|
||||
/// The complete key is built from the common [SeekDef::prefix] followed by this optional last component.
|
||||
pub last_component: SeekKeyComponent<ast::Expr>,
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// A condition to use when terminating the scan that follows a seek.
|
||||
pub struct TerminationKey {
|
||||
/// How many columns from [SeekDef::key] are used in terminating the scan that follows the seek.
|
||||
pub len: usize,
|
||||
/// Whether to NULL pad the last column of the termination key to match the length of [SeekDef::key].
|
||||
/// See [SeekKey::null_pad].
|
||||
pub null_pad: bool,
|
||||
/// The comparison operator to use when terminating the scan that follows the seek.
|
||||
/// The comparison operator to use when seeking.
|
||||
pub op: SeekOp,
|
||||
}
|
||||
|
||||
|
||||
@@ -720,6 +720,176 @@ do_execsql_test_on_specific_db {:memory:} select-no-match-in-leaf-page {
|
||||
2
|
||||
2}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-range-search-count-asc-index {
|
||||
CREATE TABLE t (a, b);
|
||||
CREATE INDEX t_idx ON t(a, b);
|
||||
insert into t values (1, 1);
|
||||
insert into t values (1, 2);
|
||||
insert into t values (1, 3);
|
||||
insert into t values (1, 4);
|
||||
insert into t values (1, 5);
|
||||
insert into t values (1, 6);
|
||||
insert into t values (2, 1);
|
||||
insert into t values (2, 2);
|
||||
insert into t values (2, 3);
|
||||
insert into t values (2, 4);
|
||||
insert into t values (2, 5);
|
||||
insert into t values (2, 6);
|
||||
select count(*) from t where a = 1 AND b >= 2 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b <= 4 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b < 4 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b <= 4 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b <= 4 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b < 4 ORDER BY a ASC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b < 4 ORDER BY a ASC, b ASC;
|
||||
|
||||
select count(*) from t where a = 1 AND b >= 2 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b <= 4 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b < 4 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b <= 4 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b <= 4 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b < 4 ORDER BY a DESC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b < 4 ORDER BY a DESC, b DESC;
|
||||
} {5
|
||||
4
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
2
|
||||
1
|
||||
5
|
||||
4
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
2
|
||||
1}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-range-search-count-desc-index {
|
||||
CREATE TABLE t (a, b);
|
||||
CREATE INDEX t_idx ON t(a, b DESC);
|
||||
insert into t values (1, 1);
|
||||
insert into t values (1, 2);
|
||||
insert into t values (1, 3);
|
||||
insert into t values (1, 4);
|
||||
insert into t values (1, 5);
|
||||
insert into t values (1, 6);
|
||||
insert into t values (2, 1);
|
||||
insert into t values (2, 2);
|
||||
insert into t values (2, 3);
|
||||
insert into t values (2, 4);
|
||||
insert into t values (2, 5);
|
||||
insert into t values (2, 6);
|
||||
select count(*) from t where a = 1 AND b >= 2 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b <= 4 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b < 4 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b <= 4 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b <= 4 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b < 4 ORDER BY a ASC, b DESC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b < 4 ORDER BY a ASC, b DESC;
|
||||
|
||||
select count(*) from t where a = 1 AND b >= 2 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b <= 4 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b < 4 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b <= 4 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b <= 4 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b >= 2 AND b < 4 ORDER BY a DESC, b ASC;
|
||||
select count(*) from t where a = 1 AND b > 2 AND b < 4 ORDER BY a DESC, b ASC;
|
||||
} {5
|
||||
4
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
2
|
||||
1
|
||||
5
|
||||
4
|
||||
4
|
||||
3
|
||||
3
|
||||
2
|
||||
2
|
||||
1}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-range-search-scan-asc-index {
|
||||
CREATE TABLE t (a, b);
|
||||
CREATE INDEX t_idx ON t(a, b);
|
||||
insert into t values (1, 1);
|
||||
insert into t values (1, 2);
|
||||
insert into t values (1, 3);
|
||||
insert into t values (1, 4);
|
||||
insert into t values (1, 5);
|
||||
insert into t values (1, 6);
|
||||
insert into t values (2, 1);
|
||||
insert into t values (2, 2);
|
||||
insert into t values (2, 3);
|
||||
insert into t values (2, 4);
|
||||
insert into t values (2, 5);
|
||||
insert into t values (2, 6);
|
||||
select * from t where a = 1 AND b > 1 AND b < 6 ORDER BY a ASC, b ASC;
|
||||
select * from t where a = 2 AND b > 1 AND b < 6 ORDER BY a DESC, b DESC;
|
||||
select * from t where a = 1 AND b > 1 AND b < 6 ORDER BY a DESC, b ASC;
|
||||
select * from t where a = 2 AND b > 1 AND b < 6 ORDER BY a ASC, b DESC;
|
||||
} {1|2
|
||||
1|3
|
||||
1|4
|
||||
1|5
|
||||
2|5
|
||||
2|4
|
||||
2|3
|
||||
2|2
|
||||
1|2
|
||||
1|3
|
||||
1|4
|
||||
1|5
|
||||
2|5
|
||||
2|4
|
||||
2|3
|
||||
2|2}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-range-search-scan-desc-index {
|
||||
CREATE TABLE t (a, b);
|
||||
CREATE INDEX t_idx ON t(a, b DESC);
|
||||
insert into t values (1, 1);
|
||||
insert into t values (1, 2);
|
||||
insert into t values (1, 3);
|
||||
insert into t values (1, 4);
|
||||
insert into t values (1, 5);
|
||||
insert into t values (1, 6);
|
||||
insert into t values (2, 1);
|
||||
insert into t values (2, 2);
|
||||
insert into t values (2, 3);
|
||||
insert into t values (2, 4);
|
||||
insert into t values (2, 5);
|
||||
insert into t values (2, 6);
|
||||
select * from t where a = 1 AND b > 1 AND b < 6 ORDER BY a ASC, b ASC;
|
||||
select * from t where a = 2 AND b > 1 AND b < 6 ORDER BY a DESC, b DESC;
|
||||
select * from t where a = 1 AND b > 1 AND b < 6 ORDER BY a DESC, b ASC;
|
||||
select * from t where a = 2 AND b > 1 AND b < 6 ORDER BY a ASC, b DESC;
|
||||
} {1|2
|
||||
1|3
|
||||
1|4
|
||||
1|5
|
||||
2|5
|
||||
2|4
|
||||
2|3
|
||||
2|2
|
||||
1|2
|
||||
1|3
|
||||
1|4
|
||||
1|5
|
||||
2|5
|
||||
2|4
|
||||
2|3
|
||||
2|2}
|
||||
|
||||
# Regression tests for double-quoted strings in SELECT statements
|
||||
do_execsql_test_skip_lines_on_specific_db 1 {:memory:} select-double-quotes-values {
|
||||
.dbconfig dqs_dml on
|
||||
|
||||
Reference in New Issue
Block a user