mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-30 14:34:22 +01:00
optimizer: try to build ephemeral index to avoid nested table scan
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
|
||||
|
||||
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
|
||||
|
||||
use crate::{
|
||||
schema::{Index, Schema},
|
||||
schema::{Index, IndexColumn, Schema},
|
||||
translate::plan::TerminationKey,
|
||||
types::SeekOp,
|
||||
util::exprs_are_equivalent,
|
||||
@@ -843,6 +843,25 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
}
|
||||
}
|
||||
|
||||
// We haven't found a persistent btree index that is any better than a full table scan;
|
||||
// let's see if building an ephemeral index would be better.
|
||||
if best_index.index.is_none() {
|
||||
let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) =
|
||||
ephemeral_index_estimate_cost(where_clause, table_reference, table_index);
|
||||
if ephemeral_cost < best_index.cost {
|
||||
// ephemeral index makes sense, so let's build it now.
|
||||
// ephemeral columns are: columns from the table_reference, constraints first, then the rest
|
||||
let ephemeral_index =
|
||||
ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
|
||||
best_index.index = Some(Arc::new(ephemeral_index));
|
||||
best_index.cost = ephemeral_cost;
|
||||
best_index.constraints.clear();
|
||||
best_index
|
||||
.constraints
|
||||
.append(&mut constraints_without_col_idx);
|
||||
}
|
||||
}
|
||||
|
||||
if best_index.index.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
@@ -869,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
}));
|
||||
}
|
||||
|
||||
fn ephemeral_index_estimate_cost(
|
||||
where_clause: &mut Vec<WhereTerm>,
|
||||
table_reference: &TableReference,
|
||||
table_index: usize,
|
||||
) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
|
||||
let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, term)| is_potential_index_constraint(term, table_index))
|
||||
.filter_map(|(i, term)| {
|
||||
let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
|
||||
panic!("expected binary expression");
|
||||
};
|
||||
if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
|
||||
if *table == table_index {
|
||||
return Some((
|
||||
*column,
|
||||
IndexConstraint {
|
||||
position_in_where_clause: (i, BinaryExprSide::Rhs),
|
||||
operator: *operator,
|
||||
index_column_sort_order: SortOrder::Asc,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
|
||||
if *table == table_index {
|
||||
return Some((
|
||||
*column,
|
||||
IndexConstraint {
|
||||
position_in_where_clause: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(*operator),
|
||||
index_column_sort_order: SortOrder::Asc,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
// sort equalities first
|
||||
constraints_with_col_idx.sort_by(|a, _| {
|
||||
if a.1.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
});
|
||||
// drop everything after the first inequality
|
||||
constraints_with_col_idx.truncate(
|
||||
constraints_with_col_idx
|
||||
.iter()
|
||||
.position(|c| c.1.operator != ast::Operator::Equals)
|
||||
.unwrap_or(constraints_with_col_idx.len()),
|
||||
);
|
||||
|
||||
let ephemeral_column_count = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| table_reference.column_is_used(*i))
|
||||
.count();
|
||||
|
||||
let constraints_without_col_idx = constraints_with_col_idx
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|(_, c)| c)
|
||||
.collect::<Vec<_>>();
|
||||
let ephemeral_cost = dumb_cost_estimator(
|
||||
Some(IndexInfo {
|
||||
unique: false,
|
||||
column_count: ephemeral_column_count,
|
||||
}),
|
||||
&constraints_without_col_idx,
|
||||
table_index != 0,
|
||||
true,
|
||||
);
|
||||
(
|
||||
ephemeral_cost,
|
||||
constraints_with_col_idx,
|
||||
constraints_without_col_idx,
|
||||
)
|
||||
}
|
||||
|
||||
fn ephemeral_index_build(
|
||||
table_reference: &TableReference,
|
||||
table_index: usize,
|
||||
index_constraints: &[(usize, IndexConstraint)],
|
||||
) -> Index {
|
||||
let mut ephemeral_columns: Vec<IndexColumn> = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, c)| IndexColumn {
|
||||
name: c.name.clone().unwrap(),
|
||||
order: SortOrder::Asc,
|
||||
pos_in_table: i,
|
||||
})
|
||||
// only include columns that are used in the query
|
||||
.filter(|c| table_reference.column_is_used(c.pos_in_table))
|
||||
.collect();
|
||||
// sort so that constraints first, then rest in whatever order they were in in the table
|
||||
ephemeral_columns.sort_by(|a, b| {
|
||||
let a_constraint = index_constraints
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.0 == a.pos_in_table);
|
||||
let b_constraint = index_constraints
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.0 == b.pos_in_table);
|
||||
match (a_constraint, b_constraint) {
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx),
|
||||
(None, None) => Ordering::Equal,
|
||||
}
|
||||
});
|
||||
let ephemeral_index = Index {
|
||||
name: format!(
|
||||
"ephemeral_{}_{}",
|
||||
table_reference.table.get_name(),
|
||||
table_index
|
||||
),
|
||||
columns: ephemeral_columns,
|
||||
unique: false,
|
||||
ephemeral: true,
|
||||
table_name: table_reference.table.get_name().to_string(),
|
||||
root_page: 0,
|
||||
};
|
||||
|
||||
ephemeral_index
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
|
||||
/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",
|
||||
|
||||
Reference in New Issue
Block a user