optimizer: try to build ephemeral index to avoid nested table scan

This commit is contained in:
Jussi Saurio
2025-04-19 12:22:57 +03:00
parent 6924424f11
commit 3b44b269a3

View File

@@ -1,9 +1,9 @@
use std::{collections::HashMap, sync::Arc};
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
use crate::{
schema::{Index, Schema},
schema::{Index, IndexColumn, Schema},
translate::plan::TerminationKey,
types::SeekOp,
util::exprs_are_equivalent,
@@ -843,6 +843,25 @@ pub fn try_extract_index_search_from_where_clause(
}
}
// We haven't found a persistent btree index that is any better than a full table scan;
// let's see if building an ephemeral index would be better.
if best_index.index.is_none() {
let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) =
ephemeral_index_estimate_cost(where_clause, table_reference, table_index);
if ephemeral_cost < best_index.cost {
// ephemeral index makes sense, so let's build it now.
// ephemeral columns are: columns from the table_reference, constraints first, then the rest
let ephemeral_index =
ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
best_index.index = Some(Arc::new(ephemeral_index));
best_index.cost = ephemeral_cost;
best_index.constraints.clear();
best_index
.constraints
.append(&mut constraints_without_col_idx);
}
}
if best_index.index.is_none() {
return Ok(None);
}
@@ -869,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause(
}));
}
fn ephemeral_index_estimate_cost(
where_clause: &mut Vec<WhereTerm>,
table_reference: &TableReference,
table_index: usize,
) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
.iter()
.enumerate()
.filter(|(_, term)| is_potential_index_constraint(term, table_index))
.filter_map(|(i, term)| {
let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
panic!("expected binary expression");
};
if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
if *table == table_index {
return Some((
*column,
IndexConstraint {
position_in_where_clause: (i, BinaryExprSide::Rhs),
operator: *operator,
index_column_sort_order: SortOrder::Asc,
},
));
}
}
if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
if *table == table_index {
return Some((
*column,
IndexConstraint {
position_in_where_clause: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(*operator),
index_column_sort_order: SortOrder::Asc,
},
));
}
}
None
})
.collect();
// sort equalities first
constraints_with_col_idx.sort_by(|a, _| {
if a.1.operator == ast::Operator::Equals {
Ordering::Less
} else {
Ordering::Equal
}
});
// drop everything after the first inequality
constraints_with_col_idx.truncate(
constraints_with_col_idx
.iter()
.position(|c| c.1.operator != ast::Operator::Equals)
.unwrap_or(constraints_with_col_idx.len()),
);
let ephemeral_column_count = table_reference
.columns()
.iter()
.enumerate()
.filter(|(i, _)| table_reference.column_is_used(*i))
.count();
let constraints_without_col_idx = constraints_with_col_idx
.iter()
.cloned()
.map(|(_, c)| c)
.collect::<Vec<_>>();
let ephemeral_cost = dumb_cost_estimator(
Some(IndexInfo {
unique: false,
column_count: ephemeral_column_count,
}),
&constraints_without_col_idx,
table_index != 0,
true,
);
(
ephemeral_cost,
constraints_with_col_idx,
constraints_without_col_idx,
)
}
fn ephemeral_index_build(
table_reference: &TableReference,
table_index: usize,
index_constraints: &[(usize, IndexConstraint)],
) -> Index {
let mut ephemeral_columns: Vec<IndexColumn> = table_reference
.columns()
.iter()
.enumerate()
.map(|(i, c)| IndexColumn {
name: c.name.clone().unwrap(),
order: SortOrder::Asc,
pos_in_table: i,
})
// only include columns that are used in the query
.filter(|c| table_reference.column_is_used(c.pos_in_table))
.collect();
// sort so that constraints first, then rest in whatever order they were in in the table
ephemeral_columns.sort_by(|a, b| {
let a_constraint = index_constraints
.iter()
.enumerate()
.find(|(_, c)| c.0 == a.pos_in_table);
let b_constraint = index_constraints
.iter()
.enumerate()
.find(|(_, c)| c.0 == b.pos_in_table);
match (a_constraint, b_constraint) {
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx),
(None, None) => Ordering::Equal,
}
});
let ephemeral_index = Index {
name: format!(
"ephemeral_{}_{}",
table_reference.table.get_name(),
table_index
),
columns: ephemeral_columns,
unique: false,
ephemeral: true,
table_name: table_reference.table.get_name().to_string(),
root_page: 0,
};
ephemeral_index
}
#[derive(Debug, Clone)]
/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",