mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-27 21:14:21 +01:00
optimizer: add highly unintelligent heuristics-based cost estimation
This commit is contained in:
@@ -713,14 +713,80 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
||||
}
|
||||
|
||||
/// Struct used for scoring index scans
|
||||
/// Currently we just score by the number of index columns that can be utilized
|
||||
/// in the scan, i.e. no statistics are used.
|
||||
/// Currently we just estimate cost in a really dumb way,
|
||||
/// i.e. no statistics are used.
|
||||
struct IndexScore {
|
||||
index: Option<Arc<Index>>,
|
||||
score: usize,
|
||||
cost: f64,
|
||||
constraints: Vec<IndexConstraint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
struct IndexInfo {
|
||||
unique: bool,
|
||||
column_count: usize,
|
||||
}
|
||||
|
||||
const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0;
|
||||
|
||||
/// Unbelievably dumb cost estimate for rows scanned by an index scan.
|
||||
fn dumb_cost_estimator(
|
||||
index_info: Option<IndexInfo>,
|
||||
constraints: &[IndexConstraint],
|
||||
is_inner_loop: bool,
|
||||
is_ephemeral: bool,
|
||||
) -> f64 {
|
||||
// assume that the outer table always does a full table scan :)
|
||||
// this discourages building ephemeral indexes on the outer table
|
||||
// (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index)
|
||||
// but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations.
|
||||
let loop_multiplier = if is_inner_loop {
|
||||
ESTIMATED_HARDCODED_ROWS_PER_TABLE
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
|
||||
// If we are building an ephemeral index, we assume we will scan the entire source table to build it.
|
||||
// Non-ephemeral indexes don't need to be built.
|
||||
let cost_to_build_index = is_ephemeral as usize as f64 * ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
||||
|
||||
let Some(index_info) = index_info else {
|
||||
return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier;
|
||||
};
|
||||
|
||||
let final_constraint_is_range = constraints
|
||||
.last()
|
||||
.map_or(false, |c| c.operator != ast::Operator::Equals);
|
||||
let equalities_count = constraints
|
||||
.iter()
|
||||
.take(if final_constraint_is_range {
|
||||
constraints.len() - 1
|
||||
} else {
|
||||
constraints.len()
|
||||
})
|
||||
.count() as f64;
|
||||
|
||||
let selectivity = match (
|
||||
index_info.unique,
|
||||
index_info.column_count as f64,
|
||||
equalities_count,
|
||||
) {
|
||||
// no equalities: let's assume range query selectivity is 0.4. if final constraint is not range and there are no equalities, it means full table scan incoming
|
||||
(_, _, 0.0) => {
|
||||
if final_constraint_is_range {
|
||||
0.4
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
// on an unique index if we have equalities across all index columns, assume very high selectivity
|
||||
(true, index_cols, eq_count) if eq_count == index_cols => 0.01 * eq_count,
|
||||
// some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4
|
||||
(_, _, eq_count) => (eq_count * 0.1) * if final_constraint_is_range { 0.4 } else { 1.0 },
|
||||
};
|
||||
cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier
|
||||
}
|
||||
|
||||
/// Try to extract an index search from the WHERE clause
|
||||
/// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None.
|
||||
pub fn try_extract_index_search_from_where_clause(
|
||||
@@ -747,10 +813,11 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
// 3. constrain the index columns in the order that they appear in the index
|
||||
// - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan,
|
||||
// - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1".
|
||||
let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false);
|
||||
let mut constraints_cur = vec![];
|
||||
let mut best_index = IndexScore {
|
||||
index: None,
|
||||
score: 0,
|
||||
cost: cost_of_full_table_scan,
|
||||
constraints: vec![],
|
||||
};
|
||||
|
||||
@@ -759,10 +826,18 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
|
||||
// naive scoring since we don't have statistics: prefer the index where we can use the most columns
|
||||
// e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
|
||||
let score = constraints_cur.len();
|
||||
if score > best_index.score {
|
||||
let cost = dumb_cost_estimator(
|
||||
Some(IndexInfo {
|
||||
unique: index.unique,
|
||||
column_count: index.columns.len(),
|
||||
}),
|
||||
&constraints_cur,
|
||||
table_index != 0,
|
||||
false,
|
||||
);
|
||||
if cost < best_index.cost {
|
||||
best_index.index = Some(Arc::clone(index));
|
||||
best_index.score = score;
|
||||
best_index.cost = cost;
|
||||
best_index.constraints.clear();
|
||||
best_index.constraints.append(&mut constraints_cur);
|
||||
}
|
||||
@@ -873,6 +948,45 @@ fn get_column_position_in_index(
|
||||
Ok(index.column_table_pos_to_index_pos(*column))
|
||||
}
|
||||
|
||||
fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool {
|
||||
// Skip terms that cannot be evaluated at this table's loop level
|
||||
if !term.should_eval_at_loop(table_index) {
|
||||
return false;
|
||||
}
|
||||
// Skip terms that are not binary comparisons
|
||||
let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
|
||||
return false;
|
||||
};
|
||||
// Only consider index scans for binary ops that are comparisons
|
||||
if !matches!(
|
||||
*operator,
|
||||
ast::Operator::Equals
|
||||
| ast::Operator::Greater
|
||||
| ast::Operator::GreaterEquals
|
||||
| ast::Operator::Less
|
||||
| ast::Operator::LessEquals
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If both lhs and rhs refer to columns from this table, we can't use this constraint
|
||||
// because we can't use the index to satisfy the condition.
|
||||
// Examples:
|
||||
// - WHERE t.x > t.y
|
||||
// - WHERE t.x + 1 > t.y - 5
|
||||
// - WHERE t.x = (t.x)
|
||||
let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else {
|
||||
return false;
|
||||
};
|
||||
if eval_at_left == EvalAt::Loop(table_index) && eval_at_right == EvalAt::Loop(table_index) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find all [IndexConstraint]s for a given WHERE clause
|
||||
/// Constraints are appended as long as they constrain the index in column order.
|
||||
/// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c.
|
||||
@@ -886,37 +1000,13 @@ fn find_index_constraints(
|
||||
for position_in_index in 0..index.columns.len() {
|
||||
let mut found = false;
|
||||
for (position_in_where_clause, term) in where_clause.iter().enumerate() {
|
||||
// Skip terms that cannot be evaluated at this table's loop level
|
||||
if !term.should_eval_at_loop(table_index) {
|
||||
continue;
|
||||
}
|
||||
// Skip terms that are not binary comparisons
|
||||
let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
|
||||
continue;
|
||||
};
|
||||
// Only consider index scans for binary ops that are comparisons
|
||||
if !matches!(
|
||||
*operator,
|
||||
ast::Operator::Equals
|
||||
| ast::Operator::Greater
|
||||
| ast::Operator::GreaterEquals
|
||||
| ast::Operator::Less
|
||||
| ast::Operator::LessEquals
|
||||
) {
|
||||
if !is_potential_index_constraint(term, table_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If both lhs and rhs refer to columns from this table, we can't use this constraint
|
||||
// because we can't use the index to satisfy the condition.
|
||||
// Examples:
|
||||
// - WHERE t.x > t.y
|
||||
// - WHERE t.x + 1 > t.y - 5
|
||||
// - WHERE t.x = (t.x)
|
||||
if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index)
|
||||
&& determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
|
||||
panic!("expected binary expression");
|
||||
};
|
||||
|
||||
// Check if lhs is a column that is in the i'th position of the index
|
||||
if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
|
||||
|
||||
Reference in New Issue
Block a user