From 3b44b269a31eb00be79a9fe74fd9daf2a5bf58ae Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:22:57 +0300 Subject: [PATCH] optimizer: try to build ephemeral index to avoid nested table scan --- core/translate/optimizer.rs | 157 +++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 872758257..41e34418e 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,9 +1,9 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{cmp::Ordering, collections::HashMap, sync::Arc}; use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ - schema::{Index, Schema}, + schema::{Index, IndexColumn, Schema}, translate::plan::TerminationKey, types::SeekOp, util::exprs_are_equivalent, @@ -843,6 +843,25 @@ pub fn try_extract_index_search_from_where_clause( } } + // We haven't found a persistent btree index that is any better than a full table scan; + // let's see if building an ephemeral index would be better. + if best_index.index.is_none() { + let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) = + ephemeral_index_estimate_cost(where_clause, table_reference, table_index); + if ephemeral_cost < best_index.cost { + // ephemeral index makes sense, so let's build it now. + // ephemeral columns are: columns from the table_reference, constraints first, then the rest + let ephemeral_index = + ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx); + best_index.index = Some(Arc::new(ephemeral_index)); + best_index.cost = ephemeral_cost; + best_index.constraints.clear(); + best_index + .constraints + .append(&mut constraints_without_col_idx); + } + } + if best_index.index.is_none() { return Ok(None); } @@ -869,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause( })); } +fn ephemeral_index_estimate_cost( + where_clause: &mut Vec, + table_reference: &TableReference, + table_index: usize, +) -> (f64, Vec<(usize, IndexConstraint)>, Vec) { + let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause + .iter() + .enumerate() + .filter(|(_, term)| is_potential_index_constraint(term, table_index)) + .filter_map(|(i, term)| { + let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else { + panic!("expected binary expression"); + }; + if let ast::Expr::Column { table, column, .. } = lhs.as_ref() { + if *table == table_index { + return Some(( + *column, + IndexConstraint { + position_in_where_clause: (i, BinaryExprSide::Rhs), + operator: *operator, + index_column_sort_order: SortOrder::Asc, + }, + )); + } + } + if let ast::Expr::Column { table, column, .. } = rhs.as_ref() { + if *table == table_index { + return Some(( + *column, + IndexConstraint { + position_in_where_clause: (i, BinaryExprSide::Lhs), + operator: opposite_cmp_op(*operator), + index_column_sort_order: SortOrder::Asc, + }, + )); + } + } + None + }) + .collect(); + // sort equalities first + constraints_with_col_idx.sort_by(|a, _| { + if a.1.operator == ast::Operator::Equals { + Ordering::Less + } else { + Ordering::Equal + } + }); + // drop everything after the first inequality + constraints_with_col_idx.truncate( + constraints_with_col_idx + .iter() + .position(|c| c.1.operator != ast::Operator::Equals) + .unwrap_or(constraints_with_col_idx.len()), + ); + + let ephemeral_column_count = table_reference + .columns() + .iter() + .enumerate() + .filter(|(i, _)| table_reference.column_is_used(*i)) + .count(); + + let constraints_without_col_idx = constraints_with_col_idx + .iter() + .cloned() + .map(|(_, c)| c) + .collect::>(); + let ephemeral_cost = dumb_cost_estimator( + Some(IndexInfo { + unique: false, + column_count: ephemeral_column_count, + }), + &constraints_without_col_idx, + table_index != 0, + true, + ); + ( + ephemeral_cost, + constraints_with_col_idx, + constraints_without_col_idx, + ) +} + +fn ephemeral_index_build( + table_reference: &TableReference, + table_index: usize, + index_constraints: &[(usize, IndexConstraint)], +) -> Index { + let mut ephemeral_columns: Vec = table_reference + .columns() + .iter() + .enumerate() + .map(|(i, c)| IndexColumn { + name: c.name.clone().unwrap(), + order: SortOrder::Asc, + pos_in_table: i, + }) + // only include columns that are used in the query + .filter(|c| table_reference.column_is_used(c.pos_in_table)) + .collect(); + // sort so that constraints first, then rest in whatever order they were in in the table + ephemeral_columns.sort_by(|a, b| { + let a_constraint = index_constraints + .iter() + .enumerate() + .find(|(_, c)| c.0 == a.pos_in_table); + let b_constraint = index_constraints + .iter() + .enumerate() + .find(|(_, c)| c.0 == b.pos_in_table); + match (a_constraint, b_constraint) { + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx), + (None, None) => Ordering::Equal, + } + }); + let ephemeral_index = Index { + name: format!( + "ephemeral_{}_{}", + table_reference.table.get_name(), + table_index + ), + columns: ephemeral_columns, + unique: false, + ephemeral: true, + table_name: table_reference.table.get_name().to_string(), + root_page: 0, + }; + + ephemeral_index +} + #[derive(Debug, Clone)] /// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key. /// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",