mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-06 00:34:23 +01:00
Merge 'Create an automatic ephemeral index when a nested table scan would otherwise be selected' from Jussi Saurio
Closes #747 - Creates an automatic ephemeral (in-memory) index on the right-side table of a join if otherwise a nested table scan would be selected. - This behavior is not hardcoded; instead this PR introduces a (quite dumb) cost estimator that naturally deincentivizes building ephemeral indexes where they don't make sense (e.g. the outermost table). I will probably build this estimator to be smarter in the future when working on join reordering optimizations ### Example bytecode plans and runtimes (note that this is debug mode) Example query with no persistent indexes to choose from. Without ephemeral index it's a nested scan: ```sql limbo> explain select * from t1 natural join t2; addr opcode p1 p2 p3 p4 p5 comment ---- ----------------- ---- ---- ---- ------------- -- ------- 0 Init 0 13 0 0 Start at 13 1 OpenRead 0 2 0 0 table=t1, root=2 2 OpenRead 1 3 0 0 table=t2, root=3 3 Rewind 0 12 0 0 Rewind t1 4 Rewind 1 11 0 0 Rewind t2 5 Column 0 0 2 0 r[2]=t1.a 6 Column 1 0 3 0 r[3]=t2.a 7 Ne 2 3 10 0 if r[2]!=r[3] goto 10 8 Column 0 0 1 0 r[1]=t1.a 9 ResultRow 1 1 0 0 output=r[1] 10 Next 1 5 0 0 11 Next 0 4 0 0 12 Halt 0 0 0 0 13 Transaction 0 0 0 0 write=false 14 Goto 0 1 0 0 limbo> .timer on limbo> select * from t1 natural join t2; ┌───┐ │ a │ ├───┤ └───┘ Command stats: ---------------------------- total: 953 ms (this includes parsing/coloring of cli app) ``` Same query with autoindexing enabled: ```sql limbo> explain select * from t1 natural join t2; addr opcode p1 p2 p3 p4 p5 comment ---- ----------------- ---- ---- ---- ------------- -- ------- 0 Init 0 22 0 0 Start at 22 1 OpenRead 0 2 0 0 table=t1, root=2 2 OpenRead 1 3 0 0 table=t2, root=3 3 Rewind 0 21 0 0 Rewind t1 4 Once 12 0 0 0 goto 12 # execute block 5-11 only once, on subsequent iters jump straight to 12 5 OpenAutoindex 3 0 0 0 cursor=3 6 Rewind 1 12 0 0 Rewind t2 # open source table for ephemeral index 7 Column 1 0 2 0 r[2]=t2.a 8 RowId 1 3 0 0 r[3]=t2.rowid 9 MakeRecord 2 2 4 0 r[4]=mkrec(r[2..3]) 10 
IdxInsert 3 4 2 0 key=r[4] # insert stuff to ephemeral index 11 Next 1 7 0 0 12 Column 0 0 5 0 r[5]=t1.a 13 IsNull 5 20 0 0 if (r[5]==NULL) goto 20 14 SeekGE 3 20 5 0 key=[5..5] # perform seek on ephemeral index 15 IdxGT 3 20 5 0 key=[5..5] 16 DeferredSeek 3 1 0 0 17 Column 0 0 1 0 r[1]=t1.a 18 ResultRow 1 1 0 0 output=r[1] 19 Next 2 15 0 0 20 Next 0 4 0 0 21 Halt 0 0 0 0 22 Transaction 0 0 0 0 write=false 23 Goto 0 1 0 0 limbo> .timer on limbo> select * from t1 natural join t2; ┌───┐ │ a │ ├───┤ └───┘ Command stats: ---------------------------- total: 220 ms (this includes parsing/coloring of cli app) ``` Closes #1356
This commit is contained in:
@@ -692,6 +692,7 @@ pub struct Index {
|
||||
pub root_page: usize,
|
||||
pub columns: Vec<IndexColumn>,
|
||||
pub unique: bool,
|
||||
pub ephemeral: bool,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -741,6 +742,7 @@ impl Index {
|
||||
root_page,
|
||||
columns: index_columns,
|
||||
unique,
|
||||
ephemeral: false,
|
||||
})
|
||||
}
|
||||
_ => todo!("Expected create index statement"),
|
||||
@@ -783,6 +785,7 @@ impl Index {
|
||||
root_page,
|
||||
columns: index_columns,
|
||||
unique: true, // Primary key indexes are always unique
|
||||
ephemeral: false,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -62,6 +62,7 @@ pub fn translate_create_index(
|
||||
})
|
||||
.collect(),
|
||||
unique: unique_if_not_exists.0,
|
||||
ephemeral: false,
|
||||
});
|
||||
|
||||
// Allocate the necessary cursors:
|
||||
|
||||
@@ -1,14 +1,16 @@
|
||||
use limbo_ext::VTabKind;
|
||||
use limbo_sqlite3_parser::ast;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
schema::Table,
|
||||
schema::{Index, Table},
|
||||
translate::result_row::emit_select_result,
|
||||
types::SeekOp,
|
||||
vdbe::{
|
||||
builder::ProgramBuilder,
|
||||
insn::{CmpInsFlags, Insn},
|
||||
BranchOffset,
|
||||
insn::{CmpInsFlags, IdxInsertFlags, Insn},
|
||||
BranchOffset, CursorID,
|
||||
},
|
||||
Result,
|
||||
};
|
||||
@@ -156,23 +158,26 @@ pub fn init_loop(
|
||||
index: Some(index), ..
|
||||
} = search
|
||||
{
|
||||
match mode {
|
||||
OperationMode::SELECT => {
|
||||
program.emit_insn(Insn::OpenRead {
|
||||
cursor_id: index_cursor_id
|
||||
.expect("index cursor is always opened in Seek with index"),
|
||||
root_page: index.root_page,
|
||||
});
|
||||
}
|
||||
OperationMode::UPDATE | OperationMode::DELETE => {
|
||||
program.emit_insn(Insn::OpenWrite {
|
||||
cursor_id: index_cursor_id
|
||||
.expect("index cursor is always opened in Seek with index"),
|
||||
root_page: index.root_page.into(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
unimplemented!()
|
||||
// Ephemeral index cursor are opened ad-hoc when needed.
|
||||
if !index.ephemeral {
|
||||
match mode {
|
||||
OperationMode::SELECT => {
|
||||
program.emit_insn(Insn::OpenRead {
|
||||
cursor_id: index_cursor_id
|
||||
.expect("index cursor is always opened in Seek with index"),
|
||||
root_page: index.root_page,
|
||||
});
|
||||
}
|
||||
OperationMode::UPDATE | OperationMode::DELETE => {
|
||||
program.emit_insn(Insn::OpenWrite {
|
||||
cursor_id: index_cursor_id
|
||||
.expect("index cursor is always opened in Seek with index"),
|
||||
root_page: index.root_page.into(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -437,6 +442,32 @@ pub fn open_loop(
|
||||
});
|
||||
} else {
|
||||
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
|
||||
if let Search::Seek {
|
||||
index: Some(index), ..
|
||||
} = search
|
||||
{
|
||||
if index.ephemeral {
|
||||
let table_has_rowid = if let Table::BTree(btree) = &table.table {
|
||||
btree.has_rowid
|
||||
} else {
|
||||
false
|
||||
};
|
||||
Some(emit_autoindex(
|
||||
program,
|
||||
&index,
|
||||
table_cursor_id
|
||||
.expect("an ephemeral index must have a source table cursor"),
|
||||
index_cursor_id
|
||||
.expect("an ephemeral index must have an index cursor"),
|
||||
table_has_rowid,
|
||||
)?)
|
||||
} else {
|
||||
index_cursor_id
|
||||
}
|
||||
} else {
|
||||
index_cursor_id
|
||||
};
|
||||
|
||||
let is_index = index_cursor_id.is_some();
|
||||
let seek_cursor_id = index_cursor_id.unwrap_or_else(|| {
|
||||
table_cursor_id.expect("Either index or table cursor must be opened")
|
||||
@@ -1125,3 +1156,67 @@ fn emit_seek_termination(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Open an ephemeral index cursor and build an automatic index on a table.
|
||||
/// This is used as a last-resort to avoid a nested full table scan
|
||||
/// Returns the cursor id of the ephemeral index cursor.
|
||||
fn emit_autoindex(
|
||||
program: &mut ProgramBuilder,
|
||||
index: &Arc<Index>,
|
||||
table_cursor_id: CursorID,
|
||||
index_cursor_id: CursorID,
|
||||
table_has_rowid: bool,
|
||||
) -> Result<CursorID> {
|
||||
assert!(index.ephemeral, "Index {} is not ephemeral", index.name);
|
||||
let label_ephemeral_build_end = program.allocate_label();
|
||||
// Since this typically happens in an inner loop, we only build it once.
|
||||
program.emit_insn(Insn::Once {
|
||||
target_pc_when_reentered: label_ephemeral_build_end,
|
||||
});
|
||||
program.emit_insn(Insn::OpenAutoindex {
|
||||
cursor_id: index_cursor_id,
|
||||
});
|
||||
// Rewind source table
|
||||
program.emit_insn(Insn::Rewind {
|
||||
cursor_id: table_cursor_id,
|
||||
pc_if_empty: label_ephemeral_build_end,
|
||||
});
|
||||
let offset_ephemeral_build_loop_start = program.offset();
|
||||
// Emit all columns from source table that are needed in the ephemeral index.
|
||||
// Also reserve a register for the rowid if the source table has rowids.
|
||||
let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize;
|
||||
let ephemeral_cols_start_reg = program.alloc_registers(num_regs_to_reserve);
|
||||
for (i, col) in index.columns.iter().enumerate() {
|
||||
let reg = ephemeral_cols_start_reg + i;
|
||||
program.emit_insn(Insn::Column {
|
||||
cursor_id: table_cursor_id,
|
||||
column: col.pos_in_table,
|
||||
dest: reg,
|
||||
});
|
||||
}
|
||||
if table_has_rowid {
|
||||
program.emit_insn(Insn::RowId {
|
||||
cursor_id: table_cursor_id,
|
||||
dest: ephemeral_cols_start_reg + index.columns.len(),
|
||||
});
|
||||
}
|
||||
let record_reg = program.alloc_register();
|
||||
program.emit_insn(Insn::MakeRecord {
|
||||
start_reg: ephemeral_cols_start_reg,
|
||||
count: num_regs_to_reserve,
|
||||
dest_reg: record_reg,
|
||||
});
|
||||
program.emit_insn(Insn::IdxInsert {
|
||||
cursor_id: index_cursor_id,
|
||||
record_reg,
|
||||
unpacked_start: Some(ephemeral_cols_start_reg),
|
||||
unpacked_count: Some(num_regs_to_reserve as u16),
|
||||
flags: IdxInsertFlags::new().use_seek(false),
|
||||
});
|
||||
program.emit_insn(Insn::Next {
|
||||
cursor_id: table_cursor_id,
|
||||
pc_if_next: offset_ephemeral_build_loop_start,
|
||||
});
|
||||
program.resolve_label(label_ephemeral_build_end, program.offset());
|
||||
Ok(index_cursor_id)
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::{cmp::Ordering, collections::HashMap, sync::Arc};
|
||||
|
||||
use limbo_sqlite3_parser::ast::{self, Expr, SortOrder};
|
||||
|
||||
use crate::{
|
||||
schema::{Index, Schema},
|
||||
schema::{Index, IndexColumn, Schema},
|
||||
translate::plan::TerminationKey,
|
||||
types::SeekOp,
|
||||
util::exprs_are_equivalent,
|
||||
@@ -355,15 +355,18 @@ fn use_indexes(
|
||||
// but we just don't do that yet.
|
||||
continue;
|
||||
}
|
||||
let placeholder = vec![];
|
||||
let mut usable_indexes_ref = &placeholder;
|
||||
if let Some(indexes) = available_indexes.get(table_name) {
|
||||
if let Some(search) = try_extract_index_search_from_where_clause(
|
||||
where_clause,
|
||||
table_index,
|
||||
table_reference,
|
||||
indexes,
|
||||
)? {
|
||||
table_reference.op = Operation::Search(search);
|
||||
}
|
||||
usable_indexes_ref = indexes;
|
||||
}
|
||||
if let Some(search) = try_extract_index_search_from_where_clause(
|
||||
where_clause,
|
||||
table_index,
|
||||
table_reference,
|
||||
usable_indexes_ref,
|
||||
)? {
|
||||
table_reference.op = Operation::Search(search);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -710,14 +713,80 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
||||
}
|
||||
|
||||
/// Struct used for scoring index scans
|
||||
/// Currently we just score by the number of index columns that can be utilized
|
||||
/// in the scan, i.e. no statistics are used.
|
||||
/// Currently we just estimate cost in a really dumb way,
|
||||
/// i.e. no statistics are used.
|
||||
struct IndexScore {
|
||||
index: Option<Arc<Index>>,
|
||||
score: usize,
|
||||
cost: f64,
|
||||
constraints: Vec<IndexConstraint>,
|
||||
}
|
||||
|
||||
/// The properties of an index that the cost estimator looks at.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct IndexInfo {
    /// Whether the index is a unique index.
    unique: bool,
    /// Number of columns in the index.
    column_count: usize,
}
|
||||
|
||||
const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0;
|
||||
|
||||
/// Unbelievably dumb cost estimate for rows scanned by an index scan.
|
||||
fn dumb_cost_estimator(
|
||||
index_info: Option<IndexInfo>,
|
||||
constraints: &[IndexConstraint],
|
||||
is_inner_loop: bool,
|
||||
is_ephemeral: bool,
|
||||
) -> f64 {
|
||||
// assume that the outer table always does a full table scan :)
|
||||
// this discourages building ephemeral indexes on the outer table
|
||||
// (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index)
|
||||
// but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations.
|
||||
let loop_multiplier = if is_inner_loop {
|
||||
ESTIMATED_HARDCODED_ROWS_PER_TABLE
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
|
||||
// If we are building an ephemeral index, we assume we will scan the entire source table to build it.
|
||||
// Non-ephemeral indexes don't need to be built.
|
||||
let cost_to_build_index = is_ephemeral as usize as f64 * ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
||||
|
||||
let Some(index_info) = index_info else {
|
||||
return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier;
|
||||
};
|
||||
|
||||
let final_constraint_is_range = constraints
|
||||
.last()
|
||||
.map_or(false, |c| c.operator != ast::Operator::Equals);
|
||||
let equalities_count = constraints
|
||||
.iter()
|
||||
.take(if final_constraint_is_range {
|
||||
constraints.len() - 1
|
||||
} else {
|
||||
constraints.len()
|
||||
})
|
||||
.count() as f64;
|
||||
|
||||
let selectivity = match (
|
||||
index_info.unique,
|
||||
index_info.column_count as f64,
|
||||
equalities_count,
|
||||
) {
|
||||
// no equalities: let's assume range query selectivity is 0.4. if final constraint is not range and there are no equalities, it means full table scan incoming
|
||||
(_, _, 0.0) => {
|
||||
if final_constraint_is_range {
|
||||
0.4
|
||||
} else {
|
||||
1.0
|
||||
}
|
||||
}
|
||||
// on an unique index if we have equalities across all index columns, assume very high selectivity
|
||||
(true, index_cols, eq_count) if eq_count == index_cols => 0.01 * eq_count,
|
||||
// some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4
|
||||
(_, _, eq_count) => (eq_count * 0.1) * if final_constraint_is_range { 0.4 } else { 1.0 },
|
||||
};
|
||||
cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier
|
||||
}
|
||||
|
||||
/// Try to extract an index search from the WHERE clause
|
||||
/// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None.
|
||||
pub fn try_extract_index_search_from_where_clause(
|
||||
@@ -730,10 +799,6 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
if where_clause.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
// If there are no indexes, we can't extract a search
|
||||
if table_indexes.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op {
|
||||
*iter_dir
|
||||
@@ -748,10 +813,11 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
// 3. constrain the index columns in the order that they appear in the index
|
||||
// - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan,
|
||||
// - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1".
|
||||
let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false);
|
||||
let mut constraints_cur = vec![];
|
||||
let mut best_index = IndexScore {
|
||||
index: None,
|
||||
score: 0,
|
||||
cost: cost_of_full_table_scan,
|
||||
constraints: vec![],
|
||||
};
|
||||
|
||||
@@ -760,15 +826,42 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?;
|
||||
// naive scoring since we don't have statistics: prefer the index where we can use the most columns
|
||||
// e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c.
|
||||
let score = constraints_cur.len();
|
||||
if score > best_index.score {
|
||||
let cost = dumb_cost_estimator(
|
||||
Some(IndexInfo {
|
||||
unique: index.unique,
|
||||
column_count: index.columns.len(),
|
||||
}),
|
||||
&constraints_cur,
|
||||
table_index != 0,
|
||||
false,
|
||||
);
|
||||
if cost < best_index.cost {
|
||||
best_index.index = Some(Arc::clone(index));
|
||||
best_index.score = score;
|
||||
best_index.cost = cost;
|
||||
best_index.constraints.clear();
|
||||
best_index.constraints.append(&mut constraints_cur);
|
||||
}
|
||||
}
|
||||
|
||||
// We haven't found a persistent btree index that is any better than a full table scan;
|
||||
// let's see if building an ephemeral index would be better.
|
||||
if best_index.index.is_none() {
|
||||
let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) =
|
||||
ephemeral_index_estimate_cost(where_clause, table_reference, table_index);
|
||||
if ephemeral_cost < best_index.cost {
|
||||
// ephemeral index makes sense, so let's build it now.
|
||||
// ephemeral columns are: columns from the table_reference, constraints first, then the rest
|
||||
let ephemeral_index =
|
||||
ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
|
||||
best_index.index = Some(Arc::new(ephemeral_index));
|
||||
best_index.cost = ephemeral_cost;
|
||||
best_index.constraints.clear();
|
||||
best_index
|
||||
.constraints
|
||||
.append(&mut constraints_without_col_idx);
|
||||
}
|
||||
}
|
||||
|
||||
if best_index.index.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
@@ -795,6 +888,140 @@ pub fn try_extract_index_search_from_where_clause(
|
||||
}));
|
||||
}
|
||||
|
||||
fn ephemeral_index_estimate_cost(
|
||||
where_clause: &mut Vec<WhereTerm>,
|
||||
table_reference: &TableReference,
|
||||
table_index: usize,
|
||||
) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
|
||||
let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, term)| is_potential_index_constraint(term, table_index))
|
||||
.filter_map(|(i, term)| {
|
||||
let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
|
||||
panic!("expected binary expression");
|
||||
};
|
||||
if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
|
||||
if *table == table_index {
|
||||
return Some((
|
||||
*column,
|
||||
IndexConstraint {
|
||||
position_in_where_clause: (i, BinaryExprSide::Rhs),
|
||||
operator: *operator,
|
||||
index_column_sort_order: SortOrder::Asc,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
|
||||
if *table == table_index {
|
||||
return Some((
|
||||
*column,
|
||||
IndexConstraint {
|
||||
position_in_where_clause: (i, BinaryExprSide::Lhs),
|
||||
operator: opposite_cmp_op(*operator),
|
||||
index_column_sort_order: SortOrder::Asc,
|
||||
},
|
||||
));
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
// sort equalities first
|
||||
constraints_with_col_idx.sort_by(|a, _| {
|
||||
if a.1.operator == ast::Operator::Equals {
|
||||
Ordering::Less
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
});
|
||||
// drop everything after the first inequality
|
||||
constraints_with_col_idx.truncate(
|
||||
constraints_with_col_idx
|
||||
.iter()
|
||||
.position(|c| c.1.operator != ast::Operator::Equals)
|
||||
.unwrap_or(constraints_with_col_idx.len()),
|
||||
);
|
||||
|
||||
let ephemeral_column_count = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| table_reference.column_is_used(*i))
|
||||
.count();
|
||||
|
||||
let constraints_without_col_idx = constraints_with_col_idx
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|(_, c)| c)
|
||||
.collect::<Vec<_>>();
|
||||
let ephemeral_cost = dumb_cost_estimator(
|
||||
Some(IndexInfo {
|
||||
unique: false,
|
||||
column_count: ephemeral_column_count,
|
||||
}),
|
||||
&constraints_without_col_idx,
|
||||
table_index != 0,
|
||||
true,
|
||||
);
|
||||
(
|
||||
ephemeral_cost,
|
||||
constraints_with_col_idx,
|
||||
constraints_without_col_idx,
|
||||
)
|
||||
}
|
||||
|
||||
fn ephemeral_index_build(
|
||||
table_reference: &TableReference,
|
||||
table_index: usize,
|
||||
index_constraints: &[(usize, IndexConstraint)],
|
||||
) -> Index {
|
||||
let mut ephemeral_columns: Vec<IndexColumn> = table_reference
|
||||
.columns()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, c)| IndexColumn {
|
||||
name: c.name.clone().unwrap(),
|
||||
order: SortOrder::Asc,
|
||||
pos_in_table: i,
|
||||
})
|
||||
// only include columns that are used in the query
|
||||
.filter(|c| table_reference.column_is_used(c.pos_in_table))
|
||||
.collect();
|
||||
// sort so that constraints first, then rest in whatever order they were in in the table
|
||||
ephemeral_columns.sort_by(|a, b| {
|
||||
let a_constraint = index_constraints
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.0 == a.pos_in_table);
|
||||
let b_constraint = index_constraints
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(_, c)| c.0 == b.pos_in_table);
|
||||
match (a_constraint, b_constraint) {
|
||||
(Some(_), None) => Ordering::Less,
|
||||
(None, Some(_)) => Ordering::Greater,
|
||||
(Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx),
|
||||
(None, None) => Ordering::Equal,
|
||||
}
|
||||
});
|
||||
let ephemeral_index = Index {
|
||||
name: format!(
|
||||
"ephemeral_{}_{}",
|
||||
table_reference.table.get_name(),
|
||||
table_index
|
||||
),
|
||||
columns: ephemeral_columns,
|
||||
unique: false,
|
||||
ephemeral: true,
|
||||
table_name: table_reference.table.get_name().to_string(),
|
||||
root_page: 0,
|
||||
};
|
||||
|
||||
ephemeral_index
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
|
||||
/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",
|
||||
@@ -874,6 +1101,45 @@ fn get_column_position_in_index(
|
||||
Ok(index.column_table_pos_to_index_pos(*column))
|
||||
}
|
||||
|
||||
fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool {
|
||||
// Skip terms that cannot be evaluated at this table's loop level
|
||||
if !term.should_eval_at_loop(table_index) {
|
||||
return false;
|
||||
}
|
||||
// Skip terms that are not binary comparisons
|
||||
let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
|
||||
return false;
|
||||
};
|
||||
// Only consider index scans for binary ops that are comparisons
|
||||
if !matches!(
|
||||
*operator,
|
||||
ast::Operator::Equals
|
||||
| ast::Operator::Greater
|
||||
| ast::Operator::GreaterEquals
|
||||
| ast::Operator::Less
|
||||
| ast::Operator::LessEquals
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If both lhs and rhs refer to columns from this table, we can't use this constraint
|
||||
// because we can't use the index to satisfy the condition.
|
||||
// Examples:
|
||||
// - WHERE t.x > t.y
|
||||
// - WHERE t.x + 1 > t.y - 5
|
||||
// - WHERE t.x = (t.x)
|
||||
let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else {
|
||||
return false;
|
||||
};
|
||||
if eval_at_left == EvalAt::Loop(table_index) && eval_at_right == EvalAt::Loop(table_index) {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Find all [IndexConstraint]s for a given WHERE clause
|
||||
/// Constraints are appended as long as they constrain the index in column order.
|
||||
/// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c.
|
||||
@@ -887,37 +1153,13 @@ fn find_index_constraints(
|
||||
for position_in_index in 0..index.columns.len() {
|
||||
let mut found = false;
|
||||
for (position_in_where_clause, term) in where_clause.iter().enumerate() {
|
||||
// Skip terms that cannot be evaluated at this table's loop level
|
||||
if !term.should_eval_at_loop(table_index) {
|
||||
continue;
|
||||
}
|
||||
// Skip terms that are not binary comparisons
|
||||
let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
|
||||
continue;
|
||||
};
|
||||
// Only consider index scans for binary ops that are comparisons
|
||||
if !matches!(
|
||||
*operator,
|
||||
ast::Operator::Equals
|
||||
| ast::Operator::Greater
|
||||
| ast::Operator::GreaterEquals
|
||||
| ast::Operator::Less
|
||||
| ast::Operator::LessEquals
|
||||
) {
|
||||
if !is_potential_index_constraint(term, table_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If both lhs and rhs refer to columns from this table, we can't use this constraint
|
||||
// because we can't use the index to satisfy the condition.
|
||||
// Examples:
|
||||
// - WHERE t.x > t.y
|
||||
// - WHERE t.x + 1 > t.y - 5
|
||||
// - WHERE t.x = (t.x)
|
||||
if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index)
|
||||
&& determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else {
|
||||
panic!("expected binary expression");
|
||||
};
|
||||
|
||||
// Check if lhs is a column that is in the i'th position of the index
|
||||
if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? {
|
||||
|
||||
@@ -522,7 +522,10 @@ impl TableReference {
|
||||
match &self.table {
|
||||
Table::BTree(btree) => {
|
||||
let use_covering_index = self.utilizes_covering_index();
|
||||
let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT {
|
||||
let index_is_ephemeral = index.map_or(false, |index| index.ephemeral);
|
||||
let table_not_required =
|
||||
OperationMode::SELECT == mode && use_covering_index && !index_is_ephemeral;
|
||||
let table_cursor_id = if table_not_required {
|
||||
None
|
||||
} else {
|
||||
Some(program.alloc_cursor_id(
|
||||
@@ -603,6 +606,10 @@ impl TableReference {
|
||||
};
|
||||
self.index_is_covering(index.as_ref())
|
||||
}
|
||||
|
||||
pub fn column_is_used(&self, index: usize) -> bool {
|
||||
self.col_used_mask.get(index)
|
||||
}
|
||||
}
|
||||
|
||||
/// A definition of a rowid/index search.
|
||||
|
||||
@@ -363,6 +363,12 @@ impl ProgramBuilder {
|
||||
Insn::Next { pc_if_next, .. } => {
|
||||
resolve(pc_if_next, "Next");
|
||||
}
|
||||
Insn::Once {
|
||||
target_pc_when_reentered,
|
||||
..
|
||||
} => {
|
||||
resolve(target_pc_when_reentered, "Once");
|
||||
}
|
||||
Insn::Prev { pc_if_prev, .. } => {
|
||||
resolve(pc_if_prev, "Prev");
|
||||
}
|
||||
|
||||
@@ -3766,7 +3766,6 @@ pub fn op_idx_insert(
|
||||
pager: &Rc<Pager>,
|
||||
mv_store: Option<&Rc<MvStore>>,
|
||||
) -> Result<InsnFunctionStepResult> {
|
||||
dbg!("op_idx_insert_");
|
||||
if let Insn::IdxInsert {
|
||||
cursor_id,
|
||||
record_reg,
|
||||
@@ -3807,7 +3806,6 @@ pub fn op_idx_insert(
|
||||
}
|
||||
};
|
||||
|
||||
dbg!(moved_before);
|
||||
// Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages,
|
||||
// therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode
|
||||
// because it could trigger a movement to child page after a balance root which will leave the current page as the root page.
|
||||
|
||||
Reference in New Issue
Block a user