Merge 'Feat: Covering indexes' from Jussi Saurio

Closes #364
Covering indexes mean being able to read all the necessary data from an
index instead of using the underlying table at all. This PR adds that
functionality.
This PR can be reviewed commit-by-commit as the first commits are
enablers for the actual covering index usage functionality
Example of a scan where covering index can be used:
```sql
limbo> .schema
CREATE TABLE t(a,b,c,d,e);
CREATE INDEX abc ON t (a,b,c);
limbo> explain select b+1,concat(a, c) from t;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     12    0                    0   Start at 12
1     OpenRead           0     3     0                    0   table=abc, root=3
2     Rewind             0     11    0                    0   Rewind abc
3       Column           0     1     3                    0   r[3]=abc.b
4       Integer          1     4     0                    0   r[4]=1
5       Add              3     4     1                    0   r[1]=r[3]+r[4]
6       Column           0     0     5                    0   r[5]=abc.a
7       Column           0     2     6                    0   r[6]=abc.c
8       Function         0     5     2     concat         0   r[2]=func(r[5..6])
9       ResultRow        1     2     0                    0   output=r[1..2]
10    Next               0     3     0                    0
11    Halt               0     0     0                    0
12    Transaction        0     0     0                    0   write=false
13    Goto               0     1     0                    0
```
Example of a scan where it can't be used:
```sql
limbo> .schema
CREATE TABLE t(a,b,c,d,e);
CREATE INDEX abc ON t (a,b,c);
limbo> explain select a,b,c,d from t limit 5;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     11    0                    0   Start at 11
1     OpenRead           0     2     0                    0   table=t, root=2
2     Rewind             0     10    0                    0   Rewind t
3       Column           0     0     4                    0   r[4]=t.a
4       Column           0     1     5                    0   r[5]=t.b
5       Column           0     2     6                    0   r[6]=t.c
6       Column           0     3     7                    0   r[7]=t.d
7       ResultRow        4     4     0                    0   output=r[4..7]
8       DecrJumpZero     1     10    0                    0   if (--r[1]==0) goto 10
9     Next               0     3     0                    0
10    Halt               0     0     0                    0
11    Transaction        0     0     0                    0   write=false
12    Integer            5     1     0                    0   r[1]=5
13    Integer            0     2     0                    0   r[2]=0
14    OffsetLimit        1     3     2                    0   if r[1]>0 then r[3]=r[1]+max(0,r[2]) else r[3]=(-1)
15    Goto               0     1     0                    0
```

Closes #1351
This commit is contained in:
Jussi Saurio
2025-04-18 15:27:27 +03:00
6 changed files with 403 additions and 185 deletions

View File

@@ -95,7 +95,7 @@ pub struct TranslateCtx<'a> {
/// Used to distinguish database operations
#[allow(clippy::upper_case_acronyms, dead_code)]
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OperationMode {
SELECT,
INSERT,

View File

@@ -1770,41 +1770,79 @@ pub fn translate_expr(
is_rowid_alias,
} => {
let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap();
let index = table_reference.op.index();
let use_covering_index = table_reference.utilizes_covering_index();
match table_reference.op {
// If we are reading a column from a table, we find the cursor that corresponds to
// the table and read the column from the cursor.
Operation::Scan { .. } | Operation::Search(_) => match &table_reference.table {
Table::BTree(_) => {
let cursor_id = program.resolve_cursor_id(&table_reference.identifier);
if *is_rowid_alias {
program.emit_insn(Insn::RowId {
cursor_id,
dest: target_register,
});
} else {
program.emit_insn(Insn::Column {
// If we have a covering index, we don't have an open table cursor so we read from the index cursor.
Operation::Scan { .. } | Operation::Search(_) => {
match &table_reference.table {
Table::BTree(_) => {
let table_cursor_id = if use_covering_index {
None
} else {
Some(program.resolve_cursor_id(&table_reference.identifier))
};
let index_cursor_id = if let Some(index) = index {
Some(program.resolve_cursor_id(&index.name))
} else {
None
};
if *is_rowid_alias {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxRowId {
cursor_id: index_cursor_id,
dest: target_register,
});
} else if let Some(table_cursor_id) = table_cursor_id {
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: target_register,
});
} else {
unreachable!("Either index or table cursor must be opened");
}
} else {
let read_cursor = if use_covering_index {
index_cursor_id
.expect("index cursor should be opened when use_covering_index=true")
} else {
table_cursor_id
.expect("table cursor should be opened when use_covering_index=false")
};
let column = if use_covering_index {
let index = index.expect("index cursor should be opened when use_covering_index=true");
index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| {
panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_reference.identifier)
})
} else {
*column
};
program.emit_insn(Insn::Column {
cursor_id: read_cursor,
column,
dest: target_register,
});
}
let Some(column) = table_reference.table.get_column_at(*column) else {
crate::bail_parse_error!("column index out of bounds");
};
maybe_apply_affinity(column.ty, target_register, program);
Ok(target_register)
}
Table::Virtual(_) => {
let cursor_id = program.resolve_cursor_id(&table_reference.identifier);
program.emit_insn(Insn::VColumn {
cursor_id,
column: *column,
dest: target_register,
});
Ok(target_register)
}
let Some(column) = table_reference.table.get_column_at(*column) else {
crate::bail_parse_error!("column index out of bounds");
};
maybe_apply_affinity(column.ty, target_register, program);
Ok(target_register)
_ => unreachable!(),
}
Table::Virtual(_) => {
let cursor_id = program.resolve_cursor_id(&table_reference.identifier);
program.emit_insn(Insn::VColumn {
cursor_id,
column: *column,
dest: target_register,
});
Ok(target_register)
}
_ => unreachable!(),
},
}
// If we are reading a column from a subquery, we instead copy the column from the
// subquery's result registers.
Operation::Subquery {
@@ -1822,11 +1860,23 @@ pub fn translate_expr(
}
ast::Expr::RowId { database: _, table } => {
let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap();
let cursor_id = program.resolve_cursor_id(&table_reference.identifier);
program.emit_insn(Insn::RowId {
cursor_id,
dest: target_register,
});
let index = table_reference.op.index();
let use_covering_index = table_reference.utilizes_covering_index();
if use_covering_index {
let index =
index.expect("index cursor should be opened when use_covering_index=true");
let cursor_id = program.resolve_cursor_id(&index.name);
program.emit_insn(Insn::IdxRowId {
cursor_id,
dest: target_register,
});
} else {
let cursor_id = program.resolve_cursor_id(&table_reference.identifier);
program.emit_insn(Insn::RowId {
cursor_id,
dest: target_register,
});
}
Ok(target_register)
}
ast::Expr::InList { .. } => todo!(),

View File

@@ -6,7 +6,7 @@ use crate::{
translate::result_row::emit_select_result,
types::SeekOp,
vdbe::{
builder::{CursorType, ProgramBuilder},
builder::ProgramBuilder,
insn::{CmpInsFlags, Insn},
BranchOffset,
},
@@ -81,77 +81,67 @@ pub fn init_loop(
t_ctx.meta_left_joins[table_index] = Some(lj_metadata);
}
}
let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?;
match &table.op {
Operation::Scan { index, .. } => {
let cursor_id = program.alloc_cursor_id(
Some(table.identifier.clone()),
match &table.table {
Table::BTree(_) => CursorType::BTreeTable(table.btree().unwrap().clone()),
Table::Virtual(_) => {
CursorType::VirtualTable(table.virtual_table().unwrap().clone())
}
other => panic!("Invalid table reference type in Scan: {:?}", other),
},
);
let index_cursor_id = index.as_ref().map(|i| {
program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone()))
});
match (mode, &table.table) {
(OperationMode::SELECT, Table::BTree(btree)) => {
let root_page = btree.root_page;
Operation::Scan { index, .. } => match (mode, &table.table) {
(OperationMode::SELECT, Table::BTree(btree)) => {
let root_page = btree.root_page;
if let Some(cursor_id) = table_cursor_id {
program.emit_insn(Insn::OpenRead {
cursor_id,
root_page,
});
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenRead {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page,
});
}
}
(OperationMode::DELETE, Table::BTree(btree)) => {
let root_page = btree.root_page;
program.emit_insn(Insn::OpenWrite {
cursor_id,
root_page: root_page.into(),
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenRead {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page,
});
}
(OperationMode::UPDATE, Table::BTree(btree)) => {
let root_page = btree.root_page;
program.emit_insn(Insn::OpenWrite {
cursor_id,
root_page: root_page.into(),
});
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenWrite {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page.into(),
});
}
}
(_, Table::Virtual(_)) => {
program.emit_insn(Insn::VOpen { cursor_id });
}
_ => {
unimplemented!()
}
}
}
Operation::Search(search) => {
let table_cursor_id = program.alloc_cursor_id(
Some(table.identifier.clone()),
CursorType::BTreeTable(table.btree().unwrap().clone()),
);
match mode {
OperationMode::SELECT => {
program.emit_insn(Insn::OpenRead {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page(),
(OperationMode::DELETE, Table::BTree(btree)) => {
let root_page = btree.root_page;
program.emit_insn(Insn::OpenWrite {
cursor_id: table_cursor_id
.expect("table cursor is always opened in OperationMode::DELETE"),
root_page: root_page.into(),
});
}
(OperationMode::UPDATE, Table::BTree(btree)) => {
let root_page = btree.root_page;
program.emit_insn(Insn::OpenWrite {
cursor_id: table_cursor_id
.expect("table cursor is always opened in OperationMode::UPDATE"),
root_page: root_page.into(),
});
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::OpenWrite {
cursor_id: index_cursor_id,
root_page: index.as_ref().unwrap().root_page.into(),
});
}
}
(_, Table::Virtual(_)) => {
if let Some(cursor_id) = table_cursor_id {
program.emit_insn(Insn::VOpen { cursor_id });
}
}
_ => {
unimplemented!()
}
},
Operation::Search(search) => {
match mode {
OperationMode::SELECT => {
if let Some(table_cursor_id) = table_cursor_id {
program.emit_insn(Insn::OpenRead {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page(),
});
}
}
OperationMode::DELETE | OperationMode::UPDATE => {
let table_cursor_id = table_cursor_id.expect("table cursor is always opened in OperationMode::DELETE or OperationMode::UPDATE");
program.emit_insn(Insn::OpenWrite {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page().into(),
@@ -166,21 +156,18 @@ pub fn init_loop(
index: Some(index), ..
} = search
{
let index_cursor_id = program.alloc_cursor_id(
Some(index.name.clone()),
CursorType::BTreeIndex(index.clone()),
);
match mode {
OperationMode::SELECT => {
program.emit_insn(Insn::OpenRead {
cursor_id: index_cursor_id,
cursor_id: index_cursor_id
.expect("index cursor is always opened in Seek with index"),
root_page: index.root_page,
});
}
OperationMode::UPDATE | OperationMode::DELETE => {
program.emit_insn(Insn::OpenWrite {
cursor_id: index_cursor_id,
cursor_id: index_cursor_id
.expect("index cursor is always opened in Seek with index"),
root_page: index.root_page.into(),
});
}
@@ -229,6 +216,8 @@ pub fn open_loop(
}
}
let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?;
match &table.op {
Operation::Subquery { plan, .. } => {
let (yield_reg, coroutine_implementation_start) = match &plan.query_type {
@@ -274,10 +263,10 @@ pub fn open_loop(
program.resolve_label(jump_target_when_true, program.offset());
}
}
Operation::Scan { iter_dir, index } => {
let cursor_id = program.resolve_cursor_id(&table.identifier);
let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name));
let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id);
Operation::Scan { iter_dir, .. } => {
let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| {
table_cursor_id.expect("Either index or table cursor must be opened")
});
if !matches!(&table.table, Table::Virtual(_)) {
if *iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::Last {
@@ -389,7 +378,8 @@ pub fn open_loop(
// Emit VFilter with the computed arguments.
program.emit_insn(Insn::VFilter {
cursor_id,
cursor_id: table_cursor_id
.expect("Virtual tables do not support covering indexes"),
arg_count: count,
args_reg: start_reg,
idx_str: maybe_idx_str,
@@ -399,11 +389,13 @@ pub fn open_loop(
}
program.resolve_label(loop_start, program.offset());
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id: cursor_id,
});
if let Some(table_cursor_id) = table_cursor_id {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
}
}
for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| {
@@ -426,7 +418,6 @@ pub fn open_loop(
}
}
Operation::Search(search) => {
let table_cursor_id = program.resolve_cursor_id(&table.identifier);
// Open the loop for the index search.
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
if let Search::RowidEq { cmp_expr } = search {
@@ -439,22 +430,17 @@ pub fn open_loop(
&t_ctx.resolver,
)?;
program.emit_insn(Insn::SeekRowid {
cursor_id: table_cursor_id,
cursor_id: table_cursor_id
.expect("Search::RowidEq requires a table cursor"),
src_reg,
target_pc: next,
});
} else {
// Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
let index_cursor_id = if let Search::Seek {
index: Some(index), ..
} = search
{
Some(program.resolve_cursor_id(&index.name))
} else {
None
};
let is_index = index_cursor_id.is_some();
let seek_cursor_id = index_cursor_id.unwrap_or(table_cursor_id);
let seek_cursor_id = index_cursor_id.unwrap_or_else(|| {
table_cursor_id.expect("Either index or table cursor must be opened")
});
let Search::Seek { seek_def, .. } = search else {
unreachable!("Rowid equality point lookup should have been handled above");
};
@@ -483,11 +469,13 @@ pub fn open_loop(
)?;
if let Some(index_cursor_id) = index_cursor_id {
// Don't do a btree table seek until it's actually necessary to read from the table.
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
if let Some(table_cursor_id) = table_cursor_id {
// Don't do a btree table seek until it's actually necessary to read from the table.
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
}
}
}
@@ -785,6 +773,8 @@ pub fn close_loop(
.get(table_index)
.expect("source has no loop labels");
let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?;
match &table.op {
Operation::Subquery { .. } => {
program.resolve_label(loop_labels.next, program.offset());
@@ -795,14 +785,11 @@ pub fn close_loop(
target_pc: loop_labels.loop_start,
});
}
Operation::Scan {
index, iter_dir, ..
} => {
Operation::Scan { iter_dir, .. } => {
program.resolve_label(loop_labels.next, program.offset());
let cursor_id = program.resolve_cursor_id(&table.identifier);
let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name));
let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id);
let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| {
table_cursor_id.expect("Either index or table cursor must be opened")
});
match &table.table {
Table::BTree(_) => {
if *iter_dir == IterationDirection::Backwards {
@@ -819,7 +806,8 @@ pub fn close_loop(
}
Table::Virtual(_) => {
program.emit_insn(Insn::VNext {
cursor_id,
cursor_id: table_cursor_id
.expect("Virtual tables do not support covering indexes"),
pc_if_next: loop_labels.loop_start,
});
}
@@ -828,33 +816,24 @@ pub fn close_loop(
}
Operation::Search(search) => {
program.resolve_label(loop_labels.next, program.offset());
let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| {
table_cursor_id.expect("Either index or table cursor must be opened")
});
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a Next instruction.
if !matches!(search, Search::RowidEq { .. }) {
let (cursor_id, iter_dir) = match search {
Search::Seek {
index: Some(index),
seek_def,
..
} => (program.resolve_cursor_id(&index.name), seek_def.iter_dir),
Search::Seek {
index: None,
seek_def,
..
} => (
program.resolve_cursor_id(&table.identifier),
seek_def.iter_dir,
),
let iter_dir = match search {
Search::Seek { seek_def, .. } => seek_def.iter_dir,
Search::RowidEq { .. } => unreachable!(),
};
if iter_dir == IterationDirection::Backwards {
program.emit_insn(Insn::Prev {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_prev: loop_labels.loop_start,
});
} else {
program.emit_insn(Insn::Next {
cursor_id,
cursor_id: iteration_cursor_id,
pc_if_next: loop_labels.loop_start,
});
}

View File

@@ -167,32 +167,34 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
Ok(())
}
/// Eliminate unnecessary ORDER BY clauses.
/// Returns true if the ORDER BY clause was eliminated.
fn eliminate_unnecessary_orderby(
table_references: &mut [TableReference],
available_indexes: &HashMap<String, Vec<Arc<Index>>>,
order_by: &mut Option<Vec<(ast::Expr, Direction)>>,
group_by: &Option<GroupBy>,
) -> Result<()> {
) -> Result<bool> {
let Some(order) = order_by else {
return Ok(());
return Ok(false);
};
let Some(first_table_reference) = table_references.first_mut() else {
return Ok(());
return Ok(false);
};
let Some(btree_table) = first_table_reference.btree() else {
return Ok(());
return Ok(false);
};
// If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data
// This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping
// And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby
if group_by.is_some() {
return Ok(());
return Ok(false);
}
let Operation::Scan {
index, iter_dir, ..
} = &mut first_table_reference.op
else {
return Ok(());
return Ok(false);
};
assert!(
@@ -207,7 +209,7 @@ fn eliminate_unnecessary_orderby(
Direction::Descending => IterationDirection::Backwards,
};
*order_by = None;
return Ok(());
return Ok(true);
}
// Find the best matching index for the ORDER BY columns
@@ -235,7 +237,7 @@ fn eliminate_unnecessary_orderby(
}
let Some(matching_index) = best_index.0 else {
return Ok(());
return Ok(false);
};
let match_count = best_index.1;
@@ -280,7 +282,7 @@ fn eliminate_unnecessary_orderby(
}
}
Ok(())
Ok(order_by.is_none())
}
/**
@@ -300,7 +302,8 @@ fn use_indexes(
group_by: &Option<GroupBy>,
) -> Result<()> {
// Try to use indexes for eliminating ORDER BY clauses
eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?;
let did_eliminate_orderby =
eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?;
// Try to use indexes for WHERE conditions
for (table_index, table_reference) in table_references.iter_mut().enumerate() {
@@ -346,6 +349,12 @@ fn use_indexes(
i += 1;
}
}
if did_eliminate_orderby && table_index == 0 {
// If we already made the decision to remove ORDER BY based on the Rowid (e.g. ORDER BY id), then skip this.
// It would be possible to analyze the index and see if the covering index would retain the ordering guarantee,
// but we just don't do that yet.
continue;
}
if let Some(indexes) = available_indexes.get(table_name) {
if let Some(search) = try_extract_index_search_from_where_clause(
where_clause,
@@ -359,6 +368,22 @@ fn use_indexes(
}
}
}
// Finally, if there's no other reason to use an index, if an index covers the columns used in the query, let's use it.
if let Some(indexes) = available_indexes.get(table_reference.table.get_name()) {
for index_candidate in indexes.iter() {
let is_covering = table_reference.index_is_covering(index_candidate);
if let Operation::Scan { index, .. } = &mut table_reference.op {
if index.is_some() {
continue;
}
if is_covering {
*index = Some(index_candidate.clone());
break;
}
}
}
}
}
Ok(())

View File

@@ -11,8 +11,11 @@ use std::{
use crate::{
function::AggFunc,
schema::{BTreeTable, Column, Index, Table},
vdbe::BranchOffset,
VirtualTable,
vdbe::{
builder::{CursorType, ProgramBuilder},
BranchOffset, CursorID,
},
Result, VirtualTable,
};
use crate::{
schema::{PseudoTable, Type},
@@ -20,6 +23,8 @@ use crate::{
util::can_pushdown_predicate,
};
use super::emitter::OperationMode;
#[derive(Debug, Clone)]
pub struct ResultSetColumn {
pub expr: ast::Expr,
@@ -491,6 +496,100 @@ impl TableReference {
pub fn mark_column_used(&mut self, index: usize) {
self.col_used_mask.set(index);
}
/// Open the necessary cursors for this table reference.
/// Generally a table cursor is always opened unless a SELECT query can use a covering index.
/// An index cursor is opened if an index is used in any way for reading data from the table.
pub fn open_cursors(
&self,
program: &mut ProgramBuilder,
mode: OperationMode,
) -> Result<(Option<CursorID>, Option<CursorID>)> {
let index = self.op.index();
match &self.table {
Table::BTree(btree) => {
let use_covering_index = self.utilizes_covering_index();
let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT {
None
} else {
Some(program.alloc_cursor_id(
Some(self.identifier.clone()),
CursorType::BTreeTable(btree.clone()),
))
};
let index_cursor_id = if let Some(index) = index {
Some(program.alloc_cursor_id(
Some(index.name.clone()),
CursorType::BTreeIndex(index.clone()),
))
} else {
None
};
Ok((table_cursor_id, index_cursor_id))
}
Table::Virtual(virtual_table) => {
let table_cursor_id = Some(program.alloc_cursor_id(
Some(self.identifier.clone()),
CursorType::VirtualTable(virtual_table.clone()),
));
let index_cursor_id = None;
Ok((table_cursor_id, index_cursor_id))
}
Table::Pseudo(_) => Ok((None, None)),
}
}
/// Resolve the already opened cursors for this table reference.
pub fn resolve_cursors(
&self,
program: &mut ProgramBuilder,
) -> Result<(Option<CursorID>, Option<CursorID>)> {
let index = self.op.index();
let table_cursor_id = program.resolve_cursor_id_safe(&self.identifier);
let index_cursor_id = index.map(|index| program.resolve_cursor_id(&index.name));
Ok((table_cursor_id, index_cursor_id))
}
/// Returns true if a given index is a covering index for this [TableReference].
pub fn index_is_covering(&self, index: &Index) -> bool {
let Table::BTree(btree) = &self.table else {
return false;
};
if self.col_used_mask.is_empty() {
return false;
}
let mut index_cols_mask = ColumnUsedMask::new();
for col in index.columns.iter() {
index_cols_mask.set(col.pos_in_table);
}
// If a table has a rowid (i.e. is not a WITHOUT ROWID table), the index is guaranteed to contain the rowid as well.
if btree.has_rowid {
if let Some(pos_of_rowid_alias_col) = btree.get_rowid_alias_column().map(|(pos, _)| pos)
{
let mut empty_mask = ColumnUsedMask::new();
empty_mask.set(pos_of_rowid_alias_col);
if self.col_used_mask == empty_mask {
// However if the index would be ONLY used for the rowid, then let's not bother using it to cover the query.
// Example: if the query is SELECT id FROM t, and id is a rowid alias, then let's rather just scan the table
// instead of an index.
return false;
}
index_cols_mask.set(pos_of_rowid_alias_col);
}
}
index_cols_mask.contains_all_set_bits_of(&self.col_used_mask)
}
/// Returns true if the index selected for use with this [TableReference] is a covering index,
/// meaning that it contains all the columns that are referenced in the query.
pub fn utilizes_covering_index(&self) -> bool {
let Some(index) = self.op.index() else {
return false;
};
self.index_is_covering(index.as_ref())
}
}
/// A definition of a rowid/index search.

View File

@@ -4,7 +4,7 @@ pub mod grammar_generator;
mod tests {
use std::{collections::HashSet, rc::Rc};
use rand::{Rng, SeedableRng};
use rand::{seq::IndexedRandom, Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
use rusqlite::params;
@@ -213,34 +213,47 @@ mod tests {
};
// Create all different 3-column primary key permutations
let dbs = [
TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z))"),
TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z))"),
TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z))"),
TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z desc))"),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z))",
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z desc))",
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z desc))",
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z desc))",
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))",
),
TempDatabase::new_with_rusqlite(
"CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))",
),
];
let mut pk_tuples = HashSet::new();
while pk_tuples.len() < 100000 {
pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000)));
pk_tuples.insert((
rng.random_range(0..3000),
rng.random_range(0..3000),
rng.random_range(0..3000),
));
}
let mut tuples = Vec::new();
for pk_tuple in pk_tuples {
tuples.push(format!(
"({}, {}, {})",
"({}, {}, {}, {})",
pk_tuple.0,
pk_tuple.1,
rng.random_range(0..2000)
pk_tuple.2,
rng.random_range(0..3000)
));
}
let insert = format!("INSERT INTO t VALUES {}", tuples.join(", "));
@@ -298,6 +311,21 @@ mod tests {
ITERATIONS
);
}
// let's choose random columns from the table
let col_choices = ["x", "y", "z", "nonindexed_col"];
let col_choices_weights = [10.0, 10.0, 10.0, 3.0];
let num_cols_in_select = rng.random_range(1..=4);
let select_cols = col_choices
.choose_multiple_weighted(&mut rng, num_cols_in_select, |s| {
let idx = col_choices.iter().position(|c| c == s).unwrap();
col_choices_weights[idx]
})
.unwrap()
.collect::<Vec<_>>()
.iter()
.map(|x| x.to_string())
.collect::<Vec<_>>();
let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())];
// Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY.
let (order_by1, order_by2, order_by3) = {
@@ -318,7 +346,7 @@ mod tests {
}
};
// Generate random values for the WHERE clause constraints
// Generate random values for the WHERE clause constraints. Only involve primary key columns.
let (col_val_first, col_val_second, col_val_third) = {
if comp1.is_some() && comp2.is_some() && comp3.is_some() {
(
@@ -372,8 +400,11 @@ mod tests {
// Generate final query string
let query = format!(
"SELECT * FROM t {} {} LIMIT {}",
where_clause, order_by, limit
"SELECT {} FROM t {} {} LIMIT {}",
select_cols.join(", "),
where_clause,
order_by,
limit
);
log::debug!("query: {}", query);
@@ -398,19 +429,53 @@ mod tests {
}
});
if order_by_only_equalities {
let query_no_limit =
format!("SELECT * FROM t {} {} {}", where_clause, order_by, "");
let limbo_no_limit =
limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit);
let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit);
let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::<Vec<_>>();
if limbo_rev == sqlite_no_limit {
let query_no_limit =
format!("SELECT * FROM t {} {} {}", where_clause, order_by, "");
let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit);
let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit);
let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::<Vec<_>>();
if limbo_rev == sqlite_no_limit && order_by_only_equalities {
continue;
}
// finally, if the order by columns specified contain duplicates, sqlite might've returned the rows in an arbitrary different order.
// e.g. SELECT x,y,z FROM t ORDER BY x,y -- if there are duplicates on (x,y), the ordering returned might be different for limbo and sqlite.
// let's check this case and forgive ourselves if the ordering is different for this reason (but no other reason!)
let order_by_cols = select_cols
.iter()
.enumerate()
.filter(|(i, _)| {
order_by_components
.iter()
.any(|o| o.starts_with(col_choices[*i]))
})
.map(|(i, _)| i)
.collect::<Vec<_>>();
let duplicate_on_order_by_exists = {
let mut exists = false;
'outer: for (i, row) in limbo_no_limit.iter().enumerate() {
for (j, other_row) in limbo_no_limit.iter().enumerate() {
if i != j
&& order_by_cols.iter().all(|&col| row[col] == other_row[col])
{
exists = true;
break 'outer;
}
}
}
exists
};
if duplicate_on_order_by_exists {
let len_equal = limbo_no_limit.len() == sqlite_no_limit.len();
let all_contained =
len_equal && limbo_no_limit.iter().all(|x| sqlite_no_limit.contains(x));
if all_contained {
continue;
}
}
panic!(
"limbo: {:?}, sqlite: {:?}, seed: {}, query: {}",
"DIFFERENT RESULTS! limbo: {:?}, sqlite: {:?}, seed: {}, query: {}",
limbo, sqlite, seed, query
);
}