Properly handle pushing predicates for query optimization from xBestIndex

This commit is contained in:
PThorpe92
2025-04-06 17:31:15 -04:00
parent 0f34a813ff
commit de27c2fe4c
7 changed files with 155 additions and 78 deletions

View File

@@ -90,6 +90,7 @@ pub struct TranslateCtx<'a> {
// This vector holds the indexes of the result columns that we need to skip.
pub result_columns_to_skip_in_orderby_sorter: Option<Vec<usize>>,
pub resolver: Resolver<'a>,
pub omit_predicates: Vec<usize>,
}
/// Used to distinguish database operations
@@ -132,6 +133,7 @@ fn prologue<'a>(
result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(),
result_columns_to_skip_in_orderby_sorter: None,
resolver: Resolver::new(syms),
omit_predicates: Vec::new(),
};
Ok((t_ctx, init_label, start_offset))

View File

@@ -9,8 +9,10 @@ use crate::{
},
Result,
};
use limbo_ext::OrderByInfo;
use limbo_ext::{ConstraintInfo, OrderByInfo};
use limbo_sqlite3_parser::ast;
use limbo_sqlite3_parser::ast;
use super::{
aggregation::translate_aggregation_step,
@@ -289,13 +291,18 @@ pub fn open_loop(
pc_if_empty: loop_end,
});
}
} else if let Some(vtab) = table.table.virtual_table() {
let constraints: Vec<ConstraintInfo> = predicates
} else if let Some(vtab) = table.virtual_table() {
// Collect usable constraints and track which predicate each came from
let converted_constraints = predicates
.iter()
.filter(|p| p.applies_to_table(&table.table, tables))
.filter_map(|p| try_convert_to_constraint_info(p, table_index))
.collect();
.enumerate()
.filter_map(|(i, pred)| {
try_convert_to_constraint_info(pred, table_index, i)
.map(|c| (c, &predicates[i]))
})
.collect::<Vec<_>>();
let constraints: Vec<_> =
converted_constraints.iter().map(|(c, _)| *c).collect();
let order_by = vec![OrderByInfo {
column_index: *t_ctx
.result_column_indexes_in_orderby_sorter
@@ -304,18 +311,43 @@ pub fn open_loop(
desc: matches!(iter_dir, IterationDirection::Backwards),
}];
let index_info = vtab.best_index(&constraints, &order_by);
let start_reg =
program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0));
let mut cur_reg = start_reg;
let args = match vtab.args.as_ref() {
Some(args) => args,
None => &vec![],
};
for arg in args {
let reg = cur_reg;
cur_reg += 1;
let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?;
// Translate arguments to pass into VFilter
let args_needed = index_info
.constraint_usages
.iter()
.filter(|u| u.argv_index.is_some())
.count();
let start_reg = program.alloc_registers(args_needed);
let mut arg_regs = vec![];
for (i, usage) in index_info.constraint_usages.iter().enumerate() {
if let Some(argv_index) = usage.argv_index {
let (_, pred) = &converted_constraints[i];
// this is the literal side of the expression (col = 'literal')
let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else {
continue;
};
let literal_expr = match (&**lhs, &**rhs) {
(ast::Expr::Column { .. }, rhs) => rhs,
(lhs, ast::Expr::Column { .. }) => lhs,
_ => continue,
};
let target_reg = start_reg + (argv_index - 1) as usize;
translate_expr(
program,
Some(tables),
literal_expr,
target_reg,
&t_ctx.resolver,
)?;
arg_regs.push(target_reg);
}
}
// Encode idx_str to pass to VFilter
let mut maybe_idx_str_reg = None;
if let Some(idx_str) = index_info.idx_str {
let reg = program.alloc_register();
@@ -328,11 +360,32 @@ pub fn open_loop(
program.emit_insn(Insn::VFilter {
cursor_id,
pc_if_empty: loop_end,
arg_count: vtab.args.as_ref().map_or(0, |args| args.len()),
arg_count: args_needed,
args_reg: start_reg,
idx_str: maybe_idx_str_reg,
idx_num: index_info.idx_num as usize,
});
// Remove predicates omitted by best_index
let omit_predicates: Vec<usize> = predicates
.iter()
.enumerate()
.filter(|(i, _)| {
!index_info
.constraint_usages
.iter()
.enumerate()
.any(|(j, usage)| {
usage.argv_index.is_some()
&& !usage.omit
&& constraints.get(j).map_or(false, |c| c.pred_idx == *i)
})
})
.map(|(i, _)| i)
.collect();
t_ctx
.omit_predicates
.extend_from_slice(&omit_predicates[..]);
}
program.resolve_label(loop_start, program.offset());

View File

@@ -1,5 +1,5 @@
use core::fmt;
use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo};
use limbo_ext::{ConstraintInfo, ConstraintOp};
use limbo_sqlite3_parser::ast::{self, SortOrder};
use std::{
cmp::Ordering,
@@ -17,6 +17,7 @@ use crate::{
use crate::{
schema::{PseudoTable, Type},
types::SeekOp,
util::can_pushdown_predicate,
};
#[derive(Debug, Clone)]
@@ -74,24 +75,10 @@ impl WhereTerm {
pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool {
self.eval_at == EvalAt::Loop(loop_idx)
}
pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool {
match &self.expr {
ast::Expr::Column {
table: table_idx, ..
} => {
let table_ref = &tables[*table_idx];
table_ref.table == *table
}
_ => false,
}
}
}
use crate::ast::{Expr, Operator};
use super::optimizer::{ConstantPredicate, Optimizable};
fn reverse_operator(op: &Operator) -> Option<Operator> {
match op {
Operator::Equals => Some(Operator::Equals),
@@ -106,9 +93,19 @@ fn reverse_operator(op: &Operator) -> Option<Operator> {
}
}
/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'.
/// It determines whether or not it involves the given table and whether or not it can
/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex
/// method, which will possibly calculate some information to improve the query plan, that we can send
/// back to it as arguments for the VFilter operation. Perhaps we should save the exact Expr for which a relevant column
/// is going to be filtered against: e.g:
/// 'SELECT key, value FROM vtab WHERE key = 'some_key';
/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from
/// the filtration in the vdbe layer.
pub fn try_convert_to_constraint_info(
term: &WhereTerm,
table_index: usize,
pred_idx: usize,
) -> Option<ConstraintInfo> {
if term.from_outer_join {
return None;
@@ -119,32 +116,27 @@ pub fn try_convert_to_constraint_info(
};
let (col_expr, _, op) = match (&**lhs, &**rhs) {
(Expr::Column { .. }, rhs)
if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) =>
{
(Expr::Column { table, .. }, rhs) if can_pushdown_predicate(rhs) => {
if table != &table_index {
return None;
}
(lhs, rhs, op)
}
(lhs, Expr::Column { .. })
if lhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) =>
{
(lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => {
if table != &table_index {
return None;
}
(rhs, lhs, &reverse_operator(op).unwrap_or(*op))
}
_ => return None,
_ => {
return None;
}
};
let Expr::Column {
table: tbl_idx,
column,
..
} = **col_expr
else {
let Expr::Column { column, .. } = **col_expr else {
return None;
};
if tbl_idx != table_index {
return None;
}
let column_index = column as u32;
let constraint_op = match op {
Operator::Equals => ConstraintOp::Eq,
@@ -162,6 +154,7 @@ pub fn try_convert_to_constraint_info(
column_index,
op: constraint_op,
usable: true,
pred_idx,
})
}
/// The loop index where to evaluate the condition.

View File

@@ -83,6 +83,7 @@ pub fn emit_subquery<'a>(
reg_offset: plan.offset.map(|_| program.alloc_register()),
reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()),
resolver: Resolver::new(t_ctx.resolver.symbol_table),
omit_predicates: Vec::new(),
};
let subquery_body_end_label = program.allocate_label();
program.emit_insn(Insn::InitCoroutine {

View File

@@ -268,4 +268,5 @@ pub struct ConstraintInfo {
pub column_index: u32,
pub op: ConstraintOp,
pub usable: bool,
pub pred_idx: usize,
}

View File

@@ -240,7 +240,7 @@ mod tests {
];
// Initialize cursor through filter
match GenerateSeriesVTab::filter(&mut cursor, &args) {
match GenerateSeriesVTab::filter(&mut cursor, &args, None) {
ResultCode::OK => (),
ResultCode::EOF => return Ok(vec![]),
err => return Err(err),
@@ -293,7 +293,7 @@ mod tests {
let expected_len = series_expected_length(&series);
assert_eq!(
values.len(),
expected_len as usize,
expected_len,
"Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}",
start,
stop,
@@ -546,7 +546,7 @@ mod tests {
let start = series.start;
let stop = series.stop;
let step = series.step;
let tbl = GenerateSeriesVTab::default();
let tbl = GenerateSeriesVTab {};
let mut cursor = tbl.open().unwrap();
let args = vec![
@@ -556,7 +556,7 @@ mod tests {
];
// Initialize cursor through filter
GenerateSeriesVTab::filter(&mut cursor, &args);
GenerateSeriesVTab::filter(&mut cursor, &args, None);
let mut rowids = vec![];
while !GenerateSeriesVTab::eof(&cursor) {

View File

@@ -1,7 +1,7 @@
use lazy_static::lazy_static;
use limbo_ext::{
register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo,
ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value,
register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult,
IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value,
};
#[cfg(not(target_family = "wasm"))]
use limbo_ext::{VfsDerive, VfsExtension, VfsFile};
@@ -40,6 +40,7 @@ impl VTabModule for KVStoreVTab {
}
fn open(&self) -> Result<Self::VCursor, Self::Error> {
let _ = env_logger::try_init();
Ok(KVStoreCursor {
rows: Vec::new(),
index: None,
@@ -47,25 +48,29 @@ impl VTabModule for KVStoreVTab {
}
fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo {
// not exactly the ideal kind of table to demonstrate this on...
for constraint in constraints {
println!("constraint: {:?}", constraint);
// Look for: key = ?
for constraint in constraints.iter() {
if constraint.usable
&& constraint.op == ConstraintOp::Eq
&& constraint.column_index == 0
{
// key = ? is supported
log::debug!("xBestIndex: constraint found for 'key = ?'");
return IndexInfo {
idx_num: 1, // arbitrary non-zero code to signify optimization
idx_num: 1,
idx_str: Some("key_eq".to_string()),
order_by_consumed: false,
estimated_cost: 10.0,
..Default::default()
estimated_rows: 4,
constraint_usages: vec![ConstraintUsage {
omit: true,
argv_index: Some(1),
}],
};
}
}
// fallback: full scan
log::debug!("No usable constraints found, using full scan");
IndexInfo {
idx_num: -1,
idx_str: None,
@@ -77,23 +82,45 @@ impl VTabModule for KVStoreVTab {
fn filter(
cursor: &mut Self::VCursor,
_args: &[Value],
_idx_str: Option<(&str, i32)>,
args: &[Value],
idx_str: Option<(&str, i32)>,
) -> ResultCode {
let store = GLOBAL_STORE.lock().unwrap();
cursor.rows = store
.iter()
.map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone()))
.collect();
cursor.rows.sort_by_key(|(rowid, _, _)| *rowid);
if cursor.rows.is_empty() {
cursor.index = None;
return ResultCode::EOF;
} else {
cursor.index = Some(0);
match idx_str {
Some(("key_eq", 1)) => {
let key = args
.first()
.and_then(|v| v.to_text())
.map(|s| s.to_string());
log::debug!("idx_str found: key_eq\n value: {:?}", key);
if let Some(key) = key {
let rowid = hash_key(&key);
let store = GLOBAL_STORE.lock().unwrap();
if let Some((k, v)) = store.get(&rowid) {
cursor.rows.push((rowid, k.clone(), v.clone()));
cursor.index = Some(0);
} else {
cursor.index = None;
}
return ResultCode::OK;
}
cursor.index = None;
ResultCode::OK
}
_ => {
let store = GLOBAL_STORE.lock().unwrap();
cursor.rows = store
.iter()
.map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone()))
.collect();
cursor.rows.sort_by_key(|(rowid, _, _)| *rowid);
cursor.index = if cursor.rows.is_empty() {
None
} else {
Some(0)
};
ResultCode::OK
}
}
ResultCode::OK
}
fn insert(&mut self, values: &[Value]) -> Result<i64, Self::Error> {
@@ -152,7 +179,7 @@ impl VTabModule for KVStoreVTab {
_ => Err("Invalid column".into()),
}
} else {
Err("cursor out of range".into())
Err("Invalid Column".into())
}
}