From de27c2fe4c73839c882e557efbe0ffd801cfd3f3 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 17:31:15 -0400 Subject: [PATCH] Properly handle pushing predicates for query optimization from xBestIndex --- core/translate/emitter.rs | 2 + core/translate/main_loop.rs | 89 ++++++++++++++++++++++++++++-------- core/translate/plan.rs | 57 ++++++++++------------- core/translate/subquery.rs | 1 + extensions/core/src/vtabs.rs | 1 + extensions/series/src/lib.rs | 8 ++-- extensions/tests/src/lib.rs | 75 ++++++++++++++++++++---------- 7 files changed, 155 insertions(+), 78 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e2914bbd0..0bc54bb9a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -90,6 +90,7 @@ pub struct TranslateCtx<'a> { // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, pub resolver: Resolver<'a>, + pub omit_predicates: Vec, } /// Used to distinguish database operations @@ -132,6 +133,7 @@ fn prologue<'a>( result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), + omit_predicates: Vec::new(), }; Ok((t_ctx, init_label, start_offset)) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 877f55f75..b3c5d6457 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -9,8 +9,10 @@ use crate::{ }, Result, }; +use limbo_ext::OrderByInfo; use limbo_ext::{ConstraintInfo, OrderByInfo}; use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -289,13 +291,18 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } else if let Some(vtab) = table.table.virtual_table() { - let constraints: Vec = predicates + } else if let Some(vtab) = table.virtual_table() { + // Collect usable constraints and track which predicate each came from + let converted_constraints = predicates .iter() - .filter(|p| p.applies_to_table(&table.table, tables)) - .filter_map(|p| try_convert_to_constraint_info(p, table_index)) - .collect(); - + .enumerate() + .filter_map(|(i, pred)| { + try_convert_to_constraint_info(pred, table_index, i) + .map(|c| (c, &predicates[i])) + }) + .collect::>(); + let constraints: Vec<_> = + converted_constraints.iter().map(|(c, _)| *c).collect(); let order_by = vec![OrderByInfo { column_index: *t_ctx .result_column_indexes_in_orderby_sorter @@ -304,18 +311,43 @@ pub fn open_loop( desc: matches!(iter_dir, IterationDirection::Backwards), }]; let index_info = vtab.best_index(&constraints, &order_by); - let start_reg = - program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match vtab.args.as_ref() { - Some(args) => args, - None => &vec![], - }; - for arg in args { - let reg = cur_reg; - cur_reg += 1; - let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + + // Translate arguments to pass into VFilter + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + let mut arg_regs = vec![]; + + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + let (_, pred) = &converted_constraints[i]; + // this is the literal side of the expression (col = 'literal') + let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else { + continue; + }; + + let literal_expr = match (&**lhs, &**rhs) { + (ast::Expr::Column { .. }, rhs) => rhs, + (lhs, ast::Expr::Column { .. }) => lhs, + _ => continue, + }; + + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + literal_expr, + target_reg, + &t_ctx.resolver, + )?; + arg_regs.push(target_reg); + } } + + // Encode idx_str to pass to VFilter let mut maybe_idx_str_reg = None; if let Some(idx_str) = index_info.idx_str { let reg = program.alloc_register(); @@ -328,11 +360,32 @@ pub fn open_loop( program.emit_insn(Insn::VFilter { cursor_id, pc_if_empty: loop_end, - arg_count: vtab.args.as_ref().map_or(0, |args| args.len()), + arg_count: args_needed, args_reg: start_reg, idx_str: maybe_idx_str_reg, idx_num: index_info.idx_num as usize, }); + + // Remove predicates omitted by best_index + let omit_predicates: Vec = predicates + .iter() + .enumerate() + .filter(|(i, _)| { + !index_info + .constraint_usages + .iter() + .enumerate() + .any(|(j, usage)| { + usage.argv_index.is_some() + && !usage.omit + && constraints.get(j).map_or(false, |c| c.pred_idx == *i) + }) + }) + .map(|(i, _)| i) + .collect(); + t_ctx + .omit_predicates + .extend_from_slice(&omit_predicates[..]); } program.resolve_label(loop_start, program.offset()); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9c535c57a..faec051f1 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,5 @@ use core::fmt; -use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo}; +use limbo_ext::{ConstraintInfo, ConstraintOp}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -17,6 +17,7 @@ use crate::{ use crate::{ schema::{PseudoTable, Type}, types::SeekOp, + util::can_pushdown_predicate, }; #[derive(Debug, Clone)] @@ -74,24 +75,10 @@ impl WhereTerm { pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { self.eval_at == EvalAt::Loop(loop_idx) } - - pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool { - match &self.expr { - ast::Expr::Column { - table: table_idx, .. - } => { - let table_ref = &tables[*table_idx]; - table_ref.table == *table - } - _ => false, - } - } } use crate::ast::{Expr, Operator}; -use super::optimizer::{ConstantPredicate, Optimizable}; - fn reverse_operator(op: &Operator) -> Option { match op { Operator::Equals => Some(Operator::Equals), @@ -106,9 +93,19 @@ fn reverse_operator(op: &Operator) -> Option { } } +/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. +/// It determines whether or not it involves the given table and whether or not it can +/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex +/// method, which will possibly calculate some information to improve the query plan, that we can send +/// back to it as arguments for the VFilter operation. Perhaps we should save the exact Expr for which a relevant column +/// is going to be filtered against: e.g: +/// 'SELECT key, value FROM vtab WHERE key = 'some_key'; +/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from +/// the filtration in the vdbe layer. pub fn try_convert_to_constraint_info( term: &WhereTerm, table_index: usize, + pred_idx: usize, ) -> Option { if term.from_outer_join { return None; @@ -119,32 +116,27 @@ pub fn try_convert_to_constraint_info( }; let (col_expr, _, op) = match (&**lhs, &**rhs) { - (Expr::Column { .. }, rhs) - if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (Expr::Column { table, .. }, rhs) if can_pushdown_predicate(rhs) => { + if table != &table_index { + return None; + } (lhs, rhs, op) } - (lhs, Expr::Column { .. }) - if lhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => { + if table != &table_index { + return None; + } (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } - _ => return None, + _ => { + return None; + } }; - let Expr::Column { - table: tbl_idx, - column, - .. - } = **col_expr - else { + let Expr::Column { column, .. } = **col_expr else { return None; }; - if tbl_idx != table_index { - return None; - } - let column_index = column as u32; let constraint_op = match op { Operator::Equals => ConstraintOp::Eq, @@ -162,6 +154,7 @@ pub fn try_convert_to_constraint_info( column_index, op: constraint_op, usable: true, + pred_idx, }) } /// The loop index where to evaluate the condition. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 87ddddd63..71cb72348 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -83,6 +83,7 @@ pub fn emit_subquery<'a>( reg_offset: plan.offset.map(|_| program.alloc_register()), reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()), resolver: Resolver::new(t_ctx.resolver.symbol_table), + omit_predicates: Vec::new(), }; let subquery_body_end_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index cf89b1fd1..ae3fc7429 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -268,4 +268,5 @@ pub struct ConstraintInfo { pub column_index: u32, pub op: ConstraintOp, pub usable: bool, + pub pred_idx: usize, } diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 5f833a607..21d3a89fa 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -240,7 +240,7 @@ mod tests { ]; // Initialize cursor through filter - match GenerateSeriesVTab::filter(&mut cursor, &args) { + match GenerateSeriesVTab::filter(&mut cursor, &args, None) { ResultCode::OK => (), ResultCode::EOF => return Ok(vec![]), err => return Err(err), @@ -293,7 +293,7 @@ mod tests { let expected_len = series_expected_length(&series); assert_eq!( values.len(), - expected_len as usize, + expected_len, "Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}", start, stop, @@ -546,7 +546,7 @@ mod tests { let start = series.start; let stop = series.stop; let step = series.step; - let tbl = GenerateSeriesVTab::default(); + let tbl = GenerateSeriesVTab {}; let mut cursor = tbl.open().unwrap(); let args = vec![ @@ -556,7 +556,7 @@ mod tests { ]; // Initialize cursor through filter - GenerateSeriesVTab::filter(&mut cursor, &args); + GenerateSeriesVTab::filter(&mut cursor, &args, None); let mut rowids = vec![]; while !GenerateSeriesVTab::eof(&cursor) { diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index c10a40f58..7592355ed 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo, - ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult, + IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -40,6 +40,7 @@ impl VTabModule for KVStoreVTab { } fn open(&self) -> Result { + let _ = env_logger::try_init(); Ok(KVStoreCursor { rows: Vec::new(), index: None, @@ -47,25 +48,29 @@ impl VTabModule for KVStoreVTab { } fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { - // not exactly the ideal kind of table to demonstrate this on... - for constraint in constraints { - println!("constraint: {:?}", constraint); + // Look for: key = ? + for constraint in constraints.iter() { if constraint.usable && constraint.op == ConstraintOp::Eq && constraint.column_index == 0 { - // key = ? is supported + log::debug!("xBestIndex: constraint found for 'key = ?'"); return IndexInfo { - idx_num: 1, // arbitrary non-zero code to signify optimization + idx_num: 1, idx_str: Some("key_eq".to_string()), order_by_consumed: false, estimated_cost: 10.0, - ..Default::default() + estimated_rows: 4, + constraint_usages: vec![ConstraintUsage { + omit: true, + argv_index: Some(1), + }], }; } } // fallback: full scan + log::debug!("No usable constraints found, using full scan"); IndexInfo { idx_num: -1, idx_str: None, @@ -77,23 +82,45 @@ impl VTabModule for KVStoreVTab { fn filter( cursor: &mut Self::VCursor, - _args: &[Value], - _idx_str: Option<(&str, i32)>, + args: &[Value], + idx_str: Option<(&str, i32)>, ) -> ResultCode { - let store = GLOBAL_STORE.lock().unwrap(); - cursor.rows = store - .iter() - .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) - .collect(); - cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - - if cursor.rows.is_empty() { - cursor.index = None; - return ResultCode::EOF; - } else { - cursor.index = Some(0); + match idx_str { + Some(("key_eq", 1)) => { + let key = args + .first() + .and_then(|v| v.to_text()) + .map(|s| s.to_string()); + log::debug!("idx_str found: key_eq\n value: {:?}", key); + if let Some(key) = key { + let rowid = hash_key(&key); + let store = GLOBAL_STORE.lock().unwrap(); + if let Some((k, v)) = store.get(&rowid) { + cursor.rows.push((rowid, k.clone(), v.clone())); + cursor.index = Some(0); + } else { + cursor.index = None; + } + return ResultCode::OK; + } + cursor.index = None; + ResultCode::OK + } + _ => { + let store = GLOBAL_STORE.lock().unwrap(); + cursor.rows = store + .iter() + .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) + .collect(); + cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); + cursor.index = if cursor.rows.is_empty() { + None + } else { + Some(0) + }; + ResultCode::OK + } } - ResultCode::OK } fn insert(&mut self, values: &[Value]) -> Result { @@ -152,7 +179,7 @@ impl VTabModule for KVStoreVTab { _ => Err("Invalid column".into()), } } else { - Err("cursor out of range".into()) + Err("Invalid Column".into()) } }