From 853af16946a7f9850e98b60ab56c3db5d49279d3 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 26 Mar 2025 09:16:03 -0400 Subject: [PATCH 01/12] Implement xBestIndex for virtual table api to improve query planning --- core/lib.rs | 31 +++++- core/translate/main_loop.rs | 43 ++++++-- core/translate/plan.rs | 89 ++++++++++++++++ core/vdbe/execute.rs | 9 +- core/vdbe/insn.rs | 2 + extensions/completion/src/lib.rs | 4 +- extensions/core/src/lib.rs | 5 +- extensions/core/src/vtabs.rs | 172 ++++++++++++++++++++++++++++++- extensions/series/src/lib.rs | 2 +- extensions/tests/src/lib.rs | 39 ++++++- macros/src/lib.rs | 23 ++++- 11 files changed, 395 insertions(+), 24 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 353789839..68384e77d 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -35,7 +35,7 @@ pub use io::UringIO; pub use io::{ Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, }; -use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; +use limbo_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; use schema::{Column, Schema}; @@ -641,6 +641,21 @@ impl VirtualTable { pub(crate) fn rowid(&self, cursor: &VTabOpaqueCursor) -> i64 { unsafe { (self.implementation.rowid)(cursor.as_ptr()) } } + + pub(crate) fn best_index( + &self, + constraints: &[ConstraintInfo], + order_by: &[OrderByInfo], + ) -> IndexInfo { + unsafe { + IndexInfo::from_ffi((self.implementation.best_idx)( + constraints.as_ptr(), + constraints.len() as i32, + order_by.as_ptr(), + order_by.len() as i32, + )) + } + } /// takes ownership of the provided Args pub(crate) fn from_args( tbl_name: Option<&str>, @@ -693,6 +708,8 @@ impl VirtualTable { pub fn filter( &self, cursor: &VTabOpaqueCursor, + idx_num: i32, + idx_str: Option, arg_count: usize, args: Vec, ) -> Result { @@ -701,8 +718,18 @@ impl VirtualTable { let ownedvalue_arg = args.get(i).unwrap(); filter_args.push(ownedvalue_arg.to_ffi()); } + let c_idx_str = idx_str + .map(|s| std::ffi::CString::new(s).unwrap()) + .map(|cstr| cstr.into_raw()) + .unwrap_or(std::ptr::null_mut()); let rc = unsafe { - (self.implementation.filter)(cursor.as_ptr(), arg_count as i32, filter_args.as_ptr()) + (self.implementation.filter)( + cursor.as_ptr(), + arg_count as i32, + filter_args.as_ptr(), + c_idx_str, + idx_num, + ) }; for arg in filter_args { unsafe { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 76057c53b..877f55f75 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -9,6 +9,8 @@ use crate::{ }, Result, }; +use limbo_ext::{ConstraintInfo, OrderByInfo}; +use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -18,8 +20,8 @@ use super::{ optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType, - TableReference, WhereTerm, + try_convert_to_constraint_info, IterationDirection, Operation, Search, SeekDef, SelectPlan, + SelectQueryType, TableReference, WhereTerm, }, }; @@ -251,9 +253,6 @@ pub fn open_loop( end_offset: loop_end, }); - // These are predicates evaluated outside of the subquery, - // so they are translated here. - // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -290,12 +289,25 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } - if let Table::Virtual(ref table) = table.table { + } else if let Some(vtab) = table.table.virtual_table() { + let constraints: Vec = predicates + .iter() + .filter(|p| p.applies_to_table(&table.table, tables)) + .filter_map(|p| try_convert_to_constraint_info(p, table_index)) + .collect(); + + let order_by = vec![OrderByInfo { + column_index: *t_ctx + .result_column_indexes_in_orderby_sorter + .first() + .unwrap_or(&0) as u32, + desc: matches!(iter_dir, IterationDirection::Backwards), + }]; + let index_info = vtab.best_index(&constraints, &order_by); let start_reg = - program.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); + program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0)); let mut cur_reg = start_reg; - let args = match table.args.as_ref() { + let args = match vtab.args.as_ref() { Some(args) => args, None => &vec![], }; @@ -304,11 +316,22 @@ pub fn open_loop( cur_reg += 1; let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; } + let mut maybe_idx_str_reg = None; + if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + maybe_idx_str_reg = Some(reg); + } program.emit_insn(Insn::VFilter { cursor_id, pc_if_empty: loop_end, - arg_count: table.args.as_ref().map_or(0, |args| args.len()), + arg_count: vtab.args.as_ref().map_or(0, |args| args.len()), args_reg: start_reg, + idx_str: maybe_idx_str_reg, + idx_num: index_info.idx_num as usize, }); } program.resolve_label(loop_start, program.offset()); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index bb581ab13..9c535c57a 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,4 +1,5 @@ use core::fmt; +use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -73,8 +74,96 @@ impl WhereTerm { pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { self.eval_at == EvalAt::Loop(loop_idx) } + + pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool { + match &self.expr { + ast::Expr::Column { + table: table_idx, .. + } => { + let table_ref = &tables[*table_idx]; + table_ref.table == *table + } + _ => false, + } + } } +use crate::ast::{Expr, Operator}; + +use super::optimizer::{ConstantPredicate, Optimizable}; + +fn reverse_operator(op: &Operator) -> Option { + match op { + Operator::Equals => Some(Operator::Equals), + Operator::Less => Some(Operator::Greater), + Operator::LessEquals => Some(Operator::GreaterEquals), + Operator::Greater => Some(Operator::Less), + Operator::GreaterEquals => Some(Operator::LessEquals), + Operator::NotEquals => Some(Operator::NotEquals), + Operator::Is => Some(Operator::Is), + Operator::IsNot => Some(Operator::IsNot), + _ => None, + } +} + +pub fn try_convert_to_constraint_info( + term: &WhereTerm, + table_index: usize, +) -> Option { + if term.from_outer_join { + return None; + } + + let Expr::Binary(lhs, op, rhs) = &term.expr else { + return None; + }; + + let (col_expr, _, op) = match (&**lhs, &**rhs) { + (Expr::Column { .. }, rhs) + if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => + { + (lhs, rhs, op) + } + (lhs, Expr::Column { .. }) + if lhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => + { + (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) + } + _ => return None, + }; + + let Expr::Column { + table: tbl_idx, + column, + .. + } = **col_expr + else { + return None; + }; + + if tbl_idx != table_index { + return None; + } + + let column_index = column as u32; + let constraint_op = match op { + Operator::Equals => ConstraintOp::Eq, + Operator::Less => ConstraintOp::Lt, + Operator::LessEquals => ConstraintOp::Le, + Operator::Greater => ConstraintOp::Gt, + Operator::GreaterEquals => ConstraintOp::Ge, + Operator::NotEquals => ConstraintOp::Ne, + Operator::Is => ConstraintOp::Is, + Operator::IsNot => ConstraintOp::IsNot, + _ => return None, + }; + + Some(ConstraintInfo { + column_index, + op: constraint_op, + usable: true, + }) +} /// The loop index where to evaluate the condition. /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), /// because that is the rightmost table that it references. diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index d00ee6129..9abc7c6c3 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -966,6 +966,8 @@ pub fn op_vfilter( pc_if_empty, arg_count, args_reg, + idx_str, + idx_num, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -981,7 +983,12 @@ pub fn op_vfilter( for i in 0..*arg_count { args.push(state.registers[args_reg + i].get_owned_value().clone()); } - virtual_table.filter(cursor, *arg_count, args)? + let idx_str = if let Some(idx_str) = idx_str { + Some(state.registers[*idx_str].get_owned_value().to_string()) + } else { + None + }; + virtual_table.filter(cursor, *idx_num as i32, idx_str, *arg_count, args)? }; if !has_rows { state.pc = pc_if_empty.to_offset_int(); diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f1276798f..56f44bd2b 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -289,6 +289,8 @@ pub enum Insn { pc_if_empty: BranchOffset, arg_count: usize, args_reg: usize, + idx_str: Option, + idx_num: usize, }, /// Read a column from the current row of the virtual table cursor. diff --git a/extensions/completion/src/lib.rs b/extensions/completion/src/lib.rs index 09b09c479..53358c23c 100644 --- a/extensions/completion/src/lib.rs +++ b/extensions/completion/src/lib.rs @@ -91,8 +91,8 @@ impl VTabModule for CompletionVTab { cursor.eof() } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { - if args.len() == 0 || args.len() > 2 { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { + if args.is_empty() || args.len() > 2 { return ResultCode::InvalidArgs; } cursor.reset(); diff --git a/extensions/core/src/lib.rs b/extensions/core/src/lib.rs index e73b2b894..99729de6c 100644 --- a/extensions/core/src/lib.rs +++ b/extensions/core/src/lib.rs @@ -15,7 +15,10 @@ pub use types::{ResultCode, Value, ValueType}; #[cfg(feature = "vfs")] pub use vfs_modules::{RegisterVfsFn, VfsExtension, VfsFile, VfsFileImpl, VfsImpl, VfsInterface}; use vtabs::RegisterModuleFn; -pub use vtabs::{VTabCursor, VTabKind, VTabModule, VTabModuleImpl}; +pub use vtabs::{ + ConstraintInfo, ConstraintOp, ConstraintUsage, ExtIndexInfo, IndexInfo, OrderByInfo, + VTabCursor, VTabKind, VTabModule, VTabModuleImpl, +}; pub type ExtResult = std::result::Result; diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index 83b3dae78..cf89b1fd1 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -22,6 +22,7 @@ pub struct VTabModuleImpl { pub update: VtabFnUpdate, pub rowid: VtabRowIDFn, pub destroy: VtabFnDestroy, + pub best_idx: BestIdxFn, } #[cfg(feature = "core_only")] @@ -43,8 +44,13 @@ pub type VtabFnCreateSchema = unsafe extern "C" fn(args: *const Value, argc: i32 pub type VtabFnOpen = unsafe extern "C" fn(*const c_void) -> *const c_void; -pub type VtabFnFilter = - unsafe extern "C" fn(cursor: *const c_void, argc: i32, argv: *const Value) -> ResultCode; +pub type VtabFnFilter = unsafe extern "C" fn( + cursor: *const c_void, + argc: i32, + argv: *const Value, + idx_str: *const c_char, + idx_num: i32, +) -> ResultCode; pub type VtabFnColumn = unsafe extern "C" fn(cursor: *const c_void, idx: u32) -> Value; @@ -62,6 +68,12 @@ pub type VtabFnUpdate = unsafe extern "C" fn( ) -> ResultCode; pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode; +pub type BestIdxFn = unsafe extern "C" fn( + constraints: *const ConstraintInfo, + constraint_len: i32, + order_by: *const OrderByInfo, + order_by_len: i32, +) -> ExtIndexInfo; #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq)] @@ -78,7 +90,11 @@ pub trait VTabModule: 'static { fn create_schema(args: &[Value]) -> String; fn open(&self) -> Result; - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode; + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_info: Option<(&str, i32)>, + ) -> ResultCode; fn column(cursor: &Self::VCursor, idx: u32) -> Result; fn next(cursor: &mut Self::VCursor) -> ResultCode; fn eof(cursor: &Self::VCursor) -> bool; @@ -94,6 +110,22 @@ pub trait VTabModule: 'static { fn destroy(&mut self) -> Result<(), Self::Error> { Ok(()) } + fn best_index(_constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + IndexInfo { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: _constraints + .iter() + .map(|_| ConstraintUsage { + argv_index: Some(0), + omit: false, + }) + .collect(), + } + } } pub trait VTabCursor: Sized { @@ -103,3 +135,137 @@ pub trait VTabCursor: Sized { fn eof(&self) -> bool; fn next(&mut self) -> ResultCode; } + +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ConstraintOp { + Eq = 2, + Lt = 4, + Le = 8, + Gt = 16, + Ge = 32, + Match = 64, + Like = 65, + Glob = 66, + Regexp = 67, + Ne = 68, + IsNot = 69, + IsNotNull = 70, + IsNull = 71, + Is = 72, + In = 73, +} + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct OrderByInfo { + pub column_index: u32, + pub desc: bool, +} + +#[derive(Debug, Clone)] +pub struct IndexInfo { + pub idx_num: i32, + pub idx_str: Option, + pub order_by_consumed: bool, + /// TODO: for eventual cost based query planning + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages: Vec, +} +impl Default for IndexInfo { + fn default() -> Self { + Self { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: Vec::new(), + } + } +} + +impl IndexInfo { + /// + /// Converts IndexInfo to an FFI-safe `ExtIndexInfo`. + /// This method transfers ownership of `constraint_usages` and `idx_str`, + /// which must later be reclaimed using `from_ffi` to prevent leaks. + pub fn to_ffi(self) -> ExtIndexInfo { + let len = self.constraint_usages.len(); + let ptr = Box::into_raw(self.constraint_usages.into_boxed_slice()) as *mut ConstraintUsage; + let idx_str_len = self.idx_str.as_ref().map(|s| s.len()).unwrap_or(0); + let c_idx_str = self + .idx_str + .map(|s| std::ffi::CString::new(s).unwrap().into_raw()) + .unwrap_or(std::ptr::null_mut()); + ExtIndexInfo { + idx_num: self.idx_num, + estimated_cost: self.estimated_cost, + estimated_rows: self.estimated_rows, + order_by_consumed: self.order_by_consumed, + constraint_usages_ptr: ptr, + constraint_usage_len: len, + idx_str: c_idx_str as *mut _, + idx_str_len, + } + } + + /// Reclaims ownership of `constraint_usages` and `idx_str` from an FFI-safe `ExtIndexInfo`. + /// # Safety + /// This method is unsafe because it can cause memory leaks if not used correctly. + /// to_ffi and from_ffi are meant to send index info across ffi bounds then immediately reclaim it. + pub unsafe fn from_ffi(ffi: ExtIndexInfo) -> Self { + let constraint_usages = unsafe { + Box::from_raw(std::slice::from_raw_parts_mut( + ffi.constraint_usages_ptr, + ffi.constraint_usage_len, + )) + .to_vec() + }; + let idx_str = if ffi.idx_str.is_null() { + None + } else { + Some(unsafe { + std::ffi::CString::from_raw(ffi.idx_str as *mut _) + .to_string_lossy() + .into_owned() + }) + }; + Self { + idx_num: ffi.idx_num, + idx_str, + order_by_consumed: ffi.order_by_consumed, + estimated_cost: ffi.estimated_cost, + estimated_rows: ffi.estimated_rows, + constraint_usages, + } + } +} + +#[repr(C)] +#[derive(Clone, Debug)] +pub struct ExtIndexInfo { + pub idx_num: i32, + pub idx_str: *const u8, + pub idx_str_len: usize, + pub order_by_consumed: bool, + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages_ptr: *mut ConstraintUsage, + pub constraint_usage_len: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct ConstraintUsage { + pub argv_index: Option, // 1-based index into VFilter args + pub omit: bool, // if true, core skips checking it again +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct ConstraintInfo { + pub column_index: u32, + pub op: ConstraintOp, + pub usable: bool, +} diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 43028eed5..5f833a607 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -45,7 +45,7 @@ impl VTabModule for GenerateSeriesVTab { }) } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { // args are the start, stop, and step if args.is_empty() || args.len() > 3 { return ResultCode::InvalidArgs; diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index beff17004..c10a40f58 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ExtResult, ResultCode, VTabCursor, VTabKind, VTabModule, - VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo, + ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -46,7 +46,40 @@ impl VTabModule for KVStoreVTab { }) } - fn filter(cursor: &mut Self::VCursor, _args: &[Value]) -> ResultCode { + fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + // not exactly the ideal kind of table to demonstrate this on... + for constraint in constraints { + println!("constraint: {:?}", constraint); + if constraint.usable + && constraint.op == ConstraintOp::Eq + && constraint.column_index == 0 + { + // key = ? is supported + return IndexInfo { + idx_num: 1, // arbitrary non-zero code to signify optimization + idx_str: Some("key_eq".to_string()), + order_by_consumed: false, + estimated_cost: 10.0, + ..Default::default() + }; + } + } + + // fallback: full scan + IndexInfo { + idx_num: -1, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1000.0, + ..Default::default() + } + } + + fn filter( + cursor: &mut Self::VCursor, + _args: &[Value], + _idx_str: Option<(&str, i32)>, + ) -> ResultCode { let store = GLOBAL_STORE.lock().unwrap(); cursor.rows = store .iter() diff --git a/macros/src/lib.rs b/macros/src/lib.rs index c03788c7c..acb969876 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -455,6 +455,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { let update_fn_name = format_ident!("update_{}", struct_name); let rowid_fn_name = format_ident!("rowid_{}", struct_name); let destroy_fn_name = format_ident!("destroy_{}", struct_name); + let best_idx_fn_name = format_ident!("best_idx_{}", struct_name); let expanded = quote! { impl #struct_name { @@ -490,13 +491,20 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { cursor: *const ::std::ffi::c_void, argc: i32, argv: *const ::limbo_ext::Value, + idx_str: *const ::std::ffi::c_char, + idx_num: i32, ) -> ::limbo_ext::ResultCode { if cursor.is_null() { return ::limbo_ext::ResultCode::Error; } let cursor = unsafe { &mut *(cursor as *mut <#struct_name as ::limbo_ext::VTabModule>::VCursor) }; let args = ::std::slice::from_raw_parts(argv, argc as usize); - <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args) + let idx_str = if idx_str.is_null() { + None + } else { + Some((unsafe { ::std::ffi::CStr::from_ptr(idx_str).to_str().unwrap() }, idx_num)) + }; + <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args, idx_str) } #[no_mangle] @@ -613,6 +621,18 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { return ::limbo_ext::ResultCode::OK; } + #[no_mangle] + pub unsafe extern "C" fn #best_idx_fn_name( + constraints: *const ::limbo_ext::ConstraintInfo, + n_constraints: i32, + order_by: *const ::limbo_ext::OrderByInfo, + n_order_by: i32, + ) -> ::limbo_ext::ExtIndexInfo { + let constraints = std::slice::from_raw_parts(constraints, n_constraints as usize); + let order_by = std::slice::from_raw_parts(order_by, n_order_by as usize); + <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi() + } + #[no_mangle] pub unsafe extern "C" fn #register_fn_name( api: *const ::limbo_ext::ExtensionApi @@ -636,6 +656,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { update: Self::#update_fn_name, rowid: Self::#rowid_fn_name, destroy: Self::#destroy_fn_name, + best_idx: Self::#best_idx_fn_name, }; (api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND) } From 0f34a813ffd8154b1ba96b839e5769ac68d89769 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 14:20:51 -0400 Subject: [PATCH 02/12] Add can_pushdown_predicate fn to evaluate ast expressions for constness --- core/util.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/core/util.rs b/core/util.rs index b3ce8ecd0..406d7059f 100644 --- a/core/util.rs +++ b/core/util.rs @@ -2,6 +2,7 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Liter use std::{rc::Rc, sync::Arc}; use crate::{ + function::Func, schema::{self, Column, Schema, Type}, types::{OwnedValue, OwnedValueType}, LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, @@ -565,6 +566,41 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res .collect::>()) } +pub fn can_pushdown_predicate(expr: &Expr) -> bool { + match expr { + Expr::Literal(_) => true, + Expr::Column { .. } => true, + Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()), + Expr::Unary(_, expr) => can_pushdown_predicate(expr), + Expr::FunctionCall { args, name, .. } => { + let function = crate::function::Func::resolve_function( + &name.0, + args.as_ref().map_or(0, |a| a.len()), + ); + matches!(function, Ok(Func::Scalar(_))) + } + Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Between { + lhs, start, end, .. + } => { + can_pushdown_predicate(lhs) + && can_pushdown_predicate(start) + && can_pushdown_predicate(end) + } + Expr::Id(_) => true, + Expr::Name(_) => true, + Expr::Qualified(_, _) => true, + Expr::DoublyQualified(_, _, _) => true, + Expr::InTable { lhs, .. } => can_pushdown_predicate(lhs), + _ => false, + } +} + +fn is_deterministic(func: &Func) -> bool { + matches!(func, Func::Scalar(_)) +} + #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty From de27c2fe4c73839c882e557efbe0ffd801cfd3f3 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 17:31:15 -0400 Subject: [PATCH 03/12] Properly handle pushing predicates for query optimization from xBestIndex --- core/translate/emitter.rs | 2 + core/translate/main_loop.rs | 89 ++++++++++++++++++++++++++++-------- core/translate/plan.rs | 57 ++++++++++------------- core/translate/subquery.rs | 1 + extensions/core/src/vtabs.rs | 1 + extensions/series/src/lib.rs | 8 ++-- extensions/tests/src/lib.rs | 75 ++++++++++++++++++++---------- 7 files changed, 155 insertions(+), 78 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e2914bbd0..0bc54bb9a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -90,6 +90,7 @@ pub struct TranslateCtx<'a> { // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, pub resolver: Resolver<'a>, + pub omit_predicates: Vec, } /// Used to distinguish database operations @@ -132,6 +133,7 @@ fn prologue<'a>( result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), + omit_predicates: Vec::new(), }; Ok((t_ctx, init_label, start_offset)) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 877f55f75..b3c5d6457 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -9,8 +9,10 @@ use crate::{ }, Result, }; +use limbo_ext::OrderByInfo; use limbo_ext::{ConstraintInfo, OrderByInfo}; use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -289,13 +291,18 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } else if let Some(vtab) = table.table.virtual_table() { - let constraints: Vec = predicates + } else if let Some(vtab) = table.virtual_table() { + // Collect usable constraints and track which predicate each came from + let converted_constraints = predicates .iter() - .filter(|p| p.applies_to_table(&table.table, tables)) - .filter_map(|p| try_convert_to_constraint_info(p, table_index)) - .collect(); - + .enumerate() + .filter_map(|(i, pred)| { + try_convert_to_constraint_info(pred, table_index, i) + .map(|c| (c, &predicates[i])) + }) + .collect::>(); + let constraints: Vec<_> = + converted_constraints.iter().map(|(c, _)| *c).collect(); let order_by = vec![OrderByInfo { column_index: *t_ctx .result_column_indexes_in_orderby_sorter @@ -304,18 +311,43 @@ pub fn open_loop( desc: matches!(iter_dir, IterationDirection::Backwards), }]; let index_info = vtab.best_index(&constraints, &order_by); - let start_reg = - program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match vtab.args.as_ref() { - Some(args) => args, - None => &vec![], - }; - for arg in args { - let reg = cur_reg; - cur_reg += 1; - let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + + // Translate arguments to pass into VFilter + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + let mut arg_regs = vec![]; + + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + let (_, pred) = &converted_constraints[i]; + // this is the literal side of the expression (col = 'literal') + let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else { + continue; + }; + + let literal_expr = match (&**lhs, &**rhs) { + (ast::Expr::Column { .. }, rhs) => rhs, + (lhs, ast::Expr::Column { .. }) => lhs, + _ => continue, + }; + + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + literal_expr, + target_reg, + &t_ctx.resolver, + )?; + arg_regs.push(target_reg); + } } + + // Encode idx_str to pass to VFilter let mut maybe_idx_str_reg = None; if let Some(idx_str) = index_info.idx_str { let reg = program.alloc_register(); @@ -328,11 +360,32 @@ pub fn open_loop( program.emit_insn(Insn::VFilter { cursor_id, pc_if_empty: loop_end, - arg_count: vtab.args.as_ref().map_or(0, |args| args.len()), + arg_count: args_needed, args_reg: start_reg, idx_str: maybe_idx_str_reg, idx_num: index_info.idx_num as usize, }); + + // Remove predicates omitted by best_index + let omit_predicates: Vec = predicates + .iter() + .enumerate() + .filter(|(i, _)| { + !index_info + .constraint_usages + .iter() + .enumerate() + .any(|(j, usage)| { + usage.argv_index.is_some() + && !usage.omit + && constraints.get(j).map_or(false, |c| c.pred_idx == *i) + }) + }) + .map(|(i, _)| i) + .collect(); + t_ctx + .omit_predicates + .extend_from_slice(&omit_predicates[..]); } program.resolve_label(loop_start, program.offset()); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9c535c57a..faec051f1 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,5 @@ use core::fmt; -use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo}; +use limbo_ext::{ConstraintInfo, ConstraintOp}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -17,6 +17,7 @@ use crate::{ use crate::{ schema::{PseudoTable, Type}, types::SeekOp, + util::can_pushdown_predicate, }; #[derive(Debug, Clone)] @@ -74,24 +75,10 @@ impl WhereTerm { pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { self.eval_at == EvalAt::Loop(loop_idx) } - - pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool { - match &self.expr { - ast::Expr::Column { - table: table_idx, .. - } => { - let table_ref = &tables[*table_idx]; - table_ref.table == *table - } - _ => false, - } - } } use crate::ast::{Expr, Operator}; -use super::optimizer::{ConstantPredicate, Optimizable}; - fn reverse_operator(op: &Operator) -> Option { match op { Operator::Equals => Some(Operator::Equals), @@ -106,9 +93,19 @@ fn reverse_operator(op: &Operator) -> Option { } } +/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. +/// It determines whether or not it involves the given table and whether or not it can +/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex +/// method, which will possibly calculate some information to improve the query plan, that we can send +/// back to it as arguments for the VFilter operation. Perhaps we should save the exact Expr for which a relevant column +/// is going to be filtered against: e.g: +/// 'SELECT key, value FROM vtab WHERE key = 'some_key'; +/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from +/// the filtration in the vdbe layer. pub fn try_convert_to_constraint_info( term: &WhereTerm, table_index: usize, + pred_idx: usize, ) -> Option { if term.from_outer_join { return None; @@ -119,32 +116,27 @@ pub fn try_convert_to_constraint_info( }; let (col_expr, _, op) = match (&**lhs, &**rhs) { - (Expr::Column { .. }, rhs) - if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (Expr::Column { table, .. }, rhs) if can_pushdown_predicate(rhs) => { + if table != &table_index { + return None; + } (lhs, rhs, op) } - (lhs, Expr::Column { .. }) - if lhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => { + if table != &table_index { + return None; + } (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } - _ => return None, + _ => { + return None; + } }; - let Expr::Column { - table: tbl_idx, - column, - .. - } = **col_expr - else { + let Expr::Column { column, .. } = **col_expr else { return None; }; - if tbl_idx != table_index { - return None; - } - let column_index = column as u32; let constraint_op = match op { Operator::Equals => ConstraintOp::Eq, @@ -162,6 +154,7 @@ pub fn try_convert_to_constraint_info( column_index, op: constraint_op, usable: true, + pred_idx, }) } /// The loop index where to evaluate the condition. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 87ddddd63..71cb72348 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -83,6 +83,7 @@ pub fn emit_subquery<'a>( reg_offset: plan.offset.map(|_| program.alloc_register()), reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()), resolver: Resolver::new(t_ctx.resolver.symbol_table), + omit_predicates: Vec::new(), }; let subquery_body_end_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index cf89b1fd1..ae3fc7429 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -268,4 +268,5 @@ pub struct ConstraintInfo { pub column_index: u32, pub op: ConstraintOp, pub usable: bool, + pub pred_idx: usize, } diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 5f833a607..21d3a89fa 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -240,7 +240,7 @@ mod tests { ]; // Initialize cursor through filter - match GenerateSeriesVTab::filter(&mut cursor, &args) { + match GenerateSeriesVTab::filter(&mut cursor, &args, None) { ResultCode::OK => (), ResultCode::EOF => return Ok(vec![]), err => return Err(err), @@ -293,7 +293,7 @@ mod tests { let expected_len = series_expected_length(&series); assert_eq!( values.len(), - expected_len as usize, + expected_len, "Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}", start, stop, @@ -546,7 +546,7 @@ mod tests { let start = series.start; let stop = series.stop; let step = series.step; - let tbl = GenerateSeriesVTab::default(); + let tbl = GenerateSeriesVTab {}; let mut cursor = tbl.open().unwrap(); let args = vec![ @@ -556,7 +556,7 @@ mod tests { ]; // Initialize cursor through filter - GenerateSeriesVTab::filter(&mut cursor, &args); + GenerateSeriesVTab::filter(&mut cursor, &args, None); let mut rowids = vec![]; while !GenerateSeriesVTab::eof(&cursor) { diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index c10a40f58..7592355ed 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo, - ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult, + IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -40,6 +40,7 @@ impl VTabModule for KVStoreVTab { } fn open(&self) -> Result { + let _ = env_logger::try_init(); Ok(KVStoreCursor { rows: Vec::new(), index: None, @@ -47,25 +48,29 @@ impl VTabModule for KVStoreVTab { } fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { - // not exactly the ideal kind of table to demonstrate this on... - for constraint in constraints { - println!("constraint: {:?}", constraint); + // Look for: key = ? + for constraint in constraints.iter() { if constraint.usable && constraint.op == ConstraintOp::Eq && constraint.column_index == 0 { - // key = ? is supported + log::debug!("xBestIndex: constraint found for 'key = ?'"); return IndexInfo { - idx_num: 1, // arbitrary non-zero code to signify optimization + idx_num: 1, idx_str: Some("key_eq".to_string()), order_by_consumed: false, estimated_cost: 10.0, - ..Default::default() + estimated_rows: 4, + constraint_usages: vec![ConstraintUsage { + omit: true, + argv_index: Some(1), + }], }; } } // fallback: full scan + log::debug!("No usable constraints found, using full scan"); IndexInfo { idx_num: -1, idx_str: None, @@ -77,23 +82,45 @@ impl VTabModule for KVStoreVTab { fn filter( cursor: &mut Self::VCursor, - _args: &[Value], - _idx_str: Option<(&str, i32)>, + args: &[Value], + idx_str: Option<(&str, i32)>, ) -> ResultCode { - let store = GLOBAL_STORE.lock().unwrap(); - cursor.rows = store - .iter() - .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) - .collect(); - cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - - if cursor.rows.is_empty() { - cursor.index = None; - return ResultCode::EOF; - } else { - cursor.index = Some(0); + match idx_str { + Some(("key_eq", 1)) => { + let key = args + .first() + .and_then(|v| v.to_text()) + .map(|s| s.to_string()); + log::debug!("idx_str found: key_eq\n value: {:?}", key); + if let Some(key) = key { + let rowid = hash_key(&key); + let store = GLOBAL_STORE.lock().unwrap(); + if let Some((k, v)) = store.get(&rowid) { + cursor.rows.push((rowid, k.clone(), v.clone())); + cursor.index = Some(0); + } else { + cursor.index = None; + } + return ResultCode::OK; + } + cursor.index = None; + ResultCode::OK + } + _ => { + let store = GLOBAL_STORE.lock().unwrap(); + cursor.rows = store + .iter() + .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) + .collect(); + cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); + cursor.index = if cursor.rows.is_empty() { + None + } else { + Some(0) + }; + ResultCode::OK + } } - ResultCode::OK } fn insert(&mut self, values: &[Value]) -> Result { @@ -152,7 +179,7 @@ impl VTabModule for KVStoreVTab { _ => Err("Invalid column".into()), } } else { - Err("cursor out of range".into()) + Err("Invalid Column".into()) } } From 6f2c6c6a61138d38f4686d341cf01bdd798f91da Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 19:30:00 -0400 Subject: [PATCH 04/12] Actually skip omitted predicates in open loop --- core/translate/main_loop.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index b3c5d6457..b9b857eee 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -396,10 +396,14 @@ pub fn open_loop( }); } - for cond in predicates + for (i, cond) in predicates .iter() - .filter(|cond| cond.should_eval_at_loop(table_index)) + .enumerate() + .filter(|(_, cond)| cond.should_eval_at_loop(table_index)) { + if t_ctx.omit_predicates.contains(&i) { + continue; + } let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, From 7d271edf8a83dfec6866f8fadd1af2c8771dfb9a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 19:33:15 -0400 Subject: [PATCH 05/12] Remove unused function in core/util.rs --- core/util.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/util.rs b/core/util.rs index 406d7059f..8ab86c70a 100644 --- a/core/util.rs +++ b/core/util.rs @@ -578,6 +578,7 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { &name.0, args.as_ref().map_or(0, |a| a.len()), ); + // is deterministic matches!(function, Ok(Func::Scalar(_))) } Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), @@ -597,10 +598,6 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { } } -fn is_deterministic(func: &Func) -> bool { - matches!(func, Func::Scalar(_)) -} - #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty From 528a9b6c7e343e122371d0d73b9218a3d2b18601 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 23:44:19 -0400 Subject: [PATCH 06/12] Clean up allocations in main loop and fix ext tests --- core/translate/main_loop.rs | 208 +++++++++++++++++++----------------- extensions/tests/src/lib.rs | 15 ++- 2 files changed, 119 insertions(+), 104 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index b9b857eee..00c943b9b 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,3 +1,6 @@ +use limbo_ext::{OrderByInfo, VTabKind}; +use limbo_sqlite3_parser::ast; + use crate::{ schema::Table, translate::result_row::emit_select_result, @@ -9,10 +12,6 @@ use crate::{ }, Result, }; -use limbo_ext::OrderByInfo; -use limbo_ext::{ConstraintInfo, OrderByInfo}; -use limbo_sqlite3_parser::ast; -use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -293,99 +292,115 @@ pub fn open_loop( } } else if let Some(vtab) = table.virtual_table() { // Collect usable constraints and track which predicate each came from - let converted_constraints = predicates - .iter() - .enumerate() - .filter_map(|(i, pred)| { - try_convert_to_constraint_info(pred, table_index, i) - .map(|c| (c, &predicates[i])) - }) - .collect::>(); - let constraints: Vec<_> = - converted_constraints.iter().map(|(c, _)| *c).collect(); - let order_by = vec![OrderByInfo { - column_index: *t_ctx - .result_column_indexes_in_orderby_sorter - .first() - .unwrap_or(&0) as u32, - desc: matches!(iter_dir, IterationDirection::Backwards), - }]; - let index_info = vtab.best_index(&constraints, &order_by); - - // Translate arguments to pass into VFilter - let args_needed = index_info - .constraint_usages - .iter() - .filter(|u| u.argv_index.is_some()) - .count(); - let start_reg = program.alloc_registers(args_needed); - let mut arg_regs = vec![]; - - for (i, usage) in index_info.constraint_usages.iter().enumerate() { - if let Some(argv_index) = usage.argv_index { - let (_, pred) = &converted_constraints[i]; - // this is the literal side of the expression (col = 'literal') - let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else { - continue; - }; - - let literal_expr = match (&**lhs, &**rhs) { - (ast::Expr::Column { .. }, rhs) => rhs, - (lhs, ast::Expr::Column { .. }) => lhs, - _ => continue, - }; - - let target_reg = start_reg + (argv_index - 1) as usize; - translate_expr( - program, - Some(tables), - literal_expr, - target_reg, - &t_ctx.resolver, - )?; - arg_regs.push(target_reg); + // Virtual tables may be used either as VTab or TVF, distinguished by vtab.name. + let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab + .kind + .eq(&VTabKind::VirtualTable) + { + // Build converted constraints from the predicates. + let mut converted_constraints = Vec::with_capacity(predicates.len()); + for (i, pred) in predicates.iter().enumerate() { + if let Some(cinfo) = + try_convert_to_constraint_info(pred, table_index, i) + { + converted_constraints.push((cinfo, pred)); + } } - } + let constraints: Vec<_> = + converted_constraints.iter().map(|(c, _)| *c).collect(); + let order_by = vec![OrderByInfo { + column_index: *t_ctx + .result_column_indexes_in_orderby_sorter + .first() + .unwrap_or(&0) as u32, + desc: matches!(iter_dir, IterationDirection::Backwards), + }]; + let index_info = vtab.best_index(&constraints, &order_by); - // Encode idx_str to pass to VFilter - let mut maybe_idx_str_reg = None; - if let Some(idx_str) = index_info.idx_str { - let reg = program.alloc_register(); - program.emit_insn(Insn::String8 { - dest: reg, - value: idx_str, - }); - maybe_idx_str_reg = Some(reg); - } + // Determine the number of VFilter arguments (constraints with an argv_index). + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + + // For each constraint used by best_index, translate the opposite side. + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + if let Some((_, pred)) = converted_constraints.get(i) { + if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr { + let literal_expr = match (&**lhs, &**rhs) { + (ast::Expr::Column { .. }, lit) => lit, + (lit, ast::Expr::Column { .. }) => lit, + _ => continue, + }; + // argv_index is 1-based; adjust to get the proper register offset. + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + literal_expr, + target_reg, + &t_ctx.resolver, + )?; + } + } + } + } + // If best_index provided an idx_str, translate it. + let maybe_idx_str = if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + Some(reg) + } else { + None + }; + + // Record (in t_ctx) the indices of predicates that best_index tells us to omit. + // Here we insert directly into t_ctx.omit_predicates + for (j, usage) in index_info.constraint_usages.iter().enumerate() { + if usage.argv_index.is_some() && usage.omit { + if let Some(constraint) = constraints.get(j) { + t_ctx.omit_predicates.push(constraint.pred_idx); + } + } + } + ( + start_reg, + args_needed, + maybe_idx_str, + Some(index_info.idx_num), + ) + } else { + // For table-valued functions: translate the table args. + let args = match vtab.args.as_ref() { + Some(args) => args, + None => &vec![], + }; + let start_reg = program.alloc_registers(args.len()); + let mut cur_reg = start_reg; + for arg in args { + let reg = cur_reg; + cur_reg += 1; + let _ = + translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + } + (start_reg, args.len(), None, None) + }; + + // Emit VFilter with the computed arguments. program.emit_insn(Insn::VFilter { cursor_id, - pc_if_empty: loop_end, - arg_count: args_needed, + arg_count: count, args_reg: start_reg, - idx_str: maybe_idx_str_reg, - idx_num: index_info.idx_num as usize, + idx_str: maybe_idx_str, + idx_num: maybe_idx_int.unwrap_or(0) as usize, + pc_if_empty: loop_end, }); - - // Remove predicates omitted by best_index - let omit_predicates: Vec = predicates - .iter() - .enumerate() - .filter(|(i, _)| { - !index_info - .constraint_usages - .iter() - .enumerate() - .any(|(j, usage)| { - usage.argv_index.is_some() - && !usage.omit - && constraints.get(j).map_or(false, |c| c.pred_idx == *i) - }) - }) - .map(|(i, _)| i) - .collect(); - t_ctx - .omit_predicates - .extend_from_slice(&omit_predicates[..]); } program.resolve_label(loop_start, program.offset()); @@ -396,14 +411,9 @@ pub fn open_loop( }); } - for (i, cond) in predicates - .iter() - .enumerate() - .filter(|(_, cond)| cond.should_eval_at_loop(table_index)) - { - if t_ctx.omit_predicates.contains(&i) { - continue; - } + for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| { + cond.should_eval_at_loop(table_index) && !t_ctx.omit_predicates.contains(i) + }) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index 7592355ed..a574febfb 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -99,10 +99,13 @@ impl VTabModule for KVStoreVTab { cursor.rows.push((rowid, k.clone(), v.clone())); cursor.index = Some(0); } else { + cursor.rows.clear(); cursor.index = None; + return ResultCode::EOF; } return ResultCode::OK; } + cursor.rows.clear(); cursor.index = None; ResultCode::OK } @@ -113,12 +116,13 @@ impl VTabModule for KVStoreVTab { .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) .collect(); cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - cursor.index = if cursor.rows.is_empty() { - None + if cursor.rows.is_empty() { + cursor.index = None; + ResultCode::EOF } else { - Some(0) - }; - ResultCode::OK + cursor.index = Some(0); + ResultCode::OK + } } } } @@ -156,6 +160,7 @@ impl VTabModule for KVStoreVTab { let _ = self.insert(values)?; Ok(()) } + fn eof(cursor: &Self::VCursor) -> bool { cursor.index.is_some_and(|s| s >= cursor.rows.len()) || cursor.index.is_none() } From e17fd7edc40cf2faa0bf618da28e9a088093410d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 9 Apr 2025 11:06:26 -0400 Subject: [PATCH 07/12] Add comments and address PR review --- core/translate/main_loop.rs | 11 +++++------ core/translate/plan.rs | 8 +++++++- core/util.rs | 8 ++------ extensions/core/src/vtabs.rs | 26 ++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 00c943b9b..86ce7415f 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -291,8 +291,7 @@ pub fn open_loop( }); } } else if let Some(vtab) = table.virtual_table() { - // Collect usable constraints and track which predicate each came from - // Virtual tables may be used either as VTab or TVF, distinguished by vtab.name. + // Virtual tables may be used either as VTab or TVF let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab .kind .eq(&VTabKind::VirtualTable) @@ -308,13 +307,14 @@ pub fn open_loop( } let constraints: Vec<_> = converted_constraints.iter().map(|(c, _)| *c).collect(); - let order_by = vec![OrderByInfo { + let order_by = [OrderByInfo { column_index: *t_ctx .result_column_indexes_in_orderby_sorter .first() .unwrap_or(&0) as u32, desc: matches!(iter_dir, IterationDirection::Backwards), }]; + // Call xBestIndex method on the underlying vtable. let index_info = vtab.best_index(&constraints, &order_by); // Determine the number of VFilter arguments (constraints with an argv_index). @@ -330,7 +330,7 @@ pub fn open_loop( if let Some(argv_index) = usage.argv_index { if let Some((_, pred)) = converted_constraints.get(i) { if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr { - let literal_expr = match (&**lhs, &**rhs) { + let expr = match (&**lhs, &**rhs) { (ast::Expr::Column { .. }, lit) => lit, (lit, ast::Expr::Column { .. }) => lit, _ => continue, @@ -340,7 +340,7 @@ pub fn open_loop( translate_expr( program, Some(tables), - literal_expr, + expr, target_reg, &t_ctx.resolver, )?; @@ -359,7 +359,6 @@ pub fn open_loop( } else { None }; - // Record (in t_ctx) the indices of predicates that best_index tells us to omit. // Here we insert directly into t_ctx.omit_predicates for (j, usage) in index_info.constraint_usages.iter().enumerate() { diff --git a/core/translate/plan.rs b/core/translate/plan.rs index faec051f1..3b60775a7 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -79,6 +79,10 @@ impl WhereTerm { use crate::ast::{Expr, Operator}; +// This function takes an operator and returns the operator you would obtain if the operands were swapped. +// e.g. "literal < column" +// which is not the canonical order for constraint pushdown. +// This function will return > so that the expression can be treated as if it were written "column > literal" fn reverse_operator(op: &Operator) -> Option { match op { Operator::Equals => Some(Operator::Equals), @@ -97,7 +101,7 @@ fn reverse_operator(op: &Operator) -> Option { /// It determines whether or not it involves the given table and whether or not it can /// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex /// method, which will possibly calculate some information to improve the query plan, that we can send -/// back to it as arguments for the VFilter operation. Perhaps we should save the exact Expr for which a relevant column +/// back to it as arguments for the VFilter operation. /// is going to be filtered against: e.g: /// 'SELECT key, value FROM vtab WHERE key = 'some_key'; /// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from @@ -126,6 +130,8 @@ pub fn try_convert_to_constraint_info( if table != &table_index { return None; } + // if the column is on the rhs, swap the operands and possibly + // the operator if it's a logical comparison. (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } _ => { diff --git a/core/util.rs b/core/util.rs index 8ab86c70a..5d0010423 100644 --- a/core/util.rs +++ b/core/util.rs @@ -566,10 +566,11 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res .collect::>()) } +/// This function checks if a given expression is a constant value that can be pushed down to the database engine. +/// It is expected to be called with the other half of a binary expression with an Expr::Column pub fn can_pushdown_predicate(expr: &Expr) -> bool { match expr { Expr::Literal(_) => true, - Expr::Column { .. } => true, Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()), Expr::Unary(_, expr) => can_pushdown_predicate(expr), @@ -589,11 +590,6 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { && can_pushdown_predicate(start) && can_pushdown_predicate(end) } - Expr::Id(_) => true, - Expr::Name(_) => true, - Expr::Qualified(_, _) => true, - Expr::DoublyQualified(_, _, _) => true, - Expr::InTable { lhs, .. } => can_pushdown_predicate(lhs), _ => false, } } diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index ae3fc7429..c2f6620d1 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -158,19 +158,30 @@ pub enum ConstraintOp { #[repr(C)] #[derive(Copy, Clone)] +/// Describes an ORDER BY clause in a query involving a virtual table. +/// Passed along with the constraints to xBestIndex. pub struct OrderByInfo { + /// The index of the column referenced in the ORDER BY clause. pub column_index: u32, + /// Whether or not the clause is in descending order. pub desc: bool, } +/// The internal (core) representation of an 'index' on a virtual table. +/// Returned from xBestIndex and then processed and passed to VFilter. #[derive(Debug, Clone)] pub struct IndexInfo { + /// The index number, used to identify the index internally by the VTab pub idx_num: i32, + /// Optional index name. these are passed to vfilter in a tuple (idx_num, idx_str) pub idx_str: Option, + /// Whether the index is used for order by pub order_by_consumed: bool, /// TODO: for eventual cost based query planning pub estimated_cost: f64, + /// Estimated number of rows that the query will return pub estimated_rows: u32, + /// List of constraints that can be used to optimize the query. pub constraint_usages: Vec, } impl Default for IndexInfo { @@ -245,6 +256,7 @@ impl IndexInfo { #[repr(C)] #[derive(Clone, Debug)] +/// FFI representation of IndexInfo. pub struct ExtIndexInfo { pub idx_num: i32, pub idx_str: *const u8, @@ -256,17 +268,27 @@ pub struct ExtIndexInfo { pub constraint_usage_len: usize, } +/// Returned from xBestIndex to describe how the virtual table +/// can use the constraints in the WHERE clause of a query. #[derive(Debug, Clone, Copy)] pub struct ConstraintUsage { - pub argv_index: Option, // 1-based index into VFilter args - pub omit: bool, // if true, core skips checking it again + /// 1 based index of the argument in the WHERE clause. + pub argv_index: Option, + /// If true, core can omit this constraint in the vdbe layer. + pub omit: bool, } #[derive(Clone, Copy, Debug)] #[repr(C)] +/// The primary argument to xBestIndex, which describes a constraint +/// in a query involving a virtual table. pub struct ConstraintInfo { + /// The index of the column referenced in the WHERE clause. pub column_index: u32, + /// The operator used in the clause. pub op: ConstraintOp, + /// Whether or not constraint is garaunteed to be enforced. pub usable: bool, + /// pub pred_idx: usize, } From d53c60e0719775f07b025eee5e52caab7efa7332 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:11:21 -0400 Subject: [PATCH 08/12] Prevent double allocations for VFilter args in vdbe --- core/lib.rs | 13 +++++-------- core/vdbe/execute.rs | 9 +++++++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 68384e77d..e130306f7 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -705,19 +705,16 @@ impl VirtualTable { VTabOpaqueCursor::new(cursor) } + #[tracing::instrument(skip(cursor))] pub fn filter( &self, cursor: &VTabOpaqueCursor, idx_num: i32, idx_str: Option, arg_count: usize, - args: Vec, + args: Vec, ) -> Result { - let mut filter_args = Vec::with_capacity(arg_count); - for i in 0..arg_count { - let ownedvalue_arg = args.get(i).unwrap(); - filter_args.push(ownedvalue_arg.to_ffi()); - } + tracing::trace!("xFilter"); let c_idx_str = idx_str .map(|s| std::ffi::CString::new(s).unwrap()) .map(|cstr| cstr.into_raw()) @@ -726,12 +723,12 @@ impl VirtualTable { (self.implementation.filter)( cursor.as_ptr(), arg_count as i32, - filter_args.as_ptr(), + args.as_ptr(), c_idx_str, idx_num, ) }; - for arg in filter_args { + for arg in args { unsafe { arg.__free_internal_type(); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 9abc7c6c3..de871f54c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -979,9 +979,14 @@ pub fn op_vfilter( let has_rows = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_virtual_mut(); - let mut args = Vec::new(); + let mut args = Vec::with_capacity(*arg_count); for i in 0..*arg_count { - args.push(state.registers[args_reg + i].get_owned_value().clone()); + args.push( + state.registers[args_reg + i] + .get_owned_value() + .clone() + .to_ffi(), + ); } let idx_str = if let Some(idx_str) = idx_str { Some(state.registers[*idx_str].get_owned_value().to_string()) From 95a2fdc096c0e9d7c9722b4fff13e7e0b7c9e509 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:12:07 -0400 Subject: [PATCH 09/12] Fix array from ptr in bestindex ffi method in proc macro --- extensions/tests/src/lib.rs | 10 +++++++++- macros/src/lib.rs | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index a574febfb..5c6495595 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -54,11 +54,19 @@ impl VTabModule for KVStoreVTab { && constraint.op == ConstraintOp::Eq && constraint.column_index == 0 { + // this extension wouldn't support order by but for testing purposes, + // we will consume it if we find an ASC order by clause on the value column + let mut consumed = false; + if let Some(order) = _order_by.first() { + if order.column_index == 1 && !order.desc { + consumed = true; + } + } log::debug!("xBestIndex: constraint found for 'key = ?'"); return IndexInfo { idx_num: 1, idx_str: Some("key_eq".to_string()), - order_by_consumed: false, + order_by_consumed: consumed, estimated_cost: 10.0, estimated_rows: 4, constraint_usages: vec![ConstraintUsage { diff --git a/macros/src/lib.rs b/macros/src/lib.rs index acb969876..d47101589 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -628,8 +628,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { order_by: *const ::limbo_ext::OrderByInfo, n_order_by: i32, ) -> ::limbo_ext::ExtIndexInfo { - let constraints = std::slice::from_raw_parts(constraints, n_constraints as usize); - let order_by = std::slice::from_raw_parts(order_by, n_order_by as usize); + let constraints = if n_constraints > 0 { std::slice::from_raw_parts(constraints, n_constraints as usize) } else { &[] }; + let order_by = if n_order_by > 0 { std::slice::from_raw_parts(order_by, n_order_by as usize) } else { &[] }; <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi() } From 245e7f94f6bd3238de4df9a14549bd09e2a0a26d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:14:04 -0400 Subject: [PATCH 10/12] Store packed field on ConstraintInfo to optimize planning for vfilter --- extensions/core/src/vtabs.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index c2f6620d1..5d86457f7 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -272,7 +272,7 @@ pub struct ExtIndexInfo { /// can use the constraints in the WHERE clause of a query. #[derive(Debug, Clone, Copy)] pub struct ConstraintUsage { - /// 1 based index of the argument in the WHERE clause. + /// 1 based index of the argument passed pub argv_index: Option, /// If true, core can omit this constraint in the vdbe layer. pub omit: bool, @@ -289,6 +289,18 @@ pub struct ConstraintInfo { pub op: ConstraintOp, /// Whether or not constraint is garaunteed to be enforced. pub usable: bool, - /// - pub pred_idx: usize, + /// packed integer with the index of the constraint in the planner, + /// and the side of the binary expr that the relevant column is on. + pub plan_info: u32, +} + +impl ConstraintInfo { + #[inline(always)] + pub fn pack_plan_info(pred_idx: u32, is_right_side: bool) -> u32 { + ((pred_idx) << 1) | (is_right_side as u32) + } + #[inline(always)] + pub fn unpack_plan_info(&self) -> (usize, bool) { + ((self.plan_info >> 1) as usize, (self.plan_info & 1) != 0) + } } From a25a02efe1a62916b0de618ac55774c885156bc2 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:15:20 -0400 Subject: [PATCH 11/12] Improve xBestIndex call site and allow for proper handling of join and where constraints --- core/translate/main_loop.rs | 73 +++++++++++++--------------- core/translate/plan.rs | 96 +++++++++++++++++++++++-------------- core/util.rs | 21 ++++---- 3 files changed, 106 insertions(+), 84 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 86ce7415f..f9ba9ca97 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -21,8 +21,8 @@ use super::{ optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - try_convert_to_constraint_info, IterationDirection, Operation, Search, SeekDef, SelectPlan, - SelectQueryType, TableReference, WhereTerm, + convert_where_to_vtab_constraint, IterationDirection, Operation, Search, SeekDef, + SelectPlan, SelectQueryType, TableReference, WhereTerm, }, }; @@ -291,31 +291,33 @@ pub fn open_loop( }); } } else if let Some(vtab) = table.virtual_table() { - // Virtual tables may be used either as VTab or TVF let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab .kind .eq(&VTabKind::VirtualTable) { - // Build converted constraints from the predicates. - let mut converted_constraints = Vec::with_capacity(predicates.len()); - for (i, pred) in predicates.iter().enumerate() { - if let Some(cinfo) = - try_convert_to_constraint_info(pred, table_index, i) - { - converted_constraints.push((cinfo, pred)); - } - } - let constraints: Vec<_> = - converted_constraints.iter().map(|(c, _)| *c).collect(); - let order_by = [OrderByInfo { - column_index: *t_ctx - .result_column_indexes_in_orderby_sorter - .first() - .unwrap_or(&0) as u32, - desc: matches!(iter_dir, IterationDirection::Backwards), - }]; - // Call xBestIndex method on the underlying vtable. - let index_info = vtab.best_index(&constraints, &order_by); + // Virtual‑table (non‑TVF) modules can receive constraints via xBestIndex. + // They return information with which to pass to VFilter operation. + // We forward every predicate that touches vtab columns. + // + // vtab.col = literal (always usable) + // vtab.col = outer_table.col (usable, because outer_table is already positioned) + // vtab.col = later_table.col (forwarded with usable = false) + // + // xBestIndex decides which ones it wants by setting argvIndex and whether the + // core layer may omit them (omit = true). + // We then materialise the RHS/LHS into registers before issuing VFilter. + let converted_constraints = predicates + .iter() + .filter(|p| p.should_eval_at_loop(table_index)) + .enumerate() + .filter_map(|(i, p)| { + // Build ConstraintInfo from the predicates + convert_where_to_vtab_constraint(p, table_index, i) + }) + .collect::>(); + // TODO: get proper order_by information to pass to the vtab. + // maybe encode more info on t_ctx? we need: [col_idx, is_descending] + let index_info = vtab.best_index(&converted_constraints, &[]); // Determine the number of VFilter arguments (constraints with an argv_index). let args_needed = index_info @@ -328,13 +330,12 @@ pub fn open_loop( // For each constraint used by best_index, translate the opposite side. for (i, usage) in index_info.constraint_usages.iter().enumerate() { if let Some(argv_index) = usage.argv_index { - if let Some((_, pred)) = converted_constraints.get(i) { - if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr { - let expr = match (&**lhs, &**rhs) { - (ast::Expr::Column { .. }, lit) => lit, - (lit, ast::Expr::Column { .. }) => lit, - _ => continue, - }; + if let Some(cinfo) = converted_constraints.get(i) { + let (pred_idx, is_rhs) = cinfo.unpack_plan_info(); + if let ast::Expr::Binary(lhs, _, rhs) = + &predicates[pred_idx].expr + { + let expr = if is_rhs { rhs } else { lhs }; // argv_index is 1-based; adjust to get the proper register offset. let target_reg = start_reg + (argv_index - 1) as usize; translate_expr( @@ -344,6 +345,9 @@ pub fn open_loop( target_reg, &t_ctx.resolver, )?; + if cinfo.usable && usage.omit { + t_ctx.omit_predicates.push(pred_idx) + } } } } @@ -359,15 +363,6 @@ pub fn open_loop( } else { None }; - // Record (in t_ctx) the indices of predicates that best_index tells us to omit. - // Here we insert directly into t_ctx.omit_predicates - for (j, usage) in index_info.constraint_usages.iter().enumerate() { - if usage.argv_index.is_some() && usage.omit { - if let Some(constraint) = constraints.get(j) { - t_ctx.omit_predicates.push(constraint.pred_idx); - } - } - } ( start_reg, args_needed, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3b60775a7..25a4fd7ef 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -97,6 +97,18 @@ fn reverse_operator(op: &Operator) -> Option { } } +fn to_ext_constraint_op(op: &Operator) -> Option { + match op { + Operator::Equals => Some(ConstraintOp::Eq), + Operator::Less => Some(ConstraintOp::Lt), + Operator::LessEquals => Some(ConstraintOp::Le), + Operator::Greater => Some(ConstraintOp::Gt), + Operator::GreaterEquals => Some(ConstraintOp::Ge), + Operator::NotEquals => Some(ConstraintOp::Ne), + _ => None, + } +} + /// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. /// It determines whether or not it involves the given table and whether or not it can /// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex @@ -106,7 +118,7 @@ fn reverse_operator(op: &Operator) -> Option { /// 'SELECT key, value FROM vtab WHERE key = 'some_key'; /// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from /// the filtration in the vdbe layer. -pub fn try_convert_to_constraint_info( +pub fn convert_where_to_vtab_constraint( term: &WhereTerm, table_index: usize, pred_idx: usize, @@ -114,53 +126,63 @@ pub fn try_convert_to_constraint_info( if term.from_outer_join { return None; } - let Expr::Binary(lhs, op, rhs) = &term.expr else { return None; }; - - let (col_expr, _, op) = match (&**lhs, &**rhs) { - (Expr::Column { table, .. }, rhs) if can_pushdown_predicate(rhs) => { - if table != &table_index { - return None; + let expr_is_ready = |e: &Expr| -> bool { can_pushdown_predicate(e, table_index) }; + let (vcol_idx, op_for_vtab, usable, is_rhs) = match (&**lhs, &**rhs) { + ( + Expr::Column { + table: tbl_l, + column: col_l, + .. + }, + Expr::Column { + table: tbl_r, + column: col_r, + .. + }, + ) => { + // one side must be the virtual table + let vtab_on_l = *tbl_l == table_index; + let vtab_on_r = *tbl_r == table_index; + if vtab_on_l == vtab_on_r { + return None; // either both or none -> not convertible } - (lhs, rhs, op) - } - (lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => { - if table != &table_index { - return None; + + if vtab_on_l { + // vtab on left side: operator unchanged + let usable = *tbl_r < table_index; // usable if the other table is already positioned + (col_l, op, usable, false) + } else { + // vtab on right side of the expr: reverse operator + let usable = *tbl_l < table_index; + (col_r, &reverse_operator(op).unwrap_or(*op), usable, true) } - // if the column is on the rhs, swap the operands and possibly - // the operator if it's a logical comparison. - (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } - _ => { - return None; + (Expr::Column { table, column, .. }, other) if *table == table_index => { + ( + column, + op, + expr_is_ready(other), // literal / earlier‑table / deterministic func ? + false, + ) } - }; + (other, Expr::Column { table, column, .. }) if *table == table_index => ( + column, + &reverse_operator(op).unwrap_or(*op), + expr_is_ready(other), + true, + ), - let Expr::Column { column, .. } = **col_expr else { - return None; - }; - - let column_index = column as u32; - let constraint_op = match op { - Operator::Equals => ConstraintOp::Eq, - Operator::Less => ConstraintOp::Lt, - Operator::LessEquals => ConstraintOp::Le, - Operator::Greater => ConstraintOp::Gt, - Operator::GreaterEquals => ConstraintOp::Ge, - Operator::NotEquals => ConstraintOp::Ne, - Operator::Is => ConstraintOp::Is, - Operator::IsNot => ConstraintOp::IsNot, - _ => return None, + _ => return None, // does not involve the virtual table at all }; Some(ConstraintInfo { - column_index, - op: constraint_op, - usable: true, - pred_idx, + column_index: *vcol_idx as u32, + op: to_ext_constraint_op(op_for_vtab)?, + usable, + plan_info: ConstraintInfo::pack_plan_info(pred_idx as u32, is_rhs), }) } /// The loop index where to evaluate the condition. diff --git a/core/util.rs b/core/util.rs index 5d0010423..f518df6f4 100644 --- a/core/util.rs +++ b/core/util.rs @@ -568,12 +568,15 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res /// This function checks if a given expression is a constant value that can be pushed down to the database engine. /// It is expected to be called with the other half of a binary expression with an Expr::Column -pub fn can_pushdown_predicate(expr: &Expr) -> bool { +pub fn can_pushdown_predicate(expr: &Expr, table_idx: usize) -> bool { match expr { Expr::Literal(_) => true, - Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), - Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()), - Expr::Unary(_, expr) => can_pushdown_predicate(expr), + Expr::Column { table, .. } => *table <= table_idx, + Expr::Binary(lhs, _, rhs) => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap(), table_idx), + Expr::Unary(_, expr) => can_pushdown_predicate(expr, table_idx), Expr::FunctionCall { args, name, .. } => { let function = crate::function::Func::resolve_function( &name.0, @@ -582,13 +585,15 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { // is deterministic matches!(function, Ok(Func::Scalar(_))) } - Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Like { lhs, rhs, .. } => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } Expr::Between { lhs, start, end, .. } => { - can_pushdown_predicate(lhs) - && can_pushdown_predicate(start) - && can_pushdown_predicate(end) + can_pushdown_predicate(lhs, table_idx) + && can_pushdown_predicate(start, table_idx) + && can_pushdown_predicate(end, table_idx) } _ => false, } From d02900294eb47a76bc0ec61f59fbe499cab5218a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:47:46 -0400 Subject: [PATCH 12/12] Remove 2nd shell in vtab tests, fix expr translation in main loop --- core/translate/main_loop.rs | 5 +++-- testing/cli_tests/extensions.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index f9ba9ca97..7a2e1b9ef 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,4 +1,4 @@ -use limbo_ext::{OrderByInfo, VTabKind}; +use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast; use crate::{ @@ -335,7 +335,8 @@ pub fn open_loop( if let ast::Expr::Binary(lhs, _, rhs) = &predicates[pred_idx].expr { - let expr = if is_rhs { rhs } else { lhs }; + // translate the opposite side of the referenced vtab column + let expr = if is_rhs { lhs } else { rhs }; // argv_index is 1-based; adjust to get the proper register offset. let target_reg = start_reg + (argv_index - 1) as usize; translate_expr( diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index ab57e4178..d4d55fca1 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -343,7 +343,6 @@ def test_kv(): # first, create a normal table to ensure no issues limbo.execute_dot("CREATE TABLE other (a,b,c);") limbo.execute_dot("INSERT INTO other values (23,32,23);") - limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Module kv_store not found" in res,