diff --git a/core/lib.rs b/core/lib.rs index 353789839..e130306f7 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -35,7 +35,7 @@ pub use io::UringIO; pub use io::{ Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, }; -use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; +use limbo_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; use schema::{Column, Schema}; @@ -641,6 +641,21 @@ impl VirtualTable { pub(crate) fn rowid(&self, cursor: &VTabOpaqueCursor) -> i64 { unsafe { (self.implementation.rowid)(cursor.as_ptr()) } } + + pub(crate) fn best_index( + &self, + constraints: &[ConstraintInfo], + order_by: &[OrderByInfo], + ) -> IndexInfo { + unsafe { + IndexInfo::from_ffi((self.implementation.best_idx)( + constraints.as_ptr(), + constraints.len() as i32, + order_by.as_ptr(), + order_by.len() as i32, + )) + } + } /// takes ownership of the provided Args pub(crate) fn from_args( tbl_name: Option<&str>, @@ -690,21 +705,30 @@ impl VirtualTable { VTabOpaqueCursor::new(cursor) } + #[tracing::instrument(skip(cursor))] pub fn filter( &self, cursor: &VTabOpaqueCursor, + idx_num: i32, + idx_str: Option, arg_count: usize, - args: Vec, + args: Vec, ) -> Result { - let mut filter_args = Vec::with_capacity(arg_count); - for i in 0..arg_count { - let ownedvalue_arg = args.get(i).unwrap(); - filter_args.push(ownedvalue_arg.to_ffi()); - } + tracing::trace!("xFilter"); + let c_idx_str = idx_str + .map(|s| std::ffi::CString::new(s).unwrap()) + .map(|cstr| cstr.into_raw()) + .unwrap_or(std::ptr::null_mut()); let rc = unsafe { - (self.implementation.filter)(cursor.as_ptr(), arg_count as i32, filter_args.as_ptr()) + (self.implementation.filter)( + cursor.as_ptr(), + arg_count as i32, + args.as_ptr(), + c_idx_str, + idx_num, + ) }; - for arg in filter_args { + for arg in args { unsafe { arg.__free_internal_type(); } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e2914bbd0..0bc54bb9a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -90,6 +90,7 @@ pub struct TranslateCtx<'a> { // This vector holds the indexes of the result columns that we need to skip. pub result_columns_to_skip_in_orderby_sorter: Option>, pub resolver: Resolver<'a>, + pub omit_predicates: Vec, } /// Used to distinguish database operations @@ -132,6 +133,7 @@ fn prologue<'a>( result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), + omit_predicates: Vec::new(), }; Ok((t_ctx, init_label, start_offset)) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 76057c53b..7a2e1b9ef 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,3 +1,6 @@ +use limbo_ext::VTabKind; +use limbo_sqlite3_parser::ast; + use crate::{ schema::Table, translate::result_row::emit_select_result, @@ -18,8 +21,8 @@ use super::{ optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType, - TableReference, WhereTerm, + convert_where_to_vtab_constraint, IterationDirection, Operation, Search, SeekDef, + SelectPlan, SelectQueryType, TableReference, WhereTerm, }, }; @@ -251,9 +254,6 @@ pub fn open_loop( end_offset: loop_end, }); - // These are predicates evaluated outside of the subquery, - // so they are translated here. - // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -290,25 +290,111 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } - if let Table::Virtual(ref table) = table.table { - let start_reg = - program.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match table.args.as_ref() { - Some(args) => args, - None => &vec![], + } else if let Some(vtab) = table.virtual_table() { + let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab + .kind + .eq(&VTabKind::VirtualTable) + { + // Virtual‑table (non‑TVF) modules can receive constraints via xBestIndex. + // They return information with which to pass to VFilter operation. + // We forward every predicate that touches vtab columns. + // + // vtab.col = literal (always usable) + // vtab.col = outer_table.col (usable, because outer_table is already positioned) + // vtab.col = later_table.col (forwarded with usable = false) + // + // xBestIndex decides which ones it wants by setting argvIndex and whether the + // core layer may omit them (omit = true). + // We then materialise the RHS/LHS into registers before issuing VFilter. + let converted_constraints = predicates + .iter() + .filter(|p| p.should_eval_at_loop(table_index)) + .enumerate() + .filter_map(|(i, p)| { + // Build ConstraintInfo from the predicates + convert_where_to_vtab_constraint(p, table_index, i) + }) + .collect::>(); + // TODO: get proper order_by information to pass to the vtab. + // maybe encode more info on t_ctx? we need: [col_idx, is_descending] + let index_info = vtab.best_index(&converted_constraints, &[]); + + // Determine the number of VFilter arguments (constraints with an argv_index). + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + + // For each constraint used by best_index, translate the opposite side. + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + if let Some(cinfo) = converted_constraints.get(i) { + let (pred_idx, is_rhs) = cinfo.unpack_plan_info(); + if let ast::Expr::Binary(lhs, _, rhs) = + &predicates[pred_idx].expr + { + // translate the opposite side of the referenced vtab column + let expr = if is_rhs { lhs } else { rhs }; + // argv_index is 1-based; adjust to get the proper register offset. + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + expr, + target_reg, + &t_ctx.resolver, + )?; + if cinfo.usable && usage.omit { + t_ctx.omit_predicates.push(pred_idx) + } + } + } + } + } + // If best_index provided an idx_str, translate it. + let maybe_idx_str = if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + Some(reg) + } else { + None + }; + ( + start_reg, + args_needed, + maybe_idx_str, + Some(index_info.idx_num), + ) + } else { + // For table-valued functions: translate the table args. + let args = match vtab.args.as_ref() { + Some(args) => args, + None => &vec![], + }; + let start_reg = program.alloc_registers(args.len()); + let mut cur_reg = start_reg; + for arg in args { + let reg = cur_reg; + cur_reg += 1; + let _ = + translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + } + (start_reg, args.len(), None, None) }; - for arg in args { - let reg = cur_reg; - cur_reg += 1; - let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; - } + + // Emit VFilter with the computed arguments. program.emit_insn(Insn::VFilter { cursor_id, - pc_if_empty: loop_end, - arg_count: table.args.as_ref().map_or(0, |args| args.len()), + arg_count: count, args_reg: start_reg, + idx_str: maybe_idx_str, + idx_num: maybe_idx_int.unwrap_or(0) as usize, + pc_if_empty: loop_end, }); } program.resolve_label(loop_start, program.offset()); @@ -320,10 +406,9 @@ pub fn open_loop( }); } - for cond in predicates - .iter() - .filter(|cond| cond.should_eval_at_loop(table_index)) - { + for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| { + cond.should_eval_at_loop(table_index) && !t_ctx.omit_predicates.contains(i) + }) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index bb581ab13..25a4fd7ef 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,4 +1,5 @@ use core::fmt; +use limbo_ext::{ConstraintInfo, ConstraintOp}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -16,6 +17,7 @@ use crate::{ use crate::{ schema::{PseudoTable, Type}, types::SeekOp, + util::can_pushdown_predicate, }; #[derive(Debug, Clone)] @@ -75,6 +77,114 @@ impl WhereTerm { } } +use crate::ast::{Expr, Operator}; + +// This function takes an operator and returns the operator you would obtain if the operands were swapped. +// e.g. "literal < column" +// which is not the canonical order for constraint pushdown. +// This function will return > so that the expression can be treated as if it were written "column > literal" +fn reverse_operator(op: &Operator) -> Option { + match op { + Operator::Equals => Some(Operator::Equals), + Operator::Less => Some(Operator::Greater), + Operator::LessEquals => Some(Operator::GreaterEquals), + Operator::Greater => Some(Operator::Less), + Operator::GreaterEquals => Some(Operator::LessEquals), + Operator::NotEquals => Some(Operator::NotEquals), + Operator::Is => Some(Operator::Is), + Operator::IsNot => Some(Operator::IsNot), + _ => None, + } +} + +fn to_ext_constraint_op(op: &Operator) -> Option { + match op { + Operator::Equals => Some(ConstraintOp::Eq), + Operator::Less => Some(ConstraintOp::Lt), + Operator::LessEquals => Some(ConstraintOp::Le), + Operator::Greater => Some(ConstraintOp::Gt), + Operator::GreaterEquals => Some(ConstraintOp::Ge), + Operator::NotEquals => Some(ConstraintOp::Ne), + _ => None, + } +} + +/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. +/// It determines whether or not it involves the given table and whether or not it can +/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex +/// method, which will possibly calculate some information to improve the query plan, that we can send +/// back to it as arguments for the VFilter operation. +/// is going to be filtered against: e.g: +/// 'SELECT key, value FROM vtab WHERE key = 'some_key'; +/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from +/// the filtration in the vdbe layer. +pub fn convert_where_to_vtab_constraint( + term: &WhereTerm, + table_index: usize, + pred_idx: usize, +) -> Option { + if term.from_outer_join { + return None; + } + let Expr::Binary(lhs, op, rhs) = &term.expr else { + return None; + }; + let expr_is_ready = |e: &Expr| -> bool { can_pushdown_predicate(e, table_index) }; + let (vcol_idx, op_for_vtab, usable, is_rhs) = match (&**lhs, &**rhs) { + ( + Expr::Column { + table: tbl_l, + column: col_l, + .. + }, + Expr::Column { + table: tbl_r, + column: col_r, + .. + }, + ) => { + // one side must be the virtual table + let vtab_on_l = *tbl_l == table_index; + let vtab_on_r = *tbl_r == table_index; + if vtab_on_l == vtab_on_r { + return None; // either both or none -> not convertible + } + + if vtab_on_l { + // vtab on left side: operator unchanged + let usable = *tbl_r < table_index; // usable if the other table is already positioned + (col_l, op, usable, false) + } else { + // vtab on right side of the expr: reverse operator + let usable = *tbl_l < table_index; + (col_r, &reverse_operator(op).unwrap_or(*op), usable, true) + } + } + (Expr::Column { table, column, .. }, other) if *table == table_index => { + ( + column, + op, + expr_is_ready(other), // literal / earlier‑table / deterministic func ? + false, + ) + } + (other, Expr::Column { table, column, .. }) if *table == table_index => ( + column, + &reverse_operator(op).unwrap_or(*op), + expr_is_ready(other), + true, + ), + + _ => return None, // does not involve the virtual table at all + }; + + Some(ConstraintInfo { + column_index: *vcol_idx as u32, + op: to_ext_constraint_op(op_for_vtab)?, + usable, + plan_info: ConstraintInfo::pack_plan_info(pred_idx as u32, is_rhs), + }) +} /// The loop index where to evaluate the condition. /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), /// because that is the rightmost table that it references. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 87ddddd63..71cb72348 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -83,6 +83,7 @@ pub fn emit_subquery<'a>( reg_offset: plan.offset.map(|_| program.alloc_register()), reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()), resolver: Resolver::new(t_ctx.resolver.symbol_table), + omit_predicates: Vec::new(), }; let subquery_body_end_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { diff --git a/core/util.rs b/core/util.rs index b3ce8ecd0..f518df6f4 100644 --- a/core/util.rs +++ b/core/util.rs @@ -2,6 +2,7 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Liter use std::{rc::Rc, sync::Arc}; use crate::{ + function::Func, schema::{self, Column, Schema, Type}, types::{OwnedValue, OwnedValueType}, LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, @@ -565,6 +566,39 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res .collect::>()) } +/// This function checks if a given expression is a constant value that can be pushed down to the database engine. +/// It is expected to be called with the other half of a binary expression with an Expr::Column +pub fn can_pushdown_predicate(expr: &Expr, table_idx: usize) -> bool { + match expr { + Expr::Literal(_) => true, + Expr::Column { table, .. } => *table <= table_idx, + Expr::Binary(lhs, _, rhs) => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap(), table_idx), + Expr::Unary(_, expr) => can_pushdown_predicate(expr, table_idx), + Expr::FunctionCall { args, name, .. } => { + let function = crate::function::Func::resolve_function( + &name.0, + args.as_ref().map_or(0, |a| a.len()), + ); + // is deterministic + matches!(function, Ok(Func::Scalar(_))) + } + Expr::Like { lhs, rhs, .. } => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Between { + lhs, start, end, .. + } => { + can_pushdown_predicate(lhs, table_idx) + && can_pushdown_predicate(start, table_idx) + && can_pushdown_predicate(end, table_idx) + } + _ => false, + } +} + #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index d00ee6129..de871f54c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -966,6 +966,8 @@ pub fn op_vfilter( pc_if_empty, arg_count, args_reg, + idx_str, + idx_num, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -977,11 +979,21 @@ pub fn op_vfilter( let has_rows = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_virtual_mut(); - let mut args = Vec::new(); + let mut args = Vec::with_capacity(*arg_count); for i in 0..*arg_count { - args.push(state.registers[args_reg + i].get_owned_value().clone()); + args.push( + state.registers[args_reg + i] + .get_owned_value() + .clone() + .to_ffi(), + ); } - virtual_table.filter(cursor, *arg_count, args)? + let idx_str = if let Some(idx_str) = idx_str { + Some(state.registers[*idx_str].get_owned_value().to_string()) + } else { + None + }; + virtual_table.filter(cursor, *idx_num as i32, idx_str, *arg_count, args)? }; if !has_rows { state.pc = pc_if_empty.to_offset_int(); diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f1276798f..56f44bd2b 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -289,6 +289,8 @@ pub enum Insn { pc_if_empty: BranchOffset, arg_count: usize, args_reg: usize, + idx_str: Option, + idx_num: usize, }, /// Read a column from the current row of the virtual table cursor. diff --git a/extensions/completion/src/lib.rs b/extensions/completion/src/lib.rs index 09b09c479..53358c23c 100644 --- a/extensions/completion/src/lib.rs +++ b/extensions/completion/src/lib.rs @@ -91,8 +91,8 @@ impl VTabModule for CompletionVTab { cursor.eof() } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { - if args.len() == 0 || args.len() > 2 { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { + if args.is_empty() || args.len() > 2 { return ResultCode::InvalidArgs; } cursor.reset(); diff --git a/extensions/core/src/lib.rs b/extensions/core/src/lib.rs index e73b2b894..99729de6c 100644 --- a/extensions/core/src/lib.rs +++ b/extensions/core/src/lib.rs @@ -15,7 +15,10 @@ pub use types::{ResultCode, Value, ValueType}; #[cfg(feature = "vfs")] pub use vfs_modules::{RegisterVfsFn, VfsExtension, VfsFile, VfsFileImpl, VfsImpl, VfsInterface}; use vtabs::RegisterModuleFn; -pub use vtabs::{VTabCursor, VTabKind, VTabModule, VTabModuleImpl}; +pub use vtabs::{ + ConstraintInfo, ConstraintOp, ConstraintUsage, ExtIndexInfo, IndexInfo, OrderByInfo, + VTabCursor, VTabKind, VTabModule, VTabModuleImpl, +}; pub type ExtResult = std::result::Result; diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index 83b3dae78..5d86457f7 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -22,6 +22,7 @@ pub struct VTabModuleImpl { pub update: VtabFnUpdate, pub rowid: VtabRowIDFn, pub destroy: VtabFnDestroy, + pub best_idx: BestIdxFn, } #[cfg(feature = "core_only")] @@ -43,8 +44,13 @@ pub type VtabFnCreateSchema = unsafe extern "C" fn(args: *const Value, argc: i32 pub type VtabFnOpen = unsafe extern "C" fn(*const c_void) -> *const c_void; -pub type VtabFnFilter = - unsafe extern "C" fn(cursor: *const c_void, argc: i32, argv: *const Value) -> ResultCode; +pub type VtabFnFilter = unsafe extern "C" fn( + cursor: *const c_void, + argc: i32, + argv: *const Value, + idx_str: *const c_char, + idx_num: i32, +) -> ResultCode; pub type VtabFnColumn = unsafe extern "C" fn(cursor: *const c_void, idx: u32) -> Value; @@ -62,6 +68,12 @@ pub type VtabFnUpdate = unsafe extern "C" fn( ) -> ResultCode; pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode; +pub type BestIdxFn = unsafe extern "C" fn( + constraints: *const ConstraintInfo, + constraint_len: i32, + order_by: *const OrderByInfo, + order_by_len: i32, +) -> ExtIndexInfo; #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq)] @@ -78,7 +90,11 @@ pub trait VTabModule: 'static { fn create_schema(args: &[Value]) -> String; fn open(&self) -> Result; - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode; + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_info: Option<(&str, i32)>, + ) -> ResultCode; fn column(cursor: &Self::VCursor, idx: u32) -> Result; fn next(cursor: &mut Self::VCursor) -> ResultCode; fn eof(cursor: &Self::VCursor) -> bool; @@ -94,6 +110,22 @@ pub trait VTabModule: 'static { fn destroy(&mut self) -> Result<(), Self::Error> { Ok(()) } + fn best_index(_constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + IndexInfo { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: _constraints + .iter() + .map(|_| ConstraintUsage { + argv_index: Some(0), + omit: false, + }) + .collect(), + } + } } pub trait VTabCursor: Sized { @@ -103,3 +135,172 @@ pub trait VTabCursor: Sized { fn eof(&self) -> bool; fn next(&mut self) -> ResultCode; } + +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ConstraintOp { + Eq = 2, + Lt = 4, + Le = 8, + Gt = 16, + Ge = 32, + Match = 64, + Like = 65, + Glob = 66, + Regexp = 67, + Ne = 68, + IsNot = 69, + IsNotNull = 70, + IsNull = 71, + Is = 72, + In = 73, +} + +#[repr(C)] +#[derive(Copy, Clone)] +/// Describes an ORDER BY clause in a query involving a virtual table. +/// Passed along with the constraints to xBestIndex. +pub struct OrderByInfo { + /// The index of the column referenced in the ORDER BY clause. + pub column_index: u32, + /// Whether or not the clause is in descending order. + pub desc: bool, +} + +/// The internal (core) representation of an 'index' on a virtual table. +/// Returned from xBestIndex and then processed and passed to VFilter. +#[derive(Debug, Clone)] +pub struct IndexInfo { + /// The index number, used to identify the index internally by the VTab + pub idx_num: i32, + /// Optional index name. these are passed to vfilter in a tuple (idx_num, idx_str) + pub idx_str: Option, + /// Whether the index is used for order by + pub order_by_consumed: bool, + /// TODO: for eventual cost based query planning + pub estimated_cost: f64, + /// Estimated number of rows that the query will return + pub estimated_rows: u32, + /// List of constraints that can be used to optimize the query. + pub constraint_usages: Vec, +} +impl Default for IndexInfo { + fn default() -> Self { + Self { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: Vec::new(), + } + } +} + +impl IndexInfo { + /// + /// Converts IndexInfo to an FFI-safe `ExtIndexInfo`. + /// This method transfers ownership of `constraint_usages` and `idx_str`, + /// which must later be reclaimed using `from_ffi` to prevent leaks. + pub fn to_ffi(self) -> ExtIndexInfo { + let len = self.constraint_usages.len(); + let ptr = Box::into_raw(self.constraint_usages.into_boxed_slice()) as *mut ConstraintUsage; + let idx_str_len = self.idx_str.as_ref().map(|s| s.len()).unwrap_or(0); + let c_idx_str = self + .idx_str + .map(|s| std::ffi::CString::new(s).unwrap().into_raw()) + .unwrap_or(std::ptr::null_mut()); + ExtIndexInfo { + idx_num: self.idx_num, + estimated_cost: self.estimated_cost, + estimated_rows: self.estimated_rows, + order_by_consumed: self.order_by_consumed, + constraint_usages_ptr: ptr, + constraint_usage_len: len, + idx_str: c_idx_str as *mut _, + idx_str_len, + } + } + + /// Reclaims ownership of `constraint_usages` and `idx_str` from an FFI-safe `ExtIndexInfo`. + /// # Safety + /// This method is unsafe because it can cause memory leaks if not used correctly. + /// to_ffi and from_ffi are meant to send index info across ffi bounds then immediately reclaim it. + pub unsafe fn from_ffi(ffi: ExtIndexInfo) -> Self { + let constraint_usages = unsafe { + Box::from_raw(std::slice::from_raw_parts_mut( + ffi.constraint_usages_ptr, + ffi.constraint_usage_len, + )) + .to_vec() + }; + let idx_str = if ffi.idx_str.is_null() { + None + } else { + Some(unsafe { + std::ffi::CString::from_raw(ffi.idx_str as *mut _) + .to_string_lossy() + .into_owned() + }) + }; + Self { + idx_num: ffi.idx_num, + idx_str, + order_by_consumed: ffi.order_by_consumed, + estimated_cost: ffi.estimated_cost, + estimated_rows: ffi.estimated_rows, + constraint_usages, + } + } +} + +#[repr(C)] +#[derive(Clone, Debug)] +/// FFI representation of IndexInfo. +pub struct ExtIndexInfo { + pub idx_num: i32, + pub idx_str: *const u8, + pub idx_str_len: usize, + pub order_by_consumed: bool, + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages_ptr: *mut ConstraintUsage, + pub constraint_usage_len: usize, +} + +/// Returned from xBestIndex to describe how the virtual table +/// can use the constraints in the WHERE clause of a query. +#[derive(Debug, Clone, Copy)] +pub struct ConstraintUsage { + /// 1 based index of the argument passed + pub argv_index: Option, + /// If true, core can omit this constraint in the vdbe layer. + pub omit: bool, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +/// The primary argument to xBestIndex, which describes a constraint +/// in a query involving a virtual table. +pub struct ConstraintInfo { + /// The index of the column referenced in the WHERE clause. + pub column_index: u32, + /// The operator used in the clause. + pub op: ConstraintOp, + /// Whether or not constraint is garaunteed to be enforced. + pub usable: bool, + /// packed integer with the index of the constraint in the planner, + /// and the side of the binary expr that the relevant column is on. + pub plan_info: u32, +} + +impl ConstraintInfo { + #[inline(always)] + pub fn pack_plan_info(pred_idx: u32, is_right_side: bool) -> u32 { + ((pred_idx) << 1) | (is_right_side as u32) + } + #[inline(always)] + pub fn unpack_plan_info(&self) -> (usize, bool) { + ((self.plan_info >> 1) as usize, (self.plan_info & 1) != 0) + } +} diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 43028eed5..21d3a89fa 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -45,7 +45,7 @@ impl VTabModule for GenerateSeriesVTab { }) } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { // args are the start, stop, and step if args.is_empty() || args.len() > 3 { return ResultCode::InvalidArgs; @@ -240,7 +240,7 @@ mod tests { ]; // Initialize cursor through filter - match GenerateSeriesVTab::filter(&mut cursor, &args) { + match GenerateSeriesVTab::filter(&mut cursor, &args, None) { ResultCode::OK => (), ResultCode::EOF => return Ok(vec![]), err => return Err(err), @@ -293,7 +293,7 @@ mod tests { let expected_len = series_expected_length(&series); assert_eq!( values.len(), - expected_len as usize, + expected_len, "Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}", start, stop, @@ -546,7 +546,7 @@ mod tests { let start = series.start; let stop = series.stop; let step = series.step; - let tbl = GenerateSeriesVTab::default(); + let tbl = GenerateSeriesVTab {}; let mut cursor = tbl.open().unwrap(); let args = vec![ @@ -556,7 +556,7 @@ mod tests { ]; // Initialize cursor through filter - GenerateSeriesVTab::filter(&mut cursor, &args); + GenerateSeriesVTab::filter(&mut cursor, &args, None); let mut rowids = vec![]; while !GenerateSeriesVTab::eof(&cursor) { diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index beff17004..5c6495595 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ExtResult, ResultCode, VTabCursor, VTabKind, VTabModule, - VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult, + IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -40,27 +40,99 @@ impl VTabModule for KVStoreVTab { } fn open(&self) -> Result { + let _ = env_logger::try_init(); Ok(KVStoreCursor { rows: Vec::new(), index: None, }) } - fn filter(cursor: &mut Self::VCursor, _args: &[Value]) -> ResultCode { - let store = GLOBAL_STORE.lock().unwrap(); - cursor.rows = store - .iter() - .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) - .collect(); - cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - - if cursor.rows.is_empty() { - cursor.index = None; - return ResultCode::EOF; - } else { - cursor.index = Some(0); + fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + // Look for: key = ? + for constraint in constraints.iter() { + if constraint.usable + && constraint.op == ConstraintOp::Eq + && constraint.column_index == 0 + { + // this extension wouldn't support order by but for testing purposes, + // we will consume it if we find an ASC order by clause on the value column + let mut consumed = false; + if let Some(order) = _order_by.first() { + if order.column_index == 1 && !order.desc { + consumed = true; + } + } + log::debug!("xBestIndex: constraint found for 'key = ?'"); + return IndexInfo { + idx_num: 1, + idx_str: Some("key_eq".to_string()), + order_by_consumed: consumed, + estimated_cost: 10.0, + estimated_rows: 4, + constraint_usages: vec![ConstraintUsage { + omit: true, + argv_index: Some(1), + }], + }; + } + } + + // fallback: full scan + log::debug!("No usable constraints found, using full scan"); + IndexInfo { + idx_num: -1, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1000.0, + ..Default::default() + } + } + + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_str: Option<(&str, i32)>, + ) -> ResultCode { + match idx_str { + Some(("key_eq", 1)) => { + let key = args + .first() + .and_then(|v| v.to_text()) + .map(|s| s.to_string()); + log::debug!("idx_str found: key_eq\n value: {:?}", key); + if let Some(key) = key { + let rowid = hash_key(&key); + let store = GLOBAL_STORE.lock().unwrap(); + if let Some((k, v)) = store.get(&rowid) { + cursor.rows.push((rowid, k.clone(), v.clone())); + cursor.index = Some(0); + } else { + cursor.rows.clear(); + cursor.index = None; + return ResultCode::EOF; + } + return ResultCode::OK; + } + cursor.rows.clear(); + cursor.index = None; + ResultCode::OK + } + _ => { + let store = GLOBAL_STORE.lock().unwrap(); + cursor.rows = store + .iter() + .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) + .collect(); + cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); + if cursor.rows.is_empty() { + cursor.index = None; + ResultCode::EOF + } else { + cursor.index = Some(0); + ResultCode::OK + } + } } - ResultCode::OK } fn insert(&mut self, values: &[Value]) -> Result { @@ -96,6 +168,7 @@ impl VTabModule for KVStoreVTab { let _ = self.insert(values)?; Ok(()) } + fn eof(cursor: &Self::VCursor) -> bool { cursor.index.is_some_and(|s| s >= cursor.rows.len()) || cursor.index.is_none() } @@ -119,7 +192,7 @@ impl VTabModule for KVStoreVTab { _ => Err("Invalid column".into()), } } else { - Err("cursor out of range".into()) + Err("Invalid Column".into()) } } diff --git a/macros/src/lib.rs b/macros/src/lib.rs index c03788c7c..d47101589 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -455,6 +455,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { let update_fn_name = format_ident!("update_{}", struct_name); let rowid_fn_name = format_ident!("rowid_{}", struct_name); let destroy_fn_name = format_ident!("destroy_{}", struct_name); + let best_idx_fn_name = format_ident!("best_idx_{}", struct_name); let expanded = quote! { impl #struct_name { @@ -490,13 +491,20 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { cursor: *const ::std::ffi::c_void, argc: i32, argv: *const ::limbo_ext::Value, + idx_str: *const ::std::ffi::c_char, + idx_num: i32, ) -> ::limbo_ext::ResultCode { if cursor.is_null() { return ::limbo_ext::ResultCode::Error; } let cursor = unsafe { &mut *(cursor as *mut <#struct_name as ::limbo_ext::VTabModule>::VCursor) }; let args = ::std::slice::from_raw_parts(argv, argc as usize); - <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args) + let idx_str = if idx_str.is_null() { + None + } else { + Some((unsafe { ::std::ffi::CStr::from_ptr(idx_str).to_str().unwrap() }, idx_num)) + }; + <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args, idx_str) } #[no_mangle] @@ -613,6 +621,18 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { return ::limbo_ext::ResultCode::OK; } + #[no_mangle] + pub unsafe extern "C" fn #best_idx_fn_name( + constraints: *const ::limbo_ext::ConstraintInfo, + n_constraints: i32, + order_by: *const ::limbo_ext::OrderByInfo, + n_order_by: i32, + ) -> ::limbo_ext::ExtIndexInfo { + let constraints = if n_constraints > 0 { std::slice::from_raw_parts(constraints, n_constraints as usize) } else { &[] }; + let order_by = if n_order_by > 0 { std::slice::from_raw_parts(order_by, n_order_by as usize) } else { &[] }; + <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi() + } + #[no_mangle] pub unsafe extern "C" fn #register_fn_name( api: *const ::limbo_ext::ExtensionApi @@ -636,6 +656,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { update: Self::#update_fn_name, rowid: Self::#rowid_fn_name, destroy: Self::#destroy_fn_name, + best_idx: Self::#best_idx_fn_name, }; (api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND) } diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index ab57e4178..d4d55fca1 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -343,7 +343,6 @@ def test_kv(): # first, create a normal table to ensure no issues limbo.execute_dot("CREATE TABLE other (a,b,c);") limbo.execute_dot("INSERT INTO other values (23,32,23);") - limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Module kv_store not found" in res,