Implement xBestIndex for the virtual table API to improve query planning

This commit is contained in:
PThorpe92
2025-03-26 09:16:03 -04:00
parent 30c488e35d
commit 853af16946
11 changed files with 395 additions and 24 deletions

View File

@@ -35,7 +35,7 @@ pub use io::UringIO;
pub use io::{
Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO,
};
use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl};
use limbo_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl};
use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser};
use parking_lot::RwLock;
use schema::{Column, Schema};
@@ -641,6 +641,21 @@ impl VirtualTable {
pub(crate) fn rowid(&self, cursor: &VTabOpaqueCursor) -> i64 {
unsafe { (self.implementation.rowid)(cursor.as_ptr()) }
}
/// Asks the module's `best_idx` callback for a query plan covering the given
/// constraints and ordering terms, and converts the FFI result back into an
/// owned [`IndexInfo`].
///
/// The slices are handed to the callback as pointer/length pairs; lengths are
/// passed as `i32`.
pub(crate) fn best_index(
    &self,
    constraints: &[ConstraintInfo],
    order_by: &[OrderByInfo],
) -> IndexInfo {
    let callback = self.implementation.best_idx;
    let n_constraints = constraints.len() as i32;
    let n_order_by = order_by.len() as i32;
    unsafe {
        // Both pointers stay valid for the duration of the call because the
        // slices are borrowed for the whole function.
        let raw_info = callback(
            constraints.as_ptr(),
            n_constraints,
            order_by.as_ptr(),
            n_order_by,
        );
        // Take back ownership of the buffers the callback handed over.
        IndexInfo::from_ffi(raw_info)
    }
}
/// takes ownership of the provided Args
pub(crate) fn from_args(
tbl_name: Option<&str>,
@@ -693,6 +708,8 @@ impl VirtualTable {
pub fn filter(
&self,
cursor: &VTabOpaqueCursor,
idx_num: i32,
idx_str: Option<String>,
arg_count: usize,
args: Vec<OwnedValue>,
) -> Result<bool> {
@@ -701,8 +718,18 @@ impl VirtualTable {
let ownedvalue_arg = args.get(i).unwrap();
filter_args.push(ownedvalue_arg.to_ffi());
}
let c_idx_str = idx_str
.map(|s| std::ffi::CString::new(s).unwrap())
.map(|cstr| cstr.into_raw())
.unwrap_or(std::ptr::null_mut());
let rc = unsafe {
(self.implementation.filter)(cursor.as_ptr(), arg_count as i32, filter_args.as_ptr())
(self.implementation.filter)(
cursor.as_ptr(),
arg_count as i32,
filter_args.as_ptr(),
c_idx_str,
idx_num,
)
};
for arg in filter_args {
unsafe {

View File

@@ -9,6 +9,8 @@ use crate::{
},
Result,
};
use limbo_ext::{ConstraintInfo, OrderByInfo};
use limbo_sqlite3_parser::ast;
use super::{
aggregation::translate_aggregation_step,
@@ -18,8 +20,8 @@ use super::{
optimizer::Optimizable,
order_by::{order_by_sorter_insert, sorter_insert},
plan::{
IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType,
TableReference, WhereTerm,
try_convert_to_constraint_info, IterationDirection, Operation, Search, SeekDef, SelectPlan,
SelectQueryType, TableReference, WhereTerm,
},
};
@@ -251,9 +253,6 @@ pub fn open_loop(
end_offset: loop_end,
});
// These are predicates evaluated outside of the subquery,
// so they are translated here.
// E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
@@ -290,12 +289,25 @@ pub fn open_loop(
pc_if_empty: loop_end,
});
}
}
if let Table::Virtual(ref table) = table.table {
} else if let Some(vtab) = table.table.virtual_table() {
let constraints: Vec<ConstraintInfo> = predicates
.iter()
.filter(|p| p.applies_to_table(&table.table, tables))
.filter_map(|p| try_convert_to_constraint_info(p, table_index))
.collect();
let order_by = vec![OrderByInfo {
column_index: *t_ctx
.result_column_indexes_in_orderby_sorter
.first()
.unwrap_or(&0) as u32,
desc: matches!(iter_dir, IterationDirection::Backwards),
}];
let index_info = vtab.best_index(&constraints, &order_by);
let start_reg =
program.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0));
program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0));
let mut cur_reg = start_reg;
let args = match table.args.as_ref() {
let args = match vtab.args.as_ref() {
Some(args) => args,
None => &vec![],
};
@@ -304,11 +316,22 @@ pub fn open_loop(
cur_reg += 1;
let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?;
}
let mut maybe_idx_str_reg = None;
if let Some(idx_str) = index_info.idx_str {
let reg = program.alloc_register();
program.emit_insn(Insn::String8 {
dest: reg,
value: idx_str,
});
maybe_idx_str_reg = Some(reg);
}
program.emit_insn(Insn::VFilter {
cursor_id,
pc_if_empty: loop_end,
arg_count: table.args.as_ref().map_or(0, |args| args.len()),
arg_count: vtab.args.as_ref().map_or(0, |args| args.len()),
args_reg: start_reg,
idx_str: maybe_idx_str_reg,
idx_num: index_info.idx_num as usize,
});
}
program.resolve_label(loop_start, program.offset());

View File

@@ -1,4 +1,5 @@
use core::fmt;
use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo};
use limbo_sqlite3_parser::ast::{self, SortOrder};
use std::{
cmp::Ordering,
@@ -73,8 +74,96 @@ impl WhereTerm {
pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool {
self.eval_at == EvalAt::Loop(loop_idx)
}
/// Returns `true` when this term references a column of `table`.
///
/// A term that is a bare column reference matches when the table it points
/// at (looked up in `tables`) equals `table`. Binary expressions match when
/// either operand does. Without descending into binary expressions a
/// predicate such as `key = 1` would never match, and the constraint list
/// built from `try_convert_to_constraint_info` (which only accepts binary
/// terms) would always be empty.
///
/// Any other expression shape yields `false`.
///
/// # Panics
/// Panics if a column's table index is out of bounds for `tables`.
pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool {
    /// Recursive walk over column references and binary operands.
    fn references_table(expr: &ast::Expr, table: &Table, tables: &[TableReference]) -> bool {
        match expr {
            ast::Expr::Column {
                table: table_idx, ..
            } => {
                let table_ref = &tables[*table_idx];
                table_ref.table == *table
            }
            ast::Expr::Binary(lhs, _, rhs) => {
                references_table(lhs, table, tables) || references_table(rhs, table, tables)
            }
            _ => false,
        }
    }
    references_table(&self.expr, table, tables)
}
}
use crate::ast::{Expr, Operator};
use super::optimizer::{ConstantPredicate, Optimizable};
/// Returns the operator to use when the two operands of a comparison are
/// swapped (`a < b` becomes `b > a`).
///
/// Equality-like operators map to themselves, ordering operators are
/// mirrored, and every other operator yields `None`.
fn reverse_operator(op: &Operator) -> Option<Operator> {
    let swapped = match op {
        // Ordering comparisons flip direction.
        Operator::Less => Operator::Greater,
        Operator::Greater => Operator::Less,
        Operator::LessEquals => Operator::GreaterEquals,
        Operator::GreaterEquals => Operator::LessEquals,
        // Symmetric comparisons are unchanged by swapping operands.
        Operator::Equals => Operator::Equals,
        Operator::NotEquals => Operator::NotEquals,
        Operator::Is => Operator::Is,
        Operator::IsNot => Operator::IsNot,
        _ => return None,
    };
    Some(swapped)
}
/// Tries to describe a WHERE term as a [`ConstraintInfo`] for the table at
/// `table_index`.
///
/// Returns `None` when the term comes from an outer join, is not a binary
/// expression, does not compare a column against an operand accepted by
/// `check_constant`, refers to a different table, or uses an operator with no
/// `ConstraintOp` counterpart. When the column is on the right-hand side the
/// operator is mirrored so the constraint always reads `column <op> value`.
pub fn try_convert_to_constraint_info(
    term: &WhereTerm,
    table_index: usize,
) -> Option<ConstraintInfo> {
    if term.from_outer_join {
        return None;
    }
    let Expr::Binary(left, operator, right) = &term.expr else {
        return None;
    };
    // NOTE(review): an operand only qualifies when `check_constant` reports
    // `AlwaysTrue`; confirm this is the intended "is a constant" test.
    let (column_side, effective_op) = match (&**left, &**right) {
        (Expr::Column { .. }, other)
            if other.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) =>
        {
            (left, *operator)
        }
        (other, Expr::Column { .. })
            if other.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) =>
        {
            // Column is on the right: mirror the operator, falling back to
            // the original one when it has no mirrored form.
            (right, reverse_operator(operator).unwrap_or(*operator))
        }
        _ => return None,
    };
    let Expr::Column {
        table: owner_idx,
        column,
        ..
    } = **column_side
    else {
        return None;
    };
    if owner_idx != table_index {
        return None;
    }
    let op = match effective_op {
        Operator::Equals => ConstraintOp::Eq,
        Operator::NotEquals => ConstraintOp::Ne,
        Operator::Less => ConstraintOp::Lt,
        Operator::LessEquals => ConstraintOp::Le,
        Operator::Greater => ConstraintOp::Gt,
        Operator::GreaterEquals => ConstraintOp::Ge,
        Operator::Is => ConstraintOp::Is,
        Operator::IsNot => ConstraintOp::IsNot,
        _ => return None,
    };
    Some(ConstraintInfo {
        column_index: column as u32,
        op,
        usable: true,
    })
}
/// The loop index where to evaluate the condition.
/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
/// because that is the rightmost table that it references.

View File

@@ -966,6 +966,8 @@ pub fn op_vfilter(
pc_if_empty,
arg_count,
args_reg,
idx_str,
idx_num,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
@@ -981,7 +983,12 @@ pub fn op_vfilter(
for i in 0..*arg_count {
args.push(state.registers[args_reg + i].get_owned_value().clone());
}
virtual_table.filter(cursor, *arg_count, args)?
let idx_str = if let Some(idx_str) = idx_str {
Some(state.registers[*idx_str].get_owned_value().to_string())
} else {
None
};
virtual_table.filter(cursor, *idx_num as i32, idx_str, *arg_count, args)?
};
if !has_rows {
state.pc = pc_if_empty.to_offset_int();

View File

@@ -289,6 +289,8 @@ pub enum Insn {
pc_if_empty: BranchOffset,
arg_count: usize,
args_reg: usize,
idx_str: Option<usize>,
idx_num: usize,
},
/// Read a column from the current row of the virtual table cursor.

View File

@@ -91,8 +91,8 @@ impl VTabModule for CompletionVTab {
cursor.eof()
}
fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode {
if args.len() == 0 || args.len() > 2 {
fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode {
if args.is_empty() || args.len() > 2 {
return ResultCode::InvalidArgs;
}
cursor.reset();

View File

@@ -15,7 +15,10 @@ pub use types::{ResultCode, Value, ValueType};
#[cfg(feature = "vfs")]
pub use vfs_modules::{RegisterVfsFn, VfsExtension, VfsFile, VfsFileImpl, VfsImpl, VfsInterface};
use vtabs::RegisterModuleFn;
pub use vtabs::{VTabCursor, VTabKind, VTabModule, VTabModuleImpl};
pub use vtabs::{
ConstraintInfo, ConstraintOp, ConstraintUsage, ExtIndexInfo, IndexInfo, OrderByInfo,
VTabCursor, VTabKind, VTabModule, VTabModuleImpl,
};
pub type ExtResult<T> = std::result::Result<T, ResultCode>;

View File

@@ -22,6 +22,7 @@ pub struct VTabModuleImpl {
pub update: VtabFnUpdate,
pub rowid: VtabRowIDFn,
pub destroy: VtabFnDestroy,
pub best_idx: BestIdxFn,
}
#[cfg(feature = "core_only")]
@@ -43,8 +44,13 @@ pub type VtabFnCreateSchema = unsafe extern "C" fn(args: *const Value, argc: i32
pub type VtabFnOpen = unsafe extern "C" fn(*const c_void) -> *const c_void;
pub type VtabFnFilter =
unsafe extern "C" fn(cursor: *const c_void, argc: i32, argv: *const Value) -> ResultCode;
/// C-ABI signature of a virtual table module's `filter` entry point.
///
/// `argv` points at `argc` values. `idx_str` may be null; the derive-generated
/// wrapper only forwards `(idx_str, idx_num)` to `VTabModule::filter` when
/// `idx_str` is non-null.
pub type VtabFnFilter = unsafe extern "C" fn(
cursor: *const c_void,
argc: i32,
argv: *const Value,
idx_str: *const c_char,
idx_num: i32,
) -> ResultCode;
pub type VtabFnColumn = unsafe extern "C" fn(cursor: *const c_void, idx: u32) -> Value;
@@ -62,6 +68,12 @@ pub type VtabFnUpdate = unsafe extern "C" fn(
) -> ResultCode;
pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode;
/// C-ABI signature of a virtual table module's best-index callback.
///
/// Receives the candidate constraints and ORDER BY terms as pointer/length
/// pairs and returns the chosen plan as an `ExtIndexInfo`.
pub type BestIdxFn = unsafe extern "C" fn(
constraints: *const ConstraintInfo,
constraint_len: i32,
order_by: *const OrderByInfo,
order_by_len: i32,
) -> ExtIndexInfo;
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
@@ -78,7 +90,11 @@ pub trait VTabModule: 'static {
fn create_schema(args: &[Value]) -> String;
fn open(&self) -> Result<Self::VCursor, Self::Error>;
fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode;
fn filter(
cursor: &mut Self::VCursor,
args: &[Value],
idx_info: Option<(&str, i32)>,
) -> ResultCode;
fn column(cursor: &Self::VCursor, idx: u32) -> Result<Value, Self::Error>;
fn next(cursor: &mut Self::VCursor) -> ResultCode;
fn eof(cursor: &Self::VCursor) -> bool;
@@ -94,6 +110,22 @@ pub trait VTabModule: 'static {
fn destroy(&mut self) -> Result<(), Self::Error> {
Ok(())
}
/// Default plan used when a module does not override `best_index`.
///
/// Reports index number 0 with no index string, does not consume the
/// ORDER BY, uses a cost of 1,000,000 and `u32::MAX` estimated rows, and
/// returns one `ConstraintUsage` per incoming constraint.
fn best_index(_constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo {
    // NOTE(review): `argv_index` is described as 1-based on
    // `ConstraintUsage`; confirm whether `Some(0)` or `None` is the intended
    // "not passed to filter" marker.
    let untouched = ConstraintUsage {
        argv_index: Some(0),
        omit: false,
    };
    IndexInfo {
        constraint_usages: vec![untouched; _constraints.len()],
        estimated_rows: u32::MAX,
        estimated_cost: 1_000_000.0,
        order_by_consumed: false,
        idx_str: None,
        idx_num: 0,
    }
}
}
pub trait VTabCursor: Sized {
@@ -103,3 +135,137 @@ pub trait VTabCursor: Sized {
fn eof(&self) -> bool;
fn next(&mut self) -> ResultCode;
}
/// Comparison operator attached to a constraint offered to `best_index`.
///
/// Stored as a `u8` so it can be embedded in the `#[repr(C)]` `ConstraintInfo`.
///
/// NOTE(review): the discriminants resemble SQLite's
/// `SQLITE_INDEX_CONSTRAINT_*` codes, but SQLite assigns 4 to GT, 16 to LT and
/// 73 to LIMIT — confirm whether numeric parity with SQLite is intended.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ConstraintOp {
Eq = 2,
Lt = 4,
Le = 8,
Gt = 16,
Ge = 32,
Match = 64,
Like = 65,
Glob = 66,
Regexp = 67,
Ne = 68,
IsNot = 69,
IsNotNull = 70,
IsNull = 71,
Is = 72,
In = 73,
}
/// One ORDER BY term passed to `best_index`, laid out for FFI.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct OrderByInfo {
// Index of the column being ordered on.
pub column_index: u32,
// `true` for descending order.
pub desc: bool,
}
/// Plan description produced by a virtual table's `best_index`.
#[derive(Debug, Clone)]
pub struct IndexInfo {
    /// Module-defined plan identifier, forwarded to `filter`.
    pub idx_num: i32,
    /// Optional module-defined plan string, forwarded to `filter`.
    pub idx_str: Option<String>,
    /// Whether the module reports that it produces rows in the requested order.
    pub order_by_consumed: bool,
    /// TODO: for eventual cost based query planning
    pub estimated_cost: f64,
    pub estimated_rows: u32,
    /// Per-constraint usage entries returned by the module.
    pub constraint_usages: Vec<ConstraintUsage>,
}

impl Default for IndexInfo {
    /// A pessimistic plan: index 0, no index string, very high cost and row
    /// estimate, and no constraint usages.
    fn default() -> Self {
        Self {
            idx_num: 0,
            idx_str: None,
            order_by_consumed: false,
            estimated_cost: 1_000_000.0,
            estimated_rows: u32::MAX,
            constraint_usages: Vec::new(),
        }
    }
}

impl IndexInfo {
    /// Converts `IndexInfo` into an FFI-safe [`ExtIndexInfo`].
    ///
    /// Ownership of `constraint_usages` and `idx_str` moves into the returned
    /// raw pointers; they must be reclaimed exactly once with
    /// [`IndexInfo::from_ffi`] or the allocations leak.
    ///
    /// An `idx_str` containing an interior NUL byte is truncated at the first
    /// NUL (which is all a C-string reader could see anyway) instead of
    /// panicking, because this runs inside `extern "C"` entry points.
    pub fn to_ffi(self) -> ExtIndexInfo {
        let Self {
            idx_num,
            idx_str,
            order_by_consumed,
            estimated_cost,
            estimated_rows,
            constraint_usages,
        } = self;

        let constraint_usage_len = constraint_usages.len();
        let constraint_usages_ptr =
            Box::into_raw(constraint_usages.into_boxed_slice()) as *mut ConstraintUsage;

        let (idx_str, idx_str_len) = match idx_str {
            Some(s) => {
                let mut bytes = s.into_bytes();
                if let Some(nul) = bytes.iter().position(|&b| b == 0) {
                    bytes.truncate(nul);
                }
                let len = bytes.len();
                // Cannot fail after truncation; the default (empty) string is
                // only a panic-free fallback.
                let c_string = std::ffi::CString::new(bytes).unwrap_or_default();
                (c_string.into_raw() as *const u8, len)
            }
            None => (std::ptr::null(), 0),
        };

        ExtIndexInfo {
            idx_num,
            estimated_cost,
            estimated_rows,
            order_by_consumed,
            constraint_usages_ptr,
            constraint_usage_len,
            idx_str,
            idx_str_len,
        }
    }

    /// Reclaims ownership of `constraint_usages` and `idx_str` from an
    /// [`ExtIndexInfo`].
    ///
    /// A null `constraint_usages_ptr` is treated as "no usages" and a null
    /// `idx_str` as "no index string".
    ///
    /// # Safety
    /// `ffi` must have been produced by [`IndexInfo::to_ffi`] and must not have
    /// been passed to `from_ffi` before: non-null pointers are turned back
    /// into owning `Box`/`CString` values and freed, so stale, foreign, or
    /// already-reclaimed pointers cause undefined behavior (e.g. double free).
    /// `to_ffi` and `from_ffi` are meant to send index info across the FFI
    /// boundary and then immediately reclaim it.
    pub unsafe fn from_ffi(ffi: ExtIndexInfo) -> Self {
        let constraint_usages = if ffi.constraint_usages_ptr.is_null() {
            Vec::new()
        } else {
            // SAFETY: per the contract above, the pointer/length pair came
            // from `Box<[ConstraintUsage]>::into_raw` in `to_ffi`.
            unsafe {
                Box::from_raw(std::slice::from_raw_parts_mut(
                    ffi.constraint_usages_ptr,
                    ffi.constraint_usage_len,
                ))
                .into_vec()
            }
        };
        let idx_str = if ffi.idx_str.is_null() {
            None
        } else {
            // SAFETY: per the contract above, the pointer came from
            // `CString::into_raw` in `to_ffi`.
            let c_string =
                unsafe { std::ffi::CString::from_raw(ffi.idx_str as *mut std::ffi::c_char) };
            Some(c_string.to_string_lossy().into_owned())
        };
        Self {
            idx_num: ffi.idx_num,
            idx_str,
            order_by_consumed: ffi.order_by_consumed,
            estimated_cost: ffi.estimated_cost,
            estimated_rows: ffi.estimated_rows,
            constraint_usages,
        }
    }
}

/// FFI representation of [`IndexInfo`].
///
/// The two pointers own heap allocations created by [`IndexInfo::to_ffi`].
/// Cloning this struct copies the pointers, not the allocations, so only one
/// copy may ever be handed to [`IndexInfo::from_ffi`].
#[repr(C)]
#[derive(Clone, Debug)]
pub struct ExtIndexInfo {
    pub idx_num: i32,
    /// NUL-terminated index string, or null when there is none.
    pub idx_str: *const u8,
    /// Length of `idx_str` in bytes, excluding the terminating NUL.
    pub idx_str_len: usize,
    pub order_by_consumed: bool,
    pub estimated_cost: f64,
    pub estimated_rows: u32,
    /// Start of `constraint_usage_len` contiguous `ConstraintUsage` entries.
    pub constraint_usages_ptr: *mut ConstraintUsage,
    pub constraint_usage_len: usize,
}

/// How a module intends to use one of the constraints it was offered.
///
/// `#[repr(C)]` fixes the field order because entries are exchanged through
/// `ExtIndexInfo::constraint_usages_ptr`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ConstraintUsage {
    /// 1-based index into VFilter args
    pub argv_index: Option<u32>,
    /// if true, core skips checking it again
    pub omit: bool,
}
/// A single WHERE-clause constraint offered to `best_index`, laid out for FFI.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct ConstraintInfo {
// Index of the constrained column.
pub column_index: u32,
// Comparison operator applied to that column.
pub op: ConstraintOp,
// Whether the module may use this constraint.
pub usable: bool,
}

View File

@@ -45,7 +45,7 @@ impl VTabModule for GenerateSeriesVTab {
})
}
fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode {
fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode {
// args are the start, stop, and step
if args.is_empty() || args.len() > 3 {
return ResultCode::InvalidArgs;

View File

@@ -1,7 +1,7 @@
use lazy_static::lazy_static;
use limbo_ext::{
register_extension, scalar, ExtResult, ResultCode, VTabCursor, VTabKind, VTabModule,
VTabModuleDerive, Value,
register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo,
ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value,
};
#[cfg(not(target_family = "wasm"))]
use limbo_ext::{VfsDerive, VfsExtension, VfsFile};
@@ -46,7 +46,40 @@ impl VTabModule for KVStoreVTab {
})
}
fn filter(cursor: &mut Self::VCursor, _args: &[Value]) -> ResultCode {
/// Picks a plan for the key/value store.
///
/// If any usable constraint is an equality on column 0 (the key), reports
/// plan 1 / `"key_eq"` with a low cost; otherwise reports a full scan as
/// plan -1 with a higher cost. The ORDER BY terms are ignored.
fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo {
    // not exactly the ideal kind of table to demonstrate this on...
    let has_key_equality = constraints.iter().any(|constraint| {
        constraint.usable && constraint.op == ConstraintOp::Eq && constraint.column_index == 0
    });
    if has_key_equality {
        // key = ? is supported
        return IndexInfo {
            idx_num: 1, // arbitrary non-zero code to signify optimization
            idx_str: Some("key_eq".to_string()),
            order_by_consumed: false,
            estimated_cost: 10.0,
            ..Default::default()
        };
    }
    // fallback: full scan
    IndexInfo {
        idx_num: -1,
        idx_str: None,
        order_by_consumed: false,
        estimated_cost: 1000.0,
        ..Default::default()
    }
}
fn filter(
cursor: &mut Self::VCursor,
_args: &[Value],
_idx_str: Option<(&str, i32)>,
) -> ResultCode {
let store = GLOBAL_STORE.lock().unwrap();
cursor.rows = store
.iter()

View File

@@ -455,6 +455,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
let update_fn_name = format_ident!("update_{}", struct_name);
let rowid_fn_name = format_ident!("rowid_{}", struct_name);
let destroy_fn_name = format_ident!("destroy_{}", struct_name);
let best_idx_fn_name = format_ident!("best_idx_{}", struct_name);
let expanded = quote! {
impl #struct_name {
@@ -490,13 +491,20 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
cursor: *const ::std::ffi::c_void,
argc: i32,
argv: *const ::limbo_ext::Value,
idx_str: *const ::std::ffi::c_char,
idx_num: i32,
) -> ::limbo_ext::ResultCode {
if cursor.is_null() {
return ::limbo_ext::ResultCode::Error;
}
let cursor = unsafe { &mut *(cursor as *mut <#struct_name as ::limbo_ext::VTabModule>::VCursor) };
let args = ::std::slice::from_raw_parts(argv, argc as usize);
<#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args)
let idx_str = if idx_str.is_null() {
None
} else {
Some((unsafe { ::std::ffi::CStr::from_ptr(idx_str).to_str().unwrap() }, idx_num))
};
<#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args, idx_str)
}
#[no_mangle]
@@ -613,6 +621,18 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
return ::limbo_ext::ResultCode::OK;
}
#[no_mangle]
pub unsafe extern "C" fn #best_idx_fn_name(
    constraints: *const ::limbo_ext::ConstraintInfo,
    n_constraints: i32,
    order_by: *const ::limbo_ext::OrderByInfo,
    n_order_by: i32,
) -> ::limbo_ext::ExtIndexInfo {
    // FFI wrapper around `VTabModule::best_index`: rebuilds the two slices
    // from their pointer/length pairs and returns the plan via `to_ffi`.
    // A null pointer or non-positive length is treated as an empty slice,
    // since `slice::from_raw_parts` requires a non-null pointer and a
    // negative `i32` would wrap to a huge `usize`.
    let constraints: &[::limbo_ext::ConstraintInfo] =
        if constraints.is_null() || n_constraints <= 0 {
            &[]
        } else {
            ::std::slice::from_raw_parts(constraints, n_constraints as usize)
        };
    let order_by: &[::limbo_ext::OrderByInfo] = if order_by.is_null() || n_order_by <= 0 {
        &[]
    } else {
        ::std::slice::from_raw_parts(order_by, n_order_by as usize)
    };
    <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi()
}
#[no_mangle]
pub unsafe extern "C" fn #register_fn_name(
api: *const ::limbo_ext::ExtensionApi
@@ -636,6 +656,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream {
update: Self::#update_fn_name,
rowid: Self::#rowid_fn_name,
destroy: Self::#destroy_fn_name,
best_idx: Self::#best_idx_fn_name,
};
(api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND)
}