mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-18 17:14:20 +01:00
Integrate virtual tables with optimizer
This change connects virtual tables with the query optimizer. The optimizer now considers virtual tables during join order search and invokes their best_index callbacks to determine feasible access paths. Currently, this is not a visible change, since none of the existing extensions return information indicating that a plan is invalid.
This commit is contained in:
@@ -1,5 +1,4 @@
|
|||||||
use std::collections::HashSet;
|
use turso_sqlite3_parser::ast::SortOrder;
|
||||||
use turso_sqlite3_parser::ast::{self, SortOrder};
|
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@@ -15,11 +14,9 @@ use crate::{
|
|||||||
insn::{CmpInsFlags, IdxInsertFlags, Insn},
|
insn::{CmpInsFlags, IdxInsertFlags, Insn},
|
||||||
BranchOffset, CursorID,
|
BranchOffset, CursorID,
|
||||||
},
|
},
|
||||||
LimboError, Result,
|
Result,
|
||||||
};
|
};
|
||||||
|
|
||||||
use turso_ext::IndexInfo;
|
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
aggregation::translate_aggregation_step,
|
aggregation::translate_aggregation_step,
|
||||||
emitter::{OperationMode, TranslateCtx},
|
emitter::{OperationMode, TranslateCtx},
|
||||||
@@ -31,8 +28,8 @@ use super::{
|
|||||||
optimizer::Optimizable,
|
optimizer::Optimizable,
|
||||||
order_by::{order_by_sorter_insert, sorter_insert},
|
order_by::{order_by_sorter_insert, sorter_insert},
|
||||||
plan::{
|
plan::{
|
||||||
convert_where_to_vtab_constraint, Aggregate, GroupBy, IterationDirection, JoinOrderMember,
|
Aggregate, GroupBy, IterationDirection, JoinOrderMember, Operation, QueryDestination,
|
||||||
Operation, QueryDestination, Search, SeekDef, SelectPlan, TableReferences, WhereTerm,
|
Search, SeekDef, SelectPlan, TableReferences, WhereTerm,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -276,7 +273,7 @@ pub fn init_loop(
|
|||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
Operation::Scan(Scan::VirtualTable) => {
|
Operation::Scan(Scan::VirtualTable { .. }) => {
|
||||||
if let Table::Virtual(tbl) = &table.table {
|
if let Table::Virtual(tbl) = &table.table {
|
||||||
let is_write = matches!(
|
let is_write = matches!(
|
||||||
mode,
|
mode,
|
||||||
@@ -457,94 +454,41 @@ pub fn open_loop(
|
|||||||
}
|
}
|
||||||
program.preassign_label_to_next_insn(loop_start);
|
program.preassign_label_to_next_insn(loop_start);
|
||||||
}
|
}
|
||||||
(Scan::VirtualTable, Table::Virtual(vtab)) => {
|
(
|
||||||
|
Scan::VirtualTable {
|
||||||
|
idx_num,
|
||||||
|
idx_str,
|
||||||
|
constraints,
|
||||||
|
},
|
||||||
|
Table::Virtual(_),
|
||||||
|
) => {
|
||||||
let (start_reg, count, maybe_idx_str, maybe_idx_int) = {
|
let (start_reg, count, maybe_idx_str, maybe_idx_int) = {
|
||||||
// Virtual‑table modules can receive constraints via xBestIndex.
|
let args_needed = constraints.len();
|
||||||
// They return information with which to pass to VFilter operation.
|
|
||||||
// We forward every predicate that touches vtab columns.
|
|
||||||
//
|
|
||||||
// vtab.col = literal (always usable)
|
|
||||||
// vtab.col = outer_table.col (usable, because outer_table is already positioned)
|
|
||||||
// vtab.col = later_table.col (forwarded with usable = false)
|
|
||||||
//
|
|
||||||
// xBestIndex decides which ones it wants by setting argvIndex and whether the
|
|
||||||
// core layer may omit them (omit = true).
|
|
||||||
// We then materialise the RHS/LHS into registers before issuing VFilter.
|
|
||||||
let converted_constraints = predicates
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter(|(_, p)| p.should_eval_at_loop(join_index, join_order))
|
|
||||||
.filter_map(|(i, p)| {
|
|
||||||
// Build ConstraintInfo from the predicates
|
|
||||||
convert_where_to_vtab_constraint(
|
|
||||||
p,
|
|
||||||
joined_table_index,
|
|
||||||
i,
|
|
||||||
join_order,
|
|
||||||
)
|
|
||||||
.unwrap_or(None)
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
// TODO: get proper order_by information to pass to the vtab.
|
|
||||||
// maybe encode more info on t_ctx? we need: [col_idx, is_descending]
|
|
||||||
let index_info = vtab.best_index(&converted_constraints, &[])?;
|
|
||||||
|
|
||||||
if index_info.constraint_usages.len() != converted_constraints.len() {
|
|
||||||
return Err(LimboError::ExtensionError(format!(
|
|
||||||
"Constraint usage count mismatch (expected {}, got {})",
|
|
||||||
converted_constraints.len(),
|
|
||||||
index_info.constraint_usages.len()
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the number of VFilter arguments (constraints with an argv_index).
|
|
||||||
let args_needed = count_and_validate_vtab_filter_args(&index_info)?;
|
|
||||||
let start_reg = program.alloc_registers(args_needed);
|
let start_reg = program.alloc_registers(args_needed);
|
||||||
|
|
||||||
// For each constraint used by best_index, translate the opposite side.
|
for (argv_index, expr) in constraints.iter().enumerate() {
|
||||||
for (i, usage) in index_info.constraint_usages.iter().enumerate() {
|
let target_reg = start_reg + argv_index;
|
||||||
if let Some(argv_index) = usage.argv_index {
|
translate_expr(
|
||||||
if let Some(cinfo) = converted_constraints.get(i) {
|
program,
|
||||||
let (pred_idx, is_rhs) = cinfo.unpack_plan_info();
|
Some(table_references),
|
||||||
if let ast::Expr::Binary(lhs, _, rhs) =
|
expr,
|
||||||
&predicates[pred_idx].expr
|
target_reg,
|
||||||
{
|
&t_ctx.resolver,
|
||||||
// translate the opposite side of the referenced vtab column
|
)?;
|
||||||
let expr = if is_rhs { lhs } else { rhs };
|
|
||||||
// argv_index is 1-based; adjust to get the proper register offset.
|
|
||||||
let target_reg = start_reg + (argv_index - 1) as usize;
|
|
||||||
translate_expr(
|
|
||||||
program,
|
|
||||||
Some(table_references),
|
|
||||||
expr,
|
|
||||||
target_reg,
|
|
||||||
&t_ctx.resolver,
|
|
||||||
)?;
|
|
||||||
if cinfo.usable && usage.omit {
|
|
||||||
predicates[pred_idx].consumed.set(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If best_index provided an idx_str, translate it.
|
// If best_index provided an idx_str, translate it.
|
||||||
let maybe_idx_str = if let Some(idx_str) = index_info.idx_str {
|
let maybe_idx_str = if let Some(idx_str) = idx_str {
|
||||||
let reg = program.alloc_register();
|
let reg = program.alloc_register();
|
||||||
program.emit_insn(Insn::String8 {
|
program.emit_insn(Insn::String8 {
|
||||||
dest: reg,
|
dest: reg,
|
||||||
value: idx_str,
|
value: idx_str.to_owned(),
|
||||||
});
|
});
|
||||||
Some(reg)
|
Some(reg)
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
(
|
(start_reg, args_needed, maybe_idx_str, Some(*idx_num))
|
||||||
start_reg,
|
|
||||||
args_needed,
|
|
||||||
maybe_idx_str,
|
|
||||||
Some(index_info.idx_num),
|
|
||||||
)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Emit VFilter with the computed arguments.
|
// Emit VFilter with the computed arguments.
|
||||||
@@ -755,44 +699,6 @@ pub fn open_loop(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_and_validate_vtab_filter_args(index_info: &IndexInfo) -> Result<usize> {
|
|
||||||
let mut args_needed = 0;
|
|
||||||
let mut used_indices = HashSet::new();
|
|
||||||
|
|
||||||
for usage in &index_info.constraint_usages {
|
|
||||||
if let Some(argv_index) = usage.argv_index {
|
|
||||||
if argv_index < 1 {
|
|
||||||
return Err(LimboError::ExtensionError(format!(
|
|
||||||
"argv_index must be >= 1, got {argv_index}"
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
if argv_index > index_info.constraint_usages.len() as u32 {
|
|
||||||
return Err(LimboError::ExtensionError(format!(
|
|
||||||
"argv_index {} exceeds constraint count {}",
|
|
||||||
argv_index,
|
|
||||||
index_info.constraint_usages.len()
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
if !used_indices.insert(argv_index) {
|
|
||||||
return Err(LimboError::ExtensionError(format!(
|
|
||||||
"duplicate argv_index {argv_index}"
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
args_needed += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify that used indices form a contiguous sequence starting from 1
|
|
||||||
for i in 1..=args_needed as u32 {
|
|
||||||
if !used_indices.contains(&i) {
|
|
||||||
return Err(LimboError::ExtensionError(format!(
|
|
||||||
"argv_index values must form contiguous sequence starting from 1, missing index {i}"
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(args_needed)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// SQLite (and so Limbo) processes joins as a nested loop.
|
/// SQLite (and so Limbo) processes joins as a nested loop.
|
||||||
/// The loop may emit rows to various destinations depending on the query:
|
/// The loop may emit rows to various destinations depending on the query:
|
||||||
/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match)
|
/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match)
|
||||||
@@ -1089,7 +995,7 @@ pub fn close_loop(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Scan::VirtualTable => {
|
Scan::VirtualTable { .. } => {
|
||||||
program.emit_insn(Insn::VNext {
|
program.emit_insn(Insn::VNext {
|
||||||
cursor_id: table_cursor_id
|
cursor_id: table_cursor_id
|
||||||
.expect("Virtual tables do not support covering indexes"),
|
.expect("Virtual tables do not support covering indexes"),
|
||||||
|
|||||||
@@ -61,7 +61,11 @@ i.e. straight from the 70s! The DP algorithm is explained below.
|
|||||||
- `n=3`: for each 2-table subset found, find the best way to join that result to each other table. Memoize the result.
|
- `n=3`: for each 2-table subset found, find the best way to join that result to each other table. Memoize the result.
|
||||||
- `n=m`: for each `m-1` table subset found, find the best way to join that result to the `m'th` table
|
- `n=m`: for each `m-1` table subset found, find the best way to join that result to the `m'th` table
|
||||||
- **Use pruning to reduce search space:**
|
- **Use pruning to reduce search space:**
|
||||||
- Compute the literal query order first, and store its _cost_ as an upper threshold
|
- Compute the literal query order first, and store its _cost_ as an upper threshold.
|
||||||
|
In some cases it is not possible to compute this upper threshold from the literal order—for example, when
|
||||||
|
table-valued functions are involved and their arguments reference tables that appear to the right in the join order.
|
||||||
|
In such situations, the literal order cannot be executed directly, so no meaningful _cost_ can be assigned.
|
||||||
|
In these cases, the threshold is set to infinity, ensuring that valid plans are still considered.
|
||||||
- If at any point a considered join order exceeds the upper threshold, discard that search path since it cannot be better than the current best.
|
- If at any point a considered join order exceeds the upper threshold, discard that search path since it cannot be better than the current best.
|
||||||
- For example, we have `SELECT * FROM a JOIN b JOIN c JOIN d`. Compute `JOIN(a,b,c,d)` first. If `JOIN (b,a)` is already worse than `JOIN(a,b,c,d)`, we don't have to even try `JOIN(b,a,c)`.
|
- For example, we have `SELECT * FROM a JOIN b JOIN c JOIN d`. Compute `JOIN(a,b,c,d)` first. If `JOIN (b,a)` is already worse than `JOIN(a,b,c,d)`, we don't have to even try `JOIN(b,a,c)`.
|
||||||
- Also keep track of the best plan per _subset_:
|
- Also keep track of the best plan per _subset_:
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use turso_ext::{ConstraintInfo, ConstraintUsage};
|
||||||
use turso_sqlite3_parser::ast::SortOrder;
|
use turso_sqlite3_parser::ast::SortOrder;
|
||||||
|
|
||||||
|
use crate::translate::optimizer::constraints::{convert_to_vtab_constraint, Constraint};
|
||||||
use crate::{
|
use crate::{
|
||||||
schema::{Index, Table},
|
schema::{Index, Table},
|
||||||
translate::plan::{IterationDirection, JoinOrderMember, JoinedTable},
|
translate::plan::{IterationDirection, JoinOrderMember, JoinedTable},
|
||||||
Result,
|
vtab::VirtualTable,
|
||||||
|
LimboError, Result,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
@@ -38,7 +41,18 @@ pub enum AccessMethodParams<'a> {
|
|||||||
/// a non-empty list means a search.
|
/// a non-empty list means a search.
|
||||||
constraint_refs: &'a [ConstraintRef],
|
constraint_refs: &'a [ConstraintRef],
|
||||||
},
|
},
|
||||||
VirtualTable,
|
VirtualTable {
|
||||||
|
/// Index identifier returned by the table's `best_index` method.
|
||||||
|
idx_num: i32,
|
||||||
|
/// Optional index string returned by the table's `best_index` method.
|
||||||
|
idx_str: Option<String>,
|
||||||
|
/// Constraint descriptors that were passed to the virtual table’s `best_index` method.
|
||||||
|
/// Each corresponds to a column/operator pair from the WHERE clause.
|
||||||
|
constraints: Vec<ConstraintInfo>,
|
||||||
|
/// Information returned by the virtual table's `best_index` method
|
||||||
|
/// describing how each constraint will be used.
|
||||||
|
constraint_usages: Vec<ConstraintUsage>,
|
||||||
|
},
|
||||||
Subquery,
|
Subquery,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,10 +72,12 @@ pub fn find_best_access_method_for_join_order<'a>(
|
|||||||
maybe_order_target,
|
maybe_order_target,
|
||||||
input_cardinality,
|
input_cardinality,
|
||||||
),
|
),
|
||||||
Table::Virtual(_) => Ok(AccessMethod {
|
Table::Virtual(vtab) => find_best_access_method_for_vtab(
|
||||||
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
|
vtab,
|
||||||
params: AccessMethodParams::VirtualTable,
|
&rhs_constraints.constraints,
|
||||||
}),
|
join_order,
|
||||||
|
input_cardinality,
|
||||||
|
),
|
||||||
Table::FromClauseSubquery(_) => Ok(AccessMethod {
|
Table::FromClauseSubquery(_) => Ok(AccessMethod {
|
||||||
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
|
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
|
||||||
params: AccessMethodParams::Subquery,
|
params: AccessMethodParams::Subquery,
|
||||||
@@ -175,3 +191,32 @@ fn find_best_access_method_for_btree<'a>(
|
|||||||
params: best_params,
|
params: best_params,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn find_best_access_method_for_vtab<'a>(
|
||||||
|
vtab: &VirtualTable,
|
||||||
|
constraints: &[Constraint],
|
||||||
|
join_order: &[JoinOrderMember],
|
||||||
|
input_cardinality: f64,
|
||||||
|
) -> Result<AccessMethod<'a>> {
|
||||||
|
let vtab_constraints = convert_to_vtab_constraint(constraints, join_order);
|
||||||
|
|
||||||
|
// TODO: get proper order_by information to pass to the vtab.
|
||||||
|
// maybe encode more info on t_ctx? we need: [col_idx , is_descending]
|
||||||
|
let best_index_result = vtab.best_index(&vtab_constraints, &[]);
|
||||||
|
|
||||||
|
match best_index_result {
|
||||||
|
Ok(index_info) => {
|
||||||
|
Ok(AccessMethod {
|
||||||
|
// TODO: Base cost on `IndexInfo::estimated_cost` and output cardinality on `IndexInfo::estimated_rows`
|
||||||
|
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
|
||||||
|
params: AccessMethodParams::VirtualTable {
|
||||||
|
idx_num: index_info.idx_num,
|
||||||
|
idx_str: index_info.idx_str,
|
||||||
|
constraints: vtab_constraints,
|
||||||
|
constraint_usages: index_info.constraint_usages,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Err(e) => Err(LimboError::from(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use crate::{
|
|||||||
},
|
},
|
||||||
Result,
|
Result,
|
||||||
};
|
};
|
||||||
|
use turso_ext::{ConstraintInfo, ConstraintOp};
|
||||||
use turso_sqlite3_parser::ast::{self, SortOrder, TableInternalId};
|
use turso_sqlite3_parser::ast::{self, SortOrder, TableInternalId};
|
||||||
|
|
||||||
use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
use super::cost::ESTIMATED_HARDCODED_ROWS_PER_TABLE;
|
||||||
@@ -402,6 +403,48 @@ pub fn usable_constraints_for_join_order<'a>(
|
|||||||
&refs[..usable_until]
|
&refs[..usable_until]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn convert_to_vtab_constraint(
|
||||||
|
constraints: &[Constraint],
|
||||||
|
join_order: &[JoinOrderMember],
|
||||||
|
) -> Vec<ConstraintInfo> {
|
||||||
|
let table_idx = join_order.last().unwrap().original_idx;
|
||||||
|
let lhs_mask = TableMask::from_table_number_iter(
|
||||||
|
join_order
|
||||||
|
.iter()
|
||||||
|
.take(join_order.len() - 1)
|
||||||
|
.map(|j| j.original_idx),
|
||||||
|
);
|
||||||
|
constraints
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter_map(|(i, constraint)| {
|
||||||
|
let other_side_refers_to_self = constraint.lhs_mask.contains_table(table_idx);
|
||||||
|
if other_side_refers_to_self {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let all_required_tables_are_on_left_side = lhs_mask.contains_all(&constraint.lhs_mask);
|
||||||
|
to_ext_constraint_op(&constraint.operator).map(|op| ConstraintInfo {
|
||||||
|
column_index: constraint.table_col_pos as u32,
|
||||||
|
op,
|
||||||
|
usable: all_required_tables_are_on_left_side,
|
||||||
|
plan_info: ConstraintInfo::pack_plan_info(i as u32, true),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_ext_constraint_op(op: &ast::Operator) -> Option<ConstraintOp> {
|
||||||
|
match op {
|
||||||
|
ast::Operator::Equals => Some(ConstraintOp::Eq),
|
||||||
|
ast::Operator::Less => Some(ConstraintOp::Lt),
|
||||||
|
ast::Operator::LessEquals => Some(ConstraintOp::Le),
|
||||||
|
ast::Operator::Greater => Some(ConstraintOp::Gt),
|
||||||
|
ast::Operator::GreaterEquals => Some(ConstraintOp::Ge),
|
||||||
|
ast::Operator::NotEquals => Some(ConstraintOp::Ne),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
fn opposite_cmp_op(op: ast::Operator) -> ast::Operator {
|
||||||
match op {
|
match op {
|
||||||
ast::Operator::Equals => ast::Operator::Equals,
|
ast::Operator::Equals => ast::Operator::Equals,
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ use cost::Cost;
|
|||||||
use join::{compute_best_join_order, BestJoinOrderResult};
|
use join::{compute_best_join_order, BestJoinOrderResult};
|
||||||
use lift_common_subexpressions::lift_common_subexpressions_from_binary_or_terms;
|
use lift_common_subexpressions::lift_common_subexpressions_from_binary_or_terms;
|
||||||
use order::{compute_order_target, plan_satisfies_order_target, EliminatesSortBy};
|
use order::{compute_order_target, plan_satisfies_order_target, EliminatesSortBy};
|
||||||
|
use turso_ext::{ConstraintInfo, ConstraintUsage};
|
||||||
use turso_sqlite3_parser::ast::{self, fmt::ToTokens as _, Expr, SortOrder};
|
use turso_sqlite3_parser::ast::{self, fmt::ToTokens as _, Expr, SortOrder};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -14,10 +15,11 @@ use crate::{
|
|||||||
schema::{Index, IndexColumn, Schema, Table},
|
schema::{Index, IndexColumn, Schema, Table},
|
||||||
translate::{
|
translate::{
|
||||||
expr::is_double_quoted_identifier, expr::walk_expr_mut,
|
expr::is_double_quoted_identifier, expr::walk_expr_mut,
|
||||||
optimizer::access_method::AccessMethodParams, plan::Scan, plan::TerminationKey,
|
optimizer::access_method::AccessMethodParams, optimizer::constraints::TableConstraints,
|
||||||
|
plan::Scan, plan::TerminationKey,
|
||||||
},
|
},
|
||||||
types::SeekOp,
|
types::SeekOp,
|
||||||
Result,
|
LimboError, Result,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
@@ -359,8 +361,20 @@ fn optimize_table_access(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AccessMethodParams::VirtualTable => {
|
AccessMethodParams::VirtualTable {
|
||||||
joined_tables[table_idx].op = Operation::Scan(Scan::VirtualTable);
|
idx_num,
|
||||||
|
idx_str,
|
||||||
|
constraints,
|
||||||
|
constraint_usages,
|
||||||
|
} => {
|
||||||
|
joined_tables[table_idx].op = build_vtab_scan_op(
|
||||||
|
where_clause,
|
||||||
|
&constraints_per_table[table_idx],
|
||||||
|
idx_num,
|
||||||
|
idx_str,
|
||||||
|
constraints,
|
||||||
|
constraint_usages,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
AccessMethodParams::Subquery => {
|
AccessMethodParams::Subquery => {
|
||||||
joined_tables[table_idx].op = Operation::Scan(Scan::Subquery);
|
joined_tables[table_idx].op = Operation::Scan(Scan::Subquery);
|
||||||
@@ -371,6 +385,80 @@ fn optimize_table_access(
|
|||||||
Ok(Some(best_join_order))
|
Ok(Some(best_join_order))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_vtab_scan_op(
|
||||||
|
where_clause: &mut [WhereTerm],
|
||||||
|
table_constraints: &TableConstraints,
|
||||||
|
idx_num: &i32,
|
||||||
|
idx_str: &Option<String>,
|
||||||
|
vtab_constraints: &[ConstraintInfo],
|
||||||
|
constraint_usages: &[ConstraintUsage],
|
||||||
|
) -> Result<Operation> {
|
||||||
|
if constraint_usages.len() != vtab_constraints.len() {
|
||||||
|
return Err(LimboError::ExtensionError(format!(
|
||||||
|
"Constraint usage count mismatch (expected {}, got {})",
|
||||||
|
vtab_constraints.len(),
|
||||||
|
constraint_usages.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut constraints = vec![None; constraint_usages.len()];
|
||||||
|
let mut arg_count = 0;
|
||||||
|
|
||||||
|
for (i, vtab_constraint) in vtab_constraints.iter().enumerate() {
|
||||||
|
let usage = constraint_usages[i];
|
||||||
|
let argv_index = match usage.argv_index {
|
||||||
|
Some(idx) if idx >= 1 && (idx as usize) <= constraint_usages.len() => idx,
|
||||||
|
Some(idx) => {
|
||||||
|
return Err(LimboError::ExtensionError(format!(
|
||||||
|
"argv_index {} is out of valid range [1..{}]",
|
||||||
|
idx,
|
||||||
|
constraint_usages.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
let zero_based_argv_index = (argv_index - 1) as usize;
|
||||||
|
if constraints[zero_based_argv_index].is_some() {
|
||||||
|
return Err(LimboError::ExtensionError(format!(
|
||||||
|
"duplicate argv_index {argv_index}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let (pred_idx, _) = vtab_constraint.unpack_plan_info();
|
||||||
|
let constraint = &table_constraints.constraints[pred_idx];
|
||||||
|
if usage.omit {
|
||||||
|
where_clause[constraint.where_clause_pos.0]
|
||||||
|
.consumed
|
||||||
|
.set(true);
|
||||||
|
}
|
||||||
|
let expr = constraint.get_constraining_expr(where_clause);
|
||||||
|
constraints[zero_based_argv_index] = Some(expr);
|
||||||
|
arg_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that used indices form a contiguous sequence starting from 1
|
||||||
|
let constraints = constraints
|
||||||
|
.into_iter()
|
||||||
|
.take(arg_count)
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, c)| {
|
||||||
|
c.ok_or_else(|| {
|
||||||
|
LimboError::ExtensionError(format!(
|
||||||
|
"argv_index values must form contiguous sequence starting from 1, missing index {}",
|
||||||
|
i + 1
|
||||||
|
))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>>>()?;
|
||||||
|
|
||||||
|
Ok(Operation::Scan(Scan::VirtualTable {
|
||||||
|
idx_num: *idx_num,
|
||||||
|
idx_str: idx_str.clone(),
|
||||||
|
constraints,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
enum ConstantConditionEliminationResult {
|
enum ConstantConditionEliminationResult {
|
||||||
Continue,
|
Continue,
|
||||||
|
|||||||
@@ -233,7 +233,7 @@ pub fn plan_satisfies_order_target(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AccessMethodParams::VirtualTable => return false,
|
AccessMethodParams::VirtualTable { .. } => return false,
|
||||||
AccessMethodParams::Subquery => return false,
|
AccessMethodParams::Subquery => return false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
use std::{cell::Cell, cmp::Ordering, sync::Arc};
|
use std::{cell::Cell, cmp::Ordering, sync::Arc};
|
||||||
use turso_ext::{ConstraintInfo, ConstraintOp};
|
|
||||||
use turso_sqlite3_parser::ast::{self, SortOrder};
|
use turso_sqlite3_parser::ast::{self, SortOrder};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -12,7 +11,7 @@ use crate::{
|
|||||||
},
|
},
|
||||||
Result, VirtualTable,
|
Result, VirtualTable,
|
||||||
};
|
};
|
||||||
use crate::{schema::Type, types::SeekOp, util::can_pushdown_predicate};
|
use crate::{schema::Type, types::SeekOp};
|
||||||
|
|
||||||
use turso_sqlite3_parser::ast::TableInternalId;
|
use turso_sqlite3_parser::ast::TableInternalId;
|
||||||
|
|
||||||
@@ -118,142 +117,8 @@ impl WhereTerm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
use crate::ast::{Expr, Operator};
|
use crate::ast::Expr;
|
||||||
|
|
||||||
// This function takes an operator and returns the operator you would obtain if the operands were swapped.
|
|
||||||
// e.g. "literal < column"
|
|
||||||
// which is not the canonical order for constraint pushdown.
|
|
||||||
// This function will return > so that the expression can be treated as if it were written "column > literal"
|
|
||||||
fn reverse_operator(op: &Operator) -> Option<Operator> {
|
|
||||||
match op {
|
|
||||||
Operator::Equals => Some(Operator::Equals),
|
|
||||||
Operator::Less => Some(Operator::Greater),
|
|
||||||
Operator::LessEquals => Some(Operator::GreaterEquals),
|
|
||||||
Operator::Greater => Some(Operator::Less),
|
|
||||||
Operator::GreaterEquals => Some(Operator::LessEquals),
|
|
||||||
Operator::NotEquals => Some(Operator::NotEquals),
|
|
||||||
Operator::Is => Some(Operator::Is),
|
|
||||||
Operator::IsNot => Some(Operator::IsNot),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn to_ext_constraint_op(op: &Operator) -> Option<ConstraintOp> {
|
|
||||||
match op {
|
|
||||||
Operator::Equals => Some(ConstraintOp::Eq),
|
|
||||||
Operator::Less => Some(ConstraintOp::Lt),
|
|
||||||
Operator::LessEquals => Some(ConstraintOp::Le),
|
|
||||||
Operator::Greater => Some(ConstraintOp::Gt),
|
|
||||||
Operator::GreaterEquals => Some(ConstraintOp::Ge),
|
|
||||||
Operator::NotEquals => Some(ConstraintOp::Ne),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'.
|
|
||||||
/// It determines whether or not it involves the given table and whether or not it can
|
|
||||||
/// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex
|
|
||||||
/// method, which will possibly calculate some information to improve the query plan, that we can send
|
|
||||||
/// back to it as arguments for the VFilter operation.
|
|
||||||
/// is going to be filtered against: e.g:
|
|
||||||
/// 'SELECT key, value FROM vtab WHERE key = 'some_key';
|
|
||||||
/// we need to send the Value('some_key') as an argument to VFilter, and possibly omit it from
|
|
||||||
/// the filtration in the vdbe layer.
|
|
||||||
pub fn convert_where_to_vtab_constraint(
|
|
||||||
term: &WhereTerm,
|
|
||||||
table_idx: usize,
|
|
||||||
pred_idx: usize,
|
|
||||||
join_order: &[JoinOrderMember],
|
|
||||||
) -> Result<Option<ConstraintInfo>> {
|
|
||||||
if term.from_outer_join.is_some() {
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
let Expr::Binary(lhs, op, rhs) = &term.expr else {
|
|
||||||
return Ok(None);
|
|
||||||
};
|
|
||||||
let expr_is_ready =
|
|
||||||
|e: &Expr| -> Result<bool> { can_pushdown_predicate(e, table_idx, join_order) };
|
|
||||||
let (vcol_idx, op_for_vtab, usable, is_rhs) = match (&**lhs, &**rhs) {
|
|
||||||
(
|
|
||||||
Expr::Column {
|
|
||||||
table: tbl_l,
|
|
||||||
column: col_l,
|
|
||||||
..
|
|
||||||
},
|
|
||||||
Expr::Column {
|
|
||||||
table: tbl_r,
|
|
||||||
column: col_r,
|
|
||||||
..
|
|
||||||
},
|
|
||||||
) => {
|
|
||||||
// one side must be the virtual table
|
|
||||||
let tbl_l_idx = join_order
|
|
||||||
.iter()
|
|
||||||
.position(|j| j.table_id == *tbl_l)
|
|
||||||
.unwrap();
|
|
||||||
let tbl_r_idx = join_order
|
|
||||||
.iter()
|
|
||||||
.position(|j| j.table_id == *tbl_r)
|
|
||||||
.unwrap();
|
|
||||||
let vtab_on_l = tbl_l_idx == table_idx;
|
|
||||||
let vtab_on_r = tbl_r_idx == table_idx;
|
|
||||||
if vtab_on_l == vtab_on_r {
|
|
||||||
return Ok(None); // either both or none -> not convertible
|
|
||||||
}
|
|
||||||
|
|
||||||
if vtab_on_l {
|
|
||||||
// vtab on left side: operator unchanged
|
|
||||||
let usable = tbl_r_idx < table_idx; // usable if the other table is already positioned
|
|
||||||
(col_l, op, usable, false)
|
|
||||||
} else {
|
|
||||||
// vtab on right side of the expr: reverse operator
|
|
||||||
let usable = tbl_l_idx < table_idx;
|
|
||||||
(col_r, &reverse_operator(op).unwrap_or(*op), usable, true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(Expr::Column { table, column, .. }, other)
|
|
||||||
if join_order
|
|
||||||
.iter()
|
|
||||||
.position(|j| j.table_id == *table)
|
|
||||||
.unwrap()
|
|
||||||
== table_idx =>
|
|
||||||
{
|
|
||||||
(
|
|
||||||
column,
|
|
||||||
op,
|
|
||||||
expr_is_ready(other)?, // literal / earlier‑table / deterministic func ?
|
|
||||||
false,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
(other, Expr::Column { table, column, .. })
|
|
||||||
if join_order
|
|
||||||
.iter()
|
|
||||||
.position(|j| j.table_id == *table)
|
|
||||||
.unwrap()
|
|
||||||
== table_idx =>
|
|
||||||
{
|
|
||||||
(
|
|
||||||
column,
|
|
||||||
&reverse_operator(op).unwrap_or(*op),
|
|
||||||
expr_is_ready(other)?,
|
|
||||||
true,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => return Ok(None), // does not involve the virtual table at all
|
|
||||||
};
|
|
||||||
|
|
||||||
let Some(op) = to_ext_constraint_op(op_for_vtab) else {
|
|
||||||
return Ok(None);
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Some(ConstraintInfo {
|
|
||||||
column_index: *vcol_idx as u32,
|
|
||||||
op,
|
|
||||||
usable,
|
|
||||||
plan_info: ConstraintInfo::pack_plan_info(pred_idx as u32, is_rhs),
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
/// The loop index where to evaluate the condition.
|
/// The loop index where to evaluate the condition.
|
||||||
/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
|
/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
|
||||||
/// because that is the rightmost table that it references.
|
/// because that is the rightmost table that it references.
|
||||||
@@ -877,7 +742,11 @@ impl Operation {
|
|||||||
iter_dir: IterationDirection::Forwards,
|
iter_dir: IterationDirection::Forwards,
|
||||||
index: None,
|
index: None,
|
||||||
}),
|
}),
|
||||||
Table::Virtual(_) => Operation::Scan(Scan::VirtualTable),
|
Table::Virtual(_) => Operation::Scan(Scan::VirtualTable {
|
||||||
|
idx_num: -1,
|
||||||
|
idx_str: None,
|
||||||
|
constraints: Vec::new(),
|
||||||
|
}),
|
||||||
Table::FromClauseSubquery(_) => Operation::Scan(Scan::Subquery),
|
Table::FromClauseSubquery(_) => Operation::Scan(Scan::Subquery),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1125,7 +994,15 @@ pub enum Scan {
|
|||||||
index: Option<Arc<Index>>,
|
index: Option<Arc<Index>>,
|
||||||
},
|
},
|
||||||
/// A scan of a virtual table, delegated to the table’s `filter` and related methods.
|
/// A scan of a virtual table, delegated to the table’s `filter` and related methods.
|
||||||
VirtualTable,
|
VirtualTable {
|
||||||
|
/// Index identifier returned by the table's `best_index` method.
|
||||||
|
idx_num: i32,
|
||||||
|
/// Optional index name returned by the table’s `best_index` method.
|
||||||
|
idx_str: Option<String>,
|
||||||
|
/// Constraining expressions to be passed to the table’s `filter` method.
|
||||||
|
/// The order of expressions matches the argument order expected by the virtual table.
|
||||||
|
constraints: Vec<Expr>,
|
||||||
|
},
|
||||||
/// A scan of a subquery in the `FROM` clause.
|
/// A scan of a subquery in the `FROM` clause.
|
||||||
Subquery,
|
Subquery,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -370,7 +370,7 @@ fn build_scan_op(table: &Table, iter_dir: IterationDirection) -> Operation {
|
|||||||
iter_dir,
|
iter_dir,
|
||||||
index: None,
|
index: None,
|
||||||
}),
|
}),
|
||||||
Table::Virtual(_) => Operation::Scan(Scan::VirtualTable),
|
Table::Virtual(_) => Operation::default_scan_for(table),
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -146,6 +146,19 @@ pub trait VTable {
|
|||||||
fn destroy(&mut self) -> Result<(), Self::Error> {
|
fn destroy(&mut self) -> Result<(), Self::Error> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The query planner may call this method multiple times during optimization, exploring
|
||||||
|
/// different join orders. Each call asks the virtual table which constraints (WHERE clause
|
||||||
|
/// terms) it can efficiently handle. Based on the incoming `ConstraintInfo`s, the virtual table
|
||||||
|
/// should decide:
|
||||||
|
/// - which constraints it can consume (`ConstraintUsage`),
|
||||||
|
/// - how they map to arguments passed into `filter`,
|
||||||
|
/// - and return an `IndexInfo` describing the resulting plan.
|
||||||
|
///
|
||||||
|
/// The return value’s `idx_num`, `idx_str`, and `constraint_usages` are later passed back to
|
||||||
|
/// the virtual table’s `filter` method if the chosen plan is selected for execution. There is
|
||||||
|
/// no guarantee that `filter` will ever be called — many `best_index` candidates are discarded
|
||||||
|
/// during planning.
|
||||||
fn best_index(
|
fn best_index(
|
||||||
_constraints: &[ConstraintInfo],
|
_constraints: &[ConstraintInfo],
|
||||||
_order_by: &[OrderByInfo],
|
_order_by: &[OrderByInfo],
|
||||||
|
|||||||
@@ -320,6 +320,11 @@ def _test_series(limbo: TestTursoShell):
|
|||||||
"SELECT * FROM generate_series WHERE start = 1 AND stop = 10;",
|
"SELECT * FROM generate_series WHERE start = 1 AND stop = 10;",
|
||||||
lambda res: res == "1\n2\n3\n4\n5\n6\n7\n8\n9\n10",
|
lambda res: res == "1\n2\n3\n4\n5\n6\n7\n8\n9\n10",
|
||||||
)
|
)
|
||||||
|
limbo.run_test_fn(
|
||||||
|
"SELECT * FROM generate_series WHERE 1 = start AND 10 = stop;",
|
||||||
|
lambda res: res == "1\n2\n3\n4\n5\n6\n7\n8\n9\n10",
|
||||||
|
"Constraint with column on RHS used as TVF arg"
|
||||||
|
)
|
||||||
limbo.run_test_fn(
|
limbo.run_test_fn(
|
||||||
"SELECT * FROM generate_series WHERE stop = 10 AND start = 1;",
|
"SELECT * FROM generate_series WHERE stop = 10 AND start = 1;",
|
||||||
lambda res: res == "1\n2\n3\n4\n5\n6\n7\n8\n9\n10",
|
lambda res: res == "1\n2\n3\n4\n5\n6\n7\n8\n9\n10",
|
||||||
|
|||||||
Reference in New Issue
Block a user