Support column references in table-valued function arguments

This change extends table-valued function support by allowing arguments
to be column references, not only literals.

Virtual tables can now reject a plan by returning an error from
best_index (e.g., when a TVF argument references a table that appears
later in the join order). The planner using this information excludes
invalid plans during join order search.
This commit is contained in:
Piotr Rzysko
2025-08-03 09:46:02 +02:00
parent 82491ceb6a
commit 99f87c07c1
10 changed files with 172 additions and 59 deletions

View File

@@ -178,13 +178,16 @@ impl PragmaVirtualTable {
let mut arg1_idx = None;
for (i, c) in constraints.iter().enumerate() {
if !c.usable || c.op != ConstraintOp::Eq {
if c.op != ConstraintOp::Eq {
continue;
}
let visible_count = self.visible_column_count as u32;
if c.column_index < visible_count {
continue;
}
if !c.usable {
return Err(ResultCode::ConstraintViolation);
}
let hidden_idx = c.column_index - visible_count;
match hidden_idx {
0 => arg0_idx = Some(i),
@@ -444,11 +447,9 @@ mod tests {
plan_info: 0,
}];
let index_info = pragma_vtab.best_index(&constraints).unwrap();
let result = pragma_vtab.best_index(&constraints);
// Verify no argv_index is assigned
assert_eq!(index_info.constraint_usages[0].argv_index, None);
assert!(!index_info.constraint_usages[0].omit);
assert!(matches!(result, Err(ResultCode::ConstraintViolation)));
}
fn usable_constraint(column_index: u32) -> ConstraintInfo {

View File

@@ -71,24 +71,29 @@ impl VTable for GenerateSeriesTable {
let mut idx_num = 0;
let mut positions = [None; 4]; // maps column index to constraint position
let mut start_exists = false;
let mut usable = true;
for (i, c) in constraints.iter().enumerate() {
if c.column_index == START_COLUMN_INDEX && c.op == ConstraintOp::Eq {
start_exists = true;
}
if !c.usable || c.op != ConstraintOp::Eq {
continue;
}
if c.column_index >= START_COLUMN_INDEX && c.column_index <= STEP_COLUMN_INDEX {
let bit = 1 << (c.column_index - 1);
idx_num |= bit;
positions[c.column_index as usize] = Some(i);
if !c.usable {
usable = false;
} else if c.op == ConstraintOp::Eq {
let bit = 1 << (c.column_index - 1);
idx_num |= bit;
positions[c.column_index as usize] = Some(i);
}
}
}
if !start_exists {
return Err(ResultCode::InvalidArgs);
}
if !usable {
return Err(ResultCode::ConstraintViolation);
}
// Assign argv indexes contiguously
let mut argv_idx = 1;
@@ -763,11 +768,9 @@ mod tests {
plan_info: 0,
}];
let index_info = GenerateSeriesTable::best_index(&constraints, &[]).unwrap();
let result = GenerateSeriesTable::best_index(&constraints, &[]);
// Verify no argv_index is assigned
assert_eq!(index_info.constraint_usages[0].argv_index, None);
assert_eq!(index_info.idx_num, 0); // No bits set
assert!(matches!(result, Err(ResultCode::ConstraintViolation)));
}
fn usable_constraint(column_index: u32) -> ConstraintInfo {

View File

@@ -1,6 +1,6 @@
use std::sync::Arc;
use turso_ext::{ConstraintInfo, ConstraintUsage};
use turso_ext::{ConstraintInfo, ConstraintUsage, ResultCode};
use turso_sqlite3_parser::ast::SortOrder;
use crate::translate::optimizer::constraints::{convert_to_vtab_constraint, Constraint};
@@ -63,7 +63,7 @@ pub fn find_best_access_method_for_join_order<'a>(
join_order: &[JoinOrderMember],
maybe_order_target: Option<&OrderTarget>,
input_cardinality: f64,
) -> Result<AccessMethod<'a>> {
) -> Result<Option<AccessMethod<'a>>> {
match &rhs_table.table {
Table::BTree(_) => find_best_access_method_for_btree(
rhs_table,
@@ -78,10 +78,10 @@ pub fn find_best_access_method_for_join_order<'a>(
join_order,
input_cardinality,
),
Table::FromClauseSubquery(_) => Ok(AccessMethod {
Table::FromClauseSubquery(_) => Ok(Some(AccessMethod {
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
params: AccessMethodParams::Subquery,
}),
})),
}
}
@@ -91,7 +91,7 @@ fn find_best_access_method_for_btree<'a>(
join_order: &[JoinOrderMember],
maybe_order_target: Option<&OrderTarget>,
input_cardinality: f64,
) -> Result<AccessMethod<'a>> {
) -> Result<Option<AccessMethod<'a>>> {
let table_no = join_order.last().unwrap().table_id;
let mut best_cost = estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality);
let mut best_params = AccessMethodParams::BTreeTable {
@@ -186,10 +186,10 @@ fn find_best_access_method_for_btree<'a>(
}
}
Ok(AccessMethod {
Ok(Some(AccessMethod {
cost: best_cost,
params: best_params,
})
}))
}
fn find_best_access_method_for_vtab<'a>(
@@ -197,7 +197,7 @@ fn find_best_access_method_for_vtab<'a>(
constraints: &[Constraint],
join_order: &[JoinOrderMember],
input_cardinality: f64,
) -> Result<AccessMethod<'a>> {
) -> Result<Option<AccessMethod<'a>>> {
let vtab_constraints = convert_to_vtab_constraint(constraints, join_order);
// TODO: get proper order_by information to pass to the vtab.
@@ -206,7 +206,7 @@ fn find_best_access_method_for_vtab<'a>(
match best_index_result {
Ok(index_info) => {
Ok(AccessMethod {
Ok(Some(AccessMethod {
// TODO: Base cost on `IndexInfo::estimated_cost` and output cardinality on `IndexInfo::estimated_rows`
cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
params: AccessMethodParams::VirtualTable {
@@ -215,8 +215,9 @@ fn find_best_access_method_for_vtab<'a>(
constraints: vtab_constraints,
constraint_usages: index_info.constraint_usages,
},
})
}))
}
Err(ResultCode::ConstraintViolation) => Ok(None),
Err(e) => Err(LimboError::from(e)),
}
}

View File

@@ -42,7 +42,7 @@ impl JoinN {
}
/// Join n-1 tables with the n'th table.
/// Returns None if the plan is worse than the provided cost upper bound.
/// Returns None if the plan is worse than the provided cost upper bound or if no valid access method is found.
pub fn join_lhs_and_rhs<'a>(
lhs: Option<&JoinN>,
rhs_table_reference: &JoinedTable,
@@ -65,6 +65,10 @@ pub fn join_lhs_and_rhs<'a>(
input_cardinality as f64,
)?;
let Some(best_access_method) = best_access_method else {
return Ok(None);
};
let lhs_cost = lhs.map_or(Cost(0.0), |l| l.cost);
let cost = lhs_cost + best_access_method.cost;

View File

@@ -327,7 +327,7 @@ fn parse_from_clause_table(
schema: &Schema,
table: ast::SelectTable,
table_references: &mut TableReferences,
out_where_clause: &mut Vec<WhereTerm>,
vtab_predicates: &mut Vec<Expr>,
ctes: &mut Vec<JoinedTable>,
syms: &SymbolTable,
table_ref_counter: &mut TableRefIdCounter,
@@ -338,7 +338,7 @@ fn parse_from_clause_table(
table_references,
ctes,
table_ref_counter,
out_where_clause,
vtab_predicates,
qualified_name,
maybe_alias,
None,
@@ -379,7 +379,7 @@ fn parse_from_clause_table(
table_references,
ctes,
table_ref_counter,
out_where_clause,
vtab_predicates,
qualified_name,
maybe_alias,
maybe_args,
@@ -394,7 +394,7 @@ fn parse_table(
table_references: &mut TableReferences,
ctes: &mut Vec<JoinedTable>,
table_ref_counter: &mut TableRefIdCounter,
out_where_clause: &mut Vec<WhereTerm>,
vtab_predicates: &mut Vec<Expr>,
qualified_name: QualifiedName,
maybe_alias: Option<As>,
maybe_args: Option<Vec<Expr>>,
@@ -431,7 +431,7 @@ fn parse_table(
transform_args_into_where_terms(
args,
internal_id,
out_where_clause,
vtab_predicates,
table.as_ref(),
)?;
}
@@ -485,7 +485,7 @@ fn parse_table(
fn transform_args_into_where_terms(
args: Vec<Expr>,
internal_id: TableInternalId,
out_where_clause: &mut Vec<WhereTerm>,
predicates: &mut Vec<Expr>,
table: &Table,
) -> Result<()> {
let mut args_iter = args.into_iter();
@@ -497,11 +497,6 @@ fn transform_args_into_where_terms(
hidden_count += 1;
if let Some(arg_expr) = args_iter.next() {
if contains_column_reference(&arg_expr)? {
crate::bail_parse_error!(
"Column references are not supported as table-valued function arguments yet"
);
}
let column_expr = Expr::Column {
database: None,
table: internal_id,
@@ -516,11 +511,7 @@ fn transform_args_into_where_terms(
Box::new(other),
),
};
out_where_clause.push(WhereTerm {
expr,
from_outer_join: None,
consumed: Cell::new(false),
});
predicates.push(expr);
}
}
@@ -536,18 +527,6 @@ fn transform_args_into_where_terms(
Ok(())
}
fn contains_column_reference(top_level_expr: &Expr) -> Result<bool> {
let mut contains = false;
walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> {
match expr {
Expr::Id(_) | Expr::Qualified(_, _) | Expr::Column { .. } => contains = true,
_ => {}
};
Ok(WalkControl::Continue)
})?;
Ok(contains)
}
#[allow(clippy::too_many_arguments)]
pub fn parse_from(
schema: &Schema,
@@ -555,6 +534,7 @@ pub fn parse_from(
syms: &SymbolTable,
with: Option<With>,
out_where_clause: &mut Vec<WhereTerm>,
vtab_predicates: &mut Vec<Expr>,
table_references: &mut TableReferences,
table_ref_counter: &mut TableRefIdCounter,
connection: &Arc<crate::Connection>,
@@ -641,7 +621,7 @@ pub fn parse_from(
schema,
select_owned,
table_references,
out_where_clause,
vtab_predicates,
&mut ctes_as_subqueries,
syms,
table_ref_counter,
@@ -655,6 +635,7 @@ pub fn parse_from(
syms,
&mut ctes_as_subqueries,
out_where_clause,
vtab_predicates,
table_references,
table_ref_counter,
connection,
@@ -871,6 +852,7 @@ fn parse_join(
syms: &SymbolTable,
ctes: &mut Vec<JoinedTable>,
out_where_clause: &mut Vec<WhereTerm>,
vtab_predicates: &mut Vec<Expr>,
table_references: &mut TableReferences,
table_ref_counter: &mut TableRefIdCounter,
connection: &Arc<crate::Connection>,
@@ -885,7 +867,7 @@ fn parse_join(
schema,
table,
table_references,
out_where_clause,
vtab_predicates,
ctes,
syms,
table_ref_counter,

View File

@@ -1,7 +1,7 @@
use super::emitter::{emit_program, TranslateCtx};
use super::plan::{
select_star, Distinctness, JoinOrderMember, Operation, OuterQueryReference, QueryDestination,
Search, TableReferences,
Search, TableReferences, WhereTerm,
};
use crate::function::{AggFunc, ExtFunc, Func};
use crate::schema::Table;
@@ -14,10 +14,11 @@ use crate::translate::planner::{
use crate::util::normalize_ident;
use crate::vdbe::builder::{ProgramBuilderOpts, TableRefIdCounter};
use crate::vdbe::insn::Insn;
use crate::SymbolTable;
use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
use crate::{Connection, SymbolTable};
use std::cell::Cell;
use std::sync::Arc;
use turso_sqlite3_parser::ast::{self, CompoundSelect, SortOrder};
use turso_sqlite3_parser::ast::{self, CompoundSelect, Expr, SortOrder};
use turso_sqlite3_parser::ast::{ResultColumn, SelectInner};
pub struct TranslateSelectResult {
@@ -207,6 +208,7 @@ fn prepare_one_select_plan(
}
let mut where_predicates = vec![];
let mut vtab_predicates = vec![];
let mut table_references = TableReferences::new(vec![], outer_query_refs.to_vec());
@@ -225,6 +227,7 @@ fn prepare_one_select_plan(
syms,
with,
&mut where_predicates,
&mut vtab_predicates,
&mut table_references,
table_ref_counter,
connection,
@@ -521,6 +524,11 @@ fn prepare_one_select_plan(
}
}
// This step can only be performed at this point, because all table references are now available.
// Virtual table predicates may depend on column bindings from tables to the right in the join order,
// so we must wait until the full set of references has been collected.
add_vtab_predicates_to_where_clause(&mut vtab_predicates, &mut plan, connection)?;
// Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions.
parse_where(
where_clause,
@@ -636,6 +644,29 @@ fn prepare_one_select_plan(
}
}
fn add_vtab_predicates_to_where_clause(
vtab_predicates: &mut Vec<Expr>,
plan: &mut SelectPlan,
connection: &Arc<Connection>,
) -> Result<()> {
for expr in vtab_predicates.iter_mut() {
bind_column_references(
expr,
&mut plan.table_references,
Some(&plan.result_columns),
connection,
)?;
}
for expr in vtab_predicates.drain(..) {
plan.where_clause.push(WhereTerm {
expr,
from_outer_join: None,
consumed: Cell::new(false),
});
}
Ok(())
}
/// Replaces a column number in an ORDER BY or GROUP BY expression with a copy of the column expression.
/// For example, in SELECT u.first_name, count(1) FROM users u GROUP BY 1 ORDER BY 2,
/// the column number 1 is replaced with u.first_name and the column number 2 is replaced with count(1).

View File

@@ -26,6 +26,7 @@ pub enum ResultCode {
Row = 18,
Interrupt = 19,
Busy = 20,
ConstraintViolation = 21,
}
impl ResultCode {
@@ -66,6 +67,7 @@ impl Display for ResultCode {
ResultCode::Row => write!(f, "Row"),
ResultCode::Interrupt => write!(f, "Interrupt"),
ResultCode::Busy => write!(f, "Busy"),
ResultCode::ConstraintViolation => write!(f, "Constraint Violation"),
}
}
}

View File

@@ -159,6 +159,11 @@ pub trait VTable {
/// the virtual tables `filter` method if the chosen plan is selected for execution. There is
/// no guarantee that `filter` will ever be called — many `best_index` candidates are discarded
/// during planning.
///
/// If an error occurs, an appropriate error code is returned. A return value of
/// `ResultCode::ConstraintViolation` from `best_index` is not considered an error. Instead, it
/// indicates that the current configuration of `usable` flags in `ConstraintInfo` cannot
/// produce a valid plan.
fn best_index(
_constraints: &[ConstraintInfo],
_order_by: &[OrderByInfo],

View File

@@ -381,6 +381,56 @@ def _test_series(limbo: TestTursoShell):
"SELECT * FROM target;",
lambda res: res == "1\n2\n3\n4\n5",
)
limbo.run_test_fn(
"SELECT t.id, series.value FROM target t, generate_series(t.id, 3) series;",
lambda res: res == "1|1\n1|2\n1|3\n2|2\n2|3\n3|3",
"Column reference from table on the left used as generate_series argument"
)
limbo.run_test_fn(
"SELECT t.id, series.value FROM generate_series(t.id, 3) series, target t;",
lambda res: res == "1|1\n1|2\n1|3\n2|2\n2|3\n3|3",
"Column reference from table on the right used as generate_series argument"
)
limbo.run_test_fn(
"SELECT one.value, series.value FROM (SELECT 1 AS value) one, generate_series(one.value, 3) series;",
lambda res: res == "1|1\n1|2\n1|3",
"Column reference from scalar subquery (left side)"
)
limbo.run_test_fn(
"SELECT one.value, series.value FROM generate_series(one.value, 3) series, (SELECT 1 AS value) one;",
lambda res: res == "1|1\n1|2\n1|3",
"Column reference from scalar subquery (right side)"
)
limbo.run_test_fn(
"SELECT "
" * "
"FROM "
" generate_series(a.start, a.stop) series "
"NATURAL JOIN "
" (SELECT 1 AS start, 3 AS stop, 2 AS value) a;",
lambda res: res == "2|1|3",
"Natural join where TVF arguments come from column references"
)
limbo.run_test_fn(
"SELECT * FROM generate_series(a.start, a.stop) JOIN (SELECT 1 AS start, 3 AS stop) a USING (start, stop);",
lambda res: res == "1\n2\n3",
"Join USING where TVF arguments come from column references"
)
limbo.run_test_fn(
"SELECT a.value, b.value FROM generate_series(b.value, b.value+1) a JOIN generate_series(1, 2) b;",
lambda res: res == "1|1\n2|1\n2|2\n3|2",
"TVF arguments come from another TVF"
)
limbo.run_test_fn(
"SELECT * FROM generate_series(a.start, a.stop) b, generate_series(b.start, b.stop) a;",
lambda res: "No valid query plan found" in res or "no query solution" in res,
"circular column references between two generate_series"
)
limbo.run_test_fn(
"SELECT * FROM generate_series(b.start, b.stop) b;",
lambda res: "Invalid Argument" in res or 'first argument to "generate_series()" missing or unusable' in res,
"self-reference in generate_series arguments"
)
limbo.quit()

View File

@@ -262,3 +262,37 @@ do_execsql_test_on_specific_db $test_pragma_page_size_db pragma-page-size-set-un
} {1024}
catch {file delete -force $test_pragma_page_size_db}
catch {file delete -force "${test_pragma_page_size_db}-wal"}
do_execsql_test pragma-vtab-join {
SELECT s.name, ti.name FROM sqlite_schema AS s, pragma_table_info(s.name) AS ti;
} {users|id
users|first_name
users|last_name
users|email
users|phone_number
users|address
users|city
users|state
users|zipcode
users|age
products|id
products|name
products|price
}
do_execsql_test pragma-vtab-reversed-join-order {
SELECT s.name, ti.name FROM pragma_table_info(s.name) AS ti, sqlite_schema AS s;
} {users|id
users|first_name
users|last_name
users|email
users|phone_number
users|address
users|city
users|state
users|zipcode
users|age
products|id
products|name
products|price
}