Support column references in table-valued function arguments

This change extends table-valued function support by allowing arguments to be column references, not only literals. Virtual tables can now reject a plan by returning an error from best_index (e.g., when a TVF argument references a table that appears later in the join order). The planner using this information excludes invalid plans during join order search.
2026-02-06 16:54:23 +01:00 · 2025-08-03 09:46:02 +02:00
parent 82491ceb6a
commit 99f87c07c1
10 changed files with 172 additions and 59 deletions
--- a/core/pragma.rs
+++ b/core/pragma.rs
@@ -178,13 +178,16 @@ impl PragmaVirtualTable {
        let mut arg1_idx = None;

        for (i, c) in constraints.iter().enumerate() {
-            if !c.usable || c.op != ConstraintOp::Eq {
+            if c.op != ConstraintOp::Eq {
                continue;
            }
            let visible_count = self.visible_column_count as u32;
            if c.column_index < visible_count {
                continue;
            }
+            if !c.usable {
+                return Err(ResultCode::ConstraintViolation);
+            }
            let hidden_idx = c.column_index - visible_count;
            match hidden_idx {
                0 => arg0_idx = Some(i),
@@ -444,11 +447,9 @@ mod tests {
            plan_info: 0,
        }];

-        let index_info = pragma_vtab.best_index(&constraints).unwrap();
+        let result = pragma_vtab.best_index(&constraints);

-        // Verify no argv_index is assigned
-        assert_eq!(index_info.constraint_usages[0].argv_index, None);
-        assert!(!index_info.constraint_usages[0].omit);
+        assert!(matches!(result, Err(ResultCode::ConstraintViolation)));
    }

    fn usable_constraint(column_index: u32) -> ConstraintInfo {
--- a/core/series.rs
+++ b/core/series.rs
@@ -71,24 +71,29 @@ impl VTable for GenerateSeriesTable {
        let mut idx_num = 0;
        let mut positions = [None; 4]; // maps column index to constraint position
        let mut start_exists = false;
+        let mut usable = true;

        for (i, c) in constraints.iter().enumerate() {
            if c.column_index == START_COLUMN_INDEX && c.op == ConstraintOp::Eq {
                start_exists = true;
            }
-            if !c.usable || c.op != ConstraintOp::Eq {
-                continue;
-            }
            if c.column_index >= START_COLUMN_INDEX && c.column_index <= STEP_COLUMN_INDEX {
-                let bit = 1 << (c.column_index - 1);
-                idx_num |= bit;
-                positions[c.column_index as usize] = Some(i);
+                if !c.usable {
+                    usable = false;
+                } else if c.op == ConstraintOp::Eq {
+                    let bit = 1 << (c.column_index - 1);
+                    idx_num |= bit;
+                    positions[c.column_index as usize] = Some(i);
+                }
            }
        }

        if !start_exists {
            return Err(ResultCode::InvalidArgs);
        }
+        if !usable {
+            return Err(ResultCode::ConstraintViolation);
+        }

        // Assign argv indexes contiguously
        let mut argv_idx = 1;
@@ -763,11 +768,9 @@ mod tests {
            plan_info: 0,
        }];

-        let index_info = GenerateSeriesTable::best_index(&constraints, &[]).unwrap();
+        let result = GenerateSeriesTable::best_index(&constraints, &[]);

-        // Verify no argv_index is assigned
-        assert_eq!(index_info.constraint_usages[0].argv_index, None);
-        assert_eq!(index_info.idx_num, 0); // No bits set
+        assert!(matches!(result, Err(ResultCode::ConstraintViolation)));
    }

    fn usable_constraint(column_index: u32) -> ConstraintInfo {
--- a/core/translate/optimizer/access_method.rs
+++ b/core/translate/optimizer/access_method.rs
@@ -1,6 +1,6 @@
 use std::sync::Arc;

-use turso_ext::{ConstraintInfo, ConstraintUsage};
+use turso_ext::{ConstraintInfo, ConstraintUsage, ResultCode};
 use turso_sqlite3_parser::ast::SortOrder;

 use crate::translate::optimizer::constraints::{convert_to_vtab_constraint, Constraint};
@@ -63,7 +63,7 @@ pub fn find_best_access_method_for_join_order<'a>(
    join_order: &[JoinOrderMember],
    maybe_order_target: Option<&OrderTarget>,
    input_cardinality: f64,
-) -> Result<AccessMethod<'a>> {
+) -> Result<Option<AccessMethod<'a>>> {
    match &rhs_table.table {
        Table::BTree(_) => find_best_access_method_for_btree(
            rhs_table,
@@ -78,10 +78,10 @@ pub fn find_best_access_method_for_join_order<'a>(
            join_order,
            input_cardinality,
        ),
-        Table::FromClauseSubquery(_) => Ok(AccessMethod {
+        Table::FromClauseSubquery(_) => Ok(Some(AccessMethod {
            cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
            params: AccessMethodParams::Subquery,
-        }),
+        })),
    }
 }

@@ -91,7 +91,7 @@ fn find_best_access_method_for_btree<'a>(
    join_order: &[JoinOrderMember],
    maybe_order_target: Option<&OrderTarget>,
    input_cardinality: f64,
-) -> Result<AccessMethod<'a>> {
+) -> Result<Option<AccessMethod<'a>>> {
    let table_no = join_order.last().unwrap().table_id;
    let mut best_cost = estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality);
    let mut best_params = AccessMethodParams::BTreeTable {
@@ -186,10 +186,10 @@ fn find_best_access_method_for_btree<'a>(
        }
    }

-    Ok(AccessMethod {
+    Ok(Some(AccessMethod {
        cost: best_cost,
        params: best_params,
-    })
+    }))
 }

 fn find_best_access_method_for_vtab<'a>(
@@ -197,7 +197,7 @@ fn find_best_access_method_for_vtab<'a>(
    constraints: &[Constraint],
    join_order: &[JoinOrderMember],
    input_cardinality: f64,
-) -> Result<AccessMethod<'a>> {
+) -> Result<Option<AccessMethod<'a>>> {
    let vtab_constraints = convert_to_vtab_constraint(constraints, join_order);

    // TODO: get proper order_by information to pass to the vtab.
@@ -206,7 +206,7 @@ fn find_best_access_method_for_vtab<'a>(

    match best_index_result {
        Ok(index_info) => {
-            Ok(AccessMethod {
+            Ok(Some(AccessMethod {
                // TODO: Base cost on `IndexInfo::estimated_cost` and output cardinality on `IndexInfo::estimated_rows`
                cost: estimate_cost_for_scan_or_seek(None, &[], &[], input_cardinality),
                params: AccessMethodParams::VirtualTable {
@@ -215,8 +215,9 @@ fn find_best_access_method_for_vtab<'a>(
                    constraints: vtab_constraints,
                    constraint_usages: index_info.constraint_usages,
                },
-            })
+            }))
        }
+        Err(ResultCode::ConstraintViolation) => Ok(None),
        Err(e) => Err(LimboError::from(e)),
    }
 }
--- a/core/translate/optimizer/join.rs
+++ b/core/translate/optimizer/join.rs
@@ -42,7 +42,7 @@ impl JoinN {
 }

 /// Join n-1 tables with the n'th table.
-/// Returns None if the plan is worse than the provided cost upper bound.
+/// Returns None if the plan is worse than the provided cost upper bound or if no valid access method is found.
 pub fn join_lhs_and_rhs<'a>(
    lhs: Option<&JoinN>,
    rhs_table_reference: &JoinedTable,
@@ -65,6 +65,10 @@ pub fn join_lhs_and_rhs<'a>(
        input_cardinality as f64,
    )?;

+    let Some(best_access_method) = best_access_method else {
+        return Ok(None);
+    };
+
    let lhs_cost = lhs.map_or(Cost(0.0), |l| l.cost);
    let cost = lhs_cost + best_access_method.cost;

--- a/core/translate/planner.rs
+++ b/core/translate/planner.rs
@@ -327,7 +327,7 @@ fn parse_from_clause_table(
    schema: &Schema,
    table: ast::SelectTable,
    table_references: &mut TableReferences,
-    out_where_clause: &mut Vec<WhereTerm>,
+    vtab_predicates: &mut Vec<Expr>,
    ctes: &mut Vec<JoinedTable>,
    syms: &SymbolTable,
    table_ref_counter: &mut TableRefIdCounter,
@@ -338,7 +338,7 @@ fn parse_from_clause_table(
            table_references,
            ctes,
            table_ref_counter,
-            out_where_clause,
+            vtab_predicates,
            qualified_name,
            maybe_alias,
            None,
@@ -379,7 +379,7 @@ fn parse_from_clause_table(
            table_references,
            ctes,
            table_ref_counter,
-            out_where_clause,
+            vtab_predicates,
            qualified_name,
            maybe_alias,
            maybe_args,
@@ -394,7 +394,7 @@ fn parse_table(
    table_references: &mut TableReferences,
    ctes: &mut Vec<JoinedTable>,
    table_ref_counter: &mut TableRefIdCounter,
-    out_where_clause: &mut Vec<WhereTerm>,
+    vtab_predicates: &mut Vec<Expr>,
    qualified_name: QualifiedName,
    maybe_alias: Option<As>,
    maybe_args: Option<Vec<Expr>>,
@@ -431,7 +431,7 @@ fn parse_table(
                transform_args_into_where_terms(
                    args,
                    internal_id,
-                    out_where_clause,
+                    vtab_predicates,
                    table.as_ref(),
                )?;
            }
@@ -485,7 +485,7 @@ fn parse_table(
 fn transform_args_into_where_terms(
    args: Vec<Expr>,
    internal_id: TableInternalId,
-    out_where_clause: &mut Vec<WhereTerm>,
+    predicates: &mut Vec<Expr>,
    table: &Table,
 ) -> Result<()> {
    let mut args_iter = args.into_iter();
@@ -497,11 +497,6 @@ fn transform_args_into_where_terms(
        hidden_count += 1;

        if let Some(arg_expr) = args_iter.next() {
-            if contains_column_reference(&arg_expr)? {
-                crate::bail_parse_error!(
-                    "Column references are not supported as table-valued function arguments yet"
-                );
-            }
            let column_expr = Expr::Column {
                database: None,
                table: internal_id,
@@ -516,11 +511,7 @@ fn transform_args_into_where_terms(
                    Box::new(other),
                ),
            };
-            out_where_clause.push(WhereTerm {
-                expr,
-                from_outer_join: None,
-                consumed: Cell::new(false),
-            });
+            predicates.push(expr);
        }
    }

@@ -536,18 +527,6 @@ fn transform_args_into_where_terms(
    Ok(())
 }

-fn contains_column_reference(top_level_expr: &Expr) -> Result<bool> {
-    let mut contains = false;
-    walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> {
-        match expr {
-            Expr::Id(_) | Expr::Qualified(_, _) | Expr::Column { .. } => contains = true,
-            _ => {}
-        };
-        Ok(WalkControl::Continue)
-    })?;
-    Ok(contains)
-}
-
 #[allow(clippy::too_many_arguments)]
 pub fn parse_from(
    schema: &Schema,
@@ -555,6 +534,7 @@ pub fn parse_from(
    syms: &SymbolTable,
    with: Option<With>,
    out_where_clause: &mut Vec<WhereTerm>,
+    vtab_predicates: &mut Vec<Expr>,
    table_references: &mut TableReferences,
    table_ref_counter: &mut TableRefIdCounter,
    connection: &Arc<crate::Connection>,
@@ -641,7 +621,7 @@ pub fn parse_from(
        schema,
        select_owned,
        table_references,
-        out_where_clause,
+        vtab_predicates,
        &mut ctes_as_subqueries,
        syms,
        table_ref_counter,
@@ -655,6 +635,7 @@ pub fn parse_from(
            syms,
            &mut ctes_as_subqueries,
            out_where_clause,
+            vtab_predicates,
            table_references,
            table_ref_counter,
            connection,
@@ -871,6 +852,7 @@ fn parse_join(
    syms: &SymbolTable,
    ctes: &mut Vec<JoinedTable>,
    out_where_clause: &mut Vec<WhereTerm>,
+    vtab_predicates: &mut Vec<Expr>,
    table_references: &mut TableReferences,
    table_ref_counter: &mut TableRefIdCounter,
    connection: &Arc<crate::Connection>,
@@ -885,7 +867,7 @@ fn parse_join(
        schema,
        table,
        table_references,
-        out_where_clause,
+        vtab_predicates,
        ctes,
        syms,
        table_ref_counter,
--- a/core/translate/select.rs
+++ b/core/translate/select.rs
@@ -1,7 +1,7 @@
 use super::emitter::{emit_program, TranslateCtx};
 use super::plan::{
    select_star, Distinctness, JoinOrderMember, Operation, OuterQueryReference, QueryDestination,
-    Search, TableReferences,
+    Search, TableReferences, WhereTerm,
 };
 use crate::function::{AggFunc, ExtFunc, Func};
 use crate::schema::Table;
@@ -14,10 +14,11 @@ use crate::translate::planner::{
 use crate::util::normalize_ident;
 use crate::vdbe::builder::{ProgramBuilderOpts, TableRefIdCounter};
 use crate::vdbe::insn::Insn;
-use crate::SymbolTable;
 use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
+use crate::{Connection, SymbolTable};
+use std::cell::Cell;
 use std::sync::Arc;
-use turso_sqlite3_parser::ast::{self, CompoundSelect, SortOrder};
+use turso_sqlite3_parser::ast::{self, CompoundSelect, Expr, SortOrder};
 use turso_sqlite3_parser::ast::{ResultColumn, SelectInner};

 pub struct TranslateSelectResult {
@@ -207,6 +208,7 @@ fn prepare_one_select_plan(
            }

            let mut where_predicates = vec![];
+            let mut vtab_predicates = vec![];

            let mut table_references = TableReferences::new(vec![], outer_query_refs.to_vec());

@@ -225,6 +227,7 @@ fn prepare_one_select_plan(
                syms,
                with,
                &mut where_predicates,
+                &mut vtab_predicates,
                &mut table_references,
                table_ref_counter,
                connection,
@@ -521,6 +524,11 @@ fn prepare_one_select_plan(
                }
            }

+            // This step can only be performed at this point, because all table references are now available.
+            // Virtual table predicates may depend on column bindings from tables to the right in the join order,
+            // so we must wait until the full set of references has been collected.
+            add_vtab_predicates_to_where_clause(&mut vtab_predicates, &mut plan, connection)?;
+
            // Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions.
            parse_where(
                where_clause,
@@ -636,6 +644,29 @@ fn prepare_one_select_plan(
    }
 }

+fn add_vtab_predicates_to_where_clause(
+    vtab_predicates: &mut Vec<Expr>,
+    plan: &mut SelectPlan,
+    connection: &Arc<Connection>,
+) -> Result<()> {
+    for expr in vtab_predicates.iter_mut() {
+        bind_column_references(
+            expr,
+            &mut plan.table_references,
+            Some(&plan.result_columns),
+            connection,
+        )?;
+    }
+    for expr in vtab_predicates.drain(..) {
+        plan.where_clause.push(WhereTerm {
+            expr,
+            from_outer_join: None,
+            consumed: Cell::new(false),
+        });
+    }
+    Ok(())
+}
+
 /// Replaces a column number in an ORDER BY or GROUP BY expression with a copy of the column expression.
 /// For example, in SELECT u.first_name, count(1) FROM users u GROUP BY 1 ORDER BY 2,
 /// the column number 1 is replaced with u.first_name and the column number 2 is replaced with count(1).
--- a/extensions/core/src/types.rs
+++ b/extensions/core/src/types.rs
@@ -26,6 +26,7 @@ pub enum ResultCode {
    Row = 18,
    Interrupt = 19,
    Busy = 20,
+    ConstraintViolation = 21,
 }

 impl ResultCode {
@@ -66,6 +67,7 @@ impl Display for ResultCode {
            ResultCode::Row => write!(f, "Row"),
            ResultCode::Interrupt => write!(f, "Interrupt"),
            ResultCode::Busy => write!(f, "Busy"),
+            ResultCode::ConstraintViolation => write!(f, "Constraint Violation"),
        }
    }
 }
--- a/extensions/core/src/vtabs.rs
+++ b/extensions/core/src/vtabs.rs
@@ -159,6 +159,11 @@ pub trait VTable {
    /// the virtual table’s `filter` method if the chosen plan is selected for execution. There is
    /// no guarantee that `filter` will ever be called — many `best_index` candidates are discarded
    /// during planning.
+    ///
+    /// If an error occurs, an appropriate error code is returned. A return value of
+    /// `ResultCode::ConstraintViolation` from `best_index` is not considered an error. Instead, it
+    /// indicates that the current configuration of `usable` flags in `ConstraintInfo` cannot
+    /// produce a valid plan.
    fn best_index(
        _constraints: &[ConstraintInfo],
        _order_by: &[OrderByInfo],
--- a/testing/cli_tests/extensions.py
+++ b/testing/cli_tests/extensions.py
@@ -381,6 +381,56 @@ def _test_series(limbo: TestTursoShell):
        "SELECT * FROM target;",
        lambda res: res == "1\n2\n3\n4\n5",
    )
+    limbo.run_test_fn(
+        "SELECT t.id, series.value FROM target t, generate_series(t.id, 3) series;",
+        lambda res: res == "1|1\n1|2\n1|3\n2|2\n2|3\n3|3",
+        "Column reference from table on the left used as generate_series argument"
+    )
+    limbo.run_test_fn(
+        "SELECT t.id, series.value FROM generate_series(t.id, 3) series, target t;",
+        lambda res: res == "1|1\n1|2\n1|3\n2|2\n2|3\n3|3",
+        "Column reference from table on the right used as generate_series argument"
+    )
+    limbo.run_test_fn(
+        "SELECT one.value, series.value FROM (SELECT 1 AS value) one, generate_series(one.value, 3) series;",
+        lambda res: res == "1|1\n1|2\n1|3",
+        "Column reference from scalar subquery (left side)"
+    )
+    limbo.run_test_fn(
+        "SELECT one.value, series.value FROM generate_series(one.value, 3) series, (SELECT 1 AS value) one;",
+        lambda res: res == "1|1\n1|2\n1|3",
+        "Column reference from scalar subquery (right side)"
+    )
+    limbo.run_test_fn(
+        "SELECT "
+        "   * "
+        "FROM "
+        "   generate_series(a.start, a.stop) series "
+        "NATURAL JOIN "
+        "   (SELECT 1 AS start, 3 AS stop, 2 AS value) a;",
+        lambda res: res == "2|1|3",
+        "Natural join where TVF arguments come from column references"
+    )
+    limbo.run_test_fn(
+        "SELECT * FROM generate_series(a.start, a.stop) JOIN (SELECT 1 AS start, 3 AS stop) a USING (start, stop);",
+        lambda res: res == "1\n2\n3",
+        "Join USING where TVF arguments come from column references"
+    )
+    limbo.run_test_fn(
+        "SELECT a.value, b.value FROM generate_series(b.value, b.value+1) a JOIN generate_series(1, 2) b;",
+        lambda res: res == "1|1\n2|1\n2|2\n3|2",
+        "TVF arguments come from another TVF"
+    )
+    limbo.run_test_fn(
+        "SELECT * FROM generate_series(a.start, a.stop) b, generate_series(b.start, b.stop) a;",
+        lambda res: "No valid query plan found" in res or "no query solution" in res,
+        "circular column references between two generate_series"
+    )
+    limbo.run_test_fn(
+        "SELECT * FROM generate_series(b.start, b.stop) b;",
+        lambda res: "Invalid Argument" in res or 'first argument to "generate_series()" missing or unusable' in res,
+        "self-reference in generate_series arguments"
+    )
    limbo.quit()


--- a/testing/pragma.test
+++ b/testing/pragma.test
@@ -262,3 +262,37 @@ do_execsql_test_on_specific_db $test_pragma_page_size_db pragma-page-size-set-un
 } {1024}
 catch {file delete -force $test_pragma_page_size_db}
 catch {file delete -force "${test_pragma_page_size_db}-wal"}
+
+do_execsql_test pragma-vtab-join {
+  SELECT s.name, ti.name FROM sqlite_schema AS s, pragma_table_info(s.name) AS ti;
+} {users|id
+users|first_name
+users|last_name
+users|email
+users|phone_number
+users|address
+users|city
+users|state
+users|zipcode
+users|age
+products|id
+products|name
+products|price
+}
+
+do_execsql_test pragma-vtab-reversed-join-order {
+  SELECT s.name, ti.name FROM pragma_table_info(s.name) AS ti, sqlite_schema AS s;
+} {users|id
+users|first_name
+users|last_name
+users|email
+users|phone_number
+users|address
+users|city
+users|state
+users|zipcode
+users|age
+products|id
+products|name
+products|price
+}