Merge 'treat parameters as "constant" within a query' from Nikita Sivukhin

Right now tursodb treats parameters/variables as non-constant. But
they are actually constant in the sense that a parameter/variable has a
fixed value that never changes during query execution.
This PR makes tursodb treat parameters as constant and evaluate
expressions that depend only on them just once.
One real-world scenario where this can be helpful is a vector search
query:
```sql
    SELECT id, vector_distance_jaccard(embedding, vector32_sparse(?)) as distance
    FROM vectors
    ORDER BY distance ASC
    LIMIT ?
```
Without the constant optimization, the `vector32_sparse` function is
executed for every row, which is very inefficient and can make the query
100x slower (there is no need to evaluate this function for every row,
as the text representation can be transformed to binary only once)

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3936
This commit is contained in:
Jussi Saurio
2025-11-12 11:46:10 +02:00
committed by GitHub
2 changed files with 47 additions and 3 deletions

View File

@@ -1071,7 +1071,7 @@ impl Optimizable for ast::Expr {
Expr::Register(..) => false, // Register values can be null
}
}
/// Returns true if the expression is a constant i.e. does not depend on variables or columns etc.
/// Returns true if the expression is a constant i.e. does not depend on columns and can be evaluated only once during the execution
fn is_constant(&self, resolver: &Resolver<'_>) -> bool {
match self {
Expr::SubqueryResult { .. } => false,
@@ -1142,8 +1142,8 @@ impl Optimizable for ast::Expr {
Expr::Raise(_, expr) => expr.as_ref().is_none_or(|expr| expr.is_constant(resolver)),
Expr::Subquery(_) => false,
Expr::Unary(_, expr) => expr.is_constant(resolver),
Expr::Variable(_) => false,
Expr::Register(_) => false, // Register values are not constants
Expr::Variable(_) => true,
Expr::Register(_) => false,
}
}
/// Returns true if the expression is a constant expression that, when evaluated as a condition, is always true or false

View File

@@ -1018,3 +1018,47 @@ fn test_many_columns() {
]]
);
}
/// Regression test: an expression that depends only on bound parameters must be
/// evaluated once per query, not once per scanned row.
///
/// The table holds 10_000 rows and the predicate builds a 100_000_000-byte
/// zeroblob from the first parameter. If that expression were re-evaluated for
/// every row, the query would allocate 10^8 * 10^4 bytes in total and could not
/// finish within the time budget asserted at the end.
#[test]
fn test_eval_param_only_once() {
    let tmp_db = TempDatabase::new("test_eval_param_only_once");
    let conn = tmp_db.connect_limbo();
    conn.execute("CREATE TABLE t(x)").unwrap();
    conn.execute("INSERT INTO t SELECT value FROM generate_series(1, 10000)")
        .unwrap();
    let mut stmt = conn
        .query("SELECT COUNT(*) FROM t WHERE LENGTH(zeroblob(?)) = ?")
        .unwrap()
        .unwrap();
    // Both parameters carry the same value, so the predicate holds for every row
    // and COUNT(*) must equal the number of inserted rows.
    stmt.bind_at(
        1.try_into().unwrap(),
        turso_core::Value::Integer(100_000_000),
    );
    stmt.bind_at(
        2.try_into().unwrap(),
        turso_core::Value::Integer(100_000_000),
    );

    let start_time = std::time::Instant::now();
    // Track produced rows so the test cannot pass vacuously when the statement
    // finishes without ever yielding the aggregate row.
    let mut rows_seen = 0usize;
    loop {
        match stmt.step().unwrap() {
            StepResult::IO => {
                stmt.run_once().unwrap();
            }
            StepResult::Done => break,
            StepResult::Row => {
                let values = stmt
                    .row()
                    .unwrap()
                    .get_values()
                    .cloned()
                    .collect::<Vec<_>>();
                assert_eq!(values, vec![turso_core::Value::Integer(10000)]);
                rows_seen += 1;
            }
            _ => unreachable!("unexpected step result while running the query"),
        }
    }
    let elapsed = start_time.elapsed();

    assert_eq!(rows_seen, 1, "COUNT(*) must produce exactly one row");
    // The budget is only reachable when the parameter-only expression is
    // evaluated a single time for the whole query.
    let budget = std::time::Duration::from_millis(100);
    assert!(
        elapsed < budget,
        "query took {:?}, expected less than {:?}",
        elapsed,
        budget
    );
}