Merge 'translate: disallow correlated subqueries in HAVING and ORDER BY' from Jussi Saurio

SQLite supports correlated subqueries in HAVING and ORDER BY, but our translator cannot handle them correctly yet, so they are now rejected with a parse error.

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3861
This commit is contained in:
Jussi Saurio
2025-10-29 16:05:43 +02:00
committed by GitHub
4 changed files with 41 additions and 138 deletions

View File

@@ -1390,7 +1390,7 @@ pub enum SubqueryState {
Evaluated { evaluated_at: EvalAt },
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SubqueryPosition {
ResultColumn,
Where,
@@ -1400,6 +1400,30 @@ pub enum SubqueryPosition {
LimitOffset,
}
impl SubqueryPosition {
/// Returns true if a subquery in this position of the SELECT can be correlated, i.e. if it can reference columns from the outer query.
/// FIXME: HAVING and ORDER BY should allow correlated subqueries, but our translation system currently does not support this well.
/// Subqueries in these positions should be evaluated after the main loop, AND they should also have access to aggregations computed
/// in the main query.
pub fn allow_correlated(&self) -> bool {
matches!(
self,
SubqueryPosition::ResultColumn | SubqueryPosition::Where | SubqueryPosition::GroupBy
)
}
pub fn name(&self) -> &'static str {
match self {
SubqueryPosition::ResultColumn => "SELECT list",
SubqueryPosition::Where => "WHERE",
SubqueryPosition::GroupBy => "GROUP BY",
SubqueryPosition::Having => "HAVING",
SubqueryPosition::OrderBy => "ORDER BY",
SubqueryPosition::LimitOffset => "LIMIT/OFFSET",
}
}
}
#[derive(Debug, Clone)]
/// A subquery that is not part of the `FROM` clause.
/// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc.

View File

@@ -192,6 +192,16 @@ fn get_subquery_parser<'a>(
get_outer_query_refs: fn(&TableReferences) -> Vec<OuterQueryReference>,
position: SubqueryPosition,
) -> impl FnMut(&mut ast::Expr) -> Result<WalkControl> + 'a {
/// Rejects correlated subqueries that appear in a clause where correlation is not allowed.
///
/// Returns `Ok(())` when the subquery is uncorrelated or when `position` allows
/// correlation; otherwise bails out with a parse error that names the clause.
fn handle_unsupported_correlation(correlated: bool, position: SubqueryPosition) -> Result<()> {
    // Nothing to reject: either there is no correlation or this position allows it.
    if !correlated || position.allow_correlated() {
        return Ok(());
    }
    crate::bail_parse_error!(
        "correlated subqueries in {} clause are not supported yet",
        position.name()
    );
}
move |expr: &mut ast::Expr| -> Result<WalkControl> {
match expr {
ast::Expr::Exists(_) => {
@@ -229,6 +239,7 @@ fn get_subquery_parser<'a>(
"1".to_string(),
))));
let correlated = plan.is_correlated();
handle_unsupported_correlation(correlated, position)?;
out_subqueries.push(NonFromClauseSubquery {
internal_id: subquery_id,
query_type: subquery_type,
@@ -301,6 +312,7 @@ fn get_subquery_parser<'a>(
*num_regs = reg_count;
let correlated = plan.is_correlated();
handle_unsupported_correlation(correlated, position)?;
out_subqueries.push(NonFromClauseSubquery {
internal_id: *subquery_id,
@@ -399,12 +411,7 @@ fn get_subquery_parser<'a>(
};
let correlated = plan.is_correlated();
if correlated && position == SubqueryPosition::Having {
crate::bail_parse_error!(
"correlated IN subqueries in HAVING clause are not supported yet"
);
}
handle_unsupported_correlation(correlated, position)?;
out_subqueries.push(NonFromClauseSubquery {
internal_id: subquery_id,

View File

@@ -744,48 +744,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-group-by-with-j
North|Bob|200
South|Alice|150}
# Uncorrelated subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by {
create table products(id, name, category_id);
create table sort_config(sort_order);
insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
insert into sort_config values ('asc');
select id, name from products
order by (select case when sort_order = 'asc' then id else -id end from sort_config);
} {1|hat
2|laptop
3|book}
# Correlated subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by {
create table products(id, name, category_id);
create table categories(id, priority);
insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
insert into categories values (1, 10), (2, 20), (3, 5);
select id, name from products
order by (select priority from categories where id = category_id);
} {3|book
2|laptop
1|hat}
# Correlated subquery in ORDER BY clause with join
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-with-join {
create table products(id, name, category_id, supplier_id);
create table categories(id, priority);
create table suppliers(id, name);
insert into products values (1, 'hat', 2, 100), (2, 'laptop', 1, 200), (3, 'book', 3, 100);
insert into categories values (1, 10), (2, 20), (3, 5);
insert into suppliers values (100, 'SupplierA'), (200, 'SupplierB');
select p.id, p.name, s.name as supplier
from products p join suppliers s on p.supplier_id = s.id
order by (select priority from categories where id = p.category_id);
} {3|book|SupplierA
2|laptop|SupplierB
1|hat|SupplierA}
# Uncorrelated IN-subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in {
create table products(id, name, category_id);
@@ -799,19 +757,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in {
3|book
1|hat}
# Correlated IN-subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-in {
create table products(id, name, category_id);
create table category_priorities(category_id, priority);
insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
insert into category_priorities values (1, 10), (2, 20), (3, 5);
select id, name from products
order by category_id in (select category_id from category_priorities where priority > 8), id;
} {3|book
1|hat
2|laptop}
# Uncorrelated subquery in HAVING clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having {
create table orders(id, customer_id, amount);
@@ -839,64 +784,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having-in {
} {100|200
300|100}
# Correlated subquery in HAVING clause
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having {
create table orders(id, customer_id, amount);
create table customer_thresholds(customer_id, min_amount);
insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80);
insert into customer_thresholds values (100, 100), (200, 150);
select customer_id, sum(amount) as total
from orders
group by customer_id
having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = orders.customer_id);
} {100|200}
# Correlated IN-subquery in HAVING clause
# FIXME: currently disabled
# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-in {
# create table orders(id, customer_id, amount);
# create table customer_targets(customer_id, target_amount);
# insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80);
# insert into customer_targets values (100, 200), (100, 250), (200, 110);
#
# select customer_id, sum(amount) as total
# from orders
# group by customer_id
# having total in (select target_amount from customer_targets where customer_targets.customer_id = orders.customer_id);
# } {100|200
# 200|110}
# Correlated subquery in HAVING clause with join
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-join {
create table orders(id, customer_id, amount, region_id);
create table customer_thresholds(customer_id, min_amount);
create table regions(id, name);
insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2);
insert into customer_thresholds values (100, 100), (200, 150);
insert into regions values (1, 'East'), (2, 'West');
select o.customer_id, r.name as region, sum(o.amount) as total
from orders o join regions r on o.region_id = r.id
group by o.customer_id, r.name
having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
} {100|East|200}
# Correlated IN-subquery in HAVING clause with join
# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-subquery-in-having-with-join {
# create table orders(id, customer_id, amount, region_id);
# create table customer_thresholds(customer_id, min_amount);
# create table regions(id, name);
# insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2);
# insert into customer_thresholds values (100, 100), (200, 150);
# insert into regions values (1, 'East'), (2, 'West');
#
# select o.customer_id, r.name as region, sum(o.amount) as total
# from orders o join regions r on o.region_id = r.id
# group by o.customer_id, r.name
# having total + 40 in (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
# } {200|West|110}
# Uncorrelated subquery in LIMIT clause
do_execsql_test_on_specific_db {:memory:} subquery-in-limit {
create table items(id, name);

View File

@@ -4737,11 +4737,7 @@ mod fuzz_tests {
}
// Helper to generate a HAVING condition comparing an aggregate to a scalar subquery
fn gen_having_condition(
rng: &mut ChaCha8Rng,
main_table: &str,
allowed_outer_cols: Option<&[&str]>,
) -> String {
fn gen_having_condition(rng: &mut ChaCha8Rng, main_table: &str) -> String {
let (agg_func, agg_col) = match main_table {
"t1" => [
("SUM", "value1"),
@@ -4767,7 +4763,7 @@ mod fuzz_tests {
_ => ("COUNT", "*"),
};
let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)];
let rhs = gen_scalar_subquery(rng, 0, Some(main_table), allowed_outer_cols);
let rhs = gen_scalar_subquery(rng, 0, Some(main_table), Some(&[]));
if agg_col == "*" {
format!("COUNT(*) {op} ({rhs})")
} else {
@@ -5039,17 +5035,6 @@ mod fuzz_tests {
6 => {
// Aggregated query with GROUP BY and optional HAVING; allow subqueries in GROUP BY/HAVING
let group_expr = gen_group_by_expr(&mut rng, main_table);
// Only GROUP BY columns may be referenced by correlated subqueries
let allowed_outer_cols: Vec<&str> = match group_expr.as_str() {
"id" => vec!["id"],
"value1" => vec!["value1"],
"value2" => vec!["value2"],
"ref_id" => vec!["ref_id"],
"data" => vec!["data"],
"category" => vec!["category"],
"amount" => vec!["amount"],
_ => Vec::new(),
};
let (agg_func, agg_col) = match main_table {
"t1" => [
("SUM", "value1"),
@@ -5080,7 +5065,7 @@ mod fuzz_tests {
if rng.random_bool(0.4) {
q.push_str(&format!(
" HAVING {}",
gen_having_condition(&mut rng, main_table, Some(&allowed_outer_cols))
gen_having_condition(&mut rng, main_table)
));
}
q