diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 2b9f98e59..d69000526 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1390,7 +1390,7 @@ pub enum SubqueryState { Evaluated { evaluated_at: EvalAt }, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SubqueryPosition { ResultColumn, Where, @@ -1400,6 +1400,30 @@ pub enum SubqueryPosition { LimitOffset, } +impl SubqueryPosition { + /// Returns true if a subquery in this position of the SELECT can be correlated, i.e. if it can reference columns from the outer query. + /// FIXME: HAVING and ORDER BY should allow correlated subqueries, but our translation system currently does not support this well. + /// Subqueries in these positions should be evaluated after the main loop, AND they should also have access to aggregations computed + /// in the main query. + pub fn allow_correlated(&self) -> bool { + matches!( + self, + SubqueryPosition::ResultColumn | SubqueryPosition::Where | SubqueryPosition::GroupBy + ) + } + + pub fn name(&self) -> &'static str { + match self { + SubqueryPosition::ResultColumn => "SELECT list", + SubqueryPosition::Where => "WHERE", + SubqueryPosition::GroupBy => "GROUP BY", + SubqueryPosition::Having => "HAVING", + SubqueryPosition::OrderBy => "ORDER BY", + SubqueryPosition::LimitOffset => "LIMIT/OFFSET", + } + } +} + #[derive(Debug, Clone)] /// A subquery that is not part of the `FROM` clause. /// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 42ef2e616..51999b9c4 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -192,6 +192,16 @@ fn get_subquery_parser<'a>( get_outer_query_refs: fn(&TableReferences) -> Vec, position: SubqueryPosition, ) -> impl FnMut(&mut ast::Expr) -> Result + 'a { + fn handle_unsupported_correlation(correlated: bool, position: SubqueryPosition) -> Result<()> { + if correlated && !position.allow_correlated() { + crate::bail_parse_error!( + "correlated subqueries in {} clause are not supported yet", + position.name() + ); + } + Ok(()) + } + move |expr: &mut ast::Expr| -> Result { match expr { ast::Expr::Exists(_) => { @@ -229,6 +239,7 @@ fn get_subquery_parser<'a>( "1".to_string(), )))); let correlated = plan.is_correlated(); + handle_unsupported_correlation(correlated, position)?; out_subqueries.push(NonFromClauseSubquery { internal_id: subquery_id, query_type: subquery_type, @@ -301,6 +312,7 @@ fn get_subquery_parser<'a>( *num_regs = reg_count; let correlated = plan.is_correlated(); + handle_unsupported_correlation(correlated, position)?; out_subqueries.push(NonFromClauseSubquery { internal_id: *subquery_id, @@ -399,12 +411,7 @@ fn get_subquery_parser<'a>( }; let correlated = plan.is_correlated(); - - if correlated && position == SubqueryPosition::Having { - crate::bail_parse_error!( - "correlated IN subqueries in HAVING clause are not supported yet" - ); - } + handle_unsupported_correlation(correlated, position)?; out_subqueries.push(NonFromClauseSubquery { internal_id: subquery_id, diff --git a/testing/subquery.test b/testing/subquery.test index 261defd47..f2076a2c3 100644 --- a/testing/subquery.test +++ b/testing/subquery.test @@ -744,48 +744,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-group-by-with-j North|Bob|200 South|Alice|150} -# Uncorrelated subquery in ORDER BY clause -do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by { - create table products(id, name, category_id); - create table sort_config(sort_order); - insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3); - insert into sort_config values ('asc'); - - select id, name from products - order by (select case when sort_order = 'asc' then id else -id end from sort_config); -} {1|hat -2|laptop -3|book} - -# Correlated subquery in ORDER BY clause -do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by { - create table products(id, name, category_id); - create table categories(id, priority); - insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3); - insert into categories values (1, 10), (2, 20), (3, 5); - - select id, name from products - order by (select priority from categories where id = category_id); -} {3|book -2|laptop -1|hat} - -# Correlated subquery in ORDER BY clause with join -do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-with-join { - create table products(id, name, category_id, supplier_id); - create table categories(id, priority); - create table suppliers(id, name); - insert into products values (1, 'hat', 2, 100), (2, 'laptop', 1, 200), (3, 'book', 3, 100); - insert into categories values (1, 10), (2, 20), (3, 5); - insert into suppliers values (100, 'SupplierA'), (200, 'SupplierB'); - - select p.id, p.name, s.name as supplier - from products p join suppliers s on p.supplier_id = s.id - order by (select priority from categories where id = p.category_id); -} {3|book|SupplierA -2|laptop|SupplierB -1|hat|SupplierA} - # Uncorrelated IN-subquery in ORDER BY clause do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in { create table products(id, name, category_id); @@ -799,19 +757,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in { 3|book 1|hat} -# Correlated IN-subquery in ORDER BY clause -do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-in { - create table products(id, name, category_id); - create table category_priorities(category_id, priority); - insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3); - insert into category_priorities values (1, 10), (2, 20), (3, 5); - - select id, name from products - order by category_id in (select category_id from category_priorities where priority > 8), id; -} {3|book -1|hat -2|laptop} - # Uncorrelated subquery in HAVING clause do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having { create table orders(id, customer_id, amount); @@ -839,64 +784,6 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having-in { } {100|200 300|100} -# Correlated subquery in HAVING clause -do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having { - create table orders(id, customer_id, amount); - create table customer_thresholds(customer_id, min_amount); - insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80); - insert into customer_thresholds values (100, 100), (200, 150); - - select customer_id, sum(amount) as total - from orders - group by customer_id - having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = orders.customer_id); -} {100|200} - -# Correlated IN-subquery in HAVING clause -# FIXME: currently disabled -# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-in { -# create table orders(id, customer_id, amount); -# create table customer_targets(customer_id, target_amount); -# insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80); -# insert into customer_targets values (100, 200), (100, 250), (200, 110); -# -# select customer_id, sum(amount) as total -# from orders -# group by customer_id -# having total in (select target_amount from customer_targets where customer_targets.customer_id = orders.customer_id); -# } {100|200 -# 200|110} - -# Correlated subquery in HAVING clause with join -do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-join { - create table orders(id, customer_id, amount, region_id); - create table customer_thresholds(customer_id, min_amount); - create table regions(id, name); - insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2); - insert into customer_thresholds values (100, 100), (200, 150); - insert into regions values (1, 'East'), (2, 'West'); - - select o.customer_id, r.name as region, sum(o.amount) as total - from orders o join regions r on o.region_id = r.id - group by o.customer_id, r.name - having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id); -} {100|East|200} - -# Correlated IN-subquery in HAVING clause with join -# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-subquery-in-having-with-join { -# create table orders(id, customer_id, amount, region_id); -# create table customer_thresholds(customer_id, min_amount); -# create table regions(id, name); -# insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2); -# insert into customer_thresholds values (100, 100), (200, 150); -# insert into regions values (1, 'East'), (2, 'West'); -# -# select o.customer_id, r.name as region, sum(o.amount) as total -# from orders o join regions r on o.region_id = r.id -# group by o.customer_id, r.name -# having total + 40 in (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id); -# } {200|West|110} - # Uncorrelated subquery in LIMIT clause do_execsql_test_on_specific_db {:memory:} subquery-in-limit { create table items(id, name); diff --git a/tests/fuzz/mod.rs b/tests/fuzz/mod.rs index 8085ba588..212f53db5 100644 --- a/tests/fuzz/mod.rs +++ b/tests/fuzz/mod.rs @@ -4737,11 +4737,7 @@ mod fuzz_tests { } // Helper to generate a HAVING condition comparing an aggregate to a scalar subquery - fn gen_having_condition( - rng: &mut ChaCha8Rng, - main_table: &str, - allowed_outer_cols: Option<&[&str]>, - ) -> String { + fn gen_having_condition(rng: &mut ChaCha8Rng, main_table: &str) -> String { let (agg_func, agg_col) = match main_table { "t1" => [ ("SUM", "value1"), @@ -4767,7 +4763,7 @@ mod fuzz_tests { _ => ("COUNT", "*"), }; let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)]; - let rhs = gen_scalar_subquery(rng, 0, Some(main_table), allowed_outer_cols); + let rhs = gen_scalar_subquery(rng, 0, Some(main_table), Some(&[])); if agg_col == "*" { format!("COUNT(*) {op} ({rhs})") } else { @@ -5039,17 +5035,6 @@ mod fuzz_tests { 6 => { // Aggregated query with GROUP BY and optional HAVING; allow subqueries in GROUP BY/HAVING let group_expr = gen_group_by_expr(&mut rng, main_table); - // Only GROUP BY columns may be referenced by correlated subqueries - let allowed_outer_cols: Vec<&str> = match group_expr.as_str() { - "id" => vec!["id"], - "value1" => vec!["value1"], - "value2" => vec!["value2"], - "ref_id" => vec!["ref_id"], - "data" => vec!["data"], - "category" => vec!["category"], - "amount" => vec!["amount"], - _ => Vec::new(), - }; let (agg_func, agg_col) = match main_table { "t1" => [ ("SUM", "value1"), @@ -5080,7 +5065,7 @@ mod fuzz_tests { if rng.random_bool(0.4) { q.push_str(&format!( " HAVING {}", - gen_having_condition(&mut rng, main_table, Some(&allowed_outer_cols)) + gen_having_condition(&mut rng, main_table) )); } q