From 29fe3b585a2d6c8ca98f0d4a1bf9bdcae111570c Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 29 Oct 2025 09:57:55 +0200 Subject: [PATCH] Add more tests and disable correlated IN-subqueries in HAVING position I discovered a flaw in our current translation that makes queries of type HAVING foo IN (SELECT ...) not work properly - in these cases we need to defer translation of the subquery until later. I will fix this in a future PR because I suspect it's not trivial. --- core/translate/group_by.rs | 18 +++++++- core/translate/plan.rs | 10 ++++ core/translate/subquery.rs | 20 +++++++- testing/subquery.test | 94 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 2 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 5d7524d40..aa5e5dcdc 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -343,7 +343,23 @@ fn collect_result_columns<'a>( // unlike other expressions like function calls or direct column references, // so we must add it so that the subquery result gets collected to the GROUP BY // columns. - ast::Expr::SubqueryResult { .. } => { + // + // However, if the subquery is of the form: 'aggregate_result IN (SELECT...)', we need to skip it because the aggregation + // is done later. + ast::Expr::SubqueryResult { lhs, .. } => { + if let Some(ref lhs) = lhs { + let mut lhs_contains_agg = false; + walk_expr(lhs, &mut |expr: &ast::Expr| -> Result { + if plan.aggregates.iter().any(|a| a.original_expr == *expr) { + lhs_contains_agg = true; + return Ok(WalkControl::SkipChildren); + } + Ok(WalkControl::Continue) + })?; + if lhs_contains_agg { + return Ok(WalkControl::SkipChildren); + } + } result_columns.push(expr); } _ => { diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 653763d07..df8fb57c3 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1366,6 +1366,16 @@ pub enum SubqueryState { Evaluated { evaluated_at: EvalAt }, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SubqueryPosition { + ResultColumn, + Where, + GroupBy, + Having, + OrderBy, + LimitOffset, +} + #[derive(Debug, Clone)] /// A subquery that is not part of the `FROM` clause. /// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 48d1e7c04..f02eb9264 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -10,7 +10,10 @@ use crate::{ emitter::emit_program_for_select, expr::{unwrap_parens, walk_expr_mut, WalkControl}, optimizer::optimize_select_plan, - plan::{ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryState}, + plan::{ + ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryPosition, + SubqueryState, + }, select::prepare_select_plan, }, vdbe::{ @@ -48,6 +51,7 @@ pub fn plan_subqueries_from_select_plan( resolver, plan.where_clause.iter_mut().map(|t| &mut t.expr), connection, + SubqueryPosition::Where, )?; // GROUP BY @@ -59,6 +63,7 @@ pub fn plan_subqueries_from_select_plan( resolver, group_by.exprs.iter_mut(), connection, + SubqueryPosition::GroupBy, )?; if let Some(having) = group_by.having.as_mut() { plan_subqueries_with_outer_query_access( @@ -68,6 +73,7 @@ pub fn plan_subqueries_from_select_plan( resolver, having.iter_mut(), connection, + SubqueryPosition::Having, )?; } } @@ -80,6 +86,7 @@ pub fn plan_subqueries_from_select_plan( resolver, plan.result_columns.iter_mut().map(|c| &mut c.expr), connection, + SubqueryPosition::ResultColumn, )?; // ORDER BY @@ -90,6 +97,7 @@ pub fn plan_subqueries_from_select_plan( resolver, plan.order_by.iter_mut().map(|(expr, _)| &mut **expr), connection, + SubqueryPosition::OrderBy, )?; // LIMIT and OFFSET cannot reference columns from the outer query @@ -102,6 +110,7 @@ pub fn plan_subqueries_from_select_plan( resolver, connection, get_outer_query_refs, + SubqueryPosition::LimitOffset, ); // Limit if let Some(limit) = &mut plan.limit { @@ -128,6 +137,7 @@ fn plan_subqueries_with_outer_query_access<'a>( resolver: &Resolver, exprs: impl Iterator, connection: &Arc, + position: SubqueryPosition, ) -> Result<()> { // Most subqueries can reference columns from the outer query, // including nested cases where a subquery inside a subquery references columns from its parent's parent @@ -163,6 +173,7 @@ fn plan_subqueries_with_outer_query_access<'a>( resolver, connection, get_outer_query_refs, + position, ); for expr in exprs { walk_expr_mut(expr, &mut subquery_parser)?; @@ -179,6 +190,7 @@ fn get_subquery_parser<'a>( resolver: &'a Resolver, connection: &'a Arc, get_outer_query_refs: fn(&TableReferences) -> Vec, + position: SubqueryPosition, ) -> impl FnMut(&mut ast::Expr) -> Result + 'a { move |expr: &mut ast::Expr| -> Result { match expr { @@ -388,6 +400,12 @@ fn get_subquery_parser<'a>( let correlated = plan.is_correlated(); + if correlated && position == SubqueryPosition::Having { + crate::bail_parse_error!( + "correlated IN subqueries in HAVING clause are not supported yet" + ); + } + out_subqueries.push(NonFromClauseSubquery { internal_id: subquery_id, query_type: SubqueryType::In { cursor_id }, diff --git a/testing/subquery.test b/testing/subquery.test index 62e8ae757..261defd47 100644 --- a/testing/subquery.test +++ b/testing/subquery.test @@ -678,6 +678,30 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-w } {1|Alice|Carol|Sales 2|Bob|Dave|Engineering} +# Uncorrelated IN-subquery in result column +do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-result-column-in { + create table employees(id, name, dept_id); + create table special_depts(dept_id); + insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 30); + insert into special_depts values (10), (20); + + select id, name, dept_id in (select dept_id from special_depts) as is_special from employees; +} {1|Alice|1 +2|Bob|1 +3|Charlie|0} + +# Correlated IN-subquery in result column +do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-in { + create table employees(id, name, dept_id); + create table dept_awards(dept_id, employee_id); + insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 10); + insert into dept_awards values (10, 1), (20, 2); + + select id, name, id in (select employee_id from dept_awards where dept_id = employees.dept_id) as has_award from employees; +} {1|Alice|1 +2|Bob|1 +3|Charlie|0} + # Uncorrelated subquery in GROUP BY clause do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-group-by { create table sales(id, amount, region_id); @@ -762,6 +786,32 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-with-j 2|laptop|SupplierB 1|hat|SupplierA} +# Uncorrelated IN-subquery in ORDER BY clause +do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in { + create table products(id, name, category_id); + create table priority_categories(category_id); + insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3); + insert into priority_categories values (1), (3); + + select id, name from products + order by category_id in (select category_id from priority_categories) desc, id; +} {2|laptop +3|book +1|hat} + +# Correlated IN-subquery in ORDER BY clause +do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-in { + create table products(id, name, category_id); + create table category_priorities(category_id, priority); + insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3); + insert into category_priorities values (1, 10), (2, 20), (3, 5); + + select id, name from products + order by category_id in (select category_id from category_priorities where priority > 8), id; +} {3|book +1|hat +2|laptop} + # Uncorrelated subquery in HAVING clause do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having { create table orders(id, customer_id, amount); @@ -775,6 +825,20 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having { having total > (select min_amount from vip_threshold); } {100|200} +# Uncorrelated IN-subquery in HAVING clause +do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having-in { + create table orders(id, customer_id, amount); + create table target_totals(total_amount); + insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 300, 100); + insert into target_totals values (200), (100); + + select customer_id, sum(amount) as total + from orders + group by customer_id + having total in (select total_amount from target_totals); +} {100|200 +300|100} + # Correlated subquery in HAVING clause do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having { create table orders(id, customer_id, amount); @@ -788,6 +852,21 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having { having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = orders.customer_id); } {100|200} +# Correlated IN-subquery in HAVING clause +# FIXME: currently disabled +# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-in { +# create table orders(id, customer_id, amount); +# create table customer_targets(customer_id, target_amount); +# insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80); +# insert into customer_targets values (100, 200), (100, 250), (200, 110); +# +# select customer_id, sum(amount) as total +# from orders +# group by customer_id +# having total in (select target_amount from customer_targets where customer_targets.customer_id = orders.customer_id); +# } {100|200 +# 200|110} + # Correlated subquery in HAVING clause with join do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-join { create table orders(id, customer_id, amount, region_id); @@ -803,6 +882,21 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-joi having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id); } {100|East|200} +# Correlated IN-subquery in HAVING clause with join +# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-subquery-in-having-with-join { +# create table orders(id, customer_id, amount, region_id); +# create table customer_thresholds(customer_id, min_amount); +# create table regions(id, name); +# insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2); +# insert into customer_thresholds values (100, 100), (200, 150); +# insert into regions values (1, 'East'), (2, 'West'); +# +# select o.customer_id, r.name as region, sum(o.amount) as total +# from orders o join regions r on o.region_id = r.id +# group by o.customer_id, r.name +# having total + 40 in (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id); +# } {200|West|110} + # Uncorrelated subquery in LIMIT clause do_execsql_test_on_specific_db {:memory:} subquery-in-limit { create table items(id, name);