Add more tests and disable correlated IN-subqueries in HAVING position

I discovered a flaw in our current translation that makes queries of type
HAVING foo IN (SELECT ...) not work properly - in these cases we need to
defer translation of the subquery until later.

I will fix this in a future PR because I suspect it's not trivial.
This commit is contained in:
Jussi Saurio
2025-10-29 09:57:55 +02:00
parent ec1eac2943
commit 29fe3b585a
4 changed files with 140 additions and 2 deletions

View File

@@ -343,7 +343,23 @@ fn collect_result_columns<'a>(
// unlike other expressions like function calls or direct column references,
// so we must add it so that the subquery result gets collected to the GROUP BY
// columns.
ast::Expr::SubqueryResult { .. } => {
//
// However, if the subquery is of the form: 'aggregate_result IN (SELECT...)', we need to skip it because the aggregation
// is done later.
ast::Expr::SubqueryResult { lhs, .. } => {
if let Some(ref lhs) = lhs {
let mut lhs_contains_agg = false;
walk_expr(lhs, &mut |expr: &ast::Expr| -> Result<WalkControl> {
if plan.aggregates.iter().any(|a| a.original_expr == *expr) {
lhs_contains_agg = true;
return Ok(WalkControl::SkipChildren);
}
Ok(WalkControl::Continue)
})?;
if lhs_contains_agg {
return Ok(WalkControl::SkipChildren);
}
}
result_columns.push(expr);
}
_ => {

View File

@@ -1366,6 +1366,16 @@ pub enum SubqueryState {
Evaluated { evaluated_at: EvalAt },
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SubqueryPosition {
ResultColumn,
Where,
GroupBy,
Having,
OrderBy,
LimitOffset,
}
#[derive(Debug, Clone)]
/// A subquery that is not part of the `FROM` clause.
/// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc.

View File

@@ -10,7 +10,10 @@ use crate::{
emitter::emit_program_for_select,
expr::{unwrap_parens, walk_expr_mut, WalkControl},
optimizer::optimize_select_plan,
plan::{ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryState},
plan::{
ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryPosition,
SubqueryState,
},
select::prepare_select_plan,
},
vdbe::{
@@ -48,6 +51,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
plan.where_clause.iter_mut().map(|t| &mut t.expr),
connection,
SubqueryPosition::Where,
)?;
// GROUP BY
@@ -59,6 +63,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
group_by.exprs.iter_mut(),
connection,
SubqueryPosition::GroupBy,
)?;
if let Some(having) = group_by.having.as_mut() {
plan_subqueries_with_outer_query_access(
@@ -68,6 +73,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
having.iter_mut(),
connection,
SubqueryPosition::Having,
)?;
}
}
@@ -80,6 +86,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
plan.result_columns.iter_mut().map(|c| &mut c.expr),
connection,
SubqueryPosition::ResultColumn,
)?;
// ORDER BY
@@ -90,6 +97,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
plan.order_by.iter_mut().map(|(expr, _)| &mut **expr),
connection,
SubqueryPosition::OrderBy,
)?;
// LIMIT and OFFSET cannot reference columns from the outer query
@@ -102,6 +110,7 @@ pub fn plan_subqueries_from_select_plan(
resolver,
connection,
get_outer_query_refs,
SubqueryPosition::LimitOffset,
);
// Limit
if let Some(limit) = &mut plan.limit {
@@ -128,6 +137,7 @@ fn plan_subqueries_with_outer_query_access<'a>(
resolver: &Resolver,
exprs: impl Iterator<Item = &'a mut ast::Expr>,
connection: &Arc<Connection>,
position: SubqueryPosition,
) -> Result<()> {
// Most subqueries can reference columns from the outer query,
// including nested cases where a subquery inside a subquery references columns from its parent's parent
@@ -163,6 +173,7 @@ fn plan_subqueries_with_outer_query_access<'a>(
resolver,
connection,
get_outer_query_refs,
position,
);
for expr in exprs {
walk_expr_mut(expr, &mut subquery_parser)?;
@@ -179,6 +190,7 @@ fn get_subquery_parser<'a>(
resolver: &'a Resolver,
connection: &'a Arc<Connection>,
get_outer_query_refs: fn(&TableReferences) -> Vec<OuterQueryReference>,
position: SubqueryPosition,
) -> impl FnMut(&mut ast::Expr) -> Result<WalkControl> + 'a {
move |expr: &mut ast::Expr| -> Result<WalkControl> {
match expr {
@@ -388,6 +400,12 @@ fn get_subquery_parser<'a>(
let correlated = plan.is_correlated();
if correlated && position == SubqueryPosition::Having {
crate::bail_parse_error!(
"correlated IN subqueries in HAVING clause are not supported yet"
);
}
out_subqueries.push(NonFromClauseSubquery {
internal_id: subquery_id,
query_type: SubqueryType::In { cursor_id },

View File

@@ -678,6 +678,30 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-w
} {1|Alice|Carol|Sales
2|Bob|Dave|Engineering}
# Uncorrelated IN-subquery in result column
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-result-column-in {
create table employees(id, name, dept_id);
create table special_depts(dept_id);
insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 30);
insert into special_depts values (10), (20);
select id, name, dept_id in (select dept_id from special_depts) as is_special from employees;
} {1|Alice|1
2|Bob|1
3|Charlie|0}
# Correlated IN-subquery in result column
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-in {
create table employees(id, name, dept_id);
create table dept_awards(dept_id, employee_id);
insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 10);
insert into dept_awards values (10, 1), (20, 2);
select id, name, id in (select employee_id from dept_awards where dept_id = employees.dept_id) as has_award from employees;
} {1|Alice|1
2|Bob|1
3|Charlie|0}
# Uncorrelated subquery in GROUP BY clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-group-by {
create table sales(id, amount, region_id);
@@ -762,6 +786,32 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-with-j
2|laptop|SupplierB
1|hat|SupplierA}
# Uncorrelated IN-subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in {
create table products(id, name, category_id);
create table priority_categories(category_id);
insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
insert into priority_categories values (1), (3);
select id, name from products
order by category_id in (select category_id from priority_categories) desc, id;
} {2|laptop
3|book
1|hat}
# Correlated IN-subquery in ORDER BY clause
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-in {
create table products(id, name, category_id);
create table category_priorities(category_id, priority);
insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
insert into category_priorities values (1, 10), (2, 20), (3, 5);
select id, name from products
order by category_id in (select category_id from category_priorities where priority > 8), id;
} {3|book
1|hat
2|laptop}
# Uncorrelated subquery in HAVING clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having {
create table orders(id, customer_id, amount);
@@ -775,6 +825,20 @@ do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having {
having total > (select min_amount from vip_threshold);
} {100|200}
# Uncorrelated IN-subquery in HAVING clause
do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having-in {
create table orders(id, customer_id, amount);
create table target_totals(total_amount);
insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 300, 100);
insert into target_totals values (200), (100);
select customer_id, sum(amount) as total
from orders
group by customer_id
having total in (select total_amount from target_totals);
} {100|200
300|100}
# Correlated subquery in HAVING clause
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having {
create table orders(id, customer_id, amount);
@@ -788,6 +852,21 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having {
having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = orders.customer_id);
} {100|200}
# Correlated IN-subquery in HAVING clause
# FIXME: currently disabled
# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-in {
# create table orders(id, customer_id, amount);
# create table customer_targets(customer_id, target_amount);
# insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80);
# insert into customer_targets values (100, 200), (100, 250), (200, 110);
#
# select customer_id, sum(amount) as total
# from orders
# group by customer_id
# having total in (select target_amount from customer_targets where customer_targets.customer_id = orders.customer_id);
# } {100|200
# 200|110}
# Correlated subquery in HAVING clause with join
do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-join {
create table orders(id, customer_id, amount, region_id);
@@ -803,6 +882,21 @@ do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-joi
having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
} {100|East|200}
# Correlated IN-subquery in HAVING clause with join
# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-subquery-in-having-with-join {
# create table orders(id, customer_id, amount, region_id);
# create table customer_thresholds(customer_id, min_amount);
# create table regions(id, name);
# insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2);
# insert into customer_thresholds values (100, 100), (200, 150);
# insert into regions values (1, 'East'), (2, 'West');
#
# select o.customer_id, r.name as region, sum(o.amount) as total
# from orders o join regions r on o.region_id = r.id
# group by o.customer_id, r.name
# having total + 40 in (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
# } {200|West|110}
# Uncorrelated subquery in LIMIT clause
do_execsql_test_on_specific_db {:memory:} subquery-in-limit {
create table items(id, name);