Merge 'Support subqueries in all positions of a SELECT statement' from Jussi Saurio

Follow-up to #3847. Adds support for subqueries in all other positions of a SELECT (the result list, GROUP BY, ORDER BY, HAVING, LIMIT, OFFSET). Turns out I am a sql noob and didn't realize that correlated subqueries are supported in basically all positions except LIMIT/OFFSET, so added support for those too + accompanying TCL tests. Thankfully the abstractions introduced in #3847 carry over to this very well so the code change is relatively small (over half of the diff is tests and a lot of the remaining diff is just moving logic around). Reviewed-by: Preston Thorpe <preston@turso.tech> Closes #3852
2026-02-09 18:24:20 +01:00 · 2025-10-29 10:19:39 +02:00
parent fcb927ed24 29fe3b585a
commit 4bf8ad8cfd
9 changed files with 1080 additions and 383 deletions
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -200,8 +200,9 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
 | IS (NOT)                  | Yes     |                                          |
 | IS (NOT) DISTINCT FROM    | Yes     |                                          |
 | (NOT) BETWEEN ... AND ... | Yes     | Expression is rewritten in the optimizer |
-| (NOT) IN (subquery)       | No      |                                          |
-| (NOT) EXISTS (subquery)   | No      |                                          |
+| (NOT) IN (SELECT...)       | Yes      |                                          |
+| (NOT) EXISTS (SELECT...)   | Yes      |                                          |
+| x <operator> (SELECT...))   | Partial  | Only scalar subqueries supported, i.e. not (x,y) = (SELECT...)
 | CASE WHEN THEN ELSE END   | Yes     |                                          |
 | RAISE                     | No      |                                          |

--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -35,8 +35,9 @@ use crate::translate::fkeys::{
    emit_fk_delete_parent_existence_checks, emit_guarded_fk_decrement,
    emit_parent_key_change_checks, open_read_index, open_read_table, stabilize_new_row_for_fk,
 };
-use crate::translate::plan::{DeletePlan, JoinedTable, Plan, QueryDestination, Search};
+use crate::translate::plan::{DeletePlan, EvalAt, JoinedTable, Plan, QueryDestination, Search};
 use crate::translate::planner::ROWID_STRS;
+use crate::translate::subquery::emit_non_from_clause_subquery;
 use crate::translate::values::emit_values;
 use crate::translate::window::{emit_window_results, init_window, WindowMetadata};
 use crate::util::{exprs_are_equivalent, normalize_ident};
@@ -278,6 +279,27 @@ pub fn emit_query<'a>(
        return Ok(reg_result_cols_start);
    }

+    // Evaluate uncorrelated subqueries as early as possible, because even LIMIT can reference a subquery.
+    for subquery in plan
+        .non_from_clause_subqueries
+        .iter_mut()
+        .filter(|s| !s.has_been_evaluated())
+    {
+        let eval_at = subquery.get_eval_at(&plan.join_order)?;
+        if eval_at != EvalAt::BeforeLoop {
+            continue;
+        }
+        let plan = subquery.consume_plan(EvalAt::BeforeLoop);
+
+        emit_non_from_clause_subquery(
+            program,
+            t_ctx,
+            *plan,
+            &subquery.query_type,
+            subquery.correlated,
+        )?;
+    }
+
    // Emit FROM clause subqueries first so the results can be read in the main query loop.
    emit_from_clause_subqueries(program, t_ctx, &mut plan.table_references)?;

--- a/core/translate/group_by.rs
+++ b/core/translate/group_by.rs
@@ -321,6 +321,8 @@ fn collect_non_aggregate_expressions<'a>(
    Ok(())
 }

+/// Collects columns from different parts of a SELECT that are needed for
+/// GROUP BY.
 fn collect_result_columns<'a>(
    root_expr: &'a ast::Expr,
    plan: &SelectPlan,
@@ -337,6 +339,29 @@ fn collect_result_columns<'a>(
                    result_columns.push(expr);
                }
            }
+            // SubqueryResult is an exception because we can't "extract" columns from it
+            // unlike other expressions like function calls or direct column references,
+            // so we must add it so that the subquery result gets collected to the GROUP BY
+            // columns.
+            //
+            // However, if the subquery is of the form: 'aggregate_result IN (SELECT...)', we need to skip it because the aggregation
+            // is done later.
+            ast::Expr::SubqueryResult { lhs, .. } => {
+                if let Some(ref lhs) = lhs {
+                    let mut lhs_contains_agg = false;
+                    walk_expr(lhs, &mut |expr: &ast::Expr| -> Result<WalkControl> {
+                        if plan.aggregates.iter().any(|a| a.original_expr == *expr) {
+                            lhs_contains_agg = true;
+                            return Ok(WalkControl::SkipChildren);
+                        }
+                        Ok(WalkControl::Continue)
+                    })?;
+                    if lhs_contains_agg {
+                        return Ok(WalkControl::SkipChildren);
+                    }
+                }
+                result_columns.push(expr);
+            }
            _ => {
                if plan.aggregates.iter().any(|a| a.original_expr == *expr) {
                    return Ok(WalkControl::SkipChildren);
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -418,22 +418,6 @@ pub fn init_loop(
        }
    }

-    for subquery in subqueries.iter_mut().filter(|s| !s.has_been_evaluated()) {
-        let eval_at = subquery.get_eval_at(join_order)?;
-        if eval_at != EvalAt::BeforeLoop {
-            continue;
-        }
-        let plan = subquery.consume_plan(EvalAt::BeforeLoop);
-
-        emit_non_from_clause_subquery(
-            program,
-            t_ctx,
-            *plan,
-            &subquery.query_type,
-            subquery.correlated,
-        )?;
-    }
-
    for cond in where_clause
        .iter()
        .filter(|c| c.should_eval_before_loop(join_order, subqueries))
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -1390,6 +1390,16 @@ pub enum SubqueryState {
    Evaluated { evaluated_at: EvalAt },
 }

+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SubqueryPosition {
+    ResultColumn,
+    Where,
+    GroupBy,
+    Having,
+    OrderBy,
+    LimitOffset,
+}
+
 #[derive(Debug, Clone)]
 /// A subquery that is not part of the `FROM` clause.
 /// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc.
--- a/core/translate/select.rs
+++ b/core/translate/select.rs
@@ -13,7 +13,7 @@ use crate::translate::planner::{
    break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where,
    resolve_window_and_aggregate_functions,
 };
-use crate::translate::subquery::plan_subqueries_from_where_clause;
+use crate::translate::subquery::plan_subqueries_from_select_plan;
 use crate::translate::window::plan_windows;
 use crate::util::normalize_ident;
 use crate::vdbe::builder::ProgramBuilderOpts;
@@ -504,24 +504,7 @@ fn prepare_one_select_plan(
                )?;
            }

-            // TODO: support subqueries in the following positions:
-            // - result column of a select, e.g.: SELECT x = (SELECT ...) FROM t
-            // - GROUP BY clause, e.g.: SELECT * FROM t GROUP BY (SELECT ...) <-- however nonsensical that might be...
-            // - HAVING clause, e.g.: SELECT * FROM t GROUP BY x HAVING x = (SELECT ...)
-            // - ORDER BY clause, e.g.: SELECT * FROM t ORDER BY x = (SELECT ...)
-            // - LIMIT clause, e.g.: SELECT * FROM t LIMIT (SELECT ...)
-            // - OFFSET clause, e.g.: SELECT * FROM t OFFSET (SELECT ...)
-            //
-            // in these positions, unlike in the WHERE clause, the subquery cannot reference columns from the outer query,
-            // so we don't need to collect outer query references.
-            plan_subqueries_from_where_clause(
-                program,
-                &mut plan.non_from_clause_subqueries,
-                &mut plan.table_references,
-                resolver,
-                &mut plan.where_clause,
-                connection,
-            )?;
+            plan_subqueries_from_select_plan(program, &mut plan, resolver, connection)?;

            // Return the unoptimized query plan
            Ok(plan)
--- a/core/translate/subquery.rs
+++ b/core/translate/subquery.rs
@@ -11,8 +11,8 @@ use crate::{
        expr::{unwrap_parens, walk_expr_mut, WalkControl},
        optimizer::optimize_select_plan,
        plan::{
-            ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryState,
-            WhereTerm,
+            ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryPosition,
+            SubqueryState,
        },
        select::prepare_select_plan,
    },
@@ -29,22 +29,117 @@ use super::{
    plan::{Operation, QueryDestination, Search, SelectPlan, TableReferences},
 };

-/// Compute query plans for subqueries in the WHERE clause.
+// Compute query plans for subqueries occurring in any position other than the FROM clause.
+// This includes the WHERE clause, HAVING clause, GROUP BY clause, ORDER BY clause, LIMIT clause, and OFFSET clause.
 /// The AST expression containing the subquery ([ast::Expr::Exists], [ast::Expr::Subquery], [ast::Expr::InSelect]) is replaced with a [ast::Expr::SubqueryResult] expression.
 /// The [ast::Expr::SubqueryResult] expression contains the subquery ID, the left-hand side expression (only applicable to IN subqueries), the NOT IN flag (only applicable to IN subqueries), and the subquery type.
 /// The computed plans are stored in the [NonFromClauseSubquery] structs on the [SelectPlan], and evaluated at the appropriate time during the translation of the main query.
 /// The appropriate time is determined by whether the subquery is correlated or uncorrelated;
 /// if it is uncorrelated, it can be evaluated as early as possible, but if it is correlated, it must be evaluated after all of its dependencies from the
 /// outer query are 'in scope', i.e. their cursors are open and rewound.
-pub fn plan_subqueries_from_where_clause(
+pub fn plan_subqueries_from_select_plan(
+    program: &mut ProgramBuilder,
+    plan: &mut SelectPlan,
+    resolver: &Resolver,
+    connection: &Arc<Connection>,
+) -> Result<()> {
+    // WHERE
+    plan_subqueries_with_outer_query_access(
+        program,
+        &mut plan.non_from_clause_subqueries,
+        &mut plan.table_references,
+        resolver,
+        plan.where_clause.iter_mut().map(|t| &mut t.expr),
+        connection,
+        SubqueryPosition::Where,
+    )?;
+
+    // GROUP BY
+    if let Some(group_by) = &mut plan.group_by {
+        plan_subqueries_with_outer_query_access(
+            program,
+            &mut plan.non_from_clause_subqueries,
+            &mut plan.table_references,
+            resolver,
+            group_by.exprs.iter_mut(),
+            connection,
+            SubqueryPosition::GroupBy,
+        )?;
+        if let Some(having) = group_by.having.as_mut() {
+            plan_subqueries_with_outer_query_access(
+                program,
+                &mut plan.non_from_clause_subqueries,
+                &mut plan.table_references,
+                resolver,
+                having.iter_mut(),
+                connection,
+                SubqueryPosition::Having,
+            )?;
+        }
+    }
+
+    // Result columns
+    plan_subqueries_with_outer_query_access(
+        program,
+        &mut plan.non_from_clause_subqueries,
+        &mut plan.table_references,
+        resolver,
+        plan.result_columns.iter_mut().map(|c| &mut c.expr),
+        connection,
+        SubqueryPosition::ResultColumn,
+    )?;
+
+    // ORDER BY
+    plan_subqueries_with_outer_query_access(
+        program,
+        &mut plan.non_from_clause_subqueries,
+        &mut plan.table_references,
+        resolver,
+        plan.order_by.iter_mut().map(|(expr, _)| &mut **expr),
+        connection,
+        SubqueryPosition::OrderBy,
+    )?;
+
+    // LIMIT and OFFSET cannot reference columns from the outer query
+    let get_outer_query_refs = |_: &TableReferences| vec![];
+    {
+        let mut subquery_parser = get_subquery_parser(
+            program,
+            &mut plan.non_from_clause_subqueries,
+            &mut plan.table_references,
+            resolver,
+            connection,
+            get_outer_query_refs,
+            SubqueryPosition::LimitOffset,
+        );
+        // Limit
+        if let Some(limit) = &mut plan.limit {
+            walk_expr_mut(limit, &mut subquery_parser)?;
+        }
+        // Offset
+        if let Some(offset) = &mut plan.offset {
+            walk_expr_mut(offset, &mut subquery_parser)?;
+        }
+    }
+
+    update_column_used_masks(
+        &mut plan.table_references,
+        &mut plan.non_from_clause_subqueries,
+    );
+    Ok(())
+}
+
+/// Compute query plans for subqueries in the WHERE clause and HAVING clause (both of which have access to the outer query scope)
+fn plan_subqueries_with_outer_query_access<'a>(
    program: &mut ProgramBuilder,
    out_subqueries: &mut Vec<NonFromClauseSubquery>,
    referenced_tables: &mut TableReferences,
    resolver: &Resolver,
-    where_terms: &mut [WhereTerm],
+    exprs: impl Iterator<Item = &'a mut ast::Expr>,
    connection: &Arc<Connection>,
+    position: SubqueryPosition,
 ) -> Result<()> {
-    // A WHERE clause subquery can reference columns from the outer query,
+    // Most subqueries can reference columns from the outer query,
    // including nested cases where a subquery inside a subquery references columns from its parent's parent
    // and so on.
    let get_outer_query_refs = |referenced_tables: &TableReferences| {
@@ -71,241 +166,261 @@ pub fn plan_subqueries_from_where_clause(
            .collect::<Vec<_>>()
    };

-    // Walk the WHERE clause and replace subqueries with [ast::Expr::SubqueryResult] expressions.
-    for where_term in where_terms.iter_mut() {
-        walk_expr_mut(
-            &mut where_term.expr,
-            &mut |expr: &mut ast::Expr| -> Result<WalkControl> {
-                match expr {
-                    ast::Expr::Exists(_) => {
-                        let subquery_id = program.table_reference_counter.next();
-                        let outer_query_refs = get_outer_query_refs(referenced_tables);
-
-                        let result_reg = program.alloc_register();
-                        let subquery_type = SubqueryType::Exists { result_reg };
-                        let result_expr = ast::Expr::SubqueryResult {
-                            subquery_id,
-                            lhs: None,
-                            not_in: false,
-                            query_type: subquery_type.clone(),
-                        };
-                        let ast::Expr::Exists(subselect) = std::mem::replace(expr, result_expr)
-                        else {
-                            unreachable!();
-                        };
-
-                        let plan = prepare_select_plan(
-                            subselect,
-                            resolver,
-                            program,
-                            &outer_query_refs,
-                            QueryDestination::ExistsSubqueryResult { result_reg },
-                            connection,
-                        )?;
-                        let Plan::Select(mut plan) = plan else {
-                            crate::bail_parse_error!(
-                            "compound SELECT queries not supported yet in WHERE clause subqueries"
-                        );
-                        };
-                        optimize_select_plan(&mut plan, resolver.schema)?;
-                        // EXISTS subqueries are satisfied after at most 1 row has been returned.
-                        plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
-                            "1".to_string(),
-                        ))));
-                        let correlated = plan.is_correlated();
-                        out_subqueries.push(NonFromClauseSubquery {
-                            internal_id: subquery_id,
-                            query_type: subquery_type,
-                            state: SubqueryState::Unevaluated {
-                                plan: Some(Box::new(plan)),
-                            },
-                            correlated,
-                        });
-                        Ok(WalkControl::Continue)
-                    }
-                    ast::Expr::Subquery(_) => {
-                        let subquery_id = program.table_reference_counter.next();
-                        let outer_query_refs = get_outer_query_refs(referenced_tables);
-
-                        let result_expr = ast::Expr::SubqueryResult {
-                            subquery_id,
-                            lhs: None,
-                            not_in: false,
-                            // Placeholder values because the number of columns returned is not known until the plan is prepared.
-                            // These are replaced below after planning.
-                            query_type: SubqueryType::RowValue {
-                                result_reg_start: 0,
-                                num_regs: 0,
-                            },
-                        };
-                        let ast::Expr::Subquery(subselect) = std::mem::replace(expr, result_expr)
-                        else {
-                            unreachable!();
-                        };
-                        let plan = prepare_select_plan(
-                            subselect,
-                            resolver,
-                            program,
-                            &outer_query_refs,
-                            QueryDestination::Unset,
-                            connection,
-                        )?;
-                        let Plan::Select(mut plan) = plan else {
-                            crate::bail_parse_error!(
-                            "compound SELECT queries not supported yet in WHERE clause subqueries"
-                        );
-                        };
-                        optimize_select_plan(&mut plan, resolver.schema)?;
-                        let reg_count = plan.result_columns.len();
-                        let reg_start = program.alloc_registers(reg_count);
-
-                        plan.query_destination = QueryDestination::RowValueSubqueryResult {
-                            result_reg_start: reg_start,
-                            num_regs: reg_count,
-                        };
-                        // RowValue subqueries are satisfied after at most 1 row has been returned,
-                        // as they are used in comparisons with a scalar or a tuple of scalars like (x,y) = (SELECT ...) or x = (SELECT ...).
-                        plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
-                            "1".to_string(),
-                        ))));
-
-                        let ast::Expr::SubqueryResult {
-                            subquery_id,
-                            lhs: None,
-                            not_in: false,
-                            query_type:
-                                SubqueryType::RowValue {
-                                    result_reg_start,
-                                    num_regs,
-                                },
-                        } = &mut *expr
-                        else {
-                            unreachable!();
-                        };
-                        *result_reg_start = reg_start;
-                        *num_regs = reg_count;
-
-                        let correlated = plan.is_correlated();
-
-                        out_subqueries.push(NonFromClauseSubquery {
-                            internal_id: *subquery_id,
-                            query_type: SubqueryType::RowValue {
-                                result_reg_start: reg_start,
-                                num_regs: reg_count,
-                            },
-                            state: SubqueryState::Unevaluated {
-                                plan: Some(Box::new(plan)),
-                            },
-                            correlated,
-                        });
-                        Ok(WalkControl::Continue)
-                    }
-                    ast::Expr::InSelect { .. } => {
-                        let subquery_id = program.table_reference_counter.next();
-                        let outer_query_refs = get_outer_query_refs(referenced_tables);
-
-                        let ast::Expr::InSelect { lhs, not, rhs } =
-                            std::mem::replace(expr, ast::Expr::Literal(ast::Literal::Null))
-                        else {
-                            unreachable!();
-                        };
-                        let plan = prepare_select_plan(
-                            rhs,
-                            resolver,
-                            program,
-                            &outer_query_refs,
-                            QueryDestination::Unset,
-                            connection,
-                        )?;
-                        let Plan::Select(mut plan) = plan else {
-                            crate::bail_parse_error!(
-                                "compound SELECT queries not supported yet in WHERE clause subqueries"
-                            );
-                        };
-                        optimize_select_plan(&mut plan, resolver.schema)?;
-                        // e.g. (x,y) IN (SELECT ...)
-                        // or x IN (SELECT ...)
-                        let lhs_column_count = match unwrap_parens(lhs.as_ref())? {
-                            ast::Expr::Parenthesized(exprs) => exprs.len(),
-                            _ => 1,
-                        };
-                        if lhs_column_count != plan.result_columns.len() {
-                            crate::bail_parse_error!(
-                                "lhs of IN subquery must have the same number of columns as the subquery"
-                            );
-                        }
-
-                        let mut columns = plan
-                            .result_columns
-                            .iter()
-                            .enumerate()
-                            .map(|(i, c)| IndexColumn {
-                                name: c.name(&plan.table_references).unwrap_or("").to_string(),
-                                order: SortOrder::Asc,
-                                pos_in_table: i,
-                                collation: None,
-                                default: None,
-                            })
-                            .collect::<Vec<_>>();
-
-                        for (i, column) in columns.iter_mut().enumerate() {
-                            column.collation = get_collseq_from_expr(
-                                &plan.result_columns[i].expr,
-                                &plan.table_references,
-                            )?;
-                        }
-
-                        let ephemeral_index = Arc::new(Index {
-                            columns,
-                            name: format!("ephemeral_index_where_sub_{subquery_id}"),
-                            table_name: String::new(),
-                            ephemeral: true,
-                            has_rowid: false,
-                            root_page: 0,
-                            unique: false,
-                            where_clause: None,
-                            index_method: None,
-                        });
-
-                        let cursor_id = program
-                            .alloc_cursor_id(CursorType::BTreeIndex(ephemeral_index.clone()));
-
-                        plan.query_destination = QueryDestination::EphemeralIndex {
-                            cursor_id,
-                            index: ephemeral_index.clone(),
-                            is_delete: false,
-                        };
-
-                        *expr = ast::Expr::SubqueryResult {
-                            subquery_id,
-                            lhs: Some(lhs),
-                            not_in: not,
-                            query_type: SubqueryType::In { cursor_id },
-                        };
-
-                        let correlated = plan.is_correlated();
-
-                        out_subqueries.push(NonFromClauseSubquery {
-                            internal_id: subquery_id,
-                            query_type: SubqueryType::In { cursor_id },
-                            state: SubqueryState::Unevaluated {
-                                plan: Some(Box::new(plan)),
-                            },
-                            correlated,
-                        });
-                        Ok(WalkControl::Continue)
-                    }
-                    _ => Ok(WalkControl::Continue),
-                }
-            },
-        )?;
+    let mut subquery_parser = get_subquery_parser(
+        program,
+        out_subqueries,
+        referenced_tables,
+        resolver,
+        connection,
+        get_outer_query_refs,
+        position,
+    );
+    for expr in exprs {
+        walk_expr_mut(expr, &mut subquery_parser)?;
    }

-    update_column_used_masks(referenced_tables, out_subqueries);
-
    Ok(())
 }

+/// Create a closure that will walk the AST and replace subqueries with [ast::Expr::SubqueryResult] expressions.
+fn get_subquery_parser<'a>(
+    program: &'a mut ProgramBuilder,
+    out_subqueries: &'a mut Vec<NonFromClauseSubquery>,
+    referenced_tables: &'a mut TableReferences,
+    resolver: &'a Resolver,
+    connection: &'a Arc<Connection>,
+    get_outer_query_refs: fn(&TableReferences) -> Vec<OuterQueryReference>,
+    position: SubqueryPosition,
+) -> impl FnMut(&mut ast::Expr) -> Result<WalkControl> + 'a {
+    move |expr: &mut ast::Expr| -> Result<WalkControl> {
+        match expr {
+            ast::Expr::Exists(_) => {
+                let subquery_id = program.table_reference_counter.next();
+                let outer_query_refs = get_outer_query_refs(referenced_tables);
+
+                let result_reg = program.alloc_register();
+                let subquery_type = SubqueryType::Exists { result_reg };
+                let result_expr = ast::Expr::SubqueryResult {
+                    subquery_id,
+                    lhs: None,
+                    not_in: false,
+                    query_type: subquery_type.clone(),
+                };
+                let ast::Expr::Exists(subselect) = std::mem::replace(expr, result_expr) else {
+                    unreachable!();
+                };
+
+                let plan = prepare_select_plan(
+                    subselect,
+                    resolver,
+                    program,
+                    &outer_query_refs,
+                    QueryDestination::ExistsSubqueryResult { result_reg },
+                    connection,
+                )?;
+                let Plan::Select(mut plan) = plan else {
+                    crate::bail_parse_error!(
+                        "compound SELECT queries not supported yet in WHERE clause subqueries"
+                    );
+                };
+                optimize_select_plan(&mut plan, resolver.schema)?;
+                // EXISTS subqueries are satisfied after at most 1 row has been returned.
+                plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
+                    "1".to_string(),
+                ))));
+                let correlated = plan.is_correlated();
+                out_subqueries.push(NonFromClauseSubquery {
+                    internal_id: subquery_id,
+                    query_type: subquery_type,
+                    state: SubqueryState::Unevaluated {
+                        plan: Some(Box::new(plan)),
+                    },
+                    correlated,
+                });
+                Ok(WalkControl::Continue)
+            }
+            ast::Expr::Subquery(_) => {
+                let subquery_id = program.table_reference_counter.next();
+                let outer_query_refs = get_outer_query_refs(referenced_tables);
+
+                let result_expr = ast::Expr::SubqueryResult {
+                    subquery_id,
+                    lhs: None,
+                    not_in: false,
+                    // Placeholder values because the number of columns returned is not known until the plan is prepared.
+                    // These are replaced below after planning.
+                    query_type: SubqueryType::RowValue {
+                        result_reg_start: 0,
+                        num_regs: 0,
+                    },
+                };
+                let ast::Expr::Subquery(subselect) = std::mem::replace(expr, result_expr) else {
+                    unreachable!();
+                };
+                let plan = prepare_select_plan(
+                    subselect,
+                    resolver,
+                    program,
+                    &outer_query_refs,
+                    QueryDestination::Unset,
+                    connection,
+                )?;
+                let Plan::Select(mut plan) = plan else {
+                    crate::bail_parse_error!(
+                        "compound SELECT queries not supported yet in WHERE clause subqueries"
+                    );
+                };
+                optimize_select_plan(&mut plan, resolver.schema)?;
+                let reg_count = plan.result_columns.len();
+                let reg_start = program.alloc_registers(reg_count);
+
+                plan.query_destination = QueryDestination::RowValueSubqueryResult {
+                    result_reg_start: reg_start,
+                    num_regs: reg_count,
+                };
+                // RowValue subqueries are satisfied after at most 1 row has been returned,
+                // as they are used in comparisons with a scalar or a tuple of scalars like (x,y) = (SELECT ...) or x = (SELECT ...).
+                plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
+                    "1".to_string(),
+                ))));
+
+                let ast::Expr::SubqueryResult {
+                    subquery_id,
+                    lhs: None,
+                    not_in: false,
+                    query_type:
+                        SubqueryType::RowValue {
+                            result_reg_start,
+                            num_regs,
+                        },
+                } = &mut *expr
+                else {
+                    unreachable!();
+                };
+                *result_reg_start = reg_start;
+                *num_regs = reg_count;
+
+                let correlated = plan.is_correlated();
+
+                out_subqueries.push(NonFromClauseSubquery {
+                    internal_id: *subquery_id,
+                    query_type: SubqueryType::RowValue {
+                        result_reg_start: reg_start,
+                        num_regs: reg_count,
+                    },
+                    state: SubqueryState::Unevaluated {
+                        plan: Some(Box::new(plan)),
+                    },
+                    correlated,
+                });
+                Ok(WalkControl::Continue)
+            }
+            ast::Expr::InSelect { .. } => {
+                let subquery_id = program.table_reference_counter.next();
+                let outer_query_refs = get_outer_query_refs(referenced_tables);
+
+                let ast::Expr::InSelect { lhs, not, rhs } =
+                    std::mem::replace(expr, ast::Expr::Literal(ast::Literal::Null))
+                else {
+                    unreachable!();
+                };
+                let plan = prepare_select_plan(
+                    rhs,
+                    resolver,
+                    program,
+                    &outer_query_refs,
+                    QueryDestination::Unset,
+                    connection,
+                )?;
+                let Plan::Select(mut plan) = plan else {
+                    crate::bail_parse_error!(
+                        "compound SELECT queries not supported yet in WHERE clause subqueries"
+                    );
+                };
+                optimize_select_plan(&mut plan, resolver.schema)?;
+                // e.g. (x,y) IN (SELECT ...)
+                // or x IN (SELECT ...)
+                let lhs_column_count = match unwrap_parens(lhs.as_ref())? {
+                    ast::Expr::Parenthesized(exprs) => exprs.len(),
+                    _ => 1,
+                };
+                if lhs_column_count != plan.result_columns.len() {
+                    crate::bail_parse_error!(
+                        "lhs of IN subquery must have the same number of columns as the subquery"
+                    );
+                }
+
+                let mut columns = plan
+                    .result_columns
+                    .iter()
+                    .enumerate()
+                    .map(|(i, c)| IndexColumn {
+                        name: c.name(&plan.table_references).unwrap_or("").to_string(),
+                        order: SortOrder::Asc,
+                        pos_in_table: i,
+                        collation: None,
+                        default: None,
+                    })
+                    .collect::<Vec<_>>();
+
+                for (i, column) in columns.iter_mut().enumerate() {
+                    column.collation = get_collseq_from_expr(
+                        &plan.result_columns[i].expr,
+                        &plan.table_references,
+                    )?;
+                }
+
+                let ephemeral_index = Arc::new(Index {
+                    columns,
+                    name: format!("ephemeral_index_where_sub_{subquery_id}"),
+                    table_name: String::new(),
+                    ephemeral: true,
+                    has_rowid: false,
+                    root_page: 0,
+                    unique: false,
+                    where_clause: None,
+                    index_method: None,
+                });
+
+                let cursor_id =
+                    program.alloc_cursor_id(CursorType::BTreeIndex(ephemeral_index.clone()));
+
+                plan.query_destination = QueryDestination::EphemeralIndex {
+                    cursor_id,
+                    index: ephemeral_index.clone(),
+                    is_delete: false,
+                };
+
+                *expr = ast::Expr::SubqueryResult {
+                    subquery_id,
+                    lhs: Some(lhs),
+                    not_in: not,
+                    query_type: SubqueryType::In { cursor_id },
+                };
+
+                let correlated = plan.is_correlated();
+
+                if correlated && position == SubqueryPosition::Having {
+                    crate::bail_parse_error!(
+                        "correlated IN subqueries in HAVING clause are not supported yet"
+                    );
+                }
+
+                out_subqueries.push(NonFromClauseSubquery {
+                    internal_id: subquery_id,
+                    query_type: SubqueryType::In { cursor_id },
+                    state: SubqueryState::Unevaluated {
+                        plan: Some(Box::new(plan)),
+                    },
+                    correlated,
+                });
+                Ok(WalkControl::Continue)
+            }
+            _ => Ok(WalkControl::Continue),
+        }
+    }
+}
+
 /// We make decisions about when to evaluate expressions or whether to use covering indexes based on
 /// which columns of a table have been referenced.
 /// Since subquery nesting is arbitrarily deep, a reference to a column must propagate recursively
--- a/testing/subquery.test
+++ b/testing/subquery.test
@@ -638,4 +638,283 @@ do_execsql_test_on_specific_db {:memory:} subquery-not-in-multiple-columns {
        select name, age from restricted_profiles
    );
 } {1|Alice|25
-3|Charlie|25}
+3|Charlie|25}
+
+# SUBQUERIES IN OTHER POSITIONS (result columns, GROUP BY, ORDER BY, HAVING, LIMIT, OFFSET)
+
+# Uncorrelated subquery in result column
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-result-column {
+    create table employees(id, name, dept_id);
+    create table company_info(total_depts);
+    insert into employees values (1, 'Alice', 10), (2, 'Bob', 20);
+    insert into company_info values (5);
+
+    select id, name, (select total_depts from company_info) as total_depts from employees;
+} {1|Alice|5
+2|Bob|5}
+
+# Correlated subquery in result column
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column {
+    create table employees(id, name, dept_id);
+    create table departments(id, name);
+    insert into employees values (1, 'Alice', 10), (2, 'Bob', 20);
+    insert into departments values (10, 'Sales'), (20, 'Engineering');
+
+    select id, name, (select name from departments where id = dept_id) as dept_name from employees;
+} {1|Alice|Sales
+2|Bob|Engineering}
+
+# Correlated subquery in result column with join
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-with-join {
+    create table employees(id, name, dept_id, manager_id);
+    create table departments(id, name);
+    create table managers(id, name);
+    insert into employees values (1, 'Alice', 10, 100), (2, 'Bob', 20, 200);
+    insert into departments values (10, 'Sales'), (20, 'Engineering');
+    insert into managers values (100, 'Carol'), (200, 'Dave');
+
+    select e.id, e.name, m.name as manager, (select name from departments where id = e.dept_id) as dept_name 
+    from employees e join managers m on e.manager_id = m.id;
+} {1|Alice|Carol|Sales
+2|Bob|Dave|Engineering}
+
+# Uncorrelated IN-subquery in result column
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-result-column-in {
+    create table employees(id, name, dept_id);
+    create table special_depts(dept_id);
+    insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 30);
+    insert into special_depts values (10), (20);
+
+    select id, name, dept_id in (select dept_id from special_depts) as is_special from employees;
+} {1|Alice|1
+2|Bob|1
+3|Charlie|0}
+
+# Correlated IN-subquery in result column
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-result-column-in {
+    create table employees(id, name, dept_id);
+    create table dept_awards(dept_id, employee_id);
+    insert into employees values (1, 'Alice', 10), (2, 'Bob', 20), (3, 'Charlie', 10);
+    insert into dept_awards values (10, 1), (20, 2);
+
+    select id, name, id in (select employee_id from dept_awards where dept_id = employees.dept_id) as has_award from employees;
+} {1|Alice|1
+2|Bob|1
+3|Charlie|0}
+
+# Uncorrelated subquery in GROUP BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-group-by {
+    create table sales(id, amount, region_id);
+    create table grouping_config(group_column);
+    insert into sales values (1, 100, 1), (2, 200, 1), (3, 150, 2);
+    insert into grouping_config values ('region_id');
+
+    select region_id, sum(amount) 
+    from sales 
+    group by (select case when group_column = 'region_id' then region_id else amount end from grouping_config);
+} {1|300
+2|150}
+
+# Correlated subquery in GROUP BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-group-by {
+    create table sales(id, amount, region_id);
+    create table regions(id, name);
+    insert into sales values (1, 100, 1), (2, 200, 1), (3, 150, 2);
+    insert into regions values (1, 'North'), (2, 'South');
+
+    select (select name from regions where id = region_id) as region, sum(amount) 
+    from sales 
+    group by (select name from regions where id = region_id);
+} {North|300
+South|150}
+
+# Correlated subquery in GROUP BY clause with join
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-group-by-with-join {
+    create table sales(id, amount, region_id, salesperson_id);
+    create table regions(id, name);
+    create table salespeople(id, name);
+    insert into sales values (1, 100, 1, 10), (2, 200, 1, 20), (3, 150, 2, 10);
+    insert into regions values (1, 'North'), (2, 'South');
+    insert into salespeople values (10, 'Alice'), (20, 'Bob');
+
+    select (select name from regions where id = s.region_id) as region, sp.name as salesperson, sum(amount) 
+    from sales s join salespeople sp on s.salesperson_id = sp.id
+    group by (select name from regions where id = s.region_id), sp.name;
+} {North|Alice|100
+North|Bob|200
+South|Alice|150}
+
+# Uncorrelated subquery in ORDER BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by {
+    create table products(id, name, category_id);
+    create table sort_config(sort_order);
+    insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
+    insert into sort_config values ('asc');
+
+    select id, name from products 
+    order by (select case when sort_order = 'asc' then id else -id end from sort_config);
+} {1|hat
+2|laptop
+3|book}
+
+# Correlated subquery in ORDER BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by {
+    create table products(id, name, category_id);
+    create table categories(id, priority);
+    insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
+    insert into categories values (1, 10), (2, 20), (3, 5);
+
+    select id, name from products 
+    order by (select priority from categories where id = category_id);
+} {3|book
+2|laptop
+1|hat}
+
+# Correlated subquery in ORDER BY clause with join
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-with-join {
+    create table products(id, name, category_id, supplier_id);
+    create table categories(id, priority);
+    create table suppliers(id, name);
+    insert into products values (1, 'hat', 2, 100), (2, 'laptop', 1, 200), (3, 'book', 3, 100);
+    insert into categories values (1, 10), (2, 20), (3, 5);
+    insert into suppliers values (100, 'SupplierA'), (200, 'SupplierB');
+
+    select p.id, p.name, s.name as supplier
+    from products p join suppliers s on p.supplier_id = s.id
+    order by (select priority from categories where id = p.category_id);
+} {3|book|SupplierA
+2|laptop|SupplierB
+1|hat|SupplierA}
+
+# Uncorrelated IN-subquery in ORDER BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-order-by-in {
+    create table products(id, name, category_id);
+    create table priority_categories(category_id);
+    insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
+    insert into priority_categories values (1), (3);
+
+    select id, name from products 
+    order by category_id in (select category_id from priority_categories) desc, id;
+} {2|laptop
+3|book
+1|hat}
+
+# Correlated IN-subquery in ORDER BY clause
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-order-by-in {
+    create table products(id, name, category_id);
+    create table category_priorities(category_id, priority);
+    insert into products values (1, 'hat', 2), (2, 'laptop', 1), (3, 'book', 3);
+    insert into category_priorities values (1, 10), (2, 20), (3, 5);
+
+    select id, name from products 
+    order by category_id in (select category_id from category_priorities where priority > 8), id;
+} {3|book
+1|hat
+2|laptop}
+
+# Uncorrelated subquery in HAVING clause
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having {
+    create table orders(id, customer_id, amount);
+    create table vip_threshold(min_amount);
+    insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30);
+    insert into vip_threshold values (100);
+
+    select customer_id, sum(amount) as total 
+    from orders 
+    group by customer_id 
+    having total > (select min_amount from vip_threshold);
+} {100|200}
+
+# Uncorrelated IN-subquery in HAVING clause
+do_execsql_test_on_specific_db {:memory:} subquery-uncorrelated-in-having-in {
+    create table orders(id, customer_id, amount);
+    create table target_totals(total_amount);
+    insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 300, 100);
+    insert into target_totals values (200), (100);
+
+    select customer_id, sum(amount) as total 
+    from orders 
+    group by customer_id 
+    having total in (select total_amount from target_totals);
+} {100|200
+300|100}
+
+# Correlated subquery in HAVING clause
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having {
+    create table orders(id, customer_id, amount);
+    create table customer_thresholds(customer_id, min_amount);
+    insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80);
+    insert into customer_thresholds values (100, 100), (200, 150);
+
+    select customer_id, sum(amount) as total 
+    from orders 
+    group by customer_id 
+    having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = orders.customer_id);
+} {100|200}
+
+# Correlated IN-subquery in HAVING clause
+# FIXME: currently disabled
+# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-in {
+#     create table orders(id, customer_id, amount);
+#     create table customer_targets(customer_id, target_amount);
+#     insert into orders values (1, 100, 50), (2, 100, 150), (3, 200, 30), (4, 200, 80);
+#     insert into customer_targets values (100, 200), (100, 250), (200, 110);
+# 
+#     select customer_id, sum(amount) as total 
+#     from orders 
+#     group by customer_id 
+#     having total in (select target_amount from customer_targets where customer_targets.customer_id = orders.customer_id);
+# } {100|200
+# 200|110}
+
+# Correlated subquery in HAVING clause with join
+do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-having-with-join {
+    create table orders(id, customer_id, amount, region_id);
+    create table customer_thresholds(customer_id, min_amount);
+    create table regions(id, name);
+    insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2);
+    insert into customer_thresholds values (100, 100), (200, 150);
+    insert into regions values (1, 'East'), (2, 'West');
+
+    select o.customer_id, r.name as region, sum(o.amount) as total 
+    from orders o join regions r on o.region_id = r.id
+    group by o.customer_id, r.name
+    having total > (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
+} {100|East|200}
+
+# Correlated IN-subquery in HAVING clause with join
+# do_execsql_test_on_specific_db {:memory:} subquery-correlated-in-subquery-in-having-with-join {
+#     create table orders(id, customer_id, amount, region_id);
+#     create table customer_thresholds(customer_id, min_amount);
+#     create table regions(id, name);
+#     insert into orders values (1, 100, 50, 1), (2, 100, 150, 1), (3, 200, 30, 2), (4, 200, 80, 2);
+#     insert into customer_thresholds values (100, 100), (200, 150);
+#     insert into regions values (1, 'East'), (2, 'West');
+# 
+#     select o.customer_id, r.name as region, sum(o.amount) as total 
+#     from orders o join regions r on o.region_id = r.id
+#     group by o.customer_id, r.name
+#     having total + 40 in (select min_amount from customer_thresholds where customer_thresholds.customer_id = o.customer_id);
+# } {200|West|110}
+
+# Uncorrelated subquery in LIMIT clause
+do_execsql_test_on_specific_db {:memory:} subquery-in-limit {
+    create table items(id, name);
+    create table config(max_results);
+    insert into items values (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd');
+    insert into config values (2);
+
+    select * from items limit (select max_results from config);
+} {1|a
+2|b}
+
+# Uncorrelated subquery in OFFSET clause
+do_execsql_test_on_specific_db {:memory:} subquery-in-offset {
+    create table items(id, name);
+    create table config(skip_count);
+    insert into items values (1, 'a'), (2, 'b'), (3, 'c');
+    insert into config values (1);
+
+    select * from items limit 2 offset (select skip_count from config);
+} {2|b
+3|c}
--- a/tests/fuzz/mod.rs
+++ b/tests/fuzz/mod.rs
@@ -4295,7 +4295,12 @@ mod fuzz_tests {
        };

        // Helper function to generate simple subquery
-        fn gen_subquery(rng: &mut ChaCha8Rng, depth: usize, outer_table: Option<&str>) -> String {
+        fn gen_subquery(
+            rng: &mut ChaCha8Rng,
+            depth: usize,
+            outer_table: Option<&str>,
+            allowed_outer_cols: Option<&[&str]>,
+        ) -> String {
            if depth > MAX_SUBQUERY_DEPTH {
                return "SELECT 1".to_string();
            }
@@ -4328,56 +4333,96 @@ mod fuzz_tests {
            // Helper function to generate correlated WHERE conditions
            let gen_correlated_where =
                |rng: &mut ChaCha8Rng, inner_table: &str, outer_table: &str| -> String {
+                    let pick =
+                        |rng: &mut ChaCha8Rng, mut candidates: Vec<(String, &'static str)>| {
+                            let filtered: Vec<String> = if let Some(allowed) = allowed_outer_cols {
+                                candidates
+                                    .drain(..)
+                                    .filter(|(_, col)| allowed.contains(col))
+                                    .map(|(cond, _)| cond)
+                                    .collect()
+                            } else {
+                                candidates.drain(..).map(|(cond, _)| cond).collect()
+                            };
+                            if filtered.is_empty() {
+                                "1=1".to_string()
+                            } else {
+                                filtered[rng.random_range(0..filtered.len())].clone()
+                            }
+                        };
                    match (outer_table, inner_table) {
-                        ("t1", "t2") => {
-                            // t2.ref_id relates to t1.id
-                            let conditions = [
-                                format!("{inner_table}.ref_id = {outer_table}.id"),
-                                format!("{inner_table}.id < {outer_table}.value1"),
-                                format!("{inner_table}.data > {outer_table}.value2"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t1", "t3") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.category < {outer_table}.value1"),
-                                format!("{inner_table}.amount > {outer_table}.value2"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t2", "t1") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.ref_id"),
-                                format!("{inner_table}.value1 > {outer_table}.data"),
-                                format!("{inner_table}.value2 < {outer_table}.id"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t2", "t3") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.category = {outer_table}.ref_id"),
-                                format!("{inner_table}.amount > {outer_table}.data"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t3", "t1") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.value1 > {outer_table}.category"),
-                                format!("{inner_table}.value2 < {outer_table}.amount"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t3", "t2") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.ref_id = {outer_table}.category"),
-                                format!("{inner_table}.data < {outer_table}.amount"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
+                        ("t1", "t2") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.ref_id = {outer_table}.id"), "id"),
+                                (format!("{inner_table}.id < {outer_table}.value1"), "value1"),
+                                (
+                                    format!("{inner_table}.data > {outer_table}.value2"),
+                                    "value2",
+                                ),
+                            ],
+                        ),
+                        ("t1", "t3") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.category < {outer_table}.value1"),
+                                    "value1",
+                                ),
+                                (
+                                    format!("{inner_table}.amount > {outer_table}.value2"),
+                                    "value2",
+                                ),
+                            ],
+                        ),
+                        ("t2", "t1") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.ref_id"), "ref_id"),
+                                (format!("{inner_table}.value1 > {outer_table}.data"), "data"),
+                                (format!("{inner_table}.value2 < {outer_table}.id"), "id"),
+                            ],
+                        ),
+                        ("t2", "t3") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.category = {outer_table}.ref_id"),
+                                    "ref_id",
+                                ),
+                                (format!("{inner_table}.amount > {outer_table}.data"), "data"),
+                            ],
+                        ),
+                        ("t3", "t1") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.value1 > {outer_table}.category"),
+                                    "category",
+                                ),
+                                (
+                                    format!("{inner_table}.value2 < {outer_table}.amount"),
+                                    "amount",
+                                ),
+                            ],
+                        ),
+                        ("t3", "t2") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.ref_id = {outer_table}.category"),
+                                    "category",
+                                ),
+                                (
+                                    format!("{inner_table}.data < {outer_table}.amount"),
+                                    "amount",
+                                ),
+                            ],
+                        ),
                        _ => "1=1".to_string(),
                    }
                };
@@ -4410,7 +4455,11 @@ mod fuzz_tests {

            // Add correlated conditions if outer_table is provided and sometimes
            let final_query = if let Some(outer_table) = outer_table {
-                if rng.random_bool(0.4) {
+                let can_correlate = match allowed_outer_cols {
+                    Some(cols) => !cols.is_empty(),
+                    None => true,
+                };
+                if can_correlate && rng.random_bool(0.4) {
                    // 40% chance for correlation
                    // Extract the inner table from the base query
                    let inner_table = if base_query.contains("FROM t1") {
@@ -4440,7 +4489,7 @@ mod fuzz_tests {
            // Sometimes add nesting - but use scalar subquery for nesting to avoid column count issues
            if depth < 1 && rng.random_bool(0.2) {
                // Reduced probability and depth
-                let nested = gen_scalar_subquery(rng, 0, outer_table);
+                let nested = gen_scalar_subquery(rng, 0, outer_table, allowed_outer_cols);
                if final_query.contains("WHERE") {
                    format!("{final_query} AND id IN ({nested})")
                } else {
@@ -4456,6 +4505,7 @@ mod fuzz_tests {
            rng: &mut ChaCha8Rng,
            depth: usize,
            outer_table: Option<&str>,
+            allowed_outer_cols: Option<&[&str]>,
        ) -> String {
            if depth > MAX_SUBQUERY_DEPTH {
                // Reduced nesting depth
@@ -4490,56 +4540,96 @@ mod fuzz_tests {
            // Helper function to generate correlated WHERE conditions
            let gen_correlated_where =
                |rng: &mut ChaCha8Rng, inner_table: &str, outer_table: &str| -> String {
+                    let pick =
+                        |rng: &mut ChaCha8Rng, mut candidates: Vec<(String, &'static str)>| {
+                            let filtered: Vec<String> = if let Some(allowed) = allowed_outer_cols {
+                                candidates
+                                    .drain(..)
+                                    .filter(|(_, col)| allowed.contains(col))
+                                    .map(|(cond, _)| cond)
+                                    .collect()
+                            } else {
+                                candidates.drain(..).map(|(cond, _)| cond).collect()
+                            };
+                            if filtered.is_empty() {
+                                "1=1".to_string()
+                            } else {
+                                filtered[rng.random_range(0..filtered.len())].clone()
+                            }
+                        };
                    match (outer_table, inner_table) {
-                        ("t1", "t2") => {
-                            // t2.ref_id relates to t1.id
-                            let conditions = [
-                                format!("{inner_table}.ref_id = {outer_table}.id"),
-                                format!("{inner_table}.id < {outer_table}.value1"),
-                                format!("{inner_table}.data > {outer_table}.value2"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t1", "t3") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.category < {outer_table}.value1"),
-                                format!("{inner_table}.amount > {outer_table}.value2"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t2", "t1") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.ref_id"),
-                                format!("{inner_table}.value1 > {outer_table}.data"),
-                                format!("{inner_table}.value2 < {outer_table}.id"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t2", "t3") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.category = {outer_table}.ref_id"),
-                                format!("{inner_table}.amount > {outer_table}.data"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t3", "t1") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.value1 > {outer_table}.category"),
-                                format!("{inner_table}.value2 < {outer_table}.amount"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
-                        ("t3", "t2") => {
-                            let conditions = [
-                                format!("{inner_table}.id = {outer_table}.id"),
-                                format!("{inner_table}.ref_id = {outer_table}.category"),
-                                format!("{inner_table}.data < {outer_table}.amount"),
-                            ];
-                            conditions[rng.random_range(0..conditions.len())].clone()
-                        }
+                        ("t1", "t2") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.ref_id = {outer_table}.id"), "id"),
+                                (format!("{inner_table}.id < {outer_table}.value1"), "value1"),
+                                (
+                                    format!("{inner_table}.data > {outer_table}.value2"),
+                                    "value2",
+                                ),
+                            ],
+                        ),
+                        ("t1", "t3") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.category < {outer_table}.value1"),
+                                    "value1",
+                                ),
+                                (
+                                    format!("{inner_table}.amount > {outer_table}.value2"),
+                                    "value2",
+                                ),
+                            ],
+                        ),
+                        ("t2", "t1") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.ref_id"), "ref_id"),
+                                (format!("{inner_table}.value1 > {outer_table}.data"), "data"),
+                                (format!("{inner_table}.value2 < {outer_table}.id"), "id"),
+                            ],
+                        ),
+                        ("t2", "t3") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.category = {outer_table}.ref_id"),
+                                    "ref_id",
+                                ),
+                                (format!("{inner_table}.amount > {outer_table}.data"), "data"),
+                            ],
+                        ),
+                        ("t3", "t1") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.value1 > {outer_table}.category"),
+                                    "category",
+                                ),
+                                (
+                                    format!("{inner_table}.value2 < {outer_table}.amount"),
+                                    "amount",
+                                ),
+                            ],
+                        ),
+                        ("t3", "t2") => pick(
+                            rng,
+                            vec![
+                                (format!("{inner_table}.id = {outer_table}.id"), "id"),
+                                (
+                                    format!("{inner_table}.ref_id = {outer_table}.category"),
+                                    "category",
+                                ),
+                                (
+                                    format!("{inner_table}.data < {outer_table}.amount"),
+                                    "amount",
+                                ),
+                            ],
+                        ),
                        _ => "1=1".to_string(),
                    }
                };
@@ -4573,7 +4663,11 @@ mod fuzz_tests {

            // Add correlated conditions if outer_table is provided and sometimes
            let final_query = if let Some(outer_table) = outer_table {
-                if rng.random_bool(0.4) {
+                let can_correlate = match allowed_outer_cols {
+                    Some(cols) => !cols.is_empty(),
+                    None => true,
+                };
+                if can_correlate && rng.random_bool(0.4) {
                    // 40% chance for correlation
                    // Extract the inner table from the base query
                    let inner_table = if base_query.contains("FROM t1") {
@@ -4603,7 +4697,7 @@ mod fuzz_tests {
            // Sometimes add nesting
            if depth < 1 && rng.random_bool(0.2) {
                // Reduced probability and depth
-                let nested = gen_scalar_subquery(rng, depth + 1, outer_table);
+                let nested = gen_scalar_subquery(rng, depth + 1, outer_table, allowed_outer_cols);
                if final_query.contains("WHERE") {
                    format!("{final_query} AND id IN ({nested})")
                } else {
@@ -4614,11 +4708,110 @@ mod fuzz_tests {
            }
        }

+        // Helper to generate a SELECT-list expression as a scalar subquery, optionally correlated
+        fn gen_selectlist_scalar_expr(rng: &mut ChaCha8Rng, outer_table: &str) -> String {
+            // Reuse scalar subquery generator; return the inner SELECT (without wrapping)
+            gen_scalar_subquery(rng, 0, Some(outer_table), None)
+        }
+
+        // Helper to generate a GROUP BY expression which may include a correlated scalar subquery
+        fn gen_group_by_expr(rng: &mut ChaCha8Rng, main_table: &str) -> String {
+            // Either a plain column or a correlated scalar subquery
+            if rng.random_bool(0.4) {
+                // Prefer plain columns most of the time to keep GROUP BY semantics simple
+                match main_table {
+                    "t1" => ["id", "value1", "value2"][rng.random_range(0..3)].to_string(),
+                    "t2" => ["id", "ref_id", "data"][rng.random_range(0..3)].to_string(),
+                    "t3" => ["id", "category", "amount"][rng.random_range(0..3)].to_string(),
+                    _ => "id".to_string(),
+                }
+            } else {
+                // If GROUP BY is present, a subquery that references outer columns would be invalid
+                // unless it only references GROUP BY columns; since this subquery becomes the
+                // grouping expression itself, disallow correlation entirely here.
+                format!(
+                    "({})",
+                    gen_scalar_subquery(rng, 0, Some(main_table), Some(&[]))
+                )
+            }
+        }
+
+        // Helper to generate a HAVING condition comparing an aggregate to a scalar subquery
+        fn gen_having_condition(
+            rng: &mut ChaCha8Rng,
+            main_table: &str,
+            allowed_outer_cols: Option<&[&str]>,
+        ) -> String {
+            let (agg_func, agg_col) = match main_table {
+                "t1" => [
+                    ("SUM", "value1"),
+                    ("SUM", "value2"),
+                    ("MAX", "value1"),
+                    ("MAX", "value2"),
+                    ("MIN", "value1"),
+                    ("MIN", "value2"),
+                    ("COUNT", "*"),
+                ][rng.random_range(0..7)],
+                "t2" => [
+                    ("SUM", "data"),
+                    ("MAX", "data"),
+                    ("MIN", "data"),
+                    ("COUNT", "*"),
+                ][rng.random_range(0..4)],
+                "t3" => [
+                    ("SUM", "amount"),
+                    ("MAX", "amount"),
+                    ("MIN", "amount"),
+                    ("COUNT", "*"),
+                ][rng.random_range(0..4)],
+                _ => ("COUNT", "*"),
+            };
+            let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)];
+            let rhs = gen_scalar_subquery(rng, 0, Some(main_table), allowed_outer_cols);
+            if agg_col == "*" {
+                format!("COUNT(*) {op} ({rhs})")
+            } else {
+                format!("{agg_func}({agg_col}) {op} ({rhs})")
+            }
+        }
+
+        // Helper to generate LIMIT/OFFSET clause (optionally empty). Expressions may be subqueries.
+        fn gen_limit_offset_clause(rng: &mut ChaCha8Rng) -> String {
+            // 50% of the time, no LIMIT/OFFSET
+            if rng.random_bool(0.5) {
+                return String::new();
+            }
+
+            fn gen_limit_like_expr(rng: &mut ChaCha8Rng) -> String {
+                // Small literal or a scalar subquery from a random table
+                if rng.random_bool(0.6) {
+                    // Keep literal sizes modest
+                    format!("{}", rng.random_range(0..20))
+                } else {
+                    let which = rng.random_range(0..3);
+                    match which {
+                        0 => "(SELECT COUNT(*) FROM t1)".to_string(),
+                        1 => "(SELECT COUNT(*) FROM t2)".to_string(),
+                        _ => "(SELECT COUNT(*) FROM t3)".to_string(),
+                    }
+                }
+            }
+
+            let mut clause = String::new();
+            let limit_expr = gen_limit_like_expr(rng);
+            clause.push_str(&format!(" LIMIT {limit_expr}",));
+            if rng.random_bool(0.5) {
+                let offset_expr = gen_limit_like_expr(rng);
+                clause.push_str(&format!(" OFFSET {offset_expr}",));
+            }
+            clause
+        }
+
        for iter_num in 0..NUM_FUZZ_ITERATIONS {
            let main_table = ["t1", "t2", "t3"][rng.random_range(0..3)];

-            let query_type = rng.random_range(0..6); // Increased from 4 to 6 for new correlated query types
-            let query = match query_type {
+            let query_type = rng.random_range(0..8); // Add GROUP BY/HAVING variants
+            let mut query = match query_type {
                0 => {
                    // Comparison subquery: WHERE column <op> (SELECT ...)
                    let column = match main_table {
@@ -4628,13 +4821,13 @@ mod fuzz_tests {
                        _ => "id",
                    };
                    let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)];
-                    let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table));
+                    let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table), None);
                    format!("SELECT * FROM {main_table} WHERE {column} {op} ({subquery})",)
                }
                1 => {
                    // EXISTS subquery: WHERE [NOT] EXISTS (SELECT ...)
                    let not_exists = if rng.random_bool(0.3) { "NOT " } else { "" };
-                    let subquery = gen_subquery(&mut rng, 0, Some(main_table));
+                    let subquery = gen_subquery(&mut rng, 0, Some(main_table), None);
                    format!("SELECT * FROM {main_table} WHERE {not_exists}EXISTS ({subquery})",)
                }
                2 => {
@@ -4646,7 +4839,7 @@ mod fuzz_tests {
                        "t3" => ["amount", "category", "id"][rng.random_range(0..3)],
                        _ => "id",
                    };
-                    let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table));
+                    let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table), None);
                    format!("SELECT * FROM {main_table} WHERE {column} {not_in}IN ({subquery})",)
                }
                3 => {
@@ -4843,8 +5036,93 @@ mod fuzz_tests {
                        "SELECT * FROM {main_table} WHERE {column} {op} (SELECT {select_column} FROM {inner_table} WHERE {correlated_condition})",
                    )
                }
+                6 => {
+                    // Aggregated query with GROUP BY and optional HAVING; allow subqueries in GROUP BY/HAVING
+                    let group_expr = gen_group_by_expr(&mut rng, main_table);
+                    // Only GROUP BY columns may be referenced by correlated subqueries
+                    let allowed_outer_cols: Vec<&str> = match group_expr.as_str() {
+                        "id" => vec!["id"],
+                        "value1" => vec!["value1"],
+                        "value2" => vec!["value2"],
+                        "ref_id" => vec!["ref_id"],
+                        "data" => vec!["data"],
+                        "category" => vec!["category"],
+                        "amount" => vec!["amount"],
+                        _ => Vec::new(),
+                    };
+                    let (agg_func, agg_col) = match main_table {
+                        "t1" => [
+                            ("SUM", "value1"),
+                            ("SUM", "value2"),
+                            ("MAX", "value1"),
+                            ("MAX", "value2"),
+                            ("COUNT", "*"),
+                        ][rng.random_range(0..5)],
+                        "t2" => [("SUM", "data"), ("MAX", "data"), ("COUNT", "*")]
+                            [rng.random_range(0..3)],
+                        "t3" => [("SUM", "amount"), ("MAX", "amount"), ("COUNT", "*")]
+                            [rng.random_range(0..3)],
+                        _ => ("COUNT", "*"),
+                    };
+                    let mut q;
+                    if agg_col == "*" {
+                        q = format!("SELECT {group_expr} AS g, COUNT(*) AS c FROM {main_table}");
+                    } else {
+                        q = format!("SELECT {group_expr} AS g, {agg_func}({agg_col}) AS a FROM {main_table}");
+                    }
+                    if rng.random_bool(0.5) {
+                        q.push_str(&format!(
+                            " WHERE {}",
+                            gen_simple_where(&mut rng, main_table)
+                        ));
+                    }
+                    q.push_str(&format!(" GROUP BY {group_expr}"));
+                    if rng.random_bool(0.4) {
+                        q.push_str(&format!(
+                            " HAVING {}",
+                            gen_having_condition(&mut rng, main_table, Some(&allowed_outer_cols))
+                        ));
+                    }
+                    q
+                }
+                7 => {
+                    // Simple GROUP BY without HAVING (baseline support); may use subquery in GROUP BY
+                    let group_expr = gen_group_by_expr(&mut rng, main_table);
+                    let select_expr = if rng.random_bool(0.5) {
+                        // Use aggregate
+                        match main_table {
+                            "t1" => "SUM(value1) AS s".to_string(),
+                            "t2" => "SUM(data) AS s".to_string(),
+                            _ => "SUM(amount) AS s".to_string(),
+                        }
+                    } else {
+                        "COUNT(*) AS c".to_string()
+                    };
+                    let mut q =
+                        format!("SELECT {group_expr} AS g, {select_expr} FROM {main_table}");
+                    if rng.random_bool(0.5) {
+                        q.push_str(&format!(
+                            " WHERE {}",
+                            gen_simple_where(&mut rng, main_table)
+                        ));
+                    }
+                    q.push_str(&format!(" GROUP BY {group_expr}"));
+                    q
+                }
                _ => unreachable!(),
            };
+            // Optionally inject a SELECT-list scalar subquery into non-aggregated SELECT * queries
+            if query.starts_with("SELECT * FROM ") && rng.random_bool(0.4) {
+                let sel_expr = gen_selectlist_scalar_expr(&mut rng, main_table);
+                let replacement = "SELECT *, (".to_string() + &sel_expr + ") AS s_sub FROM ";
+                query = query.replacen("SELECT * FROM ", &replacement, 1);
+            }
+
+            // Optionally append LIMIT/OFFSET (with or without subqueries)
+            let limit_clause = gen_limit_offset_clause(&mut rng);
+            if !limit_clause.is_empty() {
+                query.push_str(&limit_clause);
+            }

            log::info!(
                "Iteration {}/{NUM_FUZZ_ITERATIONS}: Query: {query}",