From 005d922ab42321f65dc93e0891481a0ad4679881 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 3 Nov 2025 10:49:04 +0200 Subject: [PATCH] Fix: prevent misuse of subqueries that return multiple columns --- core/translate/expr.rs | 165 ++++++++++++++++++++++++++++++++++ core/translate/select.rs | 88 ++++++++++++++++++- testing/subquery.test | 185 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 436 insertions(+), 2 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index c0065dd3b..640b1b12d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -4544,3 +4544,168 @@ pub(crate) fn emit_returning_results( Ok(()) } + +/// Get the number of values returned by an expression +pub fn expr_vector_size(expr: &Expr) -> Result { + Ok(match unwrap_parens(expr)? { + Expr::Between { + lhs, start, end, .. + } => { + let evs_left = expr_vector_size(lhs)?; + let evs_start = expr_vector_size(start)?; + let evs_end = expr_vector_size(end)?; + if evs_left != evs_start || evs_left != evs_end { + crate::bail_parse_error!("all arguments to BETWEEN must return the same number of values. Got: ({evs_left}) BETWEEN ({evs_start}) AND ({evs_end})"); + } + 1 + } + Expr::Binary(expr, operator, expr1) => { + let evs_left = expr_vector_size(expr)?; + let evs_right = expr_vector_size(expr1)?; + if evs_left != evs_right { + crate::bail_parse_error!("all arguments to binary operator {operator} must return the same number of values. Got: ({evs_left}) {operator} ({evs_right})"); + } + 1 + } + Expr::Register(_) => 1, + Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + if let Some(base) = base { + let evs_base = expr_vector_size(base)?; + if evs_base != 1 { + crate::bail_parse_error!( + "base expression in CASE must return 1 value. Got: ({evs_base})" + ); + } + } + for (when, then) in when_then_pairs { + let evs_when = expr_vector_size(when)?; + if evs_when != 1 { + crate::bail_parse_error!( + "when expression in CASE must return 1 value. Got: ({evs_when})" + ); + } + let evs_then = expr_vector_size(then)?; + if evs_then != 1 { + crate::bail_parse_error!( + "then expression in CASE must return 1 value. Got: ({evs_then})" + ); + } + } + if let Some(else_expr) = else_expr { + let evs_else_expr = expr_vector_size(else_expr)?; + if evs_else_expr != 1 { + crate::bail_parse_error!( + "else expression in CASE must return 1 value. Got: ({evs_else_expr})" + ); + } + } + 1 + } + Expr::Cast { expr, .. } => { + let evs_expr = expr_vector_size(expr)?; + if evs_expr != 1 { + crate::bail_parse_error!("argument to CAST must return 1 value. Got: ({evs_expr})"); + } + 1 + } + Expr::Collate(expr, _) => { + let evs_expr = expr_vector_size(expr)?; + if evs_expr != 1 { + crate::bail_parse_error!( + "argument to COLLATE must return 1 value. Got: ({evs_expr})" + ); + } + 1 + } + Expr::DoublyQualified(..) => 1, + Expr::Exists(_) => todo!(), + Expr::FunctionCall { name, args, .. } => { + for (pos, arg) in args.iter().enumerate() { + let evs_arg = expr_vector_size(arg)?; + if evs_arg != 1 { + crate::bail_parse_error!( + "argument {} to function call {name} must return 1 value. Got: ({evs_arg})", + pos + 1 + ); + } + } + 1 + } + Expr::FunctionCallStar { .. } => 1, + Expr::Id(_) => 1, + Expr::Column { .. } => 1, + Expr::RowId { .. } => 1, + Expr::InList { lhs, rhs, .. } => { + let evs_lhs = expr_vector_size(lhs)?; + for rhs in rhs.iter() { + let evs_rhs = expr_vector_size(rhs)?; + if evs_lhs != evs_rhs { + crate::bail_parse_error!("all arguments to IN list must return the same number of values, got: ({evs_lhs}) IN ({evs_rhs})"); + } + } + 1 + } + Expr::InSelect { .. } => { + crate::bail_parse_error!("InSelect is not supported in this position") + } + Expr::InTable { .. } => { + crate::bail_parse_error!("InTable is not supported in this position") + } + Expr::IsNull(expr) => { + let evs_expr = expr_vector_size(expr)?; + if evs_expr != 1 { + crate::bail_parse_error!( + "argument to IS NULL must return 1 value. Got: ({evs_expr})" + ); + } + 1 + } + Expr::Like { lhs, rhs, .. } => { + let evs_lhs = expr_vector_size(lhs)?; + if evs_lhs != 1 { + crate::bail_parse_error!( + "left operand of LIKE must return 1 value. Got: ({evs_lhs})" + ); + } + let evs_rhs = expr_vector_size(rhs)?; + if evs_rhs != 1 { + crate::bail_parse_error!( + "right operand of LIKE must return 1 value. Got: ({evs_rhs})" + ); + } + 1 + } + Expr::Literal(_) => 1, + Expr::Name(_) => 1, + Expr::NotNull(expr) => { + let evs_expr = expr_vector_size(expr)?; + if evs_expr != 1 { + crate::bail_parse_error!( + "argument to NOT NULL must return 1 value. Got: ({evs_expr})" + ); + } + 1 + } + Expr::Parenthesized(exprs) => exprs.len(), + Expr::Qualified(..) => 1, + Expr::Raise(..) => crate::bail_parse_error!("RAISE is not supported"), + Expr::Subquery(_) => todo!(), + Expr::Unary(unary_operator, expr) => { + let evs_expr = expr_vector_size(expr)?; + if evs_expr != 1 { + crate::bail_parse_error!("argument to unary operator {unary_operator} must return 1 value. Got: ({evs_expr})"); + } + 1 + } + Expr::Variable(_) => 1, + Expr::SubqueryResult { query_type, .. } => match query_type { + SubqueryType::Exists { .. } => 1, + SubqueryType::In { .. } => 1, + SubqueryType::RowValue { num_regs, .. } => *num_regs, + }, + }) +} diff --git a/core/translate/select.rs b/core/translate/select.rs index 3ec65af30..51c0d7de9 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -5,7 +5,9 @@ use super::plan::{ }; use crate::schema::Table; use crate::translate::emitter::{OperationMode, Resolver}; -use crate::translate::expr::{bind_and_rewrite_expr, BindingBehavior, ParamState}; +use crate::translate::expr::{ + bind_and_rewrite_expr, expr_vector_size, BindingBehavior, ParamState, +}; use crate::translate::group_by::compute_group_by_sort_order; use crate::translate::optimizer::optimize_plan; use crate::translate::plan::{GroupBy, Plan, ResultSetColumn, SelectPlan}; @@ -506,6 +508,8 @@ fn prepare_one_select_plan( plan_subqueries_from_select_plan(program, &mut plan, resolver, connection)?; + validate_expr_correct_column_counts(&plan)?; + // Return the unoptimized query plan Ok(plan) } @@ -562,11 +566,93 @@ fn prepare_one_select_plan( non_from_clause_subqueries: vec![], }; + validate_expr_correct_column_counts(&plan)?; + Ok(plan) } } } +/// Validate that all expressions in the plan return the correct number of values; +/// generally this only applies to parenthesized lists and subqueries. +fn validate_expr_correct_column_counts(plan: &SelectPlan) -> Result<()> { + for result_column in plan.result_columns.iter() { + let vec_size = expr_vector_size(&result_column.expr)?; + if vec_size != 1 { + crate::bail_parse_error!("result column must return 1 value, got {}", vec_size); + } + } + for (expr, _) in plan.order_by.iter() { + let vec_size = expr_vector_size(expr)?; + if vec_size != 1 { + crate::bail_parse_error!("order by expression must return 1 value, got {}", vec_size); + } + } + if let Some(group_by) = &plan.group_by { + for expr in group_by.exprs.iter() { + let vec_size = expr_vector_size(expr)?; + if vec_size != 1 { + crate::bail_parse_error!( + "group by expression must return 1 value, got {}", + vec_size + ); + } + } + if let Some(having) = &group_by.having { + for expr in having.iter() { + let vec_size = expr_vector_size(expr)?; + if vec_size != 1 { + crate::bail_parse_error!( + "having expression must return 1 value, got {}", + vec_size + ); + } + } + } + } + for aggregate in plan.aggregates.iter() { + for arg in aggregate.args.iter() { + let vec_size = expr_vector_size(arg)?; + if vec_size != 1 { + crate::bail_parse_error!( + "aggregate argument must return 1 value, got {}", + vec_size + ); + } + } + } + for term in plan.where_clause.iter() { + let vec_size = expr_vector_size(&term.expr)?; + if vec_size != 1 { + crate::bail_parse_error!( + "where clause expression must return 1 value, got {}", + vec_size + ); + } + } + for expr in plan.values.iter() { + for value in expr.iter() { + let vec_size = expr_vector_size(value)?; + if vec_size != 1 { + crate::bail_parse_error!("value must return 1 value, got {}", vec_size); + } + } + } + if let Some(limit) = &plan.limit { + let vec_size = expr_vector_size(limit)?; + if vec_size != 1 { + crate::bail_parse_error!("limit expression must return 1 value, got {}", vec_size); + } + } + if let Some(offset) = &plan.offset { + let vec_size = expr_vector_size(offset)?; + if vec_size != 1 { + crate::bail_parse_error!("offset expression must return 1 value, got {}", vec_size); + } + } + Ok(()) +} + fn add_vtab_predicates_to_where_clause( vtab_predicates: &mut Vec, plan: &mut SelectPlan, diff --git a/testing/subquery.test b/testing/subquery.test index f2076a2c3..67cbcf66a 100755 --- a/testing/subquery.test +++ b/testing/subquery.test @@ -804,4 +804,187 @@ do_execsql_test_on_specific_db {:memory:} subquery-in-offset { select * from items limit 2 offset (select skip_count from config); } {2|b -3|c} \ No newline at end of file +3|c} + +### INCORRECT NUMBER OF RETURNED VALUES - ERROR TESTS ### + +# Subquery returning multiple columns in SELECT clause (should error) +do_execsql_test_in_memory_any_error subquery-multiple-columns-in-select { + create table t(x, y); + insert into t values (1, 2); + select (select x, y from t) as result; +} + +# Subquery returning multiple columns in WHERE clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-where { + create table t1(x,y); + create table t2(y); + insert into t1 values (1,1); + insert into t2 values (1); + select * from t2 where y = (select x,y from t1); +} + +# Subquery returning multiple columns in HAVING clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-having { + create table orders(customer_id, amount); + create table thresholds(min_amount, max_amount); + insert into orders values (100, 50), (100, 150); + insert into thresholds values (100, 200); + select customer_id, sum(amount) as total + from orders + group by customer_id + having total > (select min_amount, max_amount from thresholds); +} + +# Subquery returning multiple columns in LIMIT clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-limit { + create table items(id); + create table config(max_results, other_col); + insert into items values (1), (2), (3); + insert into config values (2, 3); + select * from items limit (select max_results, other_col from config); +} + +# Subquery returning multiple columns in OFFSET clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-offset { + create table items(id); + create table config(skip_count, other_col); + insert into items values (1), (2), (3); + insert into config values (1, 2); + select * from items limit 1 offset (select skip_count, other_col from config); +} + +# Subquery returning multiple columns in ORDER BY clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-order-by { + create table items(id, name); + create table sort_order(priority, other_col); + insert into items values (1, 'a'), (2, 'b'); + insert into sort_order values (1, 2); + select * from items order by (select priority, other_col from sort_order); +} + +# Subquery returning multiple columns in GROUP BY clause (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-group-by { + create table sales(product_id, amount); + create table grouping(category, other_col); + insert into sales values (1, 100), (2, 200); + insert into grouping values (1, 2); + select sum(amount) from sales group by (select category, other_col from grouping); +} + +# Subquery returning multiple columns in CASE WHEN (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-case-when { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select case when (select y, z from t2) then 'yes' else 'no' end from t1; +} + +# Subquery returning multiple columns in CASE THEN (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-case-then { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select case when x = 1 then (select y, z from t2) else 0 end from t1; +} + +# Subquery returning multiple columns in CASE ELSE (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-case-else { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select case when x = 2 then 0 else (select y, z from t2) end from t1; +} + +# Subquery returning multiple columns in aggregate function argument (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-aggregate-arg { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select max((select y, z from t2)) from t1; +} + +# Subquery returning multiple columns in binary expression (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-binary-expr { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select x + (select y, z from t2) from t1; +} + +# Subquery returning multiple columns in BETWEEN (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-between { + create table t1(x); + create table t2(y, z); + insert into t1 values (5); + insert into t2 values (1, 2); + select * from t1 where x between (select y, z from t2) and 10; +} + +# Subquery returning multiple columns in CAST (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-cast { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select cast((select y, z from t2) as integer) from t1; +} + +# Subquery returning multiple columns in COLLATE (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-collate { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values ('a', 'b'); + select (select y, z from t2) collate nocase from t1; +} + +# Subquery returning multiple columns in IS NULL (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-is-null { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select * from t1 where (select y, z from t2) is null; +} + +# Subquery returning multiple columns in NOT NULL (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-not-null { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select * from t1 where (select y, z from t2) not null; +} + +# Subquery returning multiple columns in LIKE (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-like { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values ('a', 'b'); + select * from t1 where (select y, z from t2) like 'a%'; +} + +# Subquery returning multiple columns in unary operator (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-unary { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select -(select y, z from t2) from t1; +} + +# Subquery returning multiple columns in function call (should error) +do_execsql_test_in_memory_any_error subquery-vector-in-function-call { + create table t1(x); + create table t2(y, z); + insert into t1 values (1); + insert into t2 values (1, 2); + select abs((select y, z from t2)) from t1; +} \ No newline at end of file