Merge 'Prevent misuse of subqueries that return multiple columns' from Jussi Saurio

Closes #3892
Closes #3888
Stuff like:
```sql
turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case (select y, z from t2) when 1 then 'one' else 'other' end from t1;
  × Parse error: base expression in CASE must return 1 value

turso>     create table t(x, y);
    insert into t values (1, 2);
    select (select x, y from t) as result;
  × Parse error: result column must return 1 value, got 2

turso>     create table t1(x,y);
    create table t2(y);
    insert into t1 values (1,1);
    insert into t2 values (1);
    select * from t2 where y = (select x,y from t1);
  × Parse error: all arguments to binary operator = must return the same number of
  │ values. Got: (1) = (2)

turso>     create table orders(customer_id, amount);
    create table thresholds(min_amount, max_amount);
    insert into orders values (100, 50), (100, 150);
    insert into thresholds values (100, 200);
    select customer_id, sum(amount) as total 
    from orders 
    group by customer_id 
    having total > (select min_amount, max_amount from thresholds);
  × Parse error: all arguments to binary operator > must return the same number of
  │ values. Got: (1) > (2)

turso>     create table items(id);
    create table config(max_results, other_col);
    insert into items values (1), (2), (3);
    insert into config values (2, 3);
    select * from items limit (select max_results, other_col from config);
  × Parse error: limit expression must return 1 value, got 2

turso>     create table items(id);
    create table config(skip_count, other_col);
    insert into items values (1), (2), (3);
    insert into config values (1, 2);
    select * from items limit 1 offset (select skip_count, other_col from config);
  × Parse error: offset expression must return 1 value, got 2

turso>     create table items(id, name);
    create table sort_order(priority, other_col);
    insert into items values (1, 'a'), (2, 'b');
    insert into sort_order values (1, 2);
    select * from items order by (select priority, other_col from sort_order);
  × Parse error: order by expression must return 1 value, got 2

turso>     create table sales(product_id, amount);
    create table grouping(category, other_col);
    insert into sales values (1, 100), (2, 200);
    insert into grouping values (1, 2);
    select sum(amount) from sales group by (select category, other_col from grouping);
  × Parse error: group by expression must return 1 value, got 2

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when (select y, z from t2) then 'yes' else 'no' end from t1;
  × Parse error: when expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when x = 1 then (select y, z from t2) else 0 end from t1;
  × Parse error: then expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select case when x = 2 then 0 else (select y, z from t2) end from t1;
  × Parse error: else expression in CASE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select max((select y, z from t2)) from t1;
  × Parse error: argument 0 to function call max must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select x + (select y, z from t2) from t1;
  × Parse error: all arguments to binary operator + must return the same number of
  │ values. Got: (1) + (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (5);
    insert into t2 values (1, 2);
    select * from t1 where x between (select y, z from t2) and 10;
  × Parse error: all arguments to binary operator <= must return the same number of
  │ values. Got: (2) <= (1)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select cast((select y, z from t2) as integer) from t1;
  × Parse error: argument to CAST must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values ('a', 'b');
    select (select y, z from t2) collate nocase from t1;
  × Parse error: argument to COLLATE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select * from t1 where (select y, z from t2) is null;
  × Parse error: all arguments to binary operator IS must return the same number of
  │ values. Got: (2) IS (1)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select * from t1 where (select y, z from t2) not null;
  × Parse error: argument to NOT NULL must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values ('a', 'b');
    select * from t1 where (select y, z from t2) like 'a%';
  × Parse error: left operand of LIKE must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select -(select y, z from t2) from t1;
  × Parse error: argument to unary operator - must return 1 value. Got: (2)

turso>     create table t1(x);
    create table t2(y, z);
    insert into t1 values (1);
    insert into t2 values (1, 2);
    select abs((select y, z from t2)) from t1;
  × Parse error: argument 0 to function call abs must return 1 value. Got: (2)
  ```

Closes #3906
This commit is contained in:
Pekka Enberg
2025-11-03 13:06:38 +02:00
committed by GitHub
3 changed files with 436 additions and 2 deletions

View File

@@ -4544,3 +4544,168 @@ pub(crate) fn emit_returning_results(
Ok(())
}
/// Get the number of values returned by an expression
pub fn expr_vector_size(expr: &Expr) -> Result<usize> {
Ok(match unwrap_parens(expr)? {
Expr::Between {
lhs, start, end, ..
} => {
let evs_left = expr_vector_size(lhs)?;
let evs_start = expr_vector_size(start)?;
let evs_end = expr_vector_size(end)?;
if evs_left != evs_start || evs_left != evs_end {
crate::bail_parse_error!("all arguments to BETWEEN must return the same number of values. Got: ({evs_left}) BETWEEN ({evs_start}) AND ({evs_end})");
}
1
}
Expr::Binary(expr, operator, expr1) => {
let evs_left = expr_vector_size(expr)?;
let evs_right = expr_vector_size(expr1)?;
if evs_left != evs_right {
crate::bail_parse_error!("all arguments to binary operator {operator} must return the same number of values. Got: ({evs_left}) {operator} ({evs_right})");
}
1
}
Expr::Register(_) => 1,
Expr::Case {
base,
when_then_pairs,
else_expr,
} => {
if let Some(base) = base {
let evs_base = expr_vector_size(base)?;
if evs_base != 1 {
crate::bail_parse_error!(
"base expression in CASE must return 1 value. Got: ({evs_base})"
);
}
}
for (when, then) in when_then_pairs {
let evs_when = expr_vector_size(when)?;
if evs_when != 1 {
crate::bail_parse_error!(
"when expression in CASE must return 1 value. Got: ({evs_when})"
);
}
let evs_then = expr_vector_size(then)?;
if evs_then != 1 {
crate::bail_parse_error!(
"then expression in CASE must return 1 value. Got: ({evs_then})"
);
}
}
if let Some(else_expr) = else_expr {
let evs_else_expr = expr_vector_size(else_expr)?;
if evs_else_expr != 1 {
crate::bail_parse_error!(
"else expression in CASE must return 1 value. Got: ({evs_else_expr})"
);
}
}
1
}
Expr::Cast { expr, .. } => {
let evs_expr = expr_vector_size(expr)?;
if evs_expr != 1 {
crate::bail_parse_error!("argument to CAST must return 1 value. Got: ({evs_expr})");
}
1
}
Expr::Collate(expr, _) => {
let evs_expr = expr_vector_size(expr)?;
if evs_expr != 1 {
crate::bail_parse_error!(
"argument to COLLATE must return 1 value. Got: ({evs_expr})"
);
}
1
}
Expr::DoublyQualified(..) => 1,
Expr::Exists(_) => todo!(),
Expr::FunctionCall { name, args, .. } => {
for (pos, arg) in args.iter().enumerate() {
let evs_arg = expr_vector_size(arg)?;
if evs_arg != 1 {
crate::bail_parse_error!(
"argument {} to function call {name} must return 1 value. Got: ({evs_arg})",
pos + 1
);
}
}
1
}
Expr::FunctionCallStar { .. } => 1,
Expr::Id(_) => 1,
Expr::Column { .. } => 1,
Expr::RowId { .. } => 1,
Expr::InList { lhs, rhs, .. } => {
let evs_lhs = expr_vector_size(lhs)?;
for rhs in rhs.iter() {
let evs_rhs = expr_vector_size(rhs)?;
if evs_lhs != evs_rhs {
crate::bail_parse_error!("all arguments to IN list must return the same number of values, got: ({evs_lhs}) IN ({evs_rhs})");
}
}
1
}
Expr::InSelect { .. } => {
crate::bail_parse_error!("InSelect is not supported in this position")
}
Expr::InTable { .. } => {
crate::bail_parse_error!("InTable is not supported in this position")
}
Expr::IsNull(expr) => {
let evs_expr = expr_vector_size(expr)?;
if evs_expr != 1 {
crate::bail_parse_error!(
"argument to IS NULL must return 1 value. Got: ({evs_expr})"
);
}
1
}
Expr::Like { lhs, rhs, .. } => {
let evs_lhs = expr_vector_size(lhs)?;
if evs_lhs != 1 {
crate::bail_parse_error!(
"left operand of LIKE must return 1 value. Got: ({evs_lhs})"
);
}
let evs_rhs = expr_vector_size(rhs)?;
if evs_rhs != 1 {
crate::bail_parse_error!(
"right operand of LIKE must return 1 value. Got: ({evs_rhs})"
);
}
1
}
Expr::Literal(_) => 1,
Expr::Name(_) => 1,
Expr::NotNull(expr) => {
let evs_expr = expr_vector_size(expr)?;
if evs_expr != 1 {
crate::bail_parse_error!(
"argument to NOT NULL must return 1 value. Got: ({evs_expr})"
);
}
1
}
Expr::Parenthesized(exprs) => exprs.len(),
Expr::Qualified(..) => 1,
Expr::Raise(..) => crate::bail_parse_error!("RAISE is not supported"),
Expr::Subquery(_) => todo!(),
Expr::Unary(unary_operator, expr) => {
let evs_expr = expr_vector_size(expr)?;
if evs_expr != 1 {
crate::bail_parse_error!("argument to unary operator {unary_operator} must return 1 value. Got: ({evs_expr})");
}
1
}
Expr::Variable(_) => 1,
Expr::SubqueryResult { query_type, .. } => match query_type {
SubqueryType::Exists { .. } => 1,
SubqueryType::In { .. } => 1,
SubqueryType::RowValue { num_regs, .. } => *num_regs,
},
})
}

View File

@@ -5,7 +5,9 @@ use super::plan::{
};
use crate::schema::Table;
use crate::translate::emitter::{OperationMode, Resolver};
use crate::translate::expr::{bind_and_rewrite_expr, BindingBehavior, ParamState};
use crate::translate::expr::{
bind_and_rewrite_expr, expr_vector_size, BindingBehavior, ParamState,
};
use crate::translate::group_by::compute_group_by_sort_order;
use crate::translate::optimizer::optimize_plan;
use crate::translate::plan::{GroupBy, Plan, ResultSetColumn, SelectPlan};
@@ -506,6 +508,8 @@ fn prepare_one_select_plan(
plan_subqueries_from_select_plan(program, &mut plan, resolver, connection)?;
validate_expr_correct_column_counts(&plan)?;
// Return the unoptimized query plan
Ok(plan)
}
@@ -562,11 +566,93 @@ fn prepare_one_select_plan(
non_from_clause_subqueries: vec![],
};
validate_expr_correct_column_counts(&plan)?;
Ok(plan)
}
}
}
/// Validate that all expressions in the plan return the correct number of values;
/// generally this only applies to parenthesized lists and subqueries.
fn validate_expr_correct_column_counts(plan: &SelectPlan) -> Result<()> {
for result_column in plan.result_columns.iter() {
let vec_size = expr_vector_size(&result_column.expr)?;
if vec_size != 1 {
crate::bail_parse_error!("result column must return 1 value, got {}", vec_size);
}
}
for (expr, _) in plan.order_by.iter() {
let vec_size = expr_vector_size(expr)?;
if vec_size != 1 {
crate::bail_parse_error!("order by expression must return 1 value, got {}", vec_size);
}
}
if let Some(group_by) = &plan.group_by {
for expr in group_by.exprs.iter() {
let vec_size = expr_vector_size(expr)?;
if vec_size != 1 {
crate::bail_parse_error!(
"group by expression must return 1 value, got {}",
vec_size
);
}
}
if let Some(having) = &group_by.having {
for expr in having.iter() {
let vec_size = expr_vector_size(expr)?;
if vec_size != 1 {
crate::bail_parse_error!(
"having expression must return 1 value, got {}",
vec_size
);
}
}
}
}
for aggregate in plan.aggregates.iter() {
for arg in aggregate.args.iter() {
let vec_size = expr_vector_size(arg)?;
if vec_size != 1 {
crate::bail_parse_error!(
"aggregate argument must return 1 value, got {}",
vec_size
);
}
}
}
for term in plan.where_clause.iter() {
let vec_size = expr_vector_size(&term.expr)?;
if vec_size != 1 {
crate::bail_parse_error!(
"where clause expression must return 1 value, got {}",
vec_size
);
}
}
for expr in plan.values.iter() {
for value in expr.iter() {
let vec_size = expr_vector_size(value)?;
if vec_size != 1 {
crate::bail_parse_error!("value must return 1 value, got {}", vec_size);
}
}
}
if let Some(limit) = &plan.limit {
let vec_size = expr_vector_size(limit)?;
if vec_size != 1 {
crate::bail_parse_error!("limit expression must return 1 value, got {}", vec_size);
}
}
if let Some(offset) = &plan.offset {
let vec_size = expr_vector_size(offset)?;
if vec_size != 1 {
crate::bail_parse_error!("offset expression must return 1 value, got {}", vec_size);
}
}
Ok(())
}
fn add_vtab_predicates_to_where_clause(
vtab_predicates: &mut Vec<Expr>,
plan: &mut SelectPlan,

View File

@@ -804,4 +804,187 @@ do_execsql_test_on_specific_db {:memory:} subquery-in-offset {
select * from items limit 2 offset (select skip_count from config);
} {2|b
3|c}
3|c}
### INCORRECT NUMBER OF RETURNED VALUES - ERROR TESTS ###
# Subquery returning multiple columns in SELECT clause (should error)
do_execsql_test_in_memory_any_error subquery-multiple-columns-in-select {
create table t(x, y);
insert into t values (1, 2);
select (select x, y from t) as result;
}
# Subquery returning multiple columns in WHERE clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-where {
create table t1(x,y);
create table t2(y);
insert into t1 values (1,1);
insert into t2 values (1);
select * from t2 where y = (select x,y from t1);
}
# Subquery returning multiple columns in HAVING clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-having {
create table orders(customer_id, amount);
create table thresholds(min_amount, max_amount);
insert into orders values (100, 50), (100, 150);
insert into thresholds values (100, 200);
select customer_id, sum(amount) as total
from orders
group by customer_id
having total > (select min_amount, max_amount from thresholds);
}
# Subquery returning multiple columns in LIMIT clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-limit {
create table items(id);
create table config(max_results, other_col);
insert into items values (1), (2), (3);
insert into config values (2, 3);
select * from items limit (select max_results, other_col from config);
}
# Subquery returning multiple columns in OFFSET clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-offset {
create table items(id);
create table config(skip_count, other_col);
insert into items values (1), (2), (3);
insert into config values (1, 2);
select * from items limit 1 offset (select skip_count, other_col from config);
}
# Subquery returning multiple columns in ORDER BY clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-order-by {
create table items(id, name);
create table sort_order(priority, other_col);
insert into items values (1, 'a'), (2, 'b');
insert into sort_order values (1, 2);
select * from items order by (select priority, other_col from sort_order);
}
# Subquery returning multiple columns in GROUP BY clause (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-group-by {
create table sales(product_id, amount);
create table grouping(category, other_col);
insert into sales values (1, 100), (2, 200);
insert into grouping values (1, 2);
select sum(amount) from sales group by (select category, other_col from grouping);
}
# Subquery returning multiple columns in CASE WHEN (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-case-when {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select case when (select y, z from t2) then 'yes' else 'no' end from t1;
}
# Subquery returning multiple columns in CASE THEN (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-case-then {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select case when x = 1 then (select y, z from t2) else 0 end from t1;
}
# Subquery returning multiple columns in CASE ELSE (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-case-else {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select case when x = 2 then 0 else (select y, z from t2) end from t1;
}
# Subquery returning multiple columns in aggregate function argument (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-aggregate-arg {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select max((select y, z from t2)) from t1;
}
# Subquery returning multiple columns in binary expression (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-binary-expr {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select x + (select y, z from t2) from t1;
}
# Subquery returning multiple columns in BETWEEN (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-between {
create table t1(x);
create table t2(y, z);
insert into t1 values (5);
insert into t2 values (1, 2);
select * from t1 where x between (select y, z from t2) and 10;
}
# Subquery returning multiple columns in CAST (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-cast {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select cast((select y, z from t2) as integer) from t1;
}
# Subquery returning multiple columns in COLLATE (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-collate {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values ('a', 'b');
select (select y, z from t2) collate nocase from t1;
}
# Subquery returning multiple columns in IS NULL (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-is-null {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select * from t1 where (select y, z from t2) is null;
}
# Subquery returning multiple columns in NOT NULL (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-not-null {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select * from t1 where (select y, z from t2) not null;
}
# Subquery returning multiple columns in LIKE (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-like {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values ('a', 'b');
select * from t1 where (select y, z from t2) like 'a%';
}
# Subquery returning multiple columns in unary operator (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-unary {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select -(select y, z from t2) from t1;
}
# Subquery returning multiple columns in function call (should error)
do_execsql_test_in_memory_any_error subquery-vector-in-function-call {
create table t1(x);
create table t2(y, z);
insert into t1 values (1);
insert into t2 values (1, 2);
select abs((select y, z from t2)) from t1;
}