Merge 'Where clause subquery support' from Jussi Saurio

Closes #1282
# Support for WHERE clause subqueries
This PR implements support for subqueries that appear in the WHERE
clause of SELECT statements.
## What are those lol
1. **EXISTS subqueries**: `WHERE EXISTS (SELECT ...)`
2. **Row value subqueries**: `WHERE x = (SELECT ...)` or `WHERE (x, y) =
(SELECT ...)`. The latter are not yet supported - only the single-column
("scalar subquery") case is.
3. **IN subqueries**: `WHERE x IN (SELECT ...)` or `WHERE (x, y) IN
(SELECT ...)`
## Correlated vs Uncorrelated Subqueries
- **Uncorrelated subqueries** reference only their own tables and can be
evaluated once.
- **Correlated subqueries** reference columns from the outer query
(e.g., `WHERE EXISTS (SELECT * FROM t2 WHERE t2.id = t1.id)`) and must
be re-evaluated for each row of the outer query
## Implementation
### Planning
During query planning, the WHERE clause is walked to find subquery
expressions (`Expr::Exists`, `Expr::Subquery`, `Expr::InSelect`). Each
subquery is:
1. Assigned a unique internal ID
2. Compiled into its own `SelectPlan` with outer query tables provided
as available references
3. Replaced in the AST with an `Expr::SubqueryResult` node that
references the subquery with its internal ID
4. Stored in a `Vec<NonFromClauseSubquery>` on the `SelectPlan`
For IN subqueries, an ephemeral index is created to store the subquery
results; for other kinds, the results are stored in register(s).
### Translation
Before emitting bytecode, we need to determine when each subquery should
be evaluated:
- **Uncorrelated**: Evaluated once before opening any table cursors
- **Correlated**: Evaluated at the appropriate nested loop depth after
all referenced outer tables are in scope
This is calculated by examining which outer query tables the subquery
references and finding the right-most (innermost) loop that opens those
tables - using similar mechanisms that we use for figuring out when to
evaluate other `WhereTerm`s too.
### Code Generation
- **EXISTS**: Sets a register to 1 if any row is produced, 0 otherwise.
Has new `QueryDestination::ExistsSubqueryResult` variant.
- **IN**: Results stored in an ephemeral index and the index is probed.
- **RowValue**: Results stored in a range of registers. Has new
`QueryDestination::RowValueSubqueryResult` variant.
## Annoying details
### Which cursor to read from in a subquery?
Sometimes a query will use a covering index, i.e. skip opening the table
cursor at all if the index contains All The Needed Stuff.
Correlated subqueries reading columns from outer tables is a bit
problematic in this regard: with our current translation code, the
subquery doesn't know whether the outer query opened a table cursor,
index cursor, or both. So, for now, we try to find a table cursor first,
then fall back to finding any index cursor for that table.

Reviewed-by: Preston Thorpe <preston@turso.tech>

Closes #3847
This commit is contained in:
Jussi Saurio
2025-10-28 06:36:55 +02:00
committed by GitHub
32 changed files with 2116 additions and 129 deletions

View File

@@ -860,15 +860,34 @@ impl Limbo {
indent_count: usize, indent_count: usize,
curr_insn: &str, curr_insn: &str,
prev_insn: &str, prev_insn: &str,
p1: &str,
unclosed_begin_subrtns: &mut Vec<String>,
) -> usize { ) -> usize {
let indent_count = match prev_insn { let indent_count = match prev_insn {
"Rewind" | "Last" | "SorterSort" | "SeekGE" | "SeekGT" | "SeekLE" "Rewind" | "Last" | "SorterSort" | "SeekGE" | "SeekGT" | "SeekLE"
| "SeekLT" => indent_count + 1, | "SeekLT" | "BeginSubrtn" => indent_count + 1,
_ => indent_count, _ => indent_count,
}; };
// The corresponding closing instruction for BeginSubrtn is Return,
// but Return is also used for other purposes, so we need to track pairs of
// BeginSubrtn and Return that share the same 1st parameter (the subroutine register).
if curr_insn == "BeginSubrtn" {
unclosed_begin_subrtns.push(p1.to_string());
}
match curr_insn { match curr_insn {
"Next" | "SorterNext" | "Prev" => indent_count - 1, "Next" | "SorterNext" | "Prev" => indent_count - 1,
"Return" => {
let matching_begin_subrtn =
unclosed_begin_subrtns.iter().position(|b| b == p1);
if let Some(matching_begin_subrtn) = matching_begin_subrtn {
unclosed_begin_subrtns.remove(matching_begin_subrtn);
indent_count - 1
} else {
indent_count
}
}
_ => indent_count, _ => indent_count,
} }
} }
@@ -883,16 +902,24 @@ impl Limbo {
let mut prev_insn: String = "".to_string(); let mut prev_insn: String = "".to_string();
let mut indent_count = 0; let mut indent_count = 0;
let indent = " "; let indent = " ";
let mut unclosed_begin_subrtns = vec![];
loop { loop {
row_step_result_query!(self, sql, rows, statistics, { row_step_result_query!(self, sql, rows, statistics, {
let row = rows.row().unwrap(); let row = rows.row().unwrap();
let insn = row.get_value(1).to_string(); let insn = row.get_value(1).to_string();
indent_count = get_explain_indent(indent_count, &insn, &prev_insn); let p1 = row.get_value(2).to_string();
indent_count = get_explain_indent(
indent_count,
&insn,
&prev_insn,
&p1,
&mut unclosed_begin_subrtns,
);
let _ = self.writeln(format!( let _ = self.writeln(format!(
"{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",
row.get_value(0).to_string(), row.get_value(0).to_string(),
&(indent.repeat(indent_count) + &insn), &(indent.repeat(indent_count) + &insn),
row.get_value(2).to_string(), p1,
row.get_value(3).to_string(), row.get_value(3).to_string(),
row.get_value(4).to_string(), row.get_value(4).to_string(),
row.get_value(5).to_string(), row.get_value(5).to_string(),

View File

@@ -2779,7 +2779,7 @@ impl Statement {
.program .program
.table_references .table_references
.find_table_by_internal_id(*table) .find_table_by_internal_id(*table)
.map(|table_ref| Cow::Borrowed(table_ref.get_name())), .map(|(_, table_ref)| Cow::Borrowed(table_ref.get_name())),
_ => None, _ => None,
} }
} }
@@ -2808,7 +2808,7 @@ impl Statement {
column: column_idx, column: column_idx,
.. ..
} => { } => {
let table_ref = self let (_, table_ref) = self
.program .program
.table_references .table_references
.find_table_by_internal_id(*table)?; .find_table_by_internal_id(*table)?;

View File

@@ -107,7 +107,7 @@ fn emit_collseq_if_needed(
// If no explicit collation, check if this is a column with table-defined collation // If no explicit collation, check if this is a column with table-defined collation
if let ast::Expr::Column { table, column, .. } = expr { if let ast::Expr::Column { table, column, .. } = expr {
if let Some(table_ref) = referenced_tables.find_table_by_internal_id(*table) { if let Some((_, table_ref)) = referenced_tables.find_table_by_internal_id(*table) {
if let Some(table_column) = table_ref.get_column_at(*column) { if let Some(table_column) = table_ref.get_column_at(*column) {
if let Some(collation) = &table_column.collation { if let Some(collation) = &table_column.collation {
program.emit_insn(Insn::CollSeq { program.emit_insn(Insn::CollSeq {

View File

@@ -105,7 +105,7 @@ pub fn get_collseq_from_expr(
return Ok(WalkControl::SkipChildren); return Ok(WalkControl::SkipChildren);
} }
Expr::Column { table, column, .. } => { Expr::Column { table, column, .. } => {
let table_ref = referenced_tables let (_, table_ref) = referenced_tables
.find_table_by_internal_id(*table) .find_table_by_internal_id(*table)
.ok_or_else(|| crate::LimboError::ParseError("table not found".to_string()))?; .ok_or_else(|| crate::LimboError::ParseError("table not found".to_string()))?;
let column = table_ref let column = table_ref
@@ -117,7 +117,7 @@ pub fn get_collseq_from_expr(
return Ok(WalkControl::Continue); return Ok(WalkControl::Continue);
} }
Expr::RowId { table, .. } => { Expr::RowId { table, .. } => {
let table_ref = referenced_tables let (_, table_ref) = referenced_tables
.find_table_by_internal_id(*table) .find_table_by_internal_id(*table)
.ok_or_else(|| crate::LimboError::ParseError("table not found".to_string()))?; .ok_or_else(|| crate::LimboError::ParseError("table not found".to_string()))?;
if let Some(btree) = table_ref.btree() { if let Some(btree) = table_ref.btree() {

View File

@@ -251,7 +251,7 @@ pub struct PlanContext<'a>(pub &'a [&'a TableReferences]);
// Definitely not perfect yet // Definitely not perfect yet
impl ToSqlContext for PlanContext<'_> { impl ToSqlContext for PlanContext<'_> {
fn get_column_name(&self, table_id: TableInternalId, col_idx: usize) -> Option<Option<&str>> { fn get_column_name(&self, table_id: TableInternalId, col_idx: usize) -> Option<Option<&str>> {
let table = self let (_, table) = self
.0 .0
.iter() .iter()
.find_map(|table_ref| table_ref.find_table_by_internal_id(table_id))?; .find_map(|table_ref| table_ref.find_table_by_internal_id(table_id))?;
@@ -270,7 +270,8 @@ impl ToSqlContext for PlanContext<'_> {
match (joined_table, outer_query) { match (joined_table, outer_query) {
(Some(table), None) => Some(&table.identifier), (Some(table), None) => Some(&table.identifier),
(None, Some(table)) => Some(&table.identifier), (None, Some(table)) => Some(&table.identifier),
_ => unreachable!(), (Some(table), Some(_)) => Some(&table.identifier),
(None, None) => unreachable!(),
} }
} }
} }

View File

@@ -21,7 +21,7 @@ use super::plan::{
Distinctness, JoinOrderMember, Operation, Scan, SelectPlan, TableReferences, UpdatePlan, Distinctness, JoinOrderMember, Operation, Scan, SelectPlan, TableReferences, UpdatePlan,
}; };
use super::select::emit_simple_count; use super::select::emit_simple_count;
use super::subquery::emit_subqueries; use super::subquery::emit_from_clause_subqueries;
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
use crate::function::Func; use crate::function::Func;
use crate::schema::{BTreeTable, Column, Schema, Table, ROWID_SENTINEL}; use crate::schema::{BTreeTable, Column, Schema, Table, ROWID_SENTINEL};
@@ -243,7 +243,7 @@ pub fn emit_program(
} }
#[instrument(skip_all, level = Level::DEBUG)] #[instrument(skip_all, level = Level::DEBUG)]
fn emit_program_for_select( pub fn emit_program_for_select(
program: &mut ProgramBuilder, program: &mut ProgramBuilder,
resolver: &Resolver, resolver: &Resolver,
mut plan: SelectPlan, mut plan: SelectPlan,
@@ -280,8 +280,8 @@ pub fn emit_query<'a>(
return Ok(reg_result_cols_start); return Ok(reg_result_cols_start);
} }
// Emit subqueries first so the results can be read in the main query loop. // Emit FROM clause subqueries first so the results can be read in the main query loop.
emit_subqueries(program, t_ctx, &mut plan.table_references)?; emit_from_clause_subqueries(program, t_ctx, &mut plan.table_references)?;
// No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0
// however an aggregation might still happen, // however an aggregation might still happen,
@@ -364,6 +364,8 @@ pub fn emit_query<'a>(
plan.group_by.as_ref(), plan.group_by.as_ref(),
OperationMode::SELECT, OperationMode::SELECT,
&plan.where_clause, &plan.where_clause,
&plan.join_order,
&mut plan.non_from_clause_subqueries,
)?; )?;
if plan.is_simple_count() { if plan.is_simple_count() {
@@ -380,6 +382,7 @@ pub fn emit_query<'a>(
&plan.where_clause, &plan.where_clause,
None, None,
OperationMode::SELECT, OperationMode::SELECT,
&mut plan.non_from_clause_subqueries,
)?; )?;
// Process result columns and expressions in the inner loop // Process result columns and expressions in the inner loop
@@ -463,6 +466,8 @@ fn emit_program_for_delete(
None, None,
OperationMode::DELETE, OperationMode::DELETE,
&plan.where_clause, &plan.where_clause,
&[JoinOrderMember::default()],
&mut [],
)?; )?;
// Set up main query execution loop // Set up main query execution loop
@@ -474,6 +479,7 @@ fn emit_program_for_delete(
&plan.where_clause, &plan.where_clause,
None, None,
OperationMode::DELETE, OperationMode::DELETE,
&mut [],
)?; )?;
emit_delete_insns( emit_delete_insns(
@@ -963,6 +969,8 @@ fn emit_program_for_update(
None, None,
mode.clone(), mode.clone(),
&plan.where_clause, &plan.where_clause,
&[JoinOrderMember::default()],
&mut [],
)?; )?;
// Prepare index cursors // Prepare index cursors
@@ -999,6 +1007,7 @@ fn emit_program_for_update(
&plan.where_clause, &plan.where_clause,
temp_cursor_id, temp_cursor_id,
mode.clone(), mode.clone(),
&mut [],
)?; )?;
let target_table_cursor_id = let target_table_cursor_id =

View File

@@ -1,7 +1,7 @@
use std::sync::Arc; use std::sync::Arc;
use tracing::{instrument, Level}; use tracing::{instrument, Level};
use turso_parser::ast::{self, As, Expr, UnaryOperator}; use turso_parser::ast::{self, As, Expr, SubqueryType, UnaryOperator};
use super::emitter::Resolver; use super::emitter::Resolver;
use super::optimizer::Optimizable; use super::optimizer::Optimizable;
@@ -288,6 +288,25 @@ pub fn translate_condition_expr(
resolver: &Resolver, resolver: &Resolver,
) -> Result<()> { ) -> Result<()> {
match expr { match expr {
ast::Expr::SubqueryResult { query_type, .. } => match query_type {
SubqueryType::Exists { result_reg } => {
emit_cond_jump(program, condition_metadata, *result_reg);
}
SubqueryType::In { .. } => {
let result_reg = program.alloc_register();
translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?;
emit_cond_jump(program, condition_metadata, result_reg);
}
SubqueryType::RowValue { num_regs, .. } => {
if *num_regs != 1 {
// A query like SELECT * FROM t WHERE (SELECT ...) must return a single column.
crate::bail_parse_error!("sub-select returns {num_regs} columns - expected 1");
}
let result_reg = program.alloc_register();
translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?;
emit_cond_jump(program, condition_metadata, result_reg);
}
},
ast::Expr::Register(_) => { ast::Expr::Register(_) => {
crate::bail_parse_error!("Register in WHERE clause is currently unused. Consider removing Resolver::expr_to_reg_cache and using Expr::Register instead"); crate::bail_parse_error!("Register in WHERE clause is currently unused. Consider removing Resolver::expr_to_reg_cache and using Expr::Register instead");
} }
@@ -593,6 +612,149 @@ pub fn translate_expr(
} }
match expr { match expr {
ast::Expr::SubqueryResult {
lhs,
not_in,
query_type,
..
} => {
match query_type {
SubqueryType::Exists { result_reg } => {
program.emit_insn(Insn::Copy {
src_reg: *result_reg,
dst_reg: target_register,
extra_amount: 0,
});
Ok(target_register)
}
SubqueryType::In { cursor_id } => {
// jump here when we can definitely skip the row
let label_skip_row = program.allocate_label();
// jump here when we can definitely include the row
let label_include_row = program.allocate_label();
// jump here when we need to make extra null-related checks, because sql null is the greatest thing ever
let label_null_rewind = program.allocate_label();
let label_null_checks_loop_start = program.allocate_label();
let label_null_checks_next = program.allocate_label();
program.emit_insn(Insn::Integer {
value: 0,
dest: target_register,
});
let lhs_columns = match unwrap_parens(lhs.as_ref().unwrap())? {
ast::Expr::Parenthesized(exprs) => {
exprs.iter().map(|e| e.as_ref()).collect()
}
expr => vec![expr],
};
let lhs_column_count = lhs_columns.len();
let lhs_column_regs_start = program.alloc_registers(lhs_column_count);
for (i, lhs_column) in lhs_columns.iter().enumerate() {
translate_expr(
program,
referenced_tables,
lhs_column,
lhs_column_regs_start + i,
resolver,
)?;
if !lhs_column.is_nonnull(referenced_tables.as_ref().unwrap()) {
program.emit_insn(Insn::IsNull {
reg: lhs_column_regs_start + i,
target_pc: if *not_in {
label_null_rewind
} else {
label_skip_row
},
});
}
}
if *not_in {
// WHERE ... NOT IN (SELECT ...)
// We must skip the row if we find a match.
program.emit_insn(Insn::Found {
cursor_id: *cursor_id,
target_pc: label_skip_row,
record_reg: lhs_column_regs_start,
num_regs: lhs_column_count,
});
// Ok, so Found didn't return a match.
// Because SQL NULL, we need do extra checks to see if we can include the row.
// Consider:
// 1. SELECT * FROM T WHERE 1 NOT IN (SELECT NULL),
// 2. SELECT * FROM T WHERE 1 IN (SELECT NULL) -- or anything else where the subquery evaluates to NULL.
// _Both_ of these queries should return nothing, because... SQL NULL.
// The same goes for e.g. SELECT * FROM T WHERE (1,1) NOT IN (SELECT NULL, NULL).
// However, it does _NOT_ apply for SELECT * FROM T WHERE (1,1) NOT IN (SELECT NULL, 1).
// BUT: it DOES apply for SELECT * FROM T WHERE (2,2) NOT IN ((1,1), (NULL, NULL))!!!
// Ergo: if the subquery result has _ANY_ tuples with all NULLs, we need to NOT include the row.
//
// So, if we didn't found a match (and hence, so far, our 'NOT IN' condition still applies),
// we must still rewind the subquery's ephemeral index cursor and go through ALL rows and compare each LHS column (with !=) to the corresponding column in the ephemeral index.
// Comparison instructions have the default behavior that if either operand is NULL, the comparison is completely skipped.
// That means: if we, for ANY row in the ephemeral index, get through all the != comparisons without jumping,
// it means our subquery result has a tuple that is exactly NULL (or (NULL, NULL) etc.),
// in which case we need to NOT include the row.
// If ALL the rows jump at one of the != comparisons, it means our subquery result has no tuples with all NULLs -> we can include the row.
program.preassign_label_to_next_insn(label_null_rewind);
program.emit_insn(Insn::Rewind {
cursor_id: *cursor_id,
pc_if_empty: label_include_row,
});
program.preassign_label_to_next_insn(label_null_checks_loop_start);
let column_check_reg = program.alloc_register();
for i in 0..lhs_column_count {
program.emit_insn(Insn::Column {
cursor_id: *cursor_id,
column: i,
dest: column_check_reg,
default: None,
});
program.emit_insn(Insn::Ne {
lhs: lhs_column_regs_start + i,
rhs: column_check_reg,
target_pc: label_null_checks_next,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
}
program.emit_insn(Insn::Goto {
target_pc: label_skip_row,
});
program.preassign_label_to_next_insn(label_null_checks_next);
program.emit_insn(Insn::Next {
cursor_id: *cursor_id,
pc_if_next: label_null_checks_loop_start,
})
} else {
// WHERE ... IN (SELECT ...)
// We can skip the row if we don't find a match
program.emit_insn(Insn::NotFound {
cursor_id: *cursor_id,
target_pc: label_skip_row,
record_reg: lhs_column_regs_start,
num_regs: lhs_column_count,
});
}
program.preassign_label_to_next_insn(label_include_row);
program.emit_insn(Insn::Integer {
value: 1,
dest: target_register,
});
program.preassign_label_to_next_insn(label_skip_row);
Ok(target_register)
}
SubqueryType::RowValue {
result_reg_start,
num_regs,
} => {
program.emit_insn(Insn::Copy {
src_reg: *result_reg_start,
dst_reg: target_register,
extra_amount: num_regs - 1,
});
Ok(target_register)
}
}
}
ast::Expr::Between { .. } => { ast::Expr::Between { .. } => {
unreachable!("expression should have been rewritten in optmizer") unreachable!("expression should have been rewritten in optmizer")
} }
@@ -724,7 +886,9 @@ pub fn translate_expr(
ast::Expr::DoublyQualified(_, _, _) => { ast::Expr::DoublyQualified(_, _, _) => {
crate::bail_parse_error!("DoublyQualified should have been rewritten in optimizer") crate::bail_parse_error!("DoublyQualified should have been rewritten in optimizer")
} }
ast::Expr::Exists(_) => crate::bail_parse_error!("EXISTS in WHERE clause is not supported"), ast::Expr::Exists(_) => {
crate::bail_parse_error!("EXISTS is not supported in this position")
}
ast::Expr::FunctionCall { ast::Expr::FunctionCall {
name, name,
distinctness: _, distinctness: _,
@@ -1948,7 +2112,7 @@ pub fn translate_expr(
} }
}; };
let table = referenced_tables let (is_from_outer_query_scope, table) = referenced_tables
.unwrap() .unwrap()
.find_table_by_internal_id(*table_ref_id) .find_table_by_internal_id(*table_ref_id)
.unwrap_or_else(|| { .unwrap_or_else(|| {
@@ -1969,14 +2133,34 @@ pub fn translate_expr(
// If we have a covering index, we don't have an open table cursor so we read from the index cursor. // If we have a covering index, we don't have an open table cursor so we read from the index cursor.
match &table { match &table {
Table::BTree(_) => { Table::BTree(_) => {
let table_cursor_id = if use_covering_index { let (table_cursor_id, index_cursor_id) = if is_from_outer_query_scope {
None // Due to a limitation of our translation system, a subquery that references an outer query table
// cannot know whether a table cursor, index cursor, or both were opened for that table reference.
// Hence: currently we first try to resolve a table cursor, and if that fails,
// we resolve an index cursor.
if let Some(table_cursor_id) =
program.resolve_cursor_id_safe(&CursorKey::table(*table_ref_id))
{
(Some(table_cursor_id), None)
} else {
(
None,
Some(program.resolve_any_index_cursor_id_for_table(*table_ref_id)),
)
}
} else { } else {
Some(program.resolve_cursor_id(&CursorKey::table(*table_ref_id))) let table_cursor_id = if use_covering_index {
None
} else {
Some(program.resolve_cursor_id(&CursorKey::table(*table_ref_id)))
};
let index_cursor_id = index.map(|index| {
program
.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone()))
});
(table_cursor_id, index_cursor_id)
}; };
let index_cursor_id = index.map(|index| {
program.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone()))
});
if *is_rowid_alias { if *is_rowid_alias {
if let Some(index_cursor_id) = index_cursor_id { if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxRowId { program.emit_insn(Insn::IdxRowId {
@@ -1992,22 +2176,28 @@ pub fn translate_expr(
unreachable!("Either index or table cursor must be opened"); unreachable!("Either index or table cursor must be opened");
} }
} else { } else {
let read_cursor = if use_covering_index { let read_from_index = if is_from_outer_query_scope {
index_cursor_id.expect( index_cursor_id.is_some()
"index cursor should be opened when use_covering_index=true",
)
} else { } else {
table_cursor_id.expect( use_covering_index
"table cursor should be opened when use_covering_index=false",
)
}; };
let column = if use_covering_index { let read_cursor = if read_from_index {
let index = index.expect( index_cursor_id.expect("index cursor should be opened")
"index cursor should be opened when use_covering_index=true", } else {
table_cursor_id.expect("table cursor should be opened")
};
let column = if read_from_index {
let index = program.resolve_index_for_cursor_id(
index_cursor_id.expect("index cursor should be opened"),
); );
index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| { index
panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_ref_id) .column_table_pos_to_index_pos(*column)
}) .unwrap_or_else(|| {
panic!(
"index {} does not contain column number {} of table {}",
index.name, column, table_ref_id
)
})
} else { } else {
*column *column
}; };
@@ -2142,10 +2332,10 @@ pub fn translate_expr(
Ok(result_reg) Ok(result_reg)
} }
ast::Expr::InSelect { .. } => { ast::Expr::InSelect { .. } => {
crate::bail_parse_error!("IN (...subquery) in WHERE clause is not supported") crate::bail_parse_error!("IN (...subquery) is not supported in this position")
} }
ast::Expr::InTable { .. } => { ast::Expr::InTable { .. } => {
crate::bail_parse_error!("Table expression in WHERE clause is not supported") crate::bail_parse_error!("Table expression is not supported in this position")
} }
ast::Expr::IsNull(expr) => { ast::Expr::IsNull(expr) => {
let reg = program.alloc_register(); let reg = program.alloc_register();
@@ -2183,7 +2373,7 @@ pub fn translate_expr(
} }
ast::Expr::Literal(lit) => emit_literal(program, lit, target_register), ast::Expr::Literal(lit) => emit_literal(program, lit, target_register),
ast::Expr::Name(_) => { ast::Expr::Name(_) => {
crate::bail_parse_error!("ast::Expr::Name in WHERE clause is not supported") crate::bail_parse_error!("ast::Expr::Name is not supported in this position")
} }
ast::Expr::NotNull(expr) => { ast::Expr::NotNull(expr) => {
let reg = program.alloc_register(); let reg = program.alloc_register();
@@ -2230,7 +2420,7 @@ pub fn translate_expr(
} }
ast::Expr::Raise(_, _) => crate::bail_parse_error!("RAISE is not supported"), ast::Expr::Raise(_, _) => crate::bail_parse_error!("RAISE is not supported"),
ast::Expr::Subquery(_) => { ast::Expr::Subquery(_) => {
crate::bail_parse_error!("Subquery in WHERE clause is not supported") crate::bail_parse_error!("Subquery is not supported in this position")
} }
ast::Expr::Unary(op, expr) => match (op, expr.as_ref()) { ast::Expr::Unary(op, expr) => match (op, expr.as_ref()) {
(UnaryOperator::Positive, expr) => { (UnaryOperator::Positive, expr) => {
@@ -3140,12 +3330,12 @@ pub fn as_binary_components(
/// Recursively unwrap parentheses from an expression /// Recursively unwrap parentheses from an expression
/// e.g. (((t.x > 5))) -> t.x > 5 /// e.g. (((t.x > 5))) -> t.x > 5
fn unwrap_parens(expr: &ast::Expr) -> Result<&ast::Expr> { pub fn unwrap_parens(expr: &ast::Expr) -> Result<&ast::Expr> {
match expr { match expr {
ast::Expr::Column { .. } => Ok(expr), ast::Expr::Column { .. } => Ok(expr),
ast::Expr::Parenthesized(exprs) => match exprs.len() { ast::Expr::Parenthesized(exprs) => match exprs.len() {
1 => unwrap_parens(exprs.first().unwrap()), 1 => unwrap_parens(exprs.first().unwrap()),
_ => crate::bail_parse_error!("expected single expression in parentheses"), _ => Ok(expr), // If the expression is e.g. (x, y), as used in e.g. (x, y) IN (SELECT ...), return as is.
}, },
_ => Ok(expr), _ => Ok(expr),
} }
@@ -3182,6 +3372,11 @@ where
match func(expr)? { match func(expr)? {
WalkControl::Continue => { WalkControl::Continue => {
match expr { match expr {
ast::Expr::SubqueryResult { lhs, .. } => {
if let Some(lhs) = lhs {
walk_expr(lhs, func)?;
}
}
ast::Expr::Between { ast::Expr::Between {
lhs, start, end, .. lhs, start, end, ..
} => { } => {
@@ -3710,6 +3905,11 @@ where
match func(expr)? { match func(expr)? {
WalkControl::Continue => { WalkControl::Continue => {
match expr { match expr {
ast::Expr::SubqueryResult { lhs, .. } => {
if let Some(lhs) = lhs {
walk_expr_mut(lhs, func)?;
}
}
ast::Expr::Between { ast::Expr::Between {
lhs, start, end, .. lhs, start, end, ..
} => { } => {
@@ -3887,7 +4087,7 @@ pub fn get_expr_affinity(
match expr { match expr {
ast::Expr::Column { table, column, .. } => { ast::Expr::Column { table, column, .. } => {
if let Some(tables) = referenced_tables { if let Some(tables) = referenced_tables {
if let Some(table_ref) = tables.find_table_by_internal_id(*table) { if let Some((_, table_ref)) = tables.find_table_by_internal_id(*table) {
if let Some(col) = table_ref.get_column_at(*column) { if let Some(col) = table_ref.get_column_at(*column) {
return col.affinity(); return col.affinity();
} }

View File

@@ -19,7 +19,11 @@ use super::{
}, },
}; };
use crate::translate::{ use crate::translate::{
collate::get_collseq_from_expr, emitter::UpdateRowSource, window::emit_window_loop_source, collate::get_collseq_from_expr,
emitter::UpdateRowSource,
plan::{EvalAt, NonFromClauseSubquery},
subquery::emit_non_from_clause_subquery,
window::emit_window_loop_source,
}; };
use crate::{ use crate::{
schema::{Affinity, Index, IndexColumn, Table}, schema::{Affinity, Index, IndexColumn, Table},
@@ -116,6 +120,7 @@ pub fn init_distinct(program: &mut ProgramBuilder, plan: &SelectPlan) -> Result<
} }
/// Initialize resources needed for the source operators (tables, joins, etc) /// Initialize resources needed for the source operators (tables, joins, etc)
#[allow(clippy::too_many_arguments)]
pub fn init_loop( pub fn init_loop(
program: &mut ProgramBuilder, program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx, t_ctx: &mut TranslateCtx,
@@ -124,6 +129,8 @@ pub fn init_loop(
group_by: Option<&GroupBy>, group_by: Option<&GroupBy>,
mode: OperationMode, mode: OperationMode,
where_clause: &[WhereTerm], where_clause: &[WhereTerm],
join_order: &[JoinOrderMember],
subqueries: &mut [NonFromClauseSubquery],
) -> Result<()> { ) -> Result<()> {
assert!( assert!(
t_ctx.meta_left_joins.len() == tables.joined_tables().len(), t_ctx.meta_left_joins.len() == tables.joined_tables().len(),
@@ -397,9 +404,25 @@ pub fn init_loop(
} }
} }
for subquery in subqueries.iter_mut().filter(|s| !s.has_been_evaluated()) {
let eval_at = subquery.get_eval_at(join_order)?;
if eval_at != EvalAt::BeforeLoop {
continue;
}
let plan = subquery.consume_plan(EvalAt::BeforeLoop);
emit_non_from_clause_subquery(
program,
t_ctx,
*plan,
&subquery.query_type,
subquery.correlated,
)?;
}
for cond in where_clause for cond in where_clause
.iter() .iter()
.filter(|c| c.should_eval_before_loop(&[JoinOrderMember::default()])) .filter(|c| c.should_eval_before_loop(join_order, subqueries))
{ {
let jump_target = program.allocate_label(); let jump_target = program.allocate_label();
let meta = ConditionMetadata { let meta = ConditionMetadata {
@@ -418,6 +441,7 @@ pub fn init_loop(
/// Set up the main query execution loop /// Set up the main query execution loop
/// For example in the case of a nested table scan, this means emitting the Rewind instruction /// For example in the case of a nested table scan, this means emitting the Rewind instruction
/// for all tables involved, outermost first. /// for all tables involved, outermost first.
#[allow(clippy::too_many_arguments)]
pub fn open_loop( pub fn open_loop(
program: &mut ProgramBuilder, program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx, t_ctx: &mut TranslateCtx,
@@ -426,6 +450,7 @@ pub fn open_loop(
predicates: &[WhereTerm], predicates: &[WhereTerm],
temp_cursor_id: Option<CursorID>, temp_cursor_id: Option<CursorID>,
mode: OperationMode, mode: OperationMode,
subqueries: &mut [NonFromClauseSubquery],
) -> Result<()> { ) -> Result<()> {
for (join_index, join) in join_order.iter().enumerate() { for (join_index, join) in join_order.iter().enumerate() {
let joined_table_index = join.original_idx; let joined_table_index = join.original_idx;
@@ -666,6 +691,25 @@ pub fn open_loop(
} }
} }
for subquery in subqueries.iter_mut().filter(|s| !s.has_been_evaluated()) {
assert!(subquery.correlated, "subquery must be correlated");
let eval_at = subquery.get_eval_at(join_order)?;
if eval_at != EvalAt::Loop(join_index) {
continue;
}
let plan = subquery.consume_plan(eval_at);
emit_non_from_clause_subquery(
program,
t_ctx,
*plan,
&subquery.query_type,
subquery.correlated,
)?;
}
// First emit outer join conditions, if any. // First emit outer join conditions, if any.
emit_conditions( emit_conditions(
program, program,
@@ -676,6 +720,7 @@ pub fn open_loop(
join_index, join_index,
next, next,
true, true,
subqueries,
)?; )?;
// Set the match flag to true if this is a LEFT JOIN. // Set the match flag to true if this is a LEFT JOIN.
@@ -706,9 +751,20 @@ pub fn open_loop(
join_index, join_index,
next, next,
false, false,
subqueries,
)?; )?;
} }
if subqueries.iter().any(|s| !s.has_been_evaluated()) {
crate::bail_parse_error!(
"all subqueries should have already been emitted, but found {} unevaluated subqueries",
subqueries
.iter()
.filter(|s| !s.has_been_evaluated())
.count()
);
}
Ok(()) Ok(())
} }
@@ -722,11 +778,12 @@ fn emit_conditions(
join_index: usize, join_index: usize,
next: BranchOffset, next: BranchOffset,
from_outer_join: bool, from_outer_join: bool,
subqueries: &[NonFromClauseSubquery],
) -> Result<()> { ) -> Result<()> {
for cond in predicates for cond in predicates
.iter() .iter()
.filter(|cond| cond.from_outer_join.is_some() == from_outer_join) .filter(|cond| cond.from_outer_join.is_some() == from_outer_join)
.filter(|cond| cond.should_eval_at_loop(join_index, join_order)) .filter(|cond| cond.should_eval_at_loop(join_index, join_order, subqueries))
{ {
let jump_target_when_true = program.allocate_label(); let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata { let condition_metadata = ConditionMetadata {

View File

@@ -9,7 +9,7 @@ use crate::{
translate::{ translate::{
collate::get_collseq_from_expr, collate::get_collseq_from_expr,
expr::as_binary_components, expr::as_binary_components,
plan::{JoinOrderMember, TableReferences, WhereTerm}, plan::{JoinOrderMember, NonFromClauseSubquery, TableReferences, WhereTerm},
planner::{table_mask_from_expr, TableMask}, planner::{table_mask_from_expr, TableMask},
}, },
util::exprs_are_equivalent, util::exprs_are_equivalent,
@@ -188,6 +188,7 @@ pub fn constraints_from_where_clause(
where_clause: &[WhereTerm], where_clause: &[WhereTerm],
table_references: &TableReferences, table_references: &TableReferences,
available_indexes: &HashMap<String, VecDeque<Arc<Index>>>, available_indexes: &HashMap<String, VecDeque<Arc<Index>>>,
subqueries: &[NonFromClauseSubquery],
) -> Result<Vec<TableConstraints>> { ) -> Result<Vec<TableConstraints>> {
let mut constraints = Vec::new(); let mut constraints = Vec::new();
@@ -241,7 +242,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Rhs), where_clause_pos: (i, BinaryExprSide::Rhs),
operator, operator,
table_col_pos: *column, table_col_pos: *column,
lhs_mask: table_mask_from_expr(rhs, table_references)?, lhs_mask: table_mask_from_expr(rhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator), selectivity: estimate_selectivity(table_column, operator),
usable: true, usable: true,
}); });
@@ -258,7 +259,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Rhs), where_clause_pos: (i, BinaryExprSide::Rhs),
operator, operator,
table_col_pos: rowid_alias_column.unwrap(), table_col_pos: rowid_alias_column.unwrap(),
lhs_mask: table_mask_from_expr(rhs, table_references)?, lhs_mask: table_mask_from_expr(rhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator), selectivity: estimate_selectivity(table_column, operator),
usable: true, usable: true,
}); });
@@ -274,7 +275,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Lhs), where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator), operator: opposite_cmp_op(operator),
table_col_pos: *column, table_col_pos: *column,
lhs_mask: table_mask_from_expr(lhs, table_references)?, lhs_mask: table_mask_from_expr(lhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator), selectivity: estimate_selectivity(table_column, operator),
usable: true, usable: true,
}); });
@@ -288,7 +289,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Lhs), where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator), operator: opposite_cmp_op(operator),
table_col_pos: rowid_alias_column.unwrap(), table_col_pos: rowid_alias_column.unwrap(),
lhs_mask: table_mask_from_expr(lhs, table_references)?, lhs_mask: table_mask_from_expr(lhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator), selectivity: estimate_selectivity(table_column, operator),
usable: true, usable: true,
}); });

View File

@@ -540,9 +540,13 @@ mod tests {
let where_clause = vec![]; let where_clause = vec![];
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order( let result = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -569,9 +573,13 @@ mod tests {
let where_clause = vec![]; let where_clause = vec![];
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * from test_table // SELECT * from test_table
// expecting best_best_plan() not to do any work due to empty where clause. // expecting best_best_plan() not to do any work due to empty where clause.
@@ -610,9 +618,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let available_indexes = HashMap::new(); let available_indexes = HashMap::new();
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * FROM test_table WHERE id = 42 // SELECT * FROM test_table WHERE id = 42
// expecting a RowidEq access method because id is a rowid alias. // expecting a RowidEq access method because id is a rowid alias.
@@ -678,9 +690,13 @@ mod tests {
}); });
available_indexes.insert("test_table".to_string(), VecDeque::from([index])); available_indexes.insert("test_table".to_string(), VecDeque::from([index]));
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * FROM test_table WHERE id = 42 // SELECT * FROM test_table WHERE id = 42
// expecting an IndexScan access method because id is a primary key with an index // expecting an IndexScan access method because id is a primary key with an index
let result = compute_best_join_order( let result = compute_best_join_order(
@@ -757,9 +773,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order( let result = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -932,9 +952,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order( let result = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -1041,9 +1065,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let available_indexes = HashMap::new(); let available_indexes = HashMap::new();
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order( let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -1147,9 +1175,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let available_indexes = HashMap::new(); let available_indexes = HashMap::new();
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order( let result = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -1231,9 +1263,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
// Run the optimizer // Run the optimizer
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order( let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
@@ -1352,9 +1388,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order( let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -1460,9 +1500,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order( let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),
@@ -1586,9 +1630,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]); let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let table_constraints = let table_constraints = constraints_from_where_clause(
constraints_from_where_clause(&where_clause, &table_references, &available_indexes) &where_clause,
.unwrap(); &table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order( let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(), table_references.joined_tables(),

View File

@@ -23,8 +23,8 @@ use crate::{
constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints}, constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints},
}, },
plan::{ plan::{
ColumnUsedMask, OuterQueryReference, QueryDestination, ResultSetColumn, Scan, ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, QueryDestination,
SeekKeyComponent, ResultSetColumn, Scan, SeekKeyComponent,
}, },
}, },
types::SeekOp, types::SeekOp,
@@ -89,6 +89,7 @@ pub fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()
&mut plan.where_clause, &mut plan.where_clause,
&mut plan.order_by, &mut plan.order_by,
&mut plan.group_by, &mut plan.group_by,
&plan.non_from_clause_subqueries,
)?; )?;
if let Some(best_join_order) = best_join_order { if let Some(best_join_order) = best_join_order {
@@ -114,6 +115,7 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> {
&mut plan.where_clause, &mut plan.where_clause,
&mut plan.order_by, &mut plan.order_by,
&mut None, &mut None,
&[],
)?; )?;
Ok(()) Ok(())
@@ -138,6 +140,7 @@ fn optimize_update_plan(
&mut plan.where_clause, &mut plan.where_clause,
&mut plan.order_by, &mut plan.order_by,
&mut None, &mut None,
&[],
)?; )?;
let table_ref = &mut plan.table_references.joined_tables_mut()[0]; let table_ref = &mut plan.table_references.joined_tables_mut()[0];
@@ -301,6 +304,7 @@ fn add_ephemeral_table_to_update_plan(
distinctness: super::plan::Distinctness::NonDistinct, distinctness: super::plan::Distinctness::NonDistinct,
values: vec![], values: vec![],
window: None, window: None,
non_from_clause_subqueries: vec![],
}; };
plan.ephemeral_plan = Some(ephemeral_plan); plan.ephemeral_plan = Some(ephemeral_plan);
@@ -336,6 +340,7 @@ fn optimize_table_access(
where_clause: &mut [WhereTerm], where_clause: &mut [WhereTerm],
order_by: &mut Vec<(Box<ast::Expr>, SortOrder)>, order_by: &mut Vec<(Box<ast::Expr>, SortOrder)>,
group_by: &mut Option<GroupBy>, group_by: &mut Option<GroupBy>,
subqueries: &[NonFromClauseSubquery],
) -> Result<Option<Vec<JoinOrderMember>>> { ) -> Result<Option<Vec<JoinOrderMember>>> {
if table_references.joined_tables().len() > TableReferences::MAX_JOINED_TABLES { if table_references.joined_tables().len() > TableReferences::MAX_JOINED_TABLES {
crate::bail_parse_error!( crate::bail_parse_error!(
@@ -345,8 +350,12 @@ fn optimize_table_access(
} }
let access_methods_arena = RefCell::new(Vec::new()); let access_methods_arena = RefCell::new(Vec::new());
let maybe_order_target = compute_order_target(order_by, group_by.as_mut()); let maybe_order_target = compute_order_target(order_by, group_by.as_mut());
let constraints_per_table = let constraints_per_table = constraints_from_where_clause(
constraints_from_where_clause(where_clause, table_references, available_indexes)?; where_clause,
table_references,
available_indexes,
subqueries,
)?;
// Currently the expressions we evaluate as constraints are binary expressions that will never be true for a NULL operand. // Currently the expressions we evaluate as constraints are binary expressions that will never be true for a NULL operand.
// If there are any constraints on the right hand side table of an outer join that are not part of the outer join condition, // If there are any constraints on the right hand side table of an outer join that are not part of the outer join condition,
@@ -766,6 +775,7 @@ impl Optimizable for ast::Expr {
/// by writing more complex code. /// by writing more complex code.
fn is_nonnull(&self, tables: &TableReferences) -> bool { fn is_nonnull(&self, tables: &TableReferences) -> bool {
match self { match self {
Expr::SubqueryResult { .. } => false,
Expr::Between { Expr::Between {
lhs, start, end, .. lhs, start, end, ..
} => lhs.is_nonnull(tables) && start.is_nonnull(tables) && end.is_nonnull(tables), } => lhs.is_nonnull(tables) && start.is_nonnull(tables) && end.is_nonnull(tables),
@@ -804,7 +814,9 @@ impl Optimizable for ast::Expr {
return true; return true;
} }
let table_ref = tables.find_joined_table_by_internal_id(*table).unwrap(); let (_, table_ref) = tables
.find_table_by_internal_id(*table)
.expect("table not found");
let columns = table_ref.columns(); let columns = table_ref.columns();
let column = &columns[*column]; let column = &columns[*column];
column.primary_key || column.notnull column.primary_key || column.notnull
@@ -843,6 +855,7 @@ impl Optimizable for ast::Expr {
/// Returns true if the expression is a constant i.e. does not depend on variables or columns etc. /// Returns true if the expression is a constant i.e. does not depend on variables or columns etc.
fn is_constant(&self, resolver: &Resolver<'_>) -> bool { fn is_constant(&self, resolver: &Resolver<'_>) -> bool {
match self { match self {
Expr::SubqueryResult { .. } => false,
Expr::Between { Expr::Between {
lhs, start, end, .. lhs, start, end, ..
} => { } => {

View File

@@ -1,5 +1,7 @@
use std::{cmp::Ordering, sync::Arc}; use std::{cmp::Ordering, sync::Arc};
use turso_parser::ast::{self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder}; use turso_parser::ast::{
self, FrameBound, FrameClause, FrameExclude, FrameMode, SortOrder, SubqueryType,
};
use crate::{ use crate::{
function::AggFunc, function::AggFunc,
@@ -36,12 +38,12 @@ impl ResultSetColumn {
} }
match &self.expr { match &self.expr {
ast::Expr::Column { table, column, .. } => { ast::Expr::Column { table, column, .. } => {
let table_ref = tables.find_table_by_internal_id(*table).unwrap(); let (_, table_ref) = tables.find_table_by_internal_id(*table).unwrap();
table_ref.get_column_at(*column).unwrap().name.as_deref() table_ref.get_column_at(*column).unwrap().name.as_deref()
} }
ast::Expr::RowId { table, .. } => { ast::Expr::RowId { table, .. } => {
// If there is a rowid alias column, use its name // If there is a rowid alias column, use its name
let table_ref = tables.find_table_by_internal_id(*table).unwrap(); let (_, table_ref) = tables.find_table_by_internal_id(*table).unwrap();
if let Table::BTree(table) = &table_ref { if let Table::BTree(table) = &table_ref {
if let Some(rowid_alias_column) = table.get_rowid_alias_column() { if let Some(rowid_alias_column) = table.get_rowid_alias_column() {
if let Some(name) = &rowid_alias_column.1.name { if let Some(name) = &rowid_alias_column.1.name {
@@ -106,28 +108,41 @@ pub struct WhereTerm {
} }
impl WhereTerm { impl WhereTerm {
pub fn should_eval_before_loop(&self, join_order: &[JoinOrderMember]) -> bool { pub fn should_eval_before_loop(
&self,
join_order: &[JoinOrderMember],
subqueries: &[NonFromClauseSubquery],
) -> bool {
if self.consumed { if self.consumed {
return false; return false;
} }
let Ok(eval_at) = self.eval_at(join_order) else { let Ok(eval_at) = self.eval_at(join_order, subqueries) else {
return false; return false;
}; };
eval_at == EvalAt::BeforeLoop eval_at == EvalAt::BeforeLoop
} }
pub fn should_eval_at_loop(&self, loop_idx: usize, join_order: &[JoinOrderMember]) -> bool { pub fn should_eval_at_loop(
&self,
loop_idx: usize,
join_order: &[JoinOrderMember],
subqueries: &[NonFromClauseSubquery],
) -> bool {
if self.consumed { if self.consumed {
return false; return false;
} }
let Ok(eval_at) = self.eval_at(join_order) else { let Ok(eval_at) = self.eval_at(join_order, subqueries) else {
return false; return false;
}; };
eval_at == EvalAt::Loop(loop_idx) eval_at == EvalAt::Loop(loop_idx)
} }
fn eval_at(&self, join_order: &[JoinOrderMember]) -> Result<EvalAt> { fn eval_at(
determine_where_to_eval_term(self, join_order) &self,
join_order: &[JoinOrderMember],
subqueries: &[NonFromClauseSubquery],
) -> Result<EvalAt> {
determine_where_to_eval_term(self, join_order, subqueries)
} }
} }
@@ -225,6 +240,20 @@ pub enum QueryDestination {
/// The table that will be used to store the results. /// The table that will be used to store the results.
table: Arc<BTreeTable>, table: Arc<BTreeTable>,
}, },
/// The result of an EXISTS subquery are stored in a single register.
ExistsSubqueryResult {
/// The register that holds the result of the EXISTS subquery.
result_reg: usize,
},
/// The results of a subquery that is neither 'EXISTS' nor 'IN' are stored in a range of registers.
RowValueSubqueryResult {
/// The start register of the range that holds the result of the subquery.
result_reg_start: usize,
/// The number of registers that hold the result of the subquery.
num_regs: usize,
},
/// Decision made at some point after query plan construction.
Unset,
} }
impl QueryDestination { impl QueryDestination {
@@ -345,6 +374,8 @@ pub struct SelectPlan {
/// The window definition and all window functions associated with it. There is at most one /// The window definition and all window functions associated with it. There is at most one
/// window per SELECT. If the original query contains more, they are pushed down into subqueries. /// window per SELECT. If the original query contains more, they are pushed down into subqueries.
pub window: Option<Window>, pub window: Option<Window>,
/// Subqueries that appear in any part of the query apart from the FROM clause
pub non_from_clause_subqueries: Vec<NonFromClauseSubquery>,
} }
impl SelectPlan { impl SelectPlan {
@@ -356,6 +387,15 @@ impl SelectPlan {
self.aggregates.iter().map(|agg| agg.args.len()).sum() self.aggregates.iter().map(|agg| agg.args.len()).sum()
} }
/// Whether this query or any of its subqueries reference columns from the outer query.
pub fn is_correlated(&self) -> bool {
self.table_references
.outer_query_refs()
.iter()
.any(|t| t.is_used())
|| self.non_from_clause_subqueries.iter().any(|s| s.correlated)
}
/// Reference: https://github.com/sqlite/sqlite/blob/5db695197b74580c777b37ab1b787531f15f7f9f/src/select.c#L8613 /// Reference: https://github.com/sqlite/sqlite/blob/5db695197b74580c777b37ab1b787531f15f7f9f/src/select.c#L8613
/// ///
/// Checks to see if the query is of the format `SELECT count(*) FROM <tbl>` /// Checks to see if the query is of the format `SELECT count(*) FROM <tbl>`
@@ -659,17 +699,22 @@ impl TableReferences {
.find(|t| t.internal_id == internal_id) .find(|t| t.internal_id == internal_id)
} }
/// Returns an immutable reference to the [Table] with the given internal ID. /// Returns an immutable reference to the [Table] with the given internal ID,
pub fn find_table_by_internal_id(&self, internal_id: TableInternalId) -> Option<&Table> { /// plus a boolean indicating whether the table is a joined table from the current query scope (false),
/// or an outer query reference (true).
pub fn find_table_by_internal_id(
&self,
internal_id: TableInternalId,
) -> Option<(bool, &Table)> {
self.joined_tables self.joined_tables
.iter() .iter()
.find(|t| t.internal_id == internal_id) .find(|t| t.internal_id == internal_id)
.map(|t| &t.table) .map(|t| (false, &t.table))
.or_else(|| { .or_else(|| {
self.outer_query_refs self.outer_query_refs
.iter() .iter()
.find(|t| t.internal_id == internal_id) .find(|t| t.internal_id == internal_id)
.map(|t| &t.table) .map(|t| (true, &t.table))
}) })
} }
@@ -790,6 +835,12 @@ impl ColumnUsedMask {
} }
} }
impl std::ops::BitOrAssign<&Self> for ColumnUsedMask {
fn bitor_assign(&mut self, rhs: &Self) {
self.0 |= &rhs.0;
}
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
#[allow(clippy::large_enum_variant)] #[allow(clippy::large_enum_variant)]
pub enum Operation { pub enum Operation {
@@ -1299,6 +1350,84 @@ pub struct WindowFunction {
pub original_expr: Expr, pub original_expr: Expr,
} }
#[derive(Debug, Clone)]
pub enum SubqueryState {
/// The subquery has not been evaluated yet.
/// The 'plan' field is only optional because it is .take()'d when the the subquery
/// is translated into bytecode.
Unevaluated { plan: Option<Box<SelectPlan>> },
/// The subquery has been evaluated.
/// The [evaluated_at] field contains the loop index where the subquery was evaluated.
/// The query plan struct no longer exists because translating the plan currently
/// requires an ownership transfer.
Evaluated { evaluated_at: EvalAt },
}
#[derive(Debug, Clone)]
/// A subquery that is not part of the `FROM` clause.
/// This is used for subqueries in the WHERE clause, HAVING clause, ORDER BY clause, LIMIT clause, OFFSET clause, etc.
/// Currently only subqueries in the WHERE clause are supported.
pub struct NonFromClauseSubquery {
pub internal_id: TableInternalId,
pub query_type: SubqueryType,
pub state: SubqueryState,
pub correlated: bool,
}
impl NonFromClauseSubquery {
/// Returns true if the subquery has been evaluated (translated into bytecode).
pub fn has_been_evaluated(&self) -> bool {
matches!(self.state, SubqueryState::Evaluated { .. })
}
/// Returns the loop index where the subquery should be evaluated in this particular join order.
/// If the subquery references tables from the parent query, it will be evaluated at the right-most
/// nested loop whose table it references.
pub fn get_eval_at(&self, join_order: &[JoinOrderMember]) -> Result<EvalAt> {
let mut eval_at = EvalAt::BeforeLoop;
let SubqueryState::Unevaluated { plan } = &self.state else {
crate::bail_parse_error!("subquery has already been evaluated");
};
let used_outer_refs = plan
.as_ref()
.unwrap()
.table_references
.outer_query_refs()
.iter()
.filter(|t| t.is_used());
for outer_ref in used_outer_refs {
let Some(loop_idx) = join_order
.iter()
.position(|t| t.table_id == outer_ref.internal_id)
else {
continue;
};
eval_at = eval_at.max(EvalAt::Loop(loop_idx));
}
for subquery in plan.as_ref().unwrap().non_from_clause_subqueries.iter() {
let eval_at_inner = subquery.get_eval_at(join_order)?;
eval_at = eval_at.max(eval_at_inner);
}
Ok(eval_at)
}
/// Consumes the plan and returns it, and sets the subquery to the evaluated state.
/// This is used when the subquery is translated into bytecode.
pub fn consume_plan(&mut self, evaluated_at: EvalAt) -> Box<SelectPlan> {
match &mut self.state {
SubqueryState::Unevaluated { plan } => {
let plan = plan.take().unwrap();
self.state = SubqueryState::Evaluated { evaluated_at };
plan
}
SubqueryState::Evaluated { .. } => {
panic!("subquery has already been evaluated");
}
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@@ -13,6 +13,7 @@ use super::{
use crate::translate::{ use crate::translate::{
emitter::Resolver, emitter::Resolver,
expr::{BindingBehavior, WalkControl}, expr::{BindingBehavior, WalkControl},
plan::{NonFromClauseSubquery, SubqueryState},
}; };
use crate::{ use crate::{
ast::Limit, ast::Limit,
@@ -776,6 +777,7 @@ pub fn parse_where(
pub fn determine_where_to_eval_term( pub fn determine_where_to_eval_term(
term: &WhereTerm, term: &WhereTerm,
join_order: &[JoinOrderMember], join_order: &[JoinOrderMember],
subqueries: &[NonFromClauseSubquery],
) -> Result<EvalAt> { ) -> Result<EvalAt> {
if let Some(table_id) = term.from_outer_join { if let Some(table_id) = term.from_outer_join {
return Ok(EvalAt::Loop( return Ok(EvalAt::Loop(
@@ -786,7 +788,7 @@ pub fn determine_where_to_eval_term(
)); ));
} }
determine_where_to_eval_expr(&term.expr, join_order) determine_where_to_eval_expr(&term.expr, join_order, subqueries)
} }
/// A bitmask representing a set of tables in a query plan. /// A bitmask representing a set of tables in a query plan.
@@ -889,6 +891,7 @@ impl TableMask {
pub fn table_mask_from_expr( pub fn table_mask_from_expr(
top_level_expr: &Expr, top_level_expr: &Expr,
table_references: &TableReferences, table_references: &TableReferences,
subqueries: &[NonFromClauseSubquery],
) -> Result<TableMask> { ) -> Result<TableMask> {
let mut mask = TableMask::new(); let mut mask = TableMask::new();
walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> { walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> {
@@ -910,6 +913,34 @@ pub fn table_mask_from_expr(
crate::bail_parse_error!("table not found in joined_tables"); crate::bail_parse_error!("table not found in joined_tables");
} }
} }
// Given something like WHERE t.a = (SELECT ...), we can only evaluate that expression
// when all both table 't' and all outer scope tables referenced by the subquery OR its nested subqueries are in scope.
// Hence, the tables referenced in subqueries must be added to the table mask.
Expr::SubqueryResult { subquery_id, .. } => {
let Some(subquery) = subqueries.iter().find(|s| s.internal_id == *subquery_id)
else {
crate::bail_parse_error!("subquery not found");
};
let SubqueryState::Unevaluated { plan } = &subquery.state else {
crate::bail_parse_error!("subquery has already been evaluated");
};
let used_outer_query_refs = plan
.as_ref()
.unwrap()
.table_references
.outer_query_refs()
.iter()
.filter(|t| t.is_used());
for outer_query_ref in used_outer_query_refs {
if let Some(table_idx) = table_references
.joined_tables()
.iter()
.position(|t| t.internal_id == outer_query_ref.internal_id)
{
mask.add_table(table_idx);
}
}
}
_ => {} _ => {}
} }
Ok(WalkControl::Continue) Ok(WalkControl::Continue)
@@ -921,17 +952,51 @@ pub fn table_mask_from_expr(
pub fn determine_where_to_eval_expr( pub fn determine_where_to_eval_expr(
top_level_expr: &Expr, top_level_expr: &Expr,
join_order: &[JoinOrderMember], join_order: &[JoinOrderMember],
subqueries: &[NonFromClauseSubquery],
) -> Result<EvalAt> { ) -> Result<EvalAt> {
// If the expression references no tables, it can be evaluated before any table loops are opened.
let mut eval_at: EvalAt = EvalAt::BeforeLoop; let mut eval_at: EvalAt = EvalAt::BeforeLoop;
walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> { walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> {
match expr { match expr {
Expr::Column { table, .. } | Expr::RowId { table, .. } => { Expr::Column { table, .. } | Expr::RowId { table, .. } => {
let join_idx = join_order let Some(join_idx) = join_order.iter().position(|t| t.table_id == *table) else {
.iter() // Must be an outer query reference; in that case, the table is already in scope.
.position(|t| t.table_id == *table) return Ok(WalkControl::Continue);
.unwrap_or(usize::MAX); };
eval_at = eval_at.max(EvalAt::Loop(join_idx)); eval_at = eval_at.max(EvalAt::Loop(join_idx));
} }
// Given something like WHERE t.a = (SELECT ...), we can only evaluate that expression
// when all both table 't' and all outer scope tables referenced by the subquery OR its nested subqueries are in scope.
Expr::SubqueryResult { subquery_id, .. } => {
let Some(subquery) = subqueries.iter().find(|s| s.internal_id == *subquery_id)
else {
crate::bail_parse_error!("subquery not found");
};
match &subquery.state {
SubqueryState::Evaluated { evaluated_at } => {
eval_at = eval_at.max(*evaluated_at);
}
SubqueryState::Unevaluated { plan } => {
let used_outer_refs = plan
.as_ref()
.unwrap()
.table_references
.outer_query_refs()
.iter()
.filter(|t| t.is_used());
for outer_ref in used_outer_refs {
let Some(join_idx) = join_order
.iter()
.position(|t| t.table_id == outer_ref.internal_id)
else {
continue;
};
eval_at = eval_at.max(EvalAt::Loop(join_idx));
}
return Ok(WalkControl::Continue);
}
}
}
_ => {} _ => {}
} }
Ok(WalkControl::Continue) Ok(WalkControl::Continue)

View File

@@ -143,6 +143,24 @@ pub fn emit_result_row_and_limit(
end_offset: BranchOffset::Offset(0), end_offset: BranchOffset::Offset(0),
}); });
} }
QueryDestination::ExistsSubqueryResult { result_reg } => {
program.emit_insn(Insn::Integer {
value: 1,
dest: *result_reg,
});
}
QueryDestination::RowValueSubqueryResult {
result_reg_start,
num_regs,
} => {
assert!(plan.result_columns.len() == *num_regs, "Row value subqueries should have the same number of result columns as the number of registers");
program.emit_insn(Insn::Copy {
src_reg: result_columns_start_reg,
dst_reg: *result_reg_start,
extra_amount: num_regs - 1,
});
}
QueryDestination::Unset => unreachable!("Unset query destination should not be reached"),
} }
if plan.limit.is_some() { if plan.limit.is_some() {

View File

@@ -13,6 +13,7 @@ use crate::translate::planner::{
break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where, break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where,
resolve_window_and_aggregate_functions, resolve_window_and_aggregate_functions,
}; };
use crate::translate::subquery::plan_subqueries_from_where_clause;
use crate::translate::window::plan_windows; use crate::translate::window::plan_windows;
use crate::util::normalize_ident; use crate::util::normalize_ident;
use crate::vdbe::builder::ProgramBuilderOpts; use crate::vdbe::builder::ProgramBuilderOpts;
@@ -273,6 +274,7 @@ fn prepare_one_select_plan(
distinctness: Distinctness::from_ast(distinctness.as_ref()), distinctness: Distinctness::from_ast(distinctness.as_ref()),
values: vec![], values: vec![],
window: None, window: None,
non_from_clause_subqueries: vec![],
}; };
let mut windows = Vec::with_capacity(window_clause.len()); let mut windows = Vec::with_capacity(window_clause.len());
@@ -502,6 +504,25 @@ fn prepare_one_select_plan(
)?; )?;
} }
// TODO: support subqueries in the following positions:
// - result column of a select, e.g.: SELECT x = (SELECT ...) FROM t
// - GROUP BY clause, e.g.: SELECT * FROM t GROUP BY (SELECT ...) <-- however nonsensical that might be...
// - HAVING clause, e.g.: SELECT * FROM t GROUP BY x HAVING x = (SELECT ...)
// - ORDER BY clause, e.g.: SELECT * FROM t ORDER BY x = (SELECT ...)
// - LIMIT clause, e.g.: SELECT * FROM t LIMIT (SELECT ...)
// - OFFSET clause, e.g.: SELECT * FROM t OFFSET (SELECT ...)
//
// in these positions, unlike in the WHERE clause, the subquery cannot reference columns from the outer query,
// so we don't need to collect outer query references.
plan_subqueries_from_where_clause(
program,
&mut plan.non_from_clause_subqueries,
&mut plan.table_references,
resolver,
&mut plan.where_clause,
connection,
)?;
// Return the unoptimized query plan // Return the unoptimized query plan
Ok(plan) Ok(plan)
} }
@@ -555,6 +576,7 @@ fn prepare_one_select_plan(
.map(|values| values.iter().map(|value| *value.clone()).collect()) .map(|values| values.iter().map(|value| *value.clone()).collect())
.collect(), .collect(),
window: None, window: None,
non_from_clause_subqueries: vec![],
}; };
Ok(plan) Ok(plan)

View File

@@ -1,8 +1,26 @@
use std::sync::Arc;
use turso_parser::ast::{self, SortOrder, SubqueryType};
use crate::{ use crate::{
emit_explain, emit_explain,
schema::Table, schema::{Index, IndexColumn, Table},
vdbe::{builder::ProgramBuilder, insn::Insn}, translate::{
QueryMode, Result, collate::get_collseq_from_expr,
emitter::emit_program_for_select,
expr::{unwrap_parens, walk_expr_mut, WalkControl},
optimizer::optimize_select_plan,
plan::{
ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, Plan, SubqueryState,
WhereTerm,
},
select::prepare_select_plan,
},
vdbe::{
builder::{CursorType, ProgramBuilder},
insn::Insn,
},
Connection, QueryMode, Result,
}; };
use super::{ use super::{
@@ -11,9 +29,326 @@ use super::{
plan::{Operation, QueryDestination, Search, SelectPlan, TableReferences}, plan::{Operation, QueryDestination, Search, SelectPlan, TableReferences},
}; };
/// Compute query plans for subqueries in the WHERE clause.
/// The AST expression containing the subquery ([ast::Expr::Exists], [ast::Expr::Subquery], [ast::Expr::InSelect]) is replaced with a [ast::Expr::SubqueryResult] expression.
/// The [ast::Expr::SubqueryResult] expression contains the subquery ID, the left-hand side expression (only applicable to IN subqueries), the NOT IN flag (only applicable to IN subqueries), and the subquery type.
/// The computed plans are stored in the [NonFromClauseSubquery] structs on the [SelectPlan], and evaluated at the appropriate time during the translation of the main query.
/// The appropriate time is determined by whether the subquery is correlated or uncorrelated;
/// if it is uncorrelated, it can be evaluated as early as possible, but if it is correlated, it must be evaluated after all of its dependencies from the
/// outer query are 'in scope', i.e. their cursors are open and rewound.
pub fn plan_subqueries_from_where_clause(
program: &mut ProgramBuilder,
out_subqueries: &mut Vec<NonFromClauseSubquery>,
referenced_tables: &mut TableReferences,
resolver: &Resolver,
where_terms: &mut [WhereTerm],
connection: &Arc<Connection>,
) -> Result<()> {
// A WHERE clause subquery can reference columns from the outer query,
// including nested cases where a subquery inside a subquery references columns from its parent's parent
// and so on.
let get_outer_query_refs = |referenced_tables: &TableReferences| {
referenced_tables
.joined_tables()
.iter()
.map(|t| OuterQueryReference {
table: t.table.clone(),
identifier: t.identifier.clone(),
internal_id: t.internal_id,
col_used_mask: ColumnUsedMask::default(),
})
.chain(
referenced_tables
.outer_query_refs()
.iter()
.map(|t| OuterQueryReference {
table: t.table.clone(),
identifier: t.identifier.clone(),
internal_id: t.internal_id,
col_used_mask: ColumnUsedMask::default(),
}),
)
.collect::<Vec<_>>()
};
// Walk the WHERE clause and replace subqueries with [ast::Expr::SubqueryResult] expressions.
for where_term in where_terms.iter_mut() {
walk_expr_mut(
&mut where_term.expr,
&mut |expr: &mut ast::Expr| -> Result<WalkControl> {
match expr {
ast::Expr::Exists(_) => {
let subquery_id = program.table_reference_counter.next();
let outer_query_refs = get_outer_query_refs(referenced_tables);
let result_reg = program.alloc_register();
let subquery_type = SubqueryType::Exists { result_reg };
let result_expr = ast::Expr::SubqueryResult {
subquery_id,
lhs: None,
not_in: false,
query_type: subquery_type.clone(),
};
let ast::Expr::Exists(subselect) = std::mem::replace(expr, result_expr)
else {
unreachable!();
};
let plan = prepare_select_plan(
subselect,
resolver,
program,
&outer_query_refs,
QueryDestination::ExistsSubqueryResult { result_reg },
connection,
)?;
let Plan::Select(mut plan) = plan else {
crate::bail_parse_error!(
"compound SELECT queries not supported yet in WHERE clause subqueries"
);
};
optimize_select_plan(&mut plan, resolver.schema)?;
// EXISTS subqueries are satisfied after at most 1 row has been returned.
plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
"1".to_string(),
))));
let correlated = plan.is_correlated();
out_subqueries.push(NonFromClauseSubquery {
internal_id: subquery_id,
query_type: subquery_type,
state: SubqueryState::Unevaluated {
plan: Some(Box::new(plan)),
},
correlated,
});
Ok(WalkControl::Continue)
}
ast::Expr::Subquery(_) => {
let subquery_id = program.table_reference_counter.next();
let outer_query_refs = get_outer_query_refs(referenced_tables);
let result_expr = ast::Expr::SubqueryResult {
subquery_id,
lhs: None,
not_in: false,
// Placeholder values because the number of columns returned is not known until the plan is prepared.
// These are replaced below after planning.
query_type: SubqueryType::RowValue {
result_reg_start: 0,
num_regs: 0,
},
};
let ast::Expr::Subquery(subselect) = std::mem::replace(expr, result_expr)
else {
unreachable!();
};
let plan = prepare_select_plan(
subselect,
resolver,
program,
&outer_query_refs,
QueryDestination::Unset,
connection,
)?;
let Plan::Select(mut plan) = plan else {
crate::bail_parse_error!(
"compound SELECT queries not supported yet in WHERE clause subqueries"
);
};
optimize_select_plan(&mut plan, resolver.schema)?;
let reg_count = plan.result_columns.len();
let reg_start = program.alloc_registers(reg_count);
plan.query_destination = QueryDestination::RowValueSubqueryResult {
result_reg_start: reg_start,
num_regs: reg_count,
};
// RowValue subqueries are satisfied after at most 1 row has been returned,
// as they are used in comparisons with a scalar or a tuple of scalars like (x,y) = (SELECT ...) or x = (SELECT ...).
plan.limit = Some(Box::new(ast::Expr::Literal(ast::Literal::Numeric(
"1".to_string(),
))));
let ast::Expr::SubqueryResult {
subquery_id,
lhs: None,
not_in: false,
query_type:
SubqueryType::RowValue {
result_reg_start,
num_regs,
},
} = &mut *expr
else {
unreachable!();
};
*result_reg_start = reg_start;
*num_regs = reg_count;
let correlated = plan.is_correlated();
out_subqueries.push(NonFromClauseSubquery {
internal_id: *subquery_id,
query_type: SubqueryType::RowValue {
result_reg_start: reg_start,
num_regs: reg_count,
},
state: SubqueryState::Unevaluated {
plan: Some(Box::new(plan)),
},
correlated,
});
Ok(WalkControl::Continue)
}
ast::Expr::InSelect { .. } => {
let subquery_id = program.table_reference_counter.next();
let outer_query_refs = get_outer_query_refs(referenced_tables);
let ast::Expr::InSelect { lhs, not, rhs } =
std::mem::replace(expr, ast::Expr::Literal(ast::Literal::Null))
else {
unreachable!();
};
let plan = prepare_select_plan(
rhs,
resolver,
program,
&outer_query_refs,
QueryDestination::Unset,
connection,
)?;
let Plan::Select(mut plan) = plan else {
crate::bail_parse_error!(
"compound SELECT queries not supported yet in WHERE clause subqueries"
);
};
optimize_select_plan(&mut plan, resolver.schema)?;
// e.g. (x,y) IN (SELECT ...)
// or x IN (SELECT ...)
let lhs_column_count = match unwrap_parens(lhs.as_ref())? {
ast::Expr::Parenthesized(exprs) => exprs.len(),
_ => 1,
};
if lhs_column_count != plan.result_columns.len() {
crate::bail_parse_error!(
"lhs of IN subquery must have the same number of columns as the subquery"
);
}
let mut columns = plan
.result_columns
.iter()
.enumerate()
.map(|(i, c)| IndexColumn {
name: c.name(&plan.table_references).unwrap_or("").to_string(),
order: SortOrder::Asc,
pos_in_table: i,
collation: None,
default: None,
})
.collect::<Vec<_>>();
for (i, column) in columns.iter_mut().enumerate() {
column.collation = get_collseq_from_expr(
&plan.result_columns[i].expr,
&plan.table_references,
)?;
}
let ephemeral_index = Arc::new(Index {
columns,
name: format!("ephemeral_index_where_sub_{subquery_id}"),
table_name: String::new(),
ephemeral: true,
has_rowid: false,
root_page: 0,
unique: false,
where_clause: None,
});
let cursor_id = program
.alloc_cursor_id(CursorType::BTreeIndex(ephemeral_index.clone()));
plan.query_destination = QueryDestination::EphemeralIndex {
cursor_id,
index: ephemeral_index.clone(),
is_delete: false,
};
*expr = ast::Expr::SubqueryResult {
subquery_id,
lhs: Some(lhs),
not_in: not,
query_type: SubqueryType::In { cursor_id },
};
let correlated = plan.is_correlated();
out_subqueries.push(NonFromClauseSubquery {
internal_id: subquery_id,
query_type: SubqueryType::In { cursor_id },
state: SubqueryState::Unevaluated {
plan: Some(Box::new(plan)),
},
correlated,
});
Ok(WalkControl::Continue)
}
_ => Ok(WalkControl::Continue),
}
},
)?;
}
update_column_used_masks(referenced_tables, out_subqueries);
Ok(())
}
/// We make decisions about when to evaluate expressions or whether to use covering indexes based on
/// which columns of a table have been referenced.
/// Since subquery nesting is arbitrarily deep, a reference to a column must propagate recursively
/// up to the parent. Example:
///
/// SELECT * FROM t WHERE EXISTS (SELECT * FROM u WHERE EXISTS (SELECT * FROM v WHERE v.foo = t.foo))
///
/// In this case, t.foo is referenced in the innermost subquery, so the top level query must be notified
/// that t.foo has been used.
fn update_column_used_masks(
table_refs: &mut TableReferences,
subqueries: &mut [NonFromClauseSubquery],
) {
for subquery in subqueries.iter_mut() {
let SubqueryState::Unevaluated { plan } = &mut subquery.state else {
panic!("subquery has already been evaluated");
};
let Some(child_plan) = plan.as_mut() else {
panic!("subquery has no plan");
};
for child_outer_query_ref in child_plan
.table_references
.outer_query_refs()
.iter()
.filter(|t| t.is_used())
{
if let Some(joined_table) =
table_refs.find_joined_table_by_internal_id_mut(child_outer_query_ref.internal_id)
{
joined_table.col_used_mask |= &child_outer_query_ref.col_used_mask;
}
if let Some(outer_query_ref) = table_refs
.find_outer_query_ref_by_internal_id_mut(child_outer_query_ref.internal_id)
{
outer_query_ref.col_used_mask |= &child_outer_query_ref.col_used_mask;
}
}
}
}
/// Emit the subqueries contained in the FROM clause. /// Emit the subqueries contained in the FROM clause.
/// This is done first so the results can be read in the main query loop. /// This is done first so the results can be read in the main query loop.
pub fn emit_subqueries( pub fn emit_from_clause_subqueries(
program: &mut ProgramBuilder, program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx, t_ctx: &mut TranslateCtx,
tables: &mut TableReferences, tables: &mut TableReferences,
@@ -60,7 +395,7 @@ pub fn emit_subqueries(
if let Table::FromClauseSubquery(from_clause_subquery) = &mut table_reference.table { if let Table::FromClauseSubquery(from_clause_subquery) = &mut table_reference.table {
// Emit the subquery and get the start register of the result columns. // Emit the subquery and get the start register of the result columns.
let result_columns_start = let result_columns_start =
emit_subquery(program, &mut from_clause_subquery.plan, t_ctx)?; emit_from_clause_subquery(program, &mut from_clause_subquery.plan, t_ctx)?;
// Set the start register of the subquery's result columns. // Set the start register of the subquery's result columns.
// This is done so that translate_expr() can read the result columns of the subquery, // This is done so that translate_expr() can read the result columns of the subquery,
// as if it were reading from a regular table. // as if it were reading from a regular table.
@@ -72,9 +407,9 @@ pub fn emit_subqueries(
Ok(()) Ok(())
} }
/// Emit a subquery and return the start register of the result columns. /// Emit a FROM clause subquery and return the start register of the result columns.
/// This is done by emitting a coroutine that stores the result columns in sequential registers. /// This is done by emitting a coroutine that stores the result columns in sequential registers.
/// Each subquery in a FROM clause has its own separate SelectPlan which is wrapped in a coroutine. /// Each FROM clause subquery has its own separate SelectPlan which is wrapped in a coroutine.
/// ///
/// The resulting bytecode from a subquery is mostly exactly the same as a regular query, except: /// The resulting bytecode from a subquery is mostly exactly the same as a regular query, except:
/// - it ends in an EndCoroutine instead of a Halt. /// - it ends in an EndCoroutine instead of a Halt.
@@ -85,7 +420,7 @@ pub fn emit_subqueries(
/// ///
/// Since a subquery has its own SelectPlan, it can contain nested subqueries, /// Since a subquery has its own SelectPlan, it can contain nested subqueries,
/// which can contain even more nested subqueries, etc. /// which can contain even more nested subqueries, etc.
pub fn emit_subquery( pub fn emit_from_clause_subquery(
program: &mut ProgramBuilder, program: &mut ProgramBuilder,
plan: &mut SelectPlan, plan: &mut SelectPlan,
t_ctx: &mut TranslateCtx, t_ctx: &mut TranslateCtx,
@@ -102,7 +437,7 @@ pub fn emit_subquery(
// The parent query will use this register to reinitialize the coroutine when it needs to run multiple times. // The parent query will use this register to reinitialize the coroutine when it needs to run multiple times.
*coroutine_implementation_start = coroutine_implementation_start_offset; *coroutine_implementation_start = coroutine_implementation_start_offset;
} }
_ => unreachable!("emit_subquery called on non-subquery"), _ => unreachable!("emit_from_clause_subquery called on non-subquery"),
} }
let end_coroutine_label = program.allocate_label(); let end_coroutine_label = program.allocate_label();
let mut metadata = TranslateCtx { let mut metadata = TranslateCtx {
@@ -137,3 +472,88 @@ pub fn emit_subquery(
program.preassign_label_to_next_insn(subquery_body_end_label); program.preassign_label_to_next_insn(subquery_body_end_label);
Ok(result_column_start_reg) Ok(result_column_start_reg)
} }
/// Translate a subquery that is not part of the FROM clause.
/// If a subquery is uncorrelated (i.e. does not reference columns from the outer query),
/// it will be executed only once.
///
/// If it is correlated (i.e. references columns from the outer query),
/// it will be executed for each row of the outer query.
///
/// The result of the subquery is stored in:
///
/// - a single register for EXISTS subqueries,
/// - a range of registers for RowValue subqueries,
/// - an ephemeral index for IN subqueries.
pub fn emit_non_from_clause_subquery(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
plan: SelectPlan,
query_type: &SubqueryType,
is_correlated: bool,
) -> Result<()> {
program.incr_nesting();
let label_skip_after_first_run = if !is_correlated {
let label = program.allocate_label();
program.emit_insn(Insn::Once {
target_pc_when_reentered: label,
});
Some(label)
} else {
None
};
match query_type {
SubqueryType::Exists { result_reg, .. } => {
let subroutine_reg = program.alloc_register();
program.emit_insn(Insn::BeginSubrtn {
dest: subroutine_reg,
dest_end: None,
});
program.emit_insn(Insn::Integer {
value: 0,
dest: *result_reg,
});
emit_program_for_select(program, &t_ctx.resolver, plan)?;
program.emit_insn(Insn::Return {
return_reg: subroutine_reg,
can_fallthrough: true,
});
}
SubqueryType::In { cursor_id } => {
program.emit_insn(Insn::OpenEphemeral {
cursor_id: *cursor_id,
is_table: false,
});
emit_program_for_select(program, &t_ctx.resolver, plan)?;
}
SubqueryType::RowValue {
result_reg_start,
num_regs,
} => {
let subroutine_reg = program.alloc_register();
program.emit_insn(Insn::BeginSubrtn {
dest: subroutine_reg,
dest_end: None,
});
for result_reg in *result_reg_start..*result_reg_start + *num_regs {
program.emit_insn(Insn::Null {
dest: result_reg,
dest_end: None,
});
}
emit_program_for_select(program, &t_ctx.resolver, plan)?;
program.emit_insn(Insn::Return {
return_reg: subroutine_reg,
can_fallthrough: true,
});
}
}
if let Some(label) = label_skip_after_first_run {
program.preassign_label_to_next_insn(label);
}
program.decr_nesting();
Ok(())
}

View File

@@ -24,6 +24,17 @@ pub fn emit_values(
} }
QueryDestination::EphemeralIndex { .. } => emit_toplevel_values(program, plan, t_ctx)?, QueryDestination::EphemeralIndex { .. } => emit_toplevel_values(program, plan, t_ctx)?,
QueryDestination::EphemeralTable { .. } => unreachable!(), QueryDestination::EphemeralTable { .. } => unreachable!(),
QueryDestination::ExistsSubqueryResult { result_reg } => {
program.emit_insn(Insn::Integer {
value: 1,
dest: result_reg,
});
result_reg
}
QueryDestination::RowValueSubqueryResult { .. } => {
emit_toplevel_values(program, plan, t_ctx)?
}
QueryDestination::Unset => unreachable!("Unset query destination should not be reached"),
}; };
Ok(reg_result_cols_start) Ok(reg_result_cols_start)
} }
@@ -168,6 +179,24 @@ fn emit_values_to_destination(
emit_values_to_index(program, plan, start_reg, row_len); emit_values_to_index(program, plan, start_reg, row_len);
} }
QueryDestination::EphemeralTable { .. } => unreachable!(), QueryDestination::EphemeralTable { .. } => unreachable!(),
QueryDestination::ExistsSubqueryResult { result_reg } => {
program.emit_insn(Insn::Integer {
value: 1,
dest: *result_reg,
});
}
QueryDestination::RowValueSubqueryResult {
result_reg_start,
num_regs,
} => {
assert!(row_len == *num_regs, "Row value subqueries should have the same number of result columns as the number of registers");
program.emit_insn(Insn::Copy {
src_reg: start_reg,
dst_reg: *result_reg_start,
extra_amount: num_regs - 1,
});
}
QueryDestination::Unset => unreachable!("Unset query destination should not be reached"),
} }
} }

View File

@@ -208,6 +208,7 @@ fn prepare_window_subquery(
distinctness: Distinctness::NonDistinct, distinctness: Distinctness::NonDistinct,
values: vec![], values: vec![],
window: None, window: None,
non_from_clause_subqueries: vec![],
}; };
prepare_window_subquery( prepare_window_subquery(

View File

@@ -842,6 +842,42 @@ impl ProgramBuilder {
.unwrap_or_else(|| panic!("Cursor not found: {key:?}")) .unwrap_or_else(|| panic!("Cursor not found: {key:?}"))
} }
/// Resolve the first allocated index cursor for a given table reference.
/// This method exists due to a limitation of our translation system where
/// a subquery that references an outer query table cannot know whether a
/// table cursor, index cursor, or both were opened for that table reference.
/// Hence: currently we first try to resolve a table cursor, and if that fails,
/// we resolve an index cursor via this method.
pub fn resolve_any_index_cursor_id_for_table(&self, table_ref_id: TableInternalId) -> CursorID {
self.cursor_ref
.iter()
.position(|(k, _)| {
k.as_ref()
.is_some_and(|k| k.table_reference_id == table_ref_id && k.index.is_some())
})
.unwrap_or_else(|| panic!("No index cursor found for table {table_ref_id}"))
}
/// Resolve the [Index] that a given cursor is associated with.
pub fn resolve_index_for_cursor_id(&self, cursor_id: CursorID) -> Arc<Index> {
let cursor_ref = &self
.cursor_ref
.get(cursor_id)
.unwrap_or_else(|| panic!("Cursor not found: {cursor_id}"))
.1;
let CursorType::BTreeIndex(index) = cursor_ref else {
panic!("Cursor is not an index: {cursor_id}");
};
index.clone()
}
/// Get the [CursorType] of a given cursor.
pub fn get_cursor_type(&self, cursor_id: CursorID) -> Option<&CursorType> {
self.cursor_ref
.get(cursor_id)
.map(|(_, cursor_type)| cursor_type)
}
pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) { pub fn set_collation(&mut self, c: Option<(CollationSeq, bool)>) {
self.collation = c self.collation = c
} }

View File

@@ -476,6 +476,40 @@ pub enum Expr {
Unary(UnaryOperator, Box<Expr>), Unary(UnaryOperator, Box<Expr>),
/// Parameters /// Parameters
Variable(String), Variable(String),
/// Subqueries from e.g. the WHERE clause are planned separately
/// and their results will be placed in registers or in an ephemeral index
/// pointed to by this type.
SubqueryResult {
/// Internal "opaque" identifier for the subquery. When the translator encounters
/// a [Expr::SubqueryResult], it needs to know which subquery in the corresponding
/// query plan it references.
subquery_id: TableInternalId,
/// Left-hand side expression for IN subqueries.
/// This property plus 'not_in' are only relevant for IN subqueries,
/// and the reason they are not included in the [SubqueryType] enum is so that
/// we don't have to clone this Box.
lhs: Option<Box<Expr>>,
/// Whether the IN subquery is a NOT IN subquery.
not_in: bool,
/// The type of subquery.
query_type: SubqueryType,
},
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum SubqueryType {
/// EXISTS subquery; result is stored in a single register.
Exists { result_reg: usize },
/// Row value subquery; result is stored in a range of registers.
/// Example: x = (SELECT ...) or (x, y) = (SELECT ...)
RowValue {
result_reg_start: usize,
num_regs: usize,
},
/// IN subquery; result is stored in an ephemeral index.
/// Example: x <NOT> IN (SELECT ...)
In { cursor_id: usize },
} }
impl Expr { impl Expr {

View File

@@ -707,6 +707,10 @@ impl ToTokens for Expr {
context: &C, context: &C,
) -> Result<(), S::Error> { ) -> Result<(), S::Error> {
match self { match self {
Self::SubqueryResult { .. } => {
// FIXME: what to put here? This is a highly "artificial" AST node that has no meaning when stringified.
Ok(())
}
Self::Between { Self::Between {
lhs, lhs,
not, not,

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported -- LIMBO_SKIP: subquery in HAVING clausenot supported
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported -- LIMBO_SKIP: query 17 is slow as hell in both Turso and Sqlite
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported -- LIMBO_SKIP: query 20 is slow as hell in both Turso and Sqlite
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported -- LIMBO_SKIP: query 22 is slow as hell in both Turso and Sqlite
select select

View File

@@ -1,4 +1,4 @@
-- LIMBO_SKIP: subquery in where not supported
select select

View File

@@ -447,3 +447,195 @@ do_execsql_test_on_specific_db {:memory:} subquery-cte-available-in-arbitrary-de
) join cte2 ) join cte2
); );
} {112} } {112}
# WHERE CLAUSE SUBQUERIES
# EXISTS/NOT EXISTS
do_execsql_test_on_specific_db {:memory:} subquery-exists-basic {
create table test(a);
insert into test values (1);
select * from test where exists (select 0);
select * from test where not exists (select 0 where false);
} {1
1}
# Trivial example: get all users if there are any products
do_execsql_test_on_specific_db {:memory:} subquery-exists-uncorrelated {
create table products(id, name, price);
create table users(id, name);
insert into products values (1, 'hat', 50), (2, 'cap', 75), (3, 'shirt', 100);
insert into users values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie');
select * from users where exists (select 1 from products);
} {1|Alice
2|Bob
3|Charlie}
# Get all products when there are no users
do_execsql_test_on_specific_db {:memory:} subquery-not-exists-uncorrelated {
create table products(id, name, price);
create table users(id, name);
insert into products values (1, 'hat', 50), (2, 'cap', 75);
select * from products where not exists (select 1 from users);
} {1|hat|50
2|cap|75}
# Get products that have a matching category
do_execsql_test_on_specific_db {:memory:} subquery-exists-correlated {
create table products(id, name, category_id);
create table categories(id, name);
insert into products values (1, 'hat', 10), (2, 'cap', 20), (3, 'shirt', 10);
insert into categories values (10, 'Clothing'), (30, 'Electronics');
select * from products p where exists (
select * from categories c where c.id = p.category_id
);
} {1|hat|10
3|shirt|10}
# Get users who have no orders
do_execsql_test_on_specific_db {:memory:} subquery-not-exists-correlated {
create table users(id, name, age);
create table orders(id, user_id, amount);
insert into users values (1, 'Alice', 25), (2, 'Bob', 30), (3, 'Charlie', 35);
insert into orders values (1, 1, 100), (2, 3, 200);
select * from users u where not exists (
select * from orders o where o.user_id = u.id
);
} {2|Bob|30}
# Get products that belong to the 'Clothing' category
do_execsql_test_on_specific_db {:memory:} subquery-exists-with-conditions {
create table categories(id, name);
create table products(id, name, price, category_id);
insert into categories values (1, 'Clothing'), (2, 'Electronics');
insert into products values (1, 'hat', 50, 1), (2, 'cap', 25, 1), (3, 'macbook', 75, 2);
select * from products p where exists (
select * from categories c
where c.id = p.category_id and c.name = 'Clothing'
);
} {1|hat|50|1
2|cap|25|1}
# Get users who have products with high-rated reviews
do_execsql_test_on_specific_db {:memory:} subquery-nested-exists {
create table users(id, name);
create table products(id, name, user_id);
create table reviews(id, product_id, rating);
insert into users values (1, 'Alice'), (2, 'Bob');
insert into products values (1, 'hat', 1), (2, 'cap', 2);
insert into reviews values (1, 1, 5);
select * from users u where exists (
select * from products p where p.user_id = u.id and exists (
select 1 from reviews r where r.product_id = p.id and r.rating >= 4
)
);
} {1|Alice}
# Get products that have tags (none exist, so empty result)
do_execsql_test_on_specific_db {:memory:} subquery-exists-empty-result {
create table products(id, name);
create table tags(product_id, tag);
insert into products values (1, 'hat'), (2, 'cap');
select * from products p where exists (
select * from tags t where t.product_id = p.id
);
} {}
# Get users whose emails are not in the blocked list
do_execsql_test_on_specific_db {:memory:} subquery-not-exists-all-match {
create table users(id, email);
create table blocked_emails(email);
insert into users values (1, 'alice@test.com'), (2, 'bob@test.com');
insert into blocked_emails values ('spam@test.com');
select * from users u where not exists (
select * from blocked_emails b where b.email = u.email
);
} {1|alice@test.com
2|bob@test.com}
# SCALAR SUBQUERIES
# Get products with price higher than average price
do_execsql_test_on_specific_db {:memory:} subquery-scalar-comparison {
create table products(id, name, price);
insert into products values (1, 'hat', 50), (2, 'cap', 25), (3, 'jacket', 75);
select * from products where price >= (
select avg(price) from products
);
} {1|hat|50
3|jacket|75}
# Get users with the highest score
do_execsql_test_on_specific_db {:memory:} subquery-scalar-max {
create table users(id, name, score);
insert into users values (1, 'Alice', 85), (2, 'Bob', 92), (3, 'Charlie', 92);
select * from users where score = (
select max(score) from users
);
} {2|Bob|92
3|Charlie|92}
# (x,y) IN SUBQUERIES
# Get products from specific categories using IN
do_execsql_test_on_specific_db {:memory:} subquery-in-single-column {
create table products(id, name, category_id);
create table categories(id, name);
insert into categories values (1, 'Clothing'), (2, 'Electronics'), (3, 'Books');
insert into products values (1, 'hat', 1), (2, 'laptop', 2), (3, 'novel', 3), (4, 'cap', 1);
select * from products where category_id in (
select id from categories where name in ('Clothing', 'Electronics')
);
} {1|hat|1
2|laptop|2
4|cap|1}
# Get products NOT in discontinued categories
do_execsql_test_on_specific_db {:memory:} subquery-not-in-single-column {
create table products(id, name, category_id);
create table discontinued_categories(category_id);
insert into products values (1, 'hat', 1), (2, 'laptop', 2), (3, 'book', 3);
insert into discontinued_categories values (2);
select * from products where category_id not in (
select category_id from discontinued_categories
);
} {1|hat|1
3|book|3}
# Get order items matching specific product-quantity combinations
do_execsql_test_on_specific_db {:memory:} subquery-in-multiple-columns {
create table order_items(order_id, product_id, quantity);
create table special_offers(product_id, min_quantity);
insert into order_items values (1, 10, 5), (2, 20, 3), (3, 10, 2), (4, 30, 1);
insert into special_offers values (10, 5), (20, 3);
select * from order_items where (product_id, quantity) in (
select product_id, min_quantity from special_offers
);
} {1|10|5
2|20|3}
# Get users whose (name, age) combination is not in the restricted list
do_execsql_test_on_specific_db {:memory:} subquery-not-in-multiple-columns {
create table users(id, name, age);
create table restricted_profiles(name, age);
insert into users values (1, 'Alice', 25), (2, 'Bob', 30), (3, 'Charlie', 25);
insert into restricted_profiles values ('Bob', 30);
select * from users where (name, age) not in (
select name, age from restricted_profiles
);
} {1|Alice|25
3|Charlie|25}

View File

@@ -4203,4 +4203,685 @@ mod fuzz_tests {
Ok(()) Ok(())
} }
#[test]
/// Tests for correlated and uncorrelated subqueries occurring in the WHERE clause of a SELECT statement.
pub fn table_subquery_fuzz() {
let _ = env_logger::try_init();
let (mut rng, seed) = rng_from_time_or_env();
log::info!("table_subquery_fuzz seed: {seed}");
// Constants for fuzzing parameters
const NUM_FUZZ_ITERATIONS: usize = 2000;
const MAX_ROWS_PER_TABLE: usize = 100;
const MIN_ROWS_PER_TABLE: usize = 5;
const MAX_SUBQUERY_DEPTH: usize = 4;
let db = TempDatabase::new_empty(true);
let limbo_conn = db.connect_limbo();
let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();
let mut debug_ddl_dml_string = String::new();
// Create 3 simple tables
let table_schemas = [
"CREATE TABLE t1 (id INT PRIMARY KEY, value1 INTEGER, value2 INTEGER);",
"CREATE TABLE t2 (id INT PRIMARY KEY, ref_id INTEGER, data INTEGER);",
"CREATE TABLE t3 (id INT PRIMARY KEY, category INTEGER, amount INTEGER);",
];
for schema in &table_schemas {
debug_ddl_dml_string.push_str(schema);
limbo_exec_rows(&db, &limbo_conn, schema);
sqlite_exec_rows(&sqlite_conn, schema);
}
// Populate tables with random data
for table_num in 1..=3 {
let num_rows = rng.random_range(MIN_ROWS_PER_TABLE..=MAX_ROWS_PER_TABLE);
for i in 1..=num_rows {
let insert_sql = match table_num {
1 => format!(
"INSERT INTO t1 VALUES ({}, {}, {});",
i,
rng.random_range(-10..20),
rng.random_range(-5..15)
),
2 => format!(
"INSERT INTO t2 VALUES ({}, {}, {});",
i,
rng.random_range(1..=num_rows), // ref_id references t1 approximately
rng.random_range(-5..10)
),
3 => format!(
"INSERT INTO t3 VALUES ({}, {}, {});",
i,
rng.random_range(1..5), // category 1-4
rng.random_range(0..100)
),
_ => unreachable!(),
};
log::debug!("{insert_sql}");
debug_ddl_dml_string.push_str(&insert_sql);
limbo_exec_rows(&db, &limbo_conn, &insert_sql);
sqlite_exec_rows(&sqlite_conn, &insert_sql);
}
}
log::info!("DDL/DML to reproduce manually:\n{debug_ddl_dml_string}");
// Helper function to generate random simple WHERE condition
let gen_simple_where = |rng: &mut ChaCha8Rng, table: &str| -> String {
let conditions = match table {
"t1" => vec![
format!("value1 > {}", rng.random_range(-5..15)),
format!("value2 < {}", rng.random_range(-5..15)),
format!("id <= {}", rng.random_range(1..20)),
"value1 IS NOT NULL".to_string(),
],
"t2" => vec![
format!("data > {}", rng.random_range(-3..8)),
format!("ref_id = {}", rng.random_range(1..15)),
format!("id < {}", rng.random_range(5..25)),
"data IS NOT NULL".to_string(),
],
"t3" => vec![
format!("category = {}", rng.random_range(1..5)),
format!("amount > {}", rng.random_range(0..50)),
format!("id <= {}", rng.random_range(1..20)),
"amount IS NOT NULL".to_string(),
],
_ => vec!["1=1".to_string()],
};
conditions[rng.random_range(0..conditions.len())].clone()
};
// Helper function to generate simple subquery
fn gen_subquery(rng: &mut ChaCha8Rng, depth: usize, outer_table: Option<&str>) -> String {
if depth > MAX_SUBQUERY_DEPTH {
return "SELECT 1".to_string();
}
let gen_simple_where_inner = |rng: &mut ChaCha8Rng, table: &str| -> String {
let conditions = match table {
"t1" => vec![
format!("value1 > {}", rng.random_range(-5..15)),
format!("value2 < {}", rng.random_range(-5..15)),
format!("id <= {}", rng.random_range(1..20)),
"value1 IS NOT NULL".to_string(),
],
"t2" => vec![
format!("data > {}", rng.random_range(-3..8)),
format!("ref_id = {}", rng.random_range(1..15)),
format!("id < {}", rng.random_range(5..25)),
"data IS NOT NULL".to_string(),
],
"t3" => vec![
format!("category = {}", rng.random_range(1..5)),
format!("amount > {}", rng.random_range(0..50)),
format!("id <= {}", rng.random_range(1..20)),
"amount IS NOT NULL".to_string(),
],
_ => vec!["1=1".to_string()],
};
conditions[rng.random_range(0..conditions.len())].clone()
};
// Helper function to generate correlated WHERE conditions
let gen_correlated_where =
|rng: &mut ChaCha8Rng, inner_table: &str, outer_table: &str| -> String {
match (outer_table, inner_table) {
("t1", "t2") => {
// t2.ref_id relates to t1.id
let conditions = [
format!("{inner_table}.ref_id = {outer_table}.id"),
format!("{inner_table}.id < {outer_table}.value1"),
format!("{inner_table}.data > {outer_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t1", "t3") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.category < {outer_table}.value1"),
format!("{inner_table}.amount > {outer_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t1") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.ref_id"),
format!("{inner_table}.value1 > {outer_table}.data"),
format!("{inner_table}.value2 < {outer_table}.id"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t3") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.category = {outer_table}.ref_id"),
format!("{inner_table}.amount > {outer_table}.data"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t1") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.value1 > {outer_table}.category"),
format!("{inner_table}.value2 < {outer_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t2") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.ref_id = {outer_table}.category"),
format!("{inner_table}.data < {outer_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
_ => "1=1".to_string(),
}
};
let subquery_types = [
// Simple scalar subqueries - single column only for safe nesting
"SELECT MAX(amount) FROM t3".to_string(),
"SELECT MIN(value1) FROM t1".to_string(),
"SELECT COUNT(*) FROM t2".to_string(),
"SELECT AVG(amount) FROM t3".to_string(),
"SELECT id FROM t1".to_string(),
"SELECT ref_id FROM t2".to_string(),
"SELECT category FROM t3".to_string(),
// Subqueries with WHERE - single column only
format!(
"SELECT MAX(amount) FROM t3 WHERE {}",
gen_simple_where_inner(rng, "t3")
),
format!(
"SELECT value1 FROM t1 WHERE {}",
gen_simple_where_inner(rng, "t1")
),
format!(
"SELECT ref_id FROM t2 WHERE {}",
gen_simple_where_inner(rng, "t2")
),
];
let base_query = &subquery_types[rng.random_range(0..subquery_types.len())];
// Add correlated conditions if outer_table is provided and sometimes
let final_query = if let Some(outer_table) = outer_table {
if rng.random_bool(0.4) {
// 40% chance for correlation
// Extract the inner table from the base query
let inner_table = if base_query.contains("FROM t1") {
"t1"
} else if base_query.contains("FROM t2") {
"t2"
} else if base_query.contains("FROM t3") {
"t3"
} else {
return base_query.clone(); // fallback
};
let correlated_condition = gen_correlated_where(rng, inner_table, outer_table);
if base_query.contains("WHERE") {
format!("{base_query} AND {correlated_condition}")
} else {
format!("{base_query} WHERE {correlated_condition}")
}
} else {
base_query.clone()
}
} else {
base_query.clone()
};
// Sometimes add nesting - but use scalar subquery for nesting to avoid column count issues
if depth < 1 && rng.random_bool(0.2) {
// Reduced probability and depth
let nested = gen_scalar_subquery(rng, 0, outer_table);
if final_query.contains("WHERE") {
format!("{final_query} AND id IN ({nested})")
} else {
format!("{final_query} WHERE id IN ({nested})")
}
} else {
final_query
}
}
// Helper function to generate scalar subquery (single column only)
fn gen_scalar_subquery(
rng: &mut ChaCha8Rng,
depth: usize,
outer_table: Option<&str>,
) -> String {
if depth > MAX_SUBQUERY_DEPTH {
// Reduced nesting depth
return "SELECT 1".to_string();
}
let gen_simple_where_inner = |rng: &mut ChaCha8Rng, table: &str| -> String {
let conditions = match table {
"t1" => vec![
format!("value1 > {}", rng.random_range(-5..15)),
format!("value2 < {}", rng.random_range(-5..15)),
format!("id <= {}", rng.random_range(1..20)),
"value1 IS NOT NULL".to_string(),
],
"t2" => vec![
format!("data > {}", rng.random_range(-3..8)),
format!("ref_id = {}", rng.random_range(1..15)),
format!("id < {}", rng.random_range(5..25)),
"data IS NOT NULL".to_string(),
],
"t3" => vec![
format!("category = {}", rng.random_range(1..5)),
format!("amount > {}", rng.random_range(0..50)),
format!("id <= {}", rng.random_range(1..20)),
"amount IS NOT NULL".to_string(),
],
_ => vec!["1=1".to_string()],
};
conditions[rng.random_range(0..conditions.len())].clone()
};
// Helper function to generate correlated WHERE conditions
let gen_correlated_where =
|rng: &mut ChaCha8Rng, inner_table: &str, outer_table: &str| -> String {
match (outer_table, inner_table) {
("t1", "t2") => {
// t2.ref_id relates to t1.id
let conditions = [
format!("{inner_table}.ref_id = {outer_table}.id"),
format!("{inner_table}.id < {outer_table}.value1"),
format!("{inner_table}.data > {outer_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t1", "t3") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.category < {outer_table}.value1"),
format!("{inner_table}.amount > {outer_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t1") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.ref_id"),
format!("{inner_table}.value1 > {outer_table}.data"),
format!("{inner_table}.value2 < {outer_table}.id"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t3") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.category = {outer_table}.ref_id"),
format!("{inner_table}.amount > {outer_table}.data"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t1") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.value1 > {outer_table}.category"),
format!("{inner_table}.value2 < {outer_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t2") => {
let conditions = [
format!("{inner_table}.id = {outer_table}.id"),
format!("{inner_table}.ref_id = {outer_table}.category"),
format!("{inner_table}.data < {outer_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
_ => "1=1".to_string(),
}
};
let scalar_subquery_types = [
// Only scalar subqueries - single column only
"SELECT MAX(amount) FROM t3".to_string(),
"SELECT MIN(value1) FROM t1".to_string(),
"SELECT COUNT(*) FROM t2".to_string(),
"SELECT AVG(amount) FROM t3".to_string(),
"SELECT id FROM t1".to_string(),
"SELECT ref_id FROM t2".to_string(),
"SELECT category FROM t3".to_string(),
// Scalar subqueries with WHERE
format!(
"SELECT MAX(amount) FROM t3 WHERE {}",
gen_simple_where_inner(rng, "t3")
),
format!(
"SELECT value1 FROM t1 WHERE {}",
gen_simple_where_inner(rng, "t1")
),
format!(
"SELECT ref_id FROM t2 WHERE {}",
gen_simple_where_inner(rng, "t2")
),
];
let base_query =
&scalar_subquery_types[rng.random_range(0..scalar_subquery_types.len())];
// Add correlated conditions if outer_table is provided and sometimes
let final_query = if let Some(outer_table) = outer_table {
if rng.random_bool(0.4) {
// 40% chance for correlation
// Extract the inner table from the base query
let inner_table = if base_query.contains("FROM t1") {
"t1"
} else if base_query.contains("FROM t2") {
"t2"
} else if base_query.contains("FROM t3") {
"t3"
} else {
return base_query.clone(); // fallback
};
let correlated_condition = gen_correlated_where(rng, inner_table, outer_table);
if base_query.contains("WHERE") {
format!("{base_query} AND {correlated_condition}")
} else {
format!("{base_query} WHERE {correlated_condition}")
}
} else {
base_query.clone()
}
} else {
base_query.clone()
};
// Sometimes add nesting
if depth < 1 && rng.random_bool(0.2) {
// Reduced probability and depth
let nested = gen_scalar_subquery(rng, depth + 1, outer_table);
if final_query.contains("WHERE") {
format!("{final_query} AND id IN ({nested})")
} else {
format!("{final_query} WHERE id IN ({nested})")
}
} else {
final_query
}
}
for iter_num in 0..NUM_FUZZ_ITERATIONS {
let main_table = ["t1", "t2", "t3"][rng.random_range(0..3)];
let query_type = rng.random_range(0..6); // Increased from 4 to 6 for new correlated query types
let query = match query_type {
0 => {
// Comparison subquery: WHERE column <op> (SELECT ...)
let column = match main_table {
"t1" => ["value1", "value2", "id"][rng.random_range(0..3)],
"t2" => ["data", "ref_id", "id"][rng.random_range(0..3)],
"t3" => ["amount", "category", "id"][rng.random_range(0..3)],
_ => "id",
};
let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)];
let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table));
format!("SELECT * FROM {main_table} WHERE {column} {op} ({subquery})",)
}
1 => {
// EXISTS subquery: WHERE [NOT] EXISTS (SELECT ...)
let not_exists = if rng.random_bool(0.3) { "NOT " } else { "" };
let subquery = gen_subquery(&mut rng, 0, Some(main_table));
format!("SELECT * FROM {main_table} WHERE {not_exists}EXISTS ({subquery})",)
}
2 => {
// IN subquery with single column: WHERE column [NOT] IN (SELECT ...)
let not_in = if rng.random_bool(0.3) { "NOT " } else { "" };
let column = match main_table {
"t1" => ["value1", "value2", "id"][rng.random_range(0..3)],
"t2" => ["data", "ref_id", "id"][rng.random_range(0..3)],
"t3" => ["amount", "category", "id"][rng.random_range(0..3)],
_ => "id",
};
let subquery = gen_scalar_subquery(&mut rng, 0, Some(main_table));
format!("SELECT * FROM {main_table} WHERE {column} {not_in}IN ({subquery})",)
}
3 => {
// IN subquery with tuple: WHERE (col1, col2) [NOT] IN (SELECT col1, col2 ...)
let not_in = if rng.random_bool(0.3) { "NOT " } else { "" };
let (columns, sub_columns) = match main_table {
"t1" => {
if rng.random_bool(0.5) {
("(id, value1)", "SELECT id, value1 FROM t1")
} else {
("id", "SELECT id FROM t1")
}
}
"t2" => {
if rng.random_bool(0.5) {
("(ref_id, data)", "SELECT ref_id, data FROM t2")
} else {
("ref_id", "SELECT ref_id FROM t2")
}
}
"t3" => {
if rng.random_bool(0.5) {
("(id, category)", "SELECT id, category FROM t3")
} else {
("id", "SELECT id FROM t3")
}
}
_ => ("id", "SELECT id FROM t1"),
};
let subquery = if rng.random_bool(0.5) {
sub_columns.to_string()
} else {
let base = sub_columns;
let table_for_where = base.split("FROM ").nth(1).unwrap_or("t1");
format!(
"{} WHERE {}",
base,
gen_simple_where(&mut rng, table_for_where)
)
};
format!("SELECT * FROM {main_table} WHERE {columns} {not_in}IN ({subquery})",)
}
4 => {
// Correlated EXISTS subquery: WHERE [NOT] EXISTS (SELECT ... WHERE correlation)
let not_exists = if rng.random_bool(0.3) { "NOT " } else { "" };
// Choose a different table for the subquery to ensure correlation is meaningful
let inner_tables = match main_table {
"t1" => ["t2", "t3"],
"t2" => ["t1", "t3"],
"t3" => ["t1", "t2"],
_ => ["t1", "t2"],
};
let inner_table = inner_tables[rng.random_range(0..inner_tables.len())];
// Generate correlated condition
let correlated_condition = match (main_table, inner_table) {
("t1", "t2") => {
let conditions = [
format!("{inner_table}.ref_id = {main_table}.id"),
format!("{inner_table}.id < {main_table}.value1"),
format!("{inner_table}.data > {main_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t1", "t3") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.category < {main_table}.value1"),
format!("{inner_table}.amount > {main_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t1") => {
let conditions = [
format!("{inner_table}.id = {main_table}.ref_id"),
format!("{inner_table}.value1 > {main_table}.data"),
format!("{inner_table}.value2 < {main_table}.id"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t3") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.category = {main_table}.ref_id"),
format!("{inner_table}.amount > {main_table}.data"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t1") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.value1 > {main_table}.category"),
format!("{inner_table}.value2 < {main_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t2") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.ref_id = {main_table}.category"),
format!("{inner_table}.data < {main_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
_ => "1=1".to_string(),
};
format!(
"SELECT * FROM {main_table} WHERE {not_exists}EXISTS (SELECT 1 FROM {inner_table} WHERE {correlated_condition})",
)
}
5 => {
// Correlated comparison subquery: WHERE column <op> (SELECT ... WHERE correlation)
let column = match main_table {
"t1" => ["value1", "value2", "id"][rng.random_range(0..3)],
"t2" => ["data", "ref_id", "id"][rng.random_range(0..3)],
"t3" => ["amount", "category", "id"][rng.random_range(0..3)],
_ => "id",
};
let op = [">", "<", ">=", "<=", "=", "<>"][rng.random_range(0..6)];
// Choose a different table for the subquery
let inner_tables = match main_table {
"t1" => ["t2", "t3"],
"t2" => ["t1", "t3"],
"t3" => ["t1", "t2"],
_ => ["t1", "t2"],
};
let inner_table = inner_tables[rng.random_range(0..inner_tables.len())];
// Choose what to select from inner table
let select_column = match inner_table {
"t1" => ["value1", "value2", "id"][rng.random_range(0..3)],
"t2" => ["data", "ref_id", "id"][rng.random_range(0..3)],
"t3" => ["amount", "category", "id"][rng.random_range(0..3)],
_ => "id",
};
// Generate correlated condition
let correlated_condition = match (main_table, inner_table) {
("t1", "t2") => {
let conditions = [
format!("{inner_table}.ref_id = {main_table}.id"),
format!("{inner_table}.id < {main_table}.value1"),
format!("{inner_table}.data > {main_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t1", "t3") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.category < {main_table}.value1"),
format!("{inner_table}.amount > {main_table}.value2"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t1") => {
let conditions = [
format!("{inner_table}.id = {main_table}.ref_id"),
format!("{inner_table}.value1 > {main_table}.data"),
format!("{inner_table}.value2 < {main_table}.id"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t2", "t3") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.category = {main_table}.ref_id"),
format!("{inner_table}.amount > {main_table}.data"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t1") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.value1 > {main_table}.category"),
format!("{inner_table}.value2 < {main_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
("t3", "t2") => {
let conditions = [
format!("{inner_table}.id = {main_table}.id"),
format!("{inner_table}.ref_id = {main_table}.category"),
format!("{inner_table}.data < {main_table}.amount"),
];
conditions[rng.random_range(0..conditions.len())].clone()
}
_ => "1=1".to_string(),
};
format!(
"SELECT * FROM {main_table} WHERE {column} {op} (SELECT {select_column} FROM {inner_table} WHERE {correlated_condition})",
)
}
_ => unreachable!(),
};
log::info!(
"Iteration {}/{NUM_FUZZ_ITERATIONS}: Query: {query}",
iter_num + 1,
);
let limbo_results = limbo_exec_rows(&db, &limbo_conn, &query);
let sqlite_results = sqlite_exec_rows(&sqlite_conn, &query);
// Check if results match
if limbo_results.len() != sqlite_results.len() {
panic!(
"Row count mismatch for query: {}\nLimbo: {} rows, SQLite: {} rows\nLimbo: {:?}\nSQLite: {:?}\nSeed: {}\n\n DDL/DML to reproduce manually:\n{}",
query, limbo_results.len(), sqlite_results.len(), limbo_results, sqlite_results, seed, debug_ddl_dml_string
);
}
// Check if all rows match (order might be different)
// Since Value doesn't implement Ord, we'll check containment both ways
let all_limbo_in_sqlite = limbo_results.iter().all(|limbo_row| {
sqlite_results
.iter()
.any(|sqlite_row| limbo_row == sqlite_row)
});
let all_sqlite_in_limbo = sqlite_results.iter().all(|sqlite_row| {
limbo_results
.iter()
.any(|limbo_row| sqlite_row == limbo_row)
});
if !all_limbo_in_sqlite || !all_sqlite_in_limbo {
panic!(
"Results mismatch for query: {query}\nLimbo: {limbo_results:?}\nSQLite: {sqlite_results:?}\nSeed: {seed}",
);
}
}
}
} }