Translate Expr::SubqueryResult into bytecode

This commit is contained in:
Jussi Saurio
2025-10-27 15:57:53 +02:00
parent bc2a7c79f9
commit 59363a1be3

View File

@@ -1,7 +1,7 @@
use std::sync::Arc;
use tracing::{instrument, Level};
use turso_parser::ast::{self, As, Expr, UnaryOperator};
use turso_parser::ast::{self, As, Expr, SubqueryType, UnaryOperator};
use super::emitter::Resolver;
use super::optimizer::Optimizable;
@@ -288,7 +288,25 @@ pub fn translate_condition_expr(
resolver: &Resolver,
) -> Result<()> {
match expr {
ast::Expr::SubqueryResult { .. } => unimplemented!(), // Will be implemented in a future commit
ast::Expr::SubqueryResult { query_type, .. } => match query_type {
SubqueryType::Exists { result_reg } => {
emit_cond_jump(program, condition_metadata, *result_reg);
}
SubqueryType::In { .. } => {
let result_reg = program.alloc_register();
translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?;
emit_cond_jump(program, condition_metadata, result_reg);
}
SubqueryType::RowValue { num_regs, .. } => {
if *num_regs != 1 {
// A query like SELECT * FROM t WHERE (SELECT ...) must return a single column.
crate::bail_parse_error!("sub-select returns {num_regs} columns - expected 1");
}
let result_reg = program.alloc_register();
translate_expr(program, Some(referenced_tables), expr, result_reg, resolver)?;
emit_cond_jump(program, condition_metadata, result_reg);
}
},
ast::Expr::Register(_) => {
crate::bail_parse_error!("Register in WHERE clause is currently unused. Consider removing Resolver::expr_to_reg_cache and using Expr::Register instead");
}
@@ -594,7 +612,149 @@ pub fn translate_expr(
}
match expr {
ast::Expr::SubqueryResult { .. } => unimplemented!(), // Will be implemented in a future commit
ast::Expr::SubqueryResult {
lhs,
not_in,
query_type,
..
} => {
match query_type {
SubqueryType::Exists { result_reg } => {
program.emit_insn(Insn::Copy {
src_reg: *result_reg,
dst_reg: target_register,
extra_amount: 0,
});
Ok(target_register)
}
SubqueryType::In { cursor_id } => {
// jump here when we can definitely skip the row
let label_skip_row = program.allocate_label();
// jump here when we can definitely include the row
let label_include_row = program.allocate_label();
// jump here when we need to make extra null-related checks, because sql null is the greatest thing ever
let label_null_rewind = program.allocate_label();
let label_null_checks_loop_start = program.allocate_label();
let label_null_checks_next = program.allocate_label();
program.emit_insn(Insn::Integer {
value: 0,
dest: target_register,
});
let lhs_columns = match unwrap_parens(lhs.as_ref().unwrap())? {
ast::Expr::Parenthesized(exprs) => {
exprs.iter().map(|e| e.as_ref()).collect()
}
expr => vec![expr],
};
let lhs_column_count = lhs_columns.len();
let lhs_column_regs_start = program.alloc_registers(lhs_column_count);
for (i, lhs_column) in lhs_columns.iter().enumerate() {
translate_expr(
program,
referenced_tables,
lhs_column,
lhs_column_regs_start + i,
resolver,
)?;
if !lhs_column.is_nonnull(referenced_tables.as_ref().unwrap()) {
program.emit_insn(Insn::IsNull {
reg: lhs_column_regs_start + i,
target_pc: if *not_in {
label_null_rewind
} else {
label_skip_row
},
});
}
}
if *not_in {
// WHERE ... NOT IN (SELECT ...)
// We must skip the row if we find a match.
program.emit_insn(Insn::Found {
cursor_id: *cursor_id,
target_pc: label_skip_row,
record_reg: lhs_column_regs_start,
num_regs: lhs_column_count,
});
// Ok, so Found didn't return a match.
// Because SQL NULL, we need do extra checks to see if we can include the row.
// Consider:
// 1. SELECT * FROM T WHERE 1 NOT IN (SELECT NULL),
// 2. SELECT * FROM T WHERE 1 IN (SELECT NULL) -- or anything else where the subquery evaluates to NULL.
// _Both_ of these queries should return nothing, because... SQL NULL.
// The same goes for e.g. SELECT * FROM T WHERE (1,1) NOT IN (SELECT NULL, NULL).
// However, it does _NOT_ apply for SELECT * FROM T WHERE (1,1) NOT IN (SELECT NULL, 1).
// BUT: it DOES apply for SELECT * FROM T WHERE (2,2) NOT IN ((1,1), (NULL, NULL))!!!
// Ergo: if the subquery result has _ANY_ tuples with all NULLs, we need to NOT include the row.
//
// So, if we didn't found a match (and hence, so far, our 'NOT IN' condition still applies),
// we must still rewind the subquery's ephemeral index cursor and go through ALL rows and compare each LHS column (with !=) to the corresponding column in the ephemeral index.
// Comparison instructions have the default behavior that if either operand is NULL, the comparison is completely skipped.
// That means: if we, for ANY row in the ephemeral index, get through all the != comparisons without jumping,
// it means our subquery result has a tuple that is exactly NULL (or (NULL, NULL) etc.),
// in which case we need to NOT include the row.
// If ALL the rows jump at one of the != comparisons, it means our subquery result has no tuples with all NULLs -> we can include the row.
program.preassign_label_to_next_insn(label_null_rewind);
program.emit_insn(Insn::Rewind {
cursor_id: *cursor_id,
pc_if_empty: label_include_row,
});
program.preassign_label_to_next_insn(label_null_checks_loop_start);
let column_check_reg = program.alloc_register();
for i in 0..lhs_column_count {
program.emit_insn(Insn::Column {
cursor_id: *cursor_id,
column: i,
dest: column_check_reg,
default: None,
});
program.emit_insn(Insn::Ne {
lhs: lhs_column_regs_start + i,
rhs: column_check_reg,
target_pc: label_null_checks_next,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
}
program.emit_insn(Insn::Goto {
target_pc: label_skip_row,
});
program.preassign_label_to_next_insn(label_null_checks_next);
program.emit_insn(Insn::Next {
cursor_id: *cursor_id,
pc_if_next: label_null_checks_loop_start,
})
} else {
// WHERE ... IN (SELECT ...)
// We can skip the row if we don't find a match
program.emit_insn(Insn::NotFound {
cursor_id: *cursor_id,
target_pc: label_skip_row,
record_reg: lhs_column_regs_start,
num_regs: lhs_column_count,
});
}
program.preassign_label_to_next_insn(label_include_row);
program.emit_insn(Insn::Integer {
value: 1,
dest: target_register,
});
program.preassign_label_to_next_insn(label_skip_row);
Ok(target_register)
}
SubqueryType::RowValue {
result_reg_start,
num_regs,
} => {
program.emit_insn(Insn::Copy {
src_reg: *result_reg_start,
dst_reg: target_register,
extra_amount: num_regs - 1,
});
Ok(target_register)
}
}
}
ast::Expr::Between { .. } => {
unreachable!("expression should have been rewritten in optmizer")
}
@@ -1952,7 +2112,7 @@ pub fn translate_expr(
}
};
let (_, table) = referenced_tables
let (is_from_outer_query_scope, table) = referenced_tables
.unwrap()
.find_table_by_internal_id(*table_ref_id)
.unwrap_or_else(|| {
@@ -1973,14 +2133,34 @@ pub fn translate_expr(
// If we have a covering index, we don't have an open table cursor so we read from the index cursor.
match &table {
Table::BTree(_) => {
let table_cursor_id = if use_covering_index {
None
let (table_cursor_id, index_cursor_id) = if is_from_outer_query_scope {
// Due to a limitation of our translation system, a subquery that references an outer query table
// cannot know whether a table cursor, index cursor, or both were opened for that table reference.
// Hence: currently we first try to resolve a table cursor, and if that fails,
// we resolve an index cursor.
if let Some(table_cursor_id) =
program.resolve_cursor_id_safe(&CursorKey::table(*table_ref_id))
{
(Some(table_cursor_id), None)
} else {
(
None,
Some(program.resolve_any_index_cursor_id_for_table(*table_ref_id)),
)
}
} else {
Some(program.resolve_cursor_id(&CursorKey::table(*table_ref_id)))
let table_cursor_id = if use_covering_index {
None
} else {
Some(program.resolve_cursor_id(&CursorKey::table(*table_ref_id)))
};
let index_cursor_id = index.map(|index| {
program
.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone()))
});
(table_cursor_id, index_cursor_id)
};
let index_cursor_id = index.map(|index| {
program.resolve_cursor_id(&CursorKey::index(*table_ref_id, index.clone()))
});
if *is_rowid_alias {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxRowId {
@@ -1996,22 +2176,28 @@ pub fn translate_expr(
unreachable!("Either index or table cursor must be opened");
}
} else {
let read_cursor = if use_covering_index {
index_cursor_id.expect(
"index cursor should be opened when use_covering_index=true",
)
let read_from_index = if is_from_outer_query_scope {
index_cursor_id.is_some()
} else {
table_cursor_id.expect(
"table cursor should be opened when use_covering_index=false",
)
use_covering_index
};
let column = if use_covering_index {
let index = index.expect(
"index cursor should be opened when use_covering_index=true",
let read_cursor = if read_from_index {
index_cursor_id.expect("index cursor should be opened")
} else {
table_cursor_id.expect("table cursor should be opened")
};
let column = if read_from_index {
let index = program.resolve_index_for_cursor_id(
index_cursor_id.expect("index cursor should be opened"),
);
index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| {
panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_ref_id)
})
index
.column_table_pos_to_index_pos(*column)
.unwrap_or_else(|| {
panic!(
"index {} does not contain column number {} of table {}",
index.name, column, table_ref_id
)
})
} else {
*column
};