mirror of
https://github.com/aljazceru/turso.git
synced 2026-01-31 05:44:25 +01:00
Merge 'Fix IN operator NULL handling' from Diego Reis
Closes #3277 Basically changed what we used to do to match what and how SQLite does. Closes #3606
This commit is contained in:
@@ -30,6 +30,7 @@ pub struct ConditionMetadata {
|
||||
pub jump_if_condition_is_true: bool,
|
||||
pub jump_target_when_true: BranchOffset,
|
||||
pub jump_target_when_false: BranchOffset,
|
||||
pub jump_target_when_null: BranchOffset,
|
||||
}
|
||||
|
||||
/// Container for register locations of values that can be referenced in RETURNING expressions
|
||||
@@ -154,128 +155,127 @@ macro_rules! expect_arguments_even {
|
||||
///
|
||||
/// This is extracted from the original conditional implementation to be reusable.
|
||||
/// The logic exactly matches the original conditional InList implementation.
|
||||
///
|
||||
/// An IN expression has one of the following formats:
|
||||
/// ```sql
|
||||
/// x IN (y1, y2,...,yN)
|
||||
/// x IN (subquery) (Not yet implemented)
|
||||
/// ```
|
||||
/// The result of an IN operator is one of TRUE, FALSE, or NULL. A NULL result
|
||||
/// means that it cannot be determined if the LHS is contained in the RHS due
|
||||
/// to the presence of NULL values.
|
||||
///
|
||||
/// Currently, we do a simple full-scan, yet it's not ideal when there are many rows
|
||||
/// on RHS. (Check sqlite's in-operator.md)
|
||||
///
|
||||
/// Algorithm:
|
||||
/// 1. Set the null-flag to false
|
||||
/// 2. For each row in the RHS:
|
||||
/// - Compare LHS and RHS
|
||||
/// - If LHS matches RHS, returns TRUE
|
||||
/// - If the comparison results in NULL, set the null-flag to true
|
||||
/// 3. If the null-flag is true, return NULL
|
||||
/// 4. Return FALSE
|
||||
///
|
||||
/// A "NOT IN" operator is computed by first computing the equivalent IN
|
||||
/// operator, then interchanging the TRUE and FALSE results.
|
||||
// todo: Check right affinities
|
||||
#[instrument(skip(program, referenced_tables, resolver), level = Level::DEBUG)]
|
||||
fn translate_in_list(
|
||||
program: &mut ProgramBuilder,
|
||||
referenced_tables: Option<&TableReferences>,
|
||||
lhs: &ast::Expr,
|
||||
rhs: &[Box<ast::Expr>],
|
||||
not: bool,
|
||||
condition_metadata: ConditionMetadata,
|
||||
// dest if null should be in ConditionMetadata
|
||||
resolver: &Resolver,
|
||||
) -> Result<()> {
|
||||
// lhs is e.g. a column reference
|
||||
// rhs is an Option<Vec<Expr>>
|
||||
// If rhs is None, it means the IN expression is always false, i.e. tbl.id IN ().
|
||||
// If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3).
|
||||
//
|
||||
// The IN expression is equivalent to a series of OR expressions.
|
||||
// For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`.
|
||||
// The NOT IN expression is equivalent to a series of AND expressions.
|
||||
// For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`.
|
||||
//
|
||||
// SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values.
|
||||
// For now we don't have the plumbing to do that, so we'll just emit a series of comparisons,
|
||||
// which is what SQLite also does for small lists of values.
|
||||
// TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values.
|
||||
let lhs_reg = if let Expr::Parenthesized(v) = lhs {
|
||||
program.alloc_registers(v.len())
|
||||
} else {
|
||||
program.alloc_register()
|
||||
};
|
||||
let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?;
|
||||
let mut check_null_reg = 0;
|
||||
let label_ok = program.allocate_label();
|
||||
|
||||
if rhs.is_empty() {
|
||||
// If rhs is None, IN expressions are always false and NOT IN expressions are always true.
|
||||
if not {
|
||||
// On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through.
|
||||
// This is because in a more complex condition we might need to evaluate the rest of the condition.
|
||||
// Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect,
|
||||
// but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug.
|
||||
if condition_metadata.jump_if_condition_is_true {
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_true,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_false,
|
||||
});
|
||||
}
|
||||
return Ok(());
|
||||
if condition_metadata.jump_target_when_false != condition_metadata.jump_target_when_null {
|
||||
check_null_reg = program.alloc_register();
|
||||
program.emit_insn(Insn::BitAnd {
|
||||
lhs: lhs_reg,
|
||||
rhs: lhs_reg,
|
||||
dest: check_null_reg,
|
||||
});
|
||||
}
|
||||
|
||||
// The left hand side only needs to be evaluated once we have a list of values to compare against.
|
||||
let lhs_reg = program.alloc_register();
|
||||
let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?;
|
||||
for (i, expr) in rhs.iter().enumerate() {
|
||||
let last_condition = i == rhs.len() - 1;
|
||||
let rhs_reg = program.alloc_register();
|
||||
let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?;
|
||||
|
||||
// The difference between a local jump and an "upper level" jump is that for example in this case:
|
||||
// WHERE foo IN (1,2,3) OR bar = 5,
|
||||
// we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition.
|
||||
// This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition.
|
||||
// However, in this example:
|
||||
// WHERE foo IN (1,2,3) AND bar = 5,
|
||||
// we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later.
|
||||
// This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition.
|
||||
let jump_target_when_true = if condition_metadata.jump_if_condition_is_true {
|
||||
condition_metadata.jump_target_when_true
|
||||
} else {
|
||||
program.allocate_label()
|
||||
};
|
||||
if check_null_reg != 0 && expr.can_be_null() {
|
||||
program.emit_insn(Insn::BitAnd {
|
||||
lhs: check_null_reg,
|
||||
rhs: rhs_reg,
|
||||
dest: check_null_reg,
|
||||
});
|
||||
}
|
||||
|
||||
if !not {
|
||||
// If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true.
|
||||
for (i, expr) in rhs.iter().enumerate() {
|
||||
let rhs_reg = program.alloc_register();
|
||||
let last_condition = i == rhs.len() - 1;
|
||||
let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?;
|
||||
// If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true.
|
||||
if !last_condition {
|
||||
if !last_condition
|
||||
|| condition_metadata.jump_target_when_false != condition_metadata.jump_target_when_null
|
||||
{
|
||||
if lhs_reg != rhs_reg {
|
||||
program.emit_insn(Insn::Eq {
|
||||
lhs: lhs_reg,
|
||||
rhs: rhs_reg,
|
||||
target_pc: jump_target_when_true,
|
||||
target_pc: label_ok,
|
||||
// Use affinity instead
|
||||
flags: CmpInsFlags::default(),
|
||||
collation: program.curr_collation(),
|
||||
});
|
||||
} else {
|
||||
// If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
|
||||
program.emit_insn(Insn::Ne {
|
||||
lhs: lhs_reg,
|
||||
rhs: rhs_reg,
|
||||
target_pc: condition_metadata.jump_target_when_false,
|
||||
flags: CmpInsFlags::default().jump_if_null(),
|
||||
collation: program.curr_collation(),
|
||||
program.emit_insn(Insn::NotNull {
|
||||
reg: lhs_reg,
|
||||
target_pc: label_ok,
|
||||
});
|
||||
}
|
||||
}
|
||||
// If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
|
||||
// If not, we can just fall through without emitting an unnecessary instruction.
|
||||
if condition_metadata.jump_if_condition_is_true {
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_true,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true.
|
||||
for expr in rhs.iter() {
|
||||
let rhs_reg = program.alloc_register();
|
||||
let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?;
|
||||
program.emit_insn(Insn::Eq {
|
||||
// sqlite3VdbeChangeP5(v, zAff[0]);
|
||||
} else if lhs_reg != rhs_reg {
|
||||
program.emit_insn(Insn::Ne {
|
||||
lhs: lhs_reg,
|
||||
rhs: rhs_reg,
|
||||
target_pc: condition_metadata.jump_target_when_false,
|
||||
flags: CmpInsFlags::default().jump_if_null(),
|
||||
flags: CmpInsFlags::default(),
|
||||
collation: program.curr_collation(),
|
||||
});
|
||||
}
|
||||
// If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
|
||||
// If not, we can just fall through without emitting an unnecessary instruction.
|
||||
if condition_metadata.jump_if_condition_is_true {
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_true,
|
||||
} else {
|
||||
program.emit_insn(Insn::IsNull {
|
||||
reg: lhs_reg,
|
||||
target_pc: condition_metadata.jump_target_when_false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if !condition_metadata.jump_if_condition_is_true {
|
||||
program.preassign_label_to_next_insn(jump_target_when_true);
|
||||
if check_null_reg != 0 {
|
||||
program.emit_insn(Insn::IsNull {
|
||||
reg: check_null_reg,
|
||||
target_pc: condition_metadata.jump_target_when_null,
|
||||
});
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_false,
|
||||
});
|
||||
}
|
||||
|
||||
program.resolve_label(label_ok, program.offset());
|
||||
|
||||
// by default if IN expression is true we just continue to the next instruction
|
||||
if condition_metadata.jump_if_condition_is_true {
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: condition_metadata.jump_target_when_true,
|
||||
});
|
||||
}
|
||||
// todo: deallocate check_null_reg
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -402,16 +402,55 @@ pub fn translate_condition_expr(
|
||||
translate_expr(program, Some(referenced_tables), expr, reg, resolver)?;
|
||||
emit_cond_jump(program, condition_metadata, reg);
|
||||
}
|
||||
|
||||
ast::Expr::InList { lhs, not, rhs } => {
|
||||
let ConditionMetadata {
|
||||
jump_if_condition_is_true,
|
||||
jump_target_when_true,
|
||||
jump_target_when_false,
|
||||
jump_target_when_null,
|
||||
} = condition_metadata;
|
||||
|
||||
// Adjust targets if `NOT IN`
|
||||
let (adjusted_metadata, not_true_label, not_false_label) = if *not {
|
||||
let not_true_label = program.allocate_label();
|
||||
let not_false_label = program.allocate_label();
|
||||
(
|
||||
ConditionMetadata {
|
||||
jump_if_condition_is_true,
|
||||
jump_target_when_true: not_true_label,
|
||||
jump_target_when_false: not_false_label,
|
||||
jump_target_when_null,
|
||||
},
|
||||
Some(not_true_label),
|
||||
Some(not_false_label),
|
||||
)
|
||||
} else {
|
||||
(condition_metadata, None, None)
|
||||
};
|
||||
|
||||
translate_in_list(
|
||||
program,
|
||||
Some(referenced_tables),
|
||||
lhs,
|
||||
rhs,
|
||||
*not,
|
||||
condition_metadata,
|
||||
adjusted_metadata,
|
||||
resolver,
|
||||
)?;
|
||||
|
||||
if *not {
|
||||
// When IN is TRUE (match found), NOT IN should be FALSE
|
||||
program.resolve_label(not_true_label.unwrap(), program.offset());
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: jump_target_when_false,
|
||||
});
|
||||
|
||||
// When IN is FALSE (no match), NOT IN should be TRUE
|
||||
program.resolve_label(not_false_label.unwrap(), program.offset());
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: jump_target_when_true,
|
||||
});
|
||||
}
|
||||
}
|
||||
ast::Expr::Like { not, .. } => {
|
||||
let cur_reg = program.alloc_register();
|
||||
@@ -2029,26 +2068,29 @@ pub fn translate_expr(
|
||||
// but wrap it with appropriate expression context handling
|
||||
let result_reg = target_register;
|
||||
|
||||
// Set result to NULL initially (matches SQLite behavior)
|
||||
program.emit_insn(Insn::Null {
|
||||
dest: result_reg,
|
||||
let dest_if_false = program.allocate_label();
|
||||
let dest_if_null = program.allocate_label();
|
||||
let dest_if_true = program.allocate_label();
|
||||
|
||||
// Ideally we wouldn't need a tmp register, but currently if an IN expression
|
||||
// is used inside an aggregator the target_register is cleared on every iteration,
|
||||
// losing the state of the aggregator.
|
||||
let tmp = program.alloc_register();
|
||||
program.emit_no_constant_insn(Insn::Null {
|
||||
dest: tmp,
|
||||
dest_end: None,
|
||||
});
|
||||
|
||||
let dest_if_false = program.allocate_label();
|
||||
let label_integer_conversion = program.allocate_label();
|
||||
|
||||
// Call the core InList logic with expression-appropriate condition metadata
|
||||
translate_in_list(
|
||||
program,
|
||||
referenced_tables,
|
||||
lhs,
|
||||
rhs,
|
||||
*not,
|
||||
ConditionMetadata {
|
||||
jump_if_condition_is_true: false,
|
||||
jump_target_when_true: label_integer_conversion, // will be resolved below
|
||||
jump_target_when_true: dest_if_true,
|
||||
jump_target_when_false: dest_if_false,
|
||||
jump_target_when_null: dest_if_null,
|
||||
},
|
||||
resolver,
|
||||
)?;
|
||||
@@ -2056,27 +2098,30 @@ pub fn translate_expr(
|
||||
// condition true: set result to 1
|
||||
program.emit_insn(Insn::Integer {
|
||||
value: 1,
|
||||
dest: result_reg,
|
||||
});
|
||||
program.emit_insn(Insn::Goto {
|
||||
target_pc: label_integer_conversion,
|
||||
dest: tmp,
|
||||
});
|
||||
|
||||
// False path: set result to 0
|
||||
program.resolve_label(dest_if_false, program.offset());
|
||||
program.emit_insn(Insn::Integer {
|
||||
value: 0,
|
||||
dest: result_reg,
|
||||
});
|
||||
|
||||
program.resolve_label(label_integer_conversion, program.offset());
|
||||
|
||||
// Force integer conversion with AddImm 0
|
||||
program.emit_insn(Insn::AddImm {
|
||||
register: result_reg,
|
||||
register: tmp,
|
||||
value: 0,
|
||||
});
|
||||
|
||||
if *not {
|
||||
program.emit_insn(Insn::Not {
|
||||
reg: tmp,
|
||||
dest: tmp,
|
||||
});
|
||||
}
|
||||
program.resolve_label(dest_if_null, program.offset());
|
||||
program.emit_insn(Insn::Copy {
|
||||
src_reg: tmp,
|
||||
dst_reg: result_reg,
|
||||
extra_amount: 0,
|
||||
});
|
||||
Ok(result_reg)
|
||||
}
|
||||
ast::Expr::InSelect { .. } => {
|
||||
|
||||
@@ -786,6 +786,8 @@ pub fn group_by_emit_row_phase<'a>(
|
||||
jump_if_condition_is_true: false,
|
||||
jump_target_when_false: labels.label_group_by_end_without_emitting_row,
|
||||
jump_target_when_true: if_true_target,
|
||||
// treat null result has false for now
|
||||
jump_target_when_null: labels.label_group_by_end_without_emitting_row,
|
||||
},
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
|
||||
@@ -245,6 +245,7 @@ pub fn translate_create_index(
|
||||
jump_if_condition_is_true: false,
|
||||
jump_target_when_false: label,
|
||||
jump_target_when_true: BranchOffset::Placeholder,
|
||||
jump_target_when_null: label,
|
||||
},
|
||||
resolver,
|
||||
)?;
|
||||
|
||||
@@ -388,6 +388,7 @@ pub fn init_loop(
|
||||
jump_if_condition_is_true: false,
|
||||
jump_target_when_true: jump_target,
|
||||
jump_target_when_false: t_ctx.label_main_loop_end.unwrap(),
|
||||
jump_target_when_null: t_ctx.label_main_loop_end.unwrap(),
|
||||
};
|
||||
translate_condition_expr(program, tables, &cond.expr, meta, &t_ctx.resolver)?;
|
||||
program.preassign_label_to_next_insn(jump_target);
|
||||
@@ -713,6 +714,7 @@ fn emit_conditions(
|
||||
jump_if_condition_is_true: false,
|
||||
jump_target_when_true,
|
||||
jump_target_when_false: next,
|
||||
jump_target_when_null: next,
|
||||
};
|
||||
translate_condition_expr(
|
||||
program,
|
||||
@@ -727,7 +729,7 @@ fn emit_conditions(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// SQLite (and so Limbo) processes joins as a nested loop.
|
||||
/// SQLite (and so Turso) processes joins as a nested loop.
|
||||
/// The loop may emit rows to various destinations depending on the query:
|
||||
/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match)
|
||||
/// - a GROUP BY phase with no sorting (when the rows are already in the order required by the GROUP BY keys)
|
||||
|
||||
@@ -341,6 +341,14 @@ impl ProgramBuilder {
|
||||
self.insns.push((insn, self.insns.len()));
|
||||
}
|
||||
|
||||
/// Emit an instruction that is guaranteed not to be in any constant span.
|
||||
/// This ensures the instruction won't be hoisted when emit_constant_insns is called.
|
||||
#[instrument(skip(self), level = Level::DEBUG)]
|
||||
pub fn emit_no_constant_insn(&mut self, insn: Insn) {
|
||||
self.constant_span_end_all();
|
||||
self.emit_insn(insn);
|
||||
}
|
||||
|
||||
pub fn close_cursors(&mut self, cursors: &[CursorID]) {
|
||||
for cursor in cursors {
|
||||
self.emit_insn(Insn::Close { cursor_id: *cursor });
|
||||
|
||||
@@ -853,10 +853,12 @@ pub enum Insn {
|
||||
db: usize,
|
||||
},
|
||||
|
||||
/// Make a copy of register src..src+extra_amount into dst..dst+extra_amount.
|
||||
Copy {
|
||||
src_reg: usize,
|
||||
dst_reg: usize,
|
||||
extra_amount: usize, // 0 extra_amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount
|
||||
/// 0 extra_amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount
|
||||
extra_amount: usize,
|
||||
},
|
||||
|
||||
/// Allocate a new b-tree.
|
||||
|
||||
@@ -542,6 +542,17 @@ impl Expr {
|
||||
pub fn raise(resolve_type: ResolveType, expr: Option<Expr>) -> Expr {
|
||||
Expr::Raise(resolve_type, expr.map(Box::new))
|
||||
}
|
||||
|
||||
pub fn can_be_null(&self) -> bool {
|
||||
// todo: better handling columns. Check sqlite3ExprCanBeNull
|
||||
match self {
|
||||
Expr::Literal(literal) => !matches!(
|
||||
literal,
|
||||
Literal::Numeric(_) | Literal::String(_) | Literal::Blob(_)
|
||||
),
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// SQL literal
|
||||
|
||||
@@ -1723,11 +1723,23 @@ impl<'a> Parser<'a> {
|
||||
_ => {
|
||||
let exprs = self.parse_expr_list()?;
|
||||
eat_expect!(self, TK_RP);
|
||||
Box::new(Expr::InList {
|
||||
lhs: result,
|
||||
not,
|
||||
rhs: exprs,
|
||||
})
|
||||
// Expressions in the form:
|
||||
// lhs IN ()
|
||||
// lhs NOT IN ()
|
||||
// can be simplified to constants 0 (false) and 1 (true), respectively.
|
||||
//
|
||||
// todo: should check if lhs has a function. If so, this optimization cannot
|
||||
// be done.
|
||||
if exprs.is_empty() {
|
||||
let name = if not { "1" } else { "0" };
|
||||
Box::new(Expr::Literal(Literal::Numeric(name.into())))
|
||||
} else {
|
||||
Box::new(Expr::InList {
|
||||
lhs: result,
|
||||
rhs: exprs,
|
||||
not,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -913,6 +913,19 @@ do_execsql_test_on_specific_db {:memory:} select-in-simple {
|
||||
} {1
|
||||
0}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-in-with-nulls {
|
||||
SELECT 4 IN (1, 4, null);
|
||||
SELECT 4 NOT IN (1, 4, null);
|
||||
} {1
|
||||
0}
|
||||
|
||||
# All should be null
|
||||
do_execsql_test_on_specific_db {:memory:} select-in-with-nulls-2 {
|
||||
SELECT 1 IN (2, 3, null);
|
||||
SELECT 1 NOT IN (2, 3, null);
|
||||
SELECT null in (null);
|
||||
} {\n\n}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} select-in-complex {
|
||||
CREATE TABLE test_table (id INTEGER, category TEXT, value INTEGER);
|
||||
INSERT INTO test_table VALUES (1, 'A', 10), (2, 'B', 20), (3, 'A', 30), (4, 'C', 40);
|
||||
|
||||
Reference in New Issue
Block a user