From 70fc509046fdd3b19b3e9c3953fe5984f3230f38 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 6 Oct 2025 02:14:15 -0300 Subject: [PATCH 1/8] First step to fix 3277 This follows almost step by step sqlite's functions, and indeed it's correct. But still have to translate some of this logic to our current semantics --- core/translate/expr.rs | 217 ++++++++++++++++++++--------------------- 1 file changed, 106 insertions(+), 111 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index d9e1c281d..359bb8cf7 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -149,6 +149,33 @@ macro_rules! expect_arguments_even { }}; } +fn expr_vec_size(expr: &ast::Expr) -> usize { + match expr { + Expr::Parenthesized(v) => v.len(), + _ => 1, + } +} + +fn expr_code_vector(program: &mut ProgramBuilder, expr: &ast::Expr) -> usize { + let size = expr_vec_size(expr); + program.alloc_registers(size) +} + +fn expr_can_be_null(expr: &ast::Expr) -> bool { + // todo: better handling columns. Check sqlite3ExprCanBeNull + match expr { + Expr::Literal(literal) => match literal { + ast::Literal::Numeric(_) => false, + ast::Literal::String(_) => false, + ast::Literal::Blob(_) => false, + _ => true, + }, + _ => true, + } +} + +// todo(rn): Check right affinities, fix other call to translate + /// Core implementation of IN expression logic that can be used in both conditional and expression contexts. /// This follows SQLite's approach where a single core function handles all InList cases. /// @@ -160,121 +187,92 @@ fn translate_in_list( referenced_tables: Option<&TableReferences>, lhs: &ast::Expr, rhs: &[Box], - not: bool, - condition_metadata: ConditionMetadata, + dest_if_false: BranchOffset, + dest_if_null: BranchOffset, resolver: &Resolver, ) -> Result<()> { - // lhs is e.g. a column reference - // rhs is an Option> - // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). - // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). - // - // The IN expression is equivalent to a series of OR expressions. - // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. - // The NOT IN expression is equivalent to a series of AND expressions. - // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. - // - // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. - // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, - // which is what SQLite also does for small lists of values. - // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. - + // Disclamer: SQLite does this opt during parsing (https://github.com/sqlite/sqlite/blob/833fb1ef59b1c62fb2b00c7a121a5b5171f8a85e/src/parse.y#L1425) + // But we're the cool kids so we gotta do during translation :) if rhs.is_empty() { - // If rhs is None, IN expressions are always false and NOT IN expressions are always true. - if not { - // On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through. - // This is because in a more complex condition we might need to evaluate the rest of the condition. - // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect, - // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. - if condition_metadata.jump_if_condition_is_true { - program.emit_insn(Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }); - } - } else { - program.emit_insn(Insn::Goto { - target_pc: condition_metadata.jump_target_when_false, - }); - } + program.emit_insn(Insn::Goto { + target_pc: dest_if_false, + }); return Ok(()); } - // The left hand side only needs to be evaluated once we have a list of values to compare against. - let lhs_reg = program.alloc_register(); + let lhs_reg = expr_code_vector(program, lhs); let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?; + let mut check_null_reg = 0; + let label_ok = program.allocate_label(); - // The difference between a local jump and an "upper level" jump is that for example in this case: - // WHERE foo IN (1,2,3) OR bar = 5, - // we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. - // This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition. - // However, in this example: - // WHERE foo IN (1,2,3) AND bar = 5, - // we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. - // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. - let jump_target_when_true = if condition_metadata.jump_if_condition_is_true { - condition_metadata.jump_target_when_true - } else { - program.allocate_label() - }; + if dest_if_false != dest_if_null { + check_null_reg = program.alloc_register(); + program.emit_insn(Insn::BitAnd { + lhs: lhs_reg, + rhs: lhs_reg, + dest: check_null_reg, + }); + } - if !not { - // If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true. - for (i, expr) in rhs.iter().enumerate() { - let rhs_reg = program.alloc_register(); - let last_condition = i == rhs.len() - 1; - let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?; - // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. - if !last_condition { + for (i, expr) in rhs.iter().enumerate() { + let last_condition = i == rhs.len() - 1; + let rhs_reg = program.alloc_register(); + let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?; + + if check_null_reg != 0 && expr_can_be_null(expr) { + program.emit_insn(Insn::BitAnd { + lhs: check_null_reg, + rhs: rhs_reg, + dest: check_null_reg, + }); + } + + if !last_condition || dest_if_false != dest_if_null { + if lhs_reg != rhs_reg { program.emit_insn(Insn::Eq { lhs: lhs_reg, rhs: rhs_reg, - target_pc: jump_target_when_true, + target_pc: label_ok, + // Use affinity instead flags: CmpInsFlags::default(), collation: program.curr_collation(), }); } else { - // If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match. + program.emit_insn(Insn::NotNull { + reg: lhs_reg, + target_pc: label_ok, + }); + } + // sqlite3VdbeChangeP5(v, zAff[0]); + } else { + if lhs_reg != rhs_reg { program.emit_insn(Insn::Ne { lhs: lhs_reg, rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - flags: CmpInsFlags::default().jump_if_null(), + target_pc: dest_if_false, + flags: CmpInsFlags::default(), collation: program.curr_collation(), }); + } else { + program.emit_insn(Insn::IsNull { + reg: lhs_reg, + target_pc: dest_if_false, + }); } } - // If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. - if condition_metadata.jump_if_condition_is_true { - program.emit_insn(Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }); - } - } else { - // If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true. - for expr in rhs.iter() { - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?; - program.emit_insn(Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - flags: CmpInsFlags::default().jump_if_null(), - collation: program.curr_collation(), - }); - } - // If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'. - // If not, we can just fall through without emitting an unnecessary instruction. - if condition_metadata.jump_if_condition_is_true { - program.emit_insn(Insn::Goto { - target_pc: condition_metadata.jump_target_when_true, - }); - } } - if !condition_metadata.jump_if_condition_is_true { - program.preassign_label_to_next_insn(jump_target_when_true); + if check_null_reg != 0 { + program.emit_insn(Insn::IsNull { + reg: check_null_reg, + target_pc: dest_if_null, + }); + program.emit_insn(Insn::Goto { + target_pc: dest_if_false, + }); } + program.resolve_label(label_ok, program.offset()); + // todo: deallocate check_null_reg Ok(()) } @@ -403,13 +401,20 @@ pub fn translate_condition_expr( emit_cond_jump(program, condition_metadata, reg); } ast::Expr::InList { lhs, not, rhs } => { + let ConditionMetadata { + jump_target_when_true, + jump_target_when_false, + .. + } = condition_metadata; + // fix me translate_in_list( program, Some(referenced_tables), lhs, rhs, - *not, - condition_metadata, + jump_target_when_false, + // should be null!!! + jump_target_when_true, resolver, )?; } @@ -2029,14 +2034,13 @@ pub fn translate_expr( // but wrap it with appropriate expression context handling let result_reg = target_register; - // Set result to NULL initially (matches SQLite behavior) program.emit_insn(Insn::Null { dest: result_reg, dest_end: None, }); let dest_if_false = program.allocate_label(); - let label_integer_conversion = program.allocate_label(); + let dest_if_null = program.allocate_label(); // Call the core InList logic with expression-appropriate condition metadata translate_in_list( @@ -2044,12 +2048,8 @@ pub fn translate_expr( referenced_tables, lhs, rhs, - *not, - ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true: label_integer_conversion, // will be resolved below - jump_target_when_false: dest_if_false, - }, + dest_if_false, + dest_if_null, resolver, )?; @@ -2058,25 +2058,20 @@ pub fn translate_expr( value: 1, dest: result_reg, }); - program.emit_insn(Insn::Goto { - target_pc: label_integer_conversion, - }); - // False path: set result to 0 program.resolve_label(dest_if_false, program.offset()); - program.emit_insn(Insn::Integer { - value: 0, - dest: result_reg, - }); - - program.resolve_label(label_integer_conversion, program.offset()); - // Force integer conversion with AddImm 0 program.emit_insn(Insn::AddImm { register: result_reg, value: 0, }); - + if *not { + program.emit_insn(Insn::Not { + reg: result_reg, + dest: result_reg, + }); + } + program.resolve_label(dest_if_null, program.offset()); Ok(result_reg) } ast::Expr::InSelect { .. } => { From 52ed0f7997b56c8ae40e5938221ddd9db473faa3 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 6 Oct 2025 21:34:54 -0300 Subject: [PATCH 2/8] Add in expr optimization at the parser level instead of translation. lhs IN () and lhs NOT IN () can be translated to false and true. --- core/translate/expr.rs | 9 --------- parser/src/parser.rs | 22 +++++++++++++++++----- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 359bb8cf7..ce797c75a 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -191,15 +191,6 @@ fn translate_in_list( dest_if_null: BranchOffset, resolver: &Resolver, ) -> Result<()> { - // Disclamer: SQLite does this opt during parsing (https://github.com/sqlite/sqlite/blob/833fb1ef59b1c62fb2b00c7a121a5b5171f8a85e/src/parse.y#L1425) - // But we're the cool kids so we gotta do during translation :) - if rhs.is_empty() { - program.emit_insn(Insn::Goto { - target_pc: dest_if_false, - }); - return Ok(()); - } - let lhs_reg = expr_code_vector(program, lhs); let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?; let mut check_null_reg = 0; diff --git a/parser/src/parser.rs b/parser/src/parser.rs index b4188df44..b134f9f95 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1723,11 +1723,23 @@ impl<'a> Parser<'a> { _ => { let exprs = self.parse_expr_list()?; eat_expect!(self, TK_RP); - Box::new(Expr::InList { - lhs: result, - not, - rhs: exprs, - }) + // Expressions in the form: + // lhs IN () + // lhs NOT IN () + // can be simplified to constants 0 (false) and 1 (true), respectively. + // + // todo: should check if lhs has a function. If so, this optimization cannot + // be done. + if exprs.is_empty() { + let name = if not { "1" } else { "0" }; + Box::new(Expr::Literal(Literal::Numeric(name.into()))) + } else { + Box::new(Expr::InList { + lhs: result, + not, + rhs: exprs, + }) + } } } } From 79958f468d4b25a5a773b7afdd918787852bc524 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 6 Oct 2025 21:49:34 -0300 Subject: [PATCH 3/8] Add jump_target_null to ConditionMetadata It's kinda make sense, conditions can be evaluated into 3 values: false, true and null. Now we handle that. --- core/translate/expr.rs | 49 +++++++++++++++++++++---------------- core/translate/group_by.rs | 2 ++ core/translate/index.rs | 2 ++ core/translate/main_loop.rs | 6 ++++- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index ce797c75a..57da3c669 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -30,6 +30,7 @@ pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, pub jump_target_when_true: BranchOffset, pub jump_target_when_false: BranchOffset, + pub jump_target_when_null: BranchOffset, } /// Container for register locations of values that can be referenced in RETURNING expressions @@ -174,21 +175,20 @@ fn expr_can_be_null(expr: &ast::Expr) -> bool { } } -// todo(rn): Check right affinities, fix other call to translate - /// Core implementation of IN expression logic that can be used in both conditional and expression contexts. /// This follows SQLite's approach where a single core function handles all InList cases. /// /// This is extracted from the original conditional implementation to be reusable. /// The logic exactly matches the original conditional InList implementation. +// todo: Check right affinities #[instrument(skip(program, referenced_tables, resolver), level = Level::DEBUG)] fn translate_in_list( program: &mut ProgramBuilder, referenced_tables: Option<&TableReferences>, lhs: &ast::Expr, rhs: &[Box], - dest_if_false: BranchOffset, - dest_if_null: BranchOffset, + condition_metadata: ConditionMetadata, + // dest if null should be in ConditionMetadata resolver: &Resolver, ) -> Result<()> { let lhs_reg = expr_code_vector(program, lhs); @@ -196,7 +196,7 @@ fn translate_in_list( let mut check_null_reg = 0; let label_ok = program.allocate_label(); - if dest_if_false != dest_if_null { + if condition_metadata.jump_target_when_false != condition_metadata.jump_target_when_null { check_null_reg = program.alloc_register(); program.emit_insn(Insn::BitAnd { lhs: lhs_reg, @@ -218,7 +218,9 @@ fn translate_in_list( }); } - if !last_condition || dest_if_false != dest_if_null { + if !last_condition + || condition_metadata.jump_target_when_false != condition_metadata.jump_target_when_null + { if lhs_reg != rhs_reg { program.emit_insn(Insn::Eq { lhs: lhs_reg, @@ -240,14 +242,14 @@ fn translate_in_list( program.emit_insn(Insn::Ne { lhs: lhs_reg, rhs: rhs_reg, - target_pc: dest_if_false, + target_pc: condition_metadata.jump_target_when_false, flags: CmpInsFlags::default(), collation: program.curr_collation(), }); } else { program.emit_insn(Insn::IsNull { reg: lhs_reg, - target_pc: dest_if_false, + target_pc: condition_metadata.jump_target_when_false, }); } } @@ -256,10 +258,17 @@ fn translate_in_list( if check_null_reg != 0 { program.emit_insn(Insn::IsNull { reg: check_null_reg, - target_pc: dest_if_null, + target_pc: condition_metadata.jump_target_when_null, }); program.emit_insn(Insn::Goto { - target_pc: dest_if_false, + target_pc: condition_metadata.jump_target_when_false, + }); + } + + // by default if IN expression is true we just continue to the next instruction + if condition_metadata.jump_if_condition_is_true { + program.emit_insn(Insn::Goto { + target_pc: condition_metadata.jump_target_when_true, }); } program.resolve_label(label_ok, program.offset()); @@ -392,20 +401,12 @@ pub fn translate_condition_expr( emit_cond_jump(program, condition_metadata, reg); } ast::Expr::InList { lhs, not, rhs } => { - let ConditionMetadata { - jump_target_when_true, - jump_target_when_false, - .. - } = condition_metadata; - // fix me translate_in_list( program, Some(referenced_tables), lhs, rhs, - jump_target_when_false, - // should be null!!! - jump_target_when_true, + condition_metadata, resolver, )?; } @@ -2032,6 +2033,8 @@ pub fn translate_expr( let dest_if_false = program.allocate_label(); let dest_if_null = program.allocate_label(); + // won't use this label :/ + let dest_if_true = program.allocate_label(); // Call the core InList logic with expression-appropriate condition metadata translate_in_list( @@ -2039,8 +2042,12 @@ pub fn translate_expr( referenced_tables, lhs, rhs, - dest_if_false, - dest_if_null, + ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true: dest_if_true, + jump_target_when_false: dest_if_false, + jump_target_when_null: dest_if_null, + }, resolver, )?; diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 94d8dee03..6f891ae2b 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -778,6 +778,7 @@ pub fn group_by_emit_row_phase<'a>( if let Some(having) = &group_by.having { for expr in having.iter() { let if_true_target = program.allocate_label(); + let if_null_target = program.allocate_label(); translate_condition_expr( program, &plan.table_references, @@ -786,6 +787,7 @@ pub fn group_by_emit_row_phase<'a>( jump_if_condition_is_true: false, jump_target_when_false: labels.label_group_by_end_without_emitting_row, jump_target_when_true: if_true_target, + jump_target_when_null: if_null_target, }, &t_ctx.resolver, )?; diff --git a/core/translate/index.rs b/core/translate/index.rs index 8aced50d9..ced75fdf7 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -237,6 +237,7 @@ pub fn translate_create_index( let mut skip_row_label = None; if let Some(where_clause) = where_clause { let label = program.allocate_label(); + let null_label = program.allocate_label(); translate_condition_expr( &mut program, &table_references, @@ -245,6 +246,7 @@ pub fn translate_create_index( jump_if_condition_is_true: false, jump_target_when_false: label, jump_target_when_true: BranchOffset::Placeholder, + jump_target_when_null: null_label, }, resolver, )?; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 03e00f01d..a93390d11 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -384,10 +384,12 @@ pub fn init_loop( .filter(|c| c.should_eval_before_loop(&[JoinOrderMember::default()])) { let jump_target = program.allocate_label(); + let null_jump_target = program.allocate_label(); let meta = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true: jump_target, jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), + jump_target_when_null: null_jump_target, }; translate_condition_expr(program, tables, &cond.expr, meta, &t_ctx.resolver)?; program.preassign_label_to_next_insn(jump_target); @@ -709,10 +711,12 @@ fn emit_conditions( .filter(|cond| cond.should_eval_at_loop(join_index, join_order)) { let jump_target_when_true = program.allocate_label(); + let jump_target_when_null = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, jump_target_when_false: next, + jump_target_when_null, }; translate_condition_expr( program, @@ -727,7 +731,7 @@ fn emit_conditions( Ok(()) } -/// SQLite (and so Limbo) processes joins as a nested loop. +/// SQLite (and so Turso) processes joins as a nested loop. /// The loop may emit rows to various destinations depending on the query: /// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) /// - a GROUP BY phase with no sorting (when the rows are already in the order required by the GROUP BY keys) From da323fa0c4fe8675c6e20fe5092ce84b81d321e7 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Tue, 7 Oct 2025 02:34:33 -0300 Subject: [PATCH 4/8] Some clean ups and correctly working on WHERE clauses --- core/translate/expr.rs | 89 ++++++++++++++++++++++++++++--------- core/translate/group_by.rs | 4 +- core/translate/index.rs | 3 +- core/translate/main_loop.rs | 6 +-- parser/src/parser.rs | 2 +- testing/select.test | 13 ++++++ 6 files changed, 87 insertions(+), 30 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 57da3c669..2ab33a85a 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -165,12 +165,10 @@ fn expr_code_vector(program: &mut ProgramBuilder, expr: &ast::Expr) -> usize { fn expr_can_be_null(expr: &ast::Expr) -> bool { // todo: better handling columns. Check sqlite3ExprCanBeNull match expr { - Expr::Literal(literal) => match literal { - ast::Literal::Numeric(_) => false, - ast::Literal::String(_) => false, - ast::Literal::Blob(_) => false, - _ => true, - }, + Expr::Literal(literal) => !matches!( + literal, + ast::Literal::Numeric(_) | ast::Literal::String(_) | ast::Literal::Blob(_) + ), _ => true, } } @@ -180,6 +178,30 @@ fn expr_can_be_null(expr: &ast::Expr) -> bool { /// /// This is extracted from the original conditional implementation to be reusable. /// The logic exactly matches the original conditional InList implementation. +/// +/// An IN expression has one of the following formats: +/// ```sql +/// x IN (y1, y2,...,yN) +/// x IN (subquery) (Not yet implemented) +/// ``` +/// The result of an IN operator is one of TRUE, FALSE, or NULL. A NULL result +/// means that it cannot be determined if the LHS is contained in the RHS due +/// to the presence of NULL values. +/// +/// Currently, we do a simple full-scan, yet it's not ideal when there are many rows +/// on RHS. (Check sqlite's in-operator.md) +/// +/// Algorithm: +/// 1. Set the null-flag to false +/// 2. For each row in the RHS: +/// - Compare LHS and RHS +/// - If LHS matches RHS, returns TRUE +/// - If the comparison results in NULL, set the null-flag to true +/// 3. If the null-flag is true, return NULL +/// 4. Return FALSE +/// +/// A "NOT IN" operator is computed by first computing the equivalent IN +/// operator, then interchanging the TRUE and FALSE results. // todo: Check right affinities #[instrument(skip(program, referenced_tables, resolver), level = Level::DEBUG)] fn translate_in_list( @@ -237,21 +259,19 @@ fn translate_in_list( }); } // sqlite3VdbeChangeP5(v, zAff[0]); + } else if lhs_reg != rhs_reg { + program.emit_insn(Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + flags: CmpInsFlags::default(), + collation: program.curr_collation(), + }); } else { - if lhs_reg != rhs_reg { - program.emit_insn(Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - flags: CmpInsFlags::default(), - collation: program.curr_collation(), - }); - } else { - program.emit_insn(Insn::IsNull { - reg: lhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }); - } + program.emit_insn(Insn::IsNull { + reg: lhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }); } } @@ -400,15 +420,42 @@ pub fn translate_condition_expr( translate_expr(program, Some(referenced_tables), expr, reg, resolver)?; emit_cond_jump(program, condition_metadata, reg); } + ast::Expr::InList { lhs, not, rhs } => { + let ConditionMetadata { + jump_if_condition_is_true, + jump_target_when_true, + jump_target_when_false, + jump_target_when_null, + } = condition_metadata; + + // Adjust targets if `NOT IN` + let adjusted_metadata = if *not { + ConditionMetadata { + jump_if_condition_is_true, + jump_target_when_true, + jump_target_when_false: jump_target_when_true, + jump_target_when_null, + } + } else { + condition_metadata + }; + translate_in_list( program, Some(referenced_tables), lhs, rhs, - condition_metadata, + adjusted_metadata, resolver, )?; + + if *not { + program.emit_insn(Insn::Goto { + target_pc: jump_target_when_false, + }); + program.resolve_label(adjusted_metadata.jump_target_when_false, program.offset()); + } } ast::Expr::Like { not, .. } => { let cur_reg = program.alloc_register(); diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 6f891ae2b..19f91c5bc 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -778,7 +778,6 @@ pub fn group_by_emit_row_phase<'a>( if let Some(having) = &group_by.having { for expr in having.iter() { let if_true_target = program.allocate_label(); - let if_null_target = program.allocate_label(); translate_condition_expr( program, &plan.table_references, @@ -787,7 +786,8 @@ pub fn group_by_emit_row_phase<'a>( jump_if_condition_is_true: false, jump_target_when_false: labels.label_group_by_end_without_emitting_row, jump_target_when_true: if_true_target, - jump_target_when_null: if_null_target, + // treat null result has false for now + jump_target_when_null: labels.label_group_by_end_without_emitting_row, }, &t_ctx.resolver, )?; diff --git a/core/translate/index.rs b/core/translate/index.rs index ced75fdf7..5fa07dc0d 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -237,7 +237,6 @@ pub fn translate_create_index( let mut skip_row_label = None; if let Some(where_clause) = where_clause { let label = program.allocate_label(); - let null_label = program.allocate_label(); translate_condition_expr( &mut program, &table_references, @@ -246,7 +245,7 @@ pub fn translate_create_index( jump_if_condition_is_true: false, jump_target_when_false: label, jump_target_when_true: BranchOffset::Placeholder, - jump_target_when_null: null_label, + jump_target_when_null: label, }, resolver, )?; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index a93390d11..1f9d0a069 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -384,12 +384,11 @@ pub fn init_loop( .filter(|c| c.should_eval_before_loop(&[JoinOrderMember::default()])) { let jump_target = program.allocate_label(); - let null_jump_target = program.allocate_label(); let meta = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true: jump_target, jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), - jump_target_when_null: null_jump_target, + jump_target_when_null: t_ctx.label_main_loop_end.unwrap(), }; translate_condition_expr(program, tables, &cond.expr, meta, &t_ctx.resolver)?; program.preassign_label_to_next_insn(jump_target); @@ -711,12 +710,11 @@ fn emit_conditions( .filter(|cond| cond.should_eval_at_loop(join_index, join_order)) { let jump_target_when_true = program.allocate_label(); - let jump_target_when_null = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, jump_target_when_false: next, - jump_target_when_null, + jump_target_when_null: next, }; translate_condition_expr( program, diff --git a/parser/src/parser.rs b/parser/src/parser.rs index b134f9f95..bbab464ff 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1736,8 +1736,8 @@ impl<'a> Parser<'a> { } else { Box::new(Expr::InList { lhs: result, - not, rhs: exprs, + not, }) } } diff --git a/testing/select.test b/testing/select.test index f70d0ada7..5dff582f5 100755 --- a/testing/select.test +++ b/testing/select.test @@ -913,6 +913,19 @@ do_execsql_test_on_specific_db {:memory:} select-in-simple { } {1 0} +do_execsql_test_on_specific_db {:memory:} select-in-with-nulls { + SELECT 4 IN (1, 4, null); + SELECT 4 NOT IN (1, 4, null); +} {1 +0} + +# All should be null +do_execsql_test_on_specific_db {:memory:} select-in-with-nulls-2 { +SELECT 1 IN (2, 3, null); +SELECT 1 NOT IN (2, 3, null); +SELECT null in (null); +} {\n\n} + do_execsql_test_on_specific_db {:memory:} select-in-complex { CREATE TABLE test_table (id INTEGER, category TEXT, value INTEGER); INSERT INTO test_table VALUES (1, 'A', 10), (2, 'B', 20), (3, 'A', 30), (4, 'C', 40); From 625403cc2a307f855b80d3ff0b1c29d5012276be Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Tue, 7 Oct 2025 16:54:50 -0300 Subject: [PATCH 5/8] Fix register reuse when called inside a coroutine - On each interaction we assume that the value is NULL, so we need to set it like so for every interaction in the list. So we force to not emit this NULL as constant; - Forces a copy so IN expressions works inside an aggregation expression. Not ideal but it works, we should work more on the query planner for sure. --- core/translate/expr.rs | 42 +++++++++++++++++++++++++++++++++--------- core/vdbe/insn.rs | 4 +++- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 2ab33a85a..99c0ad4bb 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -43,6 +43,13 @@ pub struct ReturningValueRegisters { pub num_columns: usize, } +/// Emit an instruction that is guaranteed not to be in any constant span. +/// This ensures the instruction won't be hoisted when emit_constant_insns is called. +fn emit_no_constant_insn(program: &mut ProgramBuilder, insn: Insn) { + program.constant_span_end_all(); + program.emit_insn(insn); +} + #[instrument(skip_all, level = Level::DEBUG)] fn emit_cond_jump(program: &mut ProgramBuilder, cond_meta: ConditionMetadata, reg: usize) { if cond_meta.jump_if_condition_is_true { @@ -2073,16 +2080,28 @@ pub fn translate_expr( // but wrap it with appropriate expression context handling let result_reg = target_register; - program.emit_insn(Insn::Null { - dest: result_reg, - dest_end: None, - }); - let dest_if_false = program.allocate_label(); let dest_if_null = program.allocate_label(); // won't use this label :/ let dest_if_true = program.allocate_label(); + let tmp = program.alloc_register(); + emit_no_constant_insn( + program, + Insn::Null { + dest: tmp, + dest_end: None, + }, + ); + translate_expr_no_constant_opt( + program, + referenced_tables, + &ast::Expr::Literal(ast::Literal::Null), + tmp, + resolver, + NoConstantOptReason::RegisterReuse, + )?; + // Call the core InList logic with expression-appropriate condition metadata translate_in_list( program, @@ -2101,22 +2120,27 @@ pub fn translate_expr( // condition true: set result to 1 program.emit_insn(Insn::Integer { value: 1, - dest: result_reg, + dest: tmp, }); // False path: set result to 0 program.resolve_label(dest_if_false, program.offset()); // Force integer conversion with AddImm 0 program.emit_insn(Insn::AddImm { - register: result_reg, + register: tmp, value: 0, }); if *not { program.emit_insn(Insn::Not { - reg: result_reg, - dest: result_reg, + reg: tmp, + dest: tmp, }); } program.resolve_label(dest_if_null, program.offset()); + program.emit_insn(Insn::Copy { + src_reg: tmp, + dst_reg: result_reg, + extra_amount: 0, + }); Ok(result_reg) } ast::Expr::InSelect { .. } => { diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 9a5bab21c..64bae3947 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -853,10 +853,12 @@ pub enum Insn { db: usize, }, + /// Make a copy of register src..src+extra_amount into dst..dst+extra_amount. Copy { src_reg: usize, dst_reg: usize, - extra_amount: usize, // 0 extra_amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount + /// 0 extra_amount means we include src_reg, dst_reg..=dst_reg+amount = src_reg..=src_reg+amount + extra_amount: usize, }, /// Allocate a new b-tree. From 84e8d117648e0e00f90a5427a641f6ea8da0d3c5 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Wed, 8 Oct 2025 14:42:55 -0300 Subject: [PATCH 6/8] Fix bug when jump_if_true is enabled --- core/translate/expr.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 99c0ad4bb..f1ec8ada0 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -292,13 +292,14 @@ fn translate_in_list( }); } + program.resolve_label(label_ok, program.offset()); + // by default if IN expression is true we just continue to the next instruction if condition_metadata.jump_if_condition_is_true { program.emit_insn(Insn::Goto { target_pc: condition_metadata.jump_target_when_true, }); } - program.resolve_label(label_ok, program.offset()); // todo: deallocate check_null_reg Ok(()) @@ -437,15 +438,21 @@ pub fn translate_condition_expr( } = condition_metadata; // Adjust targets if `NOT IN` - let adjusted_metadata = if *not { - ConditionMetadata { - jump_if_condition_is_true, - jump_target_when_true, - jump_target_when_false: jump_target_when_true, - jump_target_when_null, - } + let (adjusted_metadata, not_true_label, not_false_label) = if *not { + let not_true_label = program.allocate_label(); + let not_false_label = program.allocate_label(); + ( + ConditionMetadata { + jump_if_condition_is_true, + jump_target_when_true: not_true_label, + jump_target_when_false: not_false_label, + jump_target_when_null, + }, + Some(not_true_label), + Some(not_false_label), + ) } else { - condition_metadata + (condition_metadata, None, None) }; translate_in_list( @@ -458,10 +465,17 @@ pub fn translate_condition_expr( )?; if *not { + // When IN is TRUE (match found), NOT IN should be FALSE + program.resolve_label(not_true_label.unwrap(), program.offset()); program.emit_insn(Insn::Goto { target_pc: jump_target_when_false, }); - program.resolve_label(adjusted_metadata.jump_target_when_false, program.offset()); + + // When IN is FALSE (no match), NOT IN should be TRUE + program.resolve_label(not_false_label.unwrap(), program.offset()); + program.emit_insn(Insn::Goto { + target_pc: jump_target_when_true, + }); } } ast::Expr::Like { not, .. } => { From b8f8a870071137345551403275c772c0f4b07414 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Wed, 8 Oct 2025 15:37:49 -0300 Subject: [PATCH 7/8] Refactor bytecode emission - we were redundantly translating tmp - Make emit_constant_insn a method of ProgramBuilder --- core/translate/expr.rs | 26 ++++---------------------- core/vdbe/builder.rs | 8 ++++++++ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index f1ec8ada0..622cb7a25 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -43,13 +43,6 @@ pub struct ReturningValueRegisters { pub num_columns: usize, } -/// Emit an instruction that is guaranteed not to be in any constant span. -/// This ensures the instruction won't be hoisted when emit_constant_insns is called. -fn emit_no_constant_insn(program: &mut ProgramBuilder, insn: Insn) { - program.constant_span_end_all(); - program.emit_insn(insn); -} - #[instrument(skip_all, level = Level::DEBUG)] fn emit_cond_jump(program: &mut ProgramBuilder, cond_meta: ConditionMetadata, reg: usize) { if cond_meta.jump_if_condition_is_true { @@ -2100,21 +2093,10 @@ pub fn translate_expr( let dest_if_true = program.allocate_label(); let tmp = program.alloc_register(); - emit_no_constant_insn( - program, - Insn::Null { - dest: tmp, - dest_end: None, - }, - ); - translate_expr_no_constant_opt( - program, - referenced_tables, - &ast::Expr::Literal(ast::Literal::Null), - tmp, - resolver, - NoConstantOptReason::RegisterReuse, - )?; + program.emit_no_constant_insn(Insn::Null { + dest: tmp, + dest_end: None, + }); // Call the core InList logic with expression-appropriate condition metadata translate_in_list( diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 70e98eb00..82710377e 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -341,6 +341,14 @@ impl ProgramBuilder { self.insns.push((insn, self.insns.len())); } + /// Emit an instruction that is guaranteed not to be in any constant span. + /// This ensures the instruction won't be hoisted when emit_constant_insns is called. + #[instrument(skip(self), level = Level::DEBUG)] + pub fn emit_no_constant_insn(&mut self, insn: Insn) { + self.constant_span_end_all(); + self.emit_insn(insn); + } + pub fn close_cursors(&mut self, cursors: &[CursorID]) { for cursor in cursors { self.emit_insn(Insn::Close { cursor_id: *cursor }); From d2d265a06f2ce1c2b948d128235a5f2ec8e5b6f3 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 9 Oct 2025 12:14:20 -0300 Subject: [PATCH 8/8] Small nits and code clean ups --- core/translate/expr.rs | 39 ++++++++++++--------------------------- parser/src/ast.rs | 11 +++++++++++ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 622cb7a25..7d5f60bc0 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -150,29 +150,6 @@ macro_rules! expect_arguments_even { }}; } -fn expr_vec_size(expr: &ast::Expr) -> usize { - match expr { - Expr::Parenthesized(v) => v.len(), - _ => 1, - } -} - -fn expr_code_vector(program: &mut ProgramBuilder, expr: &ast::Expr) -> usize { - let size = expr_vec_size(expr); - program.alloc_registers(size) -} - -fn expr_can_be_null(expr: &ast::Expr) -> bool { - // todo: better handling columns. Check sqlite3ExprCanBeNull - match expr { - Expr::Literal(literal) => !matches!( - literal, - ast::Literal::Numeric(_) | ast::Literal::String(_) | ast::Literal::Blob(_) - ), - _ => true, - } -} - /// Core implementation of IN expression logic that can be used in both conditional and expression contexts. /// This follows SQLite's approach where a single core function handles all InList cases. /// @@ -213,7 +190,11 @@ fn translate_in_list( // dest if null should be in ConditionMetadata resolver: &Resolver, ) -> Result<()> { - let lhs_reg = expr_code_vector(program, lhs); + let lhs_reg = if let Expr::Parenthesized(v) = lhs { + program.alloc_registers(v.len()) + } else { + program.alloc_register() + }; let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?; let mut check_null_reg = 0; let label_ok = program.allocate_label(); @@ -232,7 +213,7 @@ fn translate_in_list( let rhs_reg = program.alloc_register(); let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?; - if check_null_reg != 0 && expr_can_be_null(expr) { + if check_null_reg != 0 && expr.can_be_null() { program.emit_insn(Insn::BitAnd { lhs: check_null_reg, rhs: rhs_reg, @@ -2089,16 +2070,17 @@ pub fn translate_expr( let dest_if_false = program.allocate_label(); let dest_if_null = program.allocate_label(); - // won't use this label :/ let dest_if_true = program.allocate_label(); + // Ideally we wouldn't need a tmp register, but currently if an IN expression + // is used inside an aggregator the target_register is cleared on every iteration, + // losing the state of the aggregator. let tmp = program.alloc_register(); program.emit_no_constant_insn(Insn::Null { dest: tmp, dest_end: None, }); - // Call the core InList logic with expression-appropriate condition metadata translate_in_list( program, referenced_tables, @@ -2118,13 +2100,16 @@ pub fn translate_expr( value: 1, dest: tmp, }); + // False path: set result to 0 program.resolve_label(dest_if_false, program.offset()); + // Force integer conversion with AddImm 0 program.emit_insn(Insn::AddImm { register: tmp, value: 0, }); + if *not { program.emit_insn(Insn::Not { reg: tmp, diff --git a/parser/src/ast.rs b/parser/src/ast.rs index dae656cc4..9fa714eef 100644 --- a/parser/src/ast.rs +++ b/parser/src/ast.rs @@ -542,6 +542,17 @@ impl Expr { pub fn raise(resolve_type: ResolveType, expr: Option) -> Expr { Expr::Raise(resolve_type, expr.map(Box::new)) } + + pub fn can_be_null(&self) -> bool { + // todo: better handling columns. Check sqlite3ExprCanBeNull + match self { + Expr::Literal(literal) => !matches!( + literal, + Literal::Numeric(_) | Literal::String(_) | Literal::Blob(_) + ), + _ => true, + } + } } /// SQL literal