From da323fa0c4fe8675c6e20fe5092ce84b81d321e7 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Tue, 7 Oct 2025 02:34:33 -0300 Subject: [PATCH] Some clean ups and correctly working on WHERE clauses --- core/translate/expr.rs | 89 ++++++++++++++++++++++++++++--------- core/translate/group_by.rs | 4 +- core/translate/index.rs | 3 +- core/translate/main_loop.rs | 6 +-- parser/src/parser.rs | 2 +- testing/select.test | 13 ++++++ 6 files changed, 87 insertions(+), 30 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 57da3c669..2ab33a85a 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -165,12 +165,10 @@ fn expr_code_vector(program: &mut ProgramBuilder, expr: &ast::Expr) -> usize { fn expr_can_be_null(expr: &ast::Expr) -> bool { // todo: better handling columns. Check sqlite3ExprCanBeNull match expr { - Expr::Literal(literal) => match literal { - ast::Literal::Numeric(_) => false, - ast::Literal::String(_) => false, - ast::Literal::Blob(_) => false, - _ => true, - }, + Expr::Literal(literal) => !matches!( + literal, + ast::Literal::Numeric(_) | ast::Literal::String(_) | ast::Literal::Blob(_) + ), _ => true, } } @@ -180,6 +178,30 @@ fn expr_can_be_null(expr: &ast::Expr) -> bool { /// /// This is extracted from the original conditional implementation to be reusable. /// The logic exactly matches the original conditional InList implementation. +/// +/// An IN expression has one of the following formats: +/// ```sql +/// x IN (y1, y2,...,yN) +/// x IN (subquery) (Not yet implemented) +/// ``` +/// The result of an IN operator is one of TRUE, FALSE, or NULL. A NULL result +/// means that it cannot be determined if the LHS is contained in the RHS due +/// to the presence of NULL values. +/// +/// Currently, we do a simple full-scan, yet it's not ideal when there are many rows +/// on RHS. (Check sqlite's in-operator.md) +/// +/// Algorithm: +/// 1. Set the null-flag to false +/// 2. For each row in the RHS: +/// - Compare LHS and RHS +/// - If LHS matches RHS, returns TRUE +/// - If the comparison results in NULL, set the null-flag to true +/// 3. If the null-flag is true, return NULL +/// 4. Return FALSE +/// +/// A "NOT IN" operator is computed by first computing the equivalent IN +/// operator, then interchanging the TRUE and FALSE results. // todo: Check right affinities #[instrument(skip(program, referenced_tables, resolver), level = Level::DEBUG)] fn translate_in_list( @@ -237,21 +259,19 @@ fn translate_in_list( }); } // sqlite3VdbeChangeP5(v, zAff[0]); + } else if lhs_reg != rhs_reg { + program.emit_insn(Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.jump_target_when_false, + flags: CmpInsFlags::default(), + collation: program.curr_collation(), + }); } else { - if lhs_reg != rhs_reg { - program.emit_insn(Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: condition_metadata.jump_target_when_false, - flags: CmpInsFlags::default(), - collation: program.curr_collation(), - }); - } else { - program.emit_insn(Insn::IsNull { - reg: lhs_reg, - target_pc: condition_metadata.jump_target_when_false, - }); - } + program.emit_insn(Insn::IsNull { + reg: lhs_reg, + target_pc: condition_metadata.jump_target_when_false, + }); } } @@ -400,15 +420,42 @@ pub fn translate_condition_expr( translate_expr(program, Some(referenced_tables), expr, reg, resolver)?; emit_cond_jump(program, condition_metadata, reg); } + ast::Expr::InList { lhs, not, rhs } => { + let ConditionMetadata { + jump_if_condition_is_true, + jump_target_when_true, + jump_target_when_false, + jump_target_when_null, + } = condition_metadata; + + // Adjust targets if `NOT IN` + let adjusted_metadata = if *not { + ConditionMetadata { + jump_if_condition_is_true, + jump_target_when_true, + jump_target_when_false: jump_target_when_true, + jump_target_when_null, + } + } else { + condition_metadata + }; + translate_in_list( program, Some(referenced_tables), lhs, rhs, - condition_metadata, + adjusted_metadata, resolver, )?; + + if *not { + program.emit_insn(Insn::Goto { + target_pc: jump_target_when_false, + }); + program.resolve_label(adjusted_metadata.jump_target_when_false, program.offset()); + } } ast::Expr::Like { not, .. } => { let cur_reg = program.alloc_register(); diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 6f891ae2b..19f91c5bc 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -778,7 +778,6 @@ pub fn group_by_emit_row_phase<'a>( if let Some(having) = &group_by.having { for expr in having.iter() { let if_true_target = program.allocate_label(); - let if_null_target = program.allocate_label(); translate_condition_expr( program, &plan.table_references, @@ -787,7 +786,8 @@ pub fn group_by_emit_row_phase<'a>( jump_if_condition_is_true: false, jump_target_when_false: labels.label_group_by_end_without_emitting_row, jump_target_when_true: if_true_target, - jump_target_when_null: if_null_target, + // treat null result has false for now + jump_target_when_null: labels.label_group_by_end_without_emitting_row, }, &t_ctx.resolver, )?; diff --git a/core/translate/index.rs b/core/translate/index.rs index ced75fdf7..5fa07dc0d 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -237,7 +237,6 @@ pub fn translate_create_index( let mut skip_row_label = None; if let Some(where_clause) = where_clause { let label = program.allocate_label(); - let null_label = program.allocate_label(); translate_condition_expr( &mut program, &table_references, @@ -246,7 +245,7 @@ pub fn translate_create_index( jump_if_condition_is_true: false, jump_target_when_false: label, jump_target_when_true: BranchOffset::Placeholder, - jump_target_when_null: null_label, + jump_target_when_null: label, }, resolver, )?; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index a93390d11..1f9d0a069 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -384,12 +384,11 @@ pub fn init_loop( .filter(|c| c.should_eval_before_loop(&[JoinOrderMember::default()])) { let jump_target = program.allocate_label(); - let null_jump_target = program.allocate_label(); let meta = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true: jump_target, jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), - jump_target_when_null: null_jump_target, + jump_target_when_null: t_ctx.label_main_loop_end.unwrap(), }; translate_condition_expr(program, tables, &cond.expr, meta, &t_ctx.resolver)?; program.preassign_label_to_next_insn(jump_target); @@ -711,12 +710,11 @@ fn emit_conditions( .filter(|cond| cond.should_eval_at_loop(join_index, join_order)) { let jump_target_when_true = program.allocate_label(); - let jump_target_when_null = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, jump_target_when_true, jump_target_when_false: next, - jump_target_when_null, + jump_target_when_null: next, }; translate_condition_expr( program, diff --git a/parser/src/parser.rs b/parser/src/parser.rs index b134f9f95..bbab464ff 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1736,8 +1736,8 @@ impl<'a> Parser<'a> { } else { Box::new(Expr::InList { lhs: result, - not, rhs: exprs, + not, }) } } diff --git a/testing/select.test b/testing/select.test index f70d0ada7..5dff582f5 100755 --- a/testing/select.test +++ b/testing/select.test @@ -913,6 +913,19 @@ do_execsql_test_on_specific_db {:memory:} select-in-simple { } {1 0} +do_execsql_test_on_specific_db {:memory:} select-in-with-nulls { + SELECT 4 IN (1, 4, null); + SELECT 4 NOT IN (1, 4, null); +} {1 +0} + +# All should be null +do_execsql_test_on_specific_db {:memory:} select-in-with-nulls-2 { +SELECT 1 IN (2, 3, null); +SELECT 1 NOT IN (2, 3, null); +SELECT null in (null); +} {\n\n} + do_execsql_test_on_specific_db {:memory:} select-in-complex { CREATE TABLE test_table (id INTEGER, category TEXT, value INTEGER); INSERT INTO test_table VALUES (1, 'A', 10), (2, 'B', 20), (3, 'A', 30), (4, 'C', 40);