diff --git a/core/translate/where_clause.rs b/core/translate/where_clause.rs index 6c16a78bb..539f9e6cd 100644 --- a/core/translate/where_clause.rs +++ b/core/translate/where_clause.rs @@ -148,9 +148,23 @@ pub fn translate_where( program: &mut ProgramBuilder, ) -> Result> { if let Some(w) = &select.where_clause { - let label = program.allocate_label(); - translate_condition_expr(program, select, w, label, false, None)?; - Ok(Some(label)) + let failure_label = program.allocate_label(); + let success_label = program.allocate_label(); + translate_condition_expr( + program, + select, + w, + None, + ConditionMetadata { + on_failure_jump_label: failure_label, + on_success_jump_label: success_label, + ..Default::default() + }, + )?; + + program.resolve_label(success_label, program.offset()); + + Ok(Some(failure_label)) } else { Ok(None) } @@ -173,49 +187,71 @@ pub fn translate_processed_where<'a>( continue; } let target_jump = current_loop.next_row_label; - translate_condition_expr(program, select, &term.expr, target_jump, false, cursor_hint)?; + let success_label = program.allocate_label(); + translate_condition_expr( + program, + select, + &term.expr, + cursor_hint, + ConditionMetadata { + jump_on_success: false, + on_failure_jump_label: target_jump, + on_success_jump_label: success_label, + }, + )?; + + program.resolve_label(success_label, program.offset()); } Ok(()) } +#[derive(Default, Debug, Clone, Copy)] +struct ConditionMetadata { + jump_on_success: bool, + on_success_jump_label: BranchOffset, + on_failure_jump_label: BranchOffset, +} + fn translate_condition_expr( program: &mut ProgramBuilder, select: &Select, expr: &ast::Expr, - target_jump: BranchOffset, - jump_if_true: bool, // if true jump to target on op == true, if false invert op cursor_hint: Option, + condition_metadata: ConditionMetadata, ) -> Result<()> { match expr { ast::Expr::Between { .. } => todo!(), ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { - if jump_if_true { - let label = program.allocate_label(); - let _ = translate_condition_expr(program, select, lhs, label, false, cursor_hint); - let _ = - translate_condition_expr(program, select, rhs, target_jump, true, cursor_hint); - program.resolve_label(label, program.offset()); - } else { - let _ = - translate_condition_expr(program, select, lhs, target_jump, false, cursor_hint); - let _ = - translate_condition_expr(program, select, rhs, target_jump, false, cursor_hint); - } + // In a binary AND, never jump to the success label on the first condition, because + // the second condition must also be true. + let _ = translate_condition_expr( + program, + select, + lhs, + cursor_hint, + ConditionMetadata { + jump_on_success: false, + ..condition_metadata + }, + ); + let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); } ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { - if jump_if_true { - let _ = - translate_condition_expr(program, select, lhs, target_jump, true, cursor_hint); - let _ = - translate_condition_expr(program, select, rhs, target_jump, true, cursor_hint); - } else { - let label = program.allocate_label(); - let _ = translate_condition_expr(program, select, lhs, label, true, cursor_hint); - let _ = - translate_condition_expr(program, select, rhs, target_jump, false, cursor_hint); - program.resolve_label(label, program.offset()); - } + let on_failure_jump_label = program.allocate_label(); + let _ = translate_condition_expr( + program, + select, + lhs, + cursor_hint, + ConditionMetadata { + jump_on_success: true, + on_failure_jump_label, + ..condition_metadata + }, + ); + program.resolve_label(on_failure_jump_label, program.offset()); + let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata); } ast::Expr::Binary(lhs, op, rhs) => { let lhs_reg = program.alloc_register(); @@ -232,128 +268,128 @@ fn translate_condition_expr( } match op { ast::Operator::Greater => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Gt { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Le { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } ast::Operator::GreaterEquals => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Ge { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Lt { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } ast::Operator::Less => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Lt { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Ge { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } ast::Operator::LessEquals => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Le { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Gt { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } ast::Operator::Equals => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Eq { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Ne { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } ast::Operator::NotEquals => { - if jump_if_true { + if condition_metadata.jump_on_success { program.emit_insn_with_label_dependency( Insn::Ne { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_success_jump_label, }, - target_jump, + condition_metadata.on_success_jump_label, ) } else { program.emit_insn_with_label_dependency( Insn::Eq { lhs: lhs_reg, rhs: rhs_reg, - target_pc: target_jump, + target_pc: condition_metadata.on_failure_jump_label, }, - target_jump, + condition_metadata.on_failure_jump_label, ) } } @@ -373,25 +409,186 @@ fn translate_condition_expr( value: int_value, dest: reg, }); - if target_jump < 0 { - program.add_label_dependency(target_jump, program.offset()); + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.on_success_jump_label, + null_reg: reg, + }, + condition_metadata.on_success_jump_label, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.on_failure_jump_label, + null_reg: reg, + }, + condition_metadata.on_failure_jump_label, + ) } - program.emit_insn(Insn::IfNot { - reg, - target_pc: target_jump, - null_reg: reg, - }); } else { - crate::bail_parse_error!("unsupported literal type in condition"); + crate::bail_parse_error!("Parse error: unsupported literal type in condition"); } } - _ => todo!(), + ast::Literal::String(string) => { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + value: string.clone(), + dest: reg, + }); + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::If { + reg, + target_pc: condition_metadata.on_success_jump_label, + null_reg: reg, + }, + condition_metadata.on_success_jump_label, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg, + target_pc: condition_metadata.on_failure_jump_label, + null_reg: reg, + }, + condition_metadata.on_failure_jump_label, + ) + } + } + unimpl => todo!("literal {:?} not implemented", unimpl), }, - ast::Expr::InList { - lhs: _, - not: _, - rhs: _, - } => {} + ast::Expr::InList { lhs, not, rhs } => { + // lhs is e.g. a column reference + // rhs is an Option> + // If rhs is None, it means the IN expression is always false, i.e. tbl.id IN (). + // If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3). + // + // The IN expression is equivalent to a series of OR expressions. + // For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`. + // The NOT IN expression is equivalent to a series of AND expressions. + // For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`. + // + // SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values. + // For now we don't have the plumbing to do that, so we'll just emit a series of comparisons, + // which is what SQLite also does for small lists of values. + // TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values. + + let lhs_reg = program.alloc_register(); + let _ = translate_expr(program, select, lhs, lhs_reg, cursor_hint)?; + + if rhs.is_none() { + // If rhs is None, IN expressions are always false and NOT IN expressions are always true. + if *not { + // On a trivially successful NOT IN () expression we can only jump to the success label if 'jump_on_success' is true; otherwise me must fall through. + // This is because in a more complex condition we might need to evaluate the rest of the condition. + // Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where success-jumping would be incorrect, + // but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug. + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.on_success_jump_label, + }, + condition_metadata.on_success_jump_label, + ); + } + } else { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.on_failure_jump_label, + }, + condition_metadata.on_failure_jump_label, + ); + } + return Ok(()); + } + + let rhs = rhs.as_ref().unwrap(); + + // The difference between a local jump and an "upper level" jump is that for example in this case: + // WHERE foo IN (1,2,3) OR bar = 5, + // we can immediately jump to the success label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition. + // This is why in Binary-OR expressions we set jump_on_success to true for the first condition. + // However, in this example: + // WHERE foo IN (1,2,3) AND bar = 5, + // we can't jump to the success label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later. + // This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition. + let success_label = if condition_metadata.jump_on_success { + condition_metadata.on_success_jump_label + } else { + program.allocate_label() + }; + + if !*not { + // If it's an IN expression, we need to jump to the success label if any of the conditions are true. + for (i, expr) in rhs.iter().enumerate() { + let rhs_reg = program.alloc_register(); + let last_condition = i == rhs.len() - 1; + let _ = translate_expr(program, select, expr, rhs_reg, cursor_hint)?; + // If this is not the last condition, we need to jump to the success label if the condition is true. + if !last_condition { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: success_label, + }, + success_label, + ); + } else { + // If this is the last condition, we need to jump to the failure label if there is no match. + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.on_failure_jump_label, + }, + condition_metadata.on_failure_jump_label, + ); + } + } + // If we got here, then the last condition was a match, so we jump to the success label if 'jump_on_success' is true. + // If not, we can just fall through without emitting an unnecessary instruction. + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.on_success_jump_label, + }, + condition_metadata.on_success_jump_label, + ); + } + } else { + // If it's a NOT IN expression, we need to jump to the failure label if any of the conditions are true. + for expr in rhs.iter() { + let rhs_reg = program.alloc_register(); + let _ = translate_expr(program, select, expr, rhs_reg, cursor_hint)?; + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: condition_metadata.on_failure_jump_label, + }, + condition_metadata.on_failure_jump_label, + ); + } + // If we got here, then none of the conditions were a match, so we jump to the success label if 'jump_on_success' is true. + // If not, we can just fall through without emitting an unnecessary instruction. + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: condition_metadata.on_success_jump_label, + }, + condition_metadata.on_success_jump_label, + ); + } + } + + if !condition_metadata.jump_on_success { + program.resolve_label(success_label, program.offset()); + } + } ast::Expr::Like { lhs, not, @@ -422,24 +619,46 @@ fn translate_condition_expr( ast::LikeOperator::Match => todo!(), ast::LikeOperator::Regexp => todo!(), } - if jump_if_true ^ *not { - program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: target_jump, - null_reg: cur_reg, - }, - target_jump, - ) + if !*not { + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.on_success_jump_label, + null_reg: cur_reg, + }, + condition_metadata.on_success_jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: condition_metadata.on_failure_jump_label, + null_reg: cur_reg, + }, + condition_metadata.on_failure_jump_label, + ); + } } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: target_jump, - null_reg: cur_reg, - }, - target_jump, - ) + if condition_metadata.jump_on_success { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: condition_metadata.on_success_jump_label, + null_reg: cur_reg, + }, + condition_metadata.on_success_jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: condition_metadata.on_failure_jump_label, + null_reg: cur_reg, + }, + condition_metadata.on_failure_jump_label, + ); + } } } _ => todo!("op {:?} not implemented", expr), diff --git a/testing/where.test b/testing/where.test index c47084fa2..5b03436eb 100755 --- a/testing/where.test +++ b/testing/where.test @@ -49,7 +49,7 @@ do_execsql_test where-clause-no-table-unary-false { } {} do_execsql_test select-where-and { - select first_name, age from users where first_name = 'Jamie' and age > 80 + select first_name, age from users where first_name = 'Jamie' and age > 80 } {Jamie|94 Jamie|88 Jamie|99 @@ -59,7 +59,7 @@ Jamie|88 } do_execsql_test select-where-or { - select first_name, age from users where first_name = 'Jamie' and age > 80 + select first_name, age from users where first_name = 'Jamie' and age > 80 } {Jamie|94 Jamie|88 Jamie|99 @@ -109,3 +109,95 @@ do_execsql_test where-float-int { do_execsql_test where-multiple-and { select * from products where price > 50 and name != 'sweatshirt' and price < 75; } {6|shorts|70.0} + +do_execsql_test where-multiple-or { + select * from products where price > 75 or name = 'shirt' or name = 'coat'; +} {1|hat|79.0 +2|cap|82.0 +3|shirt|18.0 +7|jeans|78.0 +8|sneakers|82.0 +10|coat|33.0 +11|accessories|81.0} + +do_execsql_test where_in_list { + select * from products where name in ('hat', 'sweatshirt', 'shorts'); +} {1|hat|79.0 +5|sweatshirt|74.0 +6|shorts|70.0} + +do_execsql_test where_not_in_list { + select * from products where name not in ('hat', 'sweatshirt', 'shorts'); +} {2|cap|82.0 +3|shirt|18.0 +4|sweater|25.0 +7|jeans|78.0 +8|sneakers|82.0 +9|boots|1.0 +10|coat|33.0 +11|accessories|81.0} + +do_execsql_test where_in_list_or_another_list { + select * from products where name in ('hat', 'sweatshirt', 'shorts') or price in (81.0, 82.0); +} {1|hat|79.0 +2|cap|82.0 +5|sweatshirt|74.0 +6|shorts|70.0 +8|sneakers|82.0 +11|accessories|81.0} + +do_execsql_test where_not_in_list_and_not_in_another_list { + select * from products where name not in ('hat', 'sweatshirt', 'shorts') and price not in (81.0, 82.0, 78.0, 1.0, 33.0); +} {3|shirt|18.0 +4|sweater|25.0} + +do_execsql_test where_in_list_or_not_in_another_list { + select * from products where name in ('hat', 'sweatshirt', 'shorts') or price not in (82.0, 18.0, 78.0, 33.0, 81.0); +} {1|hat|79.0 +4|sweater|25.0 +5|sweatshirt|74.0 +6|shorts|70.0 +9|boots|1.0} + +do_execsql_test where_in_empty_list { + select * from products where name in (); +} {} + +do_execsql_test where_not_in_empty_list { + select * from products where name not in (); +} {1|hat|79.0 +2|cap|82.0 +3|shirt|18.0 +4|sweater|25.0 +5|sweatshirt|74.0 +6|shorts|70.0 +7|jeans|78.0 +8|sneakers|82.0 +9|boots|1.0 +10|coat|33.0 +11|accessories|81.0} + +do_execsql_test where_name_in_list_and_price_gt_70_or_name_exactly_boots { + select * from products where name in ('hat', 'sweatshirt', 'shorts') and price > 70 or name = 'boots'; +} {1|hat|79.0 +5|sweatshirt|74.0 +9|boots|1.0} + +do_execsql_test where_name_in_list_or_price_gt_70_and_name_like_shirt { + select * from products where name in ('hat', 'shorts') or price > 70 and name like '%shirt%'; +} {1|hat|79.0 +5|sweatshirt|74.0 +6|shorts|70.0} + +do_execsql_test where_name_not_in_list_or_name_eq_shirt { + select * from products where name not in ('shirt', 'boots') or name = 'shirt'; +} {1|hat|79.0 +2|cap|82.0 +3|shirt|18.0 +4|sweater|25.0 +5|sweatshirt|74.0 +6|shorts|70.0 +7|jeans|78.0 +8|sneakers|82.0 +10|coat|33.0 +11|accessories|81.0}