diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 8409e31c9..6521b9c24 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -15,6 +15,7 @@ use super::{ emitter::{OperationMode, TranslateCtx}, expr::{translate_condition_expr, translate_expr, ConditionMetadata}, group_by::is_column_in_group_by, + optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType, @@ -902,13 +903,18 @@ fn emit_seek( }); } } else { - translate_expr( - program, - Some(tables), - &seek_def.key[i], - reg, - &t_ctx.resolver, - )?; + let expr = &seek_def.key[i]; + translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?; + // If the seek key column is not verifiably non-NULL, we need check whether it is NULL, + // and if so, jump to the loop end. + // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL, + // which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons. + if !expr.is_nonnull() { + program.emit_insn(Insn::IsNull { + reg, + target_pc: loop_end, + }); + } } } let num_regs = if seek_def.null_pad_unset_cols() { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 609acd906..49543fd6b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -471,6 +471,7 @@ pub trait Optimizable { .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } fn is_rowid_alias_of(&self, table_index: usize) -> bool; + fn is_nonnull(&self) -> bool; } impl Optimizable for ast::Expr { @@ -484,6 +485,73 @@ impl Optimizable for ast::Expr { _ => false, } } + /// Returns true if the expressions is (verifiably) non-NULL. + /// It might still be non-NULL even if we return false; we just + /// weren't able to prove it. + /// This function is currently very conservative, and will return false + /// for any expression where we aren't sure and didn't bother to find out + /// by writing more complex code. + fn is_nonnull(&self) -> bool { + match self { + Expr::Between { + lhs, start, end, .. + } => lhs.is_nonnull() && start.is_nonnull() && end.is_nonnull(), + Expr::Binary(expr, _, expr1) => expr.is_nonnull() && expr1.is_nonnull(), + Expr::Case { + base, + when_then_pairs, + else_expr, + .. + } => { + base.as_ref().map_or(true, |base| base.is_nonnull()) + && when_then_pairs.iter().all(|(_, then)| then.is_nonnull()) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_nonnull()) + } + Expr::Cast { expr, .. } => expr.is_nonnull(), + Expr::Collate(expr, _) => expr.is_nonnull(), + Expr::DoublyQualified(..) => { + panic!("Do not call is_nonnull before DoublyQualified has been rewritten as Column") + } + Expr::Exists(..) => false, + Expr::FunctionCall { .. } => false, + Expr::FunctionCallStar { .. } => false, + Expr::Id(..) => panic!("Do not call is_nonnull before Id has been rewritten as Column"), + Expr::Column { is_rowid_alias, .. } => *is_rowid_alias, + Expr::RowId { .. } => true, + Expr::InList { lhs, rhs, .. } => { + lhs.is_nonnull() + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull())) + } + Expr::InSelect { .. } => false, + Expr::InTable { .. } => false, + Expr::IsNull(..) => true, + Expr::Like { lhs, rhs, .. } => lhs.is_nonnull() && rhs.is_nonnull(), + Expr::Literal(literal) => match literal { + ast::Literal::Numeric(_) => true, + ast::Literal::String(_) => true, + ast::Literal::Blob(_) => true, + ast::Literal::Keyword(_) => true, + ast::Literal::Null => false, + ast::Literal::CurrentDate => true, + ast::Literal::CurrentTime => true, + ast::Literal::CurrentTimestamp => true, + }, + Expr::Name(..) => false, + Expr::NotNull(..) => true, + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull()), + Expr::Qualified(..) => { + panic!("Do not call is_nonnull before Qualified has been rewritten as Column") + } + Expr::Raise(..) => false, + Expr::Subquery(..) => false, + Expr::Unary(_, expr) => expr.is_nonnull(), + Expr::Variable(..) => false, + } + } fn check_constant(&self) -> Result> { match self { Self::Literal(lit) => match lit { diff --git a/testing/where.test b/testing/where.test index a5bdc91e8..a149e85f2 100755 --- a/testing/where.test +++ b/testing/where.test @@ -572,3 +572,7 @@ do_execsql_test where-constant-condition-no-tables { do_execsql_test where-constant-condition-no-tables-2 { select 1 where 1 IS NOT NULL; } {1} + +do_execsql_test where-null-comparison-index-seek-regression-test { + select age from users where age > NULL; +} {} \ No newline at end of file