Merge 'Check that index seek key members are not null' from Jussi Saurio

Fixes bug where e.g. `SELECT age FROM users WHERE age > NULL` would
return all rows, since NULL is lower than anything else in index keys,
but in comparison expressions it returns NULL.
Solution: check whether index seek key members can be null, and if they
are null, do not seek and instead exit the loop.
Example bytecode on a table with an index on (a,b,c):
```sql
sqlite> explain select * from t where a = 5 and b = 6 and c > NULL;
addr  opcode         p1    p2    p3    p4             p5  comment
----  -------------  ----  ----  ----  -------------  --  -------------
0     Init           0     18    0                    0   Start at 18
1     OpenRead       0     2     0     5              0   root=2 iDb=0; t
2     OpenRead       1     3     0     k(4,,,,)       0   root=3 iDb=0; abc
3     Integer        5     1     0                    0   r[1]=5
4     Integer        6     2     0                    0   r[2]=6
5     Null           0     3     0                    0   r[3]=NULL
6     IsNull         3     17    0                    0   if r[3]==NULL goto 17
7     SeekGT         1     17    1     3              0   key=r[1..3]
8       IdxGT          1     17    1     2              0   key=r[1..2]
9       DeferredSeek   1     0     0                    0   Move 0 to 1.rowid if needed
10      Column         1     0     4                    0   r[4]= cursor 1 column 0
11      Column         1     1     5                    0   r[5]= cursor 1 column 1
12      Column         1     2     6                    0   r[6]= cursor 1 column 2
13      Column         0     3     7                    0   r[7]= cursor 0 column 3
14      Column         0     4     8                    0   r[8]= cursor 0 column 4
15      ResultRow      4     5     0                    0   output=r[4..8]
16    Next           1     8     0                    0
17    Halt           0     0     0                    0
18    Transaction    0     0     2     0              1   usesStmtJournal=0
19    Goto           0     1     0                    0
sqlite> select * from t where a = 5 and b = 6 and c > NULL;

Limbo v0.0.19-pre.4
Enter ".help" for usage hints.
limbo> explain select * from t where a = 5 and b = 6 and c > NULL;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     21    0                    0   Start at 21
1     OpenReadAsync      0     2     0                    0   table=t, root=2
2     OpenReadAwait      0     0     0                    0
3     OpenReadAsync      1     3     0                    0   table=abc, root=3
4     OpenReadAwait      0     0     0                    0
5     Integer            5     6     0                    0   r[6]=5
6     Integer            6     7     0                    0   r[7]=6
7     Null               0     8     0                    0   r[8]=NULL
8     IsNull             8     20    0                    0   if (r[8]==NULL) goto 20
9     SeekGT             1     20    6                    0   key=[6..8]
10      IdxGT            1     20    6                    0   key=[6..7]
11      DeferredSeek     1     0     0                    0
12      Column           0     0     1                    0   r[1]=t.a
13      Column           0     1     2                    0   r[2]=t.b
14      Column           0     2     3                    0   r[3]=t.c
15      Column           0     3     4                    0   r[4]=t.d
16      Column           0     4     5                    0   r[5]=t.e
17      ResultRow        1     5     0                    0   output=r[1..5]
18    NextAsync          1     0     0                    0
19    NextAwait          1     10    0                    0
20    Halt               0     0     0                    0
21    Transaction        0     0     0                    0   write=false
22    Goto               0     1     0                    0
```

Closes #1313
This commit is contained in:
Jussi Saurio
2025-04-12 11:11:57 +03:00
3 changed files with 85 additions and 7 deletions

View File

@@ -15,6 +15,7 @@ use super::{
emitter::{OperationMode, TranslateCtx},
expr::{translate_condition_expr, translate_expr, ConditionMetadata},
group_by::is_column_in_group_by,
optimizer::Optimizable,
order_by::{order_by_sorter_insert, sorter_insert},
plan::{
IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType,
@@ -902,13 +903,18 @@ fn emit_seek(
});
}
} else {
translate_expr(
program,
Some(tables),
&seek_def.key[i],
reg,
&t_ctx.resolver,
)?;
let expr = &seek_def.key[i];
translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?;
// If the seek key column is not verifiably non-NULL, we need check whether it is NULL,
// and if so, jump to the loop end.
// This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL,
// which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons.
if !expr.is_nonnull() {
program.emit_insn(Insn::IsNull {
reg,
target_pc: loop_end,
});
}
}
}
let num_regs = if seek_def.null_pad_unset_cols() {

View File

@@ -471,6 +471,7 @@ pub trait Optimizable {
.map_or(false, |c| c == ConstantPredicate::AlwaysFalse))
}
fn is_rowid_alias_of(&self, table_index: usize) -> bool;
fn is_nonnull(&self) -> bool;
}
impl Optimizable for ast::Expr {
@@ -484,6 +485,73 @@ impl Optimizable for ast::Expr {
_ => false,
}
}
/// Returns true if the expressions is (verifiably) non-NULL.
/// It might still be non-NULL even if we return false; we just
/// weren't able to prove it.
/// This function is currently very conservative, and will return false
/// for any expression where we aren't sure and didn't bother to find out
/// by writing more complex code.
fn is_nonnull(&self) -> bool {
match self {
Expr::Between {
lhs, start, end, ..
} => lhs.is_nonnull() && start.is_nonnull() && end.is_nonnull(),
Expr::Binary(expr, _, expr1) => expr.is_nonnull() && expr1.is_nonnull(),
Expr::Case {
base,
when_then_pairs,
else_expr,
..
} => {
base.as_ref().map_or(true, |base| base.is_nonnull())
&& when_then_pairs.iter().all(|(_, then)| then.is_nonnull())
&& else_expr
.as_ref()
.map_or(true, |else_expr| else_expr.is_nonnull())
}
Expr::Cast { expr, .. } => expr.is_nonnull(),
Expr::Collate(expr, _) => expr.is_nonnull(),
Expr::DoublyQualified(..) => {
panic!("Do not call is_nonnull before DoublyQualified has been rewritten as Column")
}
Expr::Exists(..) => false,
Expr::FunctionCall { .. } => false,
Expr::FunctionCallStar { .. } => false,
Expr::Id(..) => panic!("Do not call is_nonnull before Id has been rewritten as Column"),
Expr::Column { is_rowid_alias, .. } => *is_rowid_alias,
Expr::RowId { .. } => true,
Expr::InList { lhs, rhs, .. } => {
lhs.is_nonnull()
&& rhs
.as_ref()
.map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull()))
}
Expr::InSelect { .. } => false,
Expr::InTable { .. } => false,
Expr::IsNull(..) => true,
Expr::Like { lhs, rhs, .. } => lhs.is_nonnull() && rhs.is_nonnull(),
Expr::Literal(literal) => match literal {
ast::Literal::Numeric(_) => true,
ast::Literal::String(_) => true,
ast::Literal::Blob(_) => true,
ast::Literal::Keyword(_) => true,
ast::Literal::Null => false,
ast::Literal::CurrentDate => true,
ast::Literal::CurrentTime => true,
ast::Literal::CurrentTimestamp => true,
},
Expr::Name(..) => false,
Expr::NotNull(..) => true,
Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull()),
Expr::Qualified(..) => {
panic!("Do not call is_nonnull before Qualified has been rewritten as Column")
}
Expr::Raise(..) => false,
Expr::Subquery(..) => false,
Expr::Unary(_, expr) => expr.is_nonnull(),
Expr::Variable(..) => false,
}
}
fn check_constant(&self) -> Result<Option<ConstantPredicate>> {
match self {
Self::Literal(lit) => match lit {

View File

@@ -572,3 +572,7 @@ do_execsql_test where-constant-condition-no-tables {
do_execsql_test where-constant-condition-no-tables-2 {
select 1 where 1 IS NOT NULL;
} {1}
do_execsql_test where-null-comparison-index-seek-regression-test {
select age from users where age > NULL;
} {}