Merge 'Fix not evaling constant conditions when no tables in query' from Jussi Saurio

This PR is extracted from the sqlite fuzzing exploration effort in
https://github.com/tursodatabase/limbo/pull/1021
---
We were not evaluating constant conditions (e.g '1 IS NULL') when there
were no tables referenced in the query, because our WHERE term
evaluation was based on "during which loop" to evaluate them. However,
when there are no tables, there are no loops, so they were never
evaluated.

Closes #1023
This commit is contained in:
Pekka Enberg
2025-02-18 10:07:04 +02:00
6 changed files with 178 additions and 45 deletions

View File

@@ -11,6 +11,7 @@ use crate::vdbe::{insn::Insn, BranchOffset};
use crate::{Result, SymbolTable};
use super::aggregation::emit_ungrouped_aggregation;
use super::expr::{translate_condition_expr, ConditionMetadata};
use super::group_by::{emit_group_by, init_group_by, GroupByMetadata};
use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels};
use super::order_by::{emit_order_by, init_order_by, SortMetadata};
@@ -241,6 +242,23 @@ pub fn emit_query<'a>(
&OperationMode::SELECT,
)?;
for where_term in plan.where_clause.iter().filter(|wt| wt.is_constant()) {
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_false: after_main_loop_label,
jump_target_when_true,
};
translate_condition_expr(
program,
&plan.table_references,
&where_term.expr,
condition_metadata,
&mut t_ctx.resolver,
)?;
program.resolve_label(jump_target_when_true, program.offset());
}
// Set up main query execution loop
open_loop(program, t_ctx, &plan.table_references, &plan.where_clause)?;

View File

@@ -237,7 +237,7 @@ pub fn open_loop(
// E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10
for cond in predicates
.iter()
.filter(|cond| cond.eval_at_loop == table_index)
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
@@ -311,7 +311,7 @@ pub fn open_loop(
for cond in predicates
.iter()
.filter(|cond| cond.eval_at_loop == table_index)
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
@@ -483,7 +483,7 @@ pub fn open_loop(
}
for cond in predicates
.iter()
.filter(|cond| cond.eval_at_loop == table_index)
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {

View File

@@ -499,7 +499,7 @@ pub fn try_extract_index_search_expression(
table_reference: &TableReference,
available_indexes: &HashMap<String, Vec<Rc<Index>>>,
) -> Result<Option<Search>> {
if cond.eval_at_loop != table_index {
if !cond.should_eval_at_loop(table_index) {
return Ok(None);
}
match &mut cond.expr {
@@ -512,7 +512,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: rhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}
@@ -526,7 +526,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: rhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}
@@ -542,7 +542,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: lhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}
@@ -556,7 +556,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: lhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}
@@ -580,7 +580,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: rhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}
@@ -604,7 +604,7 @@ pub fn try_extract_index_search_expression(
cmp_expr: WhereTerm {
expr: lhs_owned,
from_outer_join: cond.from_outer_join,
eval_at_loop: cond.eval_at_loop,
eval_at: cond.eval_at,
},
}));
}

View File

@@ -1,6 +1,7 @@
use core::fmt;
use limbo_sqlite3_parser::ast;
use std::{
cmp::Ordering,
fmt::{Display, Formatter},
rc::Rc,
};
@@ -60,10 +61,49 @@ pub struct WhereTerm {
/// regardless of which tables it references.
/// We also cannot e.g. short circuit the entire query in the optimizer if the condition is statically false.
pub from_outer_join: bool,
/// The loop index where to evaluate the condition.
/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
/// because that is the rightmost table that it references.
pub eval_at_loop: usize,
pub eval_at: EvalAt,
}
impl WhereTerm {
pub fn is_constant(&self) -> bool {
self.eval_at == EvalAt::BeforeLoop
}
pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool {
self.eval_at == EvalAt::Loop(loop_idx)
}
}
/// The loop index where to evaluate the condition.
/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0),
/// because that is the rightmost table that it references.
///
/// Conditions like 1=2 can be evaluated before the main loop is opened, because they are constant.
/// In theory we should be able to statically analyze them all and reduce them to a single boolean value,
/// but that is not implemented yet.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum EvalAt {
Loop(usize),
BeforeLoop,
}
#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd for EvalAt {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match (self, other) {
(EvalAt::Loop(a), EvalAt::Loop(b)) => a.partial_cmp(b),
(EvalAt::BeforeLoop, EvalAt::BeforeLoop) => Some(Ordering::Equal),
(EvalAt::BeforeLoop, _) => Some(Ordering::Less),
(_, EvalAt::BeforeLoop) => Some(Ordering::Greater),
}
}
}
impl Ord for EvalAt {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other)
.expect("total ordering not implemented for EvalAt")
}
}
/// A query plan is either a SELECT or a DELETE (for now)

View File

@@ -1,6 +1,6 @@
use super::{
plan::{
Aggregate, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType,
Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType,
TableReference, WhereTerm,
},
select::prepare_select_plan,
@@ -534,11 +534,11 @@ pub fn parse_where(
bind_column_references(expr, table_references, result_columns)?;
}
for expr in predicates {
let eval_at_loop = get_rightmost_table_referenced_in_expr(&expr)?;
let eval_at = determine_where_to_eval_expr(&expr)?;
out_where_clause.push(WhereTerm {
expr,
from_outer_join: false,
eval_at_loop,
eval_at,
});
}
Ok(())
@@ -548,54 +548,121 @@ pub fn parse_where(
}
/**
Returns the rightmost table index that is referenced in the given AST expression.
Rightmost = innermost loop.
This is used to determine where we should evaluate a given condition expression,
and it needs to be the rightmost table referenced in the expression, because otherwise
the condition would be evaluated before a row is read from that table.
Returns the earliest point at which a WHERE term can be evaluated.
For expressions referencing tables, this is the innermost loop that contains a row for each
table referenced in the expression.
For expressions not referencing any tables (e.g. constants), this is before the main loop is
opened, because they do not need any table data.
*/
fn get_rightmost_table_referenced_in_expr<'a>(predicate: &'a ast::Expr) -> Result<usize> {
let mut max_table_idx = 0;
fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result<EvalAt> {
let mut eval_at: EvalAt = EvalAt::BeforeLoop;
match predicate {
ast::Expr::Binary(e1, _, e2) => {
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e1)?);
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e2)?);
eval_at = eval_at.max(determine_where_to_eval_expr(e1)?);
eval_at = eval_at.max(determine_where_to_eval_expr(e2)?);
}
ast::Expr::Column { table, .. } => {
max_table_idx = max_table_idx.max(*table);
ast::Expr::Column { table, .. } | ast::Expr::RowId { table, .. } => {
eval_at = eval_at.max(EvalAt::Loop(*table));
}
ast::Expr::Id(_) => {
/* Id referring to column will already have been rewritten as an Expr::Column */
/* we only get here with literal 'true' or 'false' etc */
}
ast::Expr::Qualified(_, _) => {
unreachable!("Qualified should be resolved to a Column before optimizer")
unreachable!("Qualified should be resolved to a Column before resolving eval_at")
}
ast::Expr::Literal(_) => {}
ast::Expr::Like { lhs, rhs, .. } => {
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?);
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs)?);
eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?);
eval_at = eval_at.max(determine_where_to_eval_expr(rhs)?);
}
ast::Expr::FunctionCall {
args: Some(args), ..
} => {
for arg in args {
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(arg)?);
eval_at = eval_at.max(determine_where_to_eval_expr(arg)?);
}
}
ast::Expr::InList { lhs, rhs, .. } => {
max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?);
eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?);
if let Some(rhs_list) = rhs {
for rhs_expr in rhs_list {
max_table_idx =
max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs_expr)?);
eval_at = eval_at.max(determine_where_to_eval_expr(rhs_expr)?);
}
}
}
_ => {}
Expr::Between {
lhs, start, end, ..
} => {
eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?);
eval_at = eval_at.max(determine_where_to_eval_expr(start)?);
eval_at = eval_at.max(determine_where_to_eval_expr(end)?);
}
Expr::Case {
base,
when_then_pairs,
else_expr,
} => {
if let Some(base) = base {
eval_at = eval_at.max(determine_where_to_eval_expr(base)?);
}
for (when, then) in when_then_pairs {
eval_at = eval_at.max(determine_where_to_eval_expr(when)?);
eval_at = eval_at.max(determine_where_to_eval_expr(then)?);
}
if let Some(else_expr) = else_expr {
eval_at = eval_at.max(determine_where_to_eval_expr(else_expr)?);
}
}
Expr::Cast { expr, .. } => {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
Expr::Collate(expr, _) => {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
Expr::DoublyQualified(_, _, _) => {
unreachable!("DoublyQualified should be resolved to a Column before resolving eval_at")
}
Expr::Exists(_) => {
todo!("exists not supported yet")
}
Expr::FunctionCall { args, .. } => {
for arg in args.as_ref().unwrap_or(&vec![]).iter() {
eval_at = eval_at.max(determine_where_to_eval_expr(arg)?);
}
}
Expr::FunctionCallStar { .. } => {}
Expr::InSelect { .. } => {
todo!("in select not supported yet")
}
Expr::InTable { .. } => {
todo!("in table not supported yet")
}
Expr::IsNull(expr) => {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
Expr::Name(_) => {}
Expr::NotNull(expr) => {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
Expr::Parenthesized(exprs) => {
for expr in exprs.iter() {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
}
Expr::Raise(_, _) => {
todo!("raise not supported yet")
}
Expr::Subquery(_) => {
todo!("subquery not supported yet")
}
Expr::Unary(_, expr) => {
eval_at = eval_at.max(determine_where_to_eval_expr(expr)?);
}
Expr::Variable(_) => {}
}
Ok(max_table_idx)
Ok(eval_at)
}
fn parse_join<'a>(
@@ -679,15 +746,15 @@ fn parse_join<'a>(
}
for pred in preds {
let cur_table_idx = scope.tables.len() - 1;
let eval_at_loop = if outer {
cur_table_idx
let eval_at = if outer {
EvalAt::Loop(cur_table_idx)
} else {
get_rightmost_table_referenced_in_expr(&pred)?
determine_where_to_eval_expr(&pred)?
};
out_where_clause.push(WhereTerm {
expr: pred,
from_outer_join: outer,
eval_at_loop,
eval_at,
});
}
}
@@ -749,15 +816,15 @@ fn parse_join<'a>(
is_rowid_alias: right_col.is_rowid_alias,
}),
);
let eval_at_loop = if outer {
cur_table_idx
let eval_at = if outer {
EvalAt::Loop(cur_table_idx)
} else {
get_rightmost_table_referenced_in_expr(&expr)?
determine_where_to_eval_expr(&expr)?
};
out_where_clause.push(WhereTerm {
expr,
from_outer_join: outer,
eval_at_loop,
eval_at,
});
}
using = Some(distinct_names);

View File

@@ -564,3 +564,11 @@ do_execsql_test where-binary-bitwise-and {
do_execsql_test where-binary-bitwise-or {
select count(*) from users where 2 | 1;
} {10000}
do_execsql_test where-constant-condition-no-tables {
select 1 where 1 IS NULL;
} {}
do_execsql_test where-constant-condition-no-tables-2 {
select 1 where 1 IS NOT NULL;
} {1}