From 8e5499e5ed76656d28d95cf0f3727c8fa5a260b2 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 16 Feb 2025 19:14:57 +0200 Subject: [PATCH] Fix not evaling constant conditions when no tables in query We were not evaluating constant conditions (e.g '1 IS NULL') when there were no tables referenced in the query, because our WHERE term evaluation was based on "during which loop" to evaluate them. However, when there are no tables, there are no loops, so they were never evaluated. --- core/translate/emitter.rs | 18 +++++ core/translate/main_loop.rs | 6 +- core/translate/optimizer.rs | 14 ++-- core/translate/plan.rs | 48 ++++++++++++-- core/translate/planner.rs | 129 +++++++++++++++++++++++++++--------- testing/where.test | 8 +++ 6 files changed, 178 insertions(+), 45 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 7786c90a9..8de249de3 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -11,6 +11,7 @@ use crate::vdbe::{insn::Insn, BranchOffset}; use crate::{Result, SymbolTable}; use super::aggregation::emit_ungrouped_aggregation; +use super::expr::{translate_condition_expr, ConditionMetadata}; use super::group_by::{emit_group_by, init_group_by, GroupByMetadata}; use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels}; use super::order_by::{emit_order_by, init_order_by, SortMetadata}; @@ -241,6 +242,23 @@ pub fn emit_query<'a>( &OperationMode::SELECT, )?; + for where_term in plan.where_clause.iter().filter(|wt| wt.is_constant()) { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_false: after_main_loop_label, + jump_target_when_true, + }; + translate_condition_expr( + program, + &plan.table_references, + &where_term.expr, + condition_metadata, + &mut t_ctx.resolver, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + // Set up main query execution loop open_loop(program, t_ctx, &plan.table_references, &plan.where_clause)?; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index a9fa9a158..c2e18597c 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -237,7 +237,7 @@ pub fn open_loop( // E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 for cond in predicates .iter() - .filter(|cond| cond.eval_at_loop == table_index) + .filter(|cond| cond.should_eval_at_loop(table_index)) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { @@ -311,7 +311,7 @@ pub fn open_loop( for cond in predicates .iter() - .filter(|cond| cond.eval_at_loop == table_index) + .filter(|cond| cond.should_eval_at_loop(table_index)) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { @@ -483,7 +483,7 @@ pub fn open_loop( } for cond in predicates .iter() - .filter(|cond| cond.eval_at_loop == table_index) + .filter(|cond| cond.should_eval_at_loop(table_index)) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 7b765edb2..89a8b3a38 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -499,7 +499,7 @@ pub fn try_extract_index_search_expression( table_reference: &TableReference, available_indexes: &HashMap>>, ) -> Result> { - if cond.eval_at_loop != table_index { + if !cond.should_eval_at_loop(table_index) { return Ok(None); } match &mut cond.expr { @@ -512,7 +512,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: rhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } @@ -526,7 +526,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: rhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } @@ -542,7 +542,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: lhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } @@ -556,7 +556,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: lhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } @@ -580,7 +580,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: rhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } @@ -604,7 +604,7 @@ pub fn try_extract_index_search_expression( cmp_expr: WhereTerm { expr: lhs_owned, from_outer_join: cond.from_outer_join, - eval_at_loop: cond.eval_at_loop, + eval_at: cond.eval_at, }, })); } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index fcc9e474d..f40d5bb38 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,6 +1,7 @@ use core::fmt; use limbo_sqlite3_parser::ast; use std::{ + cmp::Ordering, fmt::{Display, Formatter}, rc::Rc, }; @@ -60,10 +61,49 @@ pub struct WhereTerm { /// regardless of which tables it references. /// We also cannot e.g. short circuit the entire query in the optimizer if the condition is statically false. pub from_outer_join: bool, - /// The loop index where to evaluate the condition. - /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), - /// because that is the rightmost table that it references. - pub eval_at_loop: usize, + pub eval_at: EvalAt, +} + +impl WhereTerm { + pub fn is_constant(&self) -> bool { + self.eval_at == EvalAt::BeforeLoop + } + + pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { + self.eval_at == EvalAt::Loop(loop_idx) + } +} + +/// The loop index where to evaluate the condition. +/// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), +/// because that is the rightmost table that it references. +/// +/// Conditions like 1=2 can be evaluated before the main loop is opened, because they are constant. +/// In theory we should be able to statically analyze them all and reduce them to a single boolean value, +/// but that is not implemented yet. +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub enum EvalAt { + Loop(usize), + BeforeLoop, +} + +#[allow(clippy::non_canonical_partial_ord_impl)] +impl PartialOrd for EvalAt { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (EvalAt::Loop(a), EvalAt::Loop(b)) => a.partial_cmp(b), + (EvalAt::BeforeLoop, EvalAt::BeforeLoop) => Some(Ordering::Equal), + (EvalAt::BeforeLoop, _) => Some(Ordering::Less), + (_, EvalAt::BeforeLoop) => Some(Ordering::Greater), + } + } +} + +impl Ord for EvalAt { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other) + .expect("total ordering not implemented for EvalAt") + } } /// A query plan is either a SELECT or a DELETE (for now) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 138d1cbc0..ffc7ee992 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,6 +1,6 @@ use super::{ plan::{ - Aggregate, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType, + Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType, TableReference, WhereTerm, }, select::prepare_select_plan, @@ -534,11 +534,11 @@ pub fn parse_where( bind_column_references(expr, table_references, result_columns)?; } for expr in predicates { - let eval_at_loop = get_rightmost_table_referenced_in_expr(&expr)?; + let eval_at = determine_where_to_eval_expr(&expr)?; out_where_clause.push(WhereTerm { expr, from_outer_join: false, - eval_at_loop, + eval_at, }); } Ok(()) @@ -548,54 +548,121 @@ pub fn parse_where( } /** - Returns the rightmost table index that is referenced in the given AST expression. - Rightmost = innermost loop. - This is used to determine where we should evaluate a given condition expression, - and it needs to be the rightmost table referenced in the expression, because otherwise - the condition would be evaluated before a row is read from that table. + Returns the earliest point at which a WHERE term can be evaluated. + For expressions referencing tables, this is the innermost loop that contains a row for each + table referenced in the expression. + For expressions not referencing any tables (e.g. constants), this is before the main loop is + opened, because they do not need any table data. */ -fn get_rightmost_table_referenced_in_expr<'a>(predicate: &'a ast::Expr) -> Result { - let mut max_table_idx = 0; +fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result { + let mut eval_at: EvalAt = EvalAt::BeforeLoop; match predicate { ast::Expr::Binary(e1, _, e2) => { - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e1)?); - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(e2)?); + eval_at = eval_at.max(determine_where_to_eval_expr(e1)?); + eval_at = eval_at.max(determine_where_to_eval_expr(e2)?); } - ast::Expr::Column { table, .. } => { - max_table_idx = max_table_idx.max(*table); + ast::Expr::Column { table, .. } | ast::Expr::RowId { table, .. } => { + eval_at = eval_at.max(EvalAt::Loop(*table)); } ast::Expr::Id(_) => { /* Id referring to column will already have been rewritten as an Expr::Column */ /* we only get here with literal 'true' or 'false' etc */ } ast::Expr::Qualified(_, _) => { - unreachable!("Qualified should be resolved to a Column before optimizer") + unreachable!("Qualified should be resolved to a Column before resolving eval_at") } ast::Expr::Literal(_) => {} ast::Expr::Like { lhs, rhs, .. } => { - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?); - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs)?); + eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?); + eval_at = eval_at.max(determine_where_to_eval_expr(rhs)?); } ast::Expr::FunctionCall { args: Some(args), .. } => { for arg in args { - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(arg)?); + eval_at = eval_at.max(determine_where_to_eval_expr(arg)?); } } ast::Expr::InList { lhs, rhs, .. } => { - max_table_idx = max_table_idx.max(get_rightmost_table_referenced_in_expr(lhs)?); + eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?); if let Some(rhs_list) = rhs { for rhs_expr in rhs_list { - max_table_idx = - max_table_idx.max(get_rightmost_table_referenced_in_expr(rhs_expr)?); + eval_at = eval_at.max(determine_where_to_eval_expr(rhs_expr)?); } } } - _ => {} + Expr::Between { + lhs, start, end, .. + } => { + eval_at = eval_at.max(determine_where_to_eval_expr(lhs)?); + eval_at = eval_at.max(determine_where_to_eval_expr(start)?); + eval_at = eval_at.max(determine_where_to_eval_expr(end)?); + } + Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + if let Some(base) = base { + eval_at = eval_at.max(determine_where_to_eval_expr(base)?); + } + for (when, then) in when_then_pairs { + eval_at = eval_at.max(determine_where_to_eval_expr(when)?); + eval_at = eval_at.max(determine_where_to_eval_expr(then)?); + } + if let Some(else_expr) = else_expr { + eval_at = eval_at.max(determine_where_to_eval_expr(else_expr)?); + } + } + Expr::Cast { expr, .. } => { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + Expr::Collate(expr, _) => { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + Expr::DoublyQualified(_, _, _) => { + unreachable!("DoublyQualified should be resolved to a Column before resolving eval_at") + } + Expr::Exists(_) => { + todo!("exists not supported yet") + } + Expr::FunctionCall { args, .. } => { + for arg in args.as_ref().unwrap_or(&vec![]).iter() { + eval_at = eval_at.max(determine_where_to_eval_expr(arg)?); + } + } + Expr::FunctionCallStar { .. } => {} + Expr::InSelect { .. } => { + todo!("in select not supported yet") + } + Expr::InTable { .. } => { + todo!("in table not supported yet") + } + Expr::IsNull(expr) => { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + Expr::Name(_) => {} + Expr::NotNull(expr) => { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + Expr::Parenthesized(exprs) => { + for expr in exprs.iter() { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + } + Expr::Raise(_, _) => { + todo!("raise not supported yet") + } + Expr::Subquery(_) => { + todo!("subquery not supported yet") + } + Expr::Unary(_, expr) => { + eval_at = eval_at.max(determine_where_to_eval_expr(expr)?); + } + Expr::Variable(_) => {} } - Ok(max_table_idx) + Ok(eval_at) } fn parse_join<'a>( @@ -679,15 +746,15 @@ fn parse_join<'a>( } for pred in preds { let cur_table_idx = scope.tables.len() - 1; - let eval_at_loop = if outer { - cur_table_idx + let eval_at = if outer { + EvalAt::Loop(cur_table_idx) } else { - get_rightmost_table_referenced_in_expr(&pred)? + determine_where_to_eval_expr(&pred)? }; out_where_clause.push(WhereTerm { expr: pred, from_outer_join: outer, - eval_at_loop, + eval_at, }); } } @@ -749,15 +816,15 @@ fn parse_join<'a>( is_rowid_alias: right_col.is_rowid_alias, }), ); - let eval_at_loop = if outer { - cur_table_idx + let eval_at = if outer { + EvalAt::Loop(cur_table_idx) } else { - get_rightmost_table_referenced_in_expr(&expr)? + determine_where_to_eval_expr(&expr)? }; out_where_clause.push(WhereTerm { expr, from_outer_join: outer, - eval_at_loop, + eval_at, }); } using = Some(distinct_names); diff --git a/testing/where.test b/testing/where.test index 54a6b0eba..a5bdc91e8 100755 --- a/testing/where.test +++ b/testing/where.test @@ -564,3 +564,11 @@ do_execsql_test where-binary-bitwise-and { do_execsql_test where-binary-bitwise-or { select count(*) from users where 2 | 1; } {10000} + +do_execsql_test where-constant-condition-no-tables { + select 1 where 1 IS NULL; +} {} + +do_execsql_test where-constant-condition-no-tables-2 { + select 1 where 1 IS NOT NULL; +} {1}