Merge pull request #237 from penberg/optimize-constant-conditions

This commit is contained in:
Pekka Enberg
2024-07-27 22:56:56 +03:00
committed by GitHub
4 changed files with 335 additions and 33 deletions

View File

@@ -2,7 +2,7 @@ use crate::function::{AggFunc, Func};
use crate::schema::{Column, PseudoTable, Schema, Table};
use crate::translate::expr::{analyze_columns, maybe_apply_affinity, translate_expr};
use crate::translate::where_clause::{
process_where, translate_processed_where, translate_where, ProcessedWhereClause,
process_where, translate_processed_where, translate_tableless_where, ProcessedWhereClause,
};
use crate::translate::{normalize_ident, Insn};
use crate::types::{OwnedRecord, OwnedValue};
@@ -93,20 +93,24 @@ impl<'a> ColumnInfo<'a> {
}
}
/// Registers and jump labels used to emit the bytecode for one LEFT JOIN.
///
/// The match flag records whether the current outer row has produced at least
/// one joined row; if it has not, the right-hand cursor is switched to return
/// NULLs and the row is emitted once with NULL columns.
#[derive(Debug)]
pub struct LeftJoinBookkeeping {
/// Integer register holding the match flag: written as 0 when the loop is opened,
/// written as 1 once the join conditions have passed, and tested with `IfPos`
/// when the loop is closed.
pub match_flag_register: usize,
/// Label of the `Integer` instruction that sets the match flag to 1. It is also the
/// target of the `Goto` emitted after `NullRow`, so the NULL-padded row continues
/// from this instruction.
pub set_match_flag_true_label: BranchOffset,
/// Label of the `IfPos` instruction that tests the match flag. It is also used as the
/// jump target when a condition on this LEFT JOIN is constant-false, so that outer
/// rows are still emitted with NULLs for the right-hand table.
pub check_match_flag_label: BranchOffset,
/// Label `IfPos` jumps to when the match flag is already set; it is resolved to the
/// offset immediately after the `NullRow`/`Goto` pair.
pub on_match_jump_to_label: BranchOffset,
}
#[derive(Debug)]
pub struct LoopInfo {
// The table or table alias that we are looping over
pub identifier: String,
// Metadata about a left join, if any
pub left_join_bookkeeping: Option<LeftJoinBookkeeping>,
pub left_join_maybe: Option<LeftJoinBookkeeping>,
// The label for the instruction that reads the next row for this table
pub next_row_label: BranchOffset,
// The label for the instruction that rewinds the cursor for this table
@@ -239,6 +243,7 @@ pub fn prepare_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result<Se
pub fn translate_select(mut select: Select) -> Result<Program> {
let mut program = ProgramBuilder::new();
let init_label = program.allocate_label();
let early_terminate_label = program.allocate_label();
program.emit_insn_with_label_dependency(
Insn::Init {
target_pc: init_label,
@@ -299,7 +304,7 @@ pub fn translate_select(mut select: Select) -> Result<Program> {
};
if !select.src_tables.is_empty() {
translate_tables_begin(&mut program, &mut select)?;
translate_tables_begin(&mut program, &mut select, early_terminate_label)?;
let (register_start, column_count) = if let Some(sort_columns) = select.order_by {
let start = program.next_free_register();
@@ -359,6 +364,7 @@ pub fn translate_select(mut select: Select) -> Result<Program> {
translate_tables_end(&mut program, &select);
if select.exist_aggregation {
program.resolve_label(early_terminate_label, program.offset());
let mut target = register_start;
for info in &select.column_info {
if let Some(Func::Agg(func)) = &info.func {
@@ -379,7 +385,7 @@ pub fn translate_select(mut select: Select) -> Result<Program> {
} else {
assert!(!select.exist_aggregation);
assert!(sort_info.is_none());
let where_maybe = translate_where(&select, &mut program)?;
let where_maybe = translate_tableless_where(&select, &mut program, early_terminate_label)?;
let (register_start, count) = translate_columns(&mut program, &select, None)?;
if let Some(where_clause_label) = where_maybe {
program.resolve_label(where_clause_label, program.offset() + 1);
@@ -396,6 +402,9 @@ pub fn translate_select(mut select: Select) -> Result<Program> {
let _ = translate_sorter(&select, &mut program, &sort_info.unwrap(), &limit_info);
}
if !select.exist_aggregation {
program.resolve_label(early_terminate_label, program.offset());
}
program.emit_insn(Insn::Halt);
let halt_offset = program.offset() - 1;
if let Some(limit_info) = limit_info {
@@ -502,7 +511,11 @@ fn translate_sorter(
Ok(())
}
fn translate_tables_begin(program: &mut ProgramBuilder, select: &mut Select) -> Result<()> {
fn translate_tables_begin(
program: &mut ProgramBuilder,
select: &mut Select,
early_terminate_label: BranchOffset,
) -> Result<()> {
for join in &select.src_tables {
let loop_info = translate_table_open_cursor(program, join);
select.loops.push(loop_info);
@@ -511,7 +524,29 @@ fn translate_tables_begin(program: &mut ProgramBuilder, select: &mut Select) ->
let processed_where = process_where(program, select)?;
for loop_info in &select.loops {
translate_table_open_loop(program, select, loop_info, &processed_where)?;
// early_terminate_label decides where to jump _IF_ there exists a condition on this loop that is always false.
// this is part of a constant folding optimization where we can skip the loop entirely if we know it will never produce any rows.
let current_loop_early_terminate_label = if let Some(left_join) = &loop_info.left_join_maybe
{
// If there exists a condition on the LEFT JOIN that is always false, e.g.:
// 'SELECT * FROM x LEFT JOIN y ON false'
// then we can't jump to e.g. Halt, but instead we need to still emit all rows from the 'x' table, with NULLs for the 'y' table.
// 'check_match_flag_label' is the label that checks if the left join match flag has been set to true, and if not (which it by default isn't),
// sets the 'y' cursor's "pseudo null bit" on, which means any Insn::Column after that will return NULL for the 'y' table.
left_join.check_match_flag_label
} else {
// If there exists a condition in an INNER JOIN (or WHERE) that is always false, then the query will not produce any rows.
// Example: 'SELECT * FROM x JOIN y ON false' or 'SELECT * FROM x WHERE false'
// Here we should jump to Halt (or e.g. AggFinal, in case we have an aggregation expression like count() that should produce a 0 on empty input).
early_terminate_label
};
translate_table_open_loop(
program,
select,
loop_info,
&processed_where,
current_loop_early_terminate_label,
)?;
}
Ok(())
@@ -531,8 +566,8 @@ fn translate_tables_end(program: &mut ProgramBuilder, select: &Select) {
table_loop.rewind_label,
);
if let Some(ljbk) = &table_loop.left_join_bookkeeping {
left_join_match_flag_check(program, ljbk, cursor_id);
if let Some(left_join) = &table_loop.left_join_maybe {
left_join_match_flag_check(program, left_join, cursor_id);
}
}
}
@@ -551,10 +586,11 @@ fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) -
program.emit_insn(Insn::OpenReadAwait);
LoopInfo {
identifier: table.identifier.clone(),
left_join_bookkeeping: if table.is_outer_join() {
left_join_maybe: if table.is_outer_join() {
Some(LeftJoinBookkeeping {
match_flag_register: program.alloc_register(),
on_match_jump_to_label: program.allocate_label(),
check_match_flag_label: program.allocate_label(),
set_match_flag_true_label: program.allocate_label(),
})
} else {
@@ -571,21 +607,24 @@ fn translate_table_open_cursor(program: &mut ProgramBuilder, table: &SrcTable) -
* initialize left join match flag to false
* if condition checks pass, it will eventually be set to true
*/
fn left_join_match_flag_initialize(program: &mut ProgramBuilder, ljbk: &LeftJoinBookkeeping) {
fn left_join_match_flag_initialize(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) {
program.emit_insn(Insn::Integer {
value: 0,
dest: ljbk.match_flag_register,
dest: left_join.match_flag_register,
});
}
/**
* after the relevant conditional jumps have been emitted, set the left join match flag to true
*/
fn left_join_match_flag_set_true(program: &mut ProgramBuilder, ljbk: &LeftJoinBookkeeping) {
program.defer_label_resolution(ljbk.set_match_flag_true_label, program.offset() as usize);
fn left_join_match_flag_set_true(program: &mut ProgramBuilder, left_join: &LeftJoinBookkeeping) {
program.defer_label_resolution(
left_join.set_match_flag_true_label,
program.offset() as usize,
);
program.emit_insn(Insn::Integer {
value: 1,
dest: ljbk.match_flag_register,
dest: left_join.match_flag_register,
});
}
@@ -598,30 +637,31 @@ fn left_join_match_flag_set_true(program: &mut ProgramBuilder, ljbk: &LeftJoinBo
*/
fn left_join_match_flag_check(
program: &mut ProgramBuilder,
ljbk: &LeftJoinBookkeeping,
left_join: &LeftJoinBookkeeping,
cursor_id: usize,
) {
// If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already)
program.resolve_label(left_join.check_match_flag_label, program.offset());
program.emit_insn_with_label_dependency(
Insn::IfPos {
reg: ljbk.match_flag_register,
target_pc: ljbk.on_match_jump_to_label,
reg: left_join.match_flag_register,
target_pc: left_join.on_match_jump_to_label,
decrement_by: 0,
},
ljbk.on_match_jump_to_label,
left_join.on_match_jump_to_label,
);
// If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL
program.emit_insn(Insn::NullRow { cursor_id });
// Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: ljbk.set_match_flag_true_label,
target_pc: left_join.set_match_flag_true_label,
},
ljbk.set_match_flag_true_label,
left_join.set_match_flag_true_label,
);
// This points to the NextAsync instruction of the next table in the loop
// (i.e. the outer table, since we're iterating in reverse order)
program.resolve_label(ljbk.on_match_jump_to_label, program.offset());
program.resolve_label(left_join.on_match_jump_to_label, program.offset());
}
fn translate_table_open_loop(
@@ -629,9 +669,10 @@ fn translate_table_open_loop(
select: &Select,
loop_info: &LoopInfo,
w: &ProcessedWhereClause,
early_terminate_label: BranchOffset,
) -> Result<()> {
if let Some(ljbk) = loop_info.left_join_bookkeeping.as_ref() {
left_join_match_flag_initialize(program, ljbk);
if let Some(left_join) = loop_info.left_join_maybe.as_ref() {
left_join_match_flag_initialize(program, left_join);
}
program.emit_insn(Insn::RewindAsync {
@@ -646,10 +687,10 @@ fn translate_table_open_loop(
loop_info.rewind_on_empty_label,
);
translate_processed_where(program, select, loop_info, w, None)?;
translate_processed_where(program, select, loop_info, w, early_terminate_label, None)?;
if let Some(ljbk) = loop_info.left_join_bookkeeping.as_ref() {
left_join_match_flag_set_true(program, ljbk);
if let Some(left_join) = loop_info.left_join_maybe.as_ref() {
left_join_match_flag_set_true(program, left_join);
}
Ok(())

View File

@@ -46,6 +46,9 @@ pub fn split_constraint_to_terms<'a>(
queue.push(right);
}
expr => {
if expr.is_always_true()? {
continue;
}
let term = WhereTerm {
expr: expr.clone(),
evaluate_at_cursor: match outer_join_table_name {
@@ -143,11 +146,29 @@ pub fn process_where<'a>(
Ok(wc)
}
pub fn translate_where(
/**
* Translate the WHERE clause of a SELECT statement that doesn't have any tables.
* TODO: refactor this to use the same code path as the other WHERE clause translation functions.
*/
pub fn translate_tableless_where(
select: &Select,
program: &mut ProgramBuilder,
early_terminate_label: BranchOffset,
) -> Result<Option<BranchOffset>> {
if let Some(w) = &select.where_clause {
if w.is_always_false()? {
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: early_terminate_label,
},
early_terminate_label,
);
return Ok(None);
}
if w.is_always_true()? {
return Ok(None);
}
let jump_target_when_false = program.allocate_label();
let jump_target_when_true = program.allocate_label();
translate_condition_expr(
@@ -180,12 +201,28 @@ pub fn translate_processed_where<'a>(
select: &'a Select,
current_loop: &'a LoopInfo,
where_c: &'a ProcessedWhereClause,
skip_entire_table_label: BranchOffset,
cursor_hint: Option<usize>,
) -> Result<()> {
for term in where_c.terms.iter() {
if term.evaluate_at_cursor != current_loop.open_cursor {
continue;
}
if where_c
.terms
.iter()
.filter(|t| t.evaluate_at_cursor == current_loop.open_cursor)
.any(|t| t.expr.is_always_false().unwrap_or(false))
{
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: skip_entire_table_label,
},
skip_entire_table_label,
);
return Ok(());
}
for term in where_c
.terms
.iter()
.filter(|t| t.evaluate_at_cursor == current_loop.open_cursor)
{
let jump_target_when_false = current_loop.next_row_label;
let jump_target_when_true = program.allocate_label();
translate_condition_expr(
@@ -749,3 +786,143 @@ fn introspect_expression_for_cursors(
Ok(cursors)
}
/// Verdict of constant-folding a WHERE/ON condition, as reported by
/// `Evaluatable::check_constant`. Conditions that cannot be decided at
/// translation time are represented by `None` rather than by a variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstantCondition {
/// The condition passes for every row, so callers can drop it instead of emitting code for it.
AlwaysTrue,
/// The condition never passes (this is also the verdict for a bare NULL literal),
/// so callers can jump past the loop instead of evaluating it per row.
AlwaysFalse,
}
/// Implemented by expressions whose truth value may be decidable without
/// looking at any row.
pub trait Evaluatable {
    /// Returns `Some(verdict)` when the expression is constant, or `None`
    /// when it has to be evaluated at runtime.
    fn check_constant(&self) -> Result<Option<ConstantCondition>>;

    /// `true` only when `check_constant` reports `AlwaysTrue`; errors from
    /// `check_constant` are propagated.
    fn is_always_true(&self) -> Result<bool> {
        let verdict = self.check_constant()?;
        Ok(matches!(verdict, Some(ConstantCondition::AlwaysTrue)))
    }

    /// `true` only when `check_constant` reports `AlwaysFalse`; errors from
    /// `check_constant` are propagated.
    fn is_always_false(&self) -> Result<bool> {
        let verdict = self.check_constant()?;
        Ok(matches!(verdict, Some(ConstantCondition::AlwaysFalse)))
    }
}
impl Evaluatable for ast::Expr {
fn check_constant(&self) -> Result<Option<ConstantCondition>> {
match self {
ast::Expr::Literal(lit) => match lit {
ast::Literal::Null => Ok(Some(ConstantCondition::AlwaysFalse)),
ast::Literal::Numeric(b) => {
if let Ok(int_value) = b.parse::<i64>() {
return Ok(Some(if int_value == 0 {
ConstantCondition::AlwaysFalse
} else {
ConstantCondition::AlwaysTrue
}));
}
if let Ok(float_value) = b.parse::<f64>() {
return Ok(Some(if float_value == 0.0 {
ConstantCondition::AlwaysFalse
} else {
ConstantCondition::AlwaysTrue
}));
}
Ok(None)
}
ast::Literal::String(s) => {
let without_quotes = s.trim_matches('\'');
if let Ok(int_value) = without_quotes.parse::<i64>() {
return Ok(Some(if int_value == 0 {
ConstantCondition::AlwaysFalse
} else {
ConstantCondition::AlwaysTrue
}));
}
if let Ok(float_value) = without_quotes.parse::<f64>() {
return Ok(Some(if float_value == 0.0 {
ConstantCondition::AlwaysFalse
} else {
ConstantCondition::AlwaysTrue
}));
}
Ok(Some(ConstantCondition::AlwaysFalse))
}
_ => Ok(None),
},
ast::Expr::Unary(op, expr) => {
if *op == ast::UnaryOperator::Not {
let trivial = expr.check_constant()?;
return Ok(trivial.map(|t| match t {
ConstantCondition::AlwaysTrue => ConstantCondition::AlwaysFalse,
ConstantCondition::AlwaysFalse => ConstantCondition::AlwaysTrue,
}));
}
if *op == ast::UnaryOperator::Negative {
let trivial = expr.check_constant()?;
return Ok(trivial);
}
Ok(None)
}
ast::Expr::InList { lhs: _, not, rhs } => {
if rhs.is_none() {
return Ok(Some(if *not {
ConstantCondition::AlwaysTrue
} else {
ConstantCondition::AlwaysFalse
}));
}
let rhs = rhs.as_ref().unwrap();
if rhs.is_empty() {
return Ok(Some(if *not {
ConstantCondition::AlwaysTrue
} else {
ConstantCondition::AlwaysFalse
}));
}
Ok(None)
}
ast::Expr::Binary(lhs, op, rhs) => {
let lhs_trivial = lhs.check_constant()?;
let rhs_trivial = rhs.check_constant()?;
match op {
ast::Operator::And => {
if lhs_trivial == Some(ConstantCondition::AlwaysFalse)
|| rhs_trivial == Some(ConstantCondition::AlwaysFalse)
{
return Ok(Some(ConstantCondition::AlwaysFalse));
}
if lhs_trivial == Some(ConstantCondition::AlwaysTrue)
&& rhs_trivial == Some(ConstantCondition::AlwaysTrue)
{
return Ok(Some(ConstantCondition::AlwaysTrue));
}
Ok(None)
}
ast::Operator::Or => {
if lhs_trivial == Some(ConstantCondition::AlwaysTrue)
|| rhs_trivial == Some(ConstantCondition::AlwaysTrue)
{
return Ok(Some(ConstantCondition::AlwaysTrue));
}
if lhs_trivial == Some(ConstantCondition::AlwaysFalse)
&& rhs_trivial == Some(ConstantCondition::AlwaysFalse)
{
return Ok(Some(ConstantCondition::AlwaysFalse));
}
Ok(None)
}
_ => Ok(None),
}
}
_ => Ok(None),
}
}
}

View File

@@ -56,6 +56,18 @@ do_execsql_test inner-join-self-with-where {
# select u.first_name from users u join products as p on u.first_name != p.name where u.last_name = 'Williams' limit 1;
#} {Laura} <-- sqlite3 returns 'Aaron'
do_execsql_test inner-join-constant-condition-true {
select u.first_name, p.name from users u join products as p where 1 limit 5;
} {Jamie|hat
Jamie|cap
Jamie|shirt
Jamie|sweater
Jamie|sweatshirt}
do_execsql_test inner-join-constant-condition-false {
select u.first_name from users u join products as p where 0 limit 5;
} {}
do_execsql_test left-join-pk {
select users.first_name as user_name, products.name as product_name from users left join products on users.id = products.id limit 12;
} {Jamie|hat
@@ -133,6 +145,22 @@ do_execsql_test left-join-order-by-qualified-nullable-sorting-col {
select users.first_name, products.name from users left join products on users.id = products.id order by products.name limit 1;
} {Alan|}
do_execsql_test left-join-constant-condition-true {
select u.first_name, p.name from users u left join products as p on 1 limit 5;
} {Jamie|hat
Jamie|cap
Jamie|shirt
Jamie|sweater
Jamie|sweatshirt}
do_execsql_test left-join-constant-condition-false {
select u.first_name, p.name from users u left join products as p on 0 limit 5;
} {Jamie|
Cindy|
Tommy|
Jennifer|
Edward|}
do_execsql_test four-way-inner-join {
select u1.first_name, u2.first_name, u3.first_name, u4.first_name from users u1 join users u2 on u1.id = u2.id join users u3 on u2.id = u3.id + 1 join users u4 on u3.id = u4.id + 1 limit 1;
} {Tommy|Tommy|Cindy|Jamie}
@@ -155,3 +183,15 @@ do_execsql_test innerjoin-leftjoin-with-or-terms {
select u.first_name, u2.first_name, p.name from users u join users u2 on u.id = u2.id + 1 left join products p on p.name = u.first_name or p.name like 'sweat%' where u.first_name = 'Franklin';
} {Franklin|Cynthia|sweater
Franklin|Cynthia|sweatshirt}
do_execsql_test left-join-constant-condition-false-inner-join-constant-condition-true {
select u.first_name, p.name, u2.first_name from users u left join products as p on 0 join users u2 on 1 limit 5;
} {Jamie||Jamie
Jamie||Cindy
Jamie||Tommy
Jamie||Jennifer
Jamie||Edward}
do_execsql_test left-join-constant-condition-true-inner-join-constant-condition-false {
select u.first_name, p.name, u2.first_name from users u left join products as p on 1 join users u2 on 0 limit 5;
} {}

View File

@@ -40,14 +40,58 @@ do_execsql_test where-clause-unary-false {
select count(1) from users where 0;
} {0}
do_execsql_test where-clause-no-table-unary-true {
do_execsql_test where-clause-no-table-constant-condition-true {
select 1 where 1;
} {1}
do_execsql_test where-clause-no-table-unary-false {
do_execsql_test where-clause-no-table-constant-condition-true-2 {
select 1 where '1';
} {1}
do_execsql_test where-clause-no-table-constant-condition-true-3 {
select 1 where 6.66;
} {1}
do_execsql_test where-clause-no-table-constant-condition-true-4 {
select 1 where '6.66';
} {1}
do_execsql_test where-clause-no-table-constant-condition-true-5 {
select 1 where -1;
} {1}
do_execsql_test where-clause-no-table-constant-condition-true-6 {
select 1 where '-1';
} {1}
do_execsql_test where-clause-no-table-constant-condition-false {
select 1 where 0;
} {}
do_execsql_test where-clause-no-table-constant-condition-false-2 {
select 1 where '0';
} {}
do_execsql_test where-clause-no-table-constant-condition-false-3 {
select 1 where 0.0;
} {}
do_execsql_test where-clause-no-table-constant-condition-false-4 {
select 1 where '0.0';
} {}
do_execsql_test where-clause-no-table-constant-condition-false-5 {
select 1 where -0.0;
} {}
do_execsql_test where-clause-no-table-constant-condition-false-6 {
select 1 where '-0.0';
} {}
do_execsql_test where-clause-no-table-constant-condition-false-7 {
select 1 where 'hamburger';
} {}
do_execsql_test select-where-and {
select first_name, age from users where first_name = 'Jamie' and age > 80
} {Jamie|94