core/translate: #154: implement WHERE IN (...)

This commit is contained in:
jussisaurio
2024-07-26 17:28:53 +03:00
parent 3597089b26
commit 016379fea4
2 changed files with 404 additions and 93 deletions

View File

@@ -148,9 +148,23 @@ pub fn translate_where(
program: &mut ProgramBuilder,
) -> Result<Option<BranchOffset>> {
if let Some(w) = &select.where_clause {
let label = program.allocate_label();
translate_condition_expr(program, select, w, label, false, None)?;
Ok(Some(label))
let failure_label = program.allocate_label();
let success_label = program.allocate_label();
translate_condition_expr(
program,
select,
w,
None,
ConditionMetadata {
on_failure_jump_label: failure_label,
on_success_jump_label: success_label,
..Default::default()
},
)?;
program.resolve_label(success_label, program.offset());
Ok(Some(failure_label))
} else {
Ok(None)
}
@@ -173,49 +187,71 @@ pub fn translate_processed_where<'a>(
continue;
}
let target_jump = current_loop.next_row_label;
translate_condition_expr(program, select, &term.expr, target_jump, false, cursor_hint)?;
let success_label = program.allocate_label();
translate_condition_expr(
program,
select,
&term.expr,
cursor_hint,
ConditionMetadata {
jump_on_success: false,
on_failure_jump_label: target_jump,
on_success_jump_label: success_label,
},
)?;
program.resolve_label(success_label, program.offset());
}
Ok(())
}
#[derive(Default, Debug, Clone, Copy)]
struct ConditionMetadata {
jump_on_success: bool,
on_success_jump_label: BranchOffset,
on_failure_jump_label: BranchOffset,
}
fn translate_condition_expr(
program: &mut ProgramBuilder,
select: &Select,
expr: &ast::Expr,
target_jump: BranchOffset,
jump_if_true: bool, // if true jump to target on op == true, if false invert op
cursor_hint: Option<usize>,
condition_metadata: ConditionMetadata,
) -> Result<()> {
match expr {
ast::Expr::Between { .. } => todo!(),
ast::Expr::Binary(lhs, ast::Operator::And, rhs) => {
if jump_if_true {
let label = program.allocate_label();
let _ = translate_condition_expr(program, select, lhs, label, false, cursor_hint);
let _ =
translate_condition_expr(program, select, rhs, target_jump, true, cursor_hint);
program.resolve_label(label, program.offset());
} else {
let _ =
translate_condition_expr(program, select, lhs, target_jump, false, cursor_hint);
let _ =
translate_condition_expr(program, select, rhs, target_jump, false, cursor_hint);
}
// In a binary AND, never jump to the success label on the first condition, because
// the second condition must also be true.
let _ = translate_condition_expr(
program,
select,
lhs,
cursor_hint,
ConditionMetadata {
jump_on_success: false,
..condition_metadata
},
);
let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata);
}
ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => {
if jump_if_true {
let _ =
translate_condition_expr(program, select, lhs, target_jump, true, cursor_hint);
let _ =
translate_condition_expr(program, select, rhs, target_jump, true, cursor_hint);
} else {
let label = program.allocate_label();
let _ = translate_condition_expr(program, select, lhs, label, true, cursor_hint);
let _ =
translate_condition_expr(program, select, rhs, target_jump, false, cursor_hint);
program.resolve_label(label, program.offset());
}
let on_failure_jump_label = program.allocate_label();
let _ = translate_condition_expr(
program,
select,
lhs,
cursor_hint,
ConditionMetadata {
jump_on_success: true,
on_failure_jump_label,
..condition_metadata
},
);
program.resolve_label(on_failure_jump_label, program.offset());
let _ = translate_condition_expr(program, select, rhs, cursor_hint, condition_metadata);
}
ast::Expr::Binary(lhs, op, rhs) => {
let lhs_reg = program.alloc_register();
@@ -232,128 +268,128 @@ fn translate_condition_expr(
}
match op {
ast::Operator::Greater => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Gt {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Le {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
ast::Operator::GreaterEquals => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Ge {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Lt {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
ast::Operator::Less => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Lt {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Ge {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
ast::Operator::LessEquals => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Le {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Gt {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
ast::Operator::Equals => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Ne {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
ast::Operator::NotEquals => {
if jump_if_true {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Ne {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_success_jump_label,
},
target_jump,
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: target_jump,
target_pc: condition_metadata.on_failure_jump_label,
},
target_jump,
condition_metadata.on_failure_jump_label,
)
}
}
@@ -373,25 +409,186 @@ fn translate_condition_expr(
value: int_value,
dest: reg,
});
if target_jump < 0 {
program.add_label_dependency(target_jump, program.offset());
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::If {
reg,
target_pc: condition_metadata.on_success_jump_label,
null_reg: reg,
},
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::IfNot {
reg,
target_pc: condition_metadata.on_failure_jump_label,
null_reg: reg,
},
condition_metadata.on_failure_jump_label,
)
}
program.emit_insn(Insn::IfNot {
reg,
target_pc: target_jump,
null_reg: reg,
});
} else {
crate::bail_parse_error!("unsupported literal type in condition");
crate::bail_parse_error!("Parse error: unsupported literal type in condition");
}
}
_ => todo!(),
ast::Literal::String(string) => {
let reg = program.alloc_register();
program.emit_insn(Insn::String8 {
value: string.clone(),
dest: reg,
});
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::If {
reg,
target_pc: condition_metadata.on_success_jump_label,
null_reg: reg,
},
condition_metadata.on_success_jump_label,
)
} else {
program.emit_insn_with_label_dependency(
Insn::IfNot {
reg,
target_pc: condition_metadata.on_failure_jump_label,
null_reg: reg,
},
condition_metadata.on_failure_jump_label,
)
}
}
unimpl => todo!("literal {:?} not implemented", unimpl),
},
ast::Expr::InList {
lhs: _,
not: _,
rhs: _,
} => {}
ast::Expr::InList { lhs, not, rhs } => {
// lhs is e.g. a column reference
// rhs is an Option<Vec<Expr>>
// If rhs is None, it means the IN expression is always false, i.e. tbl.id IN ().
// If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3).
//
// The IN expression is equivalent to a series of OR expressions.
// For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`.
// The NOT IN expression is equivalent to a series of AND expressions.
// For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`.
//
// SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values.
// For now we don't have the plumbing to do that, so we'll just emit a series of comparisons,
// which is what SQLite also does for small lists of values.
// TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values.
let lhs_reg = program.alloc_register();
let _ = translate_expr(program, select, lhs, lhs_reg, cursor_hint)?;
if rhs.is_none() {
// If rhs is None, IN expressions are always false and NOT IN expressions are always true.
if *not {
// On a trivially successful NOT IN () expression we can only jump to the success label if 'jump_on_success' is true; otherwise me must fall through.
// This is because in a more complex condition we might need to evaluate the rest of the condition.
// Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where success-jumping would be incorrect,
// but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug.
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: condition_metadata.on_success_jump_label,
},
condition_metadata.on_success_jump_label,
);
}
} else {
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: condition_metadata.on_failure_jump_label,
},
condition_metadata.on_failure_jump_label,
);
}
return Ok(());
}
let rhs = rhs.as_ref().unwrap();
// The difference between a local jump and an "upper level" jump is that for example in this case:
// WHERE foo IN (1,2,3) OR bar = 5,
// we can immediately jump to the success label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition.
// This is why in Binary-OR expressions we set jump_on_success to true for the first condition.
// However, in this example:
// WHERE foo IN (1,2,3) AND bar = 5,
// we can't jump to the success label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later.
// This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition.
let success_label = if condition_metadata.jump_on_success {
condition_metadata.on_success_jump_label
} else {
program.allocate_label()
};
if !*not {
// If it's an IN expression, we need to jump to the success label if any of the conditions are true.
for (i, expr) in rhs.iter().enumerate() {
let rhs_reg = program.alloc_register();
let last_condition = i == rhs.len() - 1;
let _ = translate_expr(program, select, expr, rhs_reg, cursor_hint)?;
// If this is not the last condition, we need to jump to the success label if the condition is true.
if !last_condition {
program.emit_insn_with_label_dependency(
Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: success_label,
},
success_label,
);
} else {
// If this is the last condition, we need to jump to the failure label if there is no match.
program.emit_insn_with_label_dependency(
Insn::Ne {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.on_failure_jump_label,
},
condition_metadata.on_failure_jump_label,
);
}
}
// If we got here, then the last condition was a match, so we jump to the success label if 'jump_on_success' is true.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: condition_metadata.on_success_jump_label,
},
condition_metadata.on_success_jump_label,
);
}
} else {
// If it's a NOT IN expression, we need to jump to the failure label if any of the conditions are true.
for expr in rhs.iter() {
let rhs_reg = program.alloc_register();
let _ = translate_expr(program, select, expr, rhs_reg, cursor_hint)?;
program.emit_insn_with_label_dependency(
Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.on_failure_jump_label,
},
condition_metadata.on_failure_jump_label,
);
}
// If we got here, then none of the conditions were a match, so we jump to the success label if 'jump_on_success' is true.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: condition_metadata.on_success_jump_label,
},
condition_metadata.on_success_jump_label,
);
}
}
if !condition_metadata.jump_on_success {
program.resolve_label(success_label, program.offset());
}
}
ast::Expr::Like {
lhs,
not,
@@ -422,24 +619,46 @@ fn translate_condition_expr(
ast::LikeOperator::Match => todo!(),
ast::LikeOperator::Regexp => todo!(),
}
if jump_if_true ^ *not {
program.emit_insn_with_label_dependency(
Insn::If {
reg: cur_reg,
target_pc: target_jump,
null_reg: cur_reg,
},
target_jump,
)
if !*not {
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::If {
reg: cur_reg,
target_pc: condition_metadata.on_success_jump_label,
null_reg: cur_reg,
},
condition_metadata.on_success_jump_label,
);
} else {
program.emit_insn_with_label_dependency(
Insn::IfNot {
reg: cur_reg,
target_pc: condition_metadata.on_failure_jump_label,
null_reg: cur_reg,
},
condition_metadata.on_failure_jump_label,
);
}
} else {
program.emit_insn_with_label_dependency(
Insn::IfNot {
reg: cur_reg,
target_pc: target_jump,
null_reg: cur_reg,
},
target_jump,
)
if condition_metadata.jump_on_success {
program.emit_insn_with_label_dependency(
Insn::IfNot {
reg: cur_reg,
target_pc: condition_metadata.on_success_jump_label,
null_reg: cur_reg,
},
condition_metadata.on_success_jump_label,
);
} else {
program.emit_insn_with_label_dependency(
Insn::If {
reg: cur_reg,
target_pc: condition_metadata.on_failure_jump_label,
null_reg: cur_reg,
},
condition_metadata.on_failure_jump_label,
);
}
}
}
_ => todo!("op {:?} not implemented", expr),

View File

@@ -49,7 +49,7 @@ do_execsql_test where-clause-no-table-unary-false {
} {}
do_execsql_test select-where-and {
select first_name, age from users where first_name = 'Jamie' and age > 80
select first_name, age from users where first_name = 'Jamie' and age > 80
} {Jamie|94
Jamie|88
Jamie|99
@@ -59,7 +59,7 @@ Jamie|88
}
do_execsql_test select-where-or {
select first_name, age from users where first_name = 'Jamie' and age > 80
select first_name, age from users where first_name = 'Jamie' and age > 80
} {Jamie|94
Jamie|88
Jamie|99
@@ -109,3 +109,95 @@ do_execsql_test where-float-int {
do_execsql_test where-multiple-and {
select * from products where price > 50 and name != 'sweatshirt' and price < 75;
} {6|shorts|70.0}
do_execsql_test where-multiple-or {
select * from products where price > 75 or name = 'shirt' or name = 'coat';
} {1|hat|79.0
2|cap|82.0
3|shirt|18.0
7|jeans|78.0
8|sneakers|82.0
10|coat|33.0
11|accessories|81.0}
do_execsql_test where_in_list {
select * from products where name in ('hat', 'sweatshirt', 'shorts');
} {1|hat|79.0
5|sweatshirt|74.0
6|shorts|70.0}
do_execsql_test where_not_in_list {
select * from products where name not in ('hat', 'sweatshirt', 'shorts');
} {2|cap|82.0
3|shirt|18.0
4|sweater|25.0
7|jeans|78.0
8|sneakers|82.0
9|boots|1.0
10|coat|33.0
11|accessories|81.0}
do_execsql_test where_in_list_or_another_list {
select * from products where name in ('hat', 'sweatshirt', 'shorts') or price in (81.0, 82.0);
} {1|hat|79.0
2|cap|82.0
5|sweatshirt|74.0
6|shorts|70.0
8|sneakers|82.0
11|accessories|81.0}
do_execsql_test where_not_in_list_and_not_in_another_list {
select * from products where name not in ('hat', 'sweatshirt', 'shorts') and price not in (81.0, 82.0, 78.0, 1.0, 33.0);
} {3|shirt|18.0
4|sweater|25.0}
do_execsql_test where_in_list_or_not_in_another_list {
select * from products where name in ('hat', 'sweatshirt', 'shorts') or price not in (82.0, 18.0, 78.0, 33.0, 81.0);
} {1|hat|79.0
4|sweater|25.0
5|sweatshirt|74.0
6|shorts|70.0
9|boots|1.0}
do_execsql_test where_in_empty_list {
select * from products where name in ();
} {}
do_execsql_test where_not_in_empty_list {
select * from products where name not in ();
} {1|hat|79.0
2|cap|82.0
3|shirt|18.0
4|sweater|25.0
5|sweatshirt|74.0
6|shorts|70.0
7|jeans|78.0
8|sneakers|82.0
9|boots|1.0
10|coat|33.0
11|accessories|81.0}
do_execsql_test where_name_in_list_and_price_gt_70_or_name_exactly_boots {
select * from products where name in ('hat', 'sweatshirt', 'shorts') and price > 70 or name = 'boots';
} {1|hat|79.0
5|sweatshirt|74.0
9|boots|1.0}
do_execsql_test where_name_in_list_or_price_gt_70_and_name_like_shirt {
select * from products where name in ('hat', 'shorts') or price > 70 and name like '%shirt%';
} {1|hat|79.0
5|sweatshirt|74.0
6|shorts|70.0}
do_execsql_test where_name_not_in_list_or_name_eq_shirt {
select * from products where name not in ('shirt', 'boots') or name = 'shirt';
} {1|hat|79.0
2|cap|82.0
3|shirt|18.0
4|sweater|25.0
5|sweatshirt|74.0
6|shorts|70.0
7|jeans|78.0
8|sneakers|82.0
10|coat|33.0
11|accessories|81.0}