Files
turso/core/translate/main_loop.rs
Pekka Enberg 7f2525ac27 Merge 'Implement create virtual table using vtab modules, more work on virtual tables' from Preston Thorpe
This PR started out as one to improve the API of extensions but I ended
up building on top of this quite a bit and it just kept going. Sorry
this one is so large but there wasn't really a good stopping point, as
it kept leaving stuff in broken states.
**VCreate**: Support for `CREATE VIRTUAL TABLE t USING vtab_module`
**VUpdate**: Support for `INSERT` and `DELETE` methods on virtual
tables.
Sqlite uses `xUpdate` function with the `VUpdate` opcode to handle all
insert/update/delete functionality in virtual tables..
have to just document that:
```
if args[0] == NULL:  INSERT args[1] the values in args[2..]

if args[1] == NULL: DELETE args[0]

if args[0] != NULL && len(args) > 2: Update values=args[2..]  rowid=args[0]
```
I know I asked @jussisaurio on discord about this already, but it just
sucked so bad that I added some internal translation so we could expose
a [nice API](https://github.com/tursodatabase/limbo/pull/996/files#diff-
3e8f8a660b11786745b48b528222d11671e9f19fa00a032a4eefb5412e8200d1R54) and
handle the logic ourselves while keeping with sqlite's opcodes.
I'll change it back if I have to, I just thought it was genuinely awful
to have to rely on comments to explain all that to extension authors.
The included extension is not meant to be a legitimately useful one, it
is there for testing purposes. I did something similar in #960 using a
test extension, so I figure when they are both merged, I will go back
and combine them into one since you can do many kinds at once, and that
way it will reduce the amount of crates and therefore compile time.
1. Remaining opcodes.
2. `UPDATE` (when we support the syntax)
3. `xConnect` - expose API for a DB connection to a vtab so it can
perform arbitrary queries.

Closes #996
2025-02-25 15:31:12 +02:00

834 lines
37 KiB
Rust

use limbo_sqlite3_parser::ast;
use crate::{
schema::Table,
translate::result_row::emit_select_result,
vdbe::{
builder::{CursorType, ProgramBuilder},
insn::{CmpInsFlags, Insn},
BranchOffset,
},
Result,
};
use super::{
aggregation::translate_aggregation_step,
emitter::{OperationMode, TranslateCtx},
expr::{translate_condition_expr, translate_expr, ConditionMetadata},
order_by::{order_by_sorter_insert, sorter_insert},
plan::{
IterationDirection, Operation, Search, SelectPlan, SelectQueryType, TableReference,
WhereTerm,
},
};
// Metadata for handling LEFT JOIN operations
#[derive(Debug)]
pub struct LeftJoinMetadata {
// integer register that holds a flag that is set to true if the current row has a match for the left join
pub reg_match_flag: usize,
// label for the instruction that sets the match flag to true
pub label_match_flag_set_true: BranchOffset,
// label for the instruction that checks if the match flag is true
pub label_match_flag_check_value: BranchOffset,
}
/// Jump labels for each loop in the query's main execution loop
#[derive(Debug, Clone, Copy)]
pub struct LoopLabels {
/// jump to the start of the loop body
loop_start: BranchOffset,
/// jump to the NextAsync instruction (or equivalent)
next: BranchOffset,
/// jump to the end of the loop, exiting it
loop_end: BranchOffset,
}
impl LoopLabels {
pub fn new(program: &mut ProgramBuilder) -> Self {
Self {
loop_start: program.allocate_label(),
next: program.allocate_label(),
loop_end: program.allocate_label(),
}
}
}
/// Initialize resources needed for the source operators (tables, joins, etc)
pub fn init_loop(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
tables: &[TableReference],
mode: &OperationMode,
) -> Result<()> {
assert!(
t_ctx.meta_left_joins.len() == tables.len(),
"meta_left_joins length does not match tables length"
);
for (table_index, table) in tables.iter().enumerate() {
// Initialize bookkeeping for OUTER JOIN
if let Some(join_info) = table.join_info.as_ref() {
if join_info.outer {
let lj_metadata = LeftJoinMetadata {
reg_match_flag: program.alloc_register(),
label_match_flag_set_true: program.allocate_label(),
label_match_flag_check_value: program.allocate_label(),
};
t_ctx.meta_left_joins[table_index] = Some(lj_metadata);
}
}
match &table.op {
Operation::Scan { .. } => {
let cursor_id = program.alloc_cursor_id(
Some(table.identifier.clone()),
match &table.table {
Table::BTree(_) => CursorType::BTreeTable(table.btree().unwrap().clone()),
Table::Virtual(_) => {
CursorType::VirtualTable(table.virtual_table().unwrap().clone())
}
other => panic!("Invalid table reference type in Scan: {:?}", other),
},
);
match (mode, &table.table) {
(OperationMode::SELECT, Table::BTree(_)) => {
let root_page = table.btree().unwrap().root_page;
program.emit_insn(Insn::OpenReadAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenReadAwait {});
}
(OperationMode::DELETE, Table::BTree(_)) => {
let root_page = table.btree().unwrap().root_page;
program.emit_insn(Insn::OpenWriteAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
}
(OperationMode::SELECT, Table::Virtual(_)) => {
program.emit_insn(Insn::VOpenAsync { cursor_id });
program.emit_insn(Insn::VOpenAwait {});
}
(OperationMode::DELETE, Table::Virtual(_)) => {
program.emit_insn(Insn::VOpenAsync { cursor_id });
program.emit_insn(Insn::VOpenAwait {});
}
_ => {
unimplemented!()
}
}
}
Operation::Search(search) => {
let table_cursor_id = program.alloc_cursor_id(
Some(table.identifier.clone()),
CursorType::BTreeTable(table.btree().unwrap().clone()),
);
match mode {
OperationMode::SELECT => {
program.emit_insn(Insn::OpenReadAsync {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page(),
});
program.emit_insn(Insn::OpenReadAwait {});
}
OperationMode::DELETE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: table_cursor_id,
root_page: table.table.get_root_page(),
});
program.emit_insn(Insn::OpenWriteAwait {});
}
_ => {
unimplemented!()
}
}
if let Search::IndexSearch { index, .. } = search {
let index_cursor_id = program.alloc_cursor_id(
Some(index.name.clone()),
CursorType::BTreeIndex(index.clone()),
);
match mode {
OperationMode::SELECT => {
program.emit_insn(Insn::OpenReadAsync {
cursor_id: index_cursor_id,
root_page: index.root_page,
});
program.emit_insn(Insn::OpenReadAwait);
}
OperationMode::DELETE => {
program.emit_insn(Insn::OpenWriteAsync {
cursor_id: index_cursor_id,
root_page: index.root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
}
_ => {
unimplemented!()
}
}
}
}
_ => {}
}
}
Ok(())
}
/// Set up the main query execution loop
/// For example in the case of a nested table scan, this means emitting the RewindAsync instruction
/// for all tables involved, outermost first.
pub fn open_loop(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
tables: &[TableReference],
predicates: &[WhereTerm],
) -> Result<()> {
for (table_index, table) in tables.iter().enumerate() {
let LoopLabels {
loop_start,
loop_end,
next,
} = *t_ctx
.labels_main_loop
.get(table_index)
.expect("table has no loop labels");
// Each OUTER JOIN has a "match flag" that is initially set to false,
// and is set to true when a match is found for the OUTER JOIN.
// This is used to determine whether to emit actual columns or NULLs for the columns of the right table.
if let Some(join_info) = table.join_info.as_ref() {
if join_info.outer {
let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap();
program.emit_insn(Insn::Integer {
value: 0,
dest: lj_meta.reg_match_flag,
});
}
}
match &table.op {
Operation::Subquery { plan, .. } => {
let (yield_reg, coroutine_implementation_start) = match &plan.query_type {
SelectQueryType::Subquery {
yield_reg,
coroutine_implementation_start,
} => (*yield_reg, *coroutine_implementation_start),
_ => unreachable!("Subquery operator with non-subquery query type"),
};
// In case the subquery is an inner loop, it needs to be reinitialized on each iteration of the outer loop.
program.emit_insn(Insn::InitCoroutine {
yield_reg,
jump_on_definition: BranchOffset::Offset(0),
start_offset: coroutine_implementation_start,
});
program.resolve_label(loop_start, program.offset());
// A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor,
// it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row.
// When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack,
// which in this case is the end of the Yield-Goto loop in the parent query.
program.emit_insn(Insn::Yield {
yield_reg,
end_offset: loop_end,
});
// These are predicates evaluated outside of the subquery,
// so they are translated here.
// E.g. SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true,
jump_target_when_false: next,
};
translate_condition_expr(
program,
tables,
&cond.expr,
condition_metadata,
&t_ctx.resolver,
)?;
program.resolve_label(jump_target_when_true, program.offset());
}
}
Operation::Scan { iter_dir } => {
let cursor_id = program.resolve_cursor_id(&table.identifier);
if !matches!(&table.table, Table::Virtual(_)) {
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
program.emit_insn(Insn::LastAsync { cursor_id });
} else {
program.emit_insn(Insn::RewindAsync { cursor_id });
}
}
match &table.table {
Table::BTree(_) => program.emit_insn(
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
Insn::LastAwait {
cursor_id,
pc_if_empty: loop_end,
}
} else {
Insn::RewindAwait {
cursor_id,
pc_if_empty: loop_end,
}
},
),
Table::Virtual(ref table) => {
let start_reg = program
.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0));
let mut cur_reg = start_reg;
let args = match table.args.as_ref() {
Some(args) => args,
None => &vec![],
};
for arg in args {
let reg = cur_reg;
cur_reg += 1;
let _ =
translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?;
}
program.emit_insn(Insn::VFilter {
cursor_id,
pc_if_empty: loop_end,
arg_count: table.args.as_ref().map_or(0, |args| args.len()),
args_reg: start_reg,
});
}
other => panic!("Unsupported table reference type: {:?}", other),
}
program.resolve_label(loop_start, program.offset());
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true,
jump_target_when_false: next,
};
translate_condition_expr(
program,
tables,
&cond.expr,
condition_metadata,
&t_ctx.resolver,
)?;
program.resolve_label(jump_target_when_true, program.offset());
}
}
Operation::Search(search) => {
let table_cursor_id = program.resolve_cursor_id(&table.identifier);
// Open the loop for the index search.
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
if !matches!(search, Search::RowidEq { .. }) {
let index_cursor_id = if let Search::IndexSearch { index, .. } = search {
Some(program.resolve_cursor_id(&index.name))
} else {
None
};
let cmp_reg = program.alloc_register();
let (cmp_expr, cmp_op) = match search {
Search::IndexSearch {
cmp_expr, cmp_op, ..
} => (cmp_expr, cmp_op),
Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op),
Search::RowidEq { .. } => unreachable!(),
};
// TODO this only handles ascending indexes
match cmp_op {
ast::Operator::Equals
| ast::Operator::Greater
| ast::Operator::GreaterEquals => {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
cmp_reg,
&t_ctx.resolver,
)?;
}
ast::Operator::Less | ast::Operator::LessEquals => {
program.emit_insn(Insn::Null {
dest: cmp_reg,
dest_end: None,
});
}
_ => unreachable!(),
}
// If we try to seek to a key that is not present in the table/index, we exit the loop entirely.
program.emit_insn(match cmp_op {
ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
},
ast::Operator::Greater
| ast::Operator::Less
| ast::Operator::LessEquals => Insn::SeekGT {
is_index: index_cursor_id.is_some(),
cursor_id: index_cursor_id.unwrap_or(table_cursor_id),
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
},
_ => unreachable!(),
});
if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals {
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
cmp_reg,
&t_ctx.resolver,
)?;
}
program.resolve_label(loop_start, program.offset());
// TODO: We are currently only handling ascending indexes.
// For conditions like index_key > 10, we have already sought to the first key greater than 10, and can just scan forward.
// For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end.
// For conditions like index_key = 10, we have already sought to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end.
// For conditions like index_key >= 10, we have already sought to the first key greater than or equal to 10, and can just scan forward.
// For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end.
// For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all.
//
// For primary key searches we emit RowId and then compare it to the seek value.
match cmp_op {
ast::Operator::Equals | ast::Operator::LessEquals => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGT {
cursor_id: index_cursor_id,
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Gt {
lhs: rowid_reg,
rhs: cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
ast::Operator::Less => {
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::IdxGE {
cursor_id: index_cursor_id,
start_reg: cmp_reg,
num_regs: 1,
target_pc: loop_end,
});
} else {
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id: table_cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Ge {
lhs: rowid_reg,
rhs: cmp_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
});
}
}
_ => {}
}
if let Some(index_cursor_id) = index_cursor_id {
program.emit_insn(Insn::DeferredSeek {
index_cursor_id,
table_cursor_id,
});
}
}
if let Search::RowidEq { cmp_expr } = search {
let src_reg = program.alloc_register();
translate_expr(
program,
Some(tables),
&cmp_expr.expr,
src_reg,
&t_ctx.resolver,
)?;
program.emit_insn(Insn::SeekRowid {
cursor_id: table_cursor_id,
src_reg,
target_pc: next,
});
}
for cond in predicates
.iter()
.filter(|cond| cond.should_eval_at_loop(table_index))
{
let jump_target_when_true = program.allocate_label();
let condition_metadata = ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true,
jump_target_when_false: next,
};
translate_condition_expr(
program,
tables,
&cond.expr,
condition_metadata,
&t_ctx.resolver,
)?;
program.resolve_label(jump_target_when_true, program.offset());
}
}
}
// Set the match flag to true if this is a LEFT JOIN.
// At this point of execution we are going to emit columns for the left table,
// and either emit columns or NULLs for the right table, depending on whether the null_flag is set
// for the right table's cursor.
if let Some(join_info) = table.join_info.as_ref() {
if join_info.outer {
let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap();
program.resolve_label(lj_meta.label_match_flag_set_true, program.offset());
program.emit_insn(Insn::Integer {
value: 1,
dest: lj_meta.reg_match_flag,
});
}
}
}
Ok(())
}
/// SQLite (and so Limbo) processes joins as a nested loop.
/// The loop may emit rows to various destinations depending on the query:
/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match)
/// - an ORDER BY sorter (when there is no GROUP BY, but there is an ORDER BY)
/// - an AggStep (the columns are collected for aggregation, which is finished later)
/// - a QueryResult (there is none of the above, so the loop either emits a ResultRow, or if it's a subquery, yields to the parent query)
enum LoopEmitTarget {
GroupBySorter,
OrderBySorter,
AggStep,
QueryResult,
}
/// Emits the bytecode for the inner loop of a query.
/// At this point the cursors for all tables have been opened and rewound.
pub fn emit_loop(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
plan: &mut SelectPlan,
) -> Result<()> {
// if we have a group by, we emit a record into the group by sorter.
if plan.group_by.is_some() {
return emit_loop_source(program, t_ctx, plan, LoopEmitTarget::GroupBySorter);
}
// if we DONT have a group by, but we have aggregates, we emit without ResultRow.
// we also do not need to sort because we are emitting a single row.
if !plan.aggregates.is_empty() {
return emit_loop_source(program, t_ctx, plan, LoopEmitTarget::AggStep);
}
// if we DONT have a group by, but we have an order by, we emit a record into the order by sorter.
if plan.order_by.is_some() {
return emit_loop_source(program, t_ctx, plan, LoopEmitTarget::OrderBySorter);
}
// if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero.
emit_loop_source(program, t_ctx, plan, LoopEmitTarget::QueryResult)
}
/// This is a helper function for inner_loop_emit,
/// which does a different thing depending on the emit target.
/// See the InnerLoopEmitTarget enum for more details.
fn emit_loop_source(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
plan: &SelectPlan,
emit_target: LoopEmitTarget,
) -> Result<()> {
match emit_target {
LoopEmitTarget::GroupBySorter => {
let group_by = plan.group_by.as_ref().unwrap();
let aggregates = &plan.aggregates;
let sort_keys_count = group_by.exprs.len();
let aggregate_arguments_count = plan
.aggregates
.iter()
.map(|agg| agg.args.len())
.sum::<usize>();
let column_count = sort_keys_count + aggregate_arguments_count;
let start_reg = program.alloc_registers(column_count);
let mut cur_reg = start_reg;
// The group by sorter rows will contain the grouping keys first. They are also the sort keys.
for expr in group_by.exprs.iter() {
let key_reg = cur_reg;
cur_reg += 1;
translate_expr(
program,
Some(&plan.table_references),
expr,
key_reg,
&t_ctx.resolver,
)?;
}
// Then we have the aggregate arguments.
for agg in aggregates.iter() {
// Here we are collecting scalars for the group by sorter, which will include
// both the group by expressions and the aggregate arguments.
// e.g. in `select u.first_name, sum(u.age) from users group by u.first_name`
// the sorter will have two scalars: u.first_name and u.age.
// these are then sorted by u.first_name, and for each u.first_name, we sum the u.age.
// the actual aggregation is done later.
for expr in agg.args.iter() {
let agg_reg = cur_reg;
cur_reg += 1;
translate_expr(
program,
Some(&plan.table_references),
expr,
agg_reg,
&t_ctx.resolver,
)?;
}
}
// TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate.
// We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed.
let group_by_metadata = t_ctx.meta_group_by.as_ref().unwrap();
sorter_insert(
program,
start_reg,
column_count,
group_by_metadata.sort_cursor,
group_by_metadata.reg_sorter_key,
);
Ok(())
}
LoopEmitTarget::OrderBySorter => order_by_sorter_insert(program, t_ctx, plan),
LoopEmitTarget::AggStep => {
let num_aggs = plan.aggregates.len();
let start_reg = program.alloc_registers(num_aggs);
t_ctx.reg_agg_start = Some(start_reg);
// In planner.rs, we have collected all aggregates from the SELECT clause, including ones where the aggregate is embedded inside
// a more complex expression. Some examples: length(sum(x)), sum(x) + avg(y), sum(x) + 1, etc.
// The result of those more complex expressions depends on the final result of the aggregate, so we don't translate the complete expressions here.
// Instead, we accumulate the intermediate results of all aggreagates, and evaluate any expressions that do not contain aggregates.
for (i, agg) in plan.aggregates.iter().enumerate() {
let reg = start_reg + i;
translate_aggregation_step(
program,
&plan.table_references,
agg,
reg,
&t_ctx.resolver,
)?;
}
for (i, rc) in plan.result_columns.iter().enumerate() {
if rc.contains_aggregates {
// Do nothing, aggregates are computed above
// if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet,
// it will be computed after the aggregations are finalized.
continue;
}
let reg = start_reg + num_aggs + i;
translate_expr(
program,
Some(&plan.table_references),
&rc.expr,
reg,
&t_ctx.resolver,
)?;
}
Ok(())
}
LoopEmitTarget::QueryResult => {
assert!(
plan.aggregates.is_empty(),
"We should not get here with aggregates"
);
let offset_jump_to = t_ctx
.labels_main_loop
.first()
.map(|l| l.next)
.or(t_ctx.label_main_loop_end);
emit_select_result(
program,
t_ctx,
plan,
t_ctx.label_main_loop_end,
offset_jump_to,
)?;
Ok(())
}
}
}
/// Closes the loop for a given source operator.
/// For example in the case of a nested table scan, this means emitting the NextAsync instruction
/// for all tables involved, innermost first.
pub fn close_loop(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx,
tables: &[TableReference],
) -> Result<()> {
// We close the loops for all tables in reverse order, i.e. innermost first.
// OPEN t1
// OPEN t2
// OPEN t3
// <do stuff>
// CLOSE t3
// CLOSE t2
// CLOSE t1
for (idx, table) in tables.iter().rev().enumerate() {
let table_index = tables.len() - idx - 1;
let loop_labels = *t_ctx
.labels_main_loop
.get(table_index)
.expect("source has no loop labels");
match &table.op {
Operation::Subquery { .. } => {
program.resolve_label(loop_labels.next, program.offset());
// A subquery has no cursor to call NextAsync on, so it just emits a Goto
// to the Yield instruction, which in turn jumps back to the main loop of the subquery,
// so that the next row from the subquery can be read.
program.emit_insn(Insn::Goto {
target_pc: loop_labels.loop_start,
});
}
Operation::Scan { iter_dir, .. } => {
program.resolve_label(loop_labels.next, program.offset());
let cursor_id = program.resolve_cursor_id(&table.identifier);
match &table.table {
Table::BTree(_) => {
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
program.emit_insn(Insn::PrevAsync { cursor_id });
} else {
program.emit_insn(Insn::NextAsync { cursor_id });
}
if iter_dir
.as_ref()
.is_some_and(|dir| *dir == IterationDirection::Backwards)
{
program.emit_insn(Insn::PrevAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
} else {
program.emit_insn(Insn::NextAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
}
}
Table::Virtual(_) => {
program.emit_insn(Insn::VNext {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
}
other => unreachable!("Unsupported table reference type: {:?}", other),
}
}
Operation::Search(search) => {
program.resolve_label(loop_labels.next, program.offset());
// Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction.
if !matches!(search, Search::RowidEq { .. }) {
let cursor_id = match search {
Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name),
Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier),
Search::RowidEq { .. } => unreachable!(),
};
program.emit_insn(Insn::NextAsync { cursor_id });
program.emit_insn(Insn::NextAwait {
cursor_id,
pc_if_next: loop_labels.loop_start,
});
}
}
}
program.resolve_label(loop_labels.loop_end, program.offset());
// Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back
// and emit a row with NULLs for the right table, and then jump back to the next row of the left table.
if let Some(join_info) = table.join_info.as_ref() {
if join_info.outer {
let lj_meta = t_ctx.meta_left_joins[table_index].as_ref().unwrap();
// The left join match flag is set to 1 when there is any match on the right table
// (e.g. SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a).
// If the left join match flag has been set to 1, we jump to the next row on the outer table,
// i.e. continue to the next row of t1 in our example.
program.resolve_label(lj_meta.label_match_flag_check_value, program.offset());
let jump_offset = program.offset().add(3u32);
program.emit_insn(Insn::IfPos {
reg: lj_meta.reg_match_flag,
target_pc: jump_offset,
decrement_by: 0,
});
// If the left join match flag is still 0, it means there was no match on the right table,
// but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table.
// In that case, we now enter the routine that does exactly that.
// First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL
let right_cursor_id = match &table.op {
Operation::Scan { .. } => program.resolve_cursor_id(&table.identifier),
Operation::Search { .. } => program.resolve_cursor_id(&table.identifier),
_ => unreachable!(),
};
program.emit_insn(Insn::NullRow {
cursor_id: right_cursor_id,
});
// Then we jump to setting the left join match flag to 1 again,
// but this time the right table cursor will set everything to null.
// This leads to emitting a row with cols from the left + nulls from the right,
// and we will end up back in the IfPos instruction above, which will then
// check the match flag again, and since it is now 1, we will jump to the
// next row in the left table.
program.emit_insn(Insn::Goto {
target_pc: lj_meta.label_match_flag_set_true,
});
assert_eq!(program.offset(), jump_offset);
}
}
}
Ok(())
}