Merge 'fix(core/translate): fix bug with multiway joins and clean up left join implementation' from Jussi Saurio

There was a bug where this kind of query (a 3-way join with two seeks
and only one scan loop) would emit a wrong jump target for DecrJumpZero:
```
limbo> explain select u.first_name, u2.last_name, p.name from users u join users u2 on u.id=u2.id join products p on u2.id = p.id limit 3;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     21    0                    0   Start at 21
1     OpenReadAsync      0     2     0                    0   table=u, root=2
2     OpenReadAwait      0     0     0                    0
3     OpenReadAsync      1     2     0                    0   table=u2, root=2
4     OpenReadAwait      0     0     0                    0
5     OpenReadAsync      2     3     0                    0   table=p, root=3
6     OpenReadAwait      0     0     0                    0
7     RewindAsync        0     0     0                    0
8     RewindAwait        0     18    0                    0   Rewind table u
9       RowId            0     1     0                    0   r[1]=u.rowid
10      SeekRowid        1     1     18                   0   if (r[1]!=u2.rowid) goto 18
11      RowId            1     2     0                    0   r[2]=u2.rowid
12      SeekRowid        2     2     18                   0   if (r[2]!=p.rowid) goto 18
13      Column           0     1     3                    0   r[3]=u.first_name
14      Column           1     2     4                    0   r[4]=u2.last_name
15      Column           2     1     5                    0   r[5]=p.name
16      ResultRow        3     3     0                    0   output=r[3..5]
17      DecrJumpZero     6     18    0                    0   if (--r[6]==0) goto 18 <--- this should go to Halt!!!
18    NextAsync          0     0     0                    0
19    NextAwait          0     9     0                    0
20    Halt               0     0     0                    0
21    Transaction        0     0     0                    0
22    Integer            3     6     0                    0   r[6]=3
23    Goto               0     1     0                    0
```
due to incorrect label bookkeeping.
fixed the bookkeeping, plus cleaned up unnecessary crap from the left
join bookkeeping at the same time.

Closes #423
This commit is contained in:
jussisaurio
2024-12-03 19:02:28 +02:00
2 changed files with 34 additions and 30 deletions

View File

@@ -32,8 +32,6 @@ pub struct LeftJoinMetadata {
pub set_match_flag_true_label: BranchOffset,
// label for the instruction that checks if the match flag is true
pub check_match_flag_label: BranchOffset,
// label for the instruction where the program jumps to if the current row has a match for the left join
pub on_match_jump_to_label: BranchOffset,
}
// Metadata for handling ORDER BY operations
@@ -382,7 +380,6 @@ fn init_source(
match_flag_register: program.alloc_register(),
set_match_flag_true_label: program.allocate_label(),
check_match_flag_label: program.allocate_label(),
on_match_jump_to_label: program.allocate_label(),
};
metadata.left_joins.insert(*id, lj_metadata);
}
@@ -424,10 +421,7 @@ fn init_source(
let next_row_label = program.allocate_label();
if !matches!(search, Search::PrimaryKeyEq { .. }) {
// Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup.
metadata.next_row_labels.insert(*id, next_row_label);
}
metadata.next_row_labels.insert(*id, next_row_label);
let scan_loop_body_label = program.allocate_label();
metadata.scan_loop_body_labels.push(scan_loop_body_label);
@@ -735,10 +729,7 @@ fn open_loop(
}
}
let jump_label = metadata
.next_row_labels
.get(id)
.unwrap_or(metadata.termination_label_stack.last().unwrap());
let jump_label = metadata.next_row_labels.get(id).unwrap();
if let Search::PrimaryKeyEq { cmp_expr } = search {
let src_reg = program.alloc_register();
@@ -1005,17 +996,21 @@ fn close_loop(
if *outer {
let lj_meta = metadata.left_joins.get(id).unwrap();
// If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already)
// The left join match flag is set to 1 when there is any match on the right table
// (e.g. SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a).
// If the left join match flag has been set to 1, we jump to the next row on the outer table,
// i.e. continue to the next row of t1 in our example.
program.resolve_label(lj_meta.check_match_flag_label, program.offset());
program.emit_insn_with_label_dependency(
Insn::IfPos {
reg: lj_meta.match_flag_register,
target_pc: lj_meta.on_match_jump_to_label,
decrement_by: 0,
},
lj_meta.on_match_jump_to_label,
);
// If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL
let jump_offset = program.offset() + 3;
program.emit_insn(Insn::IfPos {
reg: lj_meta.match_flag_register,
target_pc: jump_offset,
decrement_by: 0,
});
// If the left join match flag is still 0, it means there was no match on the right table,
// but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table.
// In that case, we now enter the routine that does exactly that.
// First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL
let right_cursor_id = match right.as_ref() {
SourceOperator::Scan {
table_reference, ..
@@ -1028,21 +1023,22 @@ fn close_loop(
program.emit_insn(Insn::NullRow {
cursor_id: right_cursor_id,
});
// Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null
// Then we jump to setting the left join match flag to 1 again,
// but this time the right table cursor will set everything to null.
// This leads to emitting a row with cols from the left + nulls from the right,
// and we will end up back in the IfPos instruction above, which will then
// check the match flag again, and since it is now 1, we will jump to the
// next row in the left table.
program.emit_insn_with_label_dependency(
Insn::Goto {
target_pc: lj_meta.set_match_flag_true_label,
},
lj_meta.set_match_flag_true_label,
);
assert!(program.offset() == jump_offset);
}
let next_row_label = if *outer {
metadata.left_joins.get(id).unwrap().on_match_jump_to_label
} else {
*metadata.next_row_labels.get(&right.id()).unwrap()
};
// This points to the NextAsync instruction of the left table
program.resolve_label(next_row_label, program.offset());
close_loop(program, left, metadata, referenced_tables)?;
Ok(())
@@ -1093,6 +1089,7 @@ fn close_loop(
search,
..
} => {
program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset());
if matches!(search, Search::PrimaryKeyEq { .. }) {
// Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction.
return Ok(());
@@ -1104,7 +1101,7 @@ fn close_loop(
}
Search::PrimaryKeyEq { .. } => unreachable!(),
};
program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset());
program.emit_insn(Insn::NextAsync { cursor_id });
let jump_label = metadata.scan_loop_body_labels.pop().unwrap();
program.emit_insn_with_label_dependency(

View File

@@ -170,6 +170,13 @@ do_execsql_test four-way-inner-join {
select u1.first_name, u2.first_name, u3.first_name, u4.first_name from users u1 join users u2 on u1.id = u2.id join users u3 on u2.id = u3.id + 1 join users u4 on u3.id = u4.id + 1 limit 1;
} {Tommy|Tommy|Cindy|Jamie}
# regression test for case where 3-way join that used 1 scan and 2 seeks (point lookups) was buggy due to incorrect jump opcodes
do_execsql_test three-way-inner-join-with-two-seeks {
select * from users u join users u2 on u.id=u2.id join products p on u2.id = p.id limit 3;
} {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|1|hat|79.0
2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|2|cap|82.0
3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|3|shirt|18.0"}
do_execsql_test leftjoin-innerjoin-where {
select u.first_name, p.name, p2.name from users u left join products p on p.name = u.first_name join products p2 on length(p2.name) > 8 where u.first_name = 'Franklin';
} {Franklin||sweatshirt