Merge 'Fix unsafe calls to mark_last_insn_constant()' from Jussi Saurio

Bug found by @alpaylan and described here: https://github.com/tursodatab
ase/limbo/issues/662#issuecomment-2589756954
The reason is that we were, as a bytecode optimization, marking
instructions as constant in places where it was not safe to do so. It is
ONLY safe to mark an instruction as constant if the register allocated
for the result of that instruction is allocated during the translation
of that expression.
In the case of e.g. `Unary(Minus, Number)` we were doing the following:
```
limbo> CREATE TABLE likable_fire (captivating_insolacion INTEGER,mirthful_shihab REAL);
limbo> INSERT INTO likable_fire VALUES (8358895602713329453, -7435384732.72567), (4233751081339504981, -6653311696.714637);
limbo> select * from likable_fire;
8358895602713329453|-6653311696.714637
4233751081339504981|-6653311696.714637
limbo> explain INSERT INTO likable_fire VALUES (8358895602713329453, -7435384732.72567), (4233751081339504981, -6653311696.714637);
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     16    0                    0   Start at 16
1     InitCoroutine      5     7     2                    0
2     Integer            1783463725  2     0                    0   r[2]=8358895602713329453
3     Yield              5     15    0                    0
4     Integer            1454033237  2     0                    0   r[2]=4233751081339504981
5     Yield              5     15    0                    0
6     EndCoroutine       5     0     0                    0
7     OpenWriteAsync     0     2     0                    0
8     OpenWriteAwait     0     0     0                    0
9     Yield              5     15    0                    0
10    NewRowId           0     1     0                    0
11    MakeRecord         2     2     4                    0   r[4]=mkrec(r[2..3])
12    InsertAsync        0     4     1                    0
13    InsertAwait        0     0     0                    0
14    Goto               0     9     0                    0
15    Halt               0     0     0                    0
16    Transaction        0     1     0                    0

<!-- Reg 3 evaluated in a loop, but marked as "constant" twice, resulting in the first value being overwritten! -->
<!-- The reason mark_last_insn_constant() breaks this is because different values are being evaluated into the -->
<!-- Same register in a loop -->
17    Real               0     3     0     -7435384732.72567  0   r[3]=-7435384732.72567
18    Real               0     3     0     -6653311696.714637  0   r[3]=-6653311696.714637

19    Goto               0     1     0                    0
```
This PR removes `.mark_last_insn_constant()` from the affected places.
We should think about a small abstraction to prevent this going forward

Closes #679
This commit is contained in:
Pekka Enberg
2025-01-14 17:15:38 +02:00
2 changed files with 67 additions and 3 deletions

View File

@@ -1651,7 +1651,6 @@ pub fn translate_expr(
dest: target_register,
});
}
program.mark_last_insn_constant();
Ok(target_register)
}
(UnaryOperator::Negative | UnaryOperator::Positive, _) => {
@@ -1690,7 +1689,6 @@ pub fn translate_expr(
dest: target_register,
});
}
program.mark_last_insn_constant();
Ok(target_register)
}
(UnaryOperator::BitwiseNot, ast::Expr::Literal(ast::Literal::Null)) => {
@@ -1698,7 +1696,6 @@ pub fn translate_expr(
dest: target_register,
dest_end: None,
});
program.mark_last_insn_constant();
Ok(target_register)
}
(UnaryOperator::BitwiseNot, _) => {

View File

@@ -110,6 +110,73 @@ mod tests {
Ok(())
}
#[test]
/// There was a regression with inserting multiple rows with a column containing an unary operator :)
/// https://github.com/tursodatabase/limbo/pull/679
fn test_regression_multi_row_insert() -> anyhow::Result<()> {
let _ = env_logger::try_init();
let tmp_db = TempDatabase::new("CREATE TABLE test (x REAL);");
let conn = tmp_db.connect_limbo();
let insert_query = "INSERT INTO test VALUES (-2), (-3), (-1)";
let list_query = "SELECT * FROM test";
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
StepResult::IO => {
tmp_db.io.run_once()?;
}
StepResult::Done => break,
_ => unreachable!(),
}
},
Ok(None) => {}
Err(err) => {
eprintln!("{}", err);
}
};
do_flush(&conn, &tmp_db)?;
let mut current_read_index = 1;
let expected_ids = vec![-3, -2, -1];
let mut actual_ids = Vec::new();
match conn.query(list_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
StepResult::Row(row) => {
let first_value = row.values.first().expect("missing id");
let id = match first_value {
Value::Float(f) => *f as i32,
_ => panic!("expected float"),
};
actual_ids.push(id);
current_read_index += 1;
}
StepResult::IO => {
tmp_db.io.run_once()?;
}
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => {
panic!("Database is busy");
}
}
},
Ok(None) => {}
Err(err) => {
eprintln!("{}", err);
}
}
assert_eq!(current_read_index, 4); // Verify we read all rows
// sort ids
actual_ids.sort();
assert_eq!(actual_ids, expected_ids);
Ok(())
}
#[test]
fn test_simple_overflow_page() -> anyhow::Result<()> {
let _ = env_logger::try_init();