From 8f202e80155bacfb00310a46030fa39a4b6b8c99 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 1 Apr 2025 19:42:44 -0300 Subject: [PATCH 01/56] separate memory tests to track large blob insertions --- Makefile | 6 +- testing/cli_tests/memory.py | 113 ++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100755 testing/cli_tests/memory.py diff --git a/Makefile b/Makefile index 5202112d1..46ef06c98 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ limbo-wasm: cargo build --package limbo-wasm --target wasm32-wasi .PHONY: limbo-wasm -test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions +test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory .PHONY: test test-extensions: limbo @@ -94,6 +94,10 @@ test-json: SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test .PHONY: test-json +test-memory: + SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py +.PHONY: test-memory + clickbench: ./perf/clickbench/benchmark.sh .PHONY: clickbench diff --git a/testing/cli_tests/memory.py b/testing/cli_tests/memory.py new file mode 100755 index 000000000..e96df3475 --- /dev/null +++ b/testing/cli_tests/memory.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +import os +from test_limbo_cli import TestLimboShell + + +sqlite_exec = "./target/debug/limbo" +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +def stub_memory_test( + limbo: TestLimboShell, + name: str, + blob_size: int = 1024**2, + vals: int = 100, + blobs: bool = True, +): + # zero_blob_size = 1024 **2 + zero_blob = "0" * blob_size * 2 + # vals = 100 + big_stmt = ["CREATE TABLE temp (t1 BLOB, t2 INTEGER);"] + big_stmt = big_stmt + [ + f"INSERT INTO temp (t1) VALUES (zeroblob({blob_size}));" + if i % 2 == 0 and blobs + else f"INSERT INTO temp (t2) VALUES ({i});" + for i in range(vals * 2) + ] + expected = [] + for i in 
range(vals * 2): + if i % 2 == 0 and blobs: + big_stmt.append(f"SELECT hex(t1) FROM temp LIMIT 1 OFFSET {i};") + expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM temp LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM temp;") + expected.append(str(vals * 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn(big_stmt, lambda res: validate_with_expected(res, expected), name) + + +# TODO no delete tests for now because of limbo outputs some debug information on delete +def memory_tests() -> list[dict]: + tests = [] + + for vals in range(0, 1000, 100): + tests.append( + { + "name": f"small-insert-integer-vals-{vals}", + "vals": vals, + "blobs": False, + } + ) + + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{1024}", + "vals": 10, + "blob_size": 1024, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{1024}", + "vals": 100, + "blob_size": 1024, + } + ) + + for blob_size in range(0, (1024 * 1024) + 1, 1024 * 4**4): + if blob_size == 0: + continue + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 10, + "blob_size": blob_size, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 100, + "blob_size": blob_size, + } + ) + return tests + + +def main(): + tests = memory_tests() + # TODO see how to parallelize this loop with different subprocesses + for test in tests: + limbo = TestLimboShell() + try: + stub_memory_test(limbo, **test) + except Exception as e: + print(f"Test FAILED: {e}") + limbo.quit() + exit(1) + limbo.quit() # remove this line when `with` statement is supported for TestLimboShell + print("All tests passed successfully.") + + +if __name__ == "__main__": + main() From 4a08b98babde5bc0fda2d494f1af6beba9732deb Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Wed, 2 Apr 2025 16:13:15 +0300 Subject: [PATCH 02/56] 
implemented strict table --- core/schema.rs | 18 +++-- core/translate/insert.rs | 11 ++++ core/types.rs | 23 +++++++ core/vdbe/execute.rs | 139 ++++++++++++++++++++++++++++++++++++++- core/vdbe/explain.rs | 14 ++++ core/vdbe/insn.rs | 16 ++++- 6 files changed, 211 insertions(+), 10 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index dda37d15b..21bed120d 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -161,6 +161,7 @@ pub struct BTreeTable { pub primary_key_column_names: Vec, pub columns: Vec, pub has_rowid: bool, + pub is_strict: bool, } impl BTreeTable { @@ -262,12 +263,14 @@ fn create_table( let mut has_rowid = true; let mut primary_key_column_names = vec![]; let mut cols = vec![]; + let is_strict: bool; match body { CreateTableBody::ColumnsAndConstraints { columns, constraints, options, } => { + is_strict = options.contains(TableOptions::STRICT); if let Some(constraints) = constraints { for c in constraints { if let limbo_sqlite3_parser::ast::TableConstraint::PrimaryKey { @@ -390,6 +393,7 @@ fn create_table( has_rowid, primary_key_column_names, columns: cols, + is_strict, }) } @@ -456,7 +460,7 @@ pub fn affinity(datatype: &str) -> Affinity { } // Rule 3: BLOB or empty -> BLOB affinity (historically called NONE) - if datatype.contains("BLOB") || datatype.is_empty() { + if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") { return Affinity::Blob; } @@ -508,11 +512,11 @@ pub enum Affinity { Numeric, } -pub const SQLITE_AFF_TEXT: char = 'a'; -pub const SQLITE_AFF_NONE: char = 'b'; // Historically called NONE, but it's the same as BLOB -pub const SQLITE_AFF_NUMERIC: char = 'c'; -pub const SQLITE_AFF_INTEGER: char = 'd'; -pub const SQLITE_AFF_REAL: char = 'e'; +pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB +pub const SQLITE_AFF_TEXT: char = 'B'; +pub const SQLITE_AFF_NUMERIC: char = 'C'; +pub const SQLITE_AFF_INTEGER: char = 'D'; +pub const SQLITE_AFF_REAL: char = 'E'; impl 
Affinity { /// This is meant to be used in opcodes like Eq, which state: @@ -552,6 +556,7 @@ pub fn sqlite_schema_table() -> BTreeTable { root_page: 1, name: "sqlite_schema".to_string(), has_rowid: true, + is_strict: false, primary_key_column_names: vec![], columns: vec![ Column { @@ -1046,6 +1051,7 @@ mod tests { root_page: 0, name: "t1".to_string(), has_rowid: true, + is_strict: false, primary_key_column_names: vec!["nonexistent".to_string()], columns: vec![Column { name: Some("a".to_string()), diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 7713b9355..9ee253da5 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -251,6 +251,17 @@ pub fn translate_insert( program.resolve_label(make_record_label, program.offset()); } + match table.btree() { + Some(t) if t.is_strict => { + program.emit_insn(Insn::TypeCheck { + start_reg: column_registers_start, + count: num_cols, + check_generated: true, + table_reference: Rc::clone(&t), + }); + } + _ => (), + } // Create and insert the record program.emit_insn(Insn::MakeRecord { start_reg: column_registers_start, diff --git a/core/types.rs b/core/types.rs index 1556ee100..631bc7492 100644 --- a/core/types.rs +++ b/core/types.rs @@ -22,6 +22,20 @@ pub enum OwnedValueType { Error, } +impl Display for OwnedValueType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let value = match self { + Self::Null => "NULL", + Self::Integer => "INT", + Self::Float => "REAL", + Self::Blob => "BLOB", + Self::Text => "TEXT", + Self::Error => "ERROR", + }; + write!(f, "{}", value) + } +} + #[derive(Debug, Clone, PartialEq)] pub enum TextSubtype { Text, @@ -69,6 +83,15 @@ impl Text { } } +impl From for Text { + fn from(value: String) -> Self { + Text { + value: value.into_bytes(), + subtype: TextSubtype::Text, + } + } +} + impl TextRef { pub fn as_str(&self) -> &str { unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) } diff --git a/core/vdbe/execute.rs 
b/core/vdbe/execute.rs index 09c283ecd..cf1b9e03e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1,5 +1,5 @@ #![allow(unused_variables)] -use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; +use crate::error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}; use crate::ext::ExtValue; use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc}; use crate::functions::datetime::{ @@ -10,11 +10,13 @@ use std::{borrow::BorrowMut, rc::Rc}; use crate::pseudo::PseudoCursor; use crate::result::LimboResult; + use crate::schema::{affinity, Affinity}; use crate::storage::btree::{BTreeCursor, BTreeKey}; + use crate::storage::wal::CheckpointResult; use crate::types::{ - AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp, + AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, SeekOp, }; use crate::util::{ cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real, @@ -1341,6 +1343,68 @@ pub fn op_column( Ok(InsnFunctionStepResult::Step) } +pub fn op_type_check( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::TypeCheck { + start_reg, + count, + check_generated, + table_reference, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + assert_eq!(table_reference.is_strict, true); + state.registers[*start_reg..*start_reg + *count] + .iter_mut() + .zip(table_reference.columns.iter()) + .try_for_each(|(reg, col)| { + // INT PRIMARY KEY is not row_id_alias so we throw error if this col is NULL + if !col.is_rowid_alias + && col.primary_key + && matches!(reg.get_owned_value(), OwnedValue::Null) + { + bail_constraint_error!( + "NOT NULL constraint failed: {}.{} ({})", + &table_reference.name, + col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), + SQLITE_CONSTRAINT + ) + } else if col.is_rowid_alias { + // If it is INTEGER PRIMARY KEY 
we let sqlite assign row_id + return Ok(()); + }; + let col_affinity = col.affinity(); + let ty_str = col.ty_str.as_str(); + let applied = apply_affinity_char(reg, col_affinity); + let value_type = reg.get_owned_value().value_type(); + match (ty_str, value_type) { + ("INTEGER" | "INT", OwnedValueType::Integer) => {} + ("REAL", OwnedValueType::Float) => {} + ("BLOB", OwnedValueType::Blob) => {} + ("TEXT", OwnedValueType::Text) => {} + ("ANY", _) => {} + (t, v) => bail_constraint_error!( + "cannot store {} value in {} column {}.{} ({})", + v, + t, + &table_reference.name, + col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), + SQLITE_CONSTRAINT + ), + }; + Ok(()) + })?; + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_make_record( program: &Program, state: &mut ProgramState, @@ -5012,6 +5076,77 @@ fn exec_if(reg: &OwnedValue, jump_if_null: bool, not: bool) -> bool { } } +fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool { + if let Register::OwnedValue(value) = target { + if matches!(value, OwnedValue::Blob(_)) { + return true; + } + match affinity { + Affinity::Blob => return true, + Affinity::Text => { + if matches!(value, OwnedValue::Text(_) | OwnedValue::Null) { + return true; + } + let text = value.to_string(); + *value = OwnedValue::Text(text.into()); + return true; + } + Affinity::Integer | Affinity::Numeric => { + if matches!(value, OwnedValue::Integer(_)) { + return true; + } + if !matches!(value, OwnedValue::Text(_) | OwnedValue::Float(_)) { + return true; + } + + if let OwnedValue::Float(fl) = *value { + if let Ok(int) = cast_real_to_integer(fl).map(OwnedValue::Integer) { + *value = int; + return true; + } + return false; + } + + let text = value.to_text().unwrap(); + let Ok(num) = checked_cast_text_to_numeric(&text) else { + return false; + }; + + *value = match &num { + OwnedValue::Float(fl) => { + cast_real_to_integer(*fl) + .map(OwnedValue::Integer) + .unwrap_or(num); + return true; + } + 
OwnedValue::Integer(_) if text.starts_with("0x") => { + return false; + } + _ => num, + }; + } + + Affinity::Real => { + if let OwnedValue::Integer(i) = value { + *value = OwnedValue::Float(*i as f64); + return true; + } else if let OwnedValue::Text(t) = value { + if t.as_str().starts_with("0x") { + return false; + } + if let Ok(num) = checked_cast_text_to_numeric(t.as_str()) { + *value = num; + return true; + } else { + return false; + } + } + } + }; + } + return true; +} + fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { if matches!(value, OwnedValue::Null) { return OwnedValue::Null; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 66c68d9c0..7b9b02d2a 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -528,6 +528,20 @@ pub fn insn_to_str( ), ) } + Insn::TypeCheck { + start_reg, + count, + check_generated, + .. + } => ( + "TypeCheck", + *start_reg as i32, + *count as i32, + *check_generated as i32, + OwnedValue::build_text(""), + 0, + String::from(""), + ), Insn::MakeRecord { start_reg, count, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 8d3a9afca..d7fc39609 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,8 +1,10 @@ use std::num::NonZero; +use std::rc::Rc; use super::{ cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx, }; +use crate::schema::BTreeTable; use crate::storage::wal::CheckpointMode; use crate::types::{OwnedValue, Record}; use limbo_macros::Description; @@ -344,7 +346,16 @@ pub enum Insn { dest: usize, }, - /// Make a record and write it to destination register. + TypeCheck { + start_reg: usize, // P1 + count: usize, // P2 + /// GENERATED ALWAYS AS ... STATIC columns are only checked if P3 is zero. + /// When P3 is non-zero, no type checking occurs for static generated columns. + check_generated: bool, // P3 + table_reference: Rc, // P4 + }, + + // Make a record and write it to destination register. 
MakeRecord { start_reg: usize, // P1 count: usize, // P2 @@ -427,7 +438,7 @@ pub enum Insn { register: usize, }, - /// Write a string value into a register. + // Write a string value into a register. String8 { value: String, dest: usize, @@ -1271,6 +1282,7 @@ impl Insn { Insn::LastAwait { .. } => execute::op_last_await, Insn::Column { .. } => execute::op_column, + Insn::TypeCheck { .. } => execute::op_type_check, Insn::MakeRecord { .. } => execute::op_make_record, Insn::ResultRow { .. } => execute::op_result_row, From 3a97fd075fb84432e4b8926ab18a47cd1586e7f3 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 19:53:49 +0300 Subject: [PATCH 03/56] add tests --- testing/insert.test | 147 +++++++++++++++++++++++++++++++++++++++++++- testing/tester.tcl | 111 +++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 1 deletion(-) diff --git a/testing/insert.test b/testing/insert.test index 5a37fd692..ab520b052 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -15,4 +15,149 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert { } {1 2 3 -4} \ No newline at end of file +4} + +do_execsql_test strict-basic-creation { + CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test1 VALUES(1, 'item1', 10.5); + SELECT * FROM test1; +} {1|item1|10.5} + +do_execsql_test_any_error strict-require-datatype { + CREATE TABLE test2(id INTEGER, name) STRICT; +} + +do_execsql_test_any_error strict-valid-datatypes { + CREATE TABLE test2(id INTEGER, value DATETIME) STRICT; +} + +do_execsql_test_any_error strict-type-enforcement { + CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test3 VALUES(1, 'item1', 'not-a-number'); +} + +do_execsql_test strict-type-coercion { + CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test4 VALUES(1, 'item1', '10.5'); + SELECT typeof(price), price FROM test4; +} {real|10.5} + +do_execsql_test strict-any-flexibility { + CREATE TABLE 
test5(id INTEGER, data ANY) STRICT; + INSERT INTO test5 VALUES(1, 100); + INSERT INTO test5 VALUES(2, 'text'); + INSERT INTO test5 VALUES(3, 3.14); + SELECT id, typeof(data) FROM test5 ORDER BY id; +} {1|integer +2|text +3|real} + +do_execsql_test strict-any-preservation { + CREATE TABLE test6(id INTEGER, code ANY) STRICT; + INSERT INTO test6 VALUES(1, '000123'); + SELECT typeof(code), code FROM test6; +} {text|000123} + +do_execsql_test_any_error strict-int-vs-integer-pk { + CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT + INSERT INTO test8 VALUES(NULL, 'test'); +} + +do_execsql_test strict-integer-pk-behavior { + CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT; + INSERT INTO test9 VALUES(NULL, 'test'); + SELECT id, name FROM test9; +} {1|test} + + +do_execsql_test strict-mixed-inserts { + CREATE TABLE test11( + id INTEGER PRIMARY KEY, + name TEXT, + price REAL, + quantity INT, + tags ANY + ) STRICT; + + INSERT INTO test11 VALUES(1, 'item1', 10.5, 5, 'tag1'); + INSERT INTO test11 VALUES(2, 'item2', 20.75, 10, 42); + + SELECT id, name, price, quantity, typeof(tags) FROM test11 ORDER BY id; +} {1|item1|10.5|5|text +2|item2|20.75|10|integer} + +do_execsql_test strict-update-basic { + CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test1 VALUES(1, 'item1', 10.5); + UPDATE test1 SET price = 15.75 WHERE id = 1; + SELECT * FROM test1; +} {1|item1|15.75} + +do_execsql_test_any_error strict-update-type-enforcement { + CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test2 VALUES(1, 'item1', 10.5); + UPDATE test2 SET price = 'not-a-number' WHERE id = 1; +} + +do_execsql_test strict-update-type-coercion { + CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test3 VALUES(1, 'item1', 10.5); + UPDATE test3 SET price = '15.75' WHERE id = 1; + SELECT id, typeof(price), price FROM test3; +} {1|real|15.75} + +do_execsql_test strict-update-any-flexibility { + CREATE TABLE test4(id 
INTEGER, data ANY) STRICT; + INSERT INTO test4 VALUES(1, 100); + UPDATE test4 SET data = 'text' WHERE id = 1; + INSERT INTO test4 VALUES(2, 'original'); + UPDATE test4 SET data = 3.14 WHERE id = 2; + SELECT id, typeof(data), data FROM test4 ORDER BY id; +} {1|text|text +2|real|3.14} + +do_execsql_test strict-update-any-preservation { + CREATE TABLE test5(id INTEGER, code ANY) STRICT; + INSERT INTO test5 VALUES(1, 'text'); + UPDATE test5 SET code = '000123' WHERE id = 1; + SELECT typeof(code), code FROM test5; +} {text|000123} + +do_execsql_test_any_error strict-update-not-null-constraint { + CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT; + INSERT INTO test7 VALUES(1, 'name'); + UPDATE test7 SET name = NULL WHERE id = 1; +} + +# Uncomment following test case when unique constraint is added +#do_execsql_test_any_error strict-update-pk-constraint { +# CREATE TABLE test8(id INTEGER PRIMARY KEY, name TEXT) STRICT; +# INSERT INTO test8 VALUES(1, 'name1'); +# INSERT INTO test8 VALUES(2, 'name2'); +# UPDATE test8 SET id = 2 WHERE id = 1; +#} + +do_execsql_test strict-update-multiple-columns { + CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT; + INSERT INTO test9 VALUES(1, 'item1', 10.5, 5); + UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1; + SELECT * FROM test9; +} {1|updated|20.75|10} + +do_execsql_test strict-update-where-clause { + CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT; + INSERT INTO test10 VALUES(1, 'A', 10); + INSERT INTO test10 VALUES(2, 'A', 20); + INSERT INTO test10 VALUES(3, 'B', 30); + UPDATE test10 SET price = price * 2 WHERE category = 'A'; + SELECT id, price FROM test10 ORDER BY id; +} {1|20.0 +2|40.0 +3|30.0} + +do_execsql_test strict-update-expression { + CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT; + INSERT INTO test11 VALUES(1, 'item1', 100, 0.1); + UPDATE test11 SET price = price - (price * discount); + SELECT id, price FROM 
test11; +} {1|90.0} diff --git a/testing/tester.tcl b/testing/tester.tcl index 735c91aae..4bb3ab2ef 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -97,3 +97,114 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera } } } +# This procedure passes the test if the output contains error messages +proc run_test_expecting_any_error {sqlite_exec db_name sql} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Use catch to handle both successful and error cases + catch {exec {*}$command} result options + + # Check if the output contains error indicators (×, error, syntax error, etc.) + if {[regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + # Error found in output - test passed + puts "Test PASSED: Got expected error" + return 1 + } + + # No error indicators in output + puts "Test FAILED: '$sql'" + puts "Expected an error but command output didn't indicate any error: '$result'" + exit 1 +} + +# This procedure passes if error matches a specific pattern +proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + puts "Test FAILED: '$sql'" + puts "Expected an error matching '$expected_error_pattern'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Now check if the error message matches the expected pattern + if {![regexp $expected_error_pattern $result]} { + puts "Test FAILED: '$sql'" + puts "Error occurred but didn't match expected pattern." 
+ puts "Output was: '$result'" + puts "Expected pattern: '$expected_error_pattern'" + exit 1 + } + + # If we get here, the test passed - got expected error matching pattern + return 1 +} + +# This version accepts exact error text, ignoring formatting +proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_text} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + puts "Test FAILED: '$sql'" + puts "Expected an error with text: '$expected_error_text'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Normalize both the actual and expected error messages + # Remove all whitespace, newlines, and special characters for comparison + set normalized_actual [regsub -all {[[:space:]]|[[:punct:]]} $result ""] + set normalized_expected [regsub -all {[[:space:]]|[[:punct:]]} $expected_error_text ""] + + # Convert to lowercase for case-insensitive comparison + set normalized_actual [string tolower $normalized_actual] + set normalized_expected [string tolower $normalized_expected] + + # Check if the normalized strings contain the same text + if {[string first $normalized_expected $normalized_actual] == -1} { + puts "Test FAILED: '$sql'" + puts "Error occurred but content didn't match." 
+ puts "Output was: '$result'" + puts "Expected text: '$expected_error_text'" + exit 1 + } + + # If we get here, the test passed - got error with expected content + return 1 +} + +proc do_execsql_test_error {test_name sql_statements expected_error_pattern} { + foreach db $::test_dbs { + puts [format "(%s) %s Running error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_error $::sqlite_exec $db $combined_sql $expected_error_pattern + } +} + +proc do_execsql_test_error_content {test_name sql_statements expected_error_text} { + foreach db $::test_dbs { + puts [format "(%s) %s Running error content test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_error_content $::sqlite_exec $db $combined_sql $expected_error_text + } +} + +proc do_execsql_test_any_error {test_name sql_statements} { + foreach db $::test_dbs { + puts [format "(%s) %s Running any-error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db $combined_sql + } +} From 7c154651187fbe99b4541502d07e2efc480e2b77 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 19:54:25 +0300 Subject: [PATCH 04/56] add TypeCheck insn to update --- core/translate/emitter.rs | 12 ++++++++++++ core/vdbe/execute.rs | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 23b937019..ec369127a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,6 +1,8 @@ // This module contains code for emitting bytecode instructions for SQL query execution. // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. 
+use std::rc::Rc; + use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; @@ -614,6 +616,16 @@ fn emit_update_insns( } } } + if let Some(btree_table) = table_ref.btree() { + if btree_table.is_strict { + program.emit_insn(Insn::TypeCheck { + start_reg: first_col_reg, + count: table_ref.columns().len(), + check_generated: true, + table_reference: Rc::clone(&btree_table), + }); + } + } let record_reg = program.alloc_register(); program.emit_insn(Insn::MakeRecord { start_reg: first_col_reg, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index cf1b9e03e..a388c8b84 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1375,10 +1375,10 @@ pub fn op_type_check( col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), SQLITE_CONSTRAINT ) - } else if col.is_rowid_alias { - // If it is INTEGER PRIMARY KEY we let sqlite assign row_id + } else if col.is_rowid_alias && matches!(reg.get_owned_value(), OwnedValue::Null) { + // Handle INTEGER PRIMARY KEY for null as usual (Rowid will be auto-assigned) return Ok(()); - }; + } let col_affinity = col.affinity(); let ty_str = col.ty_str.as_str(); let applied = apply_affinity_char(reg, col_affinity); From ad91a2ae513785a9b31a1d0d6da35623e2af35ae Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 20:29:45 +0300 Subject: [PATCH 05/56] fix tests --- testing/insert.test | 38 +++++++++++++++++++------------------- testing/tester.tcl | 10 ++++++++++ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/testing/insert.test b/testing/insert.test index ab520b052..6c14ee249 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -17,32 +17,32 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert { 3 4} -do_execsql_test strict-basic-creation { +do_execsql_test_on_specific_db {:memory:} strict-basic-creation { CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test1 VALUES(1, 'item1', 10.5); SELECT * FROM test1; } {1|item1|10.5} -do_execsql_test_any_error 
strict-require-datatype { +do_execsql_test_in_memory_any_error strict-require-datatype { CREATE TABLE test2(id INTEGER, name) STRICT; } -do_execsql_test_any_error strict-valid-datatypes { +do_execsql_test_in_memory_any_error strict-valid-datatypes { CREATE TABLE test2(id INTEGER, value DATETIME) STRICT; } -do_execsql_test_any_error strict-type-enforcement { +do_execsql_test_in_memory_any_error strict-type-enforcement { CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test3 VALUES(1, 'item1', 'not-a-number'); } -do_execsql_test strict-type-coercion { +do_execsql_test_on_specific_db {:memory:} strict-type-coercion { CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test4 VALUES(1, 'item1', '10.5'); SELECT typeof(price), price FROM test4; } {real|10.5} -do_execsql_test strict-any-flexibility { +do_execsql_test_on_specific_db {:memory:} strict-any-flexibility { CREATE TABLE test5(id INTEGER, data ANY) STRICT; INSERT INTO test5 VALUES(1, 100); INSERT INTO test5 VALUES(2, 'text'); @@ -52,25 +52,25 @@ do_execsql_test strict-any-flexibility { 2|text 3|real} -do_execsql_test strict-any-preservation { +do_execsql_test_on_specific_db {:memory:} strict-any-preservation { CREATE TABLE test6(id INTEGER, code ANY) STRICT; INSERT INTO test6 VALUES(1, '000123'); SELECT typeof(code), code FROM test6; } {text|000123} -do_execsql_test_any_error strict-int-vs-integer-pk { +do_execsql_test_in_memory_any_error strict-int-vs-integer-pk { CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT INSERT INTO test8 VALUES(NULL, 'test'); } -do_execsql_test strict-integer-pk-behavior { +do_execsql_test_on_specific_db {:memory:} strict-integer-pk-behavior { CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT; INSERT INTO test9 VALUES(NULL, 'test'); SELECT id, name FROM test9; } {1|test} -do_execsql_test strict-mixed-inserts { +do_execsql_test_on_specific_db {:memory:} strict-mixed-inserts { CREATE TABLE test11( id INTEGER PRIMARY KEY, 
name TEXT, @@ -86,27 +86,27 @@ do_execsql_test strict-mixed-inserts { } {1|item1|10.5|5|text 2|item2|20.75|10|integer} -do_execsql_test strict-update-basic { +do_execsql_test_on_specific_db {:memory:} strict-update-basic { CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test1 VALUES(1, 'item1', 10.5); UPDATE test1 SET price = 15.75 WHERE id = 1; SELECT * FROM test1; } {1|item1|15.75} -do_execsql_test_any_error strict-update-type-enforcement { +do_execsql_test_in_memory_any_error strict-update-type-enforcement { CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test2 VALUES(1, 'item1', 10.5); UPDATE test2 SET price = 'not-a-number' WHERE id = 1; } -do_execsql_test strict-update-type-coercion { +do_execsql_test_on_specific_db {:memory:} strict-update-type-coercion { CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test3 VALUES(1, 'item1', 10.5); UPDATE test3 SET price = '15.75' WHERE id = 1; SELECT id, typeof(price), price FROM test3; } {1|real|15.75} -do_execsql_test strict-update-any-flexibility { +do_execsql_test_on_specific_db {:memory:} strict-update-any-flexibility { CREATE TABLE test4(id INTEGER, data ANY) STRICT; INSERT INTO test4 VALUES(1, 100); UPDATE test4 SET data = 'text' WHERE id = 1; @@ -116,14 +116,14 @@ do_execsql_test strict-update-any-flexibility { } {1|text|text 2|real|3.14} -do_execsql_test strict-update-any-preservation { +do_execsql_test_on_specific_db {:memory:} strict-update-any-preservation { CREATE TABLE test5(id INTEGER, code ANY) STRICT; INSERT INTO test5 VALUES(1, 'text'); UPDATE test5 SET code = '000123' WHERE id = 1; SELECT typeof(code), code FROM test5; } {text|000123} -do_execsql_test_any_error strict-update-not-null-constraint { +do_execsql_test_in_memory_any_error strict-update-not-null-constraint { CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT; INSERT INTO test7 VALUES(1, 'name'); UPDATE test7 SET name = NULL WHERE id = 1; @@ -137,14 +137,14 @@ 
do_execsql_test_any_error strict-update-not-null-constraint { # UPDATE test8 SET id = 2 WHERE id = 1; #} -do_execsql_test strict-update-multiple-columns { +do_execsql_test_on_specific_db {:memory:} strict-update-multiple-columns { CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT; INSERT INTO test9 VALUES(1, 'item1', 10.5, 5); UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1; SELECT * FROM test9; } {1|updated|20.75|10} -do_execsql_test strict-update-where-clause { +do_execsql_test_on_specific_db {:memory:} strict-update-where-clause { CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT; INSERT INTO test10 VALUES(1, 'A', 10); INSERT INTO test10 VALUES(2, 'A', 20); @@ -155,7 +155,7 @@ do_execsql_test strict-update-where-clause { 2|40.0 3|30.0} -do_execsql_test strict-update-expression { +do_execsql_test_on_specific_db {:memory:} strict-update-expression { CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT; INSERT INTO test11 VALUES(1, 'item1', 100, 0.1); UPDATE test11 SET price = price - (price * discount); diff --git a/testing/tester.tcl b/testing/tester.tcl index 4bb3ab2ef..41117ed37 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -208,3 +208,13 @@ proc do_execsql_test_any_error {test_name sql_statements} { run_test_expecting_any_error $::sqlite_exec $db $combined_sql } } + +proc do_execsql_test_in_memory_any_error {test_name sql_statements} { + puts [format "(in-memory) %s Running any-error test: %s" [string repeat " " 31] $test_name] + + # Use ":memory:" special filename for in-memory database + set db_name ":memory:" + + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql +} From 1f29307fe8ddb438cb453d73b1da0e4144140930 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 16:12:59 -0400 Subject: [PATCH 06/56] Support proper index handling when doing insertions --- core/schema.rs | 13 
++++ core/translate/index.rs | 5 ++ core/translate/insert.rs | 149 ++++++++++++++++++++++++++++++++++++++- core/vdbe/mod.rs | 5 +- 4 files changed, 166 insertions(+), 6 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index dda37d15b..b68b1075e 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -85,6 +85,19 @@ impl Schema { let name = normalize_ident(table_name); self.indexes.remove(&name); } + + pub fn get_index_for_column(&self, table_name: &str, column_name: &str) -> Option> { + if let Some(indexes) = self.indexes.get(table_name) { + for index in indexes { + for column in &index.columns { + if column.name.eq_ignore_ascii_case(column_name) { + return Some(index.clone()); + } + } + } + } + None + } } #[derive(Clone, Debug)] diff --git a/core/translate/index.rs b/core/translate/index.rs index 32d7cd2e9..94aa03e64 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -172,6 +172,11 @@ pub fn translate_create_index( cursor_id: table_cursor_id, dest: rowid_reg, }); + // if the rowid is null, skip the insert + program.emit_insn(Insn::IsNull { + reg: rowid_reg, + target_pc: loop_end_label, + }); let record_reg = program.alloc_register(); program.emit_insn(Insn::MakeRecord { start_reg, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 7713b9355..2b16f464c 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -6,10 +6,10 @@ use limbo_sqlite3_parser::ast::{ }; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; -use crate::schema::Table; +use crate::schema::{IndexColumn, Table}; use crate::util::normalize_ident; use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode}; -use crate::vdbe::insn::RegisterOrLiteral; +use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral}; use crate::vdbe::BranchOffset; use crate::{ schema::{Column, Schema}, @@ -83,6 +83,22 @@ pub fn translate_insert( Some(table_name.0.clone()), CursorType::BTreeTable(btree_table.clone()), ); + // allocate cursor id's for each btree index cursor 
we'll need to populate the indexes + // (idx name, root_page, idx cursor id) + let idx_cursors = schema + .get_indices(&table_name.0) + .iter() + .map(|idx| { + ( + &idx.name, + idx.root_page, + program.alloc_cursor_id( + Some(table_name.0.clone()), + CursorType::BTreeIndex(idx.clone()), + ), + ) + }) + .collect::>(); let root_page = btree_table.root_page; let values = match body { InsertBody::Select(select, _) => match &select.body.select.deref() { @@ -93,6 +109,7 @@ pub fn translate_insert( }; let column_mappings = resolve_columns_for_insert(&table, columns, values)?; + let index_col_mappings = resolve_indicies_for_insert(schema, table.as_ref(), &column_mappings)?; // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) let rowid_alias_index = btree_table.columns.iter().position(|c| c.is_rowid_alias); let has_user_provided_rowid = { @@ -183,7 +200,14 @@ pub fn translate_insert( &resolver, )?; } - + // Open all the index btrees for writing + for idx_cursor in idx_cursors.iter() { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: idx_cursor.2, + root_page: idx_cursor.1.into(), + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); if let Some(reg) = rowid_alias_reg { @@ -265,7 +289,54 @@ pub fn translate_insert( flag: 0, }); program.emit_insn(Insn::InsertAwait { cursor_id }); + for index_col_mapping in index_col_mappings.iter() { + // find which cursor we opened earlier for this index + let idx_cursor_id = idx_cursors + .iter() + .find(|(name, _, _)| *name == &index_col_mapping.idx_name) + .map(|(_, _, c_id)| *c_id) + .expect("no cursor found for index"); + let num_cols = index_col_mapping.columns.len(); + // allocate scratch registers for the index columns plus rowid + let idx_start_reg = program.alloc_registers(num_cols + 1); + + // copy each index column from the table's column 
registers into these scratch regs + for (i, col) in index_col_mapping.columns.iter().enumerate() { + // copy from the table's column register over to the index's scratch register + program.emit_insn(Insn::Copy { + src_reg: column_registers_start + col.0, + dst_reg: idx_start_reg + i, + amount: 0, + }); + } + // last register is the rowid + program.emit_insn(Insn::Copy { + src_reg: rowid_reg, + dst_reg: idx_start_reg + num_cols, + amount: 0, + }); + + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: idx_start_reg, + count: num_cols + 1, + dest_reg: record_reg, + }); + + // now do the actual index insertion using the unpacked registers + program.emit_insn(Insn::IdxInsertAsync { + cursor_id: idx_cursor_id, + record_reg, + unpacked_start: Some(idx_start_reg), // TODO: enable optimization + unpacked_count: Some((num_cols + 1) as u16), + // TODO: figure out how to determine whether or not we need to seek prior to insert. + flags: IdxInsertFlags::new(), + }); + program.emit_insn(Insn::IdxInsertAwait { + cursor_id: idx_cursor_id, + }); + } if inserting_multiple_rows { // For multiple rows, loop back program.emit_insn(Insn::Goto { @@ -393,6 +464,78 @@ fn resolve_columns_for_insert<'a>( Ok(mappings) } +/// Represents how a column in an index should be populated during an INSERT. +/// Similar to ColumnMapping above but includes the index name, as well as multiple +/// possible value indices for each. 
+#[derive(Default)] +struct IndexColMapping { + idx_name: String, + columns: Vec<(usize, IndexColumn)>, + value_indicies: Vec>, +} + +impl IndexColMapping { + fn new(name: String) -> Self { + IndexColMapping { + idx_name: name, + ..Default::default() + } + } +} + +/// Example: +/// Table 'test': (a, b, c); +/// Index 'idx': test(a, b); +///________________________________ +/// Insert (a, c): (2, 3) +/// Record: (2, NULL, 3) +/// IndexColMapping: (a, b) = (2, NULL) +fn resolve_indicies_for_insert<'a>( + schema: &Schema, + table: &Table, + columns: &[ColumnMapping], +) -> Result> { + let mut index_col_mappings = Vec::new(); + for col in columns { + // check if any of the inserted columns are part of an index + if let Some(index) = + schema.get_index_for_column(table.get_name(), col.column.name.as_ref().unwrap()) + { + // check if the index is already in the list + if index_col_mappings + .iter() + .any(|i: &IndexColMapping| i.idx_name.eq_ignore_ascii_case(&index.name)) + { + continue; + } + let mut idx_col_map = IndexColMapping::new(index.name.clone()); //todo: rm clone -_- + for column in &index.columns { + let column_name = normalize_ident(column.name.as_str()); + // find the other columns in the index that are not part of the insert + if let Some((i, index_column)) = columns.iter().enumerate().find(|(_, c)| { + c.column + .name + .as_ref() + .is_some_and(|c| c.eq_ignore_ascii_case(&column_name)) + }) { + // the column is also part of the insert + idx_col_map.columns.push((i, column.clone())); + // store the value index (which may be null if not part of the insert) + idx_col_map.value_indicies.push(index_column.value_index); + } else { + // column not found, meaning the ColumnMapping failed, thus we bail + return Err(crate::LimboError::ParseError(format!( + "Column {} not found in index {}", + column_name, index.name + ))); + } + } + index_col_mappings.push(idx_col_map); + } + } + Ok(index_col_mappings) +} + /// Populates the column registers with values for a 
single row fn populate_column_registers( program: &mut ProgramBuilder, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 8794b208a..b12cfcc56 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -627,11 +627,10 @@ impl Row { pub fn get_value<'a>(&'a self, idx: usize) -> &'a OwnedValue { let value = unsafe { self.values.add(idx).as_ref().unwrap() }; - let value = match value { + match value { Register::OwnedValue(owned_value) => owned_value, _ => unreachable!("a row should be formed of values only"), - }; - value + } } pub fn get_values(&self) -> impl Iterator { From 878c987026429bf2c27fbffc588f149850a09ad4 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 20:38:23 -0400 Subject: [PATCH 07/56] Remove is_null check from create index translation --- core/translate/index.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/translate/index.rs b/core/translate/index.rs index 94aa03e64..32d7cd2e9 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -172,11 +172,6 @@ pub fn translate_create_index( cursor_id: table_cursor_id, dest: rowid_reg, }); - // if the rowid is null, skip the insert - program.emit_insn(Insn::IsNull { - reg: rowid_reg, - target_pc: loop_end_label, - }); let record_reg = program.alloc_register(); program.emit_insn(Insn::MakeRecord { start_reg, From 224f913ae7cf3641d782215223e86416ae754097 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 7 Apr 2025 10:13:49 -0400 Subject: [PATCH 08/56] Handle composite key indexes on insert --- core/schema.rs | 13 -------- core/translate/insert.rs | 64 ++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 49 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index b68b1075e..dda37d15b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -85,19 +85,6 @@ impl Schema { let name = normalize_ident(table_name); self.indexes.remove(&name); } - - pub fn get_index_for_column(&self, table_name: &str, column_name: &str) -> Option> { - if let Some(indexes) 
= self.indexes.get(table_name) { - for index in indexes { - for column in &index.columns { - if column.name.eq_ignore_ascii_case(column_name) { - return Some(index.clone()); - } - } - } - } - None - } } #[derive(Clone, Debug)] diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 2b16f464c..16cc040c5 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -304,6 +304,7 @@ pub fn translate_insert( // copy each index column from the table's column registers into these scratch regs for (i, col) in index_col_mapping.columns.iter().enumerate() { // copy from the table's column register over to the index's scratch register + program.emit_insn(Insn::Copy { src_reg: column_registers_start + col.0, dst_reg: idx_start_reg + i, @@ -490,47 +491,38 @@ impl IndexColMapping { /// Insert (a, c): (2, 3) /// Record: (2, NULL, 3) /// IndexColMapping: (a, b) = (2, NULL) -fn resolve_indicies_for_insert<'a>( +fn resolve_indicies_for_insert( schema: &Schema, table: &Table, - columns: &[ColumnMapping], + columns: &[ColumnMapping<'_>], ) -> Result> { let mut index_col_mappings = Vec::new(); - for col in columns { - // check if any of the inserted columns are part of an index - if let Some(index) = - schema.get_index_for_column(table.get_name(), col.column.name.as_ref().unwrap()) - { - // check if the index is already in the list - if index_col_mappings - .iter() - .any(|i: &IndexColMapping| i.idx_name.eq_ignore_ascii_case(&index.name)) - { - continue; + // Iterate over all indices for this table + for index in schema.get_indices(table.get_name()) { + let mut idx_map = IndexColMapping::new(index.name.clone()); + // For each column in the index (in the order defined by the index), + // try to find the corresponding column in the insert’s column mapping. 
+ for idx_col in &index.columns { + let target_name = normalize_ident(idx_col.name.as_str()); + if let Some((i, col_mapping)) = columns.iter().enumerate().find(|(_, mapping)| { + mapping + .column + .name + .as_ref() + .map_or(false, |name| name.eq_ignore_ascii_case(&target_name)) + }) { + idx_map.columns.push((i, idx_col.clone())); + idx_map.value_indicies.push(col_mapping.value_index); + } else { + return Err(crate::LimboError::ParseError(format!( + "Column {} not found in index {}", + target_name, index.name + ))); } - let mut idx_col_map = IndexColMapping::new(index.name.clone()); //todo: rm clone -_- - for column in &index.columns { - let column_name = normalize_ident(column.name.as_str()); - // find the other columns in the index that are not part of the insert - if let Some((i, index_column)) = columns.iter().enumerate().find(|(_, c)| { - c.column - .name - .as_ref() - .is_some_and(|c| c.eq_ignore_ascii_case(&column_name)) - }) { - // the column is also part of the insert - idx_col_map.columns.push((i, column.clone())); - // store the value index (which may be null if not part of the insert) - idx_col_map.value_indicies.push(index_column.value_index); - } else { - // column not found, meaning the ColumnMapping failed, thus we bail - return Err(crate::LimboError::ParseError(format!( - "Column {} not found in index {}", - column_name, index.name - ))); - } - } - index_col_mappings.push(idx_col_map); + } + // Add the mapping if at least one column was found. 
+ if !idx_map.columns.is_empty() { + index_col_mappings.push(idx_map); } } Ok(index_col_mappings) From 029da5c81c7ff2bc063f041868098fad1c9ce63d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:03:30 +0200 Subject: [PATCH 09/56] Improve readability of balance_non_root with comments and validation extraction --- core/storage/btree.rs | 153 +++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 46 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e29a9f8f7..980316132 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1470,7 +1470,7 @@ impl BTreeCursor { "overflow parent not yet implemented" ); - // Get divider cells and max_cells + /* 1. Get divider cells and max_cells */ let mut max_cells = 0; let mut pages_to_balance_new = Vec::new(); for i in (0..balance_info.sibling_count).rev() { @@ -1528,6 +1528,7 @@ impl BTreeCursor { // Reverse divider cells to be in order balance_info.divider_cells.reverse(); + /* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */ let mut cell_array = CellArray { cells: Vec::with_capacity(max_cells), number_of_cells_per_page: Vec::new(), @@ -1614,16 +1615,9 @@ impl BTreeCursor { } #[cfg(debug_assertions)] - { - for cell in &cell_array.cells { - assert!(cell.len() >= 4); + validate_cells_after_insertion(&cell_array, leaf_data); - if leaf_data { - assert!(cell[0] != 0, "payload is {:?}", cell); - } - } - } - // calculate how many pages to allocate + /* 3. Initiliaze current size of every page including overflow cells and divider cells that might be included. */ let mut new_page_sizes: Vec = Vec::new(); let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes @@ -1650,6 +1644,15 @@ impl BTreeCursor { } } + /* 4. Now let's try to move cells to the left trying to stack them without exceeding the maximum size of a page. 
+ There are two cases: + * If current page has too many cells, it will move them to the next page. + * If it still has space, and it can take a cell from the right it will take them. + Here there is a caveat. Taking a cell from the right might take cells from page i+1, i+2, i+3, so not necessarily + adjacent. But we decrease the size of the adjacent page if we move from the right. This might cause an intermittent state + where page can have size <0. + This will also calculate how many pages are required to balance the cells and store in sibling_count_new. + */ // Try to pack as many cells to the left let mut sibling_count_new = balance_info.sibling_count; let mut i = 0; @@ -1658,6 +1661,7 @@ while new_page_sizes[i] > usable_space as usize { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { + // FIXME: this doesn't remove pages if not needed sibling_count_new += 1; new_page_sizes.push(0); cell_array @@ -1732,6 +1736,10 @@ cell_array.cells.len() ); + /* 5. Balance pages starting from a left stacked cell state and move them to the right trying to maintain a balanced state + where we only move from left to right if it will not unbalance both pages, meaning moving left to right won't make + right page bigger than left page. + */ // Comment borrowed from SQLite src/btree.c // The packing computed by the previous block is biased toward the siblings // on the left side (siblings with smaller keys). The left siblings are @@ -1840,6 +1848,7 @@ right_page_id ); + /* 6. Update parent pointers. Update right pointer and insert divider cells with newly created distribution of cells */ // Ensure right-child pointer of the right-most new sibling pge points to the page // that was originally on that place.
let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); @@ -1921,41 +1930,12 @@ impl BTreeCursor { ) .unwrap(); #[cfg(debug_assertions)] - { - let left_pointer = if parent_contents.overflow_cells.len() == 0 { - let (cell_start, cell_len) = parent_contents.cell_get_raw_region( - balance_info.first_divider_cell + i, - payload_overflow_threshold_max( - parent_contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - parent_contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - ); - tracing::debug!( - "balance_non_root(cell_start={}, cell_len={})", - cell_start, - cell_len - ); - - let left_pointer = read_u32( - &parent_contents.as_ptr()[cell_start..cell_start + cell_len], - 0, - ); - left_pointer - } else { - let cell = &parent_contents.overflow_cells[0]; - assert_eq!(cell.index, balance_info.first_divider_cell + i); - read_u32(&cell.payload, 0) - }; - assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}", - left_pointer, - page.get().id as u32 - ); - } + self.validate_balance_non_root_divider_cell_insertion( + balance_info, + parent_contents, + i, + page, + ); } tracing::debug!( "balance_non_root(parent_overflow={})", @@ -1964,6 +1944,7 @@ impl BTreeCursor { #[cfg(debug_assertions)] { + // Let's ensure every page is pointed to by the divider cell or the rightmost pointer. for page in &pages_to_balance_new { assert!( pages_pointed_to.contains(&(page.get().id as u32)), @@ -1972,7 +1953,29 @@ impl BTreeCursor { ); } } - // TODO: update pages + /* 7. Start real movement of cells. Next comment is borrowed from SQLite: */ + /* Now update the actual sibling pages. The order in which they are updated + ** is important, as this code needs to avoid disrupting any page from which + ** cells may still to be read. 
In practice, this means: + ** + ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) + ** then it is not safe to update page apNew[iPg] until after + ** the left-hand sibling apNew[iPg-1] has been updated. + ** + ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) + ** then it is not safe to update page apNew[iPg] until after + ** the right-hand sibling apNew[iPg+1] has been updated. + ** + ** If neither of the above apply, the page is safe to update. + ** + ** The iPg value in the following loop starts at nNew-1 goes down + ** to 0, then back up to nNew-1 again, thus making two passes over + ** the pages. On the initial downward pass, only condition (1) above + ** needs to be tested because (2) will always be true from the previous + ** step. On the upward pass, both conditions are always true, so the + ** upwards pass simply processes pages that were missed on the downward + ** pass. + */ let mut done = vec![false; sibling_count_new]; for i in (1 - sibling_count_new as i64)..sibling_count_new as i64 { let page_idx = i.unsigned_abs() as usize; @@ -2053,6 +2056,53 @@ impl BTreeCursor { result } + #[cfg(debug_assertions)] + fn validate_balance_non_root_divider_cell_insertion( + &self, + balance_info: &mut BalanceInfo, + parent_contents: &mut PageContent, + i: usize, + page: &std::sync::Arc, + ) { + let left_pointer = if parent_contents.overflow_cells.len() == 0 { + let (cell_start, cell_len) = parent_contents.cell_get_raw_region( + balance_info.first_divider_cell + i, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + tracing::debug!( + "balance_non_root(cell_start={}, cell_len={})", + cell_start, + cell_len + ); + + let left_pointer = read_u32( + &parent_contents.as_ptr()[cell_start..cell_start + cell_len], + 0, + ); + left_pointer + } else { + let mut 
left_pointer = None; + for cell in parent_contents.overflow_cells.iter() { + if cell.index == balance_info.first_divider_cell + i { + left_pointer = Some(read_u32(&cell.payload, 0)) + } + } + left_pointer.expect("overflow cell with divider cell was not found") + }; + assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}", + left_pointer, + page.get().id as u32 + ); + } + #[cfg(debug_assertions)] fn post_balance_non_root_validation( &self, @@ -3419,6 +3469,17 @@ impl BTreeCursor { } } +#[cfg(debug_assertions)] +fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) { + for cell in &cell_array.cells { + assert!(cell.len() >= 4); + + if leaf_data { + assert!(cell[0] != 0, "payload is {:?}", cell); + } + } +} + impl PageStack { fn increment_current(&self) { self.current_page.set(self.current_page.get() + 1); From cf62099bf53502e42c95330b33d9425e654d908d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:03:49 +0200 Subject: [PATCH 10/56] allow insertion of multiple overflow cells --- core/storage/btree.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 980316132..c2fcd09a0 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1565,10 +1565,6 @@ impl BTreeCursor { } // Insert overflow cells into correct place let offset = total_cells_inserted; - assert!( - old_page_contents.overflow_cells.len() <= 1, - "todo: check this works for more than one overflow cell" - ); for overflow_cell in old_page_contents.overflow_cells.iter_mut() { cell_array.cells.insert( offset + overflow_cell.index, @@ -4005,7 +4001,7 @@ fn insert_into_cell( usable_space: u16, ) -> Result<()> { assert!( - cell_idx <= page.cell_count(), + cell_idx <= page.cell_count() + page.overflow_cells.len(), "attempting to add cell to an incorrect place cell_idx={} cell_count={}", cell_idx, 
page.cell_count() From ce7e0188f640ba3b875b6352c41e1894bf0f28a2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 17:57:39 +0200 Subject: [PATCH 11/56] bring back i64 page sizes while balancing --- core/storage/btree.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c2fcd09a0..befb43189 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1614,7 +1614,7 @@ impl BTreeCursor { validate_cells_after_insertion(&cell_array, leaf_data); /* 3. Initiliaze current size of every page including overflow cells and divider cells that might be included. */ - let mut new_page_sizes: Vec = Vec::new(); + let mut new_page_sizes: Vec = Vec::new(); let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes // header @@ -1627,16 +1627,16 @@ impl BTreeCursor { let page_contents = page.get_contents(); let free_space = compute_free_space(page_contents, self.usable_space() as u16); - new_page_sizes.push(usable_space as usize - free_space as usize); + new_page_sizes.push(usable_space as i64 - free_space as i64); for overflow in &page_contents.overflow_cells { let size = new_page_sizes.last_mut().unwrap(); // 2 to account of pointer - *size += 2 + overflow.payload.len() as usize; + *size += 2 + overflow.payload.len() as i64; } if !leaf && i < balance_info.sibling_count - 1 { // Account for divider cell which is included in this page. 
let size = new_page_sizes.last_mut().unwrap(); - *size += cell_array.cells[cell_array.cell_count(i)].len() as usize; + *size += cell_array.cells[cell_array.cell_count(i)].len() as i64; } } @@ -1654,7 +1654,7 @@ impl BTreeCursor { let mut i = 0; while i < sibling_count_new { // First try to move cells to the right if they do not fit - while new_page_sizes[i] > usable_space as usize { + while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { // FIXME: this doesn't remove pages if not needed @@ -1669,7 +1669,7 @@ impl BTreeCursor { ); } let size_of_cell_to_remove_from_left = - 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as usize; + 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64; new_page_sizes[i] -= size_of_cell_to_remove_from_left; let size_of_cell_to_move_right = if !leaf_data { if cell_array.number_of_cells_per_page[i] @@ -1677,23 +1677,23 @@ impl BTreeCursor { { // This means we move to the right page the divider cell and we // promote left cell to divider - 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } } else { size_of_cell_to_remove_from_left }; - new_page_sizes[i + 1] += size_of_cell_to_move_right as usize; + new_page_sizes[i + 1] += size_of_cell_to_move_right as i64; cell_array.number_of_cells_per_page[i] -= 1; } // Now try to take from the right if we didn't have enough while cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { let size_of_cell_to_remove_from_right = - 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize; + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64; let can_take = new_page_sizes[i] + size_of_cell_to_remove_from_right - > usable_space as usize; + > usable_space as i64; if can_take { break; } @@ -1704,7 +1704,7 @@ impl BTreeCursor { if cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { - 2 + 
cell_array.cells[cell_array.cell_count(i)].len() as usize + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } @@ -1754,8 +1754,8 @@ impl BTreeCursor { // the same we add to right (we don't add divider to right). let mut cell_right = cell_left + 1 - leaf_data as u16; loop { - let cell_left_size = cell_array.cell_size(cell_left as usize) as usize; - let cell_right_size = cell_array.cell_size(cell_right as usize) as usize; + let cell_left_size = cell_array.cell_size(cell_left as usize) as i64; + let cell_right_size = cell_array.cell_size(cell_right as usize) as i64; // TODO: add assert nMaxCells let pointer_size = if i == sibling_count_new - 1 { 0 } else { 2 }; From 2af447128f357f5d8246e43c051dae629bde7fc4 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 19:32:03 -0400 Subject: [PATCH 12/56] Add tracing log file to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8a7437707..369a9b7ef 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ dist/ # testing testing/limbo_output.txt **/limbo_output.txt +testing/test.log From 570253b29ff2b839035133b6dfa926b2b40ae6f8 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 19:32:51 -0400 Subject: [PATCH 13/56] Adjust limbo run script to log to file during tests if RUST_LOG set --- scripts/limbo-sqlite3 | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/limbo-sqlite3 b/scripts/limbo-sqlite3 index 8e9f0389a..d448a2d6a 100755 --- a/scripts/limbo-sqlite3 +++ b/scripts/limbo-sqlite3 @@ -1,3 +1,8 @@ #!/bin/bash -target/debug/limbo -m list "$@" +# if RUST_LOG is non-empty, enable tracing output +if [ -n "$RUST_LOG" ]; then + target/debug/limbo -m list -t testing/test.log "$@" +else + target/debug/limbo -m list "$@" +fi From 01184ec1d7850d80380acd5d619f385697db49fa Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 19:36:38 -0400 Subject: [PATCH 14/56] Add tracing-appender to log traces to 
file asyncronously --- Cargo.lock | 22 ++++++++++++ cli/Cargo.toml | 1 + cli/app.rs | 56 +++++++++++++++++++++++------ cli/input.rs | 10 +++--- cli/main.rs | 10 +----- testing/cli_tests/test_limbo_cli.py | 2 +- 6 files changed, 77 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2e7a615c6..96aec95db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -583,6 +583,15 @@ dependencies = [ "itertools", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -1678,6 +1687,7 @@ dependencies = [ "shlex", "syntect", "tracing", + "tracing-appender", "tracing-subscriber", ] @@ -3472,6 +3482,18 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" +dependencies = [ + "crossbeam-channel", + "thiserror 1.0.69", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.28" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index ddd44519f..2f1625420 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -39,6 +39,7 @@ rustyline = { version = "15.0.0", default-features = true, features = [ shlex = "1.3.0" syntect = "5.2.0" tracing = "0.1.41" +tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/cli/app.rs b/cli/app.rs index 40e187d43..e5aa851a6 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -6,6 +6,8 @@ use crate::{ }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult}; +use tracing_appender::non_blocking::WorkerGuard; +use 
tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use clap::Parser; use rustyline::{history::DefaultHistory, Editor}; @@ -49,6 +51,8 @@ pub struct Opts { pub vfs: Option, #[clap(long, help = "Enable experimental MVCC feature")] pub experimental_mvcc: bool, + #[clap(short = 't', long, help = "specify output file for log traces")] + pub tracing_output: Option, } const PROMPT: &str = "limbo> "; @@ -130,6 +134,8 @@ impl<'a> Limbo<'a> { }) .expect("Error setting Ctrl-C handler"); } + let sql = opts.sql.clone(); + let quiet = opts.quiet; let mut app = Self { prompt: PROMPT.to_string(), io, @@ -137,21 +143,25 @@ impl<'a> Limbo<'a> { conn, interrupt_count, input_buff: String::new(), - opts: Settings::from(&opts), + opts: Settings::from(opts), rl, }; - - if opts.sql.is_some() { - app.handle_first_input(opts.sql.as_ref().unwrap()); - } - if !opts.quiet { - app.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; - app.writeln("Enter \".help\" for usage hints.")?; - app.display_in_memory()?; - } + app.first_run(sql, quiet)?; Ok(app) } + fn first_run(&mut self, sql: Option, quiet: bool) -> io::Result<()> { + if let Some(sql) = sql { + self.handle_first_input(&sql); + } + if !quiet { + self.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; + self.writeln("Enter \".help\" for usage hints.")?; + self.display_in_memory()?; + } + Ok(()) + } + fn handle_first_input(&mut self, cmd: &str) { if cmd.trim().starts_with('.') { self.handle_dot_command(&cmd[1..]); @@ -695,6 +705,32 @@ impl<'a> Limbo<'a> { Ok(()) } + pub fn init_tracing(&mut self) -> Result { + let (non_blocking, guard) = if let Some(file) = &self.opts.tracing_output { + tracing_appender::non_blocking( + std::fs::File::options() + .append(true) + .create(true) + .open(file)?, + ) + } else { + tracing_appender::non_blocking(std::io::stderr()) + }; + if let Err(e) = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) 
+ .with_line_number(true) + .with_thread_ids(true), + ) + .with(EnvFilter::from_default_env()) + .try_init() + { + println!("Unable to setup tracing appender: {:?}", e); + } + Ok(guard) + } + fn display_schema(&mut self, table: Option<&str>) -> anyhow::Result<()> { let sql = match table { Some(table_name) => format!( diff --git a/cli/input.rs b/cli/input.rs index 4361394c0..7b505a99f 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -81,28 +81,30 @@ pub struct Settings { pub echo: bool, pub is_stdout: bool, pub io: Io, + pub tracing_output: Option, } -impl From<&Opts> for Settings { - fn from(opts: &Opts) -> Self { +impl From for Settings { + fn from(opts: Opts) -> Self { Self { null_value: String::new(), output_mode: opts.output_mode, echo: false, is_stdout: opts.output.is_empty(), - output_filename: opts.output.clone(), + output_filename: opts.output, db_file: opts .database .as_ref() .map_or(":memory:".to_string(), |p| p.to_string_lossy().to_string()), io: match opts.vfs.as_ref().unwrap_or(&String::new()).as_str() { - "memory" => Io::Memory, + "memory" | ":memory:" => Io::Memory, "syscall" => Io::Syscall, #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Io::IoUring, "" => Io::default(), vfs => Io::External(vfs.to_string()), }, + tracing_output: opts.tracing_output, } } } diff --git a/cli/main.rs b/cli/main.rs index 4e8eca02a..ec81b64af 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -7,7 +7,6 @@ mod opcodes_dictionary; use rustyline::{error::ReadlineError, Config, Editor}; use std::sync::atomic::Ordering; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; fn rustyline_config() -> Config { Config::builder() @@ -17,15 +16,8 @@ fn rustyline_config() -> Config { fn main() -> anyhow::Result<()> { let mut rl = Editor::with_config(rustyline_config())?; - tracing_subscriber::registry() - .with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_thread_ids(true), - ) - 
.with(EnvFilter::from_default_env()) - .init(); let mut app = app::Limbo::new(&mut rl)?; + let _guard = app.init_tracing()?; let home = dirs::home_dir().expect("Could not determine home directory"); let history_file = home.join(".limbo_history"); if history_file.exists() { diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 8b6a61375..43f8d1ed2 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -11,7 +11,7 @@ PIPE_BUF = 4096 class ShellConfig: - def __init__(self, exe_name, flags: str = "-q"): + def __init__(self, exe_name, flags: str = "-q -t testing/trace.log"): self.sqlite_exec: str = exe_name self.sqlite_flags: List[str] = flags.split() self.cwd = os.getcwd() From 4b3c14369d4c5be1a925326d223bcf569a3b5b3d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 19:36:58 -0400 Subject: [PATCH 15/56] Add testing.md document --- docs/testing.md | 85 +++++++++++++++++++++++++++++ testing/cli_tests/test_limbo_cli.py | 2 +- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 docs/testing.md diff --git a/docs/testing.md b/docs/testing.md new file mode 100644 index 000000000..21823957f --- /dev/null +++ b/docs/testing.md @@ -0,0 +1,85 @@ +# Testing in Limbo + +Limbo supports a comprehensive testing system to ensure correctness, performance, and compatibility with SQLite. + +## 1. Compatibility Tests + +The `make test` target is the main entry point. + +Most compatibility tests live in the testing/ directory and are written in SQLite’s TCL test format. These tests ensure that Limbo matches SQLite’s behavior exactly. 
The database used during these tests is located at testing/testing.db, which includes the following schema: + +```sql +CREATE TABLE users ( + id INTEGER PRIMARY KEY, + first_name TEXT, + last_name TEXT, + email TEXT, + phone_number TEXT, + address TEXT, + city TEXT, + state TEXT, + zipcode TEXT, + age INTEGER +); +CREATE TABLE products ( + id INTEGER PRIMARY KEY, + name TEXT, + price REAL +); +CREATE INDEX age_idx ON users (age); +``` + +You can freely write queries against these tables during compatibility testing. + +### Shell and Python-based Tests + +For cases where output or behavior differs intentionally from SQLite (e.g. due to new features or limitations), tests should be placed in the testing/cli_tests/ directory and written in Python. + +These tests use the TestLimboShell class: + +```python +from cli_tests.common import TestLimboShell + +def test_uuid(): + limbo = TestLimboShell() + limbo.run_test_fn("SELECT uuid4_str();", lambda res: len(res) == 36) + limbo.quit() +``` + +You can use run_test, run_test_fn, or debug_print to interact with the shell and validate results. +The constructor takes an optional argument with the `sql` you want to initiate the tests with. You can also enable blob testing or override the executable and flags. + +Use these Python-based tests for validating: + + - Output formatting + + - Shell commands and .dot interactions + + - Limbo-specific extensions in `testing/cli_tests/extensions.py` + + - Any known divergence from SQLite behavior + + +> Logging and Tracing +If you wish to trace internal events during test execution, you can set the RUST_LOG environment variable before running the test. For example: + +```bash +RUST_LOG=none,limbo_core=trace make test +``` + +This will enable trace-level logs for the limbo_core crate and disable logs elsewhere. Logging all internal traces to the `testing/test.log` file. + +**Note:** trace logs can be very verbose—it's not uncommon for a single test run to generate megabytes of logs. 
+ + +## Deterministic Simulation Testing (DST): + +TODO! + + +## Fuzzing + +TODO! + + + diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 43f8d1ed2..8b6a61375 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -11,7 +11,7 @@ PIPE_BUF = 4096 class ShellConfig: - def __init__(self, exe_name, flags: str = "-q -t testing/trace.log"): + def __init__(self, exe_name, flags: str = "-q"): self.sqlite_exec: str = exe_name self.sqlite_flags: List[str] = flags.split() self.cwd = os.getcwd() From 3ad7d194cbf8b4f159cbe0cce6dd99fd3d6f1a22 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 08:38:48 -0400 Subject: [PATCH 16/56] Prevent panic on loading non-existent vtab module --- core/util.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/util.rs b/core/util.rs index f17699233..b0acb54f2 100644 --- a/core/util.rs +++ b/core/util.rs @@ -60,8 +60,14 @@ pub fn parse_schema_rows( let sql: &str = row.get::<&str>(4)?; if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; - let vtab = syms.vtabs.get(name).unwrap().clone(); - schema.add_virtual_table(vtab); + let Some(vtab) = syms.vtabs.get(name) else { + return Err(LimboError::InvalidArgument(format!( + "Virtual table Module for {} not found in symbol table, + please load extension first", + name + ))); + }; + schema.add_virtual_table(vtab.clone()); } else { let table = schema::BTreeTable::from_sql(sql, root_page as usize)?; schema.add_btree_table(Rc::new(table)); From 4b9b6c969b2095f6aa0a7fcd2b31a4baf8b5cdd6 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 09:17:06 -0400 Subject: [PATCH 17/56] Parse schema rows after extensions are loaded --- core/ext/dynamic.rs | 14 ++++++++++++-- core/lib.rs | 37 ++++++++++++++++++++++++++++++------- core/util.rs | 2 +- core/vdbe/execute.rs | 6 +++--- core/vdbe/mod.rs | 6 +++--- 5 files changed, 49 
insertions(+), 16 deletions(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index df342caca..ec297bf36 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -6,6 +6,7 @@ use libloading::{Library, Symbol}; use limbo_ext::{ExtensionApi, ExtensionApiRef, ExtensionEntryPoint, ResultCode, VfsImpl}; use std::{ ffi::{c_char, CString}, + rc::Rc, sync::{Arc, Mutex, OnceLock}, }; @@ -29,7 +30,10 @@ unsafe impl Send for VfsMod {} unsafe impl Sync for VfsMod {} impl Connection { - pub fn load_extension>(&self, path: P) -> crate::Result<()> { + pub fn load_extension>( + self: &Rc, + path: P, + ) -> crate::Result<()> { use limbo_ext::ExtensionApiRef; let api = Box::new(self.build_limbo_ext()); @@ -44,7 +48,13 @@ impl Connection { let result_code = unsafe { entry(api_ptr) }; if result_code.is_ok() { let extensions = get_extension_libraries(); - extensions.lock().unwrap().push((Arc::new(lib), api_ref)); + extensions + .lock() + .map_err(|_| { + LimboError::ExtensionError("Error unlocking extension libraries".to_string()) + })? + .push((Arc::new(lib), api_ref)); + self.parse_schema_rows()?; Ok(()) } else { if !api_ptr.is_null() { diff --git a/core/lib.rs b/core/lib.rs index e364b226d..7176086f0 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -70,7 +70,7 @@ use vdbe::{builder::QueryMode, VTabOpaqueCursor}; pub type Result = std::result::Result; pub static DATABASE_VERSION: OnceLock = OnceLock::new(); -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] enum TransactionState { Write, Read, @@ -158,7 +158,13 @@ impl Database { .try_write() .expect("lock on schema should succeed first try"); let syms = conn.syms.borrow(); - parse_schema_rows(rows, &mut schema, io, syms.deref(), None)?; + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, io, syms.deref(), None) + { + // this means that a vtab exists and we no longer have the module loaded. 
we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } } Ok(db) } @@ -186,9 +192,9 @@ impl Database { schema: self.schema.clone(), header: self.header.clone(), last_insert_rowid: Cell::new(0), - auto_commit: RefCell::new(true), + auto_commit: Cell::new(true), mv_transactions: RefCell::new(Vec::new()), - transaction_state: RefCell::new(TransactionState::None), + transaction_state: Cell::new(TransactionState::None), last_change: Cell::new(0), syms: RefCell::new(SymbolTable::new()), total_changes: Cell::new(0), @@ -278,9 +284,9 @@ pub struct Connection { pager: Rc, schema: Arc>, header: Arc>, - auto_commit: RefCell, + auto_commit: Cell, mv_transactions: RefCell>, - transaction_state: RefCell, + transaction_state: Cell, last_insert_rowid: Cell, last_change: Cell, total_changes: Cell, @@ -517,7 +523,24 @@ impl Connection { } pub fn get_auto_commit(&self) -> bool { - *self.auto_commit.borrow() + self.auto_commit.get() + } + + pub fn parse_schema_rows(self: &Rc) -> Result<()> { + let rows = self.query("SELECT * FROM sqlite_schema")?; + let mut schema = self + .schema + .try_write() + .expect("lock on schema should succeed first try"); + let syms = self.syms.borrow(); + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms.deref(), None) + { + // this means that a vtab exists and we no longer have the module loaded. 
we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } + Ok(()) } } diff --git a/core/util.rs b/core/util.rs index b0acb54f2..55a50b7ae 100644 --- a/core/util.rs +++ b/core/util.rs @@ -61,7 +61,7 @@ pub fn parse_schema_rows( if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; let Some(vtab) = syms.vtabs.get(name) else { - return Err(LimboError::InvalidArgument(format!( + return Err(LimboError::ExtensionError(format!( "Virtual table Module for {} not found in symbol table, please load extension first", name diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 09c283ecd..89a2d8d93 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1542,8 +1542,8 @@ pub fn op_transaction( } } else { let connection = program.connection.upgrade().unwrap(); - let current_state = connection.transaction_state.borrow().clone(); - let (new_transaction_state, updated) = match (¤t_state, write) { + let current_state = connection.transaction_state.get(); + let (new_transaction_state, updated) = match (current_state, write) { (TransactionState::Write, true) => (TransactionState::Write, false), (TransactionState::Write, false) => (TransactionState::Write, false), (TransactionState::Read, true) => (TransactionState::Write, true), @@ -1597,7 +1597,7 @@ pub fn op_auto_commit( }; } - if *auto_commit != *conn.auto_commit.borrow() { + if *auto_commit != conn.auto_commit.get() { if *rollback { todo!("Rollback is not implemented"); } else { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 8794b208a..550f21164 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -386,7 +386,7 @@ impl Program { ) -> Result { if let Some(mv_store) = mv_store { let conn = self.connection.upgrade().unwrap(); - let auto_commit = *conn.auto_commit.borrow(); + let auto_commit = conn.auto_commit.get(); if auto_commit { let mut mv_transactions = conn.mv_transactions.borrow_mut(); for tx_id in 
mv_transactions.iter() { @@ -400,7 +400,7 @@ impl Program { .connection .upgrade() .expect("only weak ref to connection?"); - let auto_commit = *connection.auto_commit.borrow(); + let auto_commit = connection.auto_commit.get(); tracing::trace!("Halt auto_commit {}", auto_commit); assert!( program_state.halt_state.is_none() @@ -409,7 +409,7 @@ impl Program { if program_state.halt_state.is_some() { self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref()) } else if auto_commit { - let current_state = connection.transaction_state.borrow().clone(); + let current_state = connection.transaction_state.get(); match current_state { TransactionState::Write => self.step_end_write_txn( &pager, From c15035caf84c23ad9e61cdfbab7cafb6b1726b7d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 14:03:54 -0400 Subject: [PATCH 18/56] Add module and vtab to schema after table is reopened with proper ext --- core/lib.rs | 20 +++--- core/translate/emitter.rs | 1 + core/util.rs | 132 +++++++++++++++++++++++++++++++++++--- core/vdbe/execute.rs | 13 +++- 4 files changed, 147 insertions(+), 19 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 7176086f0..48fb2bd7c 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -157,9 +157,9 @@ impl Database { let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); - let syms = conn.syms.borrow(); + let syms = conn.syms.borrow_mut(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, io, syms.deref(), None) + parse_schema_rows(rows, &mut schema, io, syms, None) { // this means that a vtab exists and we no longer have the module loaded. 
we print // a warning to the user to load the module @@ -532,13 +532,15 @@ impl Connection { .schema .try_write() .expect("lock on schema should succeed first try"); - let syms = self.syms.borrow(); - if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms.deref(), None) { - // this means that a vtab exists and we no longer have the module loaded. we print - // a warning to the user to load the module - eprintln!("Warning: {}", e); + let syms = self.syms.borrow_mut(); + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms, None) + { + // this means that a vtab exists and we no longer have the module loaded. we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } } Ok(()) } @@ -653,7 +655,7 @@ impl VirtualTable { module_name )))?; if let VTabKind::VirtualTable = kind { - if module.module_kind != VTabKind::VirtualTable { + if module.module_kind == VTabKind::TableValuedFunction { return Err(LimboError::ExtensionError(format!( "{} is not a virtual table module", module_name diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 23b937019..514bc21ab 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -135,6 +135,7 @@ fn prologue<'a>( Ok((t_ctx, init_label, start_offset)) } +#[derive(Clone, Copy, Debug)] pub enum TransactionMode { None, Read, diff --git a/core/util.rs b/core/util.rs index 55a50b7ae..f7c41348c 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,5 +1,5 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal}; -use std::{rc::Rc, sync::Arc}; +use std::{cell::RefMut, rc::Rc, sync::Arc}; use crate::{ schema::{self, Column, Schema, Type}, @@ -40,7 +40,7 @@ pub fn parse_schema_rows( rows: Option, schema: &mut Schema, io: Arc, - syms: &SymbolTable, + mut syms: RefMut, mv_tx_id: Option, ) -> Result<()> { if let Some(mut rows) = rows { @@ -60,12 +60,35 
@@ pub fn parse_schema_rows( let sql: &str = row.get::<&str>(4)?; if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; - let Some(vtab) = syms.vtabs.get(name) else { - return Err(LimboError::ExtensionError(format!( - "Virtual table Module for {} not found in symbol table, - please load extension first", - name - ))); + // a virtual table is found in the sqlite_schema, but it's no + // longer in the symbol table. We need to recreate it. + let vtab = if let Some(vtab) = syms.vtabs.get(name) { + vtab.clone() + } else { + // "create virtual table using mod" + let mod_name = module_name_from_sql(sql)?; + if let Some(vmod) = syms.vtab_modules.get(mod_name) { + if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind + { + let vtab = crate::VirtualTable::from_args( + Some(name), + mod_name, + module_args_from_sql(sql)?, + &syms, + vmod.module_kind, + None, + )?; + syms.vtabs.insert(name.to_string(), vtab.clone()); + vtab + } else { + return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); + } + } else { + return Err(LimboError::ExtensionError(format!( + "Virtual table module '{}' not found\nPlease load extension", + &mod_name + ))); + } }; schema.add_virtual_table(vtab.clone()); } else { @@ -138,6 +161,99 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) } +fn module_name_from_sql(sql: &str) -> Result<&str> { + if let Some(start) = sql.find("USING") { + let start = start + 6; + // stop at the first space, semicolon, or parenthesis + let end = sql[start..] 
+ .find(|c: char| c.is_whitespace() || c == ';' || c == '(') + .unwrap_or(sql.len() - start) + + start; + Ok(sql[start..end].trim()) + } else { + Err(LimboError::InvalidArgument( + "Expected 'USING' in module name".to_string(), + )) + } +} + +// CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...); +// CREATE VIRTUAL TABLE table_name USING module_name; +fn module_args_from_sql(sql: &str) -> Result> { + if !sql.contains('(') { + return Ok(vec![]); + } + let start = sql.find('(').ok_or_else(|| { + LimboError::InvalidArgument("Expected '(' in module argument list".to_string()) + })? + 1; + let end = sql.rfind(')').ok_or_else(|| { + LimboError::InvalidArgument("Expected ')' in module argument list".to_string()) + })?; + + let mut args = Vec::new(); + let mut current_arg = String::new(); + let mut chars = sql[start..end].chars().peekable(); + let mut in_quotes = false; + + while let Some(c) = chars.next() { + match c { + '\'' => { + if in_quotes { + if chars.peek() == Some(&'\'') { + // Escaped quote + current_arg.push('\''); + chars.next(); + } else { + in_quotes = false; + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + // Skip until comma or end + while let Some(&nc) = chars.peek() { + if nc == ',' { + chars.next(); // Consume comma + break; + } else if nc.is_whitespace() { + chars.next(); + } else { + return Err(LimboError::InvalidArgument( + "Unexpected characters after quoted argument".to_string(), + )); + } + } + } + } else { + in_quotes = true; + } + } + ',' => { + if !in_quotes { + if !current_arg.trim().is_empty() { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + } + } else { + current_arg.push(c); + } + } + _ => { + current_arg.push(c); + } + } + } + + if !current_arg.trim().is_empty() && !in_quotes { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + } + + if in_quotes { + return Err(LimboError::InvalidArgument( + 
"Unterminated string literal in module arguments".to_string(), + )); + } + + Ok(args) +} + pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool { match (lhs, rhs) { (Literal::Numeric(n1), Literal::Numeric(n2)) => cmp_numeric_strings(n1, n2), diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 89a2d8d93..65bf5238e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -917,12 +917,21 @@ pub fn op_vcreate( "Failed to upgrade Connection".to_string(), )); }; + let mod_type = conn + .syms + .borrow() + .vtab_modules + .get(&module_name) + .ok_or_else(|| { + crate::LimboError::ExtensionError(format!("Module {} not found", module_name)) + })? + .module_kind; let table = crate::VirtualTable::from_args( Some(&table_name), &module_name, args, &conn.syms.borrow(), - limbo_ext::VTabKind::VirtualTable, + mod_type, None, )?; { @@ -4231,7 +4240,7 @@ pub fn op_parse_schema( Some(stmt), &mut schema, conn.pager.io.clone(), - &conn.syms.borrow(), + conn.syms.borrow_mut(), state.mv_tx_id, )?; state.pc += 1; From a0f71e27beece3aea2fdf24087ab14253006d29d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 14:04:28 -0400 Subject: [PATCH 19/56] Fix cli tests --- testing/cli_tests/extensions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index bab8cb74f..ac870ee4d 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -345,10 +345,10 @@ def test_kv(): limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", - lambda res: "Virtual table module not found: kv_store" in res, + lambda res: "Module kv_store not found" in res, ) limbo.execute_dot(f".load {ext_path}") - limbo.debug_print( + limbo.execute_dot( "create virtual table t using kv_store;", ) limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) From 6b5ec1f07b30a3d90af8f64daf4031fdd04139ea Mon Sep 17 00:00:00 2001 From: 
PThorpe92 Date: Thu, 3 Apr 2025 20:55:59 -0400 Subject: [PATCH 20/56] Remove mut borrow from sym table in parse schema fn --- core/ext/dynamic.rs | 4 +++- core/lib.rs | 8 ++++---- core/util.rs | 14 ++++++-------- core/vdbe/execute.rs | 16 +++++++++------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index ec297bf36..60e4050d7 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -54,7 +54,9 @@ impl Connection { LimboError::ExtensionError("Error unlocking extension libraries".to_string()) })? .push((Arc::new(lib), api_ref)); - self.parse_schema_rows()?; + { + self.parse_schema_rows()?; + } Ok(()) } else { if !api_ptr.is_null() { diff --git a/core/lib.rs b/core/lib.rs index 48fb2bd7c..cd728fa64 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -157,9 +157,9 @@ impl Database { let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); - let syms = conn.syms.borrow_mut(); + let syms = conn.syms.borrow(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, io, syms, None) + parse_schema_rows(rows, &mut schema, io, &syms, None) { // this means that a vtab exists and we no longer have the module loaded. we print // a warning to the user to load the module @@ -533,9 +533,9 @@ impl Connection { .try_write() .expect("lock on schema should succeed first try"); { - let syms = self.syms.borrow_mut(); + let syms = self.syms.borrow(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms, None) + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), &syms, None) { // this means that a vtab exists and we no longer have the module loaded. 
we print // a warning to the user to load the module diff --git a/core/util.rs b/core/util.rs index f7c41348c..dab083c16 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,5 +1,5 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal}; -use std::{cell::RefMut, rc::Rc, sync::Arc}; +use std::{rc::Rc, sync::Arc}; use crate::{ schema::{self, Column, Schema, Type}, @@ -40,7 +40,7 @@ pub fn parse_schema_rows( rows: Option, schema: &mut Schema, io: Arc, - mut syms: RefMut, + syms: &SymbolTable, mv_tx_id: Option, ) -> Result<()> { if let Some(mut rows) = rows { @@ -70,16 +70,14 @@ pub fn parse_schema_rows( if let Some(vmod) = syms.vtab_modules.get(mod_name) { if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind { - let vtab = crate::VirtualTable::from_args( + crate::VirtualTable::from_args( Some(name), mod_name, module_args_from_sql(sql)?, - &syms, + syms, vmod.module_kind, None, - )?; - syms.vtabs.insert(name.to_string(), vtab.clone()); - vtab + )? } else { return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); } @@ -90,7 +88,7 @@ pub fn parse_schema_rows( ))); } }; - schema.add_virtual_table(vtab.clone()); + schema.add_virtual_table(vtab); } else { let table = schema::BTreeTable::from_sql(sql, root_page as usize)?; schema.add_btree_table(Rc::new(table)); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 65bf5238e..654e9a2c5 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4236,13 +4236,15 @@ pub fn op_parse_schema( ))?; let mut schema = conn.schema.write(); // TODO: This function below is synchronous, make it async - parse_schema_rows( - Some(stmt), - &mut schema, - conn.pager.io.clone(), - conn.syms.borrow_mut(), - state.mv_tx_id, - )?; + { + parse_schema_rows( + Some(stmt), + &mut schema, + conn.pager.io.clone(), + &conn.syms.borrow(), + state.mv_tx_id, + )?; + } state.pc += 1; Ok(InsnFunctionStepResult::Step) } From 
3a7f1e4056a967f8380158973153d1134bd66716 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 20:57:59 -0400 Subject: [PATCH 21/56] Add comments explaining flow of reloading vtabs from schema tbl --- core/util.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/util.rs b/core/util.rs index dab083c16..ca23f0e83 100644 --- a/core/util.rs +++ b/core/util.rs @@ -61,11 +61,11 @@ pub fn parse_schema_rows( if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; // a virtual table is found in the sqlite_schema, but it's no - // longer in the symbol table. We need to recreate it. + // longer in the in-memory schema. We need to recreate it if + // the module is loaded in the symbol table. let vtab = if let Some(vtab) = syms.vtabs.get(name) { vtab.clone() } else { - // "create virtual table using mod" let mod_name = module_name_from_sql(sql)?; if let Some(vmod) = syms.vtab_modules.get(mod_name) { if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind @@ -82,6 +82,7 @@ pub fn parse_schema_rows( return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); } } else { + // the extension isn't loaded, so we emit a warning. 
return Err(LimboError::ExtensionError(format!( "Virtual table module '{}' not found\nPlease load extension", &mod_name From 41ac91f14f4c528e24bcafe95f0d3409519ad75f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 09:55:43 -0400 Subject: [PATCH 22/56] Add tests for parsing vtab creation sql in ParseSchema --- core/util.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/core/util.rs b/core/util.rs index ca23f0e83..3d12a2c6e 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1753,4 +1753,88 @@ pub mod tests { Ok((OwnedValueType::Float, "1.23e4")) ); } + + #[test] + fn test_module_name_basic() { + let sql = "CREATE VIRTUAL TABLE x USING y;"; + assert_eq!(module_name_from_sql(sql).unwrap(), "y"); + } + + #[test] + fn test_module_name_with_args() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a', 'b');"; + assert_eq!(module_name_from_sql(sql).unwrap(), "modname"); + } + + #[test] + fn test_module_name_missing_using() { + let sql = "CREATE VIRTUAL TABLE x (a, b);"; + assert!(module_name_from_sql(sql).is_err()); + } + + #[test] + fn test_module_name_no_semicolon() { + let sql = "CREATE VIRTUAL TABLE x USING limbo(a, b)"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_name_no_semicolon_or_args() { + let sql = "CREATE VIRTUAL TABLE x USING limbo"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_args_none() { + let sql = "CREATE VIRTUAL TABLE x USING modname;"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 0); + } + + #[test] + fn test_module_args_basic() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1', 'arg2');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!("arg1", args[0].to_text().unwrap()); + assert_eq!("arg2", args[1].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn 
test_module_args_with_escaped_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a''b', 'c');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!(args[0].to_text().unwrap(), "a'b"); + assert_eq!(args[1].to_text().unwrap(), "c"); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn test_module_args_unterminated_string() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1, 'arg2');"; + assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_extra_garbage_after_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1'x);"; + assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_trailing_comma() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1',);"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 1); + assert_eq!("arg1", args[0].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } } From 9b1e60a29c4882ab2551acaedac7b079849ce5a9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 20:09:12 -0400 Subject: [PATCH 23/56] Fix typo in ext library lock err message --- core/ext/dynamic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index 60e4050d7..17138f268 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -51,7 +51,7 @@ impl Connection { extensions .lock() .map_err(|_| { - LimboError::ExtensionError("Error unlocking extension libraries".to_string()) + LimboError::ExtensionError("Error locking extension libraries".to_string()) })? 
.push((Arc::new(lib), api_ref)); { From 431ef2fa6a49f9180f3157cf496ae40e5b452bac Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:15:06 +0300 Subject: [PATCH 24/56] Add TCL/differential fuzz tests for verifying index scan behavior --- testing/orderby.test | 59 +++++++++++++ tests/integration/fuzz/mod.rs | 158 +++++++++++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/testing/orderby.test b/testing/orderby.test index f23c41bfd..b5b56cdd4 100755 --- a/testing/orderby.test +++ b/testing/orderby.test @@ -141,3 +141,62 @@ Collin|15} do_execsql_test case-insensitive-alias { select u.first_name as fF, count(1) > 0 as cC from users u where fF = 'Jamie' group by fF order by cC; } {Jamie|1} + +do_execsql_test age_idx_order_desc { + select first_name from users order by age desc limit 3; +} {Robert +Sydney +Matthew} + +do_execsql_test rowid_or_integer_pk_desc { + select first_name from users order by id desc limit 3; +} {Nicole +Gina +Dorothy} + +# These two following tests may seem dumb but they verify that index scanning by age_idx doesn't drop any rows due to BTree bugs +do_execsql_test orderby_asc_verify_rows { + select count(1) from (select * from users order by age desc) +} {10000} + +do_execsql_test orderby_desc_verify_rows { + select count(1) from (select * from users order by age desc) +} {10000} + +do_execsql_test orderby_desc_with_offset { + select first_name, age from users order by age desc limit 3 offset 666; +} {Francis|94 +Matthew|94 +Theresa|94} + +do_execsql_test orderby_desc_with_filter { + select first_name, age from users where age <= 50 order by age desc limit 5; +} {Gerald|50 +Nicole|50 +Tammy|50 +Marissa|50 +Daniel|50} + +do_execsql_test orderby_asc_with_filter_range { + select first_name, age from users where age <= 50 and age >= 49 order by age asc limit 5; +} {William|49 +Jennifer|49 +Robert|49 +David|49 +Stephanie|49} + +do_execsql_test orderby_desc_with_filter_id_lt { + select id from 
users where id < 6666 order by id desc limit 5; +} {6665 +6664 +6663 +6662 +6661} + +do_execsql_test orderby_desc_with_filter_id_le { + select id from users where id <= 6666 order by id desc limit 5; +} {6666 +6665 +6664 +6663 +6662} \ No newline at end of file diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f776fc9a7..eeed31698 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -2,9 +2,9 @@ pub mod grammar_generator; #[cfg(test)] mod tests { - use std::rc::Rc; + use std::{collections::HashSet, rc::Rc}; - use rand::SeedableRng; + use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use rusqlite::params; @@ -107,6 +107,160 @@ mod tests { } } + #[test] + pub fn index_scan_fuzz() { + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)"); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + + let insert = format!( + "INSERT INTO t VALUES {}", + (0..10000) + .map(|x| format!("({})", x)) + .collect::>() + .join(", ") + ); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; + + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + for comp in COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=10000 { + let query = format!( + "SELECT * FROM t WHERE x {} {} {} LIMIT 3", + comp, + max, + order_by.unwrap_or(""), + ); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + } + } + + #[test] + pub fn index_scan_compound_key_fuzz() { + let (mut rng, seed) = if std::env::var("SEED").is_ok() { + let seed 
= std::env::var("SEED").unwrap().parse::().unwrap(); + (ChaCha8Rng::seed_from_u64(seed), seed) + } else { + rng_from_time() + }; + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y))"); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let mut pk_tuples = HashSet::new(); + while pk_tuples.len() < 100000 { + pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); + } + let mut tuples = Vec::new(); + for pk_tuple in pk_tuples { + tuples.push(format!( + "({}, {}, {})", + pk_tuple.0, + pk_tuple.1, + rng.random_range(0..2000) + )); + } + let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; + + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + let print_dump_on_fail = |insert: &str, seed: u64| { + let comment = format!("-- seed: {}; dump for manual debugging:", seed); + let pragma_journal_mode = "PRAGMA journal_mode = wal;"; + let create_table = "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y));"; + let dump = format!( + "{}\n{}\n{}\n{}\n{}", + comment, pragma_journal_mode, create_table, comment, insert + ); + println!("{}", dump); + }; + + for comp in COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=3000 { + // see comment below about ordering and the '=' comparison operator; omitting LIMIT for that reason + // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure + // correctness in the = case + let limit = if *comp == "=" { "" } else { "LIMIT 5" }; + let query = format!( + "SELECT * FROM t WHERE x {} {} {} {}", + comp, + max, + order_by.unwrap_or(""), + limit + ); + 
log::trace!("query: {}", query); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + let is_equal = limbo == sqlite; + if !is_equal { + // if the condition is = and the same rows are present but in different order, then we accept that + // e.g. sqlite doesn't bother iterating in reverse order if "WHERE X = 3 ORDER BY X DESC", but we currently do. + if *comp == "=" { + let limbo_row_count = limbo.len(); + let sqlite_row_count = sqlite.len(); + if limbo_row_count == sqlite_row_count { + for limbo_row in limbo.iter() { + if !sqlite.contains(limbo_row) { + // save insert to file and print the filename for debugging + let error_msg = format!("row not found in sqlite: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); + print_dump_on_fail(&insert, seed); + panic!("{}", error_msg); + } + } + for sqlite_row in sqlite.iter() { + if !limbo.contains(sqlite_row) { + let error_msg = format!("row not found in limbo: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); + print_dump_on_fail(&insert, seed); + panic!("{}", error_msg); + } + } + continue; + } else { + print_dump_on_fail(&insert, seed); + let error_msg = format!("row count mismatch (limbo: {}, sqlite: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed); + panic!("{}", error_msg); + } + } + print_dump_on_fail(&insert, seed); + panic!( + "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", + query, limbo, sqlite, seed + ); + } + } + } + } + } + #[test] pub fn arithmetic_expression_fuzz() { let _ = env_logger::try_init(); From 3e42a62cd08fcdbea19af9e22ce5dbe82afb5f55 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:19:38 +0300 Subject: [PATCH 25/56] Add SeekLE/SeekLT operations to VDBE --- core/types.rs | 4 +- core/vdbe/builder.rs | 6 +++ core/vdbe/execute.rs | 113 ++++++++++++++++--------------------------- core/vdbe/explain.rs 
| 50 +++++++++++-------- core/vdbe/insn.rs | 30 +++++++++++- core/vdbe/mod.rs | 5 +- 6 files changed, 115 insertions(+), 93 deletions(-) diff --git a/core/types.rs b/core/types.rs index 72119349f..cc4495f52 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1203,11 +1203,13 @@ pub enum CursorResult { IO, } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum SeekOp { EQ, GE, GT, + LE, + LT, } #[derive(Clone, PartialEq, Debug)] diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 19a71a68d..78216204e 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -413,6 +413,12 @@ impl ProgramBuilder { Insn::SeekGT { target_pc, .. } => { resolve(target_pc, "SeekGT"); } + Insn::SeekLE { target_pc, .. } => { + resolve(target_pc, "SeekLE"); + } + Insn::SeekLT { target_pc, .. } => { + resolve(target_pc, "SeekLT"); + } Insn::IdxGE { target_pc, .. } => { resolve(target_pc, "IdxGE"); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 654e9a2c5..b282fa524 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1892,97 +1892,69 @@ pub fn op_deferred_seek( Ok(InsnFunctionStepResult::Step) } -pub fn op_seek_ge( +pub fn op_seek( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::SeekGE { + let (Insn::SeekGE { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn - else { - unreachable!("unexpected Insn {:?}", insn) - }; - assert!(target_pc.is_offset()); - if *is_index { - let found = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GE)); - found - }; - if !found { - state.pc = target_pc.to_offset_int(); - } else { - state.pc += 1; - } - } else { - let pc = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = 
cursor.as_btree_mut(); - let rowid = match state.registers[*start_reg].get_owned_value() { - OwnedValue::Null => { - // All integer values are greater than null so we just rewind the cursor - return_if_io!(cursor.rewind()); - None - } - OwnedValue::Integer(rowid) => Some(*rowid as u64), - _ => { - return Err(LimboError::InternalError( - "SeekGE: the value in the register is not an integer".into(), - )); - } - }; - match rowid { - Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE)); - if !found { - target_pc.to_offset_int() - } else { - state.pc + 1 - } - } - None => state.pc + 1, - } - }; - state.pc = pc; } - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_seek_gt( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::SeekGT { + | Insn::SeekGT { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn + } + | Insn::SeekLE { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + } + | Insn::SeekLT { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + }) = insn else { unreachable!("unexpected Insn {:?}", insn) }; - assert!(target_pc.is_offset()); + assert!( + target_pc.is_offset(), + "target_pc should be an offset, is: {:?}", + target_pc + ); + let op = match insn { + Insn::SeekGE { .. } => SeekOp::GE, + Insn::SeekGT { .. } => SeekOp::GT, + Insn::SeekLE { .. } => SeekOp::LE, + Insn::SeekLT { .. 
} => SeekOp::LT, + _ => unreachable!("unexpected Insn {:?}", insn), + }; + let op_name = match op { + SeekOp::GE => "SeekGE", + SeekOp::GT => "SeekGT", + SeekOp::LE => "SeekLE", + SeekOp::LT => "SeekLT", + _ => unreachable!("unexpected SeekOp {:?}", op), + }; if *is_index { let found = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), op)); found }; if !found { @@ -2002,14 +1974,15 @@ pub fn op_seek_gt( } OwnedValue::Integer(rowid) => Some(*rowid as u64), _ => { - return Err(LimboError::InternalError( - "SeekGT: the value in the register is not an integer".into(), - )); + return Err(LimboError::InternalError(format!( + "{}: the value in the register is not an integer", + op_name + ))); } }; let found = match rowid { Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), op)); if !found { target_pc.to_offset_int() } else { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 66c68d9c0..550e6cb5c 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -736,23 +736,35 @@ pub fn insn_to_str( start_reg, num_regs: _, target_pc, - } => ( - "SeekGT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::SeekGE { + } + | Insn::SeekGE { + is_index: _, + cursor_id, + start_reg, + num_regs: _, + target_pc, + } + | Insn::SeekLE { + is_index: _, + cursor_id, + start_reg, + num_regs: _, + target_pc, + } + | Insn::SeekLT { is_index: _, cursor_id, start_reg, num_regs: _, target_pc, } => ( - "SeekGE", + match insn { + Insn::SeekGT { .. } => "SeekGT", + Insn::SeekGE { .. 
} => "SeekGE", + Insn::SeekLE { .. } => "SeekLE", + Insn::SeekLT { .. } => "SeekLT", + _ => unreachable!(), + }, *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, @@ -1213,9 +1225,9 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::LastAsync { .. } => ( + Insn::LastAsync { cursor_id } => ( "LastAsync", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), @@ -1240,27 +1252,27 @@ pub fn insn_to_str( 0, where_clause.clone(), ), - Insn::LastAwait { .. } => ( + Insn::LastAwait { cursor_id, .. } => ( "LastAwait", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), 0, "".to_string(), ), - Insn::PrevAsync { .. } => ( + Insn::PrevAsync { cursor_id } => ( "PrevAsync", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), 0, "".to_string(), ), - Insn::PrevAwait { .. } => ( + Insn::PrevAwait { cursor_id, .. } => ( "PrevAwait", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 7fffb9b22..c02c78d6e 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -501,6 +501,30 @@ pub enum Insn { /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + // Seek to the first index entry that is less than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. 
+ SeekLE { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + // Seek to the first index entry that is less than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + SeekLT { + is_index: bool, + cursor_id: CursorID, + start_reg: usize, + num_regs: usize, + target_pc: BranchOffset, + }, + + // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + // If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. IdxGE { cursor_id: CursorID, start_reg: usize, @@ -1306,8 +1330,10 @@ impl Insn { Insn::SeekRowid { .. } => execute::op_seek_rowid, Insn::DeferredSeek { .. } => execute::op_deferred_seek, - Insn::SeekGE { .. } => execute::op_seek_ge, - Insn::SeekGT { .. } => execute::op_seek_gt, + Insn::SeekGE { .. } => execute::op_seek, + Insn::SeekGT { .. } => execute::op_seek, + Insn::SeekLE { .. } => execute::op_seek, + Insn::SeekLT { .. } => execute::op_seek, Insn::SeekEnd { .. } => execute::op_seek_end, Insn::IdxGE { .. } => execute::op_idx_ge, Insn::IdxGT { .. } => execute::op_idx_gt, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 45e656032..cf6918304 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -561,7 +561,10 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In | Insn::LastAwait { .. } | Insn::SorterSort { .. } | Insn::SeekGE { .. } - | Insn::SeekGT { .. } => indent_count + 1, + | Insn::SeekGT { .. } + | Insn::SeekLE { .. 
} + | Insn::SeekLT { .. } => indent_count + 1, + _ => indent_count, } } else { From c9190236f031fb3c56dfc61c720174b5c015ca9f Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:31:05 +0300 Subject: [PATCH 26/56] btree: support backwards index seeks and iteration --- core/storage/btree.rs | 461 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 430 insertions(+), 31 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e29a9f8f7..8943d9e81 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4,6 +4,7 @@ use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{ read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell, }; +use crate::translate::plan::IterationDirection; use crate::MvCursor; use crate::types::{ @@ -312,6 +313,17 @@ enum OverflowState { Done, } +/// Iteration state of the cursor. Can only be set once. +/// Once a SeekGT or SeekGE is performed, the cursor must iterate forwards and calling prev() is an error. +/// Similarly, once a SeekLT or SeekLE is performed, the cursor must iterate backwards and calling next() is an error. +/// When a SeekEQ or SeekRowid is performed, the cursor is NOT allowed to iterate further. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum IterationState { + Unset, + Iterating(IterationDirection), + IterationNotAllowed, +} + pub struct BTreeCursor { /// The multi-version cursor that is used to read and write to the database file. mv_cursor: Option>>, @@ -337,6 +349,8 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell>, empty_record: Cell, + + iteration_state: IterationState, } /// Stack of pages representing the tree traversal order. 
@@ -385,6 +399,7 @@ impl BTreeCursor { }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), + iteration_state: IterationState::Unset, } } @@ -404,7 +419,10 @@ impl BTreeCursor { /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result>> { + fn get_prev_record( + &mut self, + predicate: Option<(SeekKey<'_>, SeekOp)>, + ) -> Result>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -418,6 +436,7 @@ impl BTreeCursor { break; } if self.stack.has_parent() { + self.going_upwards = true; self.stack.pop(); } else { // moved to begin of btree @@ -442,6 +461,19 @@ impl BTreeCursor { let contents = page.get().contents.as_ref().unwrap(); let cell_count = contents.cell_count(); + + // If we are at the end of the page and we haven't just come back from the right child, + // we now need to move to the rightmost child. + if cell_idx as i32 == i32::MAX && !self.going_upwards { + let rightmost_pointer = contents.rightmost_pointer(); + if let Some(rightmost_pointer) = rightmost_pointer { + self.stack + .push(self.pager.read_page(rightmost_pointer as usize)?); + self.stack.set_cell_index(i32::MAX); + continue; + } + } + let cell_idx = if cell_idx >= cell_count { self.stack.set_cell_index(cell_count as i32 - 1); cell_count - 1 @@ -484,8 +516,127 @@ impl BTreeCursor { self.stack.retreat(); return Ok(CursorResult::Ok(Some(_rowid))); } - BTreeCell::IndexInteriorCell(_) => todo!(), - BTreeCell::IndexLeafCell(_) => todo!(), + BTreeCell::IndexInteriorCell(IndexInteriorCell { + payload, + left_child_page, + first_overflow_page, + payload_size, + }) => { + if !self.going_upwards { + let mem_page = self.pager.read_page(left_child_page as usize)?; + self.stack.push(mem_page); + // use cell_index = i32::MAX to tell next loop to go to the end of the current page + self.stack.set_cell_index(i32::MAX); + continue; + } + if let Some(next_page) = 
first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + // Going upwards = we just moved to an interior cell from a leaf. + // On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads) + // We then mark going_upwards=false so that we go back down the tree on the next invocation. + self.going_upwards = false; + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = + compare_immutable(record_slice_same_num_cols, index_key.get_values()); + order + }; + + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } + BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) => { + if let Some(next_page) = 
first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + self.stack.retreat(); + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = + compare_immutable(record_slice_same_num_cols, index_key.get_values()); + order + }; + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } } } } @@ -720,6 +871,7 @@ impl BTreeCursor { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), + _ => unreachable!("Seek LE/LT should not happen in get_next_record() because we are iterating forwards"), }; if found { let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() @@ -771,6 +923,7 @@ impl BTreeCursor { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), SeekOp::EQ => 
order.is_le(), + _ => todo!("not implemented: {:?}", op), }; if found { let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() @@ -792,6 +945,35 @@ impl BTreeCursor { /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { + assert!( + self.iteration_state != IterationState::Unset, + "iteration state must have been set before do_seek() is called" + ); + let valid_op = match (self.iteration_state, op) { + (IterationState::Iterating(IterationDirection::Forwards), SeekOp::GE | SeekOp::GT) => { + true + } + (IterationState::Iterating(IterationDirection::Backwards), SeekOp::LE | SeekOp::LT) => { + true + } + (IterationState::IterationNotAllowed, SeekOp::EQ) => true, + _ => false, + }; + assert!( + valid_op, + "invalid seek op for iteration state: {:?} {:?}", + self.iteration_state, op + ); + let cell_iter_dir = match self.iteration_state { + IterationState::Iterating(IterationDirection::Forwards) + | IterationState::IterationNotAllowed => IterationDirection::Forwards, + IterationState::Iterating(IterationDirection::Backwards) => { + IterationDirection::Backwards + } + IterationState::Unset => { + unreachable!("iteration state must have been set before do_seek() is called"); + } + }; return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -800,9 +982,27 @@ impl BTreeCursor { let contents = page.get().contents.as_ref().unwrap(); - for cell_idx in 0..contents.cell_count() { + let cell_count = contents.cell_count(); + let mut cell_idx: isize = if cell_iter_dir == IterationDirection::Forwards { + 0 + } else { + cell_count as isize - 1 + }; + let end = if cell_iter_dir == IterationDirection::Forwards { + cell_count as isize - 1 + } else { + 0 + }; + self.stack.set_cell_index(cell_idx as i32); + while cell_count > 
0 + && (if cell_iter_dir == IterationDirection::Forwards { + cell_idx <= end + } else { + cell_idx >= end + }) + { let cell = contents.cell_get( - cell_idx, + cell_idx as usize, payload_overflow_threshold_max( contents.page_type(), self.usable_space() as u16, @@ -827,6 +1027,8 @@ impl BTreeCursor { SeekOp::GT => *cell_rowid > rowid_key, SeekOp::GE => *cell_rowid >= rowid_key, SeekOp::EQ => *cell_rowid == rowid_key, + SeekOp::LE => *cell_rowid <= rowid_key, + SeekOp::LT => *cell_rowid < rowid_key, }; if found { if let Some(next_page) = first_overflow_page { @@ -841,10 +1043,10 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? }; - self.stack.advance(); + self.stack.next(cell_iter_dir); return Ok(CursorResult::Ok(Some(*cell_rowid))); } else { - self.stack.advance(); + self.stack.next(cell_iter_dir); } } BTreeCell::IndexLeafCell(IndexLeafCell { @@ -869,14 +1071,17 @@ impl BTreeCursor { }; let record = self.get_immutable_record(); let record = record.as_ref().unwrap(); - let without_rowid = &record.get_values().as_slice()[..record.len() - 1]; - let order = without_rowid.cmp(index_key.get_values()); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let order = record_slice_equal_number_of_cols.cmp(index_key.get_values()); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), }; - self.stack.advance(); + self.stack.next(cell_iter_dir); if found { let rowid = match record.last_value() { Some(RefValue::Integer(rowid)) => *rowid as u64, @@ -889,6 +1094,11 @@ impl BTreeCursor { unreachable!("unexpected cell type: {:?}", cell_type); } } + if cell_iter_dir == IterationDirection::Forwards { + cell_idx += 1; + } else { + cell_idx -= 1; + } } } @@ -909,7 +1119,20 @@ impl BTreeCursor { // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, 
which is incorrect, because we already know // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree // and get the next matching record from there. - return self.get_next_record(Some((key, op))); + match self.iteration_state { + IterationState::Iterating(IterationDirection::Forwards) => { + return self.get_next_record(Some((key, op))); + } + IterationState::Iterating(IterationDirection::Backwards) => { + return self.get_prev_record(Some((key, op))); + } + IterationState::Unset => { + unreachable!("iteration state must not be unset"); + } + IterationState::IterationNotAllowed => { + unreachable!("iteration state must not be IterationNotAllowed"); + } + } } Ok(CursorResult::Ok(None)) @@ -994,7 +1217,7 @@ impl BTreeCursor { let mut found_cell = false; for cell_idx in 0..contents.cell_count() { - match &contents.cell_get( + let cell = contents.cell_get( cell_idx, payload_overflow_threshold_max( contents.page_type(), @@ -1005,18 +1228,61 @@ impl BTreeCursor { self.usable_space() as u16, ), self.usable_space(), - )? { + )?; + match &cell { BTreeCell::TableInteriorCell(TableInteriorCell { _left_child_page, - _rowid, + _rowid: cell_rowid, }) => { let SeekKey::TableRowId(rowid_key) = key else { unreachable!("table seek key should be a rowid"); }; - let target_leaf_page_is_in_left_subtree = match cmp { - SeekOp::GT => rowid_key < *_rowid, - SeekOp::GE => rowid_key <= *_rowid, - SeekOp::EQ => rowid_key <= *_rowid, + // in sqlite btrees left child pages have <= keys. + // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, + // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) + // the left page may have a rowid=10. 
+ // Logic table for determining if target leaf page is in left subtree + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key is in left subtree + // GT | = or < | go right | First > key is in right subtree + // GE | > or = | go left | First >= key is in left subtree + // GE | < | go right | First >= key is in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > or = | go left | Last <= key is in left subtree + // LE | < | go right | Last <= key is in right subtree + // LT | > or = | go left | Last < key is in left subtree + // LT | < | go right | Last < key is in right subtree + // + // No iteration (point query): + // EQ | > or = | go left | Last = key is in left subtree + // EQ | < | go right | Last = key is in right subtree + let target_leaf_page_is_in_left_subtree = match (self.iteration_state, cmp) + { + ( + IterationState::Iterating(IterationDirection::Forwards), + SeekOp::GT, + ) => *cell_rowid > rowid_key, + ( + IterationState::Iterating(IterationDirection::Forwards), + SeekOp::GE, + ) => *cell_rowid >= rowid_key, + ( + IterationState::Iterating(IterationDirection::Backwards), + SeekOp::LE, + ) => *cell_rowid >= rowid_key, + ( + IterationState::Iterating(IterationDirection::Backwards), + SeekOp::LT, + ) => *cell_rowid >= rowid_key, + (_any, SeekOp::EQ) => *cell_rowid >= rowid_key, + _ => unreachable!( + "invalid combination of seek op and iteration state: {:?} {:?}", + cmp, self.iteration_state + ), }; self.stack.advance(); if target_leaf_page_is_in_left_subtree { @@ -1057,16 +1323,75 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? 
}; - let order = compare_immutable( + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let interior_cell_vs_index_key = compare_immutable( + record_slice_equal_number_of_cols, index_key.get_values(), - self.get_immutable_record().as_ref().unwrap().get_values(), ); - let target_leaf_page_is_in_the_left_subtree = match cmp { - SeekOp::GT => order.is_lt(), - SeekOp::GE => order.is_le(), - SeekOp::EQ => order.is_le(), + // in sqlite btrees left child pages have <= keys. + // in general, in forwards iteration we want to find the first key that matches the seek condition. + // in backwards iteration we want to find the last key that matches the seek condition. + // + // Logic table for determining if target leaf page is in left subtree. + // For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload). + // and for non-unique indexes there might be several cells with the same key. + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key could be exactly this one, or in left subtree + // GT | = or < | go right | First > key must be in right subtree + // GE | > | go left | First >= key could be exactly this one, or in left subtree + // GE | = | go left | First >= key could be exactly this one, or in left subtree + // GE | < | go right | First >= key must be in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > | go left | Last <= key must be in left subtree + // LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. 
So we need to go right to make sure + // LE | < | go right | Last <= key must be in right subtree + // LT | > | go left | Last < key must be in left subtree + // LT | = | go left | Last < key must be in left subtree since we want strictly less than + // LT | < | go right | Last < key could be exactly this one, or in right subtree + // + // No iteration (point query): + // EQ | > | go left | First = key must be in left subtree + // EQ | = | go left | First = key could be exactly this one, or in left subtree + // EQ | < | go right | First = key must be in right subtree + assert!( + self.iteration_state != IterationState::Unset, + "iteration state must have been set before move_to() is called" + ); + + let target_leaf_page_is_in_left_subtree = match (cmp, self.iteration_state) + { + ( + SeekOp::GT, + IterationState::Iterating(IterationDirection::Forwards), + ) => interior_cell_vs_index_key.is_gt(), + ( + SeekOp::GE, + IterationState::Iterating(IterationDirection::Forwards), + ) => interior_cell_vs_index_key.is_ge(), + (SeekOp::EQ, IterationState::IterationNotAllowed) => { + interior_cell_vs_index_key.is_ge() + } + ( + SeekOp::LE, + IterationState::Iterating(IterationDirection::Backwards), + ) => interior_cell_vs_index_key.is_gt(), + ( + SeekOp::LT, + IterationState::Iterating(IterationDirection::Backwards), + ) => interior_cell_vs_index_key.is_ge(), + _ => unreachable!( + "invalid combination of seek op and iteration state: {:?} {:?}", + cmp, self.iteration_state + ), }; - if target_leaf_page_is_in_the_left_subtree { + if target_leaf_page_is_in_left_subtree { // we don't advance in case of index tree internal nodes because we will visit this node going up let mem_page = self.pager.read_page(*left_child_page as usize)?; self.stack.push(mem_page); @@ -2561,6 +2886,14 @@ impl BTreeCursor { } pub fn rewind(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Unset | IterationState::Iterating(IterationDirection::Forwards) + ), + 
"iteration state must be unset or Iterating(Forwards) when rewind() is called" + ); + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); if self.mv_cursor.is_some() { let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); @@ -2576,6 +2909,14 @@ impl BTreeCursor { } pub fn last(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Unset | IterationState::Iterating(IterationDirection::Backwards) + ), + "iteration state must be unset or Iterating(Backwards) when last() is called" + ); + self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); assert!(self.mv_cursor.is_none()); match self.move_to_rightmost()? { CursorResult::Ok(_) => self.prev(), @@ -2584,6 +2925,13 @@ impl BTreeCursor { } pub fn next(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Forwards) + ), + "iteration state must be Iterating(Forwards) when next() is called" + ); let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -2591,8 +2939,15 @@ impl BTreeCursor { } pub fn prev(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Backwards) + ), + "iteration state must be Iterating(Backwards) when prev() is called" + ); assert!(self.mv_cursor.is_none()); - match self.get_prev_record()? { + match self.get_prev_record(None)? 
{ CursorResult::Ok(rowid) => { self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -2617,6 +2972,38 @@ impl BTreeCursor { pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); + match op { + SeekOp::GE | SeekOp::GT => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); + } else { + assert!(matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Forwards) + )); + } + } + SeekOp::LE | SeekOp::LT => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); + } else { + assert!(matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Backwards) + )); + } + } + SeekOp::EQ => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::IterationNotAllowed; + } else { + assert!(matches!( + self.iteration_state, + IterationState::IterationNotAllowed + )); + } + } + }; let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -3010,7 +3397,7 @@ impl BTreeCursor { OwnedValue::Integer(i) => i, _ => unreachable!("btree tables are indexed by integers!"), }; - return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); + let _ = return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); let page = self.stack.top(); // TODO(pere): request load return_if_locked!(page); @@ -3501,6 +3888,18 @@ impl PageStack { self.cell_indices.borrow_mut()[current] -= 1; } + /// Move the cursor to the next cell in the current page according to the iteration direction. 
+ fn next(&self, iteration_direction: IterationDirection) { + match iteration_direction { + IterationDirection::Forwards => { + self.advance(); + } + IterationDirection::Backwards => { + self.retreat(); + } + } + } + fn set_cell_index(&self, idx: i32) { let current = self.current(); self.cell_indices.borrow_mut()[current] = idx @@ -4767,7 +5166,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(key as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5683,7 +6082,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5763,7 +6162,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5845,7 +6244,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) From a706b7160a17a8104de0af7f856c1e5c9164bc02 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:32:31 +0300 Subject: [PATCH 27/56] planner: support index backwards seeks and iteration --- core/translate/delete.rs | 7 +- core/translate/main_loop.rs | 521 ++++++++++++++++++++++++------------ core/translate/plan.rs | 11 +- core/translate/planner.rs | 14 +- core/translate/update.rs | 24 +- 5 files changed, 386 insertions(+), 191 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 9652048fe..b8b92349d 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -7,7 +7,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::{schema::Schema, Result, SymbolTable}; use limbo_sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use super::plan::TableReference; +use super::plan::{IterationDirection, TableReference}; pub fn translate_delete( 
query_mode: QueryMode, @@ -53,7 +53,10 @@ pub fn prepare_delete_plan( let table_references = vec![TableReference { table, identifier: name, - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, join_info: None, }]; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 4773879e5..ab7ae1a0e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -79,7 +79,7 @@ pub fn init_loop( } } match &table.op { - Operation::Scan { .. } => { + Operation::Scan { index, .. } => { let cursor_id = program.alloc_cursor_id( Some(table.identifier.clone()), match &table.table { @@ -90,6 +90,9 @@ pub fn init_loop( other => panic!("Invalid table reference type in Scan: {:?}", other), }, ); + let index_cursor_id = index.as_ref().map(|i| { + program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone())) + }); match (mode, &table.table) { (OperationMode::SELECT, Table::BTree(btree)) => { let root_page = btree.root_page; @@ -98,6 +101,13 @@ pub fn init_loop( root_page, }); program.emit_insn(Insn::OpenReadAwait {}); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } } (OperationMode::DELETE, Table::BTree(btree)) => { let root_page = btree.root_page; @@ -113,6 +123,12 @@ pub fn init_loop( cursor_id, root_page: root_page.into(), }); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page.into(), + }); + } program.emit_insn(Insn::OpenWriteAwait {}); } (OperationMode::SELECT, Table::Virtual(_)) => { @@ -282,36 +298,35 @@ pub fn open_loop( program.resolve_label(jump_target_when_true, program.offset()); } } - Operation::Scan { iter_dir } => { + Operation::Scan { iter_dir, index } 
=> { let cursor_id = program.resolve_cursor_id(&table.identifier); - + let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); + let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); if !matches!(&table.table, Table::Virtual(_)) { - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::LastAsync { cursor_id }); + if *iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::LastAsync { + cursor_id: iteration_cursor_id, + }); } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); + program.emit_insn(Insn::RewindAsync { + cursor_id: iteration_cursor_id, + }); } } match &table.table { - Table::BTree(_) => program.emit_insn( - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { + Table::BTree(_) => { + program.emit_insn(if *iter_dir == IterationDirection::Backwards { Insn::LastAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_empty: loop_end, } } else { Insn::RewindAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_empty: loop_end, } - }, - ), + }) + } Table::Virtual(ref table) => { let start_reg = program .alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); @@ -337,6 +352,13 @@ pub fn open_loop( } program.resolve_label(loop_start, program.offset()); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id: cursor_id, + }); + } + for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -361,139 +383,6 @@ pub fn open_loop( let table_cursor_id = program.resolve_cursor_id(&table.identifier); // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::RowidEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::RowidEq { .. } => unreachable!(), - }; - - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. - program.emit_insn(match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - _ => unreachable!(), - }); - if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - - program.resolve_label(loop_start, program.offset()); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already sought to the first key greater than 10, and can just scan forward. 
- // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already sought to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already sought to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. - - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - _ => {} - } - - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - 
table_cursor_id, - }); - } - } - if let Search::RowidEq { cmp_expr } = search { let src_reg = program.alloc_register(); translate_expr( @@ -508,7 +397,280 @@ pub fn open_loop( src_reg, target_pc: next, }); + } else { + // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. + let index_cursor_id = if let Search::IndexSearch { index, .. } = search { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + let (cmp_expr, cmp_op, iter_dir) = match search { + Search::IndexSearch { + cmp_expr, + cmp_op, + iter_dir, + .. + } => (cmp_expr, cmp_op, iter_dir), + Search::RowidSearch { + cmp_expr, + cmp_op, + iter_dir, + } => (cmp_expr, cmp_op, iter_dir), + Search::RowidEq { .. } => unreachable!(), + }; + + // There are a few steps in an index seek: + // 1. Emit the comparison expression for the rowid/index seek. For example, if we a clause 'WHERE index_key >= 10', we emit the comparison expression 10 into cmp_reg. + // + // 2. Emit the seek instruction. SeekGE and SeekGT are used in forwards iteration, SeekLT and SeekLE are used in backwards iteration. + // All of the examples below assume an ascending index, because we do not support descending indexes yet. + // If we are scanning the ascending index: + // - Forwards, and have a GT/GE/EQ comparison, the comparison expression from step 1 is used as the value to seek to, because that is the lowest possible value that satisfies the clause. + // - Forwards, and have a LT/LE comparison, NULL is used as the comparison expression because we actually want to start scanning from the beginning of the index. + // - Backwards, and have a GT/GE comparison, no Seek instruction is emitted and we emit LastAsync instead, because we want to start scanning from the end of the index. 
+ // - Backwards, and have a LT/LE/EQ comparison, we emit a Seek instruction with the comparison expression from step 1 as the value to seek to, since that is the highest possible + // value that satisfies the clause. + let seek_cmp_reg = program.alloc_register(); + let mut comparison_expr_translated = false; + match (cmp_op, iter_dir) { + // Forwards, GT/GE/EQ -> use the comparison expression (i.e. seek to the first key where the cmp expr is satisfied, and then scan forwards) + ( + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals, + IterationDirection::Forwards, + ) => { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + seek_cmp_reg, + &t_ctx.resolver, + )?; + comparison_expr_translated = true; + match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => { + program.emit_insn(Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + ast::Operator::Greater => { + program.emit_insn(Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + _ => unreachable!(), + } + } + // Forwards, LT/LE -> use NULL (i.e. start from the beginning of the index) + ( + ast::Operator::Less | ast::Operator::LessEquals, + IterationDirection::Forwards, + ) => { + program.emit_insn(Insn::Null { + dest: seek_cmp_reg, + dest_end: None, + }); + program.emit_insn(Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + // Backwards, GT/GE -> no seek, emit LastAsync (i.e. 
start from the end of the index) + ( + ast::Operator::Greater | ast::Operator::GreaterEquals, + IterationDirection::Backwards, + ) => { + program.emit_insn(Insn::LastAsync { + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + }); + program.emit_insn(Insn::LastAwait { + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + pc_if_empty: loop_end, + }); + } + // Backwards, LT/LE/EQ -> use the comparison expression (i.e. seek from the end of the index until the cmp expr is satisfied, and then scan backwards) + ( + ast::Operator::Less | ast::Operator::LessEquals | ast::Operator::Equals, + IterationDirection::Backwards, + ) => { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + seek_cmp_reg, + &t_ctx.resolver, + )?; + comparison_expr_translated = true; + match cmp_op { + ast::Operator::Less => { + program.emit_insn(Insn::SeekLT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + ast::Operator::LessEquals | ast::Operator::Equals => { + program.emit_insn(Insn::SeekLE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + }; + + program.resolve_label(loop_start, program.offset()); + + let scan_terminating_cmp_reg = if comparison_expr_translated { + seek_cmp_reg + } else { + let reg = program.alloc_register(); + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + reg, + &t_ctx.resolver, + )?; + reg + }; + + // 3. Emit a scan-terminating comparison instruction (IdxGT, IdxGE, IdxLT, IdxLE if index; GT, GE, LT, LE if btree rowid scan). + // Here the comparison expression from step 1 is compared to the current index key and the loop is exited if the comparison is true. 
+ // The comparison operator used in the Idx__ instruction is the inverse of the WHERE clause comparison operator. + // For example, if we are scanning forwards and have a clause 'WHERE index_key < 10', we emit IdxGE(10) since >=10 is the first key where our condition is not satisfied anymore. + match (cmp_op, iter_dir) { + // Forwards, <= -> terminate if > + ( + ast::Operator::Equals | ast::Operator::LessEquals, + IterationDirection::Forwards, + ) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Gt { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Forwards, < -> terminate if >= + (ast::Operator::Less, IterationDirection::Forwards) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Ge { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Backwards, >= -> terminate if < + ( + ast::Operator::Equals | ast::Operator::GreaterEquals, + IterationDirection::Backwards, + ) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxLT { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + 
dest: rowid_reg, + }); + program.emit_insn(Insn::Lt { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Backwards, > -> terminate if <= + (ast::Operator::Greater, IterationDirection::Backwards) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxLE { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Le { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Forwards, > and >= -> we already did a seek to the first key where the cmp expr is satisfied, so we dont have a terminating condition + // Backwards, < and <= -> we already did a seek to the last key where the cmp expr is satisfied, so we dont have a terminating condition + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + // Don't do a btree table seek until it's actually necessary to read from the table. + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } + for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -813,30 +975,33 @@ pub fn close_loop( target_pc: loop_labels.loop_start, }); } - Operation::Scan { iter_dir, .. } => { + Operation::Scan { + index, iter_dir, .. 
+ } => { program.resolve_label(loop_labels.next, program.offset()); + let cursor_id = program.resolve_cursor_id(&table.identifier); + let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); + let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); match &table.table { Table::BTree(_) => { - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); + if *iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::PrevAsync { + cursor_id: iteration_cursor_id, + }); } else { - program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn(Insn::NextAsync { + cursor_id: iteration_cursor_id, + }); } - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { + if *iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::PrevAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } else { program.emit_insn(Insn::NextAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } @@ -854,17 +1019,29 @@ pub fn close_loop( program.resolve_label(loop_labels.next, program.offset()); // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. if !matches!(search, Search::RowidEq { .. }) { - let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier), + let (cursor_id, iter_dir) = match search { + Search::IndexSearch { + index, iter_dir, .. + } => (program.resolve_cursor_id(&index.name), *iter_dir), + Search::RowidSearch { iter_dir, .. } => { + (program.resolve_cursor_id(&table.identifier), *iter_dir) + } Search::RowidEq { .. 
} => unreachable!(), }; - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); + if iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::PrevAsync { cursor_id }); + program.emit_insn(Insn::PrevAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn(Insn::NextAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } } } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index af14f0352..3958f9f81 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -259,12 +259,11 @@ pub struct TableReference { pub enum Operation { // Scan operation // This operation is used to scan a table. - // The iter_dir are uset to indicate the direction of the iterator. - // The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes - // iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple - // assignments. for more detailed discussions, please refer to https://github.com/tursodatabase/limbo/pull/376 + // The iter_dir is used to indicate the direction of the iterator. Scan { - iter_dir: Option, + iter_dir: IterationDirection, + /// The index that we are using to scan the table, if any. + index: Option>, }, // Search operation // This operation is used to search for a row in a table using an index @@ -337,12 +336,14 @@ pub enum Search { RowidSearch { cmp_op: ast::Operator, cmp_expr: WhereTerm, + iter_dir: IterationDirection, }, /// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc. 
IndexSearch { index: Arc, cmp_op: ast::Operator, cmp_expr: WhereTerm, + iter_dir: IterationDirection, }, } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 1b78c954c..b7b8745b0 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,7 +1,7 @@ use super::{ plan::{ - Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType, - TableReference, WhereTerm, + Aggregate, EvalAt, IterationDirection, JoinInfo, Operation, Plan, ResultSetColumn, + SelectPlan, SelectQueryType, TableReference, WhereTerm, }, select::prepare_select_plan, SymbolTable, @@ -320,7 +320,10 @@ fn parse_from_clause_table<'a>( )); }; scope.tables.push(TableReference { - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, table: tbl_ref, identifier: alias.unwrap_or(normalized_qualified_name), join_info: None, @@ -399,7 +402,10 @@ fn parse_from_clause_table<'a>( .unwrap_or(normalized_name.to_string()); scope.tables.push(TableReference { - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, join_info: None, table: Table::Virtual(vtab), identifier: alias, diff --git a/core/translate/update.rs b/core/translate/update.rs index f282fff24..347e36acb 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -13,7 +13,8 @@ use super::optimizer::optimize_plan; use super::plan::{ Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, }; -use super::planner::{bind_column_references, parse_limit, parse_where}; +use super::planner::bind_column_references; +use super::planner::{parse_limit, parse_where}; /* * Update is simple. 
By default we scan the table, and for each row, we check the WHERE @@ -72,18 +73,25 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< let Some(btree_table) = table.btree() else { bail_parse_error!("Error: {} is not a btree table", table_name); }; - let iter_dir: Option = body.order_by.as_ref().and_then(|order_by| { - order_by.first().and_then(|ob| { - ob.order.map(|o| match o { - SortOrder::Asc => IterationDirection::Forwards, - SortOrder::Desc => IterationDirection::Backwards, + let iter_dir = body + .order_by + .as_ref() + .and_then(|order_by| { + order_by.first().and_then(|ob| { + ob.order.map(|o| match o { + SortOrder::Asc => IterationDirection::Forwards, + SortOrder::Desc => IterationDirection::Backwards, + }) }) }) - }); + .unwrap_or(IterationDirection::Forwards); let table_references = vec![TableReference { table: Table::BTree(btree_table.clone()), identifier: table_name.0.clone(), - op: Operation::Scan { iter_dir }, + op: Operation::Scan { + iter_dir, + index: None, + }, join_info: None, }]; let set_clauses = body From 024c63f8080b46f846f1f338fbe7d9b497ec2bf5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:33:01 +0300 Subject: [PATCH 28/56] optimizer: remove ORDER BY if index can be used to satisfy the order --- core/translate/optimizer.rs | 191 +++++++++++++++++++++++------------- 1 file changed, 121 insertions(+), 70 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 5321e0fa0..dd3455449 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, sync::Arc}; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ schema::{Index, Schema}, @@ -9,8 +9,8 @@ use crate::{ }; use super::plan::{ - DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference, - UpdatePlan, WhereTerm, + DeletePlan, Direction, GroupBy, 
IterationDirection, Operation, Plan, Search, SelectPlan, + TableReference, UpdatePlan, WhereTerm, }; pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { @@ -40,10 +40,10 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &plan.group_by, )?; - eliminate_unnecessary_orderby(plan, schema)?; - eliminate_orderby_like_groupby(plan)?; Ok(()) @@ -62,6 +62,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) @@ -79,6 +81,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) } @@ -93,33 +97,6 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { Ok(()) } -fn query_is_already_ordered_by( - table_references: &[TableReference], - key: &mut ast::Expr, - available_indexes: &HashMap>>, -) -> Result { - let first_table = table_references.first(); - if first_table.is_none() { - return Ok(false); - } - let table_reference = first_table.unwrap(); - match &table_reference.op { - Operation::Scan { .. } => Ok(key.is_rowid_alias_of(0)), - Operation::Search(search) => match search { - Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)), - Search::RowidSearch { .. } => Ok(key.is_rowid_alias_of(0)), - Search::IndexSearch { index, .. 
} => { - let index_rc = key.check_index_scan(0, table_reference, available_indexes)?; - let index_is_the_same = index_rc - .map(|irc| Arc::ptr_eq(index, &irc)) - .unwrap_or(false); - Ok(index_is_the_same) - } - }, - _ => Ok(false), - } -} - fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { if plan.order_by.is_none() | plan.group_by.is_none() { return Ok(()); @@ -185,36 +162,117 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { Ok(()) } -fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { - if plan.order_by.is_none() { +fn eliminate_unnecessary_orderby( + table_references: &mut [TableReference], + available_indexes: &HashMap>>, + order_by: &mut Option>, + group_by: &Option, +) -> Result<()> { + let Some(order) = order_by else { return Ok(()); - } - if plan.table_references.is_empty() { + }; + let Some(first_table_reference) = table_references.first_mut() else { return Ok(()); - } - + }; + let Some(btree_table) = first_table_reference.btree() else { + return Ok(()); + }; // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby - if plan.group_by.is_some() { + if group_by.is_some() { + return Ok(()); + } + let Operation::Scan { + index, iter_dir, .. 
+ } = &mut first_table_reference.op + else { + return Ok(()); + }; + + assert!( + index.is_none(), + "Nothing shouldve transformed the scan to use an index yet" + ); + + // Special case: if ordering by just the rowid, we can remove the ORDER BY clause + if order.len() == 1 && order[0].0.is_rowid_alias_of(0) { + *iter_dir = match order[0].1 { + Direction::Ascending => IterationDirection::Forwards, + Direction::Descending => IterationDirection::Backwards, + }; + *order_by = None; return Ok(()); } - let o = plan.order_by.as_mut().unwrap(); + // Find the best matching index for the ORDER BY columns + let table_name = &btree_table.name; + let mut best_index = (None, 0); - if o.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); + for (_, indexes) in available_indexes.iter() { + for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) { + let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| { + if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) { + let col_idx_in_table = btree_table + .columns + .iter() + .position(|tc| tc.name.as_ref() == Some(&c.name)); + matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx) + } else { + false + } + }).count(); + + if matching_columns > best_index.1 { + best_index = (Some(index_candidate), matching_columns); + } + } } - let (key, direction) = o.first_mut().unwrap(); + let Some(matching_index) = best_index.0 else { + return Ok(()); + }; + let match_count = best_index.1; - let already_ordered = - query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?; + // If we found a matching index, use it for scanning + *index = Some(matching_index.clone()); + // If the order by direction matches the index direction, we can iterate the index in forwards order. + // If they don't, we must iterate the index in backwards order. 
+ let index_direction = &matching_index.columns.first().as_ref().unwrap().order; + *iter_dir = match (index_direction, order[0].1) { + (SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => { + IterationDirection::Forwards + } + (SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => { + IterationDirection::Backwards + } + }; - if already_ordered { - push_scan_direction(&mut plan.table_references[0], direction); - plan.order_by = None; + // If the index covers all ORDER BY columns, and one of the following applies: + // - the ORDER BY directions exactly match the index orderings, + // - the ORDER by directions are the exact opposite of the index orderings, + // we can remove the ORDER BY clause. + if match_count == order.len() { + let full_match = { + let mut all_match_forward = true; + let mut all_match_reverse = true; + for (i, (_, direction)) in order.iter().enumerate() { + match (&matching_index.columns[i].order, direction) { + (SortOrder::Asc, Direction::Ascending) + | (SortOrder::Desc, Direction::Descending) => { + all_match_reverse = false; + } + (SortOrder::Asc, Direction::Descending) + | (SortOrder::Desc, Direction::Ascending) => { + all_match_forward = false; + } + } + } + all_match_forward || all_match_reverse + }; + if full_match { + *order_by = None; + } } Ok(()) @@ -222,24 +280,25 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu /** * Use indexes where possible. - * Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others. - * This is just because we are WIP. * * When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector. * If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation. 
* We put it there simply because it makes it a bit easier to track during translation. + * + * In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause. */ fn use_indexes( table_references: &mut [TableReference], available_indexes: &HashMap>>, where_clause: &mut Vec, + order_by: &mut Option>, + group_by: &Option, ) -> Result<()> { - if where_clause.is_empty() { - return Ok(()); - } - + // Try to use indexes for eliminating ORDER BY clauses + eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + // Try to use indexes for WHERE conditions 'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() { - if let Operation::Scan { .. } = &mut table_reference.op { + if let Operation::Scan { iter_dir, .. } = &table_reference.op { let mut i = 0; while i < where_clause.len() { let cond = where_clause.get_mut(i).unwrap(); @@ -248,6 +307,7 @@ fn use_indexes( table_index, table_reference, available_indexes, + iter_dir.clone(), )? { where_clause.remove(i); table_reference.op = Operation::Search(index_search); @@ -296,20 +356,6 @@ fn eliminate_constant_conditions( Ok(ConstantConditionEliminationResult::Continue) } -fn push_scan_direction(table: &mut TableReference, direction: &Direction) { - if let Operation::Scan { - ref mut iter_dir, .. 
- } = table.op - { - if iter_dir.is_none() { - match direction { - Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), - Direction::Descending => *iter_dir = Some(IterationDirection::Backwards), - } - } - } -} - fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> { for rc in plan.result_columns.iter_mut() { rewrite_expr(&mut rc.expr)?; @@ -611,6 +657,7 @@ pub fn try_extract_index_search_expression( table_index: usize, table_reference: &TableReference, available_indexes: &HashMap>>, + iter_dir: IterationDirection, ) -> Result> { if !cond.should_eval_at_loop(table_index) { return Ok(None); @@ -641,6 +688,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -671,6 +719,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -695,6 +744,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -719,6 +769,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} From fa295af635a68caf3d2a13c235c32dd9c60b77fd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 11:05:30 +0300 Subject: [PATCH 29/56] Fix insert fuzz test by bypassing internal invariant --- core/storage/btree.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 8943d9e81..39404ec8f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -318,7 +318,7 @@ enum OverflowState { /// Similarly, once a SeekLT or SeekLE is performed, the cursor must iterate backwards and calling next() is an error. /// When a SeekEQ or SeekRowid is performed, the cursor is NOT allowed to iterate further. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum IterationState { +pub enum IterationState { Unset, Iterating(IterationDirection), IterationNotAllowed, @@ -350,7 +350,7 @@ pub struct BTreeCursor { reusable_immutable_record: RefCell>, empty_record: Cell, - iteration_state: IterationState, + pub iteration_state: IterationState, } /// Stack of pages representing the tree traversal order. @@ -2930,7 +2930,8 @@ impl BTreeCursor { self.iteration_state, IterationState::Iterating(IterationDirection::Forwards) ), - "iteration state must be Iterating(Forwards) when next() is called" + "iteration state must be Iterating(Forwards) when next() is called, but it was {:?}", + self.iteration_state ); let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); @@ -5183,6 +5184,8 @@ mod tests { // FIXME: add sorted vector instead, should be okay for small amounts of keys for now :P, too lazy to fix right now keys.sort(); cursor.move_to_root(); + // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ + cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); let mut valid = true; for key in keys.iter() { tracing::trace!("seeking key: {}", key); @@ -5194,6 +5197,7 @@ mod tests { break; } } + cursor.iteration_state = IterationState::Unset; // let's validate btree too so that we undertsand where the btree failed if matches!(validate_btree(pager.clone(), root_page), (_, false)) || !valid { let btree_after = format_btree(pager.clone(), root_page, 0); @@ -5211,6 +5215,8 @@ mod tests { } keys.sort(); cursor.move_to_root(); + // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ + cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); for key in keys.iter() { tracing::trace!("seeking key: {}", key); run_until_done(|| cursor.next(), pager.deref()).unwrap(); From f5220d281df9d8c263720f1a22d471da6203c910 Mon Sep 17 00:00:00 2001 
From: Jussi Saurio Date: Tue, 8 Apr 2025 14:57:26 +0300 Subject: [PATCH 30/56] Fix off-by-one logic in btree table traversal --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 39404ec8f..97c1c5126 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1255,7 +1255,7 @@ impl BTreeCursor { // LE | > or = | go left | Last <= key is in left subtree // LE | < | go right | Last <= key is in right subtree // LT | > or = | go left | Last < key is in left subtree - // LT | < | go right | Last < key is in right subtree + // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less // // No iteration (point query): // EQ | > or = | go left | Last = key is in left subtree @@ -1277,7 +1277,7 @@ impl BTreeCursor { ( IterationState::Iterating(IterationDirection::Backwards), SeekOp::LT, - ) => *cell_rowid >= rowid_key, + ) => *cell_rowid >= rowid_key || *cell_rowid == rowid_key - 1, (_any, SeekOp::EQ) => *cell_rowid >= rowid_key, _ => unreachable!( "invalid combination of seek op and iteration state: {:?} {:?}", From d9bae633c06836c8bc70014ef884ee499445a7e9 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 14:59:39 +0300 Subject: [PATCH 31/56] Add rowid_seek_fuzz() test --- tests/integration/fuzz/mod.rs | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index eeed31698..5df3b49b4 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -107,6 +107,53 @@ mod tests { } } + #[test] + pub fn rowid_seek_fuzz() { + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x INTEGER PRIMARY KEY)"); // INTEGER PRIMARY KEY is a rowid alias, so an index is not created + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + + let insert = format!( + "INSERT INTO t VALUES {}", + (1..10000) + 
.map(|x| format!("({})", x)) + .collect::>() + .join(", ") + ); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 4] = ["<", "<=", ">", ">="]; + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + for comp in COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=10000 { + let query = format!( + "SELECT * FROM t WHERE x {} {} {} LIMIT 3", + comp, + max, + order_by.unwrap_or("") + ); + log::trace!("query: {}", query); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + } + } + #[test] pub fn index_scan_fuzz() { let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)"); From 0bb87b060a87b2b166d956a38e672f4c40b6d572 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:05:52 +0300 Subject: [PATCH 32/56] Fix existing table btree backwards iteration logic --- core/storage/btree.rs | 67 +++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 97c1c5126..028e60206 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -431,8 +431,7 @@ impl BTreeCursor { // todo: find a better way to flag moved to end or begin of page if self.stack.current_cell_index_less_than_min() { loop { - if self.stack.current_cell_index() > 0 { - self.stack.retreat(); + if self.stack.current_cell_index() >= 0 { break; } if self.stack.has_parent() { @@ -448,11 +447,6 @@ impl BTreeCursor { } let cell_idx = cell_idx as usize; - tracing::trace!( - "get_prev_record current id={} cell={}", - page.get().id, - cell_idx - ); 
return_if_locked!(page); if !page.is_loaded() { self.pager.load_page(page.clone())?; @@ -468,8 +462,7 @@ impl BTreeCursor { let rightmost_pointer = contents.rightmost_pointer(); if let Some(rightmost_pointer) = rightmost_pointer { self.stack - .push(self.pager.read_page(rightmost_pointer as usize)?); - self.stack.set_cell_index(i32::MAX); + .push_backwards(self.pager.read_page(rightmost_pointer as usize)?); continue; } } @@ -480,7 +473,6 @@ impl BTreeCursor { } else { cell_idx }; - let cell = contents.cell_get( cell_idx, payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), @@ -494,9 +486,7 @@ impl BTreeCursor { _rowid, }) => { let mem_page = self.pager.read_page(_left_child_page as usize)?; - self.stack.push(mem_page); - // use cell_index = i32::MAX to tell next loop to go to the end of the current page - self.stack.set_cell_index(i32::MAX); + self.stack.push_backwards(mem_page); continue; } BTreeCell::TableLeafCell(TableLeafCell { @@ -523,6 +513,14 @@ impl BTreeCursor { payload_size, }) => { if !self.going_upwards { + // In backwards iteration, if we haven't just moved to this interior node from the + // right child, but instead are about to move to the left child, we need to retreat + // so that we don't come back to this node again. + // For example: + // this parent: key 666 + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards. + self.stack.retreat(); let mem_page = self.pager.read_page(left_child_page as usize)?; self.stack.push(mem_page); // use cell_index = i32::MAX to tell next loop to go to the end of the current page @@ -538,7 +536,7 @@ impl BTreeCursor { )? }; - // Going upwards = we just moved to an interior cell from a leaf. + // Going upwards = we just moved to an interior cell from the right child. 
// On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads) // We then mark going_upwards=false so that we go back down the tree on the next invocation. self.going_upwards = false; @@ -1206,6 +1204,13 @@ impl BTreeCursor { // 6. If we find the cell, we return the record. Otherwise, we return an empty result. self.move_to_root(); + let iter_dir = match self.iteration_state { + IterationState::Iterating(IterationDirection::Backwards) => { + IterationDirection::Backwards + } + _ => IterationDirection::Forwards, + }; + loop { let page = self.stack.top(); return_if_locked!(page); @@ -1284,12 +1289,22 @@ impl BTreeCursor { cmp, self.iteration_state ), }; - self.stack.advance(); if target_leaf_page_is_in_left_subtree { + // If we found our target rowid in the left subtree, + // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. + // For example: since the internal node contains the max rowid of the left subtree, we need to move the + // parent pointer backwards in backwards iteration so that we don't come back to the parent again. + // E.g. + // this parent: rowid 666 + // left child has: 664,665,666 + // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. + self.stack.next(iter_dir); let mem_page = self.pager.read_page(*_left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; break; + } else { + self.stack.advance(); } } BTreeCell::TableLeafCell(TableLeafCell { @@ -1392,7 +1407,15 @@ impl BTreeCursor { ), }; if target_leaf_page_is_in_left_subtree { - // we don't advance in case of index tree internal nodes because we will visit this node going up + // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. + // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. 
+ // Example: + // this parent: key 666, and we found the target key in the left child. + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. + if iter_dir == IterationDirection::Backwards { + self.stack.retreat(); + } let mem_page = self.pager.read_page(*left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; @@ -3816,7 +3839,7 @@ impl PageStack { } /// Push a new page onto the stack. /// This effectively means traversing to a child page. - fn push(&self, page: PageRef) { + fn _push(&self, page: PageRef, starting_cell_idx: i32) { tracing::trace!( "pagestack::push(current={}, new_page_id={})", self.current_page.get(), @@ -3829,7 +3852,15 @@ impl PageStack { "corrupted database, stack is bigger than expected" ); self.stack.borrow_mut()[current as usize] = Some(page); - self.cell_indices.borrow_mut()[current as usize] = 0; + self.cell_indices.borrow_mut()[current as usize] = starting_cell_idx; + } + + fn push(&self, page: PageRef) { + self._push(page, 0); + } + + fn push_backwards(&self, page: PageRef) { + self._push(page, i32::MAX); } /// Pop a page off the stack. From 3124fca5b72c72d13ea866ab1f6524a3c6482d1a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:15:57 +0300 Subject: [PATCH 33/56] Dereference instead of explicit clone --- core/translate/optimizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dd3455449..772ed81e7 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -307,7 +307,7 @@ fn use_indexes( table_index, table_reference, available_indexes, - iter_dir.clone(), + *iter_dir, )? 
{ where_clause.remove(i); table_reference.op = Operation::Search(index_search); From 5e3a37a1921e74e9dc7f417731f927b9d17d02d2 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:25:07 +0300 Subject: [PATCH 34/56] Try to name iteration direction sensitive method better --- core/storage/btree.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 028e60206..9d487243d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1041,10 +1041,10 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? }; - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); return Ok(CursorResult::Ok(Some(*cell_rowid))); } else { - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); } } BTreeCell::IndexLeafCell(IndexLeafCell { @@ -1079,7 +1079,7 @@ impl BTreeCursor { SeekOp::LE => order.is_le(), SeekOp::LT => order.is_lt(), }; - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); if found { let rowid = match record.last_value() { Some(RefValue::Integer(rowid)) => *rowid as u64, @@ -1298,7 +1298,7 @@ impl BTreeCursor { // this parent: rowid 666 // left child has: 664,665,666 // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. - self.stack.next(iter_dir); + self.stack.next_cell_in_direction(iter_dir); let mem_page = self.pager.read_page(*_left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; @@ -3921,7 +3921,7 @@ impl PageStack { } /// Move the cursor to the next cell in the current page according to the iteration direction. 
- fn next(&self, iteration_direction: IterationDirection) { + fn next_cell_in_direction(&self, iteration_direction: IterationDirection) { match iteration_direction { IterationDirection::Forwards => { self.advance(); From 0888c71ba08756ba48cbdf99c550b49fcc2017d8 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 9 Apr 2025 10:26:02 +0300 Subject: [PATCH 35/56] use seek() instead of do_seek() to set iteration state --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 9d487243d..dac04d960 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3390,7 +3390,7 @@ impl BTreeCursor { /// Search for a key in an Index Btree. Looking up indexes that need to be unique, we cannot compare the rowid pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result> { - return_if_io!(self.do_seek(SeekKey::IndexKey(key), SeekOp::GE)); + return_if_io!(self.seek(SeekKey::IndexKey(key), SeekOp::GE)); let record_opt = self.record(); match record_opt.as_ref() { From edc3a420fbeb7f46d2dc789dabdcc28b79d31bd1 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 11:02:49 +0200 Subject: [PATCH 36/56] comment how page count is decreased while balancing --- core/storage/btree.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index befb43189..b843440cc 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1657,7 +1657,6 @@ impl BTreeCursor { while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { - // FIXME: this doesn't remove pages if not needed sibling_count_new += 1; new_page_sizes.push(0); cell_array @@ -1715,16 +1714,24 @@ impl BTreeCursor { new_page_sizes[i + 1] -= size_of_cell_to_remove_from_right; } - let we_still_need_another_page = + // Check if this page contains up to the last cell. 
If this happens it means we really just need up to this page. + // Let's update the number of new pages to be up to this page (i+1) + let page_completes_all_cells = cell_array.number_of_cells_per_page[i] >= cell_array.cells.len() as u16; - if we_still_need_another_page { + if page_completes_all_cells { sibling_count_new = i + 1; + break; } i += 1; if i >= sibling_count_new { break; } } + new_page_sizes.truncate(sibling_count_new); + cell_array + .number_of_cells_per_page + .truncate(sibling_count_new); + tracing::debug!( "balance_non_root(sibling_count={}, sibling_count_new={}, cells={})", balance_info.sibling_count, From f1df09ffd9cfda029f05b385eac5d61d6fef93ae Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 11:05:41 +0200 Subject: [PATCH 37/56] free no longer used pages after balance --- core/storage/btree.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b843440cc..5030abf03 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2045,7 +2045,11 @@ impl BTreeCursor { rightmost_pointer, ); // TODO: balance root - // TODO: free pages + // We have to free pages that are not used anymore + for i in sibling_count_new..balance_info.sibling_count { + let page = &balance_info.pages_to_balance[i]; + self.pager.free_page(Some(page.clone()), page.get().id)?; + } (WriteState::BalanceStart, Ok(CursorResult::Ok(()))) } WriteState::Finish => todo!(), From d9453f6e0695f673737303fbd2c025a47aacd194 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:01:18 +0200 Subject: [PATCH 38/56] fix `cell_get_raw_region` length calculation --- core/storage/sqlite3_ondisk.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index d48f5b61b..b8373514f 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -626,9 +626,9 @@ impl PageContent { usable_size, ); 
if overflows { - 4 + to_read + n_payload + 4 + 4 + to_read + n_payload } else { - 4 + len_payload as usize + n_payload + 4 + 4 + len_payload as usize + n_payload } } PageType::TableInterior => { @@ -644,9 +644,9 @@ impl PageContent { usable_size, ); if overflows { - to_read + n_payload + 4 + to_read + n_payload } else { - len_payload as usize + n_payload + 4 + len_payload as usize + n_payload } } PageType::TableLeaf => { From 12899034c9c814ce01fb0e8cb95c3969a38b5e0f Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:01:40 +0200 Subject: [PATCH 39/56] make insert idx re-entrant --- core/storage/btree.rs | 7 +++++++ core/vdbe/execute.rs | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0446b95ec..cd97412ab 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3885,6 +3885,13 @@ impl BTreeCursor { fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option> { self.reusable_immutable_record.borrow_mut() } + + pub fn is_write_in_progress(&self) -> bool { + match self.state { + CursorState::Write(_) => true, + _ => false, + } + } } #[cfg(debug_assertions)] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index b282fa524..b82b13f3b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3761,7 +3761,17 @@ pub fn op_idx_insert_async( } else { flags.has(IdxInsertFlags::USE_SEEK) }; - // insert record as key + + // To make this reentrant in case of `moved_before` = false, we need to check if the previous cursor.insert started + // a write/balancing operation. If it did, it means we already moved to the place we wanted. + let moved_before = if cursor.is_write_in_progress() { + true + } else { + moved_before + }; + // Start insertion of row. 
This might trigger a balance procedure which will take care of moving to different pages, + // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to `Await` opcode + // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before)); } state.pc += 1; From f2d9e1e8f55d40eb6a7ae916945460e68c129187 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:02:24 +0200 Subject: [PATCH 40/56] fix divider cell in index --- core/storage/btree.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index cd97412ab..9e9d6800c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1928,7 +1928,7 @@ impl BTreeCursor { if i < balance_info.pages_to_balance.len() - 1 && !leaf_data { // If we are a index page or a interior table page we need to take the divider cell too. // But we don't need the last divider as it will remain the same. - let divider_cell = &mut balance_info.divider_cells[i]; + let mut divider_cell = balance_info.divider_cells[i].as_mut_slice(); // TODO(pere): in case of old pages are leaf pages, so index leaf page, we need to strip page pointers // from divider cells in index interior pages (parent) because those should not be included. 
cells_inserted += 1; @@ -1936,8 +1936,13 @@ impl BTreeCursor { // This divider cell needs to be updated with new left pointer, let right_pointer = old_page_contents.rightmost_pointer().unwrap(); divider_cell[..4].copy_from_slice(&right_pointer.to_be_bytes()); + } else { + // index leaf + assert!(divider_cell.len() >= 4); + // let's strip the page pointer + divider_cell = &mut divider_cell[4..]; } - cell_array.cells.push(to_static_buf(divider_cell.as_mut())); + cell_array.cells.push(to_static_buf(divider_cell)); } total_cells_inserted += cells_inserted; } @@ -1955,6 +1960,9 @@ impl BTreeCursor { { for cell in &cell_array.cells { cells_debug.push(cell.to_vec()); + if leaf { + assert!(cell[0] != 0) + } } } @@ -2778,7 +2786,17 @@ impl BTreeCursor { valid = false; } } - PageType::IndexLeaf => todo!(), + PageType::IndexLeaf => { + let parent_cell_buf = + &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len]; + if parent_cell_buf[4..] != cell_buf_in_array[..] { + tracing::error!("balance_non_root(cell_divider_cell_index_leaf, page_id={}, cell_divider_idx={})", + page.get().id, + cell_divider_idx, + ); + valid = false; + } + } _ => { unreachable!() } From 6b7575bf3f2b01852ddae77dcce034e1d39df01a Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:03:23 +0200 Subject: [PATCH 41/56] fix tree traversal assumptions on traversal --- core/storage/btree.rs | 46 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 9e9d6800c..95dcad520 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1630,7 +1630,12 @@ impl BTreeCursor { let write_info = self.state.mut_write_info().unwrap(); write_info.state = WriteState::BalanceNonRoot; self.stack.pop(); - self.stack.retreat(); + // with `move_to` we advance the current cell idx of TableInterior once we move to left subtree. 
+ // On the other hand, with IndexInterior, we do not because we tranver in-order. In the latter case + // since we haven't consumed the cell we can avoid retreating the current cell index. + if matches!(current_page.get_contents().page_type(), PageType::TableLeaf) { + self.stack.retreat(); + } return_if_io!(self.balance_non_root()); } WriteState::BalanceNonRoot | WriteState::BalanceNonRootWaitLoadPages => { @@ -1682,7 +1687,12 @@ impl BTreeCursor { parent_contents.overflow_cells.is_empty(), "balancing child page with overflowed parent not yet implemented" ); - assert!(page_to_balance_idx <= parent_contents.cell_count()); + assert!( + page_to_balance_idx <= parent_contents.cell_count(), + "page_to_balance_idx={} is out of bounds for parent cell count {}", + page_to_balance_idx, + number_of_cells_in_parent + ); // As there will be at maximum 3 pages used to balance: // sibling_pointer is the index represeneting one of those 3 pages, and we initialize it to the last possible page. // next_divider is the first divider that contains the first page of the 3 pages. @@ -1813,6 +1823,7 @@ impl BTreeCursor { // Now do real balancing let parent_page = self.stack.top(); let parent_contents = parent_page.get_contents(); + assert!( parent_contents.overflow_cells.is_empty(), "overflow parent not yet implemented" @@ -2259,6 +2270,7 @@ impl BTreeCursor { write_varint_to_vec(rowid, &mut new_divider_cell); } else { // Leaf index + new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes()); new_divider_cell.extend_from_slice(divider_cell); } @@ -2855,7 +2867,10 @@ impl BTreeCursor { child_buf[0..root_contents.header_size()] .copy_from_slice(&root_buf[offset..offset + root_contents.header_size()]); // Copy overflow cells - child_contents.overflow_cells = root_contents.overflow_cells.clone(); + std::mem::swap( + &mut child_contents.overflow_cells, + &mut root_contents.overflow_cells, + ); // 2. 
Modify root let new_root_page_type = match root_contents.page_type() { @@ -2878,7 +2893,9 @@ impl BTreeCursor { self.root_page = root.get().id; self.stack.clear(); self.stack.push(root.clone()); - self.stack.advance(); + if matches!(root_contents.page_type(), PageType::TableInterior) { + self.stack.advance(); + } self.stack.push(child.clone()); } @@ -2933,6 +2950,7 @@ impl BTreeCursor { } cell_idx += 1; } + assert!(cell_idx <= cell_count); cell_idx } @@ -4449,10 +4467,21 @@ fn debug_validate_cells_core(page: &PageContent, usable_space: u16) { payload_overflow_threshold_min(page.page_type(), usable_space), usable_space as usize, ); + let buf = &page.as_ptr()[offset..offset + size]; + assert!( + size >= 4, + "cell size should be at least 4 bytes idx={}, cell={:?}, offset={}", + i, + buf, + offset + ); if page.is_leaf() { assert!(page.as_ptr()[offset] != 0); } - assert!(size >= 4, "cell size should be at least 4 bytes idx={}", i); + assert!( + offset + size <= usable_space as usize, + "cell spans out of usable space" + ); } } @@ -4467,6 +4496,7 @@ fn insert_into_cell( cell_idx: usize, usable_space: u16, ) -> Result<()> { + debug_validate_cells!(page, usable_space); assert!( cell_idx <= page.cell_count() + page.overflow_cells.len(), "attempting to add cell to an incorrect place cell_idx={} cell_count={}", @@ -4487,10 +4517,12 @@ fn insert_into_cell( let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?; tracing::debug!( - "insert_into_cell(idx={}, pc={})", + "insert_into_cell(idx={}, pc={}, size={})", cell_idx, - new_cell_data_pointer + new_cell_data_pointer, + payload.len() ); + assert!(new_cell_data_pointer + payload.len() as u16 <= usable_space); let buf = page.as_ptr(); // copy data From 7b384f8e5c27e6172dbfb897ff6e814f7f00938b Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:29:06 +0200 Subject: [PATCH 42/56] set iteration_state for insert --- core/storage/btree.rs | 1 + 1 file changed, 1 insertion(+) diff 
--git a/core/storage/btree.rs b/core/storage/btree.rs index 95dcad520..620dd7662 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3148,6 +3148,7 @@ impl BTreeCursor { }, None => { if !moved_before { + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); match key { BTreeKey::IndexKey(_) => { return_if_io!(self From 6a02730c1ac12616c7f0cf671352f6631828d9df Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:56:04 +0200 Subject: [PATCH 43/56] rebase fixes --- core/storage/btree.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 620dd7662..8f4afb090 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2893,9 +2893,6 @@ impl BTreeCursor { self.root_page = root.get().id; self.stack.clear(); self.stack.push(root.clone()); - if matches!(root_contents.page_type(), PageType::TableInterior) { - self.stack.advance(); - } self.stack.push(child.clone()); } @@ -5324,7 +5321,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(key as u64); - cursor.seek(key, SeekOp::EQ) + cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) From 3b98675aa0a711111279725cfeb0c0558c70ca4f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:02:25 +0300 Subject: [PATCH 44/56] Update COMPAT.md --- COMPAT.md | 1 + 1 file changed, 1 insertion(+) diff --git a/COMPAT.md b/COMPAT.md index f541c1f61..7013b2427 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -58,6 +58,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | COMMIT TRANSACTION | Partial | Transaction names are not supported. | | CREATE INDEX | Yes | | | CREATE TABLE | Partial | | +| CREATE TABLE ... 
STRICT | Yes | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | | CREATE VIRTUAL TABLE | No | | From dbb346ba2810a3a246aeae2856ea1836c5742296 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:03:53 +0300 Subject: [PATCH 45/56] Update COMPAT.md --- COMPAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 7013b2427..5e6dc0499 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -61,7 +61,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | CREATE TABLE ... STRICT | Yes | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | -| CREATE VIRTUAL TABLE | No | | +| CREATE VIRTUAL TABLE | Yes | | | DELETE | Yes | | | DETACH DATABASE | No | | | DROP INDEX | No | | From 5de2d91d04f6ecbe7245d4c3163177c1da28ae81 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:07:24 +0300 Subject: [PATCH 46/56] Update COMPAT.md --- COMPAT.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 5e6dc0499..e85a47725 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -41,7 +41,6 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte * ⛔️ Concurrent access from multiple processes is not supported. * ⛔️ Savepoints are not supported. * ⛔️ Triggers are not supported. -* ⛔️ Indexes are not supported. * ⛔️ Views are not supported. * ⛔️ Vacuum is not supported. 
@@ -65,7 +64,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | DELETE | Yes | | | DETACH DATABASE | No | | | DROP INDEX | No | | -| DROP TABLE | No | | +| DROP TABLE | Yes | | | DROP TRIGGER | No | | | DROP VIEW | No | | | END TRANSACTION | Partial | Alias for `COMMIT TRANSACTION` | From 2316d7ebf1ee5d682d2332d57141a3596646c242 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 16:31:08 +0200 Subject: [PATCH 47/56] add .timer command with fine grained statistics about limbo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` Limbo v0.0.19-pre.4 Enter ".help" for usage hints. limbo> .timer on limbo> select count(1) from users; ┌───────────┐ │ count (1) │ ├───────────┤ │ 10000 │ └───────────┘ Command stats: ---------------------------- total: 35 ms (this includes parsing/coloring of cli app) query execution stats: ---------------------------- Execution: avg=16 us, total=33 us I/O: avg=123 ns, total=3 us limbo> select 1; ┌───┐ │ 1 │ ├───┤ │ 1 │ └───┘ Command stats: ---------------------------- total: 282 us (this includes parsing/coloring of cli app) query execution stats: ---------------------------- Execution: avg=2 us, total=4 us I/O: No samples available ``` --- cli/app.rs | 119 +++++++++++++++++++++++++++++++++++++++++-- cli/commands/args.rs | 12 +++++ cli/commands/mod.rs | 4 +- cli/input.rs | 2 + 4 files changed, 131 insertions(+), 6 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index e5aa851a6..cffe9022f 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,5 +1,9 @@ use crate::{ - commands::{args::EchoMode, import::ImportFile, Command, CommandParser}, + commands::{ + args::{EchoMode, TimerMode}, + import::ImportFile, + Command, CommandParser, + }, helper::LimboHelper, input::{get_io, get_writer, DbLocation, OutputMode, Settings}, opcodes_dictionary::OPCODE_DESCRIPTIONS, @@ -20,6 +24,7 @@ use std::{ atomic::{AtomicUsize, Ordering}, Arc, }, + time::{Duration, Instant}, 
}; #[derive(Parser)] @@ -68,6 +73,11 @@ pub struct Limbo<'a> { pub rl: &'a mut Editor, } +struct QueryStatistics { + io_time_elapsed_samples: Vec, + execute_time_elapsed_samples: Vec, +} + macro_rules! query_internal { ($self:expr, $query:expr, $body:expr) => {{ let rows = $self.conn.query($query)?; @@ -391,6 +401,11 @@ impl<'a> Limbo<'a> { let _ = self.writeln(input); } + let start = Instant::now(); + let mut stats = QueryStatistics { + io_time_elapsed_samples: vec![], + execute_time_elapsed_samples: vec![], + }; if input.trim_start().starts_with("explain") { if let Ok(Some(stmt)) = self.conn.query(input) { let _ = self.writeln(stmt.explain().as_bytes()); @@ -399,14 +414,59 @@ impl<'a> Limbo<'a> { let conn = self.conn.clone(); let runner = conn.query_runner(input.as_bytes()); for output in runner { - if self.print_query_result(input, output).is_err() { + if self + .print_query_result(input, output, Some(&mut stats)) + .is_err() + { break; } } } + self.print_query_performance_stats(start, stats); self.reset_input(); } + fn print_query_performance_stats(&mut self, start: Instant, stats: QueryStatistics) { + let elapsed_as_str = |duration: Duration| { + if duration.as_secs() >= 1 { + format!("{} s", duration.as_secs_f64()) + } else if duration.as_millis() >= 1 { + format!("{} ms", duration.as_millis() as f64) + } else if duration.as_micros() >= 1 { + format!("{} us", duration.as_micros() as f64) + } else { + format!("{} ns", duration.as_nanos()) + } + }; + let sample_stats_as_str = |name: &str, samples: Vec| { + if samples.is_empty() { + return format!("{}: No samples available", name); + } + let avg_time_spent = samples.iter().sum::() / samples.len() as u32; + let total_time = samples.iter().fold(Duration::ZERO, |acc, x| acc + *x); + format!( + "{}: avg={}, total={}", + name, + elapsed_as_str(avg_time_spent), + elapsed_as_str(total_time), + ) + }; + if self.opts.timer { + let _ = self.writeln("Command stats:\n----------------------------"); + let _ = 
self.writeln(format!( + "total: {} (this includes parsing/coloring of cli app)\n", + elapsed_as_str(start.elapsed()) + )); + + let _ = self.writeln("query execution stats:\n----------------------------"); + let _ = self.writeln(sample_stats_as_str( + "Execution", + stats.execute_time_elapsed_samples, + )); + let _ = self.writeln(sample_stats_as_str("I/O", stats.io_time_elapsed_samples)); + } + } + fn reset_line(&mut self, line: &str) -> rustyline::Result<()> { self.rl.add_history_entry(line.to_owned())?; self.interrupt_count.store(0, Ordering::SeqCst); @@ -436,7 +496,7 @@ impl<'a> Limbo<'a> { let conn = self.conn.clone(); let runner = conn.query_runner(after_comment.as_bytes()); for output in runner { - if let Err(e) = self.print_query_result(after_comment, output) { + if let Err(e) = self.print_query_result(after_comment, output, None) { let _ = self.writeln(e.to_string()); } } @@ -565,6 +625,12 @@ impl<'a> Limbo<'a> { let _ = self.writeln(v); }); } + Command::Timer(timer_mode) => { + self.opts.timer = match timer_mode.mode { + TimerMode::On => true, + TimerMode::Off => false, + }; + } }, } } @@ -573,6 +639,7 @@ impl<'a> Limbo<'a> { &mut self, sql: &str, mut output: Result, LimboError>, + mut statistics: Option<&mut QueryStatistics>, ) -> anyhow::Result<()> { match output { Ok(Some(ref mut rows)) => match self.opts.output_mode { @@ -582,8 +649,13 @@ impl<'a> Limbo<'a> { return Ok(()); } + let start = Instant::now(); + match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let row = rows.row().unwrap(); for (i, value) in row.get_values().enumerate() { if i > 0 { @@ -598,17 +670,30 @@ impl<'a> Limbo<'a> { let _ = self.writeln(""); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } } Ok(StepResult::Interrupt) => break, Ok(StepResult::Done) => { 
+ if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } break; } Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln(err.to_string()); break; } @@ -636,8 +721,12 @@ impl<'a> Limbo<'a> { table.set_header(header); } loop { + let start = Instant::now(); match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let record = rows.row().unwrap(); let mut row = Row::new(); row.max_height(1); @@ -668,15 +757,35 @@ impl<'a> Limbo<'a> { table.add_row(row); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } + } + Ok(StepResult::Interrupt) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; + } + Ok(StepResult::Done) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; } - Ok(StepResult::Interrupt) => break, - Ok(StepResult::Done) => break, Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.write_fmt(format_args!( "{:?}", miette::Error::from(err).with_source_code(sql.to_owned()) diff --git a/cli/commands/args.rs b/cli/commands/args.rs index 3bb78d0b8..750895049 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -106,3 
+106,15 @@ pub struct LoadExtensionArgs { #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] pub path: String, } + +#[derive(Debug, ValueEnum, Clone)] +pub enum TimerMode { + On, + Off, +} + +#[derive(Debug, Clone, Args)] +pub struct TimerArgs { + #[arg(value_enum)] + pub mode: TimerMode, +} diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index 757cee530..e01828517 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -3,7 +3,7 @@ pub mod import; use args::{ CwdArgs, EchoArgs, ExitArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, OpenArgs, - OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, + OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs, }; use clap::Parser; use import::ImportArgs; @@ -72,6 +72,8 @@ pub enum Command { /// List vfs modules available #[command(name = "vfslist", display_name = ".vfslist")] ListVfs, + #[command(name = "timer", display_name = ".timer")] + Timer(TimerArgs), } const _HELP_TEMPLATE: &str = "{before-help}{name} diff --git a/cli/input.rs b/cli/input.rs index 7b505a99f..e352899c9 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -82,6 +82,7 @@ pub struct Settings { pub is_stdout: bool, pub io: Io, pub tracing_output: Option, + pub timer: bool, } impl From for Settings { @@ -105,6 +106,7 @@ impl From for Settings { vfs => Io::External(vfs.to_string()), }, tracing_output: opts.tracing_output, + timer: false, } } } From 2d7a27fbfa557cc315b8e85149d874c9c4be0a3c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:03:32 -0400 Subject: [PATCH 48/56] Prevent panic in extension by out of bounds cursor idx --- extensions/tests/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index 92e4f874f..df8e8bca0 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -112,11 +112,14 @@ impl VTabModule for KVStoreVTab { if cursor.index.is_some_and(|c| c >= cursor.rows.len()) { 
return Err("cursor out of range".into()); } - let (_, ref key, ref val) = cursor.rows[cursor.index.unwrap_or(0)]; - match idx { - 0 => Ok(Value::from_text(key.clone())), // key - 1 => Ok(Value::from_text(val.clone())), // value - _ => Err("Invalid column".into()), + if let Some((_, ref key, ref val)) = cursor.rows.get(cursor.index.unwrap_or(0)) { + match idx { + 0 => Ok(Value::from_text(key.clone())), // key + 1 => Ok(Value::from_text(val.clone())), // value + _ => Err("Invalid column".into()), + } + } else { + Err("cursor out of range".into()) } } } From b685086cadca38f1c83151237ab68cf394a8c096 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:14:48 -0400 Subject: [PATCH 49/56] Support UPDATE for virtual tables --- core/translate/emitter.rs | 85 +++++++++++++++------ core/translate/main_loop.rs | 28 ++----- core/translate/update.rs | 145 +++++++++++++++++++++++++++++++++++- 3 files changed, 210 insertions(+), 48 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 1ecc16bff..215c6b3ac 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -6,6 +6,7 @@ use std::rc::Rc; use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; +use crate::schema::Table; use crate::translate::plan::{DeletePlan, Plan, Search}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -600,20 +601,41 @@ fn emit_update_insns( if table_column.primary_key { program.emit_null(dest, None); } else { - program.emit_insn(Insn::Column { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); + match &table_ref.table { + Table::BTree(_) => { + program.emit_insn(Insn::Column { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + 
column: column_idx_in_index.unwrap_or(idx), + dest, + }); + } + Table::Virtual(_) => { + program.emit_insn(Insn::VColumn { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + column: column_idx_in_index.unwrap_or(idx), + dest, + }); + } + typ => unreachable!("query plan generated on unexpected table type {:?}", typ), + } } } } @@ -633,13 +655,34 @@ fn emit_update_insns( count: table_ref.columns().len(), dest_reg: record_reg, }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); + match &table_ref.table { + Table::BTree(_) => { + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + } + Table::Virtual(vtab) => { + let new_rowid = program.alloc_register(); + program.emit_insn(Insn::Copy { + src_reg: rowid_reg, + dst_reg: new_rowid, + amount: 0, + }); + let arg_count = table_ref.columns().len() + 2; + program.emit_insn(Insn::VUpdate { + cursor_id, + arg_count, + start_reg: record_reg, + vtab_ptr: vtab.implementation.as_ref().ctx as usize, + conflict_action: 0u16, + }); + } + _ => unreachable!("unexpected table type"), + } if let Some(limit_reg) = t_ctx.reg_limit { program.emit_insn(Insn::DecrJumpZero { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index ab7ae1a0e..9575eb900 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -109,7 +109,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenReadAwait {}); } } - (OperationMode::DELETE, Table::BTree(btree)) => { + (OperationMode::DELETE | OperationMode::UPDATE, Table::BTree(btree)) => { let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, @@ -131,11 +131,7 @@ pub fn init_loop( } 
program.emit_insn(Insn::OpenWriteAwait {}); } - (OperationMode::SELECT, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpenAsync { cursor_id }); - program.emit_insn(Insn::VOpenAwait {}); - } - (OperationMode::DELETE, Table::Virtual(_)) => { + (_, Table::Virtual(_)) => { program.emit_insn(Insn::VOpenAsync { cursor_id }); program.emit_insn(Insn::VOpenAwait {}); } @@ -158,14 +154,7 @@ pub fn init_loop( }); program.emit_insn(Insn::OpenReadAwait {}); } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page().into(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - OperationMode::UPDATE => { + OperationMode::DELETE | OperationMode::UPDATE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, root_page: table.table.get_root_page().into(), @@ -191,17 +180,10 @@ pub fn init_loop( }); program.emit_insn(Insn::OpenReadAwait); } - OperationMode::DELETE => { + OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page.into(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - OperationMode::UPDATE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: index_cursor_id, - root_page: index.root_page.into(), + root_page: index.root_page, }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/update.rs b/core/translate/update.rs index 347e36acb..de1a568a8 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -1,4 +1,7 @@ +use std::sync::Arc; + use crate::translate::plan::Operation; +use crate::vdbe::BranchOffset; use crate::{ bail_parse_error, schema::{Schema, Table}, @@ -8,7 +11,7 @@ use crate::{ }; use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; -use super::emitter::emit_program; +use super::emitter::{emit_program, Resolver}; use super::optimizer::optimize_plan; use super::plan::{ Direction, 
IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, @@ -53,6 +56,7 @@ pub fn translate_update( ) -> crate::Result { let mut plan = prepare_update_plan(schema, body)?; optimize_plan(&mut plan, schema)?; + let resolver = Resolver::new(syms); // TODO: freestyling these numbers let mut program = ProgramBuilder::new(ProgramBuilderOpts { query_mode, @@ -65,6 +69,12 @@ pub fn translate_update( } pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result { + if body.with.is_some() { + bail_parse_error!("WITH clause is not supported"); + } + if body.or_conflict.is_some() { + bail_parse_error!("ON CONFLICT clause is not supported"); + } let table_name = &body.tbl_name.name; let table = match schema.get_table(table_name.0.as_str()) { Some(table) => table, @@ -86,7 +96,11 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< }) .unwrap_or(IterationDirection::Forwards); let table_references = vec![TableReference { - table: Table::BTree(btree_table.clone()), + table: match table.as_ref() { + Table::Virtual(vtab) => Table::Virtual(vtab.clone()), + Table::BTree(btree_table) => Table::BTree(btree_table.clone()), + _ => unreachable!(), + }, identifier: table_name.0.clone(), op: Operation::Scan { iter_dir, @@ -99,8 +113,8 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< .iter_mut() .map(|set| { let ident = normalize_ident(set.col_names[0].0.as_str()); - let col_index = btree_table - .columns + let col_index = table + .columns() .iter() .enumerate() .find_map(|(i, col)| { @@ -185,3 +199,126 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< contains_constant_false_condition: false, })) } + +// fn translate_vtab_update( +// mut program: ProgramBuilder, +// body: &mut Update, +// table: Arc, +// resolver: &Resolver, +// ) -> crate::Result { +// let start_label = program.allocate_label(); +// program.emit_insn(Insn::Init { +// target_pc: start_label, 
+// }); +// let start_offset = program.offset(); +// let vtab = table.virtual_table().unwrap(); +// let cursor_id = program.alloc_cursor_id( +// Some(table.get_name().to_string()), +// CursorType::VirtualTable(vtab.clone()), +// ); +// let referenced_tables = vec![TableReference { +// table: Table::Virtual(table.virtual_table().unwrap().clone()), +// identifier: table.get_name().to_string(), +// op: Operation::Scan { iter_dir: None }, +// join_info: None, +// }]; +// program.emit_insn(Insn::VOpenAsync { cursor_id }); +// program.emit_insn(Insn::VOpenAwait {}); +// +// let argv_start = program.alloc_registers(0); +// let end_label = program.allocate_label(); +// let skip_label = program.allocate_label(); +// program.emit_insn(Insn::VFilter { +// cursor_id, +// pc_if_empty: end_label, +// args_reg: argv_start, +// arg_count: 0, +// }); +// +// let loop_start = program.offset(); +// let start_reg = program.alloc_registers(2 + table.columns().len()); +// let old_rowid = start_reg; +// let new_rowid = start_reg + 1; +// let column_regs = start_reg + 2; +// +// program.emit_insn(Insn::RowId { +// cursor_id, +// dest: old_rowid, +// }); +// program.emit_insn(Insn::RowId { +// cursor_id, +// dest: new_rowid, +// }); +// +// for (i, _) in table.columns().iter().enumerate() { +// let dest = column_regs + i; +// program.emit_insn(Insn::VColumn { +// cursor_id, +// column: i, +// dest, +// }); +// } +// +// if let Some(ref mut where_clause) = body.where_clause { +// bind_column_references(where_clause, &referenced_tables, None)?; +// translate_condition_expr( +// &mut program, +// &referenced_tables, +// where_clause, +// ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true: BranchOffset::Placeholder, +// jump_target_when_false: skip_label, +// }, +// resolver, +// )?; +// } +// // prepare updated columns in place +// for expr in body.sets.iter() { +// let Some(col_index) = table.columns().iter().position(|t| { +// t.name +// .as_ref() +// 
.unwrap() +// .eq_ignore_ascii_case(&expr.col_names[0].0) +// }) else { +// bail_parse_error!("column {} not found", expr.col_names[0].0); +// }; +// translate_expr( +// &mut program, +// Some(&referenced_tables), +// &expr.expr, +// column_regs + col_index, +// resolver, +// )?; +// } +// +// let arg_count = 2 + table.columns().len(); +// program.emit_insn(Insn::VUpdate { +// cursor_id, +// arg_count, +// start_reg: old_rowid, +// vtab_ptr: vtab.implementation.ctx as usize, +// conflict_action: 0, +// }); +// +// program.resolve_label(skip_label, program.offset()); +// program.emit_insn(Insn::VNext { +// cursor_id, +// pc_if_next: loop_start, +// }); +// +// program.resolve_label(end_label, program.offset()); +// program.emit_insn(Insn::Halt { +// err_code: 0, +// description: String::new(), +// }); +// program.resolve_label(start_label, program.offset()); +// program.emit_insn(Insn::Transaction { write: true }); +// +// program.emit_constant_insns(); +// program.emit_insn(Insn::Goto { +// target_pc: start_offset, +// }); +// program.table_references = referenced_tables.clone(); +// Ok(program) +// } From 7993857020529f0a108e5e02c0ecfd82d857b8c2 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:29:26 -0400 Subject: [PATCH 50/56] Add py tests for vtab update behavior --- testing/cli_tests/extensions.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index ac870ee4d..4d289f311 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -398,10 +398,35 @@ def test_kv(): limbo.run_test_fn( "select count(*) from t;", lambda res: "100" == res, "can insert 100 rows" ) + limbo.run_test_fn("update t set value = 'updated' where key = 'key33';", null) + limbo.run_test_fn( + "select * from t where key = 'key33';", + lambda res: res == "key33|updated", + "can update single row", + ) + limbo.run_test_fn( + "select COUNT(*) from t where value 
= 'updated';", + lambda res: res == "1", + "only updated a single row", + ) + limbo.run_test_fn("update t set value = 'updated2';", null) + limbo.run_test_fn( + "select COUNT(*) from t where value = 'updated2';", + lambda res: res == "100", + "can update all rows", + ) limbo.run_test_fn("delete from t limit 96;", null, "can delete 96 rows") limbo.run_test_fn( "select count(*) from t;", lambda res: "4" == res, "four rows remain" ) + limbo.run_test_fn( + "update t set key = '100' where 1;", null, "where clause evaluates properly" + ) + limbo.run_test_fn( + "select * from t where key = '100';", + lambda res: res == "100|updated2", + "there is only 1 key remaining after setting all keys to same value", + ) limbo.quit() From 0ffecb3021addea05db1a8af937a6a550ab6a761 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 25 Mar 2025 10:47:05 -0400 Subject: [PATCH 51/56] Add comments to document update on vtabs --- core/translate/update.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/translate/update.rs b/core/translate/update.rs index de1a568a8..296d8a6ff 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -1,7 +1,4 @@ -use std::sync::Arc; - use crate::translate::plan::Operation; -use crate::vdbe::BranchOffset; use crate::{ bail_parse_error, schema::{Schema, Table}, @@ -11,7 +8,7 @@ use crate::{ }; use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; -use super::emitter::{emit_program, Resolver}; +use super::emitter::emit_program; use super::optimizer::optimize_plan; use super::plan::{ Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, @@ -56,7 +53,6 @@ pub fn translate_update( ) -> crate::Result { let mut plan = prepare_update_plan(schema, body)?; optimize_plan(&mut plan, schema)?; - let resolver = Resolver::new(syms); // TODO: freestyling these numbers let mut program = ProgramBuilder::new(ProgramBuilderOpts { query_mode, From 62d1447cd681dd67f3571ca1225e97857103d9de 
Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 2 Apr 2025 23:48:14 -0400 Subject: [PATCH 52/56] Adapt query plan to handle vatbs for updates --- core/translate/emitter.rs | 183 ++++++++++++++++++++------------------ core/translate/update.rs | 123 ------------------------- core/vdbe/execute.rs | 13 ++- 3 files changed, 103 insertions(+), 216 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 215c6b3ac..7106bc14a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -6,7 +6,6 @@ use std::rc::Rc; use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; -use crate::schema::Table; use crate::translate::plan::{DeletePlan, Plan, Search}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -531,29 +530,67 @@ fn emit_update_insns( ) -> crate::Result<()> { let table_ref = &plan.table_references.first().unwrap(); let loop_labels = t_ctx.labels_main_loop.first().unwrap(); - let (cursor_id, index) = match &table_ref.op { - Operation::Scan { .. } => (program.resolve_cursor_id(&table_ref.identifier), None), + let (cursor_id, index, is_virtual) = match &table_ref.op { + Operation::Scan { .. } => ( + program.resolve_cursor_id(&table_ref.identifier), + None, + table_ref.virtual_table().is_some(), + ), Operation::Search(search) => match search { - &Search::RowidEq { .. } | Search::RowidSearch { .. } => { - (program.resolve_cursor_id(&table_ref.identifier), None) - } + &Search::RowidEq { .. } | Search::RowidSearch { .. } => ( + program.resolve_cursor_id(&table_ref.identifier), + None, + false, + ), Search::IndexSearch { index, .. 
} => ( program.resolve_cursor_id(&table_ref.identifier), Some((index.clone(), program.resolve_cursor_id(&index.name))), + false, ), }, _ => return Ok(()), }; - let rowid_reg = program.alloc_register(); + + for cond in plan.where_clause.iter().filter(|c| c.is_constant()) { + let jump_target = program.allocate_label(); + let meta = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true: jump_target, + jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), + }; + translate_condition_expr( + program, + &plan.table_references, + &cond.expr, + meta, + &t_ctx.resolver, + )?; + program.resolve_label(jump_target, program.offset()); + } + let mut beg = program.alloc_registers( + table_ref.table.columns().len() + + if is_virtual { + 2 // two args before the relevant columns for VUpdate + } else { + 1 // rowid reg + }, + ); program.emit_insn(Insn::RowId { cursor_id, - dest: rowid_reg, + dest: beg, }); // if no rowid, we're done program.emit_insn(Insn::IsNull { - reg: rowid_reg, + reg: beg, target_pc: t_ctx.label_main_loop_end.unwrap(), }); + if is_virtual { + program.emit_insn(Insn::Copy { + src_reg: beg, + dst_reg: beg + 1, + amount: 0, + }) + } if let Some(offset) = t_ctx.reg_offset { program.emit_insn(Insn::IfPos { @@ -577,12 +614,13 @@ fn emit_update_insns( &t_ctx.resolver, )?; } - let first_col_reg = program.alloc_registers(table_ref.table.columns().len()); + // we scan a column at a time, loading either the column's values, or the new value // from the Set expression, into registers so we can emit a MakeRecord and update the row. 
+ let start = if is_virtual { beg + 2 } else { beg + 1 }; for idx in 0..table_ref.columns().len() { - if let Some((idx, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { - let target_reg = first_col_reg + idx; + let target_reg = start + idx; + if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { translate_expr( program, Some(&plan.table_references), @@ -597,91 +635,66 @@ fn emit_update_insns( .iter() .position(|c| Some(&c.name) == table_column.name.as_ref()) }); - let dest = first_col_reg + idx; - if table_column.primary_key { - program.emit_null(dest, None); + + // don't emit null for pkey of virtual tables. they require first two args + // before the 'record' to be explicitly non-null + if table_column.primary_key && !is_virtual { + program.emit_null(target_reg, None); + } else if is_virtual { + program.emit_insn(Insn::VColumn { + cursor_id, + column: idx, + dest: target_reg, + }); } else { - match &table_ref.table { - Table::BTree(_) => { - program.emit_insn(Insn::Column { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); - } - Table::Virtual(_) => { - program.emit_insn(Insn::VColumn { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); - } - typ => unreachable!("query plan generated on unexpected table type {:?}", typ), - } + program.emit_insn(Insn::Column { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + column: column_idx_in_index.unwrap_or(idx), + dest: target_reg, + }); } } } if let Some(btree_table) = table_ref.btree() { if btree_table.is_strict { program.emit_insn(Insn::TypeCheck { - 
start_reg: first_col_reg, + start_reg: start, count: table_ref.columns().len(), check_generated: true, table_reference: Rc::clone(&btree_table), }); } - } - let record_reg = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: first_col_reg, - count: table_ref.columns().len(), - dest_reg: record_reg, - }); - match &table_ref.table { - Table::BTree(_) => { - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); - } - Table::Virtual(vtab) => { - let new_rowid = program.alloc_register(); - program.emit_insn(Insn::Copy { - src_reg: rowid_reg, - dst_reg: new_rowid, - amount: 0, - }); - let arg_count = table_ref.columns().len() + 2; - program.emit_insn(Insn::VUpdate { - cursor_id, - arg_count, - start_reg: record_reg, - vtab_ptr: vtab.implementation.as_ref().ctx as usize, - conflict_action: 0u16, - }); - } - _ => unreachable!("unexpected table type"), + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: start, + count: table_ref.columns().len(), + dest_reg: record_reg, + }); + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: beg, + record_reg, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + } else if let Some(vtab) = table_ref.virtual_table() { + let arg_count = table_ref.columns().len() + 2; + program.emit_insn(Insn::VUpdate { + cursor_id, + arg_count, + start_reg: beg, + vtab_ptr: vtab.implementation.as_ref().ctx as usize, + conflict_action: 0u16, + }); } if let Some(limit_reg) = t_ctx.reg_limit { diff --git a/core/translate/update.rs b/core/translate/update.rs index 296d8a6ff..71293483c 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -195,126 +195,3 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< contains_constant_false_condition: false, })) } - -// fn translate_vtab_update( -// mut 
program: ProgramBuilder, -// body: &mut Update, -// table: Arc
, -// resolver: &Resolver, -// ) -> crate::Result { -// let start_label = program.allocate_label(); -// program.emit_insn(Insn::Init { -// target_pc: start_label, -// }); -// let start_offset = program.offset(); -// let vtab = table.virtual_table().unwrap(); -// let cursor_id = program.alloc_cursor_id( -// Some(table.get_name().to_string()), -// CursorType::VirtualTable(vtab.clone()), -// ); -// let referenced_tables = vec![TableReference { -// table: Table::Virtual(table.virtual_table().unwrap().clone()), -// identifier: table.get_name().to_string(), -// op: Operation::Scan { iter_dir: None }, -// join_info: None, -// }]; -// program.emit_insn(Insn::VOpenAsync { cursor_id }); -// program.emit_insn(Insn::VOpenAwait {}); -// -// let argv_start = program.alloc_registers(0); -// let end_label = program.allocate_label(); -// let skip_label = program.allocate_label(); -// program.emit_insn(Insn::VFilter { -// cursor_id, -// pc_if_empty: end_label, -// args_reg: argv_start, -// arg_count: 0, -// }); -// -// let loop_start = program.offset(); -// let start_reg = program.alloc_registers(2 + table.columns().len()); -// let old_rowid = start_reg; -// let new_rowid = start_reg + 1; -// let column_regs = start_reg + 2; -// -// program.emit_insn(Insn::RowId { -// cursor_id, -// dest: old_rowid, -// }); -// program.emit_insn(Insn::RowId { -// cursor_id, -// dest: new_rowid, -// }); -// -// for (i, _) in table.columns().iter().enumerate() { -// let dest = column_regs + i; -// program.emit_insn(Insn::VColumn { -// cursor_id, -// column: i, -// dest, -// }); -// } -// -// if let Some(ref mut where_clause) = body.where_clause { -// bind_column_references(where_clause, &referenced_tables, None)?; -// translate_condition_expr( -// &mut program, -// &referenced_tables, -// where_clause, -// ConditionMetadata { -// jump_if_condition_is_true: false, -// jump_target_when_true: BranchOffset::Placeholder, -// jump_target_when_false: skip_label, -// }, -// resolver, -// )?; -// } -// // 
prepare updated columns in place -// for expr in body.sets.iter() { -// let Some(col_index) = table.columns().iter().position(|t| { -// t.name -// .as_ref() -// .unwrap() -// .eq_ignore_ascii_case(&expr.col_names[0].0) -// }) else { -// bail_parse_error!("column {} not found", expr.col_names[0].0); -// }; -// translate_expr( -// &mut program, -// Some(&referenced_tables), -// &expr.expr, -// column_regs + col_index, -// resolver, -// )?; -// } -// -// let arg_count = 2 + table.columns().len(); -// program.emit_insn(Insn::VUpdate { -// cursor_id, -// arg_count, -// start_reg: old_rowid, -// vtab_ptr: vtab.implementation.ctx as usize, -// conflict_action: 0, -// }); -// -// program.resolve_label(skip_label, program.offset()); -// program.emit_insn(Insn::VNext { -// cursor_id, -// pc_if_next: loop_start, -// }); -// -// program.resolve_label(end_label, program.offset()); -// program.emit_insn(Insn::Halt { -// err_code: 0, -// description: String::new(), -// }); -// program.resolve_label(start_label, program.offset()); -// program.emit_insn(Insn::Transaction { write: true }); -// -// program.emit_constant_insns(); -// program.emit_insn(Insn::Goto { -// target_pc: start_offset, -// }); -// program.table_references = referenced_tables.clone(); -// Ok(program) -// } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index a09da4ac0..0e7fc583b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1848,17 +1848,14 @@ pub fn op_row_id( let rowid = { let mut index_cursor = state.get_cursor(index_cursor_id); let index_cursor = index_cursor.as_btree_mut(); - let rowid = index_cursor.rowid()?; - rowid + index_cursor.rowid()? }; let mut table_cursor = state.get_cursor(table_cursor_id); let table_cursor = table_cursor.as_btree_mut(); - let deferred_seek = - match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? 
{ - CursorResult::Ok(_) => None, - CursorResult::IO => Some((index_cursor_id, table_cursor_id)), - }; - deferred_seek + match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? { + CursorResult::Ok(_) => None, + CursorResult::IO => Some((index_cursor_id, table_cursor_id)), + } }; if let Some(deferred_seek) = deferred_seek { state.deferred_seek = Some(deferred_seek); From 13ae19c78c19c115a9452a19c8c30854bf74f720 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 07:08:01 -0400 Subject: [PATCH 53/56] Remove unnecessary clones from mc cursors --- core/vdbe/execute.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0e7fc583b..5cf2e6cd2 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1582,7 +1582,7 @@ pub fn op_halt( ))); } } - match program.halt(pager.clone(), state, mv_store.clone())? { + match program.halt(pager.clone(), state, mv_store)? { StepResult::Done => Ok(InsnFunctionStepResult::Done), StepResult::IO => Ok(InsnFunctionStepResult::IO), StepResult::Row => Ok(InsnFunctionStepResult::Row), @@ -1661,7 +1661,7 @@ pub fn op_auto_commit( }; let conn = program.connection.upgrade().unwrap(); if matches!(state.halt_state, Some(HaltState::Checkpointing)) { - return match program.halt(pager.clone(), state, mv_store.clone())? { + return match program.halt(pager.clone(), state, mv_store)? { super::StepResult::Done => Ok(InsnFunctionStepResult::Done), super::StepResult::IO => Ok(InsnFunctionStepResult::IO), super::StepResult::Row => Ok(InsnFunctionStepResult::Row), @@ -1689,7 +1689,7 @@ pub fn op_auto_commit( "cannot commit - no transaction is active".to_string(), )); } - return match program.halt(pager.clone(), state, mv_store.clone())? { + return match program.halt(pager.clone(), state, mv_store)? 
{ super::StepResult::Done => Ok(InsnFunctionStepResult::Done), super::StepResult::IO => Ok(InsnFunctionStepResult::IO), super::StepResult::Row => Ok(InsnFunctionStepResult::Row), From f223e66c82a4f862749b6a9c14fcb48d630f6fab Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 07:29:16 -0400 Subject: [PATCH 54/56] Remove unused mut and fix merge conflict issues --- core/translate/emitter.rs | 2 +- core/translate/main_loop.rs | 4 ++-- core/translate/update.rs | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 7106bc14a..5049bb738 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -567,7 +567,7 @@ fn emit_update_insns( )?; program.resolve_label(jump_target, program.offset()); } - let mut beg = program.alloc_registers( + let beg = program.alloc_registers( table_ref.table.columns().len() + if is_virtual { 2 // two args before the relevant columns for VUpdate diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9575eb900..51bd05382 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -109,7 +109,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenReadAwait {}); } } - (OperationMode::DELETE | OperationMode::UPDATE, Table::BTree(btree)) => { + (OperationMode::DELETE, Table::BTree(btree)) => { let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, @@ -183,7 +183,7 @@ pub fn init_loop( OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page, + root_page: index.root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/update.rs b/core/translate/update.rs index 71293483c..62c6c6f9f 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -76,9 +76,6 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< Some(table) => 
table, None => bail_parse_error!("Parse error: no such table: {}", table_name), }; - let Some(btree_table) = table.btree() else { - bail_parse_error!("Error: {} is not a btree table", table_name); - }; let iter_dir = body .order_by .as_ref() From 2d009083bae7682cbb6736e56b11269b71ee34e4 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 19:27:58 +0300 Subject: [PATCH 55/56] core: Fix syscall VFS on Linux Fix the syscall VFS on Linux not to use `PlatformIO`, which is just an alias for `io_uring`. --- core/io/mod.rs | 4 ++++ core/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/io/mod.rs b/core/io/mod.rs index 7eb8845bb..b5321637b 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -191,6 +191,7 @@ cfg_block! { mod unix; #[cfg(feature = "fs")] pub use unix::UnixIO; + pub use unix::UnixIO as SyscallIO; pub use io_uring::UringIO as PlatformIO; } @@ -199,16 +200,19 @@ cfg_block! { #[cfg(feature = "fs")] pub use unix::UnixIO; pub use unix::UnixIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(target_os = "windows")] { mod windows; pub use windows::WindowsIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] { mod generic; pub use generic::GenericIO as PlatformIO; + pub use PlatformIO as SyscallIO; } } diff --git a/core/lib.rs b/core/lib.rs index e827c3d0d..7ccfea4fe 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -31,7 +31,9 @@ use fallible_iterator::FallibleIterator; pub use io::UnixIO; #[cfg(all(feature = "fs", target_os = "linux", feature = "io_uring"))] pub use io::UringIO; -pub use io::{Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, WriteCompletion, IO}; +pub use io::{ + Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, +}; use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; @@ -209,7 
+211,7 @@ impl Database { Some(vfs) => vfs, None => match vfs.trim() { "memory" => Arc::new(MemoryIO::new()), - "syscall" => Arc::new(PlatformIO::new()?), + "syscall" => Arc::new(SyscallIO::new()?), #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Arc::new(UringIO::new()?), other => { From 94217319a2a7d8cffdff0451f89ec3603417d62b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 9 Apr 2025 14:21:18 -0300 Subject: [PATCH 56/56] Fix Explain to be case insensitive --- cli/app.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/app.rs b/cli/app.rs index cffe9022f..c5cb2ff4f 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -406,7 +406,10 @@ impl<'a> Limbo<'a> { io_time_elapsed_samples: vec![], execute_time_elapsed_samples: vec![], }; - if input.trim_start().starts_with("explain") { + // TODO this is a quickfix. Some ideas to do case insensitive comparisons is to use + // Uncased or Unicase. + let temp = input.to_lowercase(); + if temp.trim_start().starts_with("explain") { if let Ok(Some(stmt)) = self.conn.query(input) { let _ = self.writeln(stmt.explain().as_bytes()); }