From 4384659e5fc9f20646abda58891d1f99f3dbd89f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 20:44:29 -0400 Subject: [PATCH 001/425] Adjust vtab schema creation to display the underlying columns --- core/translate/mod.rs | 2 +- core/translate/schema.rs | 41 +++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 739ae5f03..f2007825d 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -78,7 +78,7 @@ pub fn translate( ast::Stmt::CreateTrigger { .. } => bail_parse_error!("CREATE TRIGGER not supported yet"), ast::Stmt::CreateView { .. } => bail_parse_error!("CREATE VIEW not supported yet"), ast::Stmt::CreateVirtualTable(vtab) => { - translate_create_virtual_table(*vtab, schema, query_mode)? + translate_create_virtual_table(*vtab, schema, query_mode, &syms)? } ast::Stmt::Delete(delete) => { let Delete { diff --git a/core/translate/schema.rs b/core/translate/schema.rs index a887bdef8..43fdd6017 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -1,6 +1,8 @@ use std::fmt::Display; +use std::rc::Rc; use crate::ast; +use crate::ext::VTabImpl; use crate::schema::Schema; use crate::translate::ProgramBuilder; use crate::translate::ProgramBuilderOpts; @@ -9,8 +11,10 @@ use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX; use crate::vdbe::builder::CursorType; use crate::vdbe::insn::{CmpInsFlags, Insn}; use crate::LimboError; +use crate::SymbolTable; use crate::{bail_parse_error, Result}; +use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast::{fmt::ToTokens, CreateVirtualTable}; pub fn translate_create_table( @@ -398,7 +402,7 @@ fn create_table_body_to_str(tbl_name: &ast::QualifiedName, body: &ast::CreateTab sql } -fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String { +fn create_vtable_body_to_str(vtab: &CreateVirtualTable, module: Rc) -> String { let args = if let Some(args) = &vtab.args { args.iter() .map(|arg| arg.to_string()) @@ -412,8 +416,25 @@ fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String { } else { "" }; + let ext_args = vtab + .args + .as_ref() + .unwrap_or(&vec![]) + .iter() + .map(|a| limbo_ext::Value::from_text(a.to_string())) + .collect::>(); + let schema = module + .implementation + .init_schema(ext_args) + .unwrap_or_default(); + let vtab_args = if let Some(first_paren) = schema.find('(') { + let closing_paren = schema.rfind(')').unwrap_or_default(); + &schema[first_paren..=closing_paren] + } else { + "()" + }; format!( - "CREATE VIRTUAL TABLE {} {} USING {}{}", + "CREATE VIRTUAL TABLE {} {} USING {}{}\n /*{}{}*/;", vtab.tbl_name.name.0, if_not_exists, vtab.module_name.0, @@ -421,7 +442,9 @@ fn create_vtable_body_to_str(vtab: &CreateVirtualTable) -> String { String::new() } else { format!("({})", args) - } + }, + vtab.tbl_name.name.0, + vtab_args ) } @@ -429,6 +452,7 @@ pub fn translate_create_virtual_table( vtab: CreateVirtualTable, schema: &Schema, query_mode: QueryMode, + syms: &SymbolTable, ) -> Result { let ast::CreateVirtualTable { if_not_exists, @@ -440,7 +464,12 @@ pub fn translate_create_virtual_table( let table_name = tbl_name.name.0.clone(); let module_name_str = module_name.0.clone(); let args_vec = args.clone().unwrap_or_default(); - + let Some(vtab_module) = syms.vtab_modules.get(&module_name_str) else { + bail_parse_error!("no such module: {}", module_name_str); + }; + if !vtab_module.module_kind.eq(&VTabKind::VirtualTable) { + bail_parse_error!("module {} is not a virtual table", 
module_name_str); + }; if schema.get_table(&table_name).is_some() && *if_not_exists { let mut program = ProgramBuilder::new(ProgramBuilderOpts { query_mode, @@ -465,7 +494,6 @@ pub fn translate_create_virtual_table( let module_name_reg = program.emit_string8_new_reg(module_name_str.clone()); let table_name_reg = program.emit_string8_new_reg(table_name.clone()); - let args_reg = if !args_vec.is_empty() { let args_start = program.alloc_register(); @@ -491,7 +519,6 @@ pub fn translate_create_virtual_table( table_name: table_name_reg, args_reg, }); - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id( Some(SQLITE_TABLEID.to_owned()), @@ -503,7 +530,7 @@ pub fn translate_create_virtual_table( }); program.emit_insn(Insn::OpenWriteAwait {}); - let sql = create_vtable_body_to_str(&vtab); + let sql = create_vtable_body_to_str(&vtab, vtab_module.clone()); emit_schema_entry( &mut program, sqlite_schema_cursor_id, From 334f0a928ac24f3577b5266e7dc384bc5895613a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 21:05:50 -0400 Subject: [PATCH 002/425] Adjust test to reflect new parse error --- core/translate/schema.rs | 2 +- testing/cli_tests/extensions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 43fdd6017..8a5accea2 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -434,7 +434,7 @@ fn create_vtable_body_to_str(vtab: &CreateVirtualTable, module: Rc) -> "()" }; format!( - "CREATE VIRTUAL TABLE {} {} USING {}{}\n /*{}{}*/;", + "CREATE VIRTUAL TABLE {} {} USING {}{}\n /*{}{}*/", vtab.tbl_name.name.0, if_not_exists, vtab.module_name.0, diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 56784b286..92d058ff8 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -341,7 +341,7 @@ def test_kv(): limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", - lambda res: "Virtual table module not found: kv_store" in res, + lambda res: "Parse error: no such module: kv_store" in res, ) limbo.execute_dot(f".load {ext_path}") limbo.run_test_fn( From 8f202e80155bacfb00310a46030fa39a4b6b8c99 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 1 Apr 2025 19:42:44 -0300 Subject: [PATCH 003/425] separate memory tests to track large blob insertions --- Makefile | 6 +- testing/cli_tests/memory.py | 113 ++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100755 testing/cli_tests/memory.py diff --git a/Makefile b/Makefile index 5202112d1..46ef06c98 100644 --- a/Makefile +++ b/Makefile @@ -62,7 +62,7 @@ limbo-wasm: cargo build --package limbo-wasm --target wasm32-wasi .PHONY: limbo-wasm -test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions +test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory .PHONY: test test-extensions: limbo @@ -94,6 +94,10 @@ test-json: SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test .PHONY: test-json +test-memory: + SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py +.PHONY: test-memory + clickbench: ./perf/clickbench/benchmark.sh .PHONY: clickbench diff --git a/testing/cli_tests/memory.py b/testing/cli_tests/memory.py new file mode 100755 index 000000000..e96df3475 --- /dev/null +++ b/testing/cli_tests/memory.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +import os +from test_limbo_cli import TestLimboShell + + +sqlite_exec = 
"./target/debug/limbo" +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +def stub_memory_test( + limbo: TestLimboShell, + name: str, + blob_size: int = 1024**2, + vals: int = 100, + blobs: bool = True, +): + # zero_blob_size = 1024 **2 + zero_blob = "0" * blob_size * 2 + # vals = 100 + big_stmt = ["CREATE TABLE temp (t1 BLOB, t2 INTEGER);"] + big_stmt = big_stmt + [ + f"INSERT INTO temp (t1) VALUES (zeroblob({blob_size}));" + if i % 2 == 0 and blobs + else f"INSERT INTO temp (t2) VALUES ({i});" + for i in range(vals * 2) + ] + expected = [] + for i in range(vals * 2): + if i % 2 == 0 and blobs: + big_stmt.append(f"SELECT hex(t1) FROM temp LIMIT 1 OFFSET {i};") + expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM temp LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM temp;") + expected.append(str(vals * 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn(big_stmt, lambda res: validate_with_expected(res, expected), name) + + +# TODO no delete tests for now because of limbo outputs some debug information on delete +def memory_tests() -> list[dict]: + tests = [] + + for vals in range(0, 1000, 100): + tests.append( + { + "name": f"small-insert-integer-vals-{vals}", + "vals": vals, + "blobs": False, + } + ) + + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{1024}", + "vals": 10, + "blob_size": 1024, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{1024}", + "vals": 100, + "blob_size": 1024, + } + ) + + for blob_size in range(0, (1024 * 1024) + 1, 1024 * 4**4): + if blob_size == 0: + continue + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 10, + "blob_size": blob_size, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 100, + "blob_size": blob_size, + } + ) + return tests + + +def main(): + tests = memory_tests() + # TODO see how to parallelize this loop with different subprocesses + for test in tests: + limbo = TestLimboShell() + try: + stub_memory_test(limbo, **test) + except Exception as e: + print(f"Test FAILED: {e}") + limbo.quit() + exit(1) + limbo.quit() # remove this line when `with` statement is supported for TestLimboShell + print("All tests passed successfully.") + + +if __name__ == "__main__": + main() From 89c0b0b86261957c7703c564ee4370bf0d41196c Mon Sep 17 00:00:00 2001 From: tsar-boomba Date: Wed, 2 Apr 2025 18:22:57 -0700 Subject: [PATCH 004/425] Implement Clone and Debug for rust binding's Database --- bindings/rust/src/lib.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index fe653fdb5..60a7ffd77 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -6,6 +6,7 @@ pub use value::Value; pub use params::params_from_iter; use crate::params::*; +use std::fmt::Debug; use std::num::NonZero; use std::rc::Rc; use std::sync::{Arc, Mutex}; @@ -55,6 +56,7 @@ impl Builder { } } +#[derive(Clone)] pub struct Database { inner: Arc, } @@ -62,6 +64,12 @@ pub struct Database { unsafe impl Send for Database {} unsafe impl Sync for Database {} +impl Debug for Database { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Database").finish() + } +} + impl Database { pub fn connect(&self) -> Result { let conn = 
self.inner.connect()?; From 22fd3e9781fbec5433189ada5bd4e0fa78d73cb7 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 15:22:09 -0400 Subject: [PATCH 005/425] Fix cli tests --- testing/cli_tests/extensions.py | 6 ++---- testing/cli_tests/test_limbo_cli.py | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index cb73aa760..058fe2e6b 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -338,16 +338,14 @@ def test_series(): def test_kv(): ext_path = "target/debug/liblimbo_ext_tests" - limbo = TestLimboShell() + limbo = TestLimboShell("") limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Virtual table module not found: kv_store" in res, ) limbo.execute_dot(f".load {ext_path}") - limbo.run_test_fn( + limbo.debug_print( "create virtual table t using kv_store;", - null, - "can create kv_store vtable", ) limbo.run_test_fn( "insert into t values ('hello', 'world');", diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 10e87869d..8b6a61375 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -111,7 +111,6 @@ class TestLimboShell: if init_commands is None: # Default initialization init_commands = """ -.open :memory: CREATE TABLE users (id INTEGER PRIMARY KEY, first_name TEXT, last_name TEXT, age INTEGER); CREATE TABLE products (id INTEGER PRIMARY KEY, name TEXT, price INTEGER); INSERT INTO users VALUES (1, 'Alice', 'Smith', 30), (2, 'Bob', 'Johnson', 25), From b47c214a5eed43a26fd3382e81ee517955112e1f Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 27 Mar 2025 21:48:32 +0200 Subject: [PATCH 006/425] fix aggregation functions without group by --- core/translate/emitter.rs | 8 ++++++++ core/translate/main_loop.rs | 19 ++++++++++++++++++- core/translate/result_row.rs | 4 +++- core/translate/subquery.rs | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e4d05bfaa..b473715fe 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -62,6 +62,8 @@ pub struct TranslateCtx<'a> { pub label_main_loop_end: Option, // First register of the aggregation results pub reg_agg_start: Option, + // Register to track if we set non aggregate cols to first encountered row in non group by agg statement + pub reg_agg_flag: Option, // First register of the result columns of the query pub reg_result_cols_start: Option, // The register holding the limit value, if any. 
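// A minimal sketch (not part of this diff) of how the new flag is meant to be
// used by the main loop, assuming the ProgramBuilder API seen elsewhere in
// this series; the register and label names here are illustrative only:
//
//     let flag = program.alloc_register();
//     program.emit_int(0, flag); // 0 = non-aggregate columns not yet emitted
//     // per row: skip re-evaluating the non-aggregate columns once flag is set
//     program.emit_insn(Insn::If { reg: flag, target_pc: label_skip, jump_if_null: false });
//     // ...translate the non-aggregate result columns into their registers...
//     program.emit_int(1, flag); // mark them as emitted once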
@@ -115,6 +117,7 @@ fn prologue<'a>( labels_main_loop: (0..table_count).map(|_| LoopLabels::new(program)).collect(), label_main_loop_end: None, reg_agg_start: None, + reg_agg_flag: None, reg_limit: None, reg_offset: None, reg_limit_offset_sum: None, @@ -242,6 +245,11 @@ pub fn emit_query<'a>( target_pc: after_main_loop_label, }); } + if !plan.aggregates.is_empty() && plan.group_by.is_none() { + let flag = program.alloc_register(); + program.emit_int(0, flag); + t_ctx.reg_agg_flag = Some(flag); + } // Allocate registers for result columns t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len())); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7b51a2328..95601bb9a 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -677,6 +677,18 @@ fn emit_loop_source( &t_ctx.resolver, )?; } + + if let Some(flag) = t_ctx.reg_agg_flag { + let offset = program.offset().add(plan.result_columns.len() as u32); + + program.emit_insn(Insn::If { + reg: flag, + target_pc: offset, + jump_if_null: false, + }); + } + let col_start = t_ctx.reg_result_cols_start.unwrap(); + for (i, rc) in plan.result_columns.iter().enumerate() { if rc.contains_aggregates { // Do nothing, aggregates are computed above @@ -684,7 +696,9 @@ fn emit_loop_source( // it will be computed after the aggregations are finalized. continue; } - let reg = start_reg + num_aggs + i; + + let reg = col_start + i; + translate_expr( program, Some(&plan.table_references), @@ -693,6 +707,9 @@ fn emit_loop_source( &t_ctx.resolver, )?; } + if let Some(flag) = t_ctx.reg_agg_flag { + program.emit_int(1, flag); + } Ok(()) } LoopEmitTarget::QueryResult => { diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index ad8454c25..7988d0417 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -25,7 +25,9 @@ pub fn emit_select_result( } let start_reg = t_ctx.reg_result_cols_start.unwrap(); - for (i, rc) in plan.result_columns.iter().enumerate() { + for (i, rc) in plan.result_columns.iter().enumerate().filter(|(_, rc)| { + t_ctx.reg_agg_flag.is_some() && rc.contains_aggregates || t_ctx.reg_agg_flag.is_none() + }) { let reg = start_reg + i; translate_expr( program, diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 1730312be..f81b20788 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -75,6 +75,7 @@ pub fn emit_subquery<'a>( meta_left_joins: (0..plan.table_references.len()).map(|_| None).collect(), meta_sort: None, reg_agg_start: None, + reg_agg_flag: None, reg_result_cols_start: None, result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(), result_columns_to_skip_in_orderby_sorter: None, From 352fa6fd34f42a4dfbe326f01ab7093ed63b0ac7 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 27 Mar 2025 22:05:34 +0200 Subject: [PATCH 007/425] cargo fmt --- core/translate/main_loop.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 95601bb9a..da922ce20 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -677,7 +677,7 @@ fn emit_loop_source( &t_ctx.resolver, )?; } - + if let Some(flag) = t_ctx.reg_agg_flag { let offset = program.offset().add(plan.result_columns.len() as u32); From 36fe859d7d8e2752c049db0ca16db7aad1c234e9 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Fri, 28 Mar 2025 00:52:53 +0200 Subject: [PATCH 008/425] create if only if non aggregate columns 
present --- core/translate/emitter.rs | 5 ++++- core/translate/main_loop.rs | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index b473715fe..ef3c2b9ad 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -245,7 +245,10 @@ pub fn emit_query<'a>( target_pc: after_main_loop_label, }); } - if !plan.aggregates.is_empty() && plan.group_by.is_none() { + if !plan.aggregates.is_empty() + && plan.group_by.is_none() + && plan.result_columns.iter().any(|c| !c.contains_aggregates) + { let flag = program.alloc_register(); program.emit_int(0, flag); t_ctx.reg_agg_flag = Some(flag); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index da922ce20..24b057219 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -678,12 +678,11 @@ fn emit_loop_source( )?; } + let if_label = program.allocate_label(); if let Some(flag) = t_ctx.reg_agg_flag { - let offset = program.offset().add(plan.result_columns.len() as u32); - program.emit_insn(Insn::If { reg: flag, - target_pc: offset, + target_pc: if_label, jump_if_null: false, }); } @@ -707,6 +706,7 @@ fn emit_loop_source( &t_ctx.resolver, )?; } + program.resolve_label(if_label, program.offset()); if let Some(flag) = t_ctx.reg_agg_flag { program.emit_int(1, flag); } From 4fd1dcdc73da1e075159c23c66acd9541f6ba3c3 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Fri, 28 Mar 2025 01:06:09 +0200 Subject: [PATCH 009/425] small refine --- core/translate/main_loop.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 24b057219..ea7b2f4ef 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -678,14 +678,18 @@ fn emit_loop_source( )?; } - let if_label = program.allocate_label(); - if let Some(flag) = t_ctx.reg_agg_flag { + let if_label = if let Some(flag) = t_ctx.reg_agg_flag { + let if_label = program.allocate_label(); program.emit_insn(Insn::If { reg: flag, target_pc: if_label, jump_if_null: false, }); - } + Some(if_label) + } else { + None + }; + let col_start = t_ctx.reg_result_cols_start.unwrap(); for (i, rc) in plan.result_columns.iter().enumerate() { @@ -706,10 +710,12 @@ fn emit_loop_source( &t_ctx.resolver, )?; } - program.resolve_label(if_label, program.offset()); - if let Some(flag) = t_ctx.reg_agg_flag { + if let Some(label) = if_label { + program.resolve_label(label, program.offset()); + let flag = t_ctx.reg_agg_flag.unwrap(); program.emit_int(1, flag); } + Ok(()) } LoopEmitTarget::QueryResult => { From 2bcdd4e4042972c951dea9da032b1a6090148b37 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Sun, 30 Mar 2025 18:39:19 +0300 Subject: [PATCH 010/425] non group by cols are displayed in group by agg statements --- core/translate/emitter.rs | 4 +- core/translate/group_by.rs | 103 +++++++++++++++++++++++++++++------- core/translate/main_loop.rs | 41 +++++++++++--- 3 files changed, 120 insertions(+), 28 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index ef3c2b9ad..0eaeef58f 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -262,8 +262,8 @@ pub fn emit_query<'a>( init_order_by(program, t_ctx, order_by)?; } - if let Some(ref mut group_by) = plan.group_by { - init_group_by(program, t_ctx, group_by, &plan.aggregates)?; + if let Some(ref group_by) = plan.group_by { + init_group_by(program, t_ctx, group_by, &plan)?; } init_loop( program, diff --git 
a/core/translate/group_by.rs b/core/translate/group_by.rs index 13d860f16..86d6087e2 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -50,15 +50,22 @@ pub fn init_group_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, group_by: &GroupBy, - aggregates: &[Aggregate], + plan: &SelectPlan, ) -> Result<()> { - let num_aggs = aggregates.len(); + let num_aggs = plan.aggregates.len(); + + // Calculate this count only once + let non_aggregate_count = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates) + .count(); let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); let reg_abort_flag = program.alloc_register(); let reg_group_exprs_cmp = program.alloc_registers(group_by.exprs.len()); - let reg_group_exprs_acc = program.alloc_registers(group_by.exprs.len()); + let reg_group_exprs_acc = program.alloc_registers(non_aggregate_count); let reg_agg_exprs_start = program.alloc_registers(num_aggs); let reg_sorter_key = program.alloc_register(); @@ -71,7 +78,7 @@ pub fn init_group_by( } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, - columns: aggregates.len() + group_by.exprs.len(), + columns: non_aggregate_count + plan.aggregates.len(), order: Record::new(order), }); @@ -156,14 +163,23 @@ pub fn emit_group_by<'a>( let group_by = plan.group_by.as_ref().unwrap(); + // Calculate these values once + let non_aggregate_count = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates) + .count(); + + let agg_args_count = plan + .aggregates + .iter() + .map(|agg| agg.args.len()) + .sum::(); + // all group by columns and all arguments of agg functions are in the sorter. // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) - let sorter_column_count = group_by.exprs.len() - + plan - .aggregates - .iter() - .map(|agg| agg.args.len()) - .sum::(); + let sorter_column_count = non_aggregate_count + agg_args_count; + // sorter column names do not matter let ty = crate::schema::Type::Null; let pseudo_columns = (0..sorter_column_count) @@ -238,11 +254,6 @@ pub fn emit_group_by<'a>( }); // New group, move current group by columns into the comparison register - program.emit_insn(Insn::Move { - source_reg: groups_start_reg, - dest_reg: reg_group_exprs_cmp, - count: group_by.exprs.len(), - }); program.add_comment( program.offset(), @@ -253,6 +264,12 @@ pub fn emit_group_by<'a>( return_reg: reg_subrtn_acc_output_return_offset, }); + program.emit_insn(Insn::Move { + source_reg: groups_start_reg, + dest_reg: reg_group_exprs_cmp, + count: group_by.exprs.len(), + }); + program.add_comment(program.offset(), "check abort flag"); program.emit_insn(Insn::IfPos { reg: reg_abort_flag, @@ -269,7 +286,7 @@ pub fn emit_group_by<'a>( // Accumulate the values into the aggregations program.resolve_label(agg_step_label, program.offset()); let start_reg = t_ctx.reg_agg_start.unwrap(); - let mut cursor_index = group_by.exprs.len(); + let mut cursor_index = non_aggregate_count; for (i, agg) in plan.aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; translate_aggregation_step_groupby( @@ -296,7 +313,7 @@ pub fn emit_group_by<'a>( }); // Read the group by columns for a finished group - for i in 0..group_by.exprs.len() { + for i in 0..non_aggregate_count { let key_reg = reg_group_exprs_acc + i; let sorter_column_index = i; program.emit_insn(Insn::Column { @@ -363,6 +380,12 @@ pub fn emit_group_by<'a>( }); } + // Cache expressions we need multiple times + let 
filtered_results = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates) + .collect::>(); // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the @@ -373,6 +396,24 @@ pub fn emit_group_by<'a>( .expr_to_reg_cache .push((expr, reg_group_exprs_acc + i)); } + + // Offset for the next expressions after group_by + let mut offset = group_by.exprs.len(); + + for rc in filtered_results.iter() { + let expr = &rc.expr; + + // skip cols that are already in group by + if !matches!(expr, ast::Expr::Column { .. }) + || !is_column_in_group_by(expr, &group_by.exprs) + { + t_ctx + .resolver + .expr_to_reg_cache + .push((expr, reg_group_exprs_acc + offset)); + offset += 1; + } + } for (i, agg) in plan.aggregates.iter().enumerate() { t_ctx .resolver @@ -420,7 +461,7 @@ pub fn emit_group_by<'a>( let start_reg = reg_group_exprs_acc; program.emit_insn(Insn::Null { dest: start_reg, - dest_end: Some(start_reg + group_by.exprs.len() + plan.aggregates.len() - 1), + dest_end: Some(start_reg + non_aggregate_count + plan.aggregates.len() - 1), }); program.emit_insn(Insn::Integer { @@ -668,3 +709,29 @@ pub fn translate_aggregation_step_groupby( }; Ok(dest) } + +pub fn is_column_in_group_by(expr: &ast::Expr, group_by_exprs: &[ast::Expr]) -> bool { + if let ast::Expr::Column { + database: _, + table: _, + column: col, + is_rowid_alias: _, + } = expr + { + group_by_exprs.iter().any(|ex| { + if let ast::Expr::Column { + database: _, + table: _, + column: group_col, + is_rowid_alias: _, + } = ex + { + col == group_col + } else { + false + } + }) + } else { + false + } +} diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index ea7b2f4ef..9c58e193d 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -15,6 +15,7 @@ use super::{ aggregation::translate_aggregation_step, emitter::{OperationMode, TranslateCtx}, expr::{translate_condition_expr, translate_expr, ConditionMetadata}, + group_by::is_column_in_group_by, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ IterationDirection, Operation, Search, SelectPlan, SelectQueryType, TableReference, @@ -599,7 +600,12 @@ fn emit_loop_source( LoopEmitTarget::GroupBySorter => { let group_by = plan.group_by.as_ref().unwrap(); let aggregates = &plan.aggregates; - let sort_keys_count = group_by.exprs.len(); + let non_aggregate_columns = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates) + .collect::>(); + let sort_keys_count = non_aggregate_columns.len(); let aggregate_arguments_count = plan .aggregates .iter() @@ -621,6 +627,25 @@ fn emit_loop_source( &t_ctx.resolver, )?; } + + if group_by.exprs.len() + aggregates.len() != plan.result_columns.len() { + for rc in non_aggregate_columns.iter() { + let expr = &rc.expr; + if !is_column_in_group_by(expr, &group_by.exprs) { + let key_reg = cur_reg; + cur_reg += 1; + translate_expr( + program, + Some(&plan.table_references), + expr, + key_reg, + &t_ctx.resolver, + )?; + } + } + } + // Process non-aggregate result columns that aren't already in group_by + // Then we have the aggregate arguments. 
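// Illustration (not part of this diff) of the sorter row layout this change
// produces, with the group-by keys first, then the pass-through columns, then
// the aggregate arguments:
//
//   SELECT first_name, last_name, count(1) FROM users GROUP BY last_name;
//
// inserts [last_name, first_name, 1] into the sorter for each row, and the
// rows are then sorted on last_name.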
for agg in aggregates.iter() { // Here we are collecting scalars for the group by sorter, which will include @@ -692,14 +717,14 @@ fn emit_loop_source( let col_start = t_ctx.reg_result_cols_start.unwrap(); - for (i, rc) in plan.result_columns.iter().enumerate() { - if rc.contains_aggregates { - // Do nothing, aggregates are computed above - // if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet, - // it will be computed after the aggregations are finalized. - continue; - } + // Process only non-aggregate columns + let non_agg_columns = plan + .result_columns + .iter() + .enumerate() + .filter(|(_, rc)| !rc.contains_aggregates); + for (i, rc) in non_agg_columns { let reg = col_start + i; translate_expr( From 816cbacc9caa5fc820b356b611f93dd5a1ce4dee Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 31 Mar 2025 01:56:04 +0300 Subject: [PATCH 011/425] some smartie optimizations --- core/translate/group_by.rs | 27 +++---------------- core/translate/optimizer.rs | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 86d6087e2..e00edf455 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -6,6 +6,7 @@ use crate::{ function::AggFunc, schema::{Column, PseudoTable}, types::{OwnedValue, Record}, + util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, insn::Insn, @@ -711,27 +712,7 @@ pub fn translate_aggregation_step_groupby( } pub fn is_column_in_group_by(expr: &ast::Expr, group_by_exprs: &[ast::Expr]) -> bool { - if let ast::Expr::Column { - database: _, - table: _, - column: col, - is_rowid_alias: _, - } = expr - { - group_by_exprs.iter().any(|ex| { - if let ast::Expr::Column { - database: _, - table: _, - column: group_col, - is_rowid_alias: _, - } = ex - { - col == group_col - } else { - false - } - }) - } else { - false - } + group_by_exprs + .iter() + .any(|expr2| exprs_are_equivalent(expr, expr2)) } diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 6fa7f9619..dfd7ab16d 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -4,6 +4,7 @@ use limbo_sqlite3_parser::ast; use crate::{ schema::{Index, Schema}, + util::exprs_are_equivalent, Result, }; @@ -43,6 +44,8 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { eliminate_unnecessary_orderby(plan, schema)?; + eliminate_orderby_like_groupby(plan)?; + Ok(()) } @@ -117,6 +120,51 @@ fn query_is_already_ordered_by( } } +fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { + if plan.order_by.is_none() | plan.group_by.is_none() { + return Ok(()); + } + if plan.table_references.len() == 0 { + return Ok(()); + } + + let o = plan.order_by.as_mut().unwrap(); + let g = plan.group_by.as_mut().unwrap(); + + let mut insert_pos = 0; + let mut i = 0; + + while i < o.len() { + let (key, order) = &o[i]; + + if matches!(order, Direction::Descending) { + i += 1; + continue; + } + if let Some(pos) = g + .exprs + .iter() + .position(|expr| exprs_are_equivalent(expr, key)) + { + if pos != insert_pos { + let mut current_pos = pos; + while current_pos > insert_pos { + g.exprs.swap(current_pos, current_pos - 1); + current_pos -= 1; + } + } + insert_pos += 1; + o.remove(i); + } else { + i += 1; + } + } + if o.is_empty() { + plan.order_by = None + } + Ok(()) +} + fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> 
Result<()> { if plan.order_by.is_none() { return Ok(()); @@ -125,6 +173,11 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu return Ok(()); } + // if pk will be removed later + if plan.group_by.is_some() { + return Ok(()); + } + let o = plan.order_by.as_mut().unwrap(); if o.len() != 1 { From 91ceab16268e5c7063670e8e6c214d061a8bdbb0 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 3 Apr 2025 16:53:13 +0300 Subject: [PATCH 012/425] improve naming and add comments for context --- core/translate/emitter.rs | 10 +++--- core/translate/group_by.rs | 41 ++++++++++-------------- core/translate/main_loop.rs | 8 ++--- core/translate/optimizer.rs | 62 ++++++++++++++++++++++++------------ core/translate/result_row.rs | 9 +++++- core/translate/subquery.rs | 2 +- 6 files changed, 78 insertions(+), 54 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 0eaeef58f..80a6db6d5 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -62,8 +62,10 @@ pub struct TranslateCtx<'a> { pub label_main_loop_end: Option, // First register of the aggregation results pub reg_agg_start: Option, - // Register to track if we set non aggregate cols to first encountered row in non group by agg statement - pub reg_agg_flag: Option, + // In non-group-by statements with aggregations (e.g. SELECT foo, bar, sum(baz) FROM t), + // we want to emit the non-aggregate columns (foo and bar) only once. + // This register is a flag that tracks whether we have already done that. + pub reg_nonagg_emit_once_flag: Option, // First register of the result columns of the query pub reg_result_cols_start: Option, // The register holding the limit value, if any. @@ -117,7 +119,7 @@ fn prologue<'a>( labels_main_loop: (0..table_count).map(|_| LoopLabels::new(program)).collect(), label_main_loop_end: None, reg_agg_start: None, - reg_agg_flag: None, + reg_nonagg_emit_once_flag: None, reg_limit: None, reg_offset: None, reg_limit_offset_sum: None, @@ -251,7 +253,7 @@ pub fn emit_query<'a>( { let flag = program.alloc_register(); program.emit_int(0, flag); - t_ctx.reg_agg_flag = Some(flag); + t_ctx.reg_nonagg_emit_once_flag = Some(flag); } // Allocate registers for result columns diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index e00edf455..ce268671d 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -38,8 +38,8 @@ pub struct GroupByMetadata { pub reg_sorter_key: usize, // Register holding a flag to abort the grouping process if necessary pub reg_abort_flag: usize, - // Register holding the start of the accumulator group registers (i.e. 
the groups, not the aggregates) - pub reg_group_exprs_acc: usize, + // Register holding the start of the non aggregate query members (all columns except aggregate arguments) + pub reg_non_aggregate_exprs_acc: usize, // Starting index of the register(s) that hold the comparison result between the current row and the previous row // The comparison result is used to determine if the current row belongs to the same group as the previous row // Each group by expression has a corresponding register @@ -55,7 +55,6 @@ pub fn init_group_by( ) -> Result<()> { let num_aggs = plan.aggregates.len(); - // Calculate this count only once let non_aggregate_count = plan .result_columns .iter() @@ -66,7 +65,7 @@ pub fn init_group_by( let reg_abort_flag = program.alloc_register(); let reg_group_exprs_cmp = program.alloc_registers(group_by.exprs.len()); - let reg_group_exprs_acc = program.alloc_registers(non_aggregate_count); + let reg_non_aggregate_exprs_acc = program.alloc_registers(non_aggregate_count); let reg_agg_exprs_start = program.alloc_registers(num_aggs); let reg_sorter_key = program.alloc_register(); @@ -118,7 +117,7 @@ pub fn init_group_by( label_acc_indicator_set_flag_true: program.allocate_label(), reg_subrtn_acc_clear_return_offset, reg_abort_flag, - reg_group_exprs_acc, + reg_non_aggregate_exprs_acc, reg_group_exprs_cmp, reg_sorter_key, }); @@ -154,7 +153,7 @@ pub fn emit_group_by<'a>( sort_cursor, reg_group_exprs_cmp, reg_subrtn_acc_clear_return_offset, - reg_group_exprs_acc, + reg_non_aggregate_exprs_acc, reg_abort_flag, reg_sorter_key, label_subrtn_acc_clear, @@ -164,7 +163,6 @@ pub fn emit_group_by<'a>( let group_by = plan.group_by.as_ref().unwrap(); - // Calculate these values once let non_aggregate_count = plan .result_columns .iter() @@ -177,7 +175,7 @@ pub fn emit_group_by<'a>( .map(|agg| agg.args.len()) .sum::(); - // all group by columns and all arguments of agg functions are in the sorter. 
+ // all non-aggregate columns and all arguments of agg functions are in the sorter // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) let sorter_column_count = non_aggregate_count + agg_args_count; @@ -254,8 +252,6 @@ pub fn emit_group_by<'a>( target_pc_gt: program.offset().add(1u32), }); - // New group, move current group by columns into the comparison register - program.add_comment( program.offset(), "check if ended group had data, and output if so", @@ -265,6 +261,7 @@ pub fn emit_group_by<'a>( return_reg: reg_subrtn_acc_output_return_offset, }); + // New group, move current group by columns into the comparison register program.emit_insn(Insn::Move { source_reg: groups_start_reg, dest_reg: reg_group_exprs_cmp, @@ -313,9 +310,9 @@ pub fn emit_group_by<'a>( jump_if_null: false, }); - // Read the group by columns for a finished group + // Read the non-aggregate columns for a finished group for i in 0..non_aggregate_count { - let key_reg = reg_group_exprs_acc + i; + let key_reg = reg_non_aggregate_exprs_acc + i; let sorter_column_index = i; program.emit_insn(Insn::Column { cursor_id: pseudo_cursor, @@ -381,12 +378,10 @@ pub fn emit_group_by<'a>( }); } - // Cache expressions we need multiple times - let filtered_results = plan + let non_aggregate_result_columns = plan .result_columns .iter() - .filter(|rc| !rc.contains_aggregates) - .collect::>(); + .filter(|rc| !rc.contains_aggregates); // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the @@ -395,23 +390,21 @@ pub fn emit_group_by<'a>( t_ctx .resolver .expr_to_reg_cache - .push((expr, reg_group_exprs_acc + i)); + .push((expr, reg_non_aggregate_exprs_acc + i)); } - // Offset for the next expressions after group_by + // Register offset for the non-aggregate expressions that are not part of GROUP BY let mut offset = group_by.exprs.len(); - for rc in filtered_results.iter() { + for rc in non_aggregate_result_columns { let expr = &rc.expr; // skip cols that are already in group by - if !matches!(expr, ast::Expr::Column { .. 
}) - || !is_column_in_group_by(expr, &group_by.exprs) - { + if !is_column_in_group_by(expr, &group_by.exprs) { t_ctx .resolver .expr_to_reg_cache - .push((expr, reg_group_exprs_acc + offset)); + .push((expr, reg_non_aggregate_exprs_acc + offset)); offset += 1; } } @@ -459,7 +452,7 @@ pub fn emit_group_by<'a>( program.add_comment(program.offset(), "clear accumulator subroutine start"); program.resolve_label(label_subrtn_acc_clear, program.offset()); - let start_reg = reg_group_exprs_acc; + let start_reg = reg_non_aggregate_exprs_acc; program.emit_insn(Insn::Null { dest: start_reg, dest_end: Some(start_reg + non_aggregate_count + plan.aggregates.len() - 1), diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9c58e193d..5a3e1c126 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -628,6 +628,7 @@ fn emit_loop_source( )?; } + // Process non-aggregate result columns that aren't already in group_by if group_by.exprs.len() + aggregates.len() != plan.result_columns.len() { for rc in non_aggregate_columns.iter() { let expr = &rc.expr; @@ -644,7 +645,6 @@ fn emit_loop_source( } } } - // Process non-aggregate result columns that aren't already in group_by // Then we have the aggregate arguments. for agg in aggregates.iter() { @@ -703,7 +703,7 @@ fn emit_loop_source( )?; } - let if_label = if let Some(flag) = t_ctx.reg_agg_flag { + let label_emit_nonagg_only_once = if let Some(flag) = t_ctx.reg_nonagg_emit_once_flag { let if_label = program.allocate_label(); program.emit_insn(Insn::If { reg: flag, @@ -735,9 +735,9 @@ fn emit_loop_source( &t_ctx.resolver, )?; } - if let Some(label) = if_label { + if let Some(label) = label_emit_nonagg_only_once { program.resolve_label(label, program.offset()); - let flag = t_ctx.reg_agg_flag.unwrap(); + let flag = t_ctx.reg_nonagg_emit_once_flag.unwrap(); program.emit_int(1, flag); } diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dfd7ab16d..5321e0fa0 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -128,38 +128,58 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { return Ok(()); } - let o = plan.order_by.as_mut().unwrap(); - let g = plan.group_by.as_mut().unwrap(); + let order_by_clauses = plan.order_by.as_mut().unwrap(); + let group_by_clauses = plan.group_by.as_mut().unwrap(); - let mut insert_pos = 0; - let mut i = 0; + let mut group_by_insert_position = 0; + let mut order_index = 0; - while i < o.len() { - let (key, order) = &o[i]; + // This function optimizes query execution by eliminating duplicate expressions between ORDER BY and GROUP BY clauses + // When the same column appears in both clauses, we can avoid redundant sorting operations + // The function reorders GROUP BY expressions and removes redundant ORDER BY expressions to ensure consistent ordering + while order_index < order_by_clauses.len() { + let (order_expr, direction) = &order_by_clauses[order_index]; - if matches!(order, Direction::Descending) { - i += 1; + // Skip descending orders as they require separate sorting + if matches!(direction, Direction::Descending) { + order_index += 1; continue; } - if let Some(pos) = g + + // Check if the current ORDER BY expression matches any expression in the GROUP BY clause + if let Some(group_expr_position) = group_by_clauses .exprs .iter() - .position(|expr| exprs_are_equivalent(expr, key)) + .position(|expr| exprs_are_equivalent(expr, order_expr)) { - if pos != insert_pos { - let mut current_pos = pos; - while current_pos > 
insert_pos { - g.exprs.swap(current_pos, current_pos - 1); - current_pos -= 1; + // If we found a matching expression in GROUP BY, we need to ensure it's in the correct position + // to preserve the ordering specified by ORDER BY clauses + + // Move the matching GROUP BY expression to the current insertion position + // This effectively "bubbles up" the expression to maintain proper ordering + if group_expr_position != group_by_insert_position { + let mut current_position = group_expr_position; + + // Swap expressions to move the matching one to the correct position + while current_position > group_by_insert_position { + group_by_clauses + .exprs + .swap(current_position, current_position - 1); + current_position -= 1; } } - insert_pos += 1; - o.remove(i); + + group_by_insert_position += 1; + + // Remove this expression from ORDER BY since it's now handled by GROUP BY + order_by_clauses.remove(order_index); + // Note: We don't increment order_index here because removal shifts all elements } else { - i += 1; + // If not found in GROUP BY, move to next ORDER BY expression + order_index += 1; } } - if o.is_empty() { + if order_by_clauses.is_empty() { plan.order_by = None } Ok(()) @@ -173,7 +193,9 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu return Ok(()); } - // if pk will be removed later + // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data + // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping + // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby if plan.group_by.is_some() { return Ok(()); } diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index 7988d0417..dc24cee67 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -26,7 +26,14 @@ pub fn emit_select_result( let start_reg = t_ctx.reg_result_cols_start.unwrap(); for (i, rc) in plan.result_columns.iter().enumerate().filter(|(_, rc)| { - t_ctx.reg_agg_flag.is_some() && rc.contains_aggregates || t_ctx.reg_agg_flag.is_none() + // For aggregate queries, we handle columns differently; example: select id, first_name, sum(age) from users limit 1; + // 1. Columns with aggregates (e.g., sum(age)) are computed in each iteration of aggregation + // 2. Non-aggregate columns (e.g., id, first_name) are only computed once in the first iteration + // This filter ensures we only emit expressions for non aggregate columns once, + // preserving previously calculated values while updating aggregate results + // For all other queries where reg_nonagg_emit_once_flag is none we do nothing. 
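// Spelled out, the filter below keeps a result column when:
//   (the emit-once flag exists AND the column contains an aggregate)
//   OR (there is no emit-once flag at all, i.e. this is not a flat aggregate query).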
+ t_ctx.reg_nonagg_emit_once_flag.is_some() && rc.contains_aggregates + || t_ctx.reg_nonagg_emit_once_flag.is_none() }) { let reg = start_reg + i; translate_expr( diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index f81b20788..87ddddd63 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -75,7 +75,7 @@ pub fn emit_subquery<'a>( meta_left_joins: (0..plan.table_references.len()).map(|_| None).collect(), meta_sort: None, reg_agg_start: None, - reg_agg_flag: None, + reg_nonagg_emit_once_flag: None, reg_result_cols_start: None, result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(), result_columns_to_skip_in_orderby_sorter: None, From 34a132fcd3845623031a7d17bde745b13f357ce0 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 3 Apr 2025 21:14:22 +0300 Subject: [PATCH 013/425] fix output when group by is not part of resulting set --- core/translate/group_by.rs | 142 +++++++++++++++++++++++------------- core/translate/main_loop.rs | 78 ++++++++++++-------- 2 files changed, 139 insertions(+), 81 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index ce268671d..a36e82864 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -1,4 +1,4 @@ -use std::rc::Rc; +use std::{collections::HashMap, rc::Rc}; use limbo_sqlite3_parser::ast; @@ -44,6 +44,9 @@ pub struct GroupByMetadata { // The comparison result is used to determine if the current row belongs to the same group as the previous row // Each group by expression has a corresponding register pub reg_group_exprs_cmp: usize, + // Columns that not part of GROUP BY clause and not arguments of Aggregation function. + // Heavy calculation and needed in different functions, so it is reasonable to do it once and save. + pub non_group_by_non_agg_column_count: Option, } /// Initialize resources needed for GROUP BY processing @@ -120,6 +123,7 @@ pub fn init_group_by( reg_non_aggregate_exprs_acc, reg_group_exprs_cmp, reg_sorter_key, + non_group_by_non_agg_column_count: None, }); Ok(()) } @@ -158,28 +162,58 @@ pub fn emit_group_by<'a>( reg_sorter_key, label_subrtn_acc_clear, label_acc_indicator_set_flag_true, + non_group_by_non_agg_column_count, .. 
} = *t_ctx.meta_group_by.as_mut().unwrap(); - let group_by = plan.group_by.as_ref().unwrap(); - let non_aggregate_count = plan - .result_columns - .iter() - .filter(|rc| !rc.contains_aggregates) - .count(); - let agg_args_count = plan .aggregates .iter() .map(|agg| agg.args.len()) .sum::(); + let group_by_count = group_by.exprs.len(); + let non_group_by_non_agg_column_count = non_group_by_non_agg_column_count.unwrap(); + // Count of GROUP BY columns that appear in the result set + let group_by_colls_in_result_set = + plan.result_columns.len() - non_group_by_non_agg_column_count - plan.aggregates.len(); - // all non-aggregate columns and all arguments of agg functions are in the sorter - // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) - let sorter_column_count = non_aggregate_count + agg_args_count; + // We have to know which group by expr present in resulting set + let group_by_expr_in_res_cols: Vec = group_by + .exprs + .iter() + .map(|expr| { + plan.result_columns + .iter() + .any(|e| exprs_are_equivalent(&e.expr, expr)) + }) + .collect(); - // sorter column names do not matter + // Create a map from sorter column index to result register + // This helps track where each column from the sorter should be stored + let mut column_register_mapping = HashMap::new(); + let mut next_reg = reg_non_aggregate_exprs_acc; + + // Map GROUP BY columns that are in the result set to registers + for (i, is_in_result) in group_by_expr_in_res_cols.iter().enumerate() { + if *is_in_result { + column_register_mapping.insert(i, next_reg); + next_reg += 1; + } + } + + // Handle other non-aggregate columns that aren't part of GROUP BY and not part of Aggregation function + for i in group_by_count..group_by_count + non_group_by_non_agg_column_count { + column_register_mapping.insert(i, next_reg); + next_reg += 1; + } + + // Calculate total number of columns in the sorter + // The sorter contains all GROUP BY columns, aggregate arguments, and other columns + let sorter_column_count = agg_args_count + group_by_count + non_group_by_non_agg_column_count; + + // Create pseudo-columns for the pseudo-table + // (these are placeholders as we only care about structure, not semantics) let ty = crate::schema::Type::Null; let pseudo_columns = (0..sorter_column_count) .map(|_| Column { @@ -193,7 +227,8 @@ pub fn emit_group_by<'a>( }) .collect::>(); - // A pseudo table is a "fake" table to which we read one row at a time from the sorter + // Create a pseudo-table to read one row at a time from the sorter + // This allows us to use standard table access operations on the sorted data let pseudo_table = Rc::new(PseudoTable { columns: pseudo_columns, }); @@ -281,10 +316,10 @@ pub fn emit_group_by<'a>( return_reg: reg_subrtn_acc_clear_return_offset, }); - // Accumulate the values into the aggregations + // Process each aggregate function for the current row program.resolve_label(agg_step_label, program.offset()); let start_reg = t_ctx.reg_agg_start.unwrap(); - let mut cursor_index = non_aggregate_count; + let mut cursor_index = group_by_count + non_group_by_non_agg_column_count; // Skipping all columns in sorter that not an aggregation arguments for (i, agg) in plan.aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; translate_aggregation_step_groupby( @@ -299,7 +334,8 @@ pub fn emit_group_by<'a>( cursor_index += agg.args.len(); } - // We only emit the group by columns if we are going to start a new group (i.e. 
the prev group will not accumulate any more values into the aggregations) + // We only need to store non-aggregate columns once per group + // Skip if we've already stored them for this group program.add_comment( program.offset(), "don't emit group columns if continuing existing group", @@ -310,17 +346,16 @@ pub fn emit_group_by<'a>( jump_if_null: false, }); - // Read the non-aggregate columns for a finished group - for i in 0..non_aggregate_count { - let key_reg = reg_non_aggregate_exprs_acc + i; - let sorter_column_index = i; + // Read non-aggregate columns from the current row + for (sorter_column_index, dest_reg) in column_register_mapping.iter() { program.emit_insn(Insn::Column { cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: key_reg, + column: *sorter_column_index, + dest: *dest_reg, }); } + // Mark that we've stored data for this group program.resolve_label(label_acc_indicator_set_flag_true, program.offset()); program.add_comment(program.offset(), "indicate data in accumulator"); program.emit_insn(Insn::Integer { @@ -328,6 +363,7 @@ pub fn emit_group_by<'a>( dest: reg_data_in_acc_flag, }); + // Continue to the next row in the sorter program.emit_insn(Insn::SorterNext { cursor_id: sort_cursor, pc_if_next: label_grouping_loop_start, @@ -355,18 +391,22 @@ pub fn emit_group_by<'a>( program.resolve_label(label_subrtn_acc_output, program.offset()); + // Only output a row if there's data in the accumulator program.add_comment(program.offset(), "output group by row subroutine start"); program.emit_insn(Insn::IfPos { reg: reg_data_in_acc_flag, target_pc: label_agg_final, decrement_by: 0, }); + + // If no data, return without outputting a row let group_by_end_without_emitting_row_label = program.allocate_label(); program.resolve_label(group_by_end_without_emitting_row_label, program.offset()); program.emit_insn(Insn::Return { return_reg: reg_subrtn_acc_output_return_offset, }); + // Finalize aggregate values for output let agg_start_reg = t_ctx.reg_agg_start.unwrap(); // Resolve the label for the start of the group by output row subroutine program.resolve_label(label_agg_final, program.offset()); @@ -378,36 +418,29 @@ pub fn emit_group_by<'a>( }); } - let non_aggregate_result_columns = plan - .result_columns - .iter() - .filter(|rc| !rc.contains_aggregates); - // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) - // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) - // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the - // result column expression matches a) a group by column or b) an aggregation result. 
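// A worked example (illustrative only) of the case this patch fixes:
//
//   SELECT first_name, sum(age) FROM users GROUP BY last_name;
//
// Here group_by_expr_in_res_cols is [false], because last_name never appears
// in the result set. No accumulator register is mapped for it below, so only
// first_name and the sum(age) result reach the output row.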
+ // Map GROUP BY expressions to their registers in the result set for (i, expr) in group_by.exprs.iter().enumerate() { - t_ctx - .resolver - .expr_to_reg_cache - .push((expr, reg_non_aggregate_exprs_acc + i)); - } - - // Register offset for the non-aggregate expressions that are not part of GROUP BY - let mut offset = group_by.exprs.len(); - - for rc in non_aggregate_result_columns { - let expr = &rc.expr; - - // skip cols that are already in group by - if !is_column_in_group_by(expr, &group_by.exprs) { - t_ctx - .resolver - .expr_to_reg_cache - .push((expr, reg_non_aggregate_exprs_acc + offset)); - offset += 1; + if group_by_expr_in_res_cols[i] { + if let Some(reg) = &column_register_mapping.get(&i) { + t_ctx.resolver.expr_to_reg_cache.push((expr, **reg)); + } } } + + // Map non-aggregate, non-GROUP BY columns to their registers + let non_agg_cols = plan + .result_columns + .iter() + .filter(|rc| !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs)); + + for (idx, rc) in non_agg_cols.enumerate() { + let sorter_idx = group_by_count + idx; + if let Some(®) = column_register_mapping.get(&sorter_idx) { + t_ctx.resolver.expr_to_reg_cache.push((&rc.expr, reg)); + } + } + + // Map aggregate expressions to their result registers for (i, agg) in plan.aggregates.iter().enumerate() { t_ctx .resolver @@ -450,12 +483,21 @@ pub fn emit_group_by<'a>( return_reg: reg_subrtn_acc_output_return_offset, }); + // Subroutine to clear accumulators for a new group program.add_comment(program.offset(), "clear accumulator subroutine start"); program.resolve_label(label_subrtn_acc_clear, program.offset()); let start_reg = reg_non_aggregate_exprs_acc; + + // Reset all accumulator registers to NULL program.emit_insn(Insn::Null { dest: start_reg, - dest_end: Some(start_reg + non_aggregate_count + plan.aggregates.len() - 1), + dest_end: Some( + start_reg + + non_group_by_non_agg_column_count + + group_by_colls_in_result_set + + plan.aggregates.len() + - 1, + ), }); program.emit_insn(Insn::Integer { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 5a3e1c126..50f5a948c 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -598,24 +598,48 @@ fn emit_loop_source( ) -> Result<()> { match emit_target { LoopEmitTarget::GroupBySorter => { + // This function creates a sorter for GROUP BY operations by allocating registers and + // translating expressions for three types of columns: + // 1) GROUP BY columns (used as sorting keys) + // 2) non-aggregate, non-GROUP BY columns + // 3) aggregate function arguments let group_by = plan.group_by.as_ref().unwrap(); let aggregates = &plan.aggregates; - let non_aggregate_columns = plan + + // Identify columns in the result set that are neither in GROUP BY nor contain aggregates + let non_group_by_non_agg_expr = plan .result_columns .iter() - .filter(|rc| !rc.contains_aggregates) + .filter(|rc| { + !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs) + }) + .map(|rc| &rc.expr) .collect::>(); - let sort_keys_count = non_aggregate_columns.len(); + + // Store the count of non-GROUP BY, non-aggregate columns in the metadata + // This will be used later during aggregation processing + t_ctx.meta_group_by.as_mut().map(|meta| { + meta.non_group_by_non_agg_column_count = Some(non_group_by_non_agg_expr.len()); + meta + }); + + // Calculate the total number of arguments used across all aggregate functions let aggregate_arguments_count = plan .aggregates .iter() .map(|agg| agg.args.len()) .sum::(); - 
let column_count = sort_keys_count + aggregate_arguments_count; + + // Calculate total number of registers needed for all columns in the sorter + let column_count = + group_by.exprs.len() + aggregate_arguments_count + non_group_by_non_agg_expr.len(); + + // Allocate a contiguous block of registers for all columns let start_reg = program.alloc_registers(column_count); let mut cur_reg = start_reg; - // The group by sorter rows will contain the grouping keys first. They are also the sort keys. + // Step 1: Process GROUP BY columns first + // These will be the first columns in the sorter and serve as sort keys for expr in group_by.exprs.iter() { let key_reg = cur_reg; cur_reg += 1; @@ -628,32 +652,27 @@ fn emit_loop_source( )?; } - // Process non-aggregate result columns that aren't already in group_by - if group_by.exprs.len() + aggregates.len() != plan.result_columns.len() { - for rc in non_aggregate_columns.iter() { - let expr = &rc.expr; - if !is_column_in_group_by(expr, &group_by.exprs) { - let key_reg = cur_reg; - cur_reg += 1; - translate_expr( - program, - Some(&plan.table_references), - expr, - key_reg, - &t_ctx.resolver, - )?; - } - } + // Step 2: Process columns that aren't part of GROUP BY and don't contain aggregates + // Example: SELECT col1, col2, SUM(col3) FROM table GROUP BY col1 + // Here col2 would be processed in this loop if it's in the result set + for expr in non_group_by_non_agg_expr.iter() { + let key_reg = cur_reg; + cur_reg += 1; + translate_expr( + program, + Some(&plan.table_references), + expr, + key_reg, + &t_ctx.resolver, + )?; } - // Then we have the aggregate arguments. + // Step 3: Process arguments for all aggregate functions + // For each aggregate, translate all its argument expressions for agg in aggregates.iter() { - // Here we are collecting scalars for the group by sorter, which will include - // both the group by expressions and the aggregate arguments. - // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` - // the sorter will have two scalars: u.first_name and u.age. - // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later. + // For a query like: SELECT group_col, SUM(val1), AVG(val2) FROM table GROUP BY group_col + // we'll process val1 and val2 here, storing them in the sorter so they're available + // when computing the aggregates after sorting by group_col for expr in agg.args.iter() { let agg_reg = cur_reg; cur_reg += 1; @@ -667,9 +686,6 @@ fn emit_loop_source( } } - // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. - // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. 
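// Sketch of what happens next, assuming the sorter API used above: the
// contiguous registers [start_reg .. start_reg + column_count) are packed into
// a single record by sorter_insert() below, so every sorted row carries its
// group-by keys, pass-through columns, and aggregate inputs together.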
- let group_by_metadata = t_ctx.meta_group_by.as_ref().unwrap(); sorter_insert( From 5632b15a449cb1e291d7ca114575dfcb2608358a Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 3 Apr 2025 21:51:10 +0300 Subject: [PATCH 014/425] add tests --- testing/agg-functions.test | 21 +++++++++++++++++++++ testing/testing | 0 2 files changed, 21 insertions(+) create mode 100644 testing/testing diff --git a/testing/agg-functions.test b/testing/agg-functions.test index 52cf2865c..f1a85dde5 100755 --- a/testing/agg-functions.test +++ b/testing/agg-functions.test @@ -99,6 +99,27 @@ do_execsql_test select-agg-binary-unary-positive { SELECT min(age) + +max(age) FROM users; } {101} +do_execsql_test select-non-agg-cols-should-be-not-null { + SELECT id, first_name, sum(age) FROM users LIMIT 1; +} {1|Jamie|503960} + +do_execsql_test select-with-group-by-and-agg-1 { + SELECT id, first_name, avg(age) FROM users group by last_name limit 1; +} {274|Debra|66.25} + +do_execsql_test select-with-group-by-and-agg-2 { + select first_name, last_name from users where state = 'AL' group by last_name limit 10; +} {Jay|Acosta +Daniel|Adams +Aaron|Baker +Sharon|Becker +Kim|Berg +Donald|Bishop +Brian|Bradford +Jesus|Bradley +John|Brown +Hunter|Burke} + do_execsql_test select-agg-json-array { SELECT json_group_array(name) FROM products; } {["hat","cap","shirt","sweater","sweatshirt","shorts","jeans","sneakers","boots","coat","accessories"]} diff --git a/testing/testing b/testing/testing new file mode 100644 index 000000000..e69de29bb From d4b8fa17f8612a65f5aa5c659a04015cbb5ebfd3 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Thu, 3 Apr 2025 22:06:46 +0300 Subject: [PATCH 015/425] fix tests --- core/translate/group_by.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index a36e82864..5adcf658f 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -174,9 +174,6 @@ pub fn emit_group_by<'a>( .sum::(); let group_by_count = group_by.exprs.len(); let non_group_by_non_agg_column_count = non_group_by_non_agg_column_count.unwrap(); - // Count of GROUP BY columns that appear in the result set - let group_by_colls_in_result_set = - plan.result_columns.len() - non_group_by_non_agg_column_count - plan.aggregates.len(); // We have to know which group by expr present in resulting set let group_by_expr_in_res_cols: Vec = group_by @@ -492,10 +489,7 @@ pub fn emit_group_by<'a>( program.emit_insn(Insn::Null { dest: start_reg, dest_end: Some( - start_reg - + non_group_by_non_agg_column_count - + group_by_colls_in_result_set - + plan.aggregates.len() + start_reg + non_group_by_non_agg_column_count + group_by_count + plan.aggregates.len() - 1, ), }); From ae2be302047268852ffa6c59f7a1f39e5b4ab67a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 20:22:14 -0400 Subject: [PATCH 016/425] Move init label to proper place in create vtab translation --- core/translate/schema.rs | 5 ++--- core/util.rs | 2 +- core/vdbe/execute.rs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/translate/schema.rs b/core/translate/schema.rs index a887bdef8..29cf29644 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -462,7 +462,8 @@ pub fn translate_create_virtual_table( approx_num_insns: 40, approx_num_labels: 2, }); - + let init_label = program.emit_init(); + let start_offset = program.offset(); let module_name_reg = program.emit_string8_new_reg(module_name_str.clone()); let table_name_reg = 
program.emit_string8_new_reg(table_name.clone()); @@ -520,8 +521,6 @@ pub fn translate_create_virtual_table( where_clause: parse_schema_where_clause, }); - let init_label = program.emit_init(); - let start_offset = program.offset(); program.emit_halt(); program.resolve_label(init_label, program.offset()); program.emit_transaction(true); diff --git a/core/util.rs b/core/util.rs index 13b53bcb3..f17699233 100644 --- a/core/util.rs +++ b/core/util.rs @@ -58,7 +58,7 @@ pub fn parse_schema_rows( "table" => { let root_page: i64 = row.get::(3)?; let sql: &str = row.get::<&str>(4)?; - if root_page == 0 && sql.to_lowercase().contains("virtual") { + if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; let vtab = syms.vtabs.get(name).unwrap().clone(); schema.add_virtual_table(vtab); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 31a64d491..3c511a0db 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4110,7 +4110,7 @@ pub fn op_parse_schema( let conn = program.connection.upgrade(); let conn = conn.as_ref().unwrap(); let stmt = conn.prepare(format!( - "SELECT * FROM sqlite_schema WHERE {}", + "SELECT * FROM sqlite_schema WHERE {}", where_clause ))?; let mut schema = conn.schema.write(); From 97c68f905a4368f75f182786315ece5b2c54ed15 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 20:22:47 -0400 Subject: [PATCH 017/425] Move test back to original setup to ensure issue is solved --- testing/cli_tests/extensions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 058fe2e6b..d898908f9 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -338,7 +338,10 @@ def test_series(): def test_kv(): ext_path = "target/debug/liblimbo_ext_tests" - limbo = TestLimboShell("") + limbo = TestLimboShell() + # first, create a normal table to ensure no issues + limbo.execute_dot("CREATE TABLE other (a,b,c);") + limbo.execute_dot("INSERT INTO other values (23,32,23);") limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Virtual table module not found: kv_store" in res, From 38d842d675bd9d5ff1bd009da4116ea63514c6ed Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 4 Apr 2025 00:56:54 -0300 Subject: [PATCH 018/425] docs/insn: Standardizes comments for insn to doc comments --- core/vdbe/insn.rs | 143 +++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index ab9013de4..f45e7ce35 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -40,102 +40,102 @@ impl CmpInsFlags { #[derive(Description, Debug)] pub enum Insn { - // Initialize the program state and jump to the given PC. + /// Initialize the program state and jump to the given PC. Init { target_pc: BranchOffset, }, - // Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. + /// Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL. Null { dest: usize, dest_end: Option, }, - // Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL. 
+ /// Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL. NullRow { cursor_id: CursorID, }, - // Add two registers and store the result in a third register. + /// Add two registers and store the result in a third register. Add { lhs: usize, rhs: usize, dest: usize, }, - // Subtract rhs from lhs and store in dest + /// Subtract rhs from lhs and store in dest Subtract { lhs: usize, rhs: usize, dest: usize, }, - // Multiply two registers and store the result in a third register. + /// Multiply two registers and store the result in a third register. Multiply { lhs: usize, rhs: usize, dest: usize, }, - // Divide lhs by rhs and store the result in a third register. + /// Divide lhs by rhs and store the result in a third register. Divide { lhs: usize, rhs: usize, dest: usize, }, - // Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruct. + /// Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruct. Compare { start_reg_a: usize, start_reg_b: usize, count: usize, }, - // Place the result of rhs bitwise AND lhs in third register. + /// Place the result of rhs bitwise AND lhs in third register. BitAnd { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of rhs bitwise OR lhs in third register. + /// Place the result of rhs bitwise OR lhs in third register. BitOr { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of bitwise NOT register P1 in dest register. + /// Place the result of bitwise NOT register P1 in dest register. BitNot { reg: usize, dest: usize, }, - // Checkpoint the database (applying wal file content to database file). + /// Checkpoint the database (applying wal file content to database file). Checkpoint { database: usize, // checkpoint database P1 checkpoint_mode: CheckpointMode, // P2 checkpoint mode dest: usize, // P3 checkpoint result }, - // Divide lhs by rhs and place the remainder in dest register. + /// Divide lhs by rhs and place the remainder in dest register. Remainder { lhs: usize, rhs: usize, dest: usize, }, - // Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. + /// Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively. Jump { target_pc_lt: BranchOffset, target_pc_eq: BranchOffset, target_pc_gt: BranchOffset, }, - // Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1. + /// Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1. Move { source_reg: usize, dest_reg: usize, count: usize, }, - // If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. 
+ /// If the given register is a positive integer, decrement it by decrement_by and jump to the given PC. IfPos { reg: usize, target_pc: BranchOffset, decrement_by: usize, }, - // If the given register is not NULL, jump to the given PC. + /// If the given register is not NULL, jump to the given PC. NotNull { reg: usize, target_pc: BranchOffset, }, - // Compare two registers and jump to the given PC if they are equal. + /// Compare two registers and jump to the given PC if they are equal. Eq { lhs: usize, rhs: usize, @@ -149,7 +149,7 @@ pub enum Insn { /// This flag indicates that if either is null we should still jump. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if they are not equal. + /// Compare two registers and jump to the given PC if they are not equal. Ne { lhs: usize, rhs: usize, @@ -159,7 +159,7 @@ pub enum Insn { /// jump_if_null jumps if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is less than the right-hand side. Lt { lhs: usize, rhs: usize, @@ -175,7 +175,7 @@ pub enum Insn { /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is greater than the right-hand side. Gt { lhs: usize, rhs: usize, @@ -183,7 +183,7 @@ pub enum Insn { /// jump_if_null: Jump if either of the operands is null. Used for "jump when false" logic. flags: CmpInsFlags, }, - // Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. + /// Compare two registers and jump to the given PC if the left-hand side is greater than or equal to the right-hand side. Ge { lhs: usize, rhs: usize, @@ -205,13 +205,13 @@ pub enum Insn { /// P3. If r\[reg\] is null, jump iff r\[jump_if_null\] != 0 jump_if_null: bool, }, - // Open a cursor for reading. + /// Open a cursor for reading. OpenReadAsync { cursor_id: CursorID, root_page: PageIdx, }, - // Await for the completion of open cursor. + /// Await for the completion of open cursor. OpenReadAwait, /// Open a cursor for a virtual table. @@ -260,19 +260,19 @@ pub enum Insn { pc_if_next: BranchOffset, }, - // Open a cursor for a pseudo-table that contains a single row. + /// Open a cursor for a pseudo-table that contains a single row. OpenPseudo { cursor_id: CursorID, content_reg: usize, num_fields: usize, }, - // Rewind the cursor to the beginning of the B-Tree. + /// Rewind the cursor to the beginning of the B-Tree. RewindAsync { cursor_id: CursorID, }, - // Await for the completion of cursor rewind. + /// Await for the completion of cursor rewind. RewindAwait { cursor_id: CursorID, pc_if_empty: BranchOffset, @@ -287,32 +287,32 @@ pub enum Insn { pc_if_empty: BranchOffset, }, - // Read a column from the current row of the cursor. + /// Read a column from the current row of the cursor. Column { cursor_id: CursorID, column: usize, dest: usize, }, - // Make a record and write it to destination register. + /// Make a record and write it to destination register. MakeRecord { start_reg: usize, // P1 count: usize, // P2 dest_reg: usize, // P3 }, - // Emit a row of results. + /// Emit a row of results. 
ResultRow { start_reg: usize, // P1 count: usize, // P2 }, - // Advance the cursor to the next row. + /// Advance the cursor to the next row. NextAsync { cursor_id: CursorID, }, - // Await for the completion of cursor advance. + /// Await for the completion of cursor advance. NextAwait { cursor_id: CursorID, pc_if_next: BranchOffset, @@ -327,91 +327,91 @@ pub enum Insn { pc_if_next: BranchOffset, }, - // Halt the program. + /// Halt the program. Halt { err_code: usize, description: String, }, - // Start a transaction. + /// Start a transaction. Transaction { write: bool, }, - // Set database auto-commit mode and potentially rollback. + /// Set database auto-commit mode and potentially rollback. AutoCommit { auto_commit: bool, rollback: bool, }, - // Branch to the given PC. + /// Branch to the given PC. Goto { target_pc: BranchOffset, }, - // Stores the current program counter into register 'return_reg' then jumps to address target_pc. + /// Stores the current program counter into register 'return_reg' then jumps to address target_pc. Gosub { target_pc: BranchOffset, return_reg: usize, }, - // Returns to the program counter stored in register 'return_reg'. + /// Returns to the program counter stored in register 'return_reg'. Return { return_reg: usize, }, - // Write an integer value into a register. + /// Write an integer value into a register. Integer { value: i64, dest: usize, }, - // Write a float value into a register + /// Write a float value into a register Real { value: f64, dest: usize, }, - // If register holds an integer, transform it to a float + /// If register holds an integer, transform it to a float RealAffinity { register: usize, }, - // Write a string value into a register. + /// Write a string value into a register. String8 { value: String, dest: usize, }, - // Write a blob value into a register. + /// Write a blob value into a register. Blob { value: Vec, dest: usize, }, - // Read the rowid of the current row. + /// Read the rowid of the current row. RowId { cursor_id: CursorID, dest: usize, }, - // Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. + /// Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekRowid { cursor_id: CursorID, src_reg: usize, target_pc: BranchOffset, }, - // P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. - // This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. + /// P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. + /// This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. DeferredSeek { index_cursor_id: CursorID, table_cursor_id: CursorID, }, - // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. - // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. - // Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. 
+ /// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + /// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + /// Seek to the first index entry that is greater than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekGE { is_index: bool, cursor_id: CursorID, @@ -420,9 +420,9 @@ pub enum Insn { target_pc: BranchOffset, }, - // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. - // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. - // Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. + /// If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. + /// If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key. + /// Seek to the first index entry that is greater than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekGT { is_index: bool, cursor_id: CursorID, @@ -431,8 +431,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. IdxGE { cursor_id: CursorID, start_reg: usize, @@ -440,8 +440,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is greater than the key value then jump to P2. Otherwise fall through to the next instruction. IdxGT { cursor_id: CursorID, start_reg: usize, @@ -449,8 +449,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is lesser or equal than the key value then jump to P2. Otherwise fall through to the next instruction. 
+ /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is lesser or equal than the key value then jump to P2. Otherwise fall through to the next instruction. IdxLE { cursor_id: CursorID, start_reg: usize, @@ -458,8 +458,8 @@ pub enum Insn { target_pc: BranchOffset, }, - // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - // If the P1 index entry is lesser than the key value then jump to P2. Otherwise fall through to the next instruction. + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is lesser than the key value then jump to P2. Otherwise fall through to the next instruction. IdxLT { cursor_id: CursorID, start_reg: usize, @@ -467,7 +467,7 @@ pub enum Insn { target_pc: BranchOffset, }, - // Decrement the given register and jump to the given PC if the result is zero. + /// Decrement the given register and jump to the given PC if the result is zero. DecrJumpZero { reg: usize, target_pc: BranchOffset, @@ -485,39 +485,39 @@ pub enum Insn { func: AggFunc, }, - // Open a sorter. + /// Open a sorter. SorterOpen { cursor_id: CursorID, // P1 columns: usize, // P2 order: Record, // P4. 0 if ASC and 1 if DESC }, - // Insert a row into the sorter. + /// Insert a row into the sorter. SorterInsert { cursor_id: CursorID, record_reg: usize, }, - // Sort the rows in the sorter. + /// Sort the rows in the sorter. SorterSort { cursor_id: CursorID, pc_if_empty: BranchOffset, }, - // Retrieve the next row from the sorter. + /// Retrieve the next row from the sorter. SorterData { cursor_id: CursorID, // P1 dest_reg: usize, // P2 pseudo_cursor: usize, // P3 }, - // Advance to the next row in the sorter. + /// Advance to the next row in the sorter. SorterNext { cursor_id: CursorID, pc_if_next: BranchOffset, }, - // Function + /// Function Function { constant_mask: i32, // P1 start_reg: usize, // P2, start of argument registers @@ -618,7 +618,7 @@ pub enum Insn { is_temp: usize, }, - // Drop a table + /// Drop a table DropTable { /// The database within which this b-tree needs to be dropped (P1). db: usize, @@ -648,14 +648,14 @@ pub enum Insn { where_clause: String, }, - // Place the result of lhs >> rhs in dest register. + /// Place the result of lhs >> rhs in dest register. ShiftRight { lhs: usize, rhs: usize, dest: usize, }, - // Place the result of lhs << rhs in dest register. + /// Place the result of lhs << rhs in dest register. ShiftLeft { lhs: usize, rhs: usize, @@ -697,6 +697,7 @@ pub enum Insn { rhs: usize, dest: usize, }, + /// Do nothing. Continue downward to the next opcode. Noop, /// Write the current number of pages in database P1 to memory cell P2. 
    PageCount {

From 43daba9942e8611b01d0d0883865e0f61d072b6e Mon Sep 17 00:00:00 2001
From: Diego Reis
Date: Fri, 4 Apr 2025 01:32:13 -0300
Subject: [PATCH 019/425] core/translate: Add support for default values in INSERT statements

---
 core/translate/insert.rs   |  8 ++++++-
 testing/all.test           |  1 +
 testing/default_value.test | 43 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 testing/default_value.test

diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index 53368d30b..5fda098e6 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -297,6 +297,8 @@ struct ColumnMapping<'a> {
     /// If Some(i), use the i-th value from the VALUES tuple
     /// If None, use NULL (column was not specified in INSERT statement)
     value_index: Option<usize>,
+    /// The default value for the column, if defined
+    default_value: Option<&'a Expr>,
 }

 /// Resolves how each column in a table should be populated during an INSERT.
@@ -352,6 +354,7 @@ fn resolve_columns_for_insert<'a>(
         .map(|(i, col)| ColumnMapping {
             column: col,
             value_index: if i < num_values { Some(i) } else { None },
+            default_value: col.default.as_ref(),
         })
         .collect());
 }
@@ -362,6 +365,7 @@
     .map(|col| ColumnMapping {
         column: col,
         value_index: None,
+        default_value: col.default.as_ref(),
     })
     .collect();

@@ -423,8 +427,10 @@ fn populate_column_registers(
         if write_directly_to_rowid_reg {
             program.emit_insn(Insn::SoftNull { reg: target_reg });
         }
+    } else if let Some(default_expr) = mapping.default_value {
+        translate_expr(program, None, default_expr, target_reg, resolver)?;
     } else {
-        // Column was not specified - use NULL if it is nullable, otherwise error
+        // Column was not specified and has no DEFAULT - use NULL if it is nullable, otherwise error
         // Rowid alias columns can be NULL because we will autogenerate a rowid in that case.
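        // In summary, the branches above resolve a column's value in this
        // order: explicit value from the INSERT, then the column's DEFAULT
        // expression, then NULL / autogenerated rowid. For example, given
        //   CREATE TABLE t(x INTEGER PRIMARY KEY, y TEXT DEFAULT 'd');
        //   INSERT INTO t (x) VALUES (1);
        // y is populated by translating the DEFAULT expression 'd'.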
let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; if is_nullable { diff --git a/testing/all.test b/testing/all.test index 857224ef6..dc12c331a 100755 --- a/testing/all.test +++ b/testing/all.test @@ -28,3 +28,4 @@ source $testdir/scalar-functions-printf.test source $testdir/transactions.test source $testdir/update.test source $testdir/drop_table.test +source $testdir/default_value.test diff --git a/testing/default_value.test b/testing/default_value.test new file mode 100644 index 000000000..32a39144b --- /dev/null +++ b/testing/default_value.test @@ -0,0 +1,43 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test_on_specific_db {:memory:} default-value-text { + CREATE TABLE t1(x INTEGER PRIMARY KEY, y TEXT DEFAULT 'default_value'); + INSERT INTO t1 (x) VALUES (1); + SELECT y FROM t1 WHERE x = 1; +} {default_value} + +do_execsql_test_on_specific_db {:memory:} default-value-integer { + CREATE TABLE t2(x INTEGER PRIMARY KEY, y INTEGER DEFAULT 42); + INSERT INTO t2 (x) VALUES (1); + SELECT y FROM t2 WHERE x = 1; +} {42} + +do_execsql_test_on_specific_db {:memory:} default-value-real { + CREATE TABLE t3(x INTEGER PRIMARY KEY, y REAL DEFAULT 3.14); + INSERT INTO t3 (x) VALUES (1); + SELECT y FROM t3 WHERE x = 1; +} {3.14} + +do_execsql_test_on_specific_db {:memory:} default-value-null { + CREATE TABLE t5(x INTEGER PRIMARY KEY, y TEXT DEFAULT NULL); + INSERT INTO t5 (x) VALUES (1); + SELECT y FROM t5 WHERE x = 1; +} {} + +do_execsql_test_on_specific_db {:memory:} default-value-boolean { + CREATE TABLE t6(x INTEGER PRIMARY KEY, y BOOLEAN DEFAULT 1); + INSERT INTO t6 (x) VALUES (1); + SELECT y FROM t6 WHERE x = 1; +} {1} + +do_execsql_test_on_specific_db {:memory:} default-value-function { + CREATE TABLE t7(x INTEGER PRIMARY KEY, y INTEGER DEFAULT (ABS(-5))); + INSERT INTO t7 (x) VALUES (1); + SELECT y FROM t7 WHERE x = 1; +} {5} + + + From fd3335908c9b3eeaec949ad43e2c5879850eb5fe Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 4 Apr 2025 13:01:57 -0300 Subject: [PATCH 020/425] basic autocomplete for dot commands --- Cargo.lock | 23 +++++++ cli/Cargo.toml | 17 +++--- cli/helper.rs | 161 +++++++++++++++++++++++++++++++++++-------------- 3 files changed, 147 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c6e5eab5f..2e7a615c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -397,6 +397,18 @@ dependencies = [ "strsim", ] +[[package]] +name = "clap_complete" +version = "4.5.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06f5378ea264ad4f82bbc826628b5aad714a75abf6ece087e923010eb937fb6" +dependencies = [ + "clap", + "clap_lex", + "is_executable", + "shlex", +] + [[package]] name = "clap_derive" version = "4.5.32" @@ -1400,6 +1412,15 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45" +[[package]] +name = "is_executable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a1b5bad6f9072935961dfbf1cced2f3d129963d091b6f69f007fe04e758ae2" +dependencies = [ + "winapi", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1644,6 +1665,7 @@ dependencies = [ "anyhow", "cfg-if", "clap", + "clap_complete", "comfy-table", "csv", "ctrlc", @@ -1653,6 +1675,7 @@ dependencies = [ "miette", "nu-ansi-term 0.50.1", "rustyline", + "shlex", "syntect", "tracing", "tracing-subscriber", diff --git 
a/cli/Cargo.toml b/cli/Cargo.toml index 2a16f2dd0..ddd44519f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,24 +20,26 @@ path = "main.rs" [dependencies] anyhow = "1.0.75" +cfg-if = "1.0.0" clap = { version = "4.5.31", features = ["derive"] } +clap_complete = { version = "=4.5.47", features = ["unstable-dynamic"] } comfy-table = "7.1.4" +csv = "1.3.1" +ctrlc = "3.4.4" dirs = "5.0.1" env_logger = "0.10.1" limbo_core = { path = "../core", default-features = true, features = [ "completion", ] } +miette = { version = "7.4.0", features = ["fancy"] } +nu-ansi-term = "0.50.1" rustyline = { version = "15.0.0", default-features = true, features = [ "derive", ] } -ctrlc = "3.4.4" -csv = "1.3.1" -miette = { version = "7.4.0", features = ["fancy"] } -cfg-if = "1.0.0" -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -tracing = "0.1.41" +shlex = "1.3.0" syntect = "5.2.0" -nu-ansi-term = "0.50.1" +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } [features] @@ -46,4 +48,3 @@ io_uring = ["limbo_core/io_uring"] [build-dependencies] syntect = "5.2.0" - diff --git a/cli/helper.rs b/cli/helper.rs index 90549dd11..a464c3515 100644 --- a/cli/helper.rs +++ b/cli/helper.rs @@ -1,12 +1,18 @@ -use std::rc::Rc; -use std::sync::Arc; - +use clap::Parser; use limbo_core::{Connection, StepResult}; use nu_ansi_term::{Color, Style}; use rustyline::completion::{extract_word, Completer, Pair}; use rustyline::highlight::Highlighter; use rustyline::hint::HistoryHinter; use rustyline::{Completer, Helper, Hinter, Validator}; +use shlex::Shlex; +use std::cell::RefCell; +use std::marker::PhantomData; +use std::rc::Rc; +use std::sync::Arc; +use std::{ffi::OsString, path::PathBuf, str::FromStr as _}; + +use crate::commands::CommandParser; macro_rules! try_result { ($expr:expr, $err:expr) => { @@ -20,7 +26,7 @@ macro_rules! try_result { #[derive(Helper, Completer, Hinter, Validator)] pub struct LimboHelper { #[rustyline(Completer)] - completer: SqlCompleter, + completer: SqlCompleter, #[rustyline(Hinter)] hinter: HistoryHinter, } @@ -77,57 +83,72 @@ impl Highlighter for LimboHelper { } } -pub struct SqlCompleter { +pub struct SqlCompleter { conn: Rc, io: Arc, + // Has to be a ref cell as Rustyline takes immutable reference to self + // This problem would be solved with Reedline as it uses &mut self for completions + cmd: RefCell, + _cmd_phantom: PhantomData, } -impl SqlCompleter { +impl SqlCompleter { pub fn new(conn: Rc, io: Arc) -> Self { - Self { conn, io } - } -} - -// Got this from the FilenameCompleter. -// TODO have to see what chars break words in Sqlite -cfg_if::cfg_if! 
{ - if #[cfg(unix)] { - // rl_basic_word_break_characters, rl_completer_word_break_characters - const fn default_break_chars(c : char) -> bool { - matches!(c, ' ' | '\t' | '\n' | '"' | '\\' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | - '{' | '(' | '\0') + Self { + conn, + io, + cmd: C::command().into(), + _cmd_phantom: PhantomData::default(), } - const ESCAPE_CHAR: Option = Some('\\'); - // In double quotes, not all break_chars need to be escaped - // https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { matches!(c, '"' | '$' | '\\' | '`') } - } else if #[cfg(windows)] { - // Remove \ to make file completion works on windows - const fn default_break_chars(c: char) -> bool { - matches!(c, ' ' | '\t' | '\n' | '"' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | '{' | - '(' | '\0') - } - const ESCAPE_CHAR: Option = None; - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { c == '"' } // TODO Validate: only '"' ? - } else if #[cfg(target_arch = "wasm32")] { - const fn default_break_chars(c: char) -> bool { false } - const ESCAPE_CHAR: Option = None; - #[allow(dead_code)] - const fn double_quotes_special_chars(c: char) -> bool { false } } -} -impl Completer for SqlCompleter { - type Candidate = Pair; - - fn complete( + fn dot_completion( &self, - line: &str, - pos: usize, - _ctx: &rustyline::Context<'_>, - ) -> rustyline::Result<(usize, Vec)> { + mut line: &str, + mut pos: usize, + ) -> rustyline::Result<(usize, Vec)> { + line = &line[1..]; + pos = pos - 1; + + let (prefix_pos, _) = extract_word(line, pos, ESCAPE_CHAR, default_break_chars); + + let args = Shlex::new(line); + let mut args = std::iter::once("".to_owned()) + .chain(args) + .map(OsString::from) + .collect::>(); + if line.ends_with(' ') { + args.push(OsString::new()); + } + let arg_index = args.len() - 1; + // dbg!(&pos, line, &args, arg_index); + + let mut cmd = self.cmd.borrow_mut(); + match clap_complete::engine::complete( + &mut cmd, + args, + arg_index, + PathBuf::from_str(".").ok().as_deref(), + ) { + Ok(candidates) => { + let candidates = candidates + .iter() + .map(|candidate| Pair { + display: candidate.get_value().to_string_lossy().into_owned(), + replacement: candidate.get_value().to_string_lossy().into_owned(), + }) + .collect::>(); + + Ok((prefix_pos + 1, candidates)) + } + Err(e) => { + tracing::error!("Dot completion error: {e}"); + Ok((prefix_pos + 1, Vec::new())) + } + } + } + + fn sql_completion(&self, line: &str, pos: usize) -> rustyline::Result<(usize, Vec)> { // TODO: have to differentiate words if they are enclosed in single of double quotes let (prefix_pos, prefix) = extract_word(line, pos, ESCAPE_CHAR, default_break_chars); let mut candidates = Vec::new(); @@ -167,3 +188,51 @@ impl Completer for SqlCompleter { Ok((prefix_pos, candidates)) } } + +// Got this from the FilenameCompleter. +// TODO have to see what chars break words in Sqlite +cfg_if::cfg_if! 
{ + if #[cfg(unix)] { + // rl_basic_word_break_characters, rl_completer_word_break_characters + const fn default_break_chars(c : char) -> bool { + matches!(c, ' ' | '\t' | '\n' | '"' | '\\' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | + '{' | '(' | '\0') + } + const ESCAPE_CHAR: Option = Some('\\'); + // In double quotes, not all break_chars need to be escaped + // https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { matches!(c, '"' | '$' | '\\' | '`') } + } else if #[cfg(windows)] { + // Remove \ to make file completion works on windows + const fn default_break_chars(c: char) -> bool { + matches!(c, ' ' | '\t' | '\n' | '"' | '\'' | '`' | '@' | '$' | '>' | '<' | '=' | ';' | '|' | '&' | '{' | + '(' | '\0') + } + const ESCAPE_CHAR: Option = None; + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { c == '"' } // TODO Validate: only '"' ? + } else if #[cfg(target_arch = "wasm32")] { + const fn default_break_chars(c: char) -> bool { false } + const ESCAPE_CHAR: Option = None; + #[allow(dead_code)] + const fn double_quotes_special_chars(c: char) -> bool { false } + } +} + +impl Completer for SqlCompleter { + type Candidate = Pair; + + fn complete( + &self, + line: &str, + pos: usize, + _ctx: &rustyline::Context<'_>, + ) -> rustyline::Result<(usize, Vec)> { + if line.starts_with(".") { + self.dot_completion(line, pos) + } else { + self.sql_completion(line, pos) + } + } +} From f6a64a7b15df4e24acfd2afa357b2eda2d9cb678 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 12:35:30 -0400 Subject: [PATCH 021/425] Support OFFSET clause for LIMIT in UPDATE queries --- core/translate/delete.rs | 4 +- core/translate/emitter.rs | 92 ++++++++++++++++++++++--------------- core/translate/main_loop.rs | 6 +-- core/translate/plan.rs | 2 +- core/translate/update.rs | 11 +++-- 5 files changed, 67 insertions(+), 48 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 1e0d64a98..ba841f3ff 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -86,7 +86,5 @@ pub fn prepare_delete_plan( fn estimate_num_instructions(plan: &DeletePlan) -> usize { let base = 20; - let num_instructions = base + plan.table_references.len() * 10; - - num_instructions + base + plan.table_references.len() * 10 } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e4d05bfaa..537a6ca16 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -442,25 +442,11 @@ fn emit_program_for_update( // Exit on LIMIT 0 if let Some(0) = plan.limit { - epilogue(program, init_label, start_offset, TransactionMode::Read)?; + epilogue(program, init_label, start_offset, TransactionMode::None)?; program.result_columns = plan.returning.unwrap_or_default(); program.table_references = plan.table_references; return Ok(()); } - let after_main_loop_label = program.allocate_label(); - t_ctx.label_main_loop_end = Some(after_main_loop_label); - if plan.contains_constant_false_condition { - program.emit_insn(Insn::Goto { - target_pc: after_main_loop_label, - }); - } - let skip_label = program.allocate_label(); - init_loop( - program, - &mut t_ctx, - &plan.table_references, - OperationMode::UPDATE, - )?; if t_ctx.reg_limit.is_none() && plan.limit.is_some() { let reg = program.alloc_register(); t_ctx.reg_limit = Some(reg); @@ -469,15 +455,43 @@ fn emit_program_for_update( dest: reg, }); program.mark_last_insn_constant(); + if 
t_ctx.reg_offset.is_none() && plan.offset.is_some_and(|n| n.ne(&0)) { + let reg = program.alloc_register(); + t_ctx.reg_offset = Some(reg); + program.emit_insn(Insn::Integer { + value: plan.offset.unwrap() as i64, + dest: reg, + }); + program.mark_last_insn_constant(); + let combined_reg = program.alloc_register(); + t_ctx.reg_limit_offset_sum = Some(combined_reg); + program.emit_insn(Insn::OffsetLimit { + limit_reg: t_ctx.reg_limit.unwrap(), + offset_reg: reg, + combined_reg, + }); + } } + let after_main_loop_label = program.allocate_label(); + t_ctx.label_main_loop_end = Some(after_main_loop_label); + if plan.contains_constant_false_condition { + program.emit_insn(Insn::Goto { + target_pc: after_main_loop_label, + }); + } + init_loop( + program, + &mut t_ctx, + &plan.table_references, + OperationMode::UPDATE, + )?; open_loop( program, &mut t_ctx, &plan.table_references, &plan.where_clause, )?; - emit_update_insns(&plan, &t_ctx, program)?; - program.resolve_label(skip_label, program.offset()); + emit_update_insns(&plan, &mut t_ctx, program)?; close_loop(program, &mut t_ctx, &plan.table_references)?; program.resolve_label(after_main_loop_label, program.offset()); @@ -491,10 +505,11 @@ fn emit_program_for_update( fn emit_update_insns( plan: &UpdatePlan, - t_ctx: &TranslateCtx, + t_ctx: &mut TranslateCtx, program: &mut ProgramBuilder, ) -> crate::Result<()> { let table_ref = &plan.table_references.first().unwrap(); + let loop_labels = t_ctx.labels_main_loop.first().unwrap(); let (cursor_id, index) = match &table_ref.op { Operation::Scan { .. } => (program.resolve_cursor_id(&table_ref.identifier), None), Operation::Search(search) => match search { @@ -508,24 +523,6 @@ fn emit_update_insns( }, _ => return Ok(()), }; - - for cond in plan.where_clause.iter().filter(|c| c.is_constant()) { - let jump_target = program.allocate_label(); - let meta = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true: jump_target, - jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), - }; - translate_condition_expr( - program, - &plan.table_references, - &cond.expr, - meta, - &t_ctx.resolver, - )?; - program.resolve_label(jump_target, program.offset()); - } - let first_col_reg = program.alloc_registers(table_ref.table.columns().len()); let rowid_reg = program.alloc_register(); program.emit_insn(Insn::RowId { cursor_id, @@ -537,6 +534,29 @@ fn emit_update_insns( target_pc: t_ctx.label_main_loop_end.unwrap(), }); + if let Some(offset) = t_ctx.reg_offset { + program.emit_insn(Insn::IfPos { + reg: offset, + target_pc: loop_labels.next, + decrement_by: 1, + }); + } + + for cond in plan.where_clause.iter().filter(|c| c.is_constant()) { + let meta = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true: BranchOffset::Placeholder, + jump_target_when_false: loop_labels.next, + }; + translate_condition_expr( + program, + &plan.table_references, + &cond.expr, + meta, + &t_ctx.resolver, + )?; + } + let first_col_reg = program.alloc_registers(table_ref.table.columns().len()); // we scan a column at a time, loading either the column's values, or the new value // from the Set expression, into registers so we can emit a MakeRecord and update the row. 
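    // Illustrative walk-through of the two registers set up above: for
    //   UPDATE t SET a = 10 LIMIT 1 OFFSET 3;
    // reg_offset starts at 3, so the IfPos above decrements it and jumps to
    // the next row for the first three rows seen, leaving them untouched,
    // while OffsetLimit has already folded the offset into the limit counter,
    // so the loop still visits enough rows to update exactly one.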
for idx in 0..table_ref.columns().len() { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7b51a2328..0dca99b20 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -37,11 +37,11 @@ pub struct LeftJoinMetadata { #[derive(Debug, Clone, Copy)] pub struct LoopLabels { /// jump to the start of the loop body - loop_start: BranchOffset, + pub loop_start: BranchOffset, /// jump to the NextAsync instruction (or equivalent) - next: BranchOffset, + pub next: BranchOffset, /// jump to the end of the loop, exiting it - loop_end: BranchOffset, + pub loop_end: BranchOffset, } impl LoopLabels { diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3d93548de..af14f0352 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -176,8 +176,8 @@ pub struct UpdatePlan { pub set_clauses: Vec<(usize, ast::Expr)>, pub where_clause: Vec, pub order_by: Option>, - // TODO: support OFFSET pub limit: Option, + pub offset: Option, // TODO: optional RETURNING clause pub returning: Option>, // whether the WHERE clause is always false diff --git a/core/translate/update.rs b/core/translate/update.rs index a9388ee13..4fd3385e0 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -157,11 +157,11 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< Some(&result_columns), &mut where_clause, )?; - let limit = if let Some(Ok((limit, _))) = body.limit.as_ref().map(|l| parse_limit(*l.clone())) { - limit - } else { - None - }; + let (limit, offset) = body + .limit + .as_ref() + .map(|l| parse_limit(*l.clone())) + .unwrap_or(Ok((None, None)))?; Ok(Plan::Update(UpdatePlan { table_references, set_clauses, @@ -169,6 +169,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< returning: Some(result_columns), order_by, limit, + offset, contains_constant_false_condition: false, })) } From 13e084351d094642008caafd0dc3d2eba2d60371 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 12:38:18 -0400 Subject: [PATCH 022/425] Change parse_limit function to accept reference value to ast::Limit --- core/translate/delete.rs | 2 +- core/translate/planner.rs | 23 +++++++++++++---------- core/translate/select.rs | 2 +- core/translate/update.rs | 6 +++++- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index ba841f3ff..9652048fe 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -68,7 +68,7 @@ pub fn prepare_delete_plan( )?; // Parse the LIMIT/OFFSET clause - let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |l| parse_limit(*l))?; + let (resolved_limit, resolved_offset) = limit.map_or(Ok((None, None)), |l| parse_limit(&l))?; let plan = DeletePlan { table_references, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 953a15e59..1b78c954c 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -850,30 +850,33 @@ fn parse_join<'a>( Ok(()) } -pub fn parse_limit(limit: Limit) -> Result<(Option, Option)> { - let offset_val = match limit.offset { +pub fn parse_limit(limit: &Limit) -> Result<(Option, Option)> { + let offset_val = match &limit.offset { Some(offset_expr) => match offset_expr { Expr::Literal(ast::Literal::Numeric(n)) => n.parse().ok(), // If OFFSET is negative, the result is as if OFFSET is zero - Expr::Unary(UnaryOperator::Negative, expr) => match *expr { - Expr::Literal(ast::Literal::Numeric(n)) => n.parse::().ok().map(|num| -num), 
-                _ => crate::bail_parse_error!("Invalid OFFSET clause"),
-            },
+            Expr::Unary(UnaryOperator::Negative, expr) => {
+                if let Expr::Literal(ast::Literal::Numeric(ref n)) = &**expr {
+                    n.parse::<isize>().ok().map(|num| -num)
+                } else {
+                    crate::bail_parse_error!("Invalid OFFSET clause");
+                }
+            }
             _ => crate::bail_parse_error!("Invalid OFFSET clause"),
         },
         None => Some(0),
     };
-    if let Expr::Literal(ast::Literal::Numeric(n)) = limit.expr {
+    if let Expr::Literal(ast::Literal::Numeric(n)) = &limit.expr {
         Ok((n.parse().ok(), offset_val))
-    } else if let Expr::Unary(UnaryOperator::Negative, expr) = limit.expr {
-        if let Expr::Literal(ast::Literal::Numeric(n)) = *expr {
+    } else if let Expr::Unary(UnaryOperator::Negative, expr) = &limit.expr {
+        if let Expr::Literal(ast::Literal::Numeric(n)) = &**expr {
             let limit_val = n.parse::<isize>().ok().map(|num| -num);
             Ok((limit_val, offset_val))
         } else {
             crate::bail_parse_error!("Invalid LIMIT clause");
         }
-    } else if let Expr::Id(id) = limit.expr {
+    } else if let Expr::Id(id) = &limit.expr {
         if id.0.eq_ignore_ascii_case("true") {
             Ok((Some(1), offset_val))
         } else if id.0.eq_ignore_ascii_case("false") {
diff --git a/core/translate/select.rs b/core/translate/select.rs
index fe7656f16..bde61880f 100644
--- a/core/translate/select.rs
+++ b/core/translate/select.rs
@@ -370,7 +370,7 @@ pub fn prepare_select_plan<'a>(

     // Parse the LIMIT/OFFSET clause
     (plan.limit, plan.offset) =
-        select.limit.map_or(Ok((None, None)), |l| parse_limit(*l))?;
+        select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?;

     // Return the unoptimized query plan
     Ok(Plan::Select(plan))
diff --git a/core/translate/update.rs b/core/translate/update.rs
index 4fd3385e0..f282fff24 100644
--- a/core/translate/update.rs
+++ b/core/translate/update.rs
@@ -151,17 +151,21 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<Plan> {
         })
         .collect()
     });
+
     // Parse the WHERE clause
     parse_where(
         body.where_clause.as_ref().map(|w| *w.clone()),
         &table_references,
         Some(&result_columns),
         &mut where_clause,
     )?;
+
+    // Parse the LIMIT/OFFSET clause
     let (limit, offset) = body
         .limit
         .as_ref()
-        .map(|l| parse_limit(*l.clone()))
+        .map(|l| parse_limit(l))
         .unwrap_or(Ok((None, None)))?;
+
     Ok(Plan::Update(UpdatePlan {
         table_references,
         set_clauses,

From c6c3f39959c59e2114aaedb2eb2e438d70051432 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Fri, 4 Apr 2025 12:49:12 -0400
Subject: [PATCH 023/425] Add test cases in python CLI tests for OFFSET on LIMIT clauses

---
 testing/cli_tests/cli_test_cases.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py
index 120d20070..2e46b3f03 100755
--- a/testing/cli_tests/cli_test_cases.py
+++ b/testing/cli_tests/cli_test_cases.py
@@ -264,6 +264,32 @@ def test_update_with_limit():
     limbo.quit()


+def test_update_with_limit_and_offset():
+    limbo = TestLimboShell(
+        "CREATE TABLE t (a,b,c); insert into t values (1,2,3), (4,5,6), (7,8,9), (1,2,3),(4,5,6), (7,8,9);"
+    )
+    limbo.run_test("update-limit-offset", "UPDATE t SET a = 10 LIMIT 1 OFFSET 3;", "")
+    limbo.run_test(
+        "update-limit-offset-result", "SELECT COUNT(*) from t WHERE a = 10;", "1"
+    )
+    limbo.run_test("update-limit-result", "SELECT a from t LIMIT 4;", "1\n4\n7\n10")
+    limbo.run_test(
+        "update-limit-offset-zero", "UPDATE t SET a = 100 LIMIT 0 OFFSET 0;", ""
+    )
+    limbo.run_test(
+        "update-limit-zero-result", "SELECT COUNT(*) from t WHERE a = 100;", "0"
+    )
+    limbo.run_test("update-limit-all", "UPDATE t SET a = 100 LIMIT -1 OFFSET 1;", "")
+    limbo.run_test("update-limit-result", "SELECT COUNT(*) from t WHERE a = 100;", "5")
+    limbo.run_test(
+        "update-limit-where", "UPDATE t SET a = 333 WHERE b = 5 LIMIT 1 OFFSET 2;", ""
+    )
+    limbo.run_test(
+        "update-limit-where-result", "SELECT COUNT(*) from t WHERE a = 333;", "0"
+    )
+    limbo.quit()
+
+
 if __name__ == "__main__":
     print("Running all Limbo CLI tests...")
     test_basic_queries()
@@ -282,4 +308,5 @@ if __name__ == "__main__":
     test_import_csv_skip()
     test_table_patterns()
     test_update_with_limit()
+    test_update_with_limit_and_offset()
     print("All tests have passed")

From e3985b699446c4a81925d1ee84578c45d67f541a Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Fri, 4 Apr 2025 12:51:37 -0400
Subject: [PATCH 024/425] Remove unused mut ref from emit_update_instructions for tx context

---
 core/translate/emitter.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs
index 537a6ca16..fac46fee6 100644
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -491,7 +491,7 @@ fn emit_program_for_update(
         &plan.table_references,
         &plan.where_clause,
     )?;
-    emit_update_insns(&plan, &mut t_ctx, program)?;
+    emit_update_insns(&plan, &t_ctx, program)?;
     close_loop(program, &mut t_ctx, &plan.table_references)?;

     program.resolve_label(after_main_loop_label, program.offset());
@@ -505,7 +505,7 @@ fn emit_program_for_update(

 fn emit_update_insns(
     plan: &UpdatePlan,
-    t_ctx: &mut TranslateCtx,
+    t_ctx: &TranslateCtx,
     program: &mut ProgramBuilder,
 ) -> crate::Result<()> {
     let table_ref = &plan.table_references.first().unwrap();

From 57af9c71ba4bb305eb71c6d7e34a56b43fec66cc Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Fri, 4 Apr 2025 17:39:09 -0300
Subject: [PATCH 025/425] customize completion candidates for some args

---
 cli/app.rs           |  3 ++-
 cli/commands/args.rs | 40 +++++++++++++++++++++++++++++++++-------
 cli/commands/mod.rs  |  3 ---
 cli/helper.rs        |  1 +
 4 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/cli/app.rs b/cli/app.rs
index f82c587bd..40e187d43 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -467,7 +467,8 @@ impl<'a> Limbo<'a> {
         }
         match CommandParser::try_parse_from(args) {
             Err(err) => {
-                let _ = self.write_fmt(format_args!("{err}"));
+                // Let clap print with styled colors instead
+                let _ = err.print();
             }
             Ok(cmd) => match cmd.command {
                 Command::Exit(args) => {
diff --git a/cli/commands/args.rs b/cli/commands/args.rs
index e0fd10994..3bb78d0b8 100644
--- a/cli/commands/args.rs
+++ b/cli/commands/args.rs
@@ -1,6 +1,7 @@
 use clap::{Args, ValueEnum};
+use clap_complete::{ArgValueCompleter, CompletionCandidate, PathCompleter};

-use crate::input::OutputMode;
+use crate::{input::OutputMode, opcodes_dictionary::OPCODE_DESCRIPTIONS};

 #[derive(Debug, Clone, Args)]
 pub struct ExitArgs {
@@ -12,13 +13,17 @@ pub struct ExitArgs {
 #[derive(Debug, Clone, Args)]
 pub struct OpenArgs {
     /// Path to open database
+    #[arg(add = ArgValueCompleter::new(PathCompleter::file()))]
     pub path: String,
+    // TODO see how to have this completed with the output of List Vfs function
+    // Currently not possible to pass arbitrary
     /// Name of VFS
     pub vfs_name: Option<String>,
 }

 #[derive(Debug, Clone, Args)]
 pub struct SchemaArgs {
+    // TODO depends on PRAGMA table_list for completions
     /// Table name to visualize schema
     pub table_name: Option<String>,
 }

 #[derive(Debug, Clone, Args)]
 pub struct SetOutputArgs {
     /// File path to send output to
+    #[arg(add = ArgValueCompleter::new(PathCompleter::file()))]
     pub path: Option<String>,
 }

@@ -35,15 +41,40 @@
 pub struct OutputModeArgs {
     /// Output mode to select
     pub mode: OutputMode,
 }

+fn opcodes_completer(current: &std::ffi::OsStr) -> Vec<CompletionCandidate> {
+    let mut completions = vec![];
+
+    let Some(current) = current.to_str() else {
+        return completions;
+    };
+
+    let current = current.to_lowercase();
+
+    let opcodes = &OPCODE_DESCRIPTIONS;
+
+    for op in opcodes {
+        // TODO if someone knows how to do prefix_match case-insensitively in Rust
+        // without converting the String to lowercase first, please fix this.
+        let op_name = op.name.to_ascii_lowercase();
+        if op_name.starts_with(&current) {
+            completions.push(CompletionCandidate::new(op.name).help(Some(op.description.into())));
+        }
+    }
+
+    completions
+}
+
 #[derive(Debug, Clone, Args)]
 pub struct OpcodesArgs {
     /// Opcode to display description
+    #[arg(add = ArgValueCompleter::new(opcodes_completer))]
     pub opcode: Option<String>,
 }

 #[derive(Debug, Clone, Args)]
 pub struct CwdArgs {
     /// Target directory
+    #[arg(add = ArgValueCompleter::new(PathCompleter::dir()))]
     pub directory: String,
 }

@@ -72,11 +103,6 @@ pub struct TablesArgs {
 #[derive(Debug, Clone, Args)]
 pub struct LoadExtensionArgs {
     /// Path to extension file
-    pub path: String,
-}
-
-#[derive(Debug, Clone, Args)]
-pub struct ListVfsArgs {
-    /// Path to extension file
+    #[arg(add = ArgValueCompleter::new(PathCompleter::file()))]
     pub path: String,
 }
diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs
index 33261a860..757cee530 100644
--- a/cli/commands/mod.rs
+++ b/cli/commands/mod.rs
@@ -35,9 +35,6 @@ pub enum Command {
     /// Open a database file
     #[command(display_name = ".open")]
     Open(OpenArgs),
-    /// Print this message or the help of the given subcommand(s)
-    // #[command(display_name = ".help")]
-    // Help,
     /// Display schema for a table
     #[command(display_name = ".schema")]
     Schema(SchemaArgs),
diff --git a/cli/helper.rs b/cli/helper.rs
index a464c3515..f0d396ae0 100644
--- a/cli/helper.rs
+++ b/cli/helper.rs
@@ -107,6 +107,7 @@ impl<C: Parser> SqlCompleter<C> {
         mut line: &str,
         mut pos: usize,
     ) -> rustyline::Result<(usize, Vec<Pair>)> {
+        // TODO maybe check to see if the line is empty and then just output the command names
         line = &line[1..];
         pos = pos - 1;

From d5fa37ab66d3b0e91b0bdb85e2a9b374b2378d5e Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Fri, 4 Apr 2025 17:44:34 -0300
Subject: [PATCH 026/425] remove error debug

---
 cli/helper.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/cli/helper.rs b/cli/helper.rs
index f0d396ae0..70194234d 100644
--- a/cli/helper.rs
+++ b/cli/helper.rs
@@ -142,10 +142,7 @@ impl<C: Parser> SqlCompleter<C> {

                 Ok((prefix_pos + 1, candidates))
             }
-            Err(e) => {
-                tracing::error!("Dot completion error: {e}");
-                Ok((prefix_pos + 1, Vec::new()))
-            }
+            Err(_) => Ok((prefix_pos + 1, Vec::new())),
         }
     }

From 907794cb0775b75720bc51472cecd5ca6ffc13b9 Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Fri, 4 Apr 2025 19:04:42 -0300
Subject: [PATCH 027/425] add path completion for .import

---
 cli/commands/import.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cli/commands/import.rs b/cli/commands/import.rs
index df33500dd..38ec5df45 100644
--- a/cli/commands/import.rs
+++ b/cli/commands/import.rs
@@ -1,4 +1,5 @@
 use clap::Args;
+use clap_complete::{ArgValueCompleter, PathCompleter};
 use limbo_core::Connection;
 use std::{fs::File, io::Write, path::PathBuf, rc::Rc, sync::Arc};
@@ -13,6 +14,7 @@ pub struct ImportArgs {
     /// Skip the first N rows of input
     #[arg(long, default_value = "0")]
     skip: u64,
+    #[arg(add = ArgValueCompleter::new(PathCompleter::file()))]
     file: PathBuf,
     table: String,
 }
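The ArgValueCompleter hooks introduced in the recent patches all share one
shape: a plain function from the partially typed argument to a list of
CompletionCandidates, registered on a field via #[arg(add = ...)]. A minimal
sketch of a custom completer in that style (the table-name list here is
hypothetical and not part of these patches):

    use std::ffi::OsStr;
    use clap_complete::CompletionCandidate;

    // Filter a static candidate list by the case-insensitive prefix typed so
    // far, mirroring the opcodes_completer added in PATCH 025.
    fn table_name_completer(current: &OsStr) -> Vec<CompletionCandidate> {
        let Some(current) = current.to_str() else {
            return Vec::new();
        };
        let current = current.to_lowercase();
        ["users", "products", "orders"]
            .into_iter()
            .filter(|name| name.to_lowercase().starts_with(&current))
            .map(CompletionCandidate::new)
            .collect()
    }

It would be registered exactly like the path completers above, e.g.
#[arg(add = ArgValueCompleter::new(table_name_completer))].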
From 3fa1cb56a514f84ae14909e255d0c634bf36d78e Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sat, 5 Apr 2025 09:10:48 +0900 Subject: [PATCH 028/425] Update cargo-dist and switch to Astral's version --- .github/workflows/release.yml | 72 ++++++++++++++++++++--------------- Cargo.toml | 26 ------------- dist-workspace.toml | 28 ++++++++++++++ 3 files changed, 69 insertions(+), 57 deletions(-) create mode 100644 dist-workspace.toml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index daf032087..ca92fbd34 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,12 @@ -# Copyright 2022-2024, axodotdev +# This file was autogenerated by dist: https://github.com/astral-sh/cargo-dist +# +# Copyright 2025 Astral Software Inc. # SPDX-License-Identifier: MIT or Apache-2.0 # # CI that: # # * checks for a Git Tag that looks like a release -# * builds artifacts with cargo-dist (archives, installers, hashes) +# * builds artifacts with dist (archives, installers, hashes) # * uploads those artifacts to temporary workflow zip # * on success, uploads the artifacts to a GitHub Release # @@ -24,10 +26,10 @@ permissions: # must be a Cargo-style SemVer Version (must have at least major.minor.patch). # # If PACKAGE_NAME is specified, then the announcement will be for that -# package (erroring out if it doesn't have the given version or isn't cargo-dist-able). +# package (erroring out if it doesn't have the given version or isn't dist-able). # # If PACKAGE_NAME isn't specified, then the announcement will be for all -# (cargo-dist-able) packages in the workspace with that version (this mode is +# (dist-able) packages in the workspace with that version (this mode is # intended for workspaces with only one dist-able package, or with all dist-able # packages versioned/released in lockstep). # @@ -45,9 +47,9 @@ on: - '**[0-9]+.[0-9]+.[0-9]+*' jobs: - # Run 'cargo dist plan' (or host) to determine what tasks we need to do + # Run 'dist plan' (or host) to determine what tasks we need to do plan: - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.plan.outputs.manifest }} tag: ${{ !github.event.pull_request && github.ref_name || '' }} @@ -59,16 +61,16 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cargo-dist + - name: Install dist # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.21.0/cargo-dist-installer.sh | sh" - - name: Cache cargo-dist + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/cargo-dist/releases/download/v0.28.3/cargo-dist-installer.sh | sh" + - name: Cache dist uses: actions/upload-artifact@v4 with: name: cargo-dist-cache - path: ~/.cargo/bin/cargo-dist + path: ~/.cargo/bin/dist # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. @@ -76,8 +78,8 @@ jobs: # but also really annoying to build CI around when it needs secrets to work right.) 
- id: plan run: | - cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json - echo "cargo dist ran successfully" + dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json + echo "dist ran successfully" cat plan-dist-manifest.json echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" @@ -95,18 +97,19 @@ jobs: if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} strategy: fail-fast: false - # Target platforms/runners are computed by cargo-dist in create-release. + # Target platforms/runners are computed by dist in create-release. # Each member of the matrix has the following arguments: # # - runner: the github runner - # - dist-args: cli flags to pass to cargo dist - # - install-dist: expression to run to install cargo-dist on the runner + # - dist-args: cli flags to pass to dist + # - install-dist: expression to run to install dist on the runner # # Typically there will be: # - 1 "global" task that builds universal installers # - N "local" tasks that build each platform's binaries and platform-specific installers matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} runs-on: ${{ matrix.runner }} + container: ${{ matrix.container && matrix.container.image || null }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json @@ -117,8 +120,15 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cargo-dist - run: ${{ matrix.install_dist }} + - name: Install Rust non-interactively if not already installed + if: ${{ matrix.container }} + run: | + if ! 
command -v cargo > /dev/null 2>&1; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + fi + - name: Install dist + run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -132,10 +142,10 @@ jobs: - name: Build artifacts run: | # Actually do builds and make zips and whatnot - cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json + echo "dist ran successfully" - name: Attest - uses: actions/attest-build-provenance@v1 + uses: actions/attest-build-provenance@v2 with: subject-path: "target/distrib/*${{ join(matrix.targets, ', ') }}*" - id: cargo-dist @@ -147,7 +157,7 @@ jobs: run: | # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" + dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" echo "EOF" >> "$GITHUB_OUTPUT" cp dist-manifest.json "$BUILD_MANIFEST_NAME" @@ -164,7 +174,7 @@ jobs: needs: - plan - build-local-artifacts - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json @@ -172,12 +182,12 @@ jobs: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cached cargo-dist + - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist + - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. 
checksums) - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -188,8 +198,8 @@ jobs: - id: cargo-dist shell: bash run: | - cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json - echo "cargo dist ran successfully" + dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json + echo "dist ran successfully" # Parse out what we just built and upload it to scratch storage echo "paths<> "$GITHUB_OUTPUT" @@ -214,19 +224,19 @@ jobs: if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" outputs: val: ${{ steps.host.outputs.manifest }} steps: - uses: actions/checkout@v4 with: submodules: recursive - - name: Install cached cargo-dist + - name: Install cached dist uses: actions/download-artifact@v4 with: name: cargo-dist-cache path: ~/.cargo/bin/ - - run: chmod +x ~/.cargo/bin/cargo-dist + - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v4 @@ -237,7 +247,7 @@ jobs: - id: host shell: bash run: | - cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json + dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json echo "artifacts uploaded and released successfully" cat dist-manifest.json echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" @@ -278,7 +288,7 @@ jobs: # still allowing individual publish jobs to skip themselves (for prereleases). # "host" however must run to completion, no skipping allowed! 
if: ${{ always() && needs.host.result == 'success' }} - runs-on: "ubuntu-20.04" + runs-on: "ubuntu-22.04" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: diff --git a/Cargo.toml b/Cargo.toml index edbae0cec..ac7490880 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,32 +50,6 @@ limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre limbo_time = { path = "extensions/time", version = "0.0.19-pre.4" } limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.4" } -# Config for 'cargo dist' -[workspace.metadata.dist] -# The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.21.0" -# CI backends to support -ci = "github" -# The installers to generate for each app -installers = ["shell", "powershell"] -# Target platforms to build apps for (Rust target-triple syntax) -targets = [ - "aarch64-apple-darwin", - "x86_64-apple-darwin", - "x86_64-unknown-linux-gnu", - "x86_64-pc-windows-msvc", -] -# Which actions to run on pull requests -pr-run-mode = "plan" -# Path that installers should place binaries in -install-path = "~/.limbo" -# Whether to install an updater program -install-updater = true -# Whether to consider the binaries in a package for distribution (defaults true) -dist = false -# Whether to enable GitHub Attestations -github-attestations = true - [profile.release] debug = "line-tables-only" codegen-units = 1 diff --git a/dist-workspace.toml b/dist-workspace.toml new file mode 100644 index 000000000..bdeb71f97 --- /dev/null +++ b/dist-workspace.toml @@ -0,0 +1,28 @@ +[workspace] +members = ["cargo:."] + +# Config for 'dist' +[dist] +# The preferred dist version to use in CI (Cargo.toml SemVer syntax) +cargo-dist-version = "0.28.3" +# CI backends to support +ci = "github" +# The installers to generate for each app +installers = ["shell", "powershell"] +# Target platforms to build apps for (Rust target-triple syntax) +targets = [ + "aarch64-apple-darwin", + "x86_64-apple-darwin", + "x86_64-unknown-linux-gnu", + "x86_64-pc-windows-msvc", +] +# Which actions to run on pull requests +pr-run-mode = "plan" +# Path that installers should place binaries in +install-path = "~/.limbo" +# Whether to install an updater program +install-updater = true +# Whether to consider the binaries in a package for distribution (defaults true) +dist = false +# Whether to enable GitHub Attestations +github-attestations = true From 0c9464e3fc3bc80a44a4ca01ab327865ad42b99a Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Sat, 5 Apr 2025 15:15:10 +0300 Subject: [PATCH 029/425] reduce vec allocations, add comments for magic ifs --- core/translate/emitter.rs | 6 +++- core/translate/group_by.rs | 56 ++++++++++++++++++++----------------- core/translate/main_loop.rs | 12 ++++---- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 80a6db6d5..8422cea0f 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -247,12 +247,16 @@ pub fn emit_query<'a>( target_pc: after_main_loop_label, }); } + + // For non-grouped aggregation queries that also have non-aggregate columns, + // we need to ensure non-aggregate columns are only emitted once. + // This flag helps track whether we've already emitted these columns. 
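+    // For example (an illustrative query), `SELECT name, count(*) FROM users`
+    // produces one output row: `name` must be evaluated for just one of the
+    // visited rows, while `count(*)` accumulates over all of them, so the
+    // flag below keeps the non-aggregate columns from being emitted again.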
if !plan.aggregates.is_empty() && plan.group_by.is_none() && plan.result_columns.iter().any(|c| !c.contains_aggregates) { let flag = program.alloc_register(); - program.emit_int(0, flag); + program.emit_int(0, flag); // Initialize flag to 0 (not yet emitted) t_ctx.reg_nonagg_emit_once_flag = Some(flag); } diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 5adcf658f..68f732cbb 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, rc::Rc}; +use std::rc::Rc; use limbo_sqlite3_parser::ast; @@ -176,32 +176,29 @@ pub fn emit_group_by<'a>( let non_group_by_non_agg_column_count = non_group_by_non_agg_column_count.unwrap(); // We have to know which group by expr present in resulting set - let group_by_expr_in_res_cols: Vec = group_by - .exprs - .iter() - .map(|expr| { - plan.result_columns - .iter() - .any(|e| exprs_are_equivalent(&e.expr, expr)) - }) - .collect(); + let group_by_expr_in_res_cols = group_by.exprs.iter().map(|expr| { + plan.result_columns + .iter() + .any(|e| exprs_are_equivalent(&e.expr, expr)) + }); // Create a map from sorter column index to result register // This helps track where each column from the sorter should be stored - let mut column_register_mapping = HashMap::new(); + let mut column_register_mapping = + vec![None; group_by_count + non_group_by_non_agg_column_count]; let mut next_reg = reg_non_aggregate_exprs_acc; // Map GROUP BY columns that are in the result set to registers - for (i, is_in_result) in group_by_expr_in_res_cols.iter().enumerate() { - if *is_in_result { - column_register_mapping.insert(i, next_reg); + for (i, is_in_result) in group_by_expr_in_res_cols.clone().enumerate() { + if is_in_result { + column_register_mapping[i] = Some(next_reg); next_reg += 1; } } // Handle other non-aggregate columns that aren't part of GROUP BY and not part of Aggregation function for i in group_by_count..group_by_count + non_group_by_non_agg_column_count { - column_register_mapping.insert(i, next_reg); + column_register_mapping[i] = Some(next_reg); next_reg += 1; } @@ -344,12 +341,14 @@ pub fn emit_group_by<'a>( }); // Read non-aggregate columns from the current row - for (sorter_column_index, dest_reg) in column_register_mapping.iter() { - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: *sorter_column_index, - dest: *dest_reg, - }); + for (sorter_column_index, dest_reg) in column_register_mapping.iter().enumerate() { + if let Some(dest_reg) = dest_reg { + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: *dest_reg, + }); + } } // Mark that we've stored data for this group @@ -416,10 +415,15 @@ pub fn emit_group_by<'a>( } // Map GROUP BY expressions to their registers in the result set - for (i, expr) in group_by.exprs.iter().enumerate() { - if group_by_expr_in_res_cols[i] { - if let Some(reg) = &column_register_mapping.get(&i) { - t_ctx.resolver.expr_to_reg_cache.push((expr, **reg)); + for (i, (expr, is_in_result)) in group_by + .exprs + .iter() + .zip(group_by_expr_in_res_cols) + .enumerate() + { + if is_in_result { + if let Some(reg) = &column_register_mapping.get(i).and_then(|opt| *opt) { + t_ctx.resolver.expr_to_reg_cache.push((expr, *reg)); } } } @@ -432,7 +436,7 @@ pub fn emit_group_by<'a>( for (idx, rc) in non_agg_cols.enumerate() { let sorter_idx = group_by_count + idx; - if let Some(®) = column_register_mapping.get(&sorter_idx) { + if let Some(reg) = 
column_register_mapping.get(sorter_idx).and_then(|opt| *opt) { t_ctx.resolver.expr_to_reg_cache.push((&rc.expr, reg)); } } diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 50f5a948c..f6f139d26 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -613,13 +613,12 @@ fn emit_loop_source( .filter(|rc| { !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs) }) - .map(|rc| &rc.expr) - .collect::>(); - + .map(|rc| &rc.expr); + let non_agg_count = non_group_by_non_agg_expr.clone().count(); // Store the count of non-GROUP BY, non-aggregate columns in the metadata // This will be used later during aggregation processing t_ctx.meta_group_by.as_mut().map(|meta| { - meta.non_group_by_non_agg_column_count = Some(non_group_by_non_agg_expr.len()); + meta.non_group_by_non_agg_column_count = Some(non_agg_count); meta }); @@ -631,8 +630,7 @@ fn emit_loop_source( .sum::(); // Calculate total number of registers needed for all columns in the sorter - let column_count = - group_by.exprs.len() + aggregate_arguments_count + non_group_by_non_agg_expr.len(); + let column_count = group_by.exprs.len() + aggregate_arguments_count + non_agg_count; // Allocate a contiguous block of registers for all columns let start_reg = program.alloc_registers(column_count); @@ -655,7 +653,7 @@ fn emit_loop_source( // Step 2: Process columns that aren't part of GROUP BY and don't contain aggregates // Example: SELECT col1, col2, SUM(col3) FROM table GROUP BY col1 // Here col2 would be processed in this loop if it's in the result set - for expr in non_group_by_non_agg_expr.iter() { + for expr in non_group_by_non_agg_expr { let key_reg = cur_reg; cur_reg += 1; translate_expr( From 293974e692e63b3151a8c64005aba678fc596014 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:24:46 -0400 Subject: [PATCH 030/425] Update COMPAT.md --- COMPAT.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 7fff65b87..d81650502 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -56,7 +56,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | ATTACH DATABASE | No | | | BEGIN TRANSACTION | Partial | Transaction names are not supported. | | COMMIT TRANSACTION | Partial | Transaction names are not supported. 
| -| CREATE INDEX | No | | +| CREATE INDEX | Yes | | | CREATE TABLE | Partial | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | @@ -461,6 +461,8 @@ Modifiers: | IdxDelete | No | | | IdxGE | Yes | | | IdxInsert | No | | +| IdxInsertAsync | Yes | | +| IdxInsertAwait | Yes | | | IdxLE | Yes | | | IdxLT | Yes | | | IdxRowid | No | | From 4741544dfdadec0892e6e644fa1a4e164e611cc6 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:25:42 -0400 Subject: [PATCH 031/425] Add query translation for create index --- core/translate/index.rs | 321 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 core/translate/index.rs diff --git a/core/translate/index.rs b/core/translate/index.rs new file mode 100644 index 000000000..6f1a02c3c --- /dev/null +++ b/core/translate/index.rs @@ -0,0 +1,321 @@ +use std::sync::Arc; + +use crate::{ + schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema}, + types::Record, + util::normalize_ident, + vdbe::{ + builder::{CursorType, ProgramBuilder, QueryMode}, + insn::{IdxInsertFlags, Insn}, + }, + OwnedValue, +}; +use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn}; + +use super::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID}; + +pub fn translate_create_index( + mode: QueryMode, + unique_if_not_exists: (bool, bool), + idx_name: &str, + tbl_name: &str, + columns: &[SortedColumn], + schema: &Schema, +) -> crate::Result { + let idx_name = normalize_ident(idx_name); + let tbl_name = normalize_ident(tbl_name); + let mut program = ProgramBuilder::new(crate::vdbe::builder::ProgramBuilderOpts { + query_mode: mode, + num_cursors: 5, + approx_num_insns: 40, + approx_num_labels: 5, + }); + + // Check if the index is being created on a valid btree table and + // the name is globally unique in the schema. + if !schema.is_unique_idx_name(&idx_name) { + crate::bail_parse_error!("Error: index with name '{idx_name}' already exists."); + } + let Some(tbl) = schema.tables.get(&tbl_name) else { + crate::bail_parse_error!("Error: table '{tbl_name}' does not exist."); + }; + let Some(tbl) = tbl.btree() else { + crate::bail_parse_error!("Error: table '{tbl_name}' is not a b-tree table."); + }; + let columns = resolve_sorted_columns(&tbl, columns)?; + + // Prologue: + let init_label = program.emit_init(); + let start_offset = program.offset(); + + let idx = Arc::new(Index { + name: idx_name.clone(), + table_name: tbl.name.clone(), + root_page: 0, // we dont have access till its created, after we parse the schema table + columns: columns + .iter() + .map(|c| IndexColumn { + name: c.0 .1.name.as_ref().unwrap().clone(), + order: c.1, + }) + .collect(), + unique: unique_if_not_exists.0, + }); + + // Allocate the necessary cursors. + // + // 1. sqlite_schema_cursor_id - for the sqlite_schema table + // 2. btree_cursor_id - for the index btree + // 3. table_cursor_id - for the table we are creating the index on + // 4. sorter_cursor_id - for the sorter + // 5. 
pseudo_cursor_id - for the pseudo table to store the sorted index values + let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let sqlite_schema_cursor_id = program.alloc_cursor_id( + Some(SQLITE_TABLEID.to_owned()), + CursorType::BTreeTable(sqlite_table.clone()), + ); + let btree_cursor_id = program.alloc_cursor_id( + Some(idx_name.to_owned()), + CursorType::BTreeIndex(idx.clone()), + ); + let table_cursor_id = program.alloc_cursor_id( + Some(tbl_name.to_owned()), + CursorType::BTreeTable(tbl.clone()), + ); + let sorter_cursor_id = program.alloc_cursor_id(None, CursorType::Sorter); + let pseudo_table = PseudoTable::new_with_columns(tbl.columns.clone()); + let pseudo_cursor_id = program.alloc_cursor_id(None, CursorType::Pseudo(pseudo_table.into())); + + // Create a new B-Tree and store the root page index in a register + let root_page_reg = program.alloc_register(); + program.emit_insn(Insn::CreateBtree { + db: 0, + root: root_page_reg, + flags: 2, // index leaf + }); + + // open the sqlite schema table for writing and create a new entry for the index + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: sqlite_schema_cursor_id, + root_page: sqlite_table.root_page, + is_new_idx: false, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + let sql = create_idx_stmt_to_sql(&tbl_name, &idx_name, unique_if_not_exists, &columns); + emit_schema_entry( + &mut program, + sqlite_schema_cursor_id, + SchemaEntryType::Index, + &idx_name, + &tbl_name, + root_page_reg, + Some(sql), + ); + + // determine the order of the columns in the index for the sorter + let order = idx + .columns + .iter() + .map(|c| { + OwnedValue::Integer(match c.order { + SortOrder::Asc => 0, + SortOrder::Desc => 1, + }) + }) + .collect(); + // open the sorter and the pseudo table + program.emit_insn(Insn::SorterOpen { + cursor_id: sorter_cursor_id, + columns: columns.len(), + order: Record::new(order), + }); + let content_reg = program.alloc_register(); + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor_id, + content_reg, + num_fields: columns.len() + 1, + }); + + // open the table we are creating the index on for reading + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: tbl.root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + + program.emit_insn(Insn::RewindAsync { + cursor_id: table_cursor_id, + }); + let loop_start_label = program.allocate_label(); + let loop_end_label = program.allocate_label(); + program.emit_insn(Insn::RewindAwait { + cursor_id: table_cursor_id, + pc_if_empty: loop_end_label, + }); + + program.resolve_label(loop_start_label, program.offset()); + + // Loop start: + // Collect index values into start_reg..rowid_reg + // emit MakeRecord (index key + rowid) into record_reg. 
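+    // (This matches SQLite's index record layout: the indexed column values
+    // in index order, followed by the rowid of the table row they point to.)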
+ // + // Then insert the record into the sorter + let start_reg = program.alloc_registers(columns.len() + 1); + for (i, (col, _)) in columns.iter().enumerate() { + program.emit_insn(Insn::Column { + cursor_id: table_cursor_id, + column: col.0, + dest: start_reg + i, + }); + } + let rowid_reg = start_reg + columns.len(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: columns.len() + 1, + dest_reg: record_reg, + }); + program.emit_insn(Insn::SorterInsert { + cursor_id: sorter_cursor_id, + record_reg, + }); + + program.emit_insn(Insn::NextAsync { + cursor_id: table_cursor_id, + }); + program.emit_insn(Insn::NextAwait { + cursor_id: table_cursor_id, + pc_if_next: loop_start_label, + }); + program.resolve_label(loop_end_label, program.offset()); + + // Open the index btree we created for writing to insert the + // newly sorted index records. + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: btree_cursor_id, + root_page: root_page_reg, + is_new_idx: true, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + let sorted_loop_start = program.allocate_label(); + let sorted_loop_end = program.allocate_label(); + + // Sort the index records in the sorter + program.emit_insn(Insn::SorterSort { + cursor_id: sorter_cursor_id, + pc_if_empty: sorted_loop_end, + }); + program.resolve_label(sorted_loop_start, program.offset()); + let sorted_record_reg = program.alloc_register(); + program.emit_insn(Insn::SorterData { + pseudo_cursor: pseudo_cursor_id, + cursor_id: sorter_cursor_id, + dest_reg: sorted_record_reg, + }); + + // seek to the end of the index btree to position the cursor for appending + program.emit_insn(Insn::SeekEnd { + cursor_id: btree_cursor_id, + }); + // insert new index record + program.emit_insn(Insn::IdxInsertAsync { + cursor_id: btree_cursor_id, + record_reg: sorted_record_reg, + unpacked_start: None, // TODO: optimize with these to avoid decoding record twice + unpacked_count: None, + flags: IdxInsertFlags::new().use_seek(false), + }); + program.emit_insn(Insn::IdxInsertAwait { + cursor_id: btree_cursor_id, + }); + program.emit_insn(Insn::SorterNext { + cursor_id: sorter_cursor_id, + pc_if_next: sorted_loop_start, + }); + program.resolve_label(sorted_loop_end, program.offset()); + + // End of the outer loop + // + // Keep schema table open to emit ParseSchema, close the other cursors. 
+ program.close_cursors(&[sorter_cursor_id, table_cursor_id, btree_cursor_id]); + + // TODO: SetCookie for schema change + // + // Parse the schema table to get the index root page and add new index to Schema + let parse_schema_where_clause = format!("name = '{}' AND type = 'index'", idx_name); + program.emit_insn(Insn::ParseSchema { + db: sqlite_schema_cursor_id, + where_clause: parse_schema_where_clause, + }); + // Close the final sqlite_schema cursor + program.emit_insn(Insn::Close { + cursor_id: sqlite_schema_cursor_id, + }); + + // Epilogue: + program.emit_halt(); + program.resolve_label(init_label, program.offset()); + program.emit_transaction(true); + program.emit_constant_insns(); + program.emit_goto(start_offset); + + Ok(program) +} + +fn resolve_sorted_columns<'a>( + table: &'a BTreeTable, + cols: &[SortedColumn], +) -> crate::Result> { + let mut resolved = Vec::with_capacity(cols.len()); + for sc in cols { + let ident = normalize_ident(match &sc.expr { + Expr::Id(Id(col_name)) | Expr::Name(ast::Name(col_name)) => col_name, + _ => crate::bail_parse_error!("Error: cannot use expressions in CREATE INDEX"), + }); + let Some(col) = table.get_column(&ident) else { + crate::bail_parse_error!( + "Error: column '{ident}' does not exist in table '{}'", + table.name + ); + }; + resolved.push((col, sc.order.unwrap_or(SortOrder::Asc))); + } + Ok(resolved) +} + +fn create_idx_stmt_to_sql( + tbl_name: &str, + idx_name: &str, + unique_if_not_exists: (bool, bool), + cols: &[((usize, &Column), SortOrder)], +) -> String { + let mut sql = String::new(); + sql.push_str("CREATE "); + if unique_if_not_exists.0 { + sql.push_str("UNIQUE "); + } + sql.push_str("INDEX "); + if unique_if_not_exists.1 { + sql.push_str("IF NOT EXISTS "); + } + sql.push_str(idx_name); + sql.push_str(" ON "); + sql.push_str(tbl_name); + sql.push_str(" ("); + for (i, (col, order)) in cols.iter().enumerate() { + if i > 0 { + sql.push_str(", "); + } + sql.push_str(col.1.name.as_ref().unwrap()); + if *order == SortOrder::Desc { + sql.push_str(" DESC"); + } + } + sql.push(')'); + sql +} From 7567b30d003a72f1ca2bf299f706c40e0d0d8b51 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:27:11 -0400 Subject: [PATCH 032/425] Add SeekEnd to compat.md --- COMPAT.md | 1 + 1 file changed, 1 insertion(+) diff --git a/COMPAT.md b/COMPAT.md index d81650502..b0cc85d83 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -550,6 +550,7 @@ Modifiers: | SeekLe | No | | | SeekLt | No | | | SeekRowid | Yes | | +| SeekEnd | Yes | | | Sequence | No | | | SetCookie | No | | | ShiftLeft | Yes | | From 20adedff4cb61ccd98bf431abb729eacfe8fdf2a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:32:14 -0400 Subject: [PATCH 033/425] Remove Order enum in place of ast::SortOrder --- core/schema.rs | 39 +++++++++++++++------------------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index fbec7627f..dda37d15b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -2,7 +2,7 @@ use crate::VirtualTable; use crate::{util::normalize_ident, Result}; use core::fmt; use fallible_iterator::FallibleIterator; -use limbo_sqlite3_parser::ast::{Expr, Literal, TableOptions}; +use limbo_sqlite3_parser::ast::{Expr, Literal, SortOrder, TableOptions}; use limbo_sqlite3_parser::{ ast::{Cmd, CreateTableBody, QualifiedName, ResultColumn, Stmt}, lexer::sql::Parser, @@ -30,6 +30,13 @@ impl Schema { Self { tables, indexes } } + pub fn is_unique_idx_name(&self, name: &str) -> bool { + !self + .indexes + .iter() + 
.any(|idx| idx.1.iter().any(|i| i.name == name)) + } + pub fn add_btree_table(&mut self, table: Rc) { let name = normalize_ident(&table.name); self.tables.insert(name, Table::BTree(table).into()); @@ -209,7 +216,7 @@ impl BTreeTable { } } -#[derive(Debug)] +#[derive(Debug, Default)] pub struct PseudoTable { pub columns: Vec, } @@ -245,12 +252,6 @@ impl PseudoTable { } } -impl Default for PseudoTable { - fn default() -> Self { - Self::new() - } -} - fn create_table( tbl_name: QualifiedName, body: CreateTableBody, @@ -616,13 +617,7 @@ pub struct Index { #[derive(Debug, Clone)] pub struct IndexColumn { pub name: String, - pub order: Order, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Order { - Ascending, - Descending, + pub order: SortOrder, } impl Index { @@ -642,11 +637,7 @@ impl Index { .into_iter() .map(|col| IndexColumn { name: normalize_ident(&col.expr.to_string()), - order: match col.order { - Some(limbo_sqlite3_parser::ast::SortOrder::Asc) => Order::Ascending, - Some(limbo_sqlite3_parser::ast::SortOrder::Desc) => Order::Descending, - None => Order::Ascending, - }, + order: col.order.unwrap_or(SortOrder::Asc), }) .collect(); Ok(Index { @@ -685,7 +676,7 @@ impl Index { } Ok(IndexColumn { name: normalize_ident(col_name), - order: Order::Ascending, // Primary key indexes are always ascending + order: SortOrder::Asc, // Primary key indexes are always ascending }) }) .collect::>>()?; @@ -1012,7 +1003,7 @@ mod tests { assert!(index.unique); assert_eq!(index.columns.len(), 1); assert_eq!(index.columns[0].name, "a"); - assert!(matches!(index.columns[0].order, Order::Ascending)); + assert!(matches!(index.columns[0].order, SortOrder::Asc)); Ok(()) } @@ -1029,8 +1020,8 @@ mod tests { assert_eq!(index.columns.len(), 2); assert_eq!(index.columns[0].name, "a"); assert_eq!(index.columns[1].name, "b"); - assert!(matches!(index.columns[0].order, Order::Ascending)); - assert!(matches!(index.columns[1].order, Order::Ascending)); + assert!(matches!(index.columns[0].order, SortOrder::Asc)); + assert!(matches!(index.columns[1].order, SortOrder::Asc)); Ok(()) } From 45a8e5e2267c06744158c4d31e03b26c1f4ca12e Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:34:43 -0400 Subject: [PATCH 034/425] Add close_cursors helper method to program builder --- core/vdbe/builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 058fc8aab..19a71a68d 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -131,6 +131,12 @@ impl ProgramBuilder { self.insns.push((insn, function)); } + pub fn close_cursors(&mut self, cursors: &[CursorID]) { + for cursor in cursors { + self.emit_insn(Insn::Close { cursor_id: *cursor }); + } + } + pub fn emit_string8(&mut self, value: String, dest: usize) { self.emit_insn(Insn::String8 { value, dest }); } From b0016a0ee24f6592c1aef0490ec043c64a08f584 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 15:41:19 -0400 Subject: [PATCH 035/425] Support create index with SeekEnd and IdxCreate opcode functionality --- core/storage/btree.rs | 160 ++++++++++++++++++++++++++++++++++-- core/translate/insert.rs | 2 + core/translate/main_loop.rs | 6 ++ core/translate/mod.rs | 21 ++++- core/translate/schema.rs | 13 +-- core/vdbe/execute.rs | 88 +++++++++++++++++++- core/vdbe/explain.rs | 34 ++++++++ core/vdbe/insn.rs | 61 +++++++++++++- 8 files changed, 370 insertions(+), 15 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 558a6eee3..0f094ac5b 100644 --- a/core/storage/btree.rs +++ 
b/core/storage/btree.rs @@ -12,12 +12,14 @@ use crate::types::{ use crate::{return_corrupt, LimboError, Result}; use std::cell::{Cell, Ref, RefCell}; +use std::cmp::Ordering; use std::pin::Pin; use std::rc::Rc; use super::pager::PageRef; use super::sqlite3_ondisk::{ - write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE, + read_record, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, + DATABASE_HEADER_SIZE, }; /* @@ -599,8 +601,8 @@ impl BTreeCursor { BTreeCell::TableLeafCell(TableLeafCell { _rowid, _payload, - first_overflow_page, payload_size, + first_overflow_page, }) => { assert!(predicate.is_none()); if let Some(next_page) = first_overflow_page { @@ -814,10 +816,8 @@ impl BTreeCursor { }; let record = self.get_immutable_record(); let record = record.as_ref().unwrap(); - let order = compare_immutable( - &record.get_values().as_slice()[..record.len() - 1], - &index_key.get_values().as_slice()[..], - ); + let without_rowid = &record.get_values().as_slice()[..record.len() - 1]; + let order = without_rowid.cmp(index_key.get_values()); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -1047,6 +1047,65 @@ impl BTreeCursor { } } + pub fn insert_index_key(&mut self, key: &ImmutableRecord) -> Result> { + if let CursorState::None = &self.state { + self.state = CursorState::Write(WriteInfo::new()); + } + + let ret = loop { + let write_state = self.state.mut_write_info().unwrap().state; + match write_state { + WriteState::Start => { + let page = self.stack.top(); + return_if_locked!(page); + page.set_dirty(); + self.pager.add_dirty(page.get().id); + let page = page.get().contents.as_mut().unwrap(); + + assert!(matches!(page.page_type(), PageType::IndexLeaf)); + let cell_idx = self.find_index_cell(page, key); + let mut cell_payload: Vec = Vec::new(); + fill_cell_payload( + page.page_type(), + None, + &mut cell_payload, + key, + self.usable_space() as u16, + self.pager.clone(), + ); + // insert + let overflow = { + debug!( + "insert_index_key(overflow, cell_count={})", + page.cell_count() + ); + insert_into_cell( + page, + cell_payload.as_slice(), + cell_idx, + self.usable_space() as u16, + )?; + page.overflow_cells.len() + }; + let write_info = self.state.mut_write_info().unwrap(); + write_info.state = if overflow > 0 { + WriteState::BalanceStart + } else { + WriteState::Finish + }; + } + WriteState::BalanceStart + | WriteState::BalanceNonRoot + | WriteState::BalanceNonRootWaitLoadPages => { + return_if_io!(self.balance()); + } + WriteState::Finish => break Ok(CursorResult::Ok(())), + } + }; + self.state = CursorState::None; + ret + } + /// Insert a record into the btree. /// If the insert operation overflows the page, it will be split and the btree will be balanced. fn insert_into_page( @@ -1943,6 +2002,74 @@ impl BTreeCursor { cell_idx } + fn find_index_cell(&self, page: &PageContent, key: &ImmutableRecord) -> usize { + let mut cell_idx = 0; + let cell_count = page.cell_count(); + while cell_idx < cell_count { + match page + .cell_get( + cell_idx, + payload_overflow_threshold_max(page.page_type(), self.usable_space() as u16), + payload_overflow_threshold_min(page.page_type(), self.usable_space() as u16), + self.usable_space(), + ) + .unwrap() + { + BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, .. }) + | BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. 
}) => { + read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + ) + .expect("failed to read record"); + let order = compare_immutable( + key.get_values(), + self.get_immutable_record().as_ref().unwrap().get_values(), + ); + match order { + Ordering::Less => { + break; + } + Ordering::Equal => { + break; + } + Ordering::Greater => {} + } + } + _ => unreachable!("Expected Index cell types"), + } + cell_idx += 1; + } + cell_idx + } + + pub fn seek_end(&mut self) -> Result> { + assert!(self.mv_cursor.is_none()); + self.move_to_root(); + loop { + let mem_page = self.stack.top(); + let page_id = mem_page.get().id; + let page = self.pager.read_page(page_id)?; + return_if_locked!(page); + + let contents = page.get().contents.as_ref().unwrap(); + if contents.is_leaf() { + // set cursor just past the last cell to append + self.stack.set_cell_index(contents.cell_count() as i32); + return Ok(CursorResult::Ok(())); + } + + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + self.stack.set_cell_index(contents.cell_count() as i32 + 1); // invalid on interior + let child = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(child); + } + None => unreachable!("interior page must have rightmost pointer"), + } + } + } + pub fn seek_to_last(&mut self) -> Result> { return_if_io!(self.move_to_rightmost()); let rowid = return_if_io!(self.get_next_record(None)); @@ -2372,6 +2499,27 @@ impl BTreeCursor { self.null_flag } + /// Search for a key in an Index Btree. Looking up indexes that need to be unique, we cannot compare the rowid + pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result> { + return_if_io!(self.do_seek(SeekKey::IndexKey(key), SeekOp::GE)); + if let Some(record) = self.record().as_ref() { + // get existing record, excluding the rowid + assert!(record.len() > 0); + let existing_key = &record.get_values()[..record.count() - 1]; + let inserted_key_vals = &key.get_values(); + if existing_key + .iter() + .zip(inserted_key_vals.iter()) + .all(|(a, b)| a == b) + { + return Ok(CursorResult::Ok(true)); // duplicate + } + } else { + return Err(LimboError::InvalidArgument("Expected Record key".into())); + } + Ok(CursorResult::Ok(false)) // no matching key found + } + pub fn exists(&mut self, key: &OwnedValue) -> Result> { assert!(self.mv_cursor.is_none()); let int_key = match key { diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 5fda098e6..d6faeb2c8 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -153,6 +153,7 @@ pub fn translate_insert( program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -169,6 +170,7 @@ pub fn translate_insert( program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7b51a2328..d9b7d6a36 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -103,6 +103,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -111,6 +112,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -145,6 +147,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, root_page: 
table.table.get_root_page(), + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -152,6 +155,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, root_page: table.table.get_root_page(), + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -178,6 +182,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, root_page: index.root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -185,6 +190,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, root_page: index.root_page, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 739ae5f03..cf93b34ba 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -12,6 +12,7 @@ pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod group_by; +pub(crate) mod index; pub(crate) mod insert; pub(crate) mod main_loop; pub(crate) mod optimizer; @@ -34,6 +35,7 @@ use crate::translate::delete::translate_delete; use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::vdbe::Program; use crate::{bail_parse_error, Connection, Result, SymbolTable}; +use index::translate_create_index; use insert::translate_insert; use limbo_sqlite3_parser::ast::{self, Delete, Insert}; use schema::{translate_create_table, translate_create_virtual_table, translate_drop_table}; @@ -61,7 +63,24 @@ pub fn translate( ast::Stmt::Attach { .. } => bail_parse_error!("ATTACH not supported yet"), ast::Stmt::Begin(tx_type, tx_name) => translate_tx_begin(tx_type, tx_name)?, ast::Stmt::Commit(tx_name) => translate_tx_commit(tx_name)?, - ast::Stmt::CreateIndex { .. } => bail_parse_error!("CREATE INDEX not supported yet"), + ast::Stmt::CreateIndex { + unique, + if_not_exists, + idx_name, + tbl_name, + columns, + .. + } => { + change_cnt_on = true; + translate_create_index( + query_mode, + (unique, if_not_exists), + &idx_name.name.0, + &tbl_name.0, + &columns, + schema, + )? 
+ } ast::Stmt::CreateTable { temporary, if_not_exists, diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 29cf29644..6f87937c9 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -104,6 +104,7 @@ pub fn translate_create_table( program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, root_page: 1, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -155,8 +156,8 @@ pub fn translate_create_table( Ok(program) } -#[derive(Debug)] -enum SchemaEntryType { +#[derive(Debug, Clone, Copy)] +pub enum SchemaEntryType { Table, Index, } @@ -169,9 +170,9 @@ impl SchemaEntryType { } } } -const SQLITE_TABLEID: &str = "sqlite_schema"; +pub const SQLITE_TABLEID: &str = "sqlite_schema"; -fn emit_schema_entry( +pub fn emit_schema_entry( program: &mut ProgramBuilder, sqlite_schema_cursor_id: usize, entry_type: SchemaEntryType, @@ -501,6 +502,7 @@ pub fn translate_create_virtual_table( program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, root_page: 1, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -572,7 +574,7 @@ pub fn translate_drop_table( let row_id_reg = program.alloc_register(); // r5 let table_name = "sqlite_schema"; - let schema_table = schema.get_btree_table(&table_name).unwrap(); + let schema_table = schema.get_btree_table(table_name).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id( Some(table_name.to_string()), CursorType::BTreeTable(schema_table.clone()), @@ -580,6 +582,7 @@ pub fn translate_drop_table( program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, root_page: 1, + is_new_idx: false, }); program.emit_insn(Insn::OpenWriteAwait {}); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 3c511a0db..89bd26859 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -21,7 +21,7 @@ use crate::util::{ checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision, }; use crate::vdbe::builder::CursorType; -use crate::vdbe::insn::Insn; +use crate::vdbe::insn::{IdxInsertFlags, Insn}; use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract}; use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState}; @@ -2049,6 +2049,24 @@ pub fn op_idx_ge( Ok(InsnFunctionStepResult::Step) } +pub fn op_seek_end( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + if let Insn::SeekEnd { cursor_id } = *insn { + let mut cursor = state.get_cursor(cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.seek_end()); + } else { + unreachable!("unexpected Insn {:?}", insn) + } + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_idx_le( program: &Program, state: &mut ProgramState, @@ -3706,6 +3724,73 @@ pub fn op_delete_async( Ok(InsnFunctionStepResult::Step) } +pub fn op_idx_insert_async( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + if let Insn::IdxInsertAsync { + cursor_id, + record_reg, + flags, + .. 
+ } = *insn + { + let (_, cursor_type) = program.cursor_ref.get(cursor_id).unwrap(); + let CursorType::BTreeIndex(index_meta) = cursor_type else { + panic!("IdxInsert: not a BTree index cursor"); + }; + { + let mut cursor = state.get_cursor(cursor_id); + let cursor = cursor.as_btree_mut(); + let record = match &state.registers[record_reg] { + Register::Record(ref r) => r, + _ => return Err(LimboError::InternalError("expected record".into())), + }; + let moved_before = if index_meta.unique { + // check for uniqueness violation + match cursor.key_exists_in_index(record)? { + CursorResult::Ok(true) => { + return Err(LimboError::Constraint( + "UNIQUE constraint failed: duplicate key".into(), + )) + } + CursorResult::IO => return Ok(InsnFunctionStepResult::IO), + CursorResult::Ok(false) => {} + }; + false + } else { + flags.has(IdxInsertFlags::USE_SEEK) + }; + // insert record as key + return_if_io!(cursor.insert_index_key(record)); + } + state.pc += 1; + } + Ok(InsnFunctionStepResult::Step) +} + +pub fn op_idx_insert_await( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + if let Insn::IdxInsertAwait { cursor_id } = *insn { + { + let mut cursor = state.get_cursor(cursor_id); + let cursor = cursor.as_btree_mut(); + cursor.wait_for_completion()?; + } + // TODO: flag optimizations, update n_change if OPFLAG_NCHANGE + state.pc += 1; + } + Ok(InsnFunctionStepResult::Step) +} + pub fn op_delete_await( program: &Program, state: &mut ProgramState, @@ -3889,6 +3974,7 @@ pub fn op_open_write_async( let Insn::OpenWriteAsync { cursor_id, root_page, + .. } = insn else { unreachable!("unexpected Insn {:?}", insn) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 67333c334..f80a442f1 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -760,6 +760,39 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::SeekEnd { cursor_id } => ( + "SeekEnd", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(""), + 0, + "".to_string(), + ), + Insn::IdxInsertAsync { + cursor_id, + record_reg, + unpacked_start, + flags, + .. + } => ( + "IdxInsertAsync", + *cursor_id as i32, + *record_reg as i32, + unpacked_start.unwrap_or(0) as i32, + OwnedValue::build_text(""), + flags.0 as u16, + format!("key=r[{}]", record_reg), + ), + Insn::IdxInsertAwait { cursor_id } => ( + "IdxInsertAwait", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(""), + 0, + "".to_string(), + ), Insn::IdxGT { cursor_id, start_reg, @@ -1097,6 +1130,7 @@ pub fn insn_to_str( Insn::OpenWriteAsync { cursor_id, root_page, + .. 
} => ( "OpenWriteAsync", *cursor_id as i32, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f45e7ce35..5ce68f14a 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -38,6 +38,44 @@ impl CmpInsFlags { } } +#[derive(Clone, Copy, Debug, Default)] +pub struct IdxInsertFlags(pub u8); +impl IdxInsertFlags { + pub const APPEND: u8 = 0x01; // Hint: insert likely at the end + pub const NCHANGE: u8 = 0x02; // Increment the change counter + pub const USE_SEEK: u8 = 0x04; // Skip seek if last one was same key + pub fn new() -> Self { + IdxInsertFlags(0) + } + pub fn has(&self, flag: u8) -> bool { + (self.0 & flag) != 0 + } + pub fn append(mut self, append: bool) -> Self { + if append { + self.0 |= IdxInsertFlags::APPEND; + } else { + self.0 &= !IdxInsertFlags::APPEND; + } + self + } + pub fn use_seek(mut self, seek: bool) -> Self { + if seek { + self.0 |= IdxInsertFlags::USE_SEEK; + } else { + self.0 &= !IdxInsertFlags::USE_SEEK; + } + self + } + pub fn nchange(mut self, change: bool) -> Self { + if change { + self.0 |= IdxInsertFlags::NCHANGE; + } else { + self.0 &= !IdxInsertFlags::NCHANGE; + } + self + } +} + #[derive(Description, Debug)] pub enum Insn { /// Initialize the program state and jump to the given PC. @@ -401,6 +439,9 @@ pub enum Insn { src_reg: usize, target_pc: BranchOffset, }, + SeekEnd { + cursor_id: CursorID, + }, /// P1 is an open index cursor and P3 is a cursor on the corresponding table. This opcode does a deferred seek of the P3 table cursor to the row that corresponds to the current row of P1. /// This is a deferred seek. Nothing actually happens until the cursor is used to read a record. That way, if no reads occur, no unnecessary I/O happens. @@ -431,8 +472,20 @@ pub enum Insn { target_pc: BranchOffset, }, - /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. - /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + // cursor_id is a cursor pointing to a B-Tree index that uses integer keys, this op writes the value obtained from MakeRecord into the index. + // P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. + // If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. + // OPFLAG_NCHANGE bit set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged + IdxInsertAsync { + cursor_id: CursorID, + record_reg: usize, // P2 the register containing the record to insert + unpacked_start: Option, // P3 the index of the first register for the unpacked key + unpacked_count: Option, // P4 # of unpacked values in the key in P2 + flags: IdxInsertFlags, // TODO: optimization + }, + IdxInsertAwait { + cursor_id: CursorID, + }, IdxGE { cursor_id: CursorID, start_reg: usize, @@ -588,6 +641,7 @@ pub enum Insn { OpenWriteAsync { cursor_id: CursorID, root_page: PageIdx, + is_new_idx: bool, }, OpenWriteAwait {}, @@ -1237,10 +1291,13 @@ impl Insn { Insn::DeferredSeek { .. } => execute::op_deferred_seek, Insn::SeekGE { .. } => execute::op_seek_ge, Insn::SeekGT { .. } => execute::op_seek_gt, + Insn::SeekEnd { .. } => execute::op_seek_end, Insn::IdxGE { .. } => execute::op_idx_ge, Insn::IdxGT { .. } => execute::op_idx_gt, Insn::IdxLE { .. 
} => execute::op_idx_le, Insn::IdxLT { .. } => execute::op_idx_lt, + Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, + Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, Insn::AggStep { .. } => execute::op_agg_step, From e020ba3dfe97e49e85652d9f36320a5c347ba576 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 16:16:55 -0400 Subject: [PATCH 036/425] Add enum for interpreting a value as a register or literal for insns --- core/translate/index.rs | 8 +++----- core/translate/insert.rs | 7 +++---- core/translate/main_loop.rs | 18 ++++++------------ core/translate/schema.rs | 10 ++++------ core/vdbe/execute.rs | 17 ++++++++++++++--- core/vdbe/explain.rs | 7 +++++-- core/vdbe/insn.rs | 20 +++++++++++++++----- 7 files changed, 50 insertions(+), 37 deletions(-) diff --git a/core/translate/index.rs b/core/translate/index.rs index 6f1a02c3c..c9a474cab 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -6,7 +6,7 @@ use crate::{ util::normalize_ident, vdbe::{ builder::{CursorType, ProgramBuilder, QueryMode}, - insn::{IdxInsertFlags, Insn}, + insn::{IdxInsertFlags, Insn, RegisterOrLiteral}, }, OwnedValue, }; @@ -97,8 +97,7 @@ pub fn translate_create_index( // open the sqlite schema table for writing and create a new entry for the index program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, - root_page: sqlite_table.root_page, - is_new_idx: false, + root_page: RegisterOrLiteral::Literal(sqlite_table.root_page), }); program.emit_insn(Insn::OpenWriteAwait {}); let sql = create_idx_stmt_to_sql(&tbl_name, &idx_name, unique_if_not_exists, &columns); @@ -197,8 +196,7 @@ pub fn translate_create_index( // newly sorted index records. program.emit_insn(Insn::OpenWriteAsync { cursor_id: btree_cursor_id, - root_page: root_page_reg, - is_new_idx: true, + root_page: RegisterOrLiteral::Register(root_page_reg), }); program.emit_insn(Insn::OpenWriteAwait {}); diff --git a/core/translate/insert.rs b/core/translate/insert.rs index d6faeb2c8..371d85be1 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -9,6 +9,7 @@ use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; use crate::schema::Table; use crate::util::normalize_ident; use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode}; +use crate::vdbe::insn::RegisterOrLiteral; use crate::vdbe::BranchOffset; use crate::{ schema::{Column, Schema}, @@ -152,8 +153,7 @@ pub fn translate_insert( program.emit_insn(Insn::OpenWriteAsync { cursor_id, - root_page, - is_new_idx: false, + root_page: RegisterOrLiteral::Literal(root_page), }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -169,8 +169,7 @@ pub fn translate_insert( // Single row - populate registers directly program.emit_insn(Insn::OpenWriteAsync { cursor_id, - root_page, - is_new_idx: false, + root_page: RegisterOrLiteral::Literal(root_page), }); program.emit_insn(Insn::OpenWriteAwait {}); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index d9b7d6a36..e7235e8f0 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -102,8 +102,7 @@ pub fn init_loop( let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, - root_page, - is_new_idx: false, + root_page: root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -111,8 +110,7 @@ pub fn init_loop( let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, - root_page, - is_new_idx: false, + root_page: 
root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -146,16 +144,14 @@ pub fn init_loop( OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), - is_new_idx: false, + root_page: table.table.get_root_page().into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } OperationMode::UPDATE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), - is_new_idx: false, + root_page: table.table.get_root_page().into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } @@ -181,16 +177,14 @@ pub fn init_loop( OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page, - is_new_idx: false, + root_page: index.root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } OperationMode::UPDATE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page, - is_new_idx: false, + root_page: index.root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 6f87937c9..eea0868d0 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -7,6 +7,7 @@ use crate::translate::ProgramBuilderOpts; use crate::translate::QueryMode; use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX; use crate::vdbe::builder::CursorType; +use crate::vdbe::insn::RegisterOrLiteral; use crate::vdbe::insn::{CmpInsFlags, Insn}; use crate::LimboError; use crate::{bail_parse_error, Result}; @@ -103,8 +104,7 @@ pub fn translate_create_table( ); program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, - root_page: 1, - is_new_idx: false, + root_page: 1usize.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -501,8 +501,7 @@ pub fn translate_create_virtual_table( ); program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, - root_page: 1, - is_new_idx: false, + root_page: 1usize.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); @@ -581,8 +580,7 @@ pub fn translate_drop_table( ); program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, - root_page: 1, - is_new_idx: false, + root_page: 1usize.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 89bd26859..968fea4eb 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -29,7 +29,7 @@ use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState}; use super::insn::{ exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat, exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right, - exec_subtract, Cookie, + exec_subtract, Cookie, RegisterOrLiteral, }; use super::HaltState; use rand::thread_rng; @@ -3979,12 +3979,23 @@ pub fn op_open_write_async( else { unreachable!("unexpected Insn {:?}", insn) }; + let root_page = match root_page { + RegisterOrLiteral::Literal(lit) => *lit as u64, + RegisterOrLiteral::Register(reg) => match &state.registers[*reg].get_owned_value() { + OwnedValue::Integer(val) => *val as u64, + _ => { + return Err(LimboError::InternalError( + "OpenWriteAsync: the value in root_page is not an integer".into(), + )); + } + }, + }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let mut cursors = state.cursors.borrow_mut(); let is_index = cursor_type.is_index(); let mv_cursor = match 
state.mv_tx_id { Some(tx_id) => { - let table_id = *root_page as u64; + let table_id = root_page; let mv_store = mv_store.unwrap().clone(); let mv_cursor = Rc::new(RefCell::new( MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(), @@ -3993,7 +4004,7 @@ pub fn op_open_write_async( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); if is_index { cursors .get_mut(*cursor_id) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index f80a442f1..66c68d9c0 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,4 +1,4 @@ -use crate::vdbe::builder::CursorType; +use crate::vdbe::{builder::CursorType, insn::RegisterOrLiteral}; use super::{Insn, InsnReference, OwnedValue, Program}; use crate::function::{Func, ScalarFunc}; @@ -1134,7 +1134,10 @@ pub fn insn_to_str( } => ( "OpenWriteAsync", *cursor_id as i32, - *root_page as i32, + match root_page { + RegisterOrLiteral::Literal(i) => *i as _, + RegisterOrLiteral::Register(i) => *i as _, + }, 0, OwnedValue::build_text(""), 0, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 5ce68f14a..4a6bc1ea4 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -76,6 +76,18 @@ impl IdxInsertFlags { } } +#[derive(Clone, Copy, Debug)] +pub enum RegisterOrLiteral { + Register(usize), + Literal(T), +} + +impl From for RegisterOrLiteral { + fn from(value: PageIdx) -> Self { + RegisterOrLiteral::Literal(value) + } +} + #[derive(Description, Debug)] pub enum Insn { /// Initialize the program state and jump to the given PC. @@ -640,8 +652,7 @@ pub enum Insn { OpenWriteAsync { cursor_id: CursorID, - root_page: PageIdx, - is_new_idx: bool, + root_page: RegisterOrLiteral, }, OpenWriteAwait {}, @@ -1296,8 +1307,6 @@ impl Insn { Insn::IdxGT { .. } => execute::op_idx_gt, Insn::IdxLE { .. } => execute::op_idx_le, Insn::IdxLT { .. } => execute::op_idx_lt, - Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, - Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, Insn::AggStep { .. } => execute::op_agg_step, @@ -1315,7 +1324,8 @@ impl Insn { Insn::Yield { .. } => execute::op_yield, Insn::InsertAsync { .. } => execute::op_insert_async, Insn::InsertAwait { .. } => execute::op_insert_await, - + Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, + Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, Insn::DeleteAsync { .. } => execute::op_delete_async, Insn::DeleteAwait { .. } => execute::op_delete_await, From 2c3fd509feb5142bf2ef77aec756f58c445f89d9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 30 Mar 2025 16:29:06 -0400 Subject: [PATCH 037/425] Remove unused imports and consolidate ordering comparison --- core/storage/btree.rs | 9 ++++----- core/translate/schema.rs | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0f094ac5b..002459955 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2017,6 +2017,8 @@ impl BTreeCursor { { BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, .. }) | BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. }) => { + // TODO: implement efficient comparison of records + // e.g. 
                    read_record(
                         payload,
                         self.get_immutable_record_or_create().as_mut().unwrap(),
@@ -2027,10 +2029,7 @@ impl BTreeCursor {
                         self.get_immutable_record().as_ref().unwrap().get_values(),
                     );
                     match order {
-                        Ordering::Less => {
-                            break;
-                        }
-                        Ordering::Equal => {
+                        Ordering::Less | Ordering::Equal => {
                             break;
                         }
                         Ordering::Greater => {}
@@ -2044,7 +2043,7 @@ impl BTreeCursor {
     }

     pub fn seek_end(&mut self) -> Result<CursorResult<()>> {
-        assert!(self.mv_cursor.is_none());
+        assert!(self.mv_cursor.is_none()); // TODO: verify this assertion is valid once MVCC cursors support seek_end
         self.move_to_root();
         loop {
             let mem_page = self.stack.top();
diff --git a/core/translate/schema.rs b/core/translate/schema.rs
index eea0868d0..3d5aa79db 100644
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -7,7 +7,6 @@ use crate::translate::ProgramBuilderOpts;
 use crate::translate::QueryMode;
 use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX;
 use crate::vdbe::builder::CursorType;
-use crate::vdbe::insn::RegisterOrLiteral;
 use crate::vdbe::insn::{CmpInsFlags, Insn};
 use crate::LimboError;
 use crate::{bail_parse_error, Result};

From 007fbe8cc73c80af004d25f4e72f6c4e5682c65f Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Sun, 30 Mar 2025 19:52:54 -0400
Subject: [PATCH 038/425] Fix unique index issue and prealloc in sql string for
 schema

---
 core/storage/btree.rs   | 34 ++++++++++++++++++++--------------
 core/translate/index.rs | 14 +++++++-------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 002459955..d4f58151a 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -2501,22 +2501,28 @@ impl BTreeCursor {
     /// Search for a key in an Index Btree. Looking up indexes that need to be unique, we cannot compare the rowid
     pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result<CursorResult<bool>> {
         return_if_io!(self.do_seek(SeekKey::IndexKey(key), SeekOp::GE));
-        if let Some(record) = self.record().as_ref() {
-            // get existing record, excluding the rowid
-            assert!(record.len() > 0);
-            let existing_key = &record.get_values()[..record.count() - 1];
-            let inserted_key_vals = &key.get_values();
-            if existing_key
-                .iter()
-                .zip(inserted_key_vals.iter())
-                .all(|(a, b)| a == b)
-            {
-                return Ok(CursorResult::Ok(true)); // duplicate
+
+        let record_opt = self.record();
+        match record_opt.as_ref() {
+            Some(record) => {
+                // Existing record found; compare the key prefix, excluding the rowid
+                let existing_key = &record.get_values()[..record.count().saturating_sub(1)];
+                let inserted_key_vals = &key.get_values();
+                if existing_key
+                    .iter()
+                    .zip(inserted_key_vals.iter())
+                    .all(|(a, b)| a == b)
+                {
+                    return Ok(CursorResult::Ok(true)); // duplicate
+                }
+            }
+            None => {
+                // Cursor is not pointing at a record; the table is empty or we are past the last entry
+                return Ok(CursorResult::Ok(false));
             }
-        } else {
-            return Err(LimboError::InvalidArgument("Expected Record key".into()));
         }
-        Ok(CursorResult::Ok(false)) // no matching key found
+
+        Ok(CursorResult::Ok(false)) // not a duplicate
     }

     pub fn exists(&mut self, key: &OwnedValue) -> Result<CursorResult<bool>> {
diff --git a/core/translate/index.rs b/core/translate/index.rs
index c9a474cab..32d7cd2e9 100644
--- a/core/translate/index.rs
+++ b/core/translate/index.rs
@@ -62,13 +62,13 @@ pub fn translate_create_index(
         unique: unique_if_not_exists.0,
     });

-    // Allocate the necessary cursors.
+    // Allocate the necessary cursors:
     //
-    // 1. sqlite_schema_cursor_id - for the sqlite_schema table
-    // 2. btree_cursor_id - for the index btree
-    // 3. table_cursor_id - for the table we are creating the index on
-    // 4. sorter_cursor_id - for the sorter
-    // 5. pseudo_cursor_id - for the pseudo table to store the sorted index values
+    // 1. sqlite_schema_cursor_id - sqlite_schema table
+    // 2. btree_cursor_id - new index btree
+    // 3. table_cursor_id - table we are creating the index on
+    // 4. sorter_cursor_id - sorter
+    // 5. pseudo_cursor_id - pseudo table to store the sorted index values
     let sqlite_table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
     let sqlite_schema_cursor_id = program.alloc_cursor_id(
         Some(SQLITE_TABLEID.to_owned()),
@@ -292,7 +292,7 @@ fn create_idx_stmt_to_sql(
     unique_if_not_exists: (bool, bool),
     cols: &[((usize, &Column), SortOrder)],
 ) -> String {
-    let mut sql = String::new();
+    let mut sql = String::with_capacity(128);
     sql.push_str("CREATE ");
     if unique_if_not_exists.0 {
         sql.push_str("UNIQUE ");

From 068ab4ab2779b3e6deebad4b38f375585b8d7a31 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 31 Mar 2025 20:48:02 -0400
Subject: [PATCH 039/425] Refactor btree to reuse existing insert and seek with
 idx keys

---
 core/storage/btree.rs | 259 ++++++++++++++++++++++--------------------
 core/vdbe/execute.rs  |  11 +-
 2 files changed, 142 insertions(+), 128 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index d4f58151a..11a80ef4b 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -171,6 +171,50 @@ enum ReadPayloadOverflow {
     },
 }

+#[derive(Clone, Debug)]
+pub enum BTreeKey<'a> {
+    TableRowId((u64, Option<&'a ImmutableRecord>)),
+    IndexKey(&'a ImmutableRecord),
+}
+
+impl<'a> BTreeKey<'_> {
+    pub fn new_table_rowid(rowid: u64, record: Option<&'a ImmutableRecord>) -> BTreeKey<'a> {
+        BTreeKey::TableRowId((rowid, record))
+    }
+    pub fn new_index_key(record: &'a ImmutableRecord) -> BTreeKey<'a> {
+        BTreeKey::IndexKey(record)
+    }
+    fn get_record(&self) -> Option<&'_ ImmutableRecord> {
+        match self {
+            BTreeKey::TableRowId((_, record)) => *record,
+            BTreeKey::IndexKey(record) => Some(record),
+        }
+    }
+
+    fn maybe_rowid(&self) -> Option<u64> {
+        match self {
+            BTreeKey::TableRowId((rowid, _)) => Some(*rowid),
+            BTreeKey::IndexKey(_) => None,
+        }
+    }
+
+    /// Assert that the key is a rowid and return it.
+    fn to_rowid(&self) -> u64 {
+        match self {
+            BTreeKey::TableRowId((rowid, _)) => *rowid,
+            BTreeKey::IndexKey(_) => panic!("BTreeKey::assert_rowid() called on IndexKey"),
+        }
+    }
+
+    /// Assert that the key is an index key and return it.
+    fn to_index_key_values(&self) -> &'_ Vec<RefValue> {
+        match self {
+            BTreeKey::TableRowId(_) => panic!("BTreeKey::assert_index_key() called on TableRowId"),
+            BTreeKey::IndexKey(key) => key.get_values(),
+        }
+    }
+}
+
 #[derive(Clone)]
 struct BalanceInfo {
     /// Old pages being balanced.
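A minimal usage sketch of the unified `BTreeKey` API introduced above, assuming a
`cursor` and `pager` set up as in the btree tests later in this series; the literal
values are illustrative:

    // Table btrees are keyed by an integer rowid; the record is the row payload.
    let record = ImmutableRecord::from_registers(&[Register::OwnedValue(
        OwnedValue::Integer(7),
    )]);
    run_until_done(
        || cursor.insert(&BTreeKey::new_table_rowid(42, Some(&record)), true),
        pager.deref(),
    )
    .unwrap();
    // Index btrees are keyed by the record itself; moved_before = false makes
    // insert() seek to the correct leaf position first.
    run_until_done(
        || cursor.insert(&BTreeKey::new_index_key(&record), false),
        pager.deref(),
    )
    .unwrap();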
@@ -1047,72 +1091,13 @@ impl BTreeCursor { } } - pub fn insert_index_key(&mut self, key: &ImmutableRecord) -> Result> { - if let CursorState::None = &self.state { - self.state = CursorState::Write(WriteInfo::new()); - } - - let ret = loop { - let write_state = self.state.mut_write_info().unwrap().state; - match write_state { - WriteState::Start => { - let page = self.stack.top(); - return_if_locked!(page); - page.set_dirty(); - self.pager.add_dirty(page.get().id); - let page = page.get().contents.as_mut().unwrap(); - - assert!(matches!(page.page_type(), PageType::IndexLeaf)); - let cell_idx = self.find_index_cell(page, key); - let mut cell_payload: Vec = Vec::new(); - fill_cell_payload( - page.page_type(), - None, - &mut cell_payload, - key, - self.usable_space() as u16, - self.pager.clone(), - ); - // insert - let overflow = { - debug!( - "insert_index_key(overflow, cell_count={})", - page.cell_count() - ); - insert_into_cell( - page, - cell_payload.as_slice(), - cell_idx, - self.usable_space() as u16, - )?; - page.overflow_cells.len() - }; - let write_info = self.state.mut_write_info().unwrap(); - write_info.state = if overflow > 0 { - WriteState::BalanceStart - } else { - WriteState::Finish - }; - } - WriteState::BalanceStart - | WriteState::BalanceNonRoot - | WriteState::BalanceNonRootWaitLoadPages => { - return_if_io!(self.balance()); - } - WriteState::Finish => break Ok(CursorResult::Ok(())), - } - }; - self.state = CursorState::None; - ret - } - /// Insert a record into the btree. /// If the insert operation overflows the page, it will be split and the btree will be balanced. - fn insert_into_page( - &mut self, - key: &OwnedValue, - record: &ImmutableRecord, - ) -> Result> { + fn insert_into_page(&mut self, bkey: &BTreeKey) -> Result> { + let record = bkey + .get_record() + .expect("expected record present on insert"); + if let CursorState::None = &self.state { self.state = CursorState::Write(WriteInfo::new()); } @@ -1128,10 +1113,6 @@ impl BTreeCursor { WriteState::Start => { let page = self.stack.top(); return_if_locked_maybe_load!(self.pager, page); - let int_key = match key { - OwnedValue::Integer(i) => *i as u64, - _ => unreachable!("btree tables are indexed by integers!"), - }; // get page and find cell let (cell_idx, page_type) = { @@ -1141,23 +1122,27 @@ impl BTreeCursor { self.pager.add_dirty(page.get().id); let page = page.get().contents.as_mut().unwrap(); - assert!(matches!(page.page_type(), PageType::TableLeaf)); + assert!(matches!( + page.page_type(), + PageType::TableLeaf | PageType::IndexLeaf + )); // find cell - (self.find_cell(page, int_key), page.page_type()) + (self.find_cell(page, bkey), page.page_type()) }; tracing::debug!("insert_into_page(cell_idx={})", cell_idx); // if the cell index is less than the total cells, check: if its an existing // rowid, we are going to update / overwrite the cell if cell_idx < page.get_contents().cell_count() { - if let BTreeCell::TableLeafCell(tbl_leaf) = page.get_contents().cell_get( + match page.get_contents().cell_get( cell_idx, payload_overflow_threshold_max(page_type, self.usable_space() as u16), payload_overflow_threshold_min(page_type, self.usable_space() as u16), self.usable_space(), )? 
{ - if tbl_leaf._rowid == int_key { + BTreeCell::TableLeafCell(tbl_leaf) => { + if tbl_leaf._rowid == bkey.to_rowid() { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); self.overwrite_cell(page.clone(), cell_idx, record)?; self.state @@ -1167,12 +1152,37 @@ impl BTreeCursor { continue; } } + BTreeCell::IndexLeafCell(idx_leaf) => { + read_record( + idx_leaf.payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + ) + .expect("failed to read record"); + if compare_immutable( + record.get_values(), + self.get_immutable_record() + .as_ref() + .unwrap() + .get_values() + ) == Ordering::Equal { + + tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); + self.overwrite_cell(page.clone(), cell_idx, record)?; + self.state + .mut_write_info() + .expect("expected write info") + .state = WriteState::Finish; + continue; + } + } + other => panic!("unexpected cell type, expected TableLeaf or IndexLeaf, found: {:?}", other), + } } // insert cell let mut cell_payload: Vec = Vec::with_capacity(record.len() + 4); fill_cell_payload( page_type, - Some(int_key), + bkey.maybe_rowid(), &mut cell_payload, record, self.usable_space() as u16, @@ -1971,8 +1981,7 @@ impl BTreeCursor { } /// Find the index of the cell in the page that contains the given rowid. - /// BTree tables only. - fn find_cell(&self, page: &PageContent, int_key: u64) -> usize { + fn find_cell(&self, page: &PageContent, key: &BTreeKey) -> usize { let mut cell_idx = 0; let cell_count = page.cell_count(); while cell_idx < cell_count { @@ -1986,35 +1995,15 @@ impl BTreeCursor { .unwrap() { BTreeCell::TableLeafCell(cell) => { - if int_key <= cell._rowid { + if key.to_rowid() <= cell._rowid { break; } } BTreeCell::TableInteriorCell(cell) => { - if int_key <= cell._rowid { + if key.to_rowid() <= cell._rowid { break; } } - _ => todo!(), - } - cell_idx += 1; - } - cell_idx - } - - fn find_index_cell(&self, page: &PageContent, key: &ImmutableRecord) -> usize { - let mut cell_idx = 0; - let cell_count = page.cell_count(); - while cell_idx < cell_count { - match page - .cell_get( - cell_idx, - payload_overflow_threshold_max(page.page_type(), self.usable_space() as u16), - payload_overflow_threshold_min(page.page_type(), self.usable_space() as u16), - self.usable_space(), - ) - .unwrap() - { BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, .. }) | BTreeCell::IndexLeafCell(IndexLeafCell { payload, .. 
}) => { // TODO: implement efficient comparison of records @@ -2025,7 +2014,7 @@ impl BTreeCursor { ) .expect("failed to read record"); let order = compare_immutable( - key.get_values(), + key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), ); match order { @@ -2035,7 +2024,6 @@ impl BTreeCursor { Ordering::Greater => {} } } - _ => unreachable!("Expected Index cell types"), } cell_idx += 1; } @@ -2158,28 +2146,34 @@ impl BTreeCursor { pub fn insert( &mut self, - key: &OwnedValue, - record: &ImmutableRecord, + key: &BTreeKey, moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */ ) -> Result> { - let int_key = match key { - OwnedValue::Integer(i) => i, - _ => unreachable!("btree tables are indexed by integers!"), - }; match &self.mv_cursor { - Some(mv_cursor) => { - let row_id = - crate::mvcc::database::RowID::new(self.table_id() as u64, *int_key as u64); - let record_buf = record.get_payload().to_vec(); - let row = crate::mvcc::database::Row::new(row_id, record_buf); - mv_cursor.borrow_mut().insert(row).unwrap(); - } + Some(mv_cursor) => match key.maybe_rowid() { + Some(rowid) => { + let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, rowid); + let record_buf = key.get_record().unwrap().get_payload().to_vec(); + let row = crate::mvcc::database::Row::new(row_id, record_buf); + mv_cursor.borrow_mut().insert(row).unwrap(); + } + None => todo!("Support mvcc inserts with index btrees"), + }, None => { if !moved_before { - return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); + return_if_io!(self.move_to( + match key { + BTreeKey::IndexKey(_) => SeekKey::IndexKey(key.get_record().unwrap()), + BTreeKey::TableRowId(_) => SeekKey::TableRowId(key.to_rowid()), + }, + SeekOp::EQ + )); + } + return_if_io!(self.insert_into_page(key)); + if key.maybe_rowid().is_some() { + let int_key = key.to_rowid(); + self.rowid.replace(Some(int_key)); } - return_if_io!(self.insert_into_page(key, record)); - self.rowid.replace(Some(*int_key as u64)); } }; Ok(CursorResult::Ok(())) @@ -2543,7 +2537,7 @@ impl BTreeCursor { OwnedValue::Integer(i) => *i as u64, _ => unreachable!("btree tables are indexed by integers!"), }; - let cell_idx = self.find_cell(contents, int_key); + let cell_idx = self.find_cell(contents, &BTreeKey::new_table_rowid(int_key, None)); if cell_idx >= contents.cell_count() { Ok(CursorResult::Ok(false)) } else { @@ -4199,12 +4193,15 @@ mod tests { pager.deref(), ) .unwrap(); - let key = OwnedValue::Integer(*key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( OwnedValue::Blob(vec![0; *size]), )]); tracing::info!("insert key:{}", key); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(*key, Some(&value)), true), + pager.deref(), + ) + .unwrap(); tracing::info!( "=========== btree ===========\n{}\n\n", format_btree(pager.clone(), root_page, 0) @@ -4279,12 +4276,14 @@ mod tests { pager.deref(), ) .unwrap(); - - let key = OwnedValue::Integer(key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( OwnedValue::Blob(vec![0; size]), )]); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); if matches!(validate_btree(pager.clone(), root_page), (_, false)) { panic!("invalid btree"); } @@ -5170,7 +5169,11 @@ mod 
tests { pager.deref(), ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); keys.push(i); } if matches!(validate_btree(pager.clone(), root_page), (_, false)) { @@ -5248,7 +5251,11 @@ mod tests { ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); } match validate_btree(pager.clone(), root_page) { @@ -5326,7 +5333,11 @@ mod tests { pager.deref(), ) .unwrap(); - run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); + run_until_done( + || cursor.insert(&BTreeKey::new_table_rowid(i as u64, Some(&value)), true), + pager.deref(), + ) + .unwrap(); tracing::debug!( "=========== btree after ===========\n{}\n\n", format_btree(pager.clone(), root_page, 0) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 968fea4eb..f446da777 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -11,7 +11,7 @@ use std::{borrow::BorrowMut, rc::Rc}; use crate::pseudo::PseudoCursor; use crate::result::LimboResult; use crate::schema::{affinity, Affinity}; -use crate::storage::btree::BTreeCursor; +use crate::storage::btree::{BTreeCursor, BTreeKey}; use crate::storage::wal::CheckpointResult; use crate::types::{ AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp, @@ -3666,11 +3666,14 @@ pub fn op_insert_async( Register::Record(r) => r, _ => unreachable!("Not a record! Cannot insert a non record value."), }; - let key = &state.registers[*key_reg]; + let key = match &state.registers[*key_reg].get_owned_value() { + OwnedValue::Integer(i) => *i, + _ => unreachable!("expected integer key"), + }; // NOTE(pere): Sending moved_before == true is okay because we moved before but // if we were to set to false after starting a balance procedure, it might // leave undefined state. 
- return_if_io!(cursor.insert(key.get_owned_value(), record, true)); + return_if_io!(cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(record)), true)); } state.pc += 1; Ok(InsnFunctionStepResult::Step) @@ -3765,7 +3768,7 @@ pub fn op_idx_insert_async( flags.has(IdxInsertFlags::USE_SEEK) }; // insert record as key - return_if_io!(cursor.insert_index_key(record)); + return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before)); } state.pc += 1; } From a2b9d8d371c50b931e4bce18f0ea3f065e14df45 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 31 Mar 2025 20:55:37 -0400 Subject: [PATCH 040/425] Use Correct flag on idx insert to prevent seeking --- core/vdbe/mod.rs | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 673b836a4..8794b208a 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -394,7 +394,7 @@ impl Program { } mv_transactions.clear(); } - return Ok(StepResult::Done); + Ok(StepResult::Done) } else { let connection = self .connection @@ -408,30 +408,28 @@ impl Program { ); if program_state.halt_state.is_some() { self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref()) - } else { - if auto_commit { - let current_state = connection.transaction_state.borrow().clone(); - match current_state { - TransactionState::Write => self.step_end_write_txn( - &pager, - &mut program_state.halt_state, - connection.deref(), - ), - TransactionState::Read => { - connection.transaction_state.replace(TransactionState::None); - pager.end_read_tx()?; - Ok(StepResult::Done) - } - TransactionState::None => Ok(StepResult::Done), + } else if auto_commit { + let current_state = connection.transaction_state.borrow().clone(); + match current_state { + TransactionState::Write => self.step_end_write_txn( + &pager, + &mut program_state.halt_state, + connection.deref(), + ), + TransactionState::Read => { + connection.transaction_state.replace(TransactionState::None); + pager.end_read_tx()?; + Ok(StepResult::Done) } - } else { - if self.change_cnt_on { - if let Some(conn) = self.connection.upgrade() { - conn.set_changes(self.n_change.get()); - } - } - Ok(StepResult::Done) + TransactionState::None => Ok(StepResult::Done), } + } else { + if self.change_cnt_on { + if let Some(conn) = self.connection.upgrade() { + conn.set_changes(self.n_change.get()); + } + } + Ok(StepResult::Done) } } } From 6b42808f1a86ef297de57a39a2ad2f1fbcf7a09a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 31 Mar 2025 21:57:56 -0400 Subject: [PATCH 041/425] Dont re-seek if we are inserting a new unique index --- core/storage/btree.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 11a80ef4b..7dfb82005 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2161,13 +2161,15 @@ impl BTreeCursor { }, None => { if !moved_before { - return_if_io!(self.move_to( - match key { - BTreeKey::IndexKey(_) => SeekKey::IndexKey(key.get_record().unwrap()), - BTreeKey::TableRowId(_) => SeekKey::TableRowId(key.to_rowid()), - }, - SeekOp::EQ - )); + match key { + BTreeKey::IndexKey(_) => { + return_if_io!(self + .move_to(SeekKey::IndexKey(key.get_record().unwrap()), SeekOp::GE)) + } + BTreeKey::TableRowId(_) => return_if_io!( + self.move_to(SeekKey::TableRowId(key.to_rowid()), SeekOp::EQ) + ), + } } return_if_io!(self.insert_into_page(key)); if key.maybe_rowid().is_some() { From 
abc97c877482033b30db263c9c924c2147be738e Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Tue, 1 Apr 2025 10:27:30 -0400
Subject: [PATCH 042/425] Add doc comments to new btree key enum and remove
 unused lifetimes

---
 core/storage/btree.rs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 7dfb82005..ae710859b 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -177,13 +177,19 @@ pub enum BTreeKey<'a> {
     IndexKey(&'a ImmutableRecord),
 }

-impl<'a> BTreeKey<'_> {
-    pub fn new_table_rowid(rowid: u64, record: Option<&'a ImmutableRecord>) -> BTreeKey<'a> {
+impl BTreeKey<'_> {
+    /// Create a new table rowid key from a rowid and an optional immutable record.
+    /// The record is optional because it may not be available when the key is created.
+    pub fn new_table_rowid(rowid: u64, record: Option<&ImmutableRecord>) -> BTreeKey<'_> {
         BTreeKey::TableRowId((rowid, record))
     }
-    pub fn new_index_key(record: &'a ImmutableRecord) -> BTreeKey<'a> {
+
+    /// Create a new index key from an immutable record.
+    pub fn new_index_key(record: &ImmutableRecord) -> BTreeKey<'_> {
         BTreeKey::IndexKey(record)
     }
+
+    /// Get the record, if present. An index key always has a record.
     fn get_record(&self) -> Option<&'_ ImmutableRecord> {
         match self {
             BTreeKey::TableRowId((_, record)) => *record,
@@ -191,6 +197,7 @@ impl<'a> BTreeKey<'_> {
         }
     }

+    /// Get the rowid, if present. An index key never has a rowid.
     fn maybe_rowid(&self) -> Option<u64> {
         match self {
             BTreeKey::TableRowId((rowid, _)) => Some(*rowid),
@@ -198,18 +205,18 @@ impl<'a> BTreeKey<'_> {
         }
     }

-    /// Assert that the key is a rowid and return it.
+    /// Assert that the key is an integer rowid and return it.
     fn to_rowid(&self) -> u64 {
         match self {
             BTreeKey::TableRowId((rowid, _)) => *rowid,
-            BTreeKey::IndexKey(_) => panic!("BTreeKey::assert_rowid() called on IndexKey"),
+            BTreeKey::IndexKey(_) => panic!("BTreeKey::to_rowid called on IndexKey"),
         }
     }

     /// Assert that the key is an index key and return it.
fn to_index_key_values(&self) -> &'_ Vec { match self { - BTreeKey::TableRowId(_) => panic!("BTreeKey::assert_index_key() called on TableRowId"), + BTreeKey::TableRowId(_) => panic!("BTreeKey::to_index_key called on TableRowId"), BTreeKey::IndexKey(key) => key.get_values(), } } From bd04b10f172d1edc3969b53453351a43395b3b73 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 1 Apr 2025 22:24:30 -0400 Subject: [PATCH 043/425] Fix btree tests to adapt to new type for BTreeKey --- core/storage/btree.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index ae710859b..789f9cb39 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4196,7 +4196,7 @@ mod tests { for (key, size) in sequence.iter() { run_until_done( || { - let key = SeekKey::TableRowId(*key as u64); + let key = SeekKey::TableRowId(*key); cursor.move_to(key, SeekOp::EQ) }, pager.deref(), @@ -4217,7 +4217,7 @@ mod tests { ); } for (key, _) in sequence.iter() { - let seek_key = SeekKey::TableRowId(*key as u64); + let seek_key = SeekKey::TableRowId(*key); assert!( matches!( cursor.seek(seek_key, SeekOp::EQ).unwrap(), @@ -5166,7 +5166,6 @@ mod tests { for i in 0..10000 { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); - let key = OwnedValue::Integer(i); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(i))]); tracing::trace!("before insert {}", i); @@ -5246,7 +5245,6 @@ mod tests { // Insert 10,000 records in to the BTree. for i in 1..=10000 { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); - let key = OwnedValue::Integer(i); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text( Text::new("hello world"), ))]); @@ -5323,7 +5321,6 @@ mod tests { for i in 0..iterations { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); - let key = OwnedValue::Integer(i as i64); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(Text { value: huge_texts[i].as_bytes().to_vec(), From 399994bf6644acb1878cde58ed25538078ea8b49 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 2 Apr 2025 21:29:08 -0400 Subject: [PATCH 044/425] Fix ext tests start with no default schema --- testing/cli_tests/extensions.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index d898908f9..bab8cb74f 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -342,6 +342,7 @@ def test_kv(): # first, create a normal table to ensure no issues limbo.execute_dot("CREATE TABLE other (a,b,c);") limbo.execute_dot("INSERT INTO other values (23,32,23);") + limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Virtual table module not found: kv_store" in res, @@ -350,6 +351,7 @@ def test_kv(): limbo.debug_print( "create virtual table t using kv_store;", ) + limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) limbo.run_test_fn( "insert into t values ('hello', 'world');", null, @@ -496,12 +498,6 @@ def test_vfs(): "Tested large write to testfs", ) print("Tested large write to testfs") - # Pere: I commented this out because it added an extra row that made the test test_sqlite_vfs_compat fail - # it didn't segfault from my side so maybe this is necessary? 
- # # open regular db file to ensure we don't segfault when vfs file is dropped - # limbo.execute_dot(".open testing/vfs.db") - # limbo.execute_dot("create table test (id integer primary key, value float);") - # limbo.execute_dot("insert into test (value) values (1.0);") limbo.quit() @@ -548,10 +544,10 @@ if __name__ == "__main__": test_aggregates() test_crypto() test_series() - test_kv() test_ipaddr() test_vfs() test_sqlite_vfs_compat() + test_kv() except Exception as e: print(f"Test FAILED: {e}") cleanup() From 83af71f1409141192c3408591d3a1cf9c4c8a2a2 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 11:30:57 -0400 Subject: [PATCH 045/425] Return accidentally deleted comment on SeekGE insn from merge conflict --- core/vdbe/insn.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 4a6bc1ea4..8d3a9afca 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -484,10 +484,10 @@ pub enum Insn { target_pc: BranchOffset, }, - // cursor_id is a cursor pointing to a B-Tree index that uses integer keys, this op writes the value obtained from MakeRecord into the index. - // P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. - // If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. - // OPFLAG_NCHANGE bit set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged + /// cursor_id is a cursor pointing to a B-Tree index that uses integer keys, this op writes the value obtained from MakeRecord into the index. + /// P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. + /// If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. + /// OPFLAG_NCHANGE bit set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged IdxInsertAsync { cursor_id: CursorID, record_reg: usize, // P2 the register containing the record to insert @@ -498,6 +498,9 @@ pub enum Insn { IdxInsertAwait { cursor_id: CursorID, }, + + /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. + /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. 
IdxGE { cursor_id: CursorID, start_reg: usize, From 67eda10453be1d36e39d4c099e3e2c7d2066629d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 16:19:56 -0400 Subject: [PATCH 046/425] Allow reading altered tables by defaulting to null in Column insn --- core/types.rs | 4 ++++ core/vdbe/execute.rs | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/core/types.rs b/core/types.rs index 9e7869b76..1556ee100 100644 --- a/core/types.rs +++ b/core/types.rs @@ -732,6 +732,10 @@ impl ImmutableRecord { &self.values[idx] } + pub fn get_value_opt(&self, idx: usize) -> Option<&RefValue> { + self.values.get(idx) + } + pub fn len(&self) -> usize { self.values.len() } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 3c511a0db..58153319c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1278,7 +1278,10 @@ pub fn op_column( if cursor.get_null_flag() { RefValue::Null } else { - record.get_value(*column).clone() + match record.get_value_opt(*column) { + Some(val) => val.clone(), + None => RefValue::Null, + } } } else { RefValue::Null @@ -1305,10 +1308,14 @@ pub fn op_column( let record = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_sorter_mut(); - cursor.record().map(|r| r.clone()) + cursor.record().cloned() }; if let Some(record) = record { - state.registers[*dest] = Register::OwnedValue(record.get_value(*column).to_owned()); + state.registers[*dest] = + Register::OwnedValue(match record.get_value_opt(*column) { + Some(val) => val.to_owned(), + None => OwnedValue::Null, + }); } else { state.registers[*dest] = Register::OwnedValue(OwnedValue::Null); } From c9edf71fb8f75a8ecea2d0abfbaf3c88f976d086 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 21:27:21 -0400 Subject: [PATCH 047/425] Support insert default values syntax --- core/translate/insert.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 5fda098e6..063a2e264 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -84,11 +84,11 @@ pub fn translate_insert( ); let root_page = btree_table.root_page; let values = match body { - InsertBody::Select(select, None) => match &select.body.select.deref() { + InsertBody::Select(select, _) => match &select.body.select.deref() { OneSelect::Values(values) => values, _ => todo!(), }, - _ => todo!(), + InsertBody::DefaultValues => &vec![vec![]], }; let column_mappings = resolve_columns_for_insert(&table, columns, values)?; From ff482fc75371f111355ce7f0ea1144cad63c32a5 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 21:34:27 -0400 Subject: [PATCH 048/425] Add test case for insert default values --- testing/cli_tests/cli_test_cases.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py index 120d20070..2bce1f165 100755 --- a/testing/cli_tests/cli_test_cases.py +++ b/testing/cli_tests/cli_test_cases.py @@ -264,6 +264,16 @@ def test_update_with_limit(): limbo.quit() +def test_insert_default_values(): + limbo = TestLimboShell( + "CREATE TABLE t (a integer default(42),b integer default (43),c integer default(44));" + ) + for _ in range(1, 10): + limbo.execute_dot("INSERT INTO t DEFAULT VALUES;") + limbo.run_test("insert-default-values", "SELECT * FROM t;", "42|43|44\n" * 9) + limbo.quit() + + if __name__ == "__main__": print("Running all Limbo CLI tests...") test_basic_queries() From 02ee2cf90eef934b8a2f478aff0d004a9d190b8f 
Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sun, 6 Apr 2025 23:02:41 +0530 Subject: [PATCH 049/425] Add `Clock` trait --- core/io/clock.rs | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 core/io/clock.rs diff --git a/core/io/clock.rs b/core/io/clock.rs new file mode 100644 index 000000000..3a38ad955 --- /dev/null +++ b/core/io/clock.rs @@ -0,0 +1,9 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Instant { + pub secs: i64, + pub micros: u32, +} + +pub trait Clock { + fn now(&self) -> Instant; +} From 8d6d50d9d521bde36e0cb63385d4ca9540756d9a Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sun, 6 Apr 2025 23:05:31 +0530 Subject: [PATCH 050/425] Update IO trait - Remove existing `get_current_time() -> String` - Add super trait bound `Clock` --- core/io/mod.rs | 7 ++++--- core/lib.rs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/io/mod.rs b/core/io/mod.rs index 7eb8845bb..3f5a08026 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -34,14 +34,13 @@ impl OpenFlags { } } -pub trait IO: Send + Sync { + +pub trait IO: Clock + Send + Sync { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result>; fn run_once(&self) -> Result<()>; fn generate_random_number(&self) -> i64; - - fn get_current_time(&self) -> String; } pub type Complete = dyn Fn(Arc>); @@ -217,3 +216,5 @@ mod memory; mod vfs; pub use memory::MemoryIO; mod common; +pub mod clock; +pub use clock::Clock; diff --git a/core/lib.rs b/core/lib.rs index e827c3d0d..e364b226d 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -27,6 +27,7 @@ static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use crate::{fast_lock::SpinLock, translate::optimizer::optimize_plan}; pub use error::LimboError; use fallible_iterator::FallibleIterator; +pub use io::clock::{Clock, Instant}; #[cfg(all(feature = "fs", target_family = "unix"))] pub use io::UnixIO; #[cfg(all(feature = "fs", target_os = "linux", feature = "io_uring"))] @@ -66,7 +67,6 @@ pub use types::OwnedValue; pub use types::RefValue; use util::{columns_from_create_table_body, parse_schema_rows}; use vdbe::{builder::QueryMode, VTabOpaqueCursor}; - pub type Result = std::result::Result; pub static DATABASE_VERSION: OnceLock = OnceLock::new(); From 2873c36b317f8279c9b441982c1718d0f7cef44d Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sun, 6 Apr 2025 23:07:40 +0530 Subject: [PATCH 051/425] Implement trait `Clock` for implemented IOs Replace `chrono::Local::now()` to return `Instant` but containing same info --- core/io/generic.rs | 12 +++++++++--- core/io/io_uring.rs | 11 +++++++++-- core/io/memory.rs | 17 ++++++++++++----- core/io/mod.rs | 3 +-- core/io/unix.rs | 15 +++++++++++---- core/io/vfs.rs | 15 ++++++++++++++- core/io/windows.rs | 12 +++++++++--- 7 files changed, 65 insertions(+), 20 deletions(-) diff --git a/core/io/generic.rs b/core/io/generic.rs index 1c0a5b6ff..e1c7eb1f1 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -1,4 +1,4 @@ -use crate::{Completion, File, LimboError, OpenFlags, Result, IO}; +use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; @@ -38,9 +38,15 @@ impl IO for GenericIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } +} - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() +impl Clock for GenericIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + 
Instant {
            secs: now.timestamp(),
            micros: now.timestamp_subsec_micros(),
        }
    }
}

diff --git a/core/io/io_uring.rs b/core/io/io_uring.rs
index cca473790..77d574639 100644
--- a/core/io/io_uring.rs
+++ b/core/io/io_uring.rs
@@ -11,6 +11,7 @@ use std::rc::Rc;
 use std::sync::Arc;
 use thiserror::Error;
 use tracing::{debug, trace};
+use crate::io::clock::{Clock, Instant};

 const MAX_IOVECS: u32 = 128;
 const SQPOLL_IDLE: u32 = 1000;
@@ -196,9 +197,15 @@ impl IO for UringIO {
         getrandom::getrandom(&mut buf).unwrap();
         i64::from_ne_bytes(buf)
     }
+}

-    fn get_current_time(&self) -> String {
-        chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string()
+impl Clock for UringIO {
+    fn now(&self) -> Instant {
+        let now = chrono::Local::now();
+        Instant {
+            secs: now.timestamp(),
+            micros: now.timestamp_subsec_micros(),
+        }
     }
 }

diff --git a/core/io/memory.rs b/core/io/memory.rs
index c9519845d..92a61bba7 100644
--- a/core/io/memory.rs
+++ b/core/io/memory.rs
@@ -1,6 +1,7 @@
-use super::{Buffer, Completion, File, OpenFlags, IO};
+use super::{Buffer, Clock, Completion, File, OpenFlags, IO};

 use crate::Result;
+use crate::io::clock::Instant;
 use std::{
     cell::{Cell, RefCell, UnsafeCell},
     collections::BTreeMap,
@@ -29,6 +30,16 @@ impl Default for MemoryIO {
     }
 }

+impl Clock for MemoryIO {
+    fn now(&self) -> Instant {
+        let now = chrono::Local::now();
+        Instant {
+            secs: now.timestamp(),
+            micros: now.timestamp_subsec_micros(),
+        }
+    }
+}
+
 impl IO for MemoryIO {
     fn open_file(&self, _path: &str, _flags: OpenFlags, _direct: bool) -> Result<Arc<dyn File>> {
         Ok(Arc::new(MemoryFile {
@@ -47,10 +58,6 @@ impl IO for MemoryIO {
         getrandom::getrandom(&mut buf).unwrap();
         i64::from_ne_bytes(buf)
     }
-
-    fn get_current_time(&self) -> String {
-        chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string()
-    }
 }

 pub struct MemoryFile {
diff --git a/core/io/mod.rs b/core/io/mod.rs
index 3f5a08026..4dddbbcb0 100644
--- a/core/io/mod.rs
+++ b/core/io/mod.rs
@@ -34,7 +34,6 @@ impl OpenFlags {
     }
 }

-
 pub trait IO: Clock + Send + Sync {
     fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result<Arc<dyn File>>;

@@ -215,6 +214,6 @@ mod memory;
 #[cfg(feature = "fs")]
 mod vfs;
 pub use memory::MemoryIO;
-mod common;
 pub mod clock;
+mod common;
 pub use clock::Clock;
diff --git a/core/io/unix.rs b/core/io/unix.rs
index bbeb1266b..32054e2d5 100644
--- a/core/io/unix.rs
+++ b/core/io/unix.rs
@@ -18,6 +18,7 @@ use std::{
     sync::Arc,
 };
 use tracing::{debug, trace};
+use crate::io::clock::{Clock, Instant};

 struct OwnedCallbacks(UnsafeCell); // We assume that locking at the IO level is handled by the user.
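As a reference point, a deterministic `Clock` for tests can be as small as the
sketch below; `FixedClock` is an illustrative name, and the `SimulatorIO`
implementation in a later hunk does essentially the same thing:

    use limbo_core::{Clock, Instant};

    struct FixedClock;

    impl Clock for FixedClock {
        fn now(&self) -> Instant {
            // 2024-01-01 00:00:00 UTC as seconds since the Unix epoch.
            Instant {
                secs: 1704067200,
                micros: 0,
            }
        }
    }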
@@ -183,6 +184,16 @@ impl UnixIO { } } +impl Clock for UnixIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } + } +} + impl IO for UnixIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); @@ -247,10 +258,6 @@ impl IO for UnixIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } - - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() - } } enum CompletionCallback { diff --git a/core/io/vfs.rs b/core/io/vfs.rs index f3cac9d30..4d9a6d6e2 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -1,11 +1,21 @@ +use super::{Buffer, Completion, File, OpenFlags, IO}; use crate::ext::VfsMod; +use crate::io::clock::{Clock, Instant}; use crate::{LimboError, Result}; use limbo_ext::{VfsFileImpl, VfsImpl}; use std::cell::RefCell; use std::ffi::{c_void, CString}; use std::sync::Arc; -use super::{Buffer, Completion, File, OpenFlags, IO}; +impl Clock for VfsMod { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } + } +} impl IO for VfsMod { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { @@ -40,7 +50,10 @@ impl IO for VfsMod { let vfs = unsafe { &*self.ctx }; unsafe { (vfs.gen_random_number)() } } +} +impl VfsMod { + #[allow(dead_code)] // used in FFI call fn get_current_time(&self) -> String { if self.ctx.is_null() { return "".to_string(); diff --git a/core/io/windows.rs b/core/io/windows.rs index 9bfd523a9..2887ea308 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -1,4 +1,4 @@ -use crate::{Completion, File, LimboError, OpenFlags, Result, IO}; +use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; @@ -38,9 +38,15 @@ impl IO for WindowsIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } +} - fn get_current_time(&self) -> String { - chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string() +impl Clock for WindowsIO { + fn now(&self) -> Instant { + let now = chrono::Local::now(); + Instant { + secs: now.timestamp(), + micros: now.timestamp_subsec_micros(), + } } } From 3543e83b91d9be0739f6808857fcfeaf289b01a8 Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sun, 6 Apr 2025 23:09:00 +0530 Subject: [PATCH 052/425] Impl Clock trait in bindings --- bindings/javascript/src/lib.rs | 11 +++++++---- bindings/wasm/lib.rs | 24 ++++++++++++++++++------ simulator/runner/io.rs | 15 ++++++++++----- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 614b17677..a9c0d72a5 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -4,6 +4,7 @@ use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; +use limbo_core::{Clock, Instant}; use napi::{Env, JsUnknown, Result as NapiResult}; use napi_derive::napi; @@ -152,6 +153,12 @@ impl limbo_core::DatabaseStorage for DatabaseFile { struct IO {} +impl Clock for IO { + fn now(&self) -> Instant { + todo!() + } +} + impl limbo_core::IO for IO { fn open_file( &self, @@ -169,8 +176,4 @@ impl limbo_core::IO for IO { fn generate_random_number(&self) -> i64 { todo!(); } - - fn get_current_time(&self) -> String { - todo!(); - } } diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs 
index 3a5819efc..91680dc96 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -1,5 +1,7 @@ use js_sys::{Array, Object}; -use limbo_core::{maybe_init_database_file, OpenFlags, Pager, Result, WalFileShared}; +use limbo_core::{ + maybe_init_database_file, Clock, Instant, OpenFlags, Pager, Result, WalFileShared, +}; use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; @@ -269,6 +271,18 @@ pub struct PlatformIO { unsafe impl Send for PlatformIO {} unsafe impl Sync for PlatformIO {} +impl Clock for PlatformIO { + fn now(&self) -> Instant { + let date = Date::new(); + let ms_since_epoch = date.getTime(); + + Instant { + secs: (ms_since_epoch / 1000.0) as i64, + micros: ((ms_since_epoch % 1000.0) * 1000.0) as u32, + } + } +} + impl limbo_core::IO for PlatformIO { fn open_file( &self, @@ -291,11 +305,6 @@ impl limbo_core::IO for PlatformIO { let random_f64 = Math_random(); (random_f64 * i64::MAX as f64) as i64 } - - fn get_current_time(&self) -> String { - let date = Date::new(); - date.toISOString() - } } #[wasm_bindgen] @@ -312,6 +321,9 @@ extern "C" { #[wasm_bindgen(method, getter)] fn toISOString(this: &Date) -> String; + + #[wasm_bindgen(method)] + fn getTime(this: &Date) -> f64; } pub struct DatabaseFile { diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index 48340d170..d1c280b4e 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, sync::Arc}; -use limbo_core::{OpenFlags, PlatformIO, Result, IO}; +use limbo_core::{Clock, Instant, OpenFlags, PlatformIO, Result, IO}; use rand::{RngCore, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -52,6 +52,15 @@ impl SimulatorIO { } } +impl Clock for SimulatorIO { + fn now(&self) -> Instant { + Instant { + secs: 1704067200, // 2024-01-01 00:00:00 UTC + micros: 0, + } + } +} + impl IO for SimulatorIO { fn open_file( &self, @@ -88,8 +97,4 @@ impl IO for SimulatorIO { fn generate_random_number(&self) -> i64 { self.rng.borrow_mut().next_u64() as i64 } - - fn get_current_time(&self) -> String { - "2024-01-01 00:00:00".to_string() - } } From aa7c64cb192f262d7a77cebf26bac817c0fe4647 Mon Sep 17 00:00:00 2001 From: Duncan Lutz Date: Sun, 6 Apr 2025 23:14:30 -0600 Subject: [PATCH 053/425] feat: added likely scalar function --- COMPAT.md | 2 +- core/function.rs | 3 +++ core/translate/expr.rs | 27 +++++++++++++++++++++++++ core/vdbe/execute.rs | 37 ++++++++++++++++++++++++++++++++++- testing/scalar-functions.test | 16 +++++++++++++++ 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index b0cc85d83..f541c1f61 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -227,7 +227,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). 
| like(X,Y) | Yes | | | like(X,Y,Z) | Yes | | | likelihood(X,Y) | No | | -| likely(X) | No | | +| likely(X) | Yes | | | load_extension(X) | Yes | sqlite3 extensions not yet supported | | load_extension(X,Y) | No | | | lower(X) | Yes | | diff --git a/core/function.rs b/core/function.rs index 8f27b1a5b..4c235cca5 100644 --- a/core/function.rs +++ b/core/function.rs @@ -292,6 +292,7 @@ pub enum ScalarFunc { LoadExtension, StrfTime, Printf, + Likely, } impl Display for ScalarFunc { @@ -346,6 +347,7 @@ impl Display for ScalarFunc { Self::LoadExtension => "load_extension".to_string(), Self::StrfTime => "strftime".to_string(), Self::Printf => "printf".to_string(), + Self::Likely => "likely".to_string(), }; write!(f, "{}", str) } @@ -596,6 +598,7 @@ impl Func { "sqlite_version" => Ok(Self::Scalar(ScalarFunc::SqliteVersion)), "sqlite_source_id" => Ok(Self::Scalar(ScalarFunc::SqliteSourceId)), "replace" => Ok(Self::Scalar(ScalarFunc::Replace)), + "likely" => Ok(Self::Scalar(ScalarFunc::Likely)), #[cfg(feature = "json")] "json" => Ok(Self::Json(JsonFunc::Json)), #[cfg(feature = "json")] diff --git a/core/translate/expr.rs b/core/translate/expr.rs index cd384391a..7bb0dc228 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1571,6 +1571,33 @@ pub fn translate_expr( target_register, func_ctx, ), + ScalarFunc::Likely => { + let args = if let Some(args) = args { + if args.len() != 1 { + crate::bail_parse_error!( + "likely function must have exactly 1 argument", + ); + } + args + } else { + crate::bail_parse_error!("likely function with no arguments",); + }; + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } } } Func::Math(math_func) => match math_func.arity() { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index d2c636bae..09c283ecd 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3448,6 +3448,11 @@ pub fn op_function( let result = exec_printf(&state.registers[*start_reg..*start_reg + arg_count])?; state.registers[*dest] = Register::OwnedValue(result); } + ScalarFunc::Likely => { + let value = &state.registers[*start_reg].borrow_mut(); + let result = exec_likely(value.get_owned_value()); + state.registers[*dest] = Register::OwnedValue(result); + } }, crate::function::Func::Vector(vector_func) => match vector_func { VectorFunc::Vector => { @@ -5220,9 +5225,16 @@ fn exec_math_log(arg: &OwnedValue, base: Option<&OwnedValue>) -> OwnedValue { OwnedValue::Float(result) } +fn exec_likely(reg: &OwnedValue) -> OwnedValue { + reg.clone() +} + #[cfg(test)] mod tests { - use crate::vdbe::{execute::exec_replace, Bitfield, Register}; + use crate::vdbe::{ + execute::{exec_likely, exec_replace}, + Bitfield, Register, + }; use super::{ exec_abs, exec_char, exec_hex, exec_if, exec_instr, exec_length, exec_like, exec_lower, @@ -6114,6 +6126,29 @@ mod tests { ); } + #[test] + fn test_likely() { + let input = OwnedValue::build_text("limbo"); + let expected = OwnedValue::build_text("limbo"); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Integer(100); + let expected = OwnedValue::Integer(100); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Float(12.34); + let expected = OwnedValue::Float(12.34); + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Null; + let expected = 
OwnedValue::Null; + assert_eq!(exec_likely(&input), expected); + + let input = OwnedValue::Blob(vec![1, 2, 3, 4]); + let expected = OwnedValue::Blob(vec![1, 2, 3, 4]); + assert_eq!(exec_likely(&input), expected); + } + #[test] fn test_bitfield() { let mut bitfield = Bitfield::<4>::new(); diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 01feb7c1b..09e99a8f3 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -195,6 +195,22 @@ do_execsql_test hex-null { select hex(null) } {} +do_execsql_test likely { + select likely('limbo') +} {limbo} + +do_execsql_test likely-int { + select likely(100) +} {100} + +do_execsql_test likely-decimal { + select likely(12.34) +} {12.34} + +do_execsql_test likely-null { + select likely(NULL) +} {} + do_execsql_test unhex-str-ab { SELECT unhex('6162'); } {ab} From 3a1b87eb216f458f16112ffa84c57803e7bf6c90 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 7 Apr 2025 09:02:55 +0300 Subject: [PATCH 054/425] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index e23823c3e..d7619dab9 100644 --- a/README.md +++ b/README.md @@ -190,3 +190,11 @@ terms or conditions. [contribution guide]: https://github.com/tursodatabase/limbo/blob/main/CONTRIBUTING.md [MIT license]: https://github.com/tursodatabase/limbo/blob/main/LICENSE.md + +## Contributors + +Thanks to all the contributors to Limbo! + + + + From a72b75e193b3a8cd03f772f22e030c3c0c14d15c Mon Sep 17 00:00:00 2001 From: jachewz Date: Mon, 7 Apr 2025 21:45:49 +1000 Subject: [PATCH 055/425] fix: remainder operator rhs text --- core/vdbe/insn.rs | 7 +++++-- testing/math.test | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 8d3a9afca..fa201079f 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1001,9 +1001,12 @@ pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { &cast_text_to_numeric(lhs.as_str()), &cast_text_to_numeric(rhs.as_str()), ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + (OwnedValue::Text(text), other) => { exec_remainder(&cast_text_to_numeric(text.as_str()), other) } +(other, OwnedValue::Text(text)) => { + exec_remainder(other, &cast_text_to_numeric(text.as_str())) + } other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), } } @@ -1699,7 +1702,7 @@ mod tests { ), ( OwnedValue::Float(12.0), - OwnedValue::Text(Text::from_str("12.0")), + OwnedValue::Text(Text::from_str("3.0")), ), ]; let outputs = vec![ diff --git a/testing/math.test b/testing/math.test index d1747f976..dcfaa97df 100755 --- a/testing/math.test +++ b/testing/math.test @@ -1357,6 +1357,19 @@ do_execsql_test mod-agg-float { SELECT count(*) % 2.43 from users } { 0.0 } +foreach {testnum lhs rhs ans} { + 1 'a' 'a' {} + 2 'a' 10 0 + 3 10 'a' {} + 4 'a' 11.0 0.0 + 5 11.0 'a' {} + 7 '10' '3' 1 + 8 '10.0' '3' 1.0 + 9 '10.0' -3 1.0 +} { + do_execsql_test mod-text-$testnum "SELECT $lhs % $rhs" $::ans +} + do_execsql_test comp-float-float { SELECT 0.0 = 0.0 } { 1 } From db15661b7ef9db6c1db1134fb7d876ab28345d6b Mon Sep 17 00:00:00 2001 From: jachewz Date: Mon, 7 Apr 2025 21:49:09 +1000 Subject: [PATCH 056/425] fix: i64 % -1 overflow panic --- core/vdbe/insn.rs | 20 +++++++++++++++----- testing/math.test | 8 ++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index fa201079f..7fffb9b22 100644 --- a/core/vdbe/insn.rs +++ 
b/core/vdbe/insn.rs @@ -971,7 +971,7 @@ pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { if rhs == &0 { OwnedValue::Null } else { - OwnedValue::Integer(lhs % rhs) + OwnedValue::Integer(lhs % rhs.abs()) } } (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { @@ -979,14 +979,14 @@ pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { if rhs_int == 0 { OwnedValue::Null } else { - OwnedValue::Float(((*lhs as i64) % rhs_int) as f64) + OwnedValue::Float(((*lhs as i64) % rhs_int.abs()) as f64) } } (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { if rhs == &0 { OwnedValue::Null } else { - OwnedValue::Float(((*lhs as i64) % rhs) as f64) + OwnedValue::Float(((*lhs as i64) % rhs.abs()) as f64) } } (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { @@ -994,7 +994,7 @@ pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { if rhs_int == 0 { OwnedValue::Null } else { - OwnedValue::Float((lhs % rhs_int) as f64) + OwnedValue::Float((lhs % rhs_int.abs()) as f64) } } (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder( @@ -1004,7 +1004,7 @@ pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { (OwnedValue::Text(text), other) => { exec_remainder(&cast_text_to_numeric(text.as_str()), other) } -(other, OwnedValue::Text(text)) => { + (other, OwnedValue::Text(text)) => { exec_remainder(other, &cast_text_to_numeric(text.as_str())) } other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), @@ -1688,10 +1688,15 @@ mod tests { (OwnedValue::Float(12.0), OwnedValue::Float(0.0)), (OwnedValue::Float(12.0), OwnedValue::Integer(0)), (OwnedValue::Integer(12), OwnedValue::Float(0.0)), + (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), (OwnedValue::Integer(12), OwnedValue::Integer(3)), (OwnedValue::Float(12.0), OwnedValue::Float(3.0)), (OwnedValue::Float(12.0), OwnedValue::Integer(3)), (OwnedValue::Integer(12), OwnedValue::Float(3.0)), + (OwnedValue::Integer(12), OwnedValue::Integer(-3)), + (OwnedValue::Float(12.0), OwnedValue::Float(-3.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(-3)), + (OwnedValue::Integer(12), OwnedValue::Float(-3.0)), ( OwnedValue::Text(Text::from_str("12.0")), OwnedValue::Text(Text::from_str("3.0")), @@ -1716,6 +1721,11 @@ mod tests { OwnedValue::Null, OwnedValue::Null, OwnedValue::Null, + OwnedValue::Float(0.0), + OwnedValue::Integer(0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), OwnedValue::Integer(0), OwnedValue::Float(0.0), OwnedValue::Float(0.0), diff --git a/testing/math.test b/testing/math.test index dcfaa97df..51f1df56a 100755 --- a/testing/math.test +++ b/testing/math.test @@ -1370,6 +1370,14 @@ foreach {testnum lhs rhs ans} { do_execsql_test mod-text-$testnum "SELECT $lhs % $rhs" $::ans } +foreach {testnum lhs rhs ans} { + 1 '-9223372036854775808' '-1' 0 + 2 -9223372036854775808 -1 0 + 3 -9223372036854775809 -1 0.0 +} { + do_execsql_test mod-overflow-$testnum "SELECT $lhs % $rhs" $::ans +} + do_execsql_test comp-float-float { SELECT 0.0 = 0.0 } { 1 } From ebf467d04e98c2331ac0e06d50bf4d115cc1f985 Mon Sep 17 00:00:00 2001 From: jachewz Date: Mon, 7 Apr 2025 21:58:03 +1000 Subject: [PATCH 057/425] rename math.test % operator tests from mod- to remainder- --- testing/math.test | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/testing/math.test b/testing/math.test index 51f1df56a..afa0e29d1 100755 --- a/testing/math.test +++ b/testing/math.test @@ -1309,51 +1309,51 @@ 
do_execsql_test log-int-null { SELECT log(5, null) } {} -do_execsql_test mod-int-null { +do_execsql_test remainder-int-null { SELECT 183 % null } {} -do_execsql_test mod-int-0 { +do_execsql_test remainder-int-0 { SELECT 183 % 0 } {} -do_execsql_test mod-int-int { +do_execsql_test remainder-int-int { SELECT 183 % 10 } { 3 } -do_execsql_test mod-int-float { +do_execsql_test remainder-int-float { SELECT 38 % 10.35 } { 8.0 } -do_execsql_test mod-float-int { +do_execsql_test remainder-float-int { SELECT 38.43 % 13 } { 12.0 } -do_execsql_test mod-0-float { +do_execsql_test remainder-0-float { SELECT 0 % 12.0 } { 0.0 } -do_execsql_test mod-float-0 { +do_execsql_test remainder-float-0 { SELECT 23.14 % 0 } {} -do_execsql_test mod-float-float { +do_execsql_test remainder-float-float { SELECT 23.14 % 12.0 } { 11.0 } -do_execsql_test mod-float-agg { +do_execsql_test remainder-float-agg { SELECT 23.14 % sum(id) from products } { 23.0 } -do_execsql_test mod-int-agg { +do_execsql_test remainder-int-agg { SELECT 17 % sum(id) from users } { 17 } -do_execsql_test mod-agg-int { +do_execsql_test remainder-agg-int { SELECT count(*) % 17 from users } { 4 } -do_execsql_test mod-agg-float { +do_execsql_test remainder-agg-float { SELECT count(*) % 2.43 from users } { 0.0 } @@ -1367,7 +1367,7 @@ foreach {testnum lhs rhs ans} { 8 '10.0' '3' 1.0 9 '10.0' -3 1.0 } { - do_execsql_test mod-text-$testnum "SELECT $lhs % $rhs" $::ans + do_execsql_test remainder-text-$testnum "SELECT $lhs % $rhs" $::ans } foreach {testnum lhs rhs ans} { @@ -1375,7 +1375,7 @@ foreach {testnum lhs rhs ans} { 2 -9223372036854775808 -1 0 3 -9223372036854775809 -1 0.0 } { - do_execsql_test mod-overflow-$testnum "SELECT $lhs % $rhs" $::ans + do_execsql_test remainder-overflow-$testnum "SELECT $lhs % $rhs" $::ans } do_execsql_test comp-float-float { From 15ed7642c9ff2629549145ecdd5a35002c9ebc98 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 4 Apr 2025 15:51:52 +0200 Subject: [PATCH 058/425] check all keys are present on every insert with fuzz test Let's make sure every insert does still contain all keys. Previously we did this at the end but it made it hard to debug issues that `validate_btree` might not encounter. --- core/storage/btree.rs | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 789f9cb39..3328421f4 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2,7 +2,8 @@ use tracing::debug; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{ - read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell, + read_btree_cell, read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, + TableLeafCell, }; use crate::MvCursor; @@ -607,7 +608,7 @@ impl BTreeCursor { } None => { if has_parent { - debug!("moving simple upwards"); + tracing::trace!("moving simple upwards"); self.going_upwards = true; self.stack.pop(); continue; @@ -955,7 +956,7 @@ impl BTreeCursor { pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); - tracing::debug!("move_to(key={:?} cmp={:?})", key, cmp); + tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp); // For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers. // B-trees consist of interior pages and leaf pages. 
Interior pages contain pointers to other pages, while leaf pages contain the actual row data. // @@ -4288,11 +4289,24 @@ mod tests { let value = ImmutableRecord::from_registers(&[Register::OwnedValue( OwnedValue::Blob(vec![0; size]), )]); + let btree_before = format_btree(pager.clone(), root_page, 0); run_until_done( || cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(&value)), true), pager.deref(), ) .unwrap(); + keys.sort(); + cursor.move_to_root(); + for key in keys.iter() { + tracing::trace!("seeking key: {}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let cursor_rowid = cursor.rowid().unwrap().unwrap(); + assert_eq!( + *key as u64, cursor_rowid, + "key {} is not found, got {}", + key, cursor_rowid + ); + } if matches!(validate_btree(pager.clone(), root_page), (_, false)) { panic!("invalid btree"); } @@ -4304,13 +4318,18 @@ mod tests { if matches!(validate_btree(pager.clone(), root_page), (_, false)) { panic!("invalid btree"); } + keys.sort(); + cursor.move_to_root(); for key in keys.iter() { let seek_key = SeekKey::TableRowId(*key as u64); - tracing::debug!("seeking key: {}", key); - let found = - run_until_done(|| cursor.seek(seek_key.clone(), SeekOp::EQ), pager.deref()) - .unwrap(); - assert!(found, "key {} is not found", key); + tracing::trace!("seeking key: {}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let cursor_rowid = cursor.rowid().unwrap().unwrap(); + assert_eq!( + *key as u64, cursor_rowid, + "key {} is not found, got {}", + key, cursor_rowid + ); } } } From f4920cb96b844c3b60b271f179efcbc71b604696 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 4 Apr 2025 15:52:17 +0200 Subject: [PATCH 059/425] assert new divider cell points to the correct place --- core/storage/btree.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3328421f4..c48f26e53 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1833,6 +1833,15 @@ impl BTreeCursor { // Leaf index new_divider_cell.extend_from_slice(divider_cell); } + + let left_pointer = read_u32(&new_divider_cell[..4], 0); + assert_eq!(left_pointer, page.get().id as u32); + // FIXME: remove this lock + assert!( + left_pointer <= self.pager.db_header.lock().database_size, + "invalid page number divider left pointer {} > database number of pages", + left_pointer, + ); // FIXME: defragment shouldn't be needed defragment_page(parent_contents, self.usable_space() as u16); insert_into_cell( From 9eb9e7021e57ca6412c465db197e132c61bf7ec3 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 4 Apr 2025 15:52:35 +0200 Subject: [PATCH 060/425] Fix index table new divider cell pointer --- core/storage/btree.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c48f26e53..80e1c522e 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1817,8 +1817,15 @@ impl BTreeCursor { page.get_contents() .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, previous_pointer_divider); // divider cell now points to this page - divider_cell[0..4].copy_from_slice(&(page.get().id as u32).to_be_bytes()); - new_divider_cell.extend_from_slice(divider_cell); + new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes()); + // now copy the rest of the divider cell: + // Table Interior page: + // * varint rowid + // Index Interior page: + // * varint payload size + // * payload + // * first overflow page (u32 optional) + 
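+            // A sketch of the resulting byte layout, assuming big-endian page
+            // numbers as elsewhere in this file:
+            //   bytes [0..4)  -> new left-child page number (page.id)
+            //   bytes [4..)   -> copied verbatim from the old divider cell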
+            new_divider_cell.extend_from_slice(&divider_cell[4..]);
         } else if leaf_data {
             // Leaf table
             // FIXME: not needed conversion

From ff8ec5455cf2191b4bf26446aeeddc4c5b0604db Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Sat, 5 Apr 2025 23:45:18 +0200
Subject: [PATCH 061/425] fix divider cell selection

---
 core/storage/btree.rs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 80e1c522e..bc6889082 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -1479,12 +1479,14 @@ impl BTreeCursor {
             self.pager.add_dirty(sibling_page.get().id);
             max_cells += sibling_contents.cell_count();
             max_cells += sibling_contents.overflow_cells.len();
-            if i == 0 {
-                // we don't have left sibling from this one so we break
-                break;
+
+            // The right pointer is not dropped; we simply update it at the end. It could be a divider cell that points
+            // to the last page in the list of pages to balance, or the rightmost pointer that points to a page.
+            if i == balance_info.sibling_count - 1 {
+                continue;
             }
             // Since we know we have a left sibling, take the divider that points to the left sibling of this page
-            let cell_idx = balance_info.first_divider_cell + i - 1;
+            let cell_idx = balance_info.first_divider_cell + i;
             let (cell_start, cell_len) = parent_contents.cell_get_raw_region(
                 cell_idx,
                 payload_overflow_threshold_max(
@@ -1781,6 +1783,9 @@
             }
 
             // Write right pointer in parent page to point to new rightmost page
+            // Write right pointer in parent page to point to new rightmost page. Keep in mind
+            // we update the rightmost pointer first because inserting cells could defragment the parent page,
+            // therefore invalidating the pointer.
             let right_page_id = pages_to_balance_new.last().unwrap().get().id as u32;
             let rightmost_pointer = balance_info.rightmost_pointer;
             let rightmost_pointer =
@@ -1850,7 +1855,7 @@
                 left_pointer,
             );
             // FIXME: defragment shouldn't be needed
-            defragment_page(parent_contents, self.usable_space() as u16);
+            // defragment_page(parent_contents, self.usable_space() as u16);
             insert_into_cell(
                 parent_contents,
                 &new_divider_cell,
@@ -1875,6 +1880,7 @@
                 (0, 0, cell_array.cell_count(0))
             } else {
                 let this_was_old_page = page_idx < balance_info.sibling_count;
+                // We add !leaf_data because we want to skip 1 for the divider cell that is encountered between assigned pages
                 let start_old_cells = if this_was_old_page {
                     count_cells_in_old_pages[page_idx - 1] as usize
                         + (!leaf_data) as usize

From 0541da46df97f7c75832926e8a710bb6c6e4dbeb Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Sat, 5 Apr 2025 23:45:35 +0200
Subject: [PATCH 062/425] add strict btree validation after non root balancing in debug mode

---
 core/storage/btree.rs | 405 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 386 insertions(+), 19 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index bc6889082..2148b66b4 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -14,6 +14,7 @@
 use std::cell::{Cell, Ref, RefCell};
 use std::cmp::Ordering;
+use std::collections::HashSet;
 use std::pin::Pin;
 use std::rc::Rc;
@@ -1503,6 +1504,13 @@
             let cell_buf = &buf[cell_start..cell_start + cell_len];
             max_cells += 1;
 
+            tracing::debug!(
+                "balance_non_root(drop_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})",
+                balance_info.first_divider_cell,
+                i,
+                read_u32(cell_buf, 0)
+            );
+
             // TODO(pere): make this reference and not copy
             balance_info.divider_cells.push(cell_buf.to_vec());
             tracing::trace!(
@@ -1589,6 +1597,17 @@
                 cells_capacity_start,
                 "calculation of max cells was wrong"
             );
+
+            // Let's copy all cells for later checks
+            #[cfg(debug_assertions)]
+            let mut cells_debug = Vec::new();
+            #[cfg(debug_assertions)]
+            {
+                for cell in &cell_array.cells {
+                    cells_debug.push(cell.to_vec());
+                }
+            }
+
             #[cfg(debug_assertions)]
             {
                 for cell in &cell_array.cells {
@@ -1602,7 +1621,7 @@
             // calculate how many pages to allocate
             let mut new_page_sizes = Vec::new();
             let leaf_correction = if leaf { 4 } else { 0 };
-            // number of bytes beyond header, different from global usableSapce which inccludes
+            // number of bytes beyond the header, different from the global usable_space which includes
             // header
             let usable_space = self.usable_space() - 12 + leaf_correction;
             for i in 0..balance_info.sibling_count {
@@ -1782,7 +1801,19 @@
                 }
             }
 
-            // Write right pointer in parent page to point to new rightmost page
+            #[cfg(debug_assertions)]
+            {
+                tracing::debug!("balance_non_root(parent page_id={})", parent_page.get().id);
+                for page in &pages_to_balance_new {
+                    tracing::debug!("balance_non_root(new_sibling page_id={})", page.get().id);
+                }
+            }
+
+            // pages_pointed_to helps us verify that we did in fact create divider cells for all the new pages,
+            // and that the rightmost pointer points to the last page.
+            #[cfg(debug_assertions)]
+            let mut pages_pointed_to = HashSet::new();
+
             // Write right pointer in parent page to point to new rightmost page. Keep in mind
             // we update the rightmost pointer first because inserting cells could defragment the parent page,
             // therefore invalidating the pointer.
             let right_page_id = pages_to_balance_new.last().unwrap().get().id as u32;
             let rightmost_pointer = balance_info.rightmost_pointer;
             let rightmost_pointer =
                 unsafe { std::slice::from_raw_parts_mut(rightmost_pointer, 4) };
             rightmost_pointer[0..4].copy_from_slice(&right_page_id.to_be_bytes());
 
+            #[cfg(debug_assertions)]
+            pages_pointed_to.insert(right_page_id);
+            tracing::debug!(
+                "balance_non_root(rightmost_pointer_update, rightmost_pointer={})",
+                right_page_id
+            );
+
             // Ensure right-child pointer of the right-most new sibling page points to the page
             // that was originally in that place.
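             // (A sketch of where that pointer lives, assuming the page-header
             // layout used elsewhere in this file: interior pages keep it at
             // PAGE_HEADER_OFFSET_RIGHTMOST_PTR, read and written via
             // read_u32/write_u32; leaf pages carry no rightmost pointer,
             // hence the is_leaf_page check below.)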
let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); @@ -1847,6 +1885,14 @@ impl BTreeCursor { } let left_pointer = read_u32(&new_divider_cell[..4], 0); + #[cfg(debug_assertions)] + pages_pointed_to.insert(left_pointer); + tracing::debug!( + "balance_non_root(insert_divider_cell, first_divider_cell={}, divider_cell={}, left_pointer={})", + balance_info.first_divider_cell, + i, + left_pointer + ); assert_eq!(left_pointer, page.get().id as u32); // FIXME: remove this lock assert!( @@ -1863,6 +1909,57 @@ impl BTreeCursor { self.usable_space() as u16, ) .unwrap(); + #[cfg(debug_assertions)] + { + let left_pointer = if parent_contents.overflow_cells.len() == 0 { + let (cell_start, cell_len) = parent_contents.cell_get_raw_region( + balance_info.first_divider_cell + i, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + tracing::debug!( + "balance_non_root(cell_start={}, cell_len={})", + cell_start, + cell_len + ); + + let left_pointer = read_u32( + &parent_contents.as_ptr()[cell_start..cell_start + cell_len], + 0, + ); + left_pointer + } else { + let cell = &parent_contents.overflow_cells[0]; + assert_eq!(cell.index, balance_info.first_divider_cell + i); + read_u32(&cell.payload, 0) + }; + assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}", + left_pointer, + page.get().id as u32 + ); + } + } + tracing::debug!( + "balance_non_root(parent_overflow={})", + parent_contents.overflow_cells.len() + ); + + #[cfg(debug_assertions)] + { + for page in &pages_to_balance_new { + assert!( + pages_pointed_to.contains(&(page.get().id as u32)), + "page {} not pointed to by divider cell or rightmost pointer", + page.get().id + ); + } } // TODO: update pages let mut done = vec![false; sibling_count_new]; @@ -1917,6 +2014,18 @@ impl BTreeCursor { done[page_idx] = true; } } + + #[cfg(debug_assertions)] + self.post_balance_non_root_validation( + balance_info, + parent_contents, + pages_to_balance_new, + page_type, + leaf_data, + cells_debug, + sibling_count_new, + rightmost_pointer, + ); // TODO: balance root // TODO: free pages (WriteState::BalanceStart, Ok(CursorResult::Ok(()))) @@ -1932,6 +2041,240 @@ impl BTreeCursor { result } + fn post_balance_non_root_validation( + &self, + balance_info: &mut BalanceInfo, + parent_contents: &mut PageContent, + pages_to_balance_new: Vec>, + page_type: PageType, + leaf_data: bool, + mut cells_debug: Vec>, + sibling_count_new: usize, + rightmost_pointer: &mut [u8], + ) { + let mut valid = true; + let mut current_index_cell = 0; + // Let's now make a in depth check that we in fact added all possible cells somewhere and they are not lost + for (page_idx, page) in pages_to_balance_new.iter().enumerate() { + let contents = page.get_contents(); + // Cells are distributed in order + for cell_idx in 0..contents.cell_count() { + let (cell_start, cell_len) = contents.cell_get_raw_region( + cell_idx, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + let buf = contents.as_ptr(); + let cell_buf = &buf[cell_start..cell_start + cell_len]; + let cell_buf_in_array = &cells_debug[current_index_cell]; + if cell_buf != cell_buf_in_array { + 
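+                        // cells_debug snapshots every pre-balance cell (and
+                        // divider) in key order, so a byte-level mismatch here
+                        // means the balance dropped, duplicated, or reordered
+                        // a cell.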
tracing::error!("balance_non_root(cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})", + page.get().id, + current_index_cell, + ); + valid = false; + } + current_index_cell += 1; + } + // Now check divider cells and their pointers. + let parent_buf = parent_contents.as_ptr(); + let cell_divider_idx = balance_info.first_divider_cell + page_idx; + if page_idx == sibling_count_new - 1 { + // We will only validate rightmost pointer of parent page, we will not validate rightmost if it's a cell and not the last pointer because, + // insert cell could've defragmented the page and invalidated the pointer. + // right pointer, we just check right pointer points to this page. + if cell_divider_idx == parent_contents.cell_count() { + let rightmost = read_u32(rightmost_pointer, 0); + if rightmost != page.get().id as u32 { + tracing::error!("balance_non_root(cell_divider_right_pointer, should point to {}, but points to {})", + page.get().id, + rightmost + ); + valid = false; + } + } + } else { + // divider cell might be an overflow cell + let mut was_overflow = false; + for overflow_cell in &parent_contents.overflow_cells { + if overflow_cell.index == cell_divider_idx { + let left_pointer = read_u32(&overflow_cell.payload, 0); + if left_pointer != page.get().id as u32 { + tracing::error!("balance_non_root(cell_divider_left_pointer_overflow, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})", + page.get().id, + left_pointer, + page_idx, + parent_contents.overflow_cells.len() + ); + valid = false; + } + was_overflow = true; + break; + } + } + if was_overflow { + continue; + } + // check if overflow + // check if right pointer, this is the last page. Do we update rightmost pointer and defragment moves it? + let (cell_start, cell_len) = parent_contents.cell_get_raw_region( + cell_divider_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + let cell_left_pointer = read_u32(&parent_buf[cell_start..cell_start + cell_len], 0); + if cell_left_pointer != page.get().id as u32 { + tracing::error!("balance_non_root(cell_divider_left_pointer, should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})", + page.get().id, + cell_left_pointer, + page_idx, + parent_contents.overflow_cells.len() + ); + valid = false; + } + if leaf_data { + // If we are in a table leaf page, we just need to check that this cell that should be a divider cell is in the parent + // This means we already check cell in leaf pages but not on parent so we don't advance current_index_cell + if page_idx >= balance_info.sibling_count - 1 { + // This means we are in the last page and we don't need to check anything + continue; + } + let cell_buf: &'static mut [u8] = + to_static_buf(&mut cells_debug[current_index_cell - 1]); + let cell = read_btree_cell( + cell_buf, + &page_type, + 0, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ) + .unwrap(); + let parent_cell = parent_contents + .cell_get( + cell_divider_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + 
self.usable_space(), + ) + .unwrap(); + let rowid = match cell { + BTreeCell::TableLeafCell(table_leaf_cell) => table_leaf_cell._rowid, + _ => unreachable!(), + }; + let rowid_parent = match parent_cell { + BTreeCell::TableInteriorCell(table_interior_cell) => { + table_interior_cell._rowid + } + _ => unreachable!(), + }; + if rowid_parent != rowid { + tracing::error!("balance_non_root(cell_divider_rowid, page_id={}, cell_divider_idx={}, rowid_parent={}, rowid={})", + page.get().id, + cell_divider_idx, + rowid_parent, + rowid + ); + valid = false; + } + } else { + // In any other case, we need to check that this cell was moved to parent as divider cell + let mut was_overflow = false; + for overflow_cell in &parent_contents.overflow_cells { + if overflow_cell.index == cell_divider_idx { + let left_pointer = read_u32(&overflow_cell.payload, 0); + if left_pointer != page.get().id as u32 { + tracing::error!("balance_non_root(cell_divider_divider_cell_overflow should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})", + page.get().id, + left_pointer, + page_idx, + parent_contents.overflow_cells.len() + ); + valid = false; + } + was_overflow = true; + break; + } + } + if was_overflow { + continue; + } + let (parent_cell_start, parent_cell_len) = parent_contents.cell_get_raw_region( + cell_divider_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + let cell_buf_in_array = &cells_debug[current_index_cell]; + let left_pointer = read_u32( + &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len], + 0, + ); + if left_pointer != page.get().id as u32 { + tracing::error!("balance_non_root(divider_cell_left_pointer_interior should point to page_id={}, but points to {}, divider_cell={}, overflow_cells_parent={})", + page.get().id, + left_pointer, + page_idx, + parent_contents.overflow_cells.len() + ); + valid = false; + } + match page_type { + PageType::TableInterior | PageType::IndexInterior => { + let parent_cell_buf = + &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len]; + if parent_cell_buf[4..] != cell_buf_in_array[4..] { + tracing::error!("balance_non_root(cell_divider_cell, page_id={}, cell_divider_idx={})", + page.get().id, + cell_divider_idx, + ); + valid = false; + } + } + PageType::IndexLeaf => todo!(), + _ => { + unreachable!() + } + } + current_index_cell += 1; + } + } + } + assert!(valid, "corrupted database, cells were to balanced properly"); + } + /// Balance the root page. /// This is done when the root page overflows, and we need to create a new root page. /// See e.g. https://en.wikipedia.org/wiki/B-tree @@ -3156,7 +3499,7 @@ fn edit_page( cell_array: &CellArray, usable_space: u16, ) -> Result<()> { - tracing::trace!( + tracing::debug!( "edit_page start_old_cells={} start_new_cells={} number_new_cells={} cell_array={}", start_old_cells, start_new_cells, @@ -3466,7 +3809,6 @@ fn defragment_page(page: &PageContent, usable_space: u16) { /// Only enabled in debug mode, where we ensure that all cells are valid. 
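 /// (A sketch of the invariant, assuming the raw-region helpers used below:
 /// every cell's [offset, offset + size) range must lie inside the page's
 /// usable area, otherwise the page is treated as corrupt.)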
fn debug_validate_cells_core(page: &PageContent, usable_space: u16) { for i in 0..page.cell_count() { - // println!("Debug function: i={}", i); let (offset, size) = page.cell_get_raw_region( i, payload_overflow_threshold_max(page.page_type(), usable_space), @@ -3510,7 +3852,7 @@ fn insert_into_cell( } let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?; - tracing::trace!( + tracing::debug!( "insert_into_cell(idx={}, pc={})", cell_idx, new_cell_data_pointer @@ -3838,6 +4180,8 @@ fn shift_pointers_left(page: &mut PageContent, cell_idx: usize) { #[cfg(test)] mod tests { + use rand::thread_rng; + use rand::Rng; use rand_chacha::rand_core::RngCore; use rand_chacha::rand_core::SeedableRng; use rand_chacha::ChaCha8Rng; @@ -4032,6 +4376,14 @@ mod tests { _left_child_page, .. }) => { child_pages.push(pager.read_page(_left_child_page as usize).unwrap()); + if _left_child_page == page.id as u32 { + valid = false; + tracing::error!( + "left child page is the same as parent {}", + _left_child_page + ); + continue; + } let (child_depth, child_valid) = validate_btree(pager.clone(), _left_child_page as usize); valid &= child_valid; @@ -4039,6 +4391,10 @@ mod tests { } _ => panic!("unsupported btree cell: {:?}", cell), }; + if current_depth >= 100 { + tracing::error!("depth is too big"); + return (100, false); + } depth = Some(depth.unwrap_or(current_depth + 1)); if depth != Some(current_depth + 1) { tracing::error!("depth is different for child of page {}", page_idx); @@ -4275,6 +4631,7 @@ mod tests { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let mut keys = Vec::new(); let seed = rng.next_u64(); + let seed = 1743883058; tracing::info!("seed: {}", seed); let mut rng = ChaCha8Rng::seed_from_u64(seed); for insert_id in 0..inserts { @@ -4319,17 +4676,22 @@ mod tests { .unwrap(); keys.sort(); cursor.move_to_root(); + let mut valid = true; for key in keys.iter() { tracing::trace!("seeking key: {}", key); run_until_done(|| cursor.next(), pager.deref()).unwrap(); let cursor_rowid = cursor.rowid().unwrap().unwrap(); - assert_eq!( - *key as u64, cursor_rowid, - "key {} is not found, got {}", - key, cursor_rowid - ); + if *key as u64 != cursor_rowid { + valid = false; + println!("key {} is not found, got {}", key, cursor_rowid); + break; + } } - if matches!(validate_btree(pager.clone(), root_page), (_, false)) { + // let's validate btree too so that we undertsand where the btree failed + if matches!(validate_btree(pager.clone(), root_page), (_, false)) || !valid { + let btree_after = format_btree(pager.clone(), root_page, 0); + println!("btree before:\n{}", btree_before); + println!("btree after:\n{}", btree_after); panic!("invalid btree"); } } @@ -4417,7 +4779,7 @@ mod tests { #[test] #[ignore] pub fn btree_insert_fuzz_run_small() { - btree_insert_fuzz_run(1, 1024, |rng| (rng.next_u32() % 128) as usize); + btree_insert_fuzz_run(1, 100_000, |rng| (rng.next_u32() % 128) as usize); } #[test] @@ -4816,15 +5178,13 @@ mod tests { let mut total_size = 0; let mut cells = Vec::new(); let usable_space = 4096; - let mut i = 1000; - // let seed = thread_rng().gen(); - // let seed = 15292777653676891381; - let seed = 9261043168681395159; + let mut i = 100000; + let seed = thread_rng().gen(); tracing::info!("seed {}", seed); let mut rng = ChaCha8Rng::seed_from_u64(seed); while i > 0 { i -= 1; - match rng.next_u64() % 3 { + match rng.next_u64() % 4 { 0 => { // allow appends with extra place to insert let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1); 
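// A reproducibility sketch, assuming the seeded-RNG setup these tests use:
// pasting a seed printed by a failing run replays the exact same sequence.
//
//     let seed = 9261043168681395159; // taken from the failure log
//     let mut rng = ChaCha8Rng::seed_from_u64(seed);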
@@ -4841,14 +5201,14 @@ mod tests { 4096, conn.pager.clone(), ); - if (free as usize) < payload.len() - 2 { + if (free as usize) < payload.len() + 2 { // do not try to insert overflow pages because they require balancing continue; } insert_into_cell(page, &payload, cell_idx, 4096).unwrap(); assert!(page.overflow_cells.is_empty()); total_size += payload.len() as u16 + 2; - cells.push(Cell { pos: i, payload }); + cells.insert(cell_idx, Cell { pos: i, payload }); } 1 => { if page.cell_count() == 0 { @@ -4868,6 +5228,13 @@ mod tests { 2 => { defragment_page(page, usable_space); } + 3 => { + // check cells + for (i, cell) in cells.iter().enumerate() { + ensure_cell(page, i, &cell.payload); + } + assert_eq!(page.cell_count(), cells.len()); + } _ => unreachable!(), } let free = compute_free_space(page, usable_space); From 6ac2368ae279f05bf89db068a941e6c68a65ed43 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 17:53:06 +0200 Subject: [PATCH 063/425] update divider cell that is being balanced --- core/storage/btree.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 2148b66b4..29a4b9a4e 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1587,6 +1587,11 @@ impl BTreeCursor { // TODO(pere): in case of old pages are leaf pages, so index leaf page, we need to strip page pointers // from divider cells in index interior pages (parent) because those should not be included. cells_inserted += 1; + if !leaf { + // This divider cell needs to be updated with new left pointer, + let right_pointer = old_page_contents.rightmost_pointer().unwrap(); + divider_cell[..4].copy_from_slice(&right_pointer.to_be_bytes()); + } cell_array.cells.push(to_static_buf(divider_cell.as_mut())); } total_cells_inserted += cells_inserted; @@ -1885,6 +1890,7 @@ impl BTreeCursor { } let left_pointer = read_u32(&new_divider_cell[..4], 0); + assert!(left_pointer != parent_page.get().id as u32); #[cfg(debug_assertions)] pages_pointed_to.insert(left_pointer); tracing::debug!( From f137ddfdf8ec233d2214d07d5fea11c1c3312994 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 17:55:50 +0200 Subject: [PATCH 064/425] add loop left pointer validation --- core/storage/btree.rs | 108 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 103 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 29a4b9a4e..8bde08311 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2,8 +2,7 @@ use tracing::debug; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{ - read_btree_cell, read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, - TableLeafCell, + read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell, }; use crate::MvCursor; @@ -14,6 +13,7 @@ use crate::{return_corrupt, LimboError, Result}; use std::cell::{Cell, Ref, RefCell}; use std::cmp::Ordering; +#[cfg(debug_assertions)] use std::collections::HashSet; use std::pin::Pin; use std::rc::Rc; @@ -2023,6 +2023,7 @@ impl BTreeCursor { #[cfg(debug_assertions)] self.post_balance_non_root_validation( + &parent_page, balance_info, parent_contents, pages_to_balance_new, @@ -2047,8 +2048,10 @@ impl BTreeCursor { result } + #[cfg(debug_assertions)] fn post_balance_non_root_validation( &self, + parent_page: &PageRef, balance_info: &mut BalanceInfo, parent_contents: &mut PageContent, pages_to_balance_new: Vec>, @@ -2060,6 +2063,45 @@ impl BTreeCursor { ) { let mut valid = 
true; let mut current_index_cell = 0; + for cell_idx in 0..parent_contents.cell_count() { + let cell = parent_contents + .cell_get( + cell_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ) + .unwrap(); + match cell { + BTreeCell::TableInteriorCell(table_interior_cell) => { + let left_child_page = table_interior_cell._left_child_page; + if left_child_page == parent_page.get().id as u32 { + tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})", + parent_page.get().id, + left_child_page, + ); + valid = false; + } + } + BTreeCell::IndexInteriorCell(index_interior_cell) => { + let left_child_page = index_interior_cell.left_child_page; + if left_child_page == parent_page.get().id as u32 { + tracing::error!("balance_non_root(parent_divider_points_to_same_page, page_id={}, cell_left_child_page={})", + parent_page.get().id, + left_child_page, + ); + valid = false; + } + } + _ => {} + } + } // Let's now make a in depth check that we in fact added all possible cells somewhere and they are not lost for (page_idx, page) in pages_to_balance_new.iter().enumerate() { let contents = page.get_contents(); @@ -2078,7 +2120,7 @@ impl BTreeCursor { self.usable_space(), ); let buf = contents.as_ptr(); - let cell_buf = &buf[cell_start..cell_start + cell_len]; + let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]); let cell_buf_in_array = &cells_debug[current_index_cell]; if cell_buf != cell_buf_in_array { tracing::error!("balance_non_root(cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})", @@ -2087,6 +2129,63 @@ impl BTreeCursor { ); valid = false; } + + let cell = crate::storage::sqlite3_ondisk::read_btree_cell( + cell_buf, + &page_type, + 0, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ) + .unwrap(); + match &cell { + BTreeCell::TableInteriorCell(table_interior_cell) => { + let left_child_page = table_interior_cell._left_child_page; + if left_child_page == page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + if left_child_page == parent_page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + } + BTreeCell::IndexInteriorCell(index_interior_cell) => { + let left_child_page = index_interior_cell.left_child_page; + if left_child_page == page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_same_page, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + if left_child_page == parent_page.get().id as u32 { + tracing::error!("balance_non_root(child_page_points_parent_of_child, page_id={}, cell_left_child_page={}, page_idx={})", + page.get().id, + left_child_page, + page_idx + ); + valid = false; + } + } + _ => {} + } current_index_cell += 1; } // Now check divider cells and their pointers. 
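// A condensed sketch of the pointer checks above, assuming the on-disk
// interior-cell layout (a 4-byte big-endian left-child page number first):
//
//     fn left_child(cell: &[u8]) -> u32 {
//         u32::from_be_bytes([cell[0], cell[1], cell[2], cell[3]])
//     }
//     // no divider or interior cell may point back at its own page
//     assert_ne!(left_child(cell_buf), page.get().id as u32);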
@@ -4637,7 +4736,6 @@ mod tests { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let mut keys = Vec::new(); let seed = rng.next_u64(); - let seed = 1743883058; tracing::info!("seed: {}", seed); let mut rng = ChaCha8Rng::seed_from_u64(seed); for insert_id in 0..inserts { @@ -4680,6 +4778,7 @@ mod tests { pager.deref(), ) .unwrap(); + // FIXME: add sorted vector instead, should be okay for small amounts of keys for now :P, too lazy to fix right now keys.sort(); cursor.move_to_root(); let mut valid = true; @@ -4711,7 +4810,6 @@ mod tests { keys.sort(); cursor.move_to_root(); for key in keys.iter() { - let seek_key = SeekKey::TableRowId(*key as u64); tracing::trace!("seeking key: {}", key); run_until_done(|| cursor.next(), pager.deref()).unwrap(); let cursor_rowid = cursor.rowid().unwrap().unwrap(); From 83f13596a4975e4ed3a89a3cd7bc5680f78a31aa Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 17:59:01 +0200 Subject: [PATCH 065/425] decrease fuzz test steps again --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 8bde08311..d5897061e 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4883,7 +4883,7 @@ mod tests { #[test] #[ignore] pub fn btree_insert_fuzz_run_small() { - btree_insert_fuzz_run(1, 100_000, |rng| (rng.next_u32() % 128) as usize); + btree_insert_fuzz_run(1, 100, |rng| (rng.next_u32() % 128) as usize); } #[test] From 608628461393645d738444c9aba48be67d7bad87 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 18:06:52 +0200 Subject: [PATCH 066/425] fix debug imports --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d5897061e..c505a3fee 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2054,7 +2054,7 @@ impl BTreeCursor { parent_page: &PageRef, balance_info: &mut BalanceInfo, parent_contents: &mut PageContent, - pages_to_balance_new: Vec>, + pages_to_balance_new: Vec>, page_type: PageType, leaf_data: bool, mut cells_debug: Vec>, @@ -2260,7 +2260,7 @@ impl BTreeCursor { } let cell_buf: &'static mut [u8] = to_static_buf(&mut cells_debug[current_index_cell - 1]); - let cell = read_btree_cell( + let cell = crate::storage::sqlite3_ondisk::read_btree_cell( cell_buf, &page_type, 0, From 4a08b98babde5bc0fda2d494f1af6beba9732deb Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Wed, 2 Apr 2025 16:13:15 +0300 Subject: [PATCH 067/425] implemented strict table --- core/schema.rs | 18 +++-- core/translate/insert.rs | 11 ++++ core/types.rs | 23 +++++++ core/vdbe/execute.rs | 139 ++++++++++++++++++++++++++++++++++++++- core/vdbe/explain.rs | 14 ++++ core/vdbe/insn.rs | 16 ++++- 6 files changed, 211 insertions(+), 10 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index dda37d15b..21bed120d 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -161,6 +161,7 @@ pub struct BTreeTable { pub primary_key_column_names: Vec, pub columns: Vec, pub has_rowid: bool, + pub is_strict: bool, } impl BTreeTable { @@ -262,12 +263,14 @@ fn create_table( let mut has_rowid = true; let mut primary_key_column_names = vec![]; let mut cols = vec![]; + let is_strict: bool; match body { CreateTableBody::ColumnsAndConstraints { columns, constraints, options, } => { + is_strict = options.contains(TableOptions::STRICT); if let Some(constraints) = constraints { for c in constraints { if let limbo_sqlite3_parser::ast::TableConstraint::PrimaryKey { 
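// A minimal sketch of what the new flag captures (names as in this diff; the
// TableOptions bitflag is assumed to come from the parser crate):
//
//     CREATE TABLE t(x INTEGER) STRICT;  =>  BTreeTable { is_strict: true,  .. }
//     CREATE TABLE t(x INTEGER);         =>  BTreeTable { is_strict: false, .. }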
@@ -390,6 +393,7 @@ fn create_table( has_rowid, primary_key_column_names, columns: cols, + is_strict, }) } @@ -456,7 +460,7 @@ pub fn affinity(datatype: &str) -> Affinity { } // Rule 3: BLOB or empty -> BLOB affinity (historically called NONE) - if datatype.contains("BLOB") || datatype.is_empty() { + if datatype.contains("BLOB") || datatype.is_empty() || datatype.contains("ANY") { return Affinity::Blob; } @@ -508,11 +512,11 @@ pub enum Affinity { Numeric, } -pub const SQLITE_AFF_TEXT: char = 'a'; -pub const SQLITE_AFF_NONE: char = 'b'; // Historically called NONE, but it's the same as BLOB -pub const SQLITE_AFF_NUMERIC: char = 'c'; -pub const SQLITE_AFF_INTEGER: char = 'd'; -pub const SQLITE_AFF_REAL: char = 'e'; +pub const SQLITE_AFF_NONE: char = 'A'; // Historically called NONE, but it's the same as BLOB +pub const SQLITE_AFF_TEXT: char = 'B'; +pub const SQLITE_AFF_NUMERIC: char = 'C'; +pub const SQLITE_AFF_INTEGER: char = 'D'; +pub const SQLITE_AFF_REAL: char = 'E'; impl Affinity { /// This is meant to be used in opcodes like Eq, which state: @@ -552,6 +556,7 @@ pub fn sqlite_schema_table() -> BTreeTable { root_page: 1, name: "sqlite_schema".to_string(), has_rowid: true, + is_strict: false, primary_key_column_names: vec![], columns: vec![ Column { @@ -1046,6 +1051,7 @@ mod tests { root_page: 0, name: "t1".to_string(), has_rowid: true, + is_strict: false, primary_key_column_names: vec!["nonexistent".to_string()], columns: vec![Column { name: Some("a".to_string()), diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 7713b9355..9ee253da5 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -251,6 +251,17 @@ pub fn translate_insert( program.resolve_label(make_record_label, program.offset()); } + match table.btree() { + Some(t) if t.is_strict => { + program.emit_insn(Insn::TypeCheck { + start_reg: column_registers_start, + count: num_cols, + check_generated: true, + table_reference: Rc::clone(&t), + }); + } + _ => (), + } // Create and insert the record program.emit_insn(Insn::MakeRecord { start_reg: column_registers_start, diff --git a/core/types.rs b/core/types.rs index 1556ee100..631bc7492 100644 --- a/core/types.rs +++ b/core/types.rs @@ -22,6 +22,20 @@ pub enum OwnedValueType { Error, } +impl Display for OwnedValueType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let value = match self { + Self::Null => "NULL", + Self::Integer => "INT", + Self::Float => "REAL", + Self::Blob => "BLOB", + Self::Text => "TEXT", + Self::Error => "ERROR", + }; + write!(f, "{}", value) + } +} + #[derive(Debug, Clone, PartialEq)] pub enum TextSubtype { Text, @@ -69,6 +83,15 @@ impl Text { } } +impl From for Text { + fn from(value: String) -> Self { + Text { + value: value.into_bytes(), + subtype: TextSubtype::Text, + } + } +} + impl TextRef { pub fn as_str(&self) -> &str { unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 09c283ecd..cf1b9e03e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1,5 +1,5 @@ #![allow(unused_variables)] -use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY}; +use crate::error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}; use crate::ext::ExtValue; use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc}; use crate::functions::datetime::{ @@ -10,11 +10,13 @@ use std::{borrow::BorrowMut, rc::Rc}; use crate::pseudo::PseudoCursor; use crate::result::LimboResult; + use 
crate::schema::{affinity, Affinity};
 use crate::storage::btree::{BTreeCursor, BTreeKey};
+
 use crate::storage::wal::CheckpointResult;
 use crate::types::{
-    AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, SeekKey, SeekOp,
+    AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, SeekOp,
 };
 use crate::util::{
     cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real,
@@ -1341,6 +1343,68 @@ pub fn op_column(
     Ok(InsnFunctionStepResult::Step)
 }
 
+pub fn op_type_check(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::TypeCheck {
+        start_reg,
+        count,
+        check_generated,
+        table_reference,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+    assert!(table_reference.is_strict);
+    state.registers[*start_reg..*start_reg + *count]
+        .iter_mut()
+        .zip(table_reference.columns.iter())
+        .try_for_each(|(reg, col)| {
+            // INT PRIMARY KEY is not a rowid alias, so we throw an error if this column is NULL
+            if !col.is_rowid_alias
+                && col.primary_key
+                && matches!(reg.get_owned_value(), OwnedValue::Null)
+            {
+                bail_constraint_error!(
+                    "NOT NULL constraint failed: {}.{} ({})",
+                    &table_reference.name,
+                    col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
+                    SQLITE_CONSTRAINT
+                )
+            } else if col.is_rowid_alias {
+                // If it is INTEGER PRIMARY KEY we let sqlite assign row_id
+                return Ok(());
+            };
+            let col_affinity = col.affinity();
+            let ty_str = col.ty_str.as_str();
+            apply_affinity_char(reg, col_affinity);
+            let value_type = reg.get_owned_value().value_type();
+            match (ty_str, value_type) {
+                ("INTEGER" | "INT", OwnedValueType::Integer) => {}
+                ("REAL", OwnedValueType::Float) => {}
+                ("BLOB", OwnedValueType::Blob) => {}
+                ("TEXT", OwnedValueType::Text) => {}
+                ("ANY", _) => {}
+                (t, v) => bail_constraint_error!(
+                    "cannot store {} value in {} column {}.{} ({})",
+                    v,
+                    t,
+                    &table_reference.name,
+                    col.name.as_ref().map(|s| s.as_str()).unwrap_or(""),
+                    SQLITE_CONSTRAINT
+                ),
+            };
+            Ok(())
+        })?;
+
+    state.pc += 1;
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_make_record(
     program: &Program,
     state: &mut ProgramState,
@@ -5012,6 +5076,77 @@ fn exec_if(reg: &OwnedValue, jump_if_null: bool, not: bool) -> bool {
     }
 }
 
+fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool {
+    if let Register::OwnedValue(value) = target {
+        if matches!(value, OwnedValue::Blob(_)) {
+            return true;
+        }
+        match affinity {
+            Affinity::Blob => return true,
+            Affinity::Text => {
+                if matches!(value, OwnedValue::Text(_) | OwnedValue::Null) {
+                    return true;
+                }
+                let text = value.to_string();
+                *value = OwnedValue::Text(text.into());
+                return true;
+            }
+            Affinity::Integer | Affinity::Numeric => {
+                if matches!(value, OwnedValue::Integer(_)) {
+                    return true;
+                }
+                if !matches!(value, OwnedValue::Text(_) | OwnedValue::Float(_)) {
+                    return true;
+                }
+
+                if let OwnedValue::Float(fl) = *value {
+                    if let Ok(int) = cast_real_to_integer(fl).map(OwnedValue::Integer) {
+                        *value = int;
+                        return true;
+                    }
+                    return false;
+                }
+
+                let text = value.to_text().unwrap();
+                let Ok(num) = checked_cast_text_to_numeric(&text) else {
+                    return false;
+                };
+
+                *value = match num {
+                    // Keep the parsed float when it cannot be represented
+                    // losslessly as an integer; otherwise store the integer.
+                    OwnedValue::Float(fl) => cast_real_to_integer(fl)
+                        .map(OwnedValue::Integer)
+                        .unwrap_or(OwnedValue::Float(fl)),
+                    OwnedValue::Integer(_) if text.starts_with("0x") => {
+                        return false;
+                    }
+                    other => other,
+                };
+            }
+
+            Affinity::Real => {
+                if let OwnedValue::Integer(i) =
value { + *value = OwnedValue::Float(*i as f64); + return true; + } else if let OwnedValue::Text(t) = value { + if t.as_str().starts_with("0x") { + return false; + } + if let Ok(num) = checked_cast_text_to_numeric(t.as_str()) { + *value = num; + return true; + } else { + return false; + } + } + } + }; + } + return true; +} + fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { if matches!(value, OwnedValue::Null) { return OwnedValue::Null; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 66c68d9c0..7b9b02d2a 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -528,6 +528,20 @@ pub fn insn_to_str( ), ) } + Insn::TypeCheck { + start_reg, + count, + check_generated, + .. + } => ( + "TypeCheck", + *start_reg as i32, + *count as i32, + *check_generated as i32, + OwnedValue::build_text(""), + 0, + String::from(""), + ), Insn::MakeRecord { start_reg, count, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 8d3a9afca..d7fc39609 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,8 +1,10 @@ use std::num::NonZero; +use std::rc::Rc; use super::{ cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx, }; +use crate::schema::BTreeTable; use crate::storage::wal::CheckpointMode; use crate::types::{OwnedValue, Record}; use limbo_macros::Description; @@ -344,7 +346,16 @@ pub enum Insn { dest: usize, }, - /// Make a record and write it to destination register. + TypeCheck { + start_reg: usize, // P1 + count: usize, // P2 + /// GENERATED ALWAYS AS ... STATIC columns are only checked if P3 is zero. + /// When P3 is non-zero, no type checking occurs for static generated columns. + check_generated: bool, // P3 + table_reference: Rc, // P4 + }, + + // Make a record and write it to destination register. MakeRecord { start_reg: usize, // P1 count: usize, // P2 @@ -427,7 +438,7 @@ pub enum Insn { register: usize, }, - /// Write a string value into a register. + // Write a string value into a register. String8 { value: String, dest: usize, @@ -1271,6 +1282,7 @@ impl Insn { Insn::LastAwait { .. } => execute::op_last_await, Insn::Column { .. } => execute::op_column, + Insn::TypeCheck { .. } => execute::op_type_check, Insn::MakeRecord { .. } => execute::op_make_record, Insn::ResultRow { .. 
} => execute::op_result_row, From 3a97fd075fb84432e4b8926ab18a47cd1586e7f3 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 19:53:49 +0300 Subject: [PATCH 068/425] add tests --- testing/insert.test | 147 +++++++++++++++++++++++++++++++++++++++++++- testing/tester.tcl | 111 +++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 1 deletion(-) diff --git a/testing/insert.test b/testing/insert.test index 5a37fd692..ab520b052 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -15,4 +15,149 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert { } {1 2 3 -4} \ No newline at end of file +4} + +do_execsql_test strict-basic-creation { + CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test1 VALUES(1, 'item1', 10.5); + SELECT * FROM test1; +} {1|item1|10.5} + +do_execsql_test_any_error strict-require-datatype { + CREATE TABLE test2(id INTEGER, name) STRICT; +} + +do_execsql_test_any_error strict-valid-datatypes { + CREATE TABLE test2(id INTEGER, value DATETIME) STRICT; +} + +do_execsql_test_any_error strict-type-enforcement { + CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test3 VALUES(1, 'item1', 'not-a-number'); +} + +do_execsql_test strict-type-coercion { + CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test4 VALUES(1, 'item1', '10.5'); + SELECT typeof(price), price FROM test4; +} {real|10.5} + +do_execsql_test strict-any-flexibility { + CREATE TABLE test5(id INTEGER, data ANY) STRICT; + INSERT INTO test5 VALUES(1, 100); + INSERT INTO test5 VALUES(2, 'text'); + INSERT INTO test5 VALUES(3, 3.14); + SELECT id, typeof(data) FROM test5 ORDER BY id; +} {1|integer +2|text +3|real} + +do_execsql_test strict-any-preservation { + CREATE TABLE test6(id INTEGER, code ANY) STRICT; + INSERT INTO test6 VALUES(1, '000123'); + SELECT typeof(code), code FROM test6; +} {text|000123} + +do_execsql_test_any_error strict-int-vs-integer-pk { + CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT + INSERT INTO test8 VALUES(NULL, 'test'); +} + +do_execsql_test strict-integer-pk-behavior { + CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT; + INSERT INTO test9 VALUES(NULL, 'test'); + SELECT id, name FROM test9; +} {1|test} + + +do_execsql_test strict-mixed-inserts { + CREATE TABLE test11( + id INTEGER PRIMARY KEY, + name TEXT, + price REAL, + quantity INT, + tags ANY + ) STRICT; + + INSERT INTO test11 VALUES(1, 'item1', 10.5, 5, 'tag1'); + INSERT INTO test11 VALUES(2, 'item2', 20.75, 10, 42); + + SELECT id, name, price, quantity, typeof(tags) FROM test11 ORDER BY id; +} {1|item1|10.5|5|text +2|item2|20.75|10|integer} + +do_execsql_test strict-update-basic { + CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test1 VALUES(1, 'item1', 10.5); + UPDATE test1 SET price = 15.75 WHERE id = 1; + SELECT * FROM test1; +} {1|item1|15.75} + +do_execsql_test_any_error strict-update-type-enforcement { + CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test2 VALUES(1, 'item1', 10.5); + UPDATE test2 SET price = 'not-a-number' WHERE id = 1; +} + +do_execsql_test strict-update-type-coercion { + CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; + INSERT INTO test3 VALUES(1, 'item1', 10.5); + UPDATE test3 SET price = '15.75' WHERE id = 1; + SELECT id, typeof(price), price FROM test3; +} {1|real|15.75} + +do_execsql_test strict-update-any-flexibility { + CREATE TABLE test4(id INTEGER, data ANY) STRICT; + INSERT INTO test4 
VALUES(1, 100); + UPDATE test4 SET data = 'text' WHERE id = 1; + INSERT INTO test4 VALUES(2, 'original'); + UPDATE test4 SET data = 3.14 WHERE id = 2; + SELECT id, typeof(data), data FROM test4 ORDER BY id; +} {1|text|text +2|real|3.14} + +do_execsql_test strict-update-any-preservation { + CREATE TABLE test5(id INTEGER, code ANY) STRICT; + INSERT INTO test5 VALUES(1, 'text'); + UPDATE test5 SET code = '000123' WHERE id = 1; + SELECT typeof(code), code FROM test5; +} {text|000123} + +do_execsql_test_any_error strict-update-not-null-constraint { + CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT; + INSERT INTO test7 VALUES(1, 'name'); + UPDATE test7 SET name = NULL WHERE id = 1; +} + +# Uncomment following test case when unique constraint is added +#do_execsql_test_any_error strict-update-pk-constraint { +# CREATE TABLE test8(id INTEGER PRIMARY KEY, name TEXT) STRICT; +# INSERT INTO test8 VALUES(1, 'name1'); +# INSERT INTO test8 VALUES(2, 'name2'); +# UPDATE test8 SET id = 2 WHERE id = 1; +#} + +do_execsql_test strict-update-multiple-columns { + CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT; + INSERT INTO test9 VALUES(1, 'item1', 10.5, 5); + UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1; + SELECT * FROM test9; +} {1|updated|20.75|10} + +do_execsql_test strict-update-where-clause { + CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT; + INSERT INTO test10 VALUES(1, 'A', 10); + INSERT INTO test10 VALUES(2, 'A', 20); + INSERT INTO test10 VALUES(3, 'B', 30); + UPDATE test10 SET price = price * 2 WHERE category = 'A'; + SELECT id, price FROM test10 ORDER BY id; +} {1|20.0 +2|40.0 +3|30.0} + +do_execsql_test strict-update-expression { + CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT; + INSERT INTO test11 VALUES(1, 'item1', 100, 0.1); + UPDATE test11 SET price = price - (price * discount); + SELECT id, price FROM test11; +} {1|90.0} diff --git a/testing/tester.tcl b/testing/tester.tcl index 735c91aae..4bb3ab2ef 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -97,3 +97,114 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera } } } +# This procedure passes the test if the output contains error messages +proc run_test_expecting_any_error {sqlite_exec db_name sql} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Use catch to handle both successful and error cases + catch {exec {*}$command} result options + + # Check if the output contains error indicators (×, error, syntax error, etc.) 
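+    # (The pattern below is deliberately broad, assuming errors surface in the
+    #  captured output: any of these markers, in any position, is treated as a
+    #  failure indication rather than parsing one specific error format.)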
+ if {[regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + # Error found in output - test passed + puts "Test PASSED: Got expected error" + return 1 + } + + # No error indicators in output + puts "Test FAILED: '$sql'" + puts "Expected an error but command output didn't indicate any error: '$result'" + exit 1 +} + +# This procedure passes if error matches a specific pattern +proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + puts "Test FAILED: '$sql'" + puts "Expected an error matching '$expected_error_pattern'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Now check if the error message matches the expected pattern + if {![regexp $expected_error_pattern $result]} { + puts "Test FAILED: '$sql'" + puts "Error occurred but didn't match expected pattern." + puts "Output was: '$result'" + puts "Expected pattern: '$expected_error_pattern'" + exit 1 + } + + # If we get here, the test passed - got expected error matching pattern + return 1 +} + +# This version accepts exact error text, ignoring formatting +proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_text} { + # Execute the SQL command and capture output + set command [list $sqlite_exec $db_name $sql] + + # Capture output whether command succeeds or fails + catch {exec {*}$command} result options + + # Check if the output contains error indicators first + if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { + puts "Test FAILED: '$sql'" + puts "Expected an error with text: '$expected_error_text'" + puts "But command output didn't indicate any error: '$result'" + exit 1 + } + + # Normalize both the actual and expected error messages + # Remove all whitespace, newlines, and special characters for comparison + set normalized_actual [regsub -all {[[:space:]]|[[:punct:]]} $result ""] + set normalized_expected [regsub -all {[[:space:]]|[[:punct:]]} $expected_error_text ""] + + # Convert to lowercase for case-insensitive comparison + set normalized_actual [string tolower $normalized_actual] + set normalized_expected [string tolower $normalized_expected] + + # Check if the normalized strings contain the same text + if {[string first $normalized_expected $normalized_actual] == -1} { + puts "Test FAILED: '$sql'" + puts "Error occurred but content didn't match." 
+ puts "Output was: '$result'" + puts "Expected text: '$expected_error_text'" + exit 1 + } + + # If we get here, the test passed - got error with expected content + return 1 +} + +proc do_execsql_test_error {test_name sql_statements expected_error_pattern} { + foreach db $::test_dbs { + puts [format "(%s) %s Running error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_error $::sqlite_exec $db $combined_sql $expected_error_pattern + } +} + +proc do_execsql_test_error_content {test_name sql_statements expected_error_text} { + foreach db $::test_dbs { + puts [format "(%s) %s Running error content test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_error_content $::sqlite_exec $db $combined_sql $expected_error_text + } +} + +proc do_execsql_test_any_error {test_name sql_statements} { + foreach db $::test_dbs { + puts [format "(%s) %s Running any-error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db $combined_sql + } +} From 7c154651187fbe99b4541502d07e2efc480e2b77 Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 19:54:25 +0300 Subject: [PATCH 069/425] add TypeCheck insn to update --- core/translate/emitter.rs | 12 ++++++++++++ core/vdbe/execute.rs | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 23b937019..ec369127a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,6 +1,8 @@ // This module contains code for emitting bytecode instructions for SQL query execution. // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. 
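// A sketch of the strict-table ordering this patch adds on the UPDATE path,
// assuming the register layout used below (the new column values occupy a
// contiguous block starting at first_col_reg):
//
//     ... populate first_col_reg .. first_col_reg + N ...
//     TypeCheck  first_col_reg, N     // emitted only when the table is STRICT
//     MakeRecord first_col_reg, N -> record_reg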
+use std::rc::Rc; + use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; @@ -614,6 +616,16 @@ fn emit_update_insns( } } } + if let Some(btree_table) = table_ref.btree() { + if btree_table.is_strict { + program.emit_insn(Insn::TypeCheck { + start_reg: first_col_reg, + count: table_ref.columns().len(), + check_generated: true, + table_reference: Rc::clone(&btree_table), + }); + } + } let record_reg = program.alloc_register(); program.emit_insn(Insn::MakeRecord { start_reg: first_col_reg, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index cf1b9e03e..a388c8b84 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1375,10 +1375,10 @@ pub fn op_type_check( col.name.as_ref().map(|s| s.as_str()).unwrap_or(""), SQLITE_CONSTRAINT ) - } else if col.is_rowid_alias { - // If it is INTEGER PRIMARY KEY we let sqlite assign row_id + } else if col.is_rowid_alias && matches!(reg.get_owned_value(), OwnedValue::Null) { + // Handle INTEGER PRIMARY KEY for null as usual (Rowid will be auto-assigned) return Ok(()); - }; + } let col_affinity = col.affinity(); let ty_str = col.ty_str.as_str(); let applied = apply_affinity_char(reg, col_affinity); From ad91a2ae513785a9b31a1d0d6da35623e2af35ae Mon Sep 17 00:00:00 2001 From: Ihor Andrianov Date: Mon, 7 Apr 2025 20:29:45 +0300 Subject: [PATCH 070/425] fix tests --- testing/insert.test | 38 +++++++++++++++++++------------------- testing/tester.tcl | 10 ++++++++++ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/testing/insert.test b/testing/insert.test index ab520b052..6c14ee249 100755 --- a/testing/insert.test +++ b/testing/insert.test @@ -17,32 +17,32 @@ do_execsql_test_on_specific_db {:memory:} must-be-int-insert { 3 4} -do_execsql_test strict-basic-creation { +do_execsql_test_on_specific_db {:memory:} strict-basic-creation { CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test1 VALUES(1, 'item1', 10.5); SELECT * FROM test1; } {1|item1|10.5} -do_execsql_test_any_error strict-require-datatype { +do_execsql_test_in_memory_any_error strict-require-datatype { CREATE TABLE test2(id INTEGER, name) STRICT; } -do_execsql_test_any_error strict-valid-datatypes { +do_execsql_test_in_memory_any_error strict-valid-datatypes { CREATE TABLE test2(id INTEGER, value DATETIME) STRICT; } -do_execsql_test_any_error strict-type-enforcement { +do_execsql_test_in_memory_any_error strict-type-enforcement { CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test3 VALUES(1, 'item1', 'not-a-number'); } -do_execsql_test strict-type-coercion { +do_execsql_test_on_specific_db {:memory:} strict-type-coercion { CREATE TABLE test4(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test4 VALUES(1, 'item1', '10.5'); SELECT typeof(price), price FROM test4; } {real|10.5} -do_execsql_test strict-any-flexibility { +do_execsql_test_on_specific_db {:memory:} strict-any-flexibility { CREATE TABLE test5(id INTEGER, data ANY) STRICT; INSERT INTO test5 VALUES(1, 100); INSERT INTO test5 VALUES(2, 'text'); @@ -52,25 +52,25 @@ do_execsql_test strict-any-flexibility { 2|text 3|real} -do_execsql_test strict-any-preservation { +do_execsql_test_on_specific_db {:memory:} strict-any-preservation { CREATE TABLE test6(id INTEGER, code ANY) STRICT; INSERT INTO test6 VALUES(1, '000123'); SELECT typeof(code), code FROM test6; } {text|000123} -do_execsql_test_any_error strict-int-vs-integer-pk { +do_execsql_test_in_memory_any_error strict-int-vs-integer-pk { CREATE TABLE test8(id INT PRIMARY KEY, name TEXT) STRICT INSERT 
INTO test8 VALUES(NULL, 'test'); } -do_execsql_test strict-integer-pk-behavior { +do_execsql_test_on_specific_db {:memory:} strict-integer-pk-behavior { CREATE TABLE test9(id INTEGER PRIMARY KEY, name TEXT) STRICT; INSERT INTO test9 VALUES(NULL, 'test'); SELECT id, name FROM test9; } {1|test} -do_execsql_test strict-mixed-inserts { +do_execsql_test_on_specific_db {:memory:} strict-mixed-inserts { CREATE TABLE test11( id INTEGER PRIMARY KEY, name TEXT, @@ -86,27 +86,27 @@ do_execsql_test strict-mixed-inserts { } {1|item1|10.5|5|text 2|item2|20.75|10|integer} -do_execsql_test strict-update-basic { +do_execsql_test_on_specific_db {:memory:} strict-update-basic { CREATE TABLE test1(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test1 VALUES(1, 'item1', 10.5); UPDATE test1 SET price = 15.75 WHERE id = 1; SELECT * FROM test1; } {1|item1|15.75} -do_execsql_test_any_error strict-update-type-enforcement { +do_execsql_test_in_memory_any_error strict-update-type-enforcement { CREATE TABLE test2(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test2 VALUES(1, 'item1', 10.5); UPDATE test2 SET price = 'not-a-number' WHERE id = 1; } -do_execsql_test strict-update-type-coercion { +do_execsql_test_on_specific_db {:memory:} strict-update-type-coercion { CREATE TABLE test3(id INTEGER, name TEXT, price REAL) STRICT; INSERT INTO test3 VALUES(1, 'item1', 10.5); UPDATE test3 SET price = '15.75' WHERE id = 1; SELECT id, typeof(price), price FROM test3; } {1|real|15.75} -do_execsql_test strict-update-any-flexibility { +do_execsql_test_on_specific_db {:memory:} strict-update-any-flexibility { CREATE TABLE test4(id INTEGER, data ANY) STRICT; INSERT INTO test4 VALUES(1, 100); UPDATE test4 SET data = 'text' WHERE id = 1; @@ -116,14 +116,14 @@ do_execsql_test strict-update-any-flexibility { } {1|text|text 2|real|3.14} -do_execsql_test strict-update-any-preservation { +do_execsql_test_on_specific_db {:memory:} strict-update-any-preservation { CREATE TABLE test5(id INTEGER, code ANY) STRICT; INSERT INTO test5 VALUES(1, 'text'); UPDATE test5 SET code = '000123' WHERE id = 1; SELECT typeof(code), code FROM test5; } {text|000123} -do_execsql_test_any_error strict-update-not-null-constraint { +do_execsql_test_in_memory_any_error strict-update-not-null-constraint { CREATE TABLE test7(id INTEGER, name TEXT NOT NULL) STRICT; INSERT INTO test7 VALUES(1, 'name'); UPDATE test7 SET name = NULL WHERE id = 1; @@ -137,14 +137,14 @@ do_execsql_test_any_error strict-update-not-null-constraint { # UPDATE test8 SET id = 2 WHERE id = 1; #} -do_execsql_test strict-update-multiple-columns { +do_execsql_test_on_specific_db {:memory:} strict-update-multiple-columns { CREATE TABLE test9(id INTEGER, name TEXT, price REAL, quantity INT) STRICT; INSERT INTO test9 VALUES(1, 'item1', 10.5, 5); UPDATE test9 SET name = 'updated', price = 20.75, quantity = 10 WHERE id = 1; SELECT * FROM test9; } {1|updated|20.75|10} -do_execsql_test strict-update-where-clause { +do_execsql_test_on_specific_db {:memory:} strict-update-where-clause { CREATE TABLE test10(id INTEGER, category TEXT, price REAL) STRICT; INSERT INTO test10 VALUES(1, 'A', 10); INSERT INTO test10 VALUES(2, 'A', 20); @@ -155,7 +155,7 @@ do_execsql_test strict-update-where-clause { 2|40.0 3|30.0} -do_execsql_test strict-update-expression { +do_execsql_test_on_specific_db {:memory:} strict-update-expression { CREATE TABLE test11(id INTEGER, name TEXT, price REAL, discount REAL) STRICT; INSERT INTO test11 VALUES(1, 'item1', 100, 0.1); UPDATE test11 SET price = price - (price * 
discount); diff --git a/testing/tester.tcl b/testing/tester.tcl index 4bb3ab2ef..41117ed37 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -208,3 +208,13 @@ proc do_execsql_test_any_error {test_name sql_statements} { run_test_expecting_any_error $::sqlite_exec $db $combined_sql } } + +proc do_execsql_test_in_memory_any_error {test_name sql_statements} { + puts [format "(in-memory) %s Running any-error test: %s" [string repeat " " 31] $test_name] + + # Use ":memory:" special filename for in-memory database + set db_name ":memory:" + + set combined_sql [string trim $sql_statements] + run_test_expecting_any_error $::sqlite_exec $db_name $combined_sql +} From 03f531417c2a0d964625bb194fabc5c200e5b64b Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:12:20 +0200 Subject: [PATCH 071/425] update sqlite download version to 2025 --- .github/shared/install_sqlite/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/shared/install_sqlite/action.yml b/.github/shared/install_sqlite/action.yml index f74f620f1..b7571d72c 100644 --- a/.github/shared/install_sqlite/action.yml +++ b/.github/shared/install_sqlite/action.yml @@ -6,8 +6,8 @@ runs: steps: - name: Install SQLite env: - SQLITE_VERSION: "3470200" - YEAR: 2024 + SQLITE_VERSION: "3490100" + YEAR: 2025 run: | curl -o /tmp/sqlite.zip https://www.sqlite.org/$YEAR/sqlite-tools-linux-x64-$SQLITE_VERSION.zip > /dev/null unzip -j /tmp/sqlite.zip sqlite3 -d /usr/local/bin/ From b83b51e973de0d01dc97a79c0965ab62cdc5de47 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:16:32 +0200 Subject: [PATCH 072/425] remove www. prefix --- .github/shared/install_sqlite/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/shared/install_sqlite/action.yml b/.github/shared/install_sqlite/action.yml index b7571d72c..533d4e17d 100644 --- a/.github/shared/install_sqlite/action.yml +++ b/.github/shared/install_sqlite/action.yml @@ -9,7 +9,7 @@ runs: SQLITE_VERSION: "3490100" YEAR: 2025 run: | - curl -o /tmp/sqlite.zip https://www.sqlite.org/$YEAR/sqlite-tools-linux-x64-$SQLITE_VERSION.zip > /dev/null + curl -o /tmp/sqlite.zip https://sqlite.org/$YEAR/sqlite-tools-linux-x64-$SQLITE_VERSION.zip > /dev/null unzip -j /tmp/sqlite.zip sqlite3 -d /usr/local/bin/ sqlite3 --version shell: bash From 3950ab1e52c8d1a83aedbe0518dada8c8f651b3a Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 22:16:50 +0200 Subject: [PATCH 073/425] account for divider cell size in page size --- core/storage/btree.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c505a3fee..06a7e65fd 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1642,6 +1642,10 @@ impl BTreeCursor { let size = new_page_sizes.last_mut().unwrap(); // 2 to account of pointer *size += 2 + overflow.payload.len() as u16; + if !leaf && i < balance_info.sibling_count - 1 { + // Account for divider cell which is included in this page. 
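+                // Interior pages carry one divider cell per sibling except the
+                // rightmost, hence the `i < sibling_count - 1` guard above.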
+ let size = new_page_sizes.last_mut().unwrap(); + *size += cell_array.cells[cell_array.cell_count(i)].len() as i64; } } From 8e88b0cd147539eb5c9a3afe2d99f94bfa3f5795 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 7 Apr 2025 22:17:11 +0200 Subject: [PATCH 074/425] new_page_sizes as Vec --- core/storage/btree.rs | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 06a7e65fd..36a3a5ff0 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1624,7 +1624,7 @@ impl BTreeCursor { } } // calculate how many pages to allocate - let mut new_page_sizes = Vec::new(); + let mut new_page_sizes: Vec = Vec::new(); let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes // header @@ -1637,11 +1637,12 @@ impl BTreeCursor { let page_contents = page.get_contents(); let free_space = compute_free_space(page_contents, self.usable_space() as u16); - new_page_sizes.push(usable_space as u16 - free_space); + new_page_sizes.push(usable_space as i64 - free_space as i64); for overflow in &page_contents.overflow_cells { let size = new_page_sizes.last_mut().unwrap(); // 2 to account of pointer - *size += 2 + overflow.payload.len() as u16; + *size += 2 + overflow.payload.len() as i64; + } if !leaf && i < balance_info.sibling_count - 1 { // Account for divider cell which is included in this page. let size = new_page_sizes.last_mut().unwrap(); @@ -1654,7 +1655,7 @@ impl BTreeCursor { let mut i = 0; while i < sibling_count_new { // First try to move cells to the right if they do not fit - while new_page_sizes[i] > usable_space as u16 { + while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { sibling_count_new += 1; @@ -1668,7 +1669,9 @@ impl BTreeCursor { ); } let size_of_cell_to_remove_from_left = - 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as u16; + 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64; + // removing a page from the right might include removing from a page that is not directly adjacent, therefore, it could be possible we set page+1 + // to a negative number until we move the cell to the right page again. 
new_page_sizes[i] -= size_of_cell_to_remove_from_left; let size_of_cell_to_move_right = if !leaf_data { if cell_array.number_of_cells_per_page[i] @@ -1676,23 +1679,23 @@ impl BTreeCursor { { // This means we move to the right page the divider cell and we // promote left cell to divider - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } } else { size_of_cell_to_remove_from_left }; - new_page_sizes[i + 1] += size_of_cell_to_move_right; + new_page_sizes[i + 1] += size_of_cell_to_move_right as i64; cell_array.number_of_cells_per_page[i] -= 1; } // Now try to take from the right if we didn't have enough while cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { let size_of_cell_to_remove_from_right = - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16; + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64; let can_take = new_page_sizes[i] + size_of_cell_to_remove_from_right - > usable_space as u16; + > usable_space as i64; if can_take { break; } @@ -1703,7 +1706,7 @@ impl BTreeCursor { if cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { - 2 + cell_array.cells[cell_array.cell_count(i)].len() as u16 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } @@ -1749,8 +1752,8 @@ impl BTreeCursor { // the same we add to right (we don't add divider to right). let mut cell_right = cell_left + 1 - leaf_data as u16; loop { - let cell_left_size = cell_array.cell_size(cell_left as usize); - let cell_right_size = cell_array.cell_size(cell_right as usize); + let cell_left_size = cell_array.cell_size(cell_left as usize) as i64; + let cell_right_size = cell_array.cell_size(cell_right as usize) as i64; // TODO: add assert nMaxCells let pointer_size = if i == sibling_count_new - 1 { 0 } else { 2 }; @@ -4739,7 +4742,7 @@ mod tests { let (pager, root_page) = empty_btree(); let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let mut keys = Vec::new(); - let seed = rng.next_u64(); + let seed = 3206743363843416341; tracing::info!("seed: {}", seed); let mut rng = ChaCha8Rng::seed_from_u64(seed); for insert_id in 0..inserts { @@ -4879,25 +4882,21 @@ mod tests { } #[test] - #[ignore] pub fn btree_insert_fuzz_run_random() { btree_insert_fuzz_run(128, 16, |rng| (rng.next_u32() % 4096) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_small() { btree_insert_fuzz_run(1, 100, |rng| (rng.next_u32() % 128) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_big() { btree_insert_fuzz_run(64, 32, |rng| 3 * 1024 + (rng.next_u32() % 1024) as usize); } #[test] - #[ignore] pub fn btree_insert_fuzz_run_overflow() { btree_insert_fuzz_run(64, 32, |rng| (rng.next_u32() % 32 * 1024) as usize); } From 40f8bbe1320ce2f0af85f5837ee2eec1dbbdbdcf Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:05:40 +0200 Subject: [PATCH 075/425] clippy --- core/storage/btree.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 36a3a5ff0..349d75855 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4742,9 +4742,7 @@ mod tests { let (pager, root_page) = empty_btree(); let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let mut keys = Vec::new(); - let seed = 3206743363843416341; tracing::info!("seed: {}", seed); - let mut rng = ChaCha8Rng::seed_from_u64(seed); for insert_id in 0..inserts { let size = size(&mut rng); let key = { From 
8c4003908f30288f81e7002a6d62134c7967808e Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Tue, 8 Apr 2025 14:04:51 +0200
Subject: [PATCH 076/425] bring back usize, it shouldn't underflow

---
 core/storage/btree.rs | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 349d75855..e5576cf28 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -1624,7 +1624,7 @@ impl BTreeCursor {
             }
         }
         // calculate how many pages to allocate
-        let mut new_page_sizes: Vec<i64> = Vec::new();
+        let mut new_page_sizes: Vec<usize> = Vec::new();
         let leaf_correction = if leaf { 4 } else { 0 };
         // number of bytes beyond header, different from global usableSapce which includes
         // header
@@ -1637,16 +1637,16 @@ impl BTreeCursor {
             let page_contents = page.get_contents();
             let free_space = compute_free_space(page_contents, self.usable_space() as u16);
-            new_page_sizes.push(usable_space as i64 - free_space as i64);
+            new_page_sizes.push(usable_space as usize - free_space as usize);
             for overflow in &page_contents.overflow_cells {
                 let size = new_page_sizes.last_mut().unwrap();
                 // 2 to account of pointer
-                *size += 2 + overflow.payload.len() as i64;
+                *size += 2 + overflow.payload.len() as usize;
             }
             if !leaf && i < balance_info.sibling_count - 1 {
                 // Account for divider cell which is included in this page.
                 let size = new_page_sizes.last_mut().unwrap();
-                *size += cell_array.cells[cell_array.cell_count(i)].len() as i64;
+                *size += cell_array.cells[cell_array.cell_count(i)].len() as usize;
             }
         }
@@ -1655,7 +1655,7 @@ impl BTreeCursor {
         let mut i = 0;
         while i < sibling_count_new {
             // First try to move cells to the right if they do not fit
-            while new_page_sizes[i] > usable_space as i64 {
+            while new_page_sizes[i] > usable_space as usize {
                 let needs_new_page = i + 1 >= sibling_count_new;
                 if needs_new_page {
                     // FIXME: this doesn't remove pages if not needed
                     sibling_count_new += 1;
@@ -1669,7 +1669,7 @@ impl BTreeCursor {
                 }
                 let size_of_cell_to_remove_from_left =
-                    2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64;
+                    2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as usize;
                 // removing a page from the right might include removing from a page that is not directly adjacent, therefore, it could be possible we set page+1
                 // to a negative number until we move the cell to the right page again.
new_page_sizes[i] -= size_of_cell_to_remove_from_left; @@ -1679,23 +1679,23 @@ impl BTreeCursor { { // This means we move to the right page the divider cell and we // promote left cell to divider - 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize } else { 0 } } else { size_of_cell_to_remove_from_left }; - new_page_sizes[i + 1] += size_of_cell_to_move_right as i64; + new_page_sizes[i + 1] += size_of_cell_to_move_right as usize; cell_array.number_of_cells_per_page[i] -= 1; } // Now try to take from the right if we didn't have enough while cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { let size_of_cell_to_remove_from_right = - 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64; + 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize; let can_take = new_page_sizes[i] + size_of_cell_to_remove_from_right - > usable_space as i64; + > usable_space as usize; if can_take { break; } @@ -1706,7 +1706,7 @@ impl BTreeCursor { if cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { - 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 + 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize } else { 0 } @@ -1752,8 +1752,8 @@ impl BTreeCursor { // the same we add to right (we don't add divider to right). let mut cell_right = cell_left + 1 - leaf_data as u16; loop { - let cell_left_size = cell_array.cell_size(cell_left as usize) as i64; - let cell_right_size = cell_array.cell_size(cell_right as usize) as i64; + let cell_left_size = cell_array.cell_size(cell_left as usize) as usize; + let cell_right_size = cell_array.cell_size(cell_right as usize) as usize; // TODO: add assert nMaxCells let pointer_size = if i == sibling_count_new - 1 { 0 } else { 2 }; @@ -4896,7 +4896,7 @@ mod tests { #[test] pub fn btree_insert_fuzz_run_overflow() { - btree_insert_fuzz_run(64, 32, |rng| (rng.next_u32() % 32 * 1024) as usize); + btree_insert_fuzz_run(64, 10000, |rng| (rng.next_u32() % 32 * 1024) as usize); } #[allow(clippy::arc_with_non_send_sync)] From c0c66bf8af20b1c839d1c0b868c392a490287907 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 14:06:48 +0200 Subject: [PATCH 077/425] remove wrong comment --- core/storage/btree.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e5576cf28..87da725d7 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1670,8 +1670,6 @@ impl BTreeCursor { } let size_of_cell_to_remove_from_left = 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as usize; - // removing a page from the right might include removing from a page that is not directly adjacent, therefore, it could be possible we set page+1 - // to a negative number until we move the cell to the right page again. 
new_page_sizes[i] -= size_of_cell_to_remove_from_left; let size_of_cell_to_move_right = if !leaf_data { if cell_array.number_of_cells_per_page[i] From fded6ccaf3b06d31f454fb082de0d7469dad2c3a Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 14:09:17 +0200 Subject: [PATCH 078/425] rever iterations fuzz test --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 87da725d7..e29a9f8f7 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4894,7 +4894,7 @@ mod tests { #[test] pub fn btree_insert_fuzz_run_overflow() { - btree_insert_fuzz_run(64, 10000, |rng| (rng.next_u32() % 32 * 1024) as usize); + btree_insert_fuzz_run(64, 32, |rng| (rng.next_u32() % 32 * 1024) as usize); } #[allow(clippy::arc_with_non_send_sync)] From 1f29307fe8ddb438cb453d73b1da0e4144140930 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sat, 5 Apr 2025 16:12:59 -0400 Subject: [PATCH 079/425] Support proper index handling when doing insertions --- core/schema.rs | 13 ++++ core/translate/index.rs | 5 ++ core/translate/insert.rs | 149 ++++++++++++++++++++++++++++++++++++++- core/vdbe/mod.rs | 5 +- 4 files changed, 166 insertions(+), 6 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index dda37d15b..b68b1075e 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -85,6 +85,19 @@ impl Schema { let name = normalize_ident(table_name); self.indexes.remove(&name); } + + pub fn get_index_for_column(&self, table_name: &str, column_name: &str) -> Option> { + if let Some(indexes) = self.indexes.get(table_name) { + for index in indexes { + for column in &index.columns { + if column.name.eq_ignore_ascii_case(column_name) { + return Some(index.clone()); + } + } + } + } + None + } } #[derive(Clone, Debug)] diff --git a/core/translate/index.rs b/core/translate/index.rs index 32d7cd2e9..94aa03e64 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -172,6 +172,11 @@ pub fn translate_create_index( cursor_id: table_cursor_id, dest: rowid_reg, }); + // if the rowid is null, skip the insert + program.emit_insn(Insn::IsNull { + reg: rowid_reg, + target_pc: loop_end_label, + }); let record_reg = program.alloc_register(); program.emit_insn(Insn::MakeRecord { start_reg, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 7713b9355..2b16f464c 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -6,10 +6,10 @@ use limbo_sqlite3_parser::ast::{ }; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; -use crate::schema::Table; +use crate::schema::{IndexColumn, Table}; use crate::util::normalize_ident; use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode}; -use crate::vdbe::insn::RegisterOrLiteral; +use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral}; use crate::vdbe::BranchOffset; use crate::{ schema::{Column, Schema}, @@ -83,6 +83,22 @@ pub fn translate_insert( Some(table_name.0.clone()), CursorType::BTreeTable(btree_table.clone()), ); + // allocate cursor id's for each btree index cursor we'll need to populate the indexes + // (idx name, root_page, idx cursor id) + let idx_cursors = schema + .get_indices(&table_name.0) + .iter() + .map(|idx| { + ( + &idx.name, + idx.root_page, + program.alloc_cursor_id( + Some(table_name.0.clone()), + CursorType::BTreeIndex(idx.clone()), + ), + ) + }) + .collect::>(); let root_page = btree_table.root_page; let values = match body { InsertBody::Select(select, _) => match &select.body.select.deref() { @@ -93,6 +109,7 @@ pub fn 
translate_insert( }; let column_mappings = resolve_columns_for_insert(&table, columns, values)?; + let index_col_mappings = resolve_indicies_for_insert(schema, table.as_ref(), &column_mappings)?; // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) let rowid_alias_index = btree_table.columns.iter().position(|c| c.is_rowid_alias); let has_user_provided_rowid = { @@ -183,7 +200,14 @@ pub fn translate_insert( &resolver, )?; } - + // Open all the index btrees for writing + for idx_cursor in idx_cursors.iter() { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: idx_cursor.2, + root_page: idx_cursor.1.into(), + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); if let Some(reg) = rowid_alias_reg { @@ -265,7 +289,54 @@ pub fn translate_insert( flag: 0, }); program.emit_insn(Insn::InsertAwait { cursor_id }); + for index_col_mapping in index_col_mappings.iter() { + // find which cursor we opened earlier for this index + let idx_cursor_id = idx_cursors + .iter() + .find(|(name, _, _)| *name == &index_col_mapping.idx_name) + .map(|(_, _, c_id)| *c_id) + .expect("no cursor found for index"); + let num_cols = index_col_mapping.columns.len(); + // allocate scratch registers for the index columns plus rowid + let idx_start_reg = program.alloc_registers(num_cols + 1); + + // copy each index column from the table's column registers into these scratch regs + for (i, col) in index_col_mapping.columns.iter().enumerate() { + // copy from the table's column register over to the index's scratch register + program.emit_insn(Insn::Copy { + src_reg: column_registers_start + col.0, + dst_reg: idx_start_reg + i, + amount: 0, + }); + } + // last register is the rowid + program.emit_insn(Insn::Copy { + src_reg: rowid_reg, + dst_reg: idx_start_reg + num_cols, + amount: 0, + }); + + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: idx_start_reg, + count: num_cols + 1, + dest_reg: record_reg, + }); + + // now do the actual index insertion using the unpacked registers + program.emit_insn(Insn::IdxInsertAsync { + cursor_id: idx_cursor_id, + record_reg, + unpacked_start: Some(idx_start_reg), // TODO: enable optimization + unpacked_count: Some((num_cols + 1) as u16), + // TODO: figure out how to determine whether or not we need to seek prior to insert. + flags: IdxInsertFlags::new(), + }); + program.emit_insn(Insn::IdxInsertAwait { + cursor_id: idx_cursor_id, + }); + } if inserting_multiple_rows { // For multiple rows, loop back program.emit_insn(Insn::Goto { @@ -393,6 +464,78 @@ fn resolve_columns_for_insert<'a>( Ok(mappings) } +/// Represents how a column in an index should be populated during an INSERT. +/// Similar to ColumnMapping above but includes the index name, as well as multiple +/// possible value indices for each. 
+#[derive(Default)]
+struct IndexColMapping {
+    idx_name: String,
+    columns: Vec<(usize, IndexColumn)>,
+    value_indicies: Vec<Option<usize>>,
+}
+
+impl IndexColMapping {
+    fn new(name: String) -> Self {
+        IndexColMapping {
+            idx_name: name,
+            ..Default::default()
+        }
+    }
+}
+
+/// Example:
+/// Table 'test': (a, b, c);
+/// Index 'idx': test(a, b);
+///________________________________
+/// Insert (a, c): (2, 3)
+/// Record: (2, NULL, 3)
+/// IndexColMapping: (a, b) = (2, NULL)
+fn resolve_indicies_for_insert<'a>(
+    schema: &Schema,
+    table: &Table,
+    columns: &[ColumnMapping],
+) -> Result<Vec<IndexColMapping>> {
+    let mut index_col_mappings = Vec::new();
+    for col in columns {
+        // check if any of the inserted columns are part of an index
+        if let Some(index) =
+            schema.get_index_for_column(table.get_name(), col.column.name.as_ref().unwrap())
+        {
+            // check if the index is already in the list
+            if index_col_mappings
+                .iter()
+                .any(|i: &IndexColMapping| i.idx_name.eq_ignore_ascii_case(&index.name))
+            {
+                continue;
+            }
+            let mut idx_col_map = IndexColMapping::new(index.name.clone()); //todo: rm clone -_-
+            for column in &index.columns {
+                let column_name = normalize_ident(column.name.as_str());
+                // find the other columns in the index that are not part of the insert
+                if let Some((i, index_column)) = columns.iter().enumerate().find(|(_, c)| {
+                    c.column
+                        .name
+                        .as_ref()
+                        .is_some_and(|c| c.eq_ignore_ascii_case(&column_name))
+                }) {
+                    // the column is also part of the insert
+                    idx_col_map.columns.push((i, column.clone()));
+                    // store the value index (which may be null if not part of the insert)
+                    idx_col_map.value_indicies.push(index_column.value_index);
+                } else {
+                    // column not found, meaning the ColumnMapping failed, thus we bail
+                    return Err(crate::LimboError::ParseError(format!(
+                        "Column {} not found in index {}",
+                        column_name, index.name
+                    )));
+                }
+            }
+            index_col_mappings.push(idx_col_map);
+        }
+    }
+    Ok(index_col_mappings)
+}
+
 /// Populates the column registers with values for a single row
 fn populate_column_registers(
     program: &mut ProgramBuilder,
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 8794b208a..b12cfcc56 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -627,11 +627,10 @@ impl Row {
     pub fn get_value<'a>(&'a self, idx: usize) -> &'a OwnedValue {
         let value = unsafe { self.values.add(idx).as_ref().unwrap() };
-        let value = match value {
+        match value {
             Register::OwnedValue(owned_value) => owned_value,
             _ => unreachable!("a row should be formed of values only"),
-        };
-        value
+        }
     }
 
     pub fn get_values(&self) -> impl Iterator<Item = &OwnedValue> {

From 878c987026429bf2c27fbffc588f149850a09ad4 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Sat, 5 Apr 2025 20:38:23 -0400
Subject: [PATCH 080/425] Remove is_null check from create index translation

---
 core/translate/index.rs | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/core/translate/index.rs b/core/translate/index.rs
index 94aa03e64..32d7cd2e9 100644
--- a/core/translate/index.rs
+++ b/core/translate/index.rs
@@ -172,11 +172,6 @@ pub fn translate_create_index(
         cursor_id: table_cursor_id,
         dest: rowid_reg,
     });
-    // if the rowid is null, skip the insert
-    program.emit_insn(Insn::IsNull {
-        reg: rowid_reg,
-        target_pc: loop_end_label,
-    });
     let record_reg = program.alloc_register();
     program.emit_insn(Insn::MakeRecord {
         start_reg,

From 224f913ae7cf3641d782215223e86416ae754097 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 7 Apr 2025 10:13:49 -0400
Subject: [PATCH 081/425] Handle composite key indexes on insert

---
 core/schema.rs           | 13 --------
 core/translate/insert.rs | 64 ++++++++++++++++----------------------
 2 files changed, 28 insertions(+), 49 deletions(-)

diff --git a/core/schema.rs b/core/schema.rs
index b68b1075e..dda37d15b 100644
--- a/core/schema.rs
+++ b/core/schema.rs
@@ -85,19 +85,6 @@ impl Schema {
         let name = normalize_ident(table_name);
         self.indexes.remove(&name);
     }
-
-    pub fn get_index_for_column(&self, table_name: &str, column_name: &str) -> Option> {
-        if let Some(indexes) = self.indexes.get(table_name) {
-            for index in indexes {
-                for column in &index.columns {
-                    if column.name.eq_ignore_ascii_case(column_name) {
-                        return Some(index.clone());
-                    }
-                }
-            }
-        }
-        None
-    }
 }
 
 #[derive(Clone, Debug)]
diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index 2b16f464c..16cc040c5 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -304,6 +304,7 @@ pub fn translate_insert(
         // copy each index column from the table's column registers into these scratch regs
         for (i, col) in index_col_mapping.columns.iter().enumerate() {
             // copy from the table's column register over to the index's scratch register
+
             program.emit_insn(Insn::Copy {
                 src_reg: column_registers_start + col.0,
                 dst_reg: idx_start_reg + i,
@@ -490,47 +491,38 @@ impl IndexColMapping {
 /// Insert (a, c): (2, 3)
 /// Record: (2, NULL, 3)
 /// IndexColMapping: (a, b) = (2, NULL)
-fn resolve_indicies_for_insert<'a>(
+fn resolve_indicies_for_insert(
     schema: &Schema,
     table: &Table,
-    columns: &[ColumnMapping],
+    columns: &[ColumnMapping<'_>],
 ) -> Result<Vec<IndexColMapping>> {
     let mut index_col_mappings = Vec::new();
-    for col in columns {
-        // check if any of the inserted columns are part of an index
-        if let Some(index) =
-            schema.get_index_for_column(table.get_name(), col.column.name.as_ref().unwrap())
-        {
-            // check if the index is already in the list
-            if index_col_mappings
-                .iter()
-                .any(|i: &IndexColMapping| i.idx_name.eq_ignore_ascii_case(&index.name))
-            {
-                continue;
+    // Iterate over all indices for this table
+    for index in schema.get_indices(table.get_name()) {
+        let mut idx_map = IndexColMapping::new(index.name.clone());
+        // For each column in the index (in the order defined by the index),
+        // try to find the corresponding column in the insert's column mapping.
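+        // Matching is by normalized, case-insensitive identifier; for every
+        // index column we record which VALUES position supplies it. A None
+        // value_index means the INSERT omitted that column, so the index
+        // record carries NULL there, as in the doc example above.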
+ for idx_col in &index.columns { + let target_name = normalize_ident(idx_col.name.as_str()); + if let Some((i, col_mapping)) = columns.iter().enumerate().find(|(_, mapping)| { + mapping + .column + .name + .as_ref() + .map_or(false, |name| name.eq_ignore_ascii_case(&target_name)) + }) { + idx_map.columns.push((i, idx_col.clone())); + idx_map.value_indicies.push(col_mapping.value_index); + } else { + return Err(crate::LimboError::ParseError(format!( + "Column {} not found in index {}", + target_name, index.name + ))); } - let mut idx_col_map = IndexColMapping::new(index.name.clone()); //todo: rm clone -_- - for column in &index.columns { - let column_name = normalize_ident(column.name.as_str()); - // find the other columns in the index that are not part of the insert - if let Some((i, index_column)) = columns.iter().enumerate().find(|(_, c)| { - c.column - .name - .as_ref() - .is_some_and(|c| c.eq_ignore_ascii_case(&column_name)) - }) { - // the column is also part of the insert - idx_col_map.columns.push((i, column.clone())); - // store the value index (which may be null if not part of the insert) - idx_col_map.value_indicies.push(index_column.value_index); - } else { - // column not found, meaning the ColumnMapping failed, thus we bail - return Err(crate::LimboError::ParseError(format!( - "Column {} not found in index {}", - column_name, index.name - ))); - } - } - index_col_mappings.push(idx_col_map); + } + // Add the mapping if at least one column was found. + if !idx_map.columns.is_empty() { + index_col_mappings.push(idx_map); } } Ok(index_col_mappings) From 12ae07874ed48ab3b8f43f8cbbfff869e26b24f1 Mon Sep 17 00:00:00 2001 From: jachewz Date: Tue, 8 Apr 2025 23:23:08 +1000 Subject: [PATCH 082/425] fmt inf float str as "Inf"/"-Inf" --- core/types.rs | 6 ++++++ testing/select.test | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/core/types.rs b/core/types.rs index 1556ee100..72119349f 100644 --- a/core/types.rs +++ b/core/types.rs @@ -197,6 +197,12 @@ impl Display for OwnedValue { } Self::Float(fl) => { let fl = *fl; + if fl == f64::INFINITY { + return write!(f, "Inf"); + } + if fl == f64::NEG_INFINITY { + return write!(f, "-Inf"); + } if fl.is_nan() { return write!(f, ""); } diff --git a/testing/select.test b/testing/select.test index 27741aa54..02236159a 100755 --- a/testing/select.test +++ b/testing/select.test @@ -166,6 +166,14 @@ do_execsql_test select-like-expression { select 2 % 0.5 } {} +do_execsql_test select_positive_infinite_float { + SELECT 1.7976931348623157E+308 + 1e308; -- f64::MAX + 1e308 +} {Inf} + +do_execsql_test select_negative_infinite_float { + SELECT -1.7976931348623157E+308 - 1e308 -- f64::MIN - 1e308 +} {-Inf} + do_execsql_test select_shl_large_negative_float { SELECT 1 << -1e19; SELECT 1 << -9223372036854775808; -- i64::MIN From 029da5c81c7ff2bc063f041868098fad1c9ce63d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:03:30 +0200 Subject: [PATCH 083/425] Improve readability of balance_non_root with comments and validation extraction --- core/storage/btree.rs | 153 +++++++++++++++++++++++++++++------------- 1 file changed, 107 insertions(+), 46 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e29a9f8f7..980316132 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1470,7 +1470,7 @@ impl BTreeCursor { "overflow parent not yet implemented" ); - // Get divider cells and max_cells + /* 1. 
Get divider cells and max_cells */ let mut max_cells = 0; let mut pages_to_balance_new = Vec::new(); for i in (0..balance_info.sibling_count).rev() { @@ -1528,6 +1528,7 @@ impl BTreeCursor { // Reverse divider cells to be in order balance_info.divider_cells.reverse(); + /* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */ let mut cell_array = CellArray { cells: Vec::with_capacity(max_cells), number_of_cells_per_page: Vec::new(), @@ -1614,16 +1615,9 @@ impl BTreeCursor { } #[cfg(debug_assertions)] - { - for cell in &cell_array.cells { - assert!(cell.len() >= 4); + validate_cells_after_insertion(&cell_array, leaf_data); - if leaf_data { - assert!(cell[0] != 0, "payload is {:?}", cell); - } - } - } - // calculate how many pages to allocate + /* 3. Initiliaze current size of every page including overflow cells and divider cells that might be included. */ let mut new_page_sizes: Vec = Vec::new(); let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes @@ -1650,6 +1644,15 @@ impl BTreeCursor { } } + /* 4. Now let's try to move cells to the left trying to stack them without exceeding the maximum size of a page. + There are two cases: + * If current page has too many cells, it will move them to the next page. + * If it still has space, and it can take a cell from the right it will take them. + Here there is a caveat. Taking a cell from the right might take cells from page i+1, i+2, i+3, so not necessarily + adjacent. But we decrease the size of the adjacent page if we move from the right. This might cause a intermitent state + where page can have size <0. + This will also calculate how many pages are required to balance the cells and store in sibling_count_new. + */ // Try to pack as many cells to the left let mut sibling_count_new = balance_info.sibling_count; let mut i = 0; @@ -1658,6 +1661,7 @@ impl BTreeCursor { while new_page_sizes[i] > usable_space as usize { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { + // FIXME: this doesn't remove pages if not needed sibling_count_new += 1; new_page_sizes.push(0); cell_array @@ -1732,6 +1736,10 @@ impl BTreeCursor { cell_array.cells.len() ); + /* 5. Balance pages starting from a left stacked cell state and move them to right trying to maintain a balanced state + where we only move from left to right if it will not unbalance both pages, meaning moving left to right won't make + right page bigger than left page. + */ // Comment borrowed from SQLite src/btree.c // The packing computed by the previous block is biased toward the siblings // on the left side (siblings with smaller keys). The left siblings are @@ -1840,6 +1848,7 @@ impl BTreeCursor { right_page_id ); + /* 6. Update parent pointers. Update right pointer and insert divider cells with newly created distribution of cells */ // Ensure right-child pointer of the right-most new sibling pge points to the page // that was originally on that place. 
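        // Leaf pages carry no right-child pointer, hence the page-type check
        // below before copying it over.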
let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); @@ -1921,41 +1930,12 @@ impl BTreeCursor { ) .unwrap(); #[cfg(debug_assertions)] - { - let left_pointer = if parent_contents.overflow_cells.len() == 0 { - let (cell_start, cell_len) = parent_contents.cell_get_raw_region( - balance_info.first_divider_cell + i, - payload_overflow_threshold_max( - parent_contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - parent_contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - ); - tracing::debug!( - "balance_non_root(cell_start={}, cell_len={})", - cell_start, - cell_len - ); - - let left_pointer = read_u32( - &parent_contents.as_ptr()[cell_start..cell_start + cell_len], - 0, - ); - left_pointer - } else { - let cell = &parent_contents.overflow_cells[0]; - assert_eq!(cell.index, balance_info.first_divider_cell + i); - read_u32(&cell.payload, 0) - }; - assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}", - left_pointer, - page.get().id as u32 - ); - } + self.validate_balance_non_root_divider_cell_insertion( + balance_info, + parent_contents, + i, + page, + ); } tracing::debug!( "balance_non_root(parent_overflow={})", @@ -1964,6 +1944,7 @@ impl BTreeCursor { #[cfg(debug_assertions)] { + // Let's ensure every page is pointed to by the divider cell or the rightmost pointer. for page in &pages_to_balance_new { assert!( pages_pointed_to.contains(&(page.get().id as u32)), @@ -1972,7 +1953,29 @@ impl BTreeCursor { ); } } - // TODO: update pages + /* 7. Start real movement of cells. Next comment is borrowed from SQLite: */ + /* Now update the actual sibling pages. The order in which they are updated + ** is important, as this code needs to avoid disrupting any page from which + ** cells may still to be read. In practice, this means: + ** + ** (1) If cells are moving left (from apNew[iPg] to apNew[iPg-1]) + ** then it is not safe to update page apNew[iPg] until after + ** the left-hand sibling apNew[iPg-1] has been updated. + ** + ** (2) If cells are moving right (from apNew[iPg] to apNew[iPg+1]) + ** then it is not safe to update page apNew[iPg] until after + ** the right-hand sibling apNew[iPg+1] has been updated. + ** + ** If neither of the above apply, the page is safe to update. + ** + ** The iPg value in the following loop starts at nNew-1 goes down + ** to 0, then back up to nNew-1 again, thus making two passes over + ** the pages. On the initial downward pass, only condition (1) above + ** needs to be tested because (2) will always be true from the previous + ** step. On the upward pass, both conditions are always true, so the + ** upwards pass simply processes pages that were missed on the downward + ** pass. 
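        ** As a concrete trace: with three new siblings the loop index runs
        ** -2, -1, 0, 1, 2 and the page index is its absolute value, so pages
        ** are visited in the order 2, 1, 0, 1, 2, giving the downward pass
        ** followed by the upward pass described above.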
+ */ let mut done = vec![false; sibling_count_new]; for i in (1 - sibling_count_new as i64)..sibling_count_new as i64 { let page_idx = i.unsigned_abs() as usize; @@ -2053,6 +2056,53 @@ impl BTreeCursor { result } + #[cfg(debug_assertions)] + fn validate_balance_non_root_divider_cell_insertion( + &self, + balance_info: &mut BalanceInfo, + parent_contents: &mut PageContent, + i: usize, + page: &std::sync::Arc, + ) { + let left_pointer = if parent_contents.overflow_cells.len() == 0 { + let (cell_start, cell_len) = parent_contents.cell_get_raw_region( + balance_info.first_divider_cell + i, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + tracing::debug!( + "balance_non_root(cell_start={}, cell_len={})", + cell_start, + cell_len + ); + + let left_pointer = read_u32( + &parent_contents.as_ptr()[cell_start..cell_start + cell_len], + 0, + ); + left_pointer + } else { + let mut left_pointer = None; + for cell in parent_contents.overflow_cells.iter() { + if cell.index == balance_info.first_divider_cell + i { + left_pointer = Some(read_u32(&cell.payload, 0)) + } + } + left_pointer.expect("overflow cell with divider cell was not found") + }; + assert_eq!(left_pointer, page.get().id as u32, "the cell we just inserted doesn't point to the correct page. points to {}, should point to {}", + left_pointer, + page.get().id as u32 + ); + } + #[cfg(debug_assertions)] fn post_balance_non_root_validation( &self, @@ -3419,6 +3469,17 @@ impl BTreeCursor { } } +#[cfg(debug_assertions)] +fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) { + for cell in &cell_array.cells { + assert!(cell.len() >= 4); + + if leaf_data { + assert!(cell[0] != 0, "payload is {:?}", cell); + } + } +} + impl PageStack { fn increment_current(&self) { self.current_page.set(self.current_page.get() + 1); From cf62099bf53502e42c95330b33d9425e654d908d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 11:03:49 +0200 Subject: [PATCH 084/425] allow insertion of multiple overflow cells --- core/storage/btree.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 980316132..c2fcd09a0 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1565,10 +1565,6 @@ impl BTreeCursor { } // Insert overflow cells into correct place let offset = total_cells_inserted; - assert!( - old_page_contents.overflow_cells.len() <= 1, - "todo: check this works for more than one overflow cell" - ); for overflow_cell in old_page_contents.overflow_cells.iter_mut() { cell_array.cells.insert( offset + overflow_cell.index, @@ -4005,7 +4001,7 @@ fn insert_into_cell( usable_space: u16, ) -> Result<()> { assert!( - cell_idx <= page.cell_count(), + cell_idx <= page.cell_count() + page.overflow_cells.len(), "attempting to add cell to an incorrect place cell_idx={} cell_count={}", cell_idx, page.cell_count() From ce7e0188f640ba3b875b6352c41e1894bf0f28a2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 8 Apr 2025 17:57:39 +0200 Subject: [PATCH 085/425] bring back i64 page sizes while balancing --- core/storage/btree.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c2fcd09a0..befb43189 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1614,7 +1614,7 @@ impl 
BTreeCursor { validate_cells_after_insertion(&cell_array, leaf_data); /* 3. Initiliaze current size of every page including overflow cells and divider cells that might be included. */ - let mut new_page_sizes: Vec = Vec::new(); + let mut new_page_sizes: Vec = Vec::new(); let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes // header @@ -1627,16 +1627,16 @@ impl BTreeCursor { let page_contents = page.get_contents(); let free_space = compute_free_space(page_contents, self.usable_space() as u16); - new_page_sizes.push(usable_space as usize - free_space as usize); + new_page_sizes.push(usable_space as i64 - free_space as i64); for overflow in &page_contents.overflow_cells { let size = new_page_sizes.last_mut().unwrap(); // 2 to account of pointer - *size += 2 + overflow.payload.len() as usize; + *size += 2 + overflow.payload.len() as i64; } if !leaf && i < balance_info.sibling_count - 1 { // Account for divider cell which is included in this page. let size = new_page_sizes.last_mut().unwrap(); - *size += cell_array.cells[cell_array.cell_count(i)].len() as usize; + *size += cell_array.cells[cell_array.cell_count(i)].len() as i64; } } @@ -1654,7 +1654,7 @@ impl BTreeCursor { let mut i = 0; while i < sibling_count_new { // First try to move cells to the right if they do not fit - while new_page_sizes[i] > usable_space as usize { + while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { // FIXME: this doesn't remove pages if not needed @@ -1669,7 +1669,7 @@ impl BTreeCursor { ); } let size_of_cell_to_remove_from_left = - 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as usize; + 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64; new_page_sizes[i] -= size_of_cell_to_remove_from_left; let size_of_cell_to_move_right = if !leaf_data { if cell_array.number_of_cells_per_page[i] @@ -1677,23 +1677,23 @@ impl BTreeCursor { { // This means we move to the right page the divider cell and we // promote left cell to divider - 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } } else { size_of_cell_to_remove_from_left }; - new_page_sizes[i + 1] += size_of_cell_to_move_right as usize; + new_page_sizes[i + 1] += size_of_cell_to_move_right as i64; cell_array.number_of_cells_per_page[i] -= 1; } // Now try to take from the right if we didn't have enough while cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { let size_of_cell_to_remove_from_right = - 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize; + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64; let can_take = new_page_sizes[i] + size_of_cell_to_remove_from_right - > usable_space as usize; + > usable_space as i64; if can_take { break; } @@ -1704,7 +1704,7 @@ impl BTreeCursor { if cell_array.number_of_cells_per_page[i] < cell_array.cells.len() as u16 { - 2 + cell_array.cells[cell_array.cell_count(i)].len() as usize + 2 + cell_array.cells[cell_array.cell_count(i)].len() as i64 } else { 0 } @@ -1754,8 +1754,8 @@ impl BTreeCursor { // the same we add to right (we don't add divider to right). 
let mut cell_right = cell_left + 1 - leaf_data as u16; loop { - let cell_left_size = cell_array.cell_size(cell_left as usize) as usize; - let cell_right_size = cell_array.cell_size(cell_right as usize) as usize; + let cell_left_size = cell_array.cell_size(cell_left as usize) as i64; + let cell_right_size = cell_array.cell_size(cell_right as usize) as i64; // TODO: add assert nMaxCells let pointer_size = if i == sibling_count_new - 1 { 0 } else { 2 }; From 64c2917e811fe346efe8447aaa3f0932fecd0082 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Tue, 8 Apr 2025 17:48:16 -0400 Subject: [PATCH 086/425] add bug base, refactor --- .gitignore | 1 + Cargo.lock | 40 ++++- simulator/Cargo.toml | 2 +- simulator/generation/plan.rs | 5 +- simulator/generation/property.rs | 2 +- simulator/main.rs | 293 +++++++++++++++---------------- simulator/runner/bugbase.rs | 241 +++++++++++++++++++++++++ simulator/runner/cli.rs | 20 +-- simulator/runner/env.rs | 1 + simulator/runner/execution.rs | 8 +- simulator/runner/mod.rs | 1 + 11 files changed, 435 insertions(+), 179 deletions(-) create mode 100644 simulator/runner/bugbase.rs diff --git a/.gitignore b/.gitignore index 8a7437707..47e4c5b02 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ dist/ # testing testing/limbo_output.txt **/limbo_output.txt +.bugbase diff --git a/Cargo.lock b/Cargo.lock index 2e7a615c6..5efed684c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -723,7 +723,16 @@ version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "dirs-sys", + "dirs-sys 0.4.1", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys 0.5.0", ] [[package]] @@ -734,10 +743,22 @@ checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.4.6", "windows-sys 0.48.0", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.0", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -1669,7 +1690,7 @@ dependencies = [ "comfy-table", "csv", "ctrlc", - "dirs", + "dirs 5.0.1", "env_logger 0.10.2", "limbo_core", "miette", @@ -1836,6 +1857,7 @@ version = "0.0.19-pre.4" dependencies = [ "anarchist-readable-name-generator-lib", "clap", + "dirs 6.0.0", "env_logger 0.10.2", "limbo_core", "log", @@ -1847,7 +1869,6 @@ dependencies = [ "rusqlite", "serde", "serde_json", - "tempfile", ] [[package]] @@ -2782,6 +2803,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml index 991b72fc5..285604094 100644 --- a/simulator/Cargo.toml +++ b/simulator/Cargo.toml @@ -19,7 +19,6 @@ limbo_core = { path = "../core" } rand = "0.8.5" rand_chacha = "0.3.1" log = "0.4.20" -tempfile = "3.0.7" env_logger = "0.10.1" regex = 
"1.11.1" regex-syntax = { version = "0.8.5", default-features = false, features = [ @@ -31,3 +30,4 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0" } notify = "8.0.0" rusqlite = { version = "0.34", features = ["bundled"] } +dirs = "6.0.0" diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index ecad92344..4f4900b34 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -38,7 +38,7 @@ impl InteractionPlan { let interactions = interactions.lines().collect::>(); let plan: InteractionPlan = serde_json::from_str( - std::fs::read_to_string(plan_path.with_extension("plan.json")) + std::fs::read_to_string(plan_path.with_extension("json")) .unwrap() .as_str(), ) @@ -71,7 +71,6 @@ impl InteractionPlan { let _ = plan[j].split_off(k); break; } - if interactions[i].contains(plan[j][k].to_string().as_str()) { i += 1; k += 1; @@ -86,7 +85,7 @@ impl InteractionPlan { j += 1; } } - + let _ = plan.split_off(j); plan } } diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index cbcd2c479..d73f17f96 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -407,7 +407,7 @@ impl Property { match (select_predicate, select_star) { (Ok(rows1), Ok(rows2)) => { // If rows1 results have more than 1 column, there is a problem - if rows1.iter().find(|vs| vs.len() > 1).is_some() { + if rows1.iter().any(|vs| vs.len() > 1) { return Err(LimboError::InternalError( "Select query without the star should return only one column".to_string(), )); diff --git a/simulator/main.rs b/simulator/main.rs index d28c2b017..ef22853f4 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -2,10 +2,10 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, InteractionPlanState}; use generation::ArbitraryFrom; -use limbo_core::Database; use notify::event::{DataChange, ModifyKind}; use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; +use runner::bugbase::{Bug, BugBase}; use runner::cli::SimulatorCLI; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; @@ -15,13 +15,13 @@ use std::backtrace::Backtrace; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{mpsc, Arc, Mutex}; -use tempfile::TempDir; mod generation; mod model; mod runner; mod shrink; struct Paths { + base: PathBuf, db: PathBuf, plan: PathBuf, shrunk_plan: PathBuf, @@ -31,34 +31,16 @@ struct Paths { } impl Paths { - fn new(output_dir: &Path, shrink: bool, doublecheck: bool) -> Self { - let paths = Paths { - db: PathBuf::from(output_dir).join("simulator.db"), - plan: PathBuf::from(output_dir).join("simulator.plan"), - shrunk_plan: PathBuf::from(output_dir).join("simulator_shrunk.plan"), - history: PathBuf::from(output_dir).join("simulator.history"), - doublecheck_db: PathBuf::from(output_dir).join("simulator_double.db"), - shrunk_db: PathBuf::from(output_dir).join("simulator_shrunk.db"), - }; - - // Print the seed, the locations of the database and the plan file - log::info!("database path: {:?}", paths.db); - if doublecheck { - log::info!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if shrink { - log::info!("shrunk database path: {:?}", paths.shrunk_db); + fn new(output_dir: &Path) -> Self { + Paths { + base: output_dir.to_path_buf(), + db: PathBuf::from(output_dir).join("test.db"), + plan: PathBuf::from(output_dir).join("plan.sql"), + shrunk_plan: PathBuf::from(output_dir).join("shrunk.sql"), + 
history: PathBuf::from(output_dir).join("history.txt"), + doublecheck_db: PathBuf::from(output_dir).join("double.db"), + shrunk_db: PathBuf::from(output_dir).join("shrunk.db"), } - log::info!("simulator plan path: {:?}", paths.plan); - log::info!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if shrink { - log::info!("shrunk plan path: {:?}", paths.shrunk_plan); - } - log::info!("simulator history path: {:?}", paths.history); - - paths } } @@ -68,45 +50,37 @@ fn main() -> Result<(), String> { let cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; - let seed = cli_opts.seed.unwrap_or_else(|| thread_rng().next_u64()); - - let output_dir = match &cli_opts.output_dir { - Some(dir) => Path::new(dir).to_path_buf(), - None => TempDir::new().map_err(|e| format!("{:?}", e))?.into_path(), - }; - + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; banner(); - let paths = Paths::new(&output_dir, cli_opts.shrink, cli_opts.doublecheck); - - log::info!("seed: {}", seed); + // let paths = Paths::new(&output_dir, cli_opts.doublecheck); let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); - let (env, plans) = setup_simulation(seed, &cli_opts, &paths.db, &paths.plan); + let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db); + + let paths = bugbase.paths(seed); + + // Create the output directory if it doesn't exist + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).map_err(|e| format!("{:?}", e))?; + } if cli_opts.watch { watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); } else if cli_opts.differential { differential_testing(env, plans, last_execution.clone()) } else { - run_simulator(&cli_opts, &paths, env, plans, last_execution.clone()); + run_simulator( + seed, + &mut bugbase, + &cli_opts, + &paths, + env, + plans, + last_execution.clone(), + ); } // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. - println!("database path: {:?}", paths.db); - if cli_opts.doublecheck { - println!("doublecheck database path: {:?}", paths.doublecheck_db); - } else if cli_opts.shrink { - println!("shrunk database path: {:?}", paths.shrunk_db); - } - println!("simulator plan path: {:?}", paths.plan); - println!( - "simulator plan serialized path: {:?}", - paths.plan.with_extension("plan.json") - ); - if cli_opts.shrink { - println!("shrunk plan path: {:?}", paths.shrunk_plan); - } - println!("simulator history path: {:?}", paths.history); println!("seed: {}", seed); Ok(()) @@ -140,7 +114,6 @@ fn watch_mode( std::panic::catch_unwind(|| { let plan: Vec> = InteractionPlan::compute_via_diff(&paths.plan); - let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db); plan.iter().for_each(|is| { is.iter().for_each(|i| { @@ -173,6 +146,8 @@ fn watch_mode( } fn run_simulator( + seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, paths: &Paths, env: SimulatorEnv, @@ -204,13 +179,17 @@ fn run_simulator( ); if cli_opts.doublecheck { - doublecheck(env.clone(), paths, &plans, last_execution.clone(), result); + let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); + let env = Arc::new(Mutex::new(env)); + doublecheck(env, paths, &plans, last_execution.clone(), result); } else { // No doublecheck, run shrinking if panicking or found a bug. 
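        // Correct runs clear any stale entry from the bug base; Panicked and
        // FoundBug fall through to shrinking below, which replays a reduced
        // plan and records the bug only when the original error reproduces.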
match &result { SandboxedResult::Correct => { log::info!("simulation succeeded"); println!("simulation succeeded"); + // remove the bugbase entry + bugbase.remove_bug(seed).unwrap(); } SandboxedResult::Panicked { error, @@ -240,59 +219,62 @@ fn run_simulator( log::error!("simulation failed: '{}'", error); println!("simulation failed: '{}'", error); - if cli_opts.shrink { - log::info!("Starting to shrink"); + log::info!("Starting to shrink"); - let shrunk_plans = plans - .iter() - .map(|plan| { - let shrunk = plan.shrink_interaction_plan(last_execution); - log::info!("{}", shrunk.stats()); - shrunk - }) - .collect::>(); + let shrunk_plans = plans + .iter() + .map(|plan| { + let shrunk = plan.shrink_interaction_plan(last_execution); + log::info!("{}", shrunk.stats()); + shrunk + }) + .collect::>(); - // Write the shrunk plan to a file - let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); - f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); + // Write the shrunk plan to a file + let mut f = std::fs::File::create(&paths.shrunk_plan).unwrap(); + f.write_all(shrunk_plans[0].to_string().as_bytes()).unwrap(); - let last_execution = Arc::new(Mutex::new(*last_execution)); + let last_execution = Arc::new(Mutex::new(*last_execution)); + let env = SimulatorEnv::new(seed, cli_opts, &paths.shrunk_db); - let shrunk = SandboxedResult::from( - std::panic::catch_unwind(|| { - run_simulation( - env.clone(), - &mut shrunk_plans.clone(), - last_execution.clone(), - ) - }), - last_execution, - ); - - match (&shrunk, &result) { - ( - SandboxedResult::Panicked { error: e1, .. }, - SandboxedResult::Panicked { error: e2, .. }, + let env = Arc::new(Mutex::new(env)); + let shrunk = SandboxedResult::from( + std::panic::catch_unwind(|| { + run_simulation( + env.clone(), + &mut shrunk_plans.clone(), + last_execution.clone(), ) - | ( - SandboxedResult::FoundBug { error: e1, .. }, - SandboxedResult::FoundBug { error: e2, .. }, - ) => { - if e1 != e2 { - log::error!( - "shrinking failed, the error was not properly reproduced" - ); - } else { - log::info!("shrinking succeeded"); - } - } - (_, SandboxedResult::Correct) => { - unreachable!("shrinking should never be called on a correct simulation") - } - _ => { + }), + last_execution, + ); + + match (&shrunk, &result) { + ( + SandboxedResult::Panicked { error: e1, .. }, + SandboxedResult::Panicked { error: e2, .. }, + ) + | ( + SandboxedResult::FoundBug { error: e1, .. }, + SandboxedResult::FoundBug { error: e2, .. 
}, + ) => { + if e1 != e2 { log::error!("shrinking failed, the error was not properly reproduced"); + bugbase.add_bug(seed, plans[0].clone()).unwrap(); + } else { + log::info!("shrinking succeeded"); + println!("shrinking succeeded"); + // Save the shrunk database + bugbase.add_bug(seed, shrunk_plans[0].clone()).unwrap(); } } + (_, SandboxedResult::Correct) => { + unreachable!("shrinking should never be called on a correct simulation") + } + _ => { + log::error!("shrinking failed, the error was not properly reproduced"); + bugbase.add_bug(seed, plans[0].clone()).unwrap(); + } } } } @@ -306,16 +288,6 @@ fn doublecheck( last_execution: Arc>, result: SandboxedResult, ) { - { - let mut env_ = env.lock().unwrap(); - env_.db = Database::open_file( - env_.io.clone(), - paths.doublecheck_db.to_str().unwrap(), - false, - ) - .unwrap(); - } - // Run the simulation again let result2 = SandboxedResult::from( std::panic::catch_unwind(|| { @@ -443,54 +415,71 @@ impl SandboxedResult { } fn setup_simulation( - mut seed: u64, + bugbase: &mut BugBase, cli_opts: &SimulatorCLI, - db_path: &Path, - plan_path: &Path, -) -> (SimulatorEnv, Vec) { - if let Some(load) = &cli_opts.load { - let seed_path = PathBuf::from(load).with_extension("seed"); - let seed_str = std::fs::read_to_string(&seed_path).unwrap(); - seed = seed_str.parse().unwrap(); - } + plan_path: fn(&Paths) -> &Path, + db_path: fn(&Paths) -> &Path, +) -> (u64, SimulatorEnv, Vec) { + if let Some(seed) = &cli_opts.load { + let seed = seed.parse::().expect("seed should be a number"); + let bug = bugbase + .get_bug(seed) + .unwrap_or_else(|| panic!("bug '{}' not found in bug base", seed)); - let mut env = SimulatorEnv::new(seed, cli_opts, db_path); + let paths = bugbase.paths(seed); + if !paths.base.exists() { + std::fs::create_dir_all(&paths.base).unwrap(); + } + let env = SimulatorEnv::new(bug.seed(), cli_opts, db_path(&paths)); - // todo: the loading works correctly because of a hacky decision - // Right now, the plan generation is the only point we use the rng, so the environment doesn't - // even need it. In the future, especially with multi-connections and multi-threading, we might - // use the RNG for more things such as scheduling, so this assumption will fail. When that happens, - // we'll need to reachitect this logic by saving and loading RNG state. - let plans = if let Some(load) = &cli_opts.load { - log::info!("Loading database interaction plan..."); - let plan = std::fs::read_to_string(load).unwrap(); - let plan: InteractionPlan = serde_json::from_str(&plan).unwrap(); - vec![plan] + let plan = match bug { + Bug::Loaded { plan, .. 
} => plan.clone(),
+            Bug::Unloaded { seed } => {
+                let seed = *seed;
+                bugbase
+                    .load_bug(seed)
+                    .unwrap_or_else(|_| panic!("could not load bug '{}' in bug base", seed))
+            }
+        };
+
+        std::fs::write(plan_path(&paths), plan.to_string()).unwrap();
+        std::fs::write(
+            plan_path(&paths).with_extension("json"),
+            serde_json::to_string_pretty(&plan).unwrap(),
+        )
+        .unwrap();
+        let plans = vec![plan];
+        (seed, env, plans)
     } else {
+        let seed = cli_opts.seed.unwrap_or_else(|| {
+            let mut rng = rand::thread_rng();
+            rng.next_u64()
+        });
+
+        let paths = bugbase.paths(seed);
+        if !paths.base.exists() {
+            std::fs::create_dir_all(&paths.base).unwrap();
+        }
+        let mut env = SimulatorEnv::new(seed, cli_opts, &paths.db);
+
         log::info!("Generating database interaction plan...");
-        (1..=env.opts.max_connections)
+
+        let plans = (1..=env.opts.max_connections)
             .map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &mut env))
-            .collect::<Vec<_>>()
-    };
+            .collect::<Vec<_>>();
 
-    // todo: for now, we only use 1 connection, so it's safe to use the first plan.
-    let plan = plans[0].clone();
-
-    let mut f = std::fs::File::create(plan_path).unwrap();
-    // todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan.
-    f.write_all(plan.to_string().as_bytes()).unwrap();
-
-    let serialized_plan_path = plan_path.with_extension("plan.json");
-    let mut f = std::fs::File::create(&serialized_plan_path).unwrap();
-    f.write_all(serde_json::to_string(&plan).unwrap().as_bytes())
+        // todo: for now, we only use 1 connection, so it's safe to use the first plan.
+        let plan = &plans[0];
+        log::info!("{}", plan.stats());
+        std::fs::write(plan_path(&paths), plan.to_string()).unwrap();
+        std::fs::write(
+            plan_path(&paths).with_extension("json"),
+            serde_json::to_string_pretty(&plan).unwrap(),
+        )
         .unwrap();
 
-    let seed_path = plan_path.with_extension("seed");
-    let mut f = std::fs::File::create(&seed_path).unwrap();
-    f.write_all(seed.to_string().as_bytes()).unwrap();
-
-    log::info!("{}", plan.stats());
-    (env, plans)
+        (seed, env, plans)
+    }
 }
 
 fn run_simulation(
diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs
new file mode 100644
index 000000000..83c4273b3
--- /dev/null
+++ b/simulator/runner/bugbase.rs
@@ -0,0 +1,241 @@
+use std::{
+    collections::HashMap,
+    io::{self, Write},
+    path::PathBuf,
+    process::Command,
+};
+
+use crate::{InteractionPlan, Paths};
+
+/// A bug is a run that has been identified as buggy.
+#[derive(Clone)]
+pub(crate) enum Bug {
+    Unloaded { seed: u64 },
+    Loaded { seed: u64, plan: InteractionPlan },
+}
+
+impl Bug {
+    /// Check if the bug is loaded.
+    pub(crate) fn is_loaded(&self) -> bool {
+        match self {
+            Bug::Unloaded { .. } => false,
+            Bug::Loaded { .. } => true,
+        }
+    }
+
+    /// Get the seed of the bug.
+    pub(crate) fn seed(&self) -> u64 {
+        match self {
+            Bug::Unloaded { seed } => *seed,
+            Bug::Loaded { seed, .. } => *seed,
+        }
+    }
+}
+
+/// Bug Base is a local database of buggy runs.
+pub(crate) struct BugBase {
+    /// Path to the bug base directory.
+    path: PathBuf,
+    /// The list of buggy runs, uniquely identified by their seed
+    bugs: HashMap<u64, Bug>,
+}
+
+impl BugBase {
+    /// Create a new bug base.
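+    /// The bug base keeps one directory per seed (e.g. `.bugbase/<seed>/`) holding
+    /// `seed.txt`, `plan.json`, a readable `plan.sql`, and the run's database files.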
+    fn new(path: PathBuf) -> Result<Self, String> {
+        let mut bugs = HashMap::new();
+        // list all the bugs in the path as directories
+        if let Ok(entries) = std::fs::read_dir(&path) {
+            for entry in entries.flatten() {
+                if entry.file_type().is_ok_and(|ft| ft.is_dir()) {
+                    let seed = entry
+                        .file_name()
+                        .to_string_lossy()
+                        .to_string()
+                        .parse::<u64>()
+                        .or(Err(format!(
+                            "failed to parse seed from directory name {}",
+                            entry.file_name().to_string_lossy()
+                        )))?;
+                    bugs.insert(seed, Bug::Unloaded { seed });
+                }
+            }
+        }
+
+        Ok(Self { path, bugs })
+    }
+
+    /// Load the bug base from one of the potential paths.
+    pub(crate) fn load() -> Result<Self, String> {
+        let potential_paths = vec![
+            // limbo project directory
+            BugBase::get_limbo_project_dir()?,
+            // home directory
+            dirs::home_dir().ok_or("should be able to get home directory".to_string())?,
+            // current directory
+            std::env::current_dir()
+                .or(Err("should be able to get current directory".to_string()))?,
+        ];
+
+        for path in potential_paths {
+            let path = path.join(".bugbase");
+            if path.exists() {
+                return BugBase::new(path);
+            }
+        }
+
+        println!("select bug base location:");
+        println!("1. limbo project directory");
+        println!("2. home directory");
+        println!("3. current directory");
+        print!("> ");
+        io::stdout().flush().unwrap();
+        let mut choice = String::new();
+        io::stdin()
+            .read_line(&mut choice)
+            .expect("failed to read line");
+
+        let choice = choice
+            .trim()
+            .parse::<usize>()
+            .or(Err(format!("invalid choice {choice}")))?;
+        let path = match choice {
+            1 => BugBase::get_limbo_project_dir()?.join(".bugbase"),
+            2 => {
+                let home = std::env::var("HOME").or(Err("failed to get home directory"))?;
+                PathBuf::from(home).join(".bugbase")
+            }
+            3 => PathBuf::from(".bugbase"),
+            _ => return Err(format!("invalid choice {choice}")),
+        };
+
+        if path.exists() {
+            unreachable!("bug base already exists at {}", path.display());
+        } else {
+            std::fs::create_dir_all(&path).or(Err("failed to create bug base"))?;
+            log::info!("bug base created at {}", path.display());
+            BugBase::new(path)
+        }
+    }
+
+    /// Add a new bug to the bug base.
+    pub(crate) fn add_bug(&mut self, seed: u64, plan: InteractionPlan) -> Result<(), String> {
+        log::debug!("adding bug with seed {}", seed);
+        if self.bugs.contains_key(&seed) {
+            return Err(format!("Bug with seed {} already exists", seed));
+        }
+        self.save_bug(seed, &plan)?;
+        self.bugs.insert(seed, Bug::Loaded { seed, plan });
+        Ok(())
+    }
+
+    /// Get a bug from the bug base.
+    pub(crate) fn get_bug(&self, seed: u64) -> Option<&Bug> {
+        self.bugs.get(&seed)
+    }
+
+    /// Save a bug to the bug base.
+    pub(crate) fn save_bug(&self, seed: u64, plan: &InteractionPlan) -> Result<(), String> {
+        let bug_path = self.path.join(seed.to_string());
+        std::fs::create_dir_all(&bug_path)
+            .or(Err("should be able to create bug directory".to_string()))?;
+
+        let seed_path = bug_path.join("seed.txt");
+        std::fs::write(&seed_path, seed.to_string())
+            .or(Err("should be able to write seed file".to_string()))?;
+
+        // At some point we might want to save the commit hash of the current
+        // version of Limbo.
+        // let commit_hash = Self::get_current_commit_hash()?;
+        // let commit_hash_path = bug_path.join("commit_hash.txt");
+        // std::fs::write(&commit_hash_path, commit_hash)
+        //     .or(Err("should be able to write commit hash file".to_string()))?;
+
+        let plan_path = bug_path.join("plan.json");
+        std::fs::write(
+            &plan_path,
+            serde_json::to_string(plan).or(Err("should be able to serialize plan".to_string()))?,
+        )
+        .or(Err("should be able to write plan file".to_string()))?;
+
+        let readable_plan_path = bug_path.join("plan.sql");
+        std::fs::write(&readable_plan_path, plan.to_string())
+            .or(Err("should be able to write readable plan file".to_string()))?;
+        Ok(())
+    }
+
+    pub(crate) fn load_bug(&mut self, seed: u64) -> Result<InteractionPlan, String> {
+        let seed_match = self.bugs.get(&seed);
+
+        match seed_match {
+            None => Err(format!("No bugs found for seed {}", seed)),
+            Some(Bug::Unloaded { .. }) => {
+                let plan =
+                    std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json"))
+                        .or(Err("should be able to read plan file".to_string()))?;
+                let plan: InteractionPlan = serde_json::from_str(&plan)
+                    .or(Err("should be able to deserialize plan".to_string()))?;
+
+                let bug = Bug::Loaded {
+                    seed,
+                    plan: plan.clone(),
+                };
+                self.bugs.insert(seed, bug);
+                log::debug!("Loaded bug with seed {}", seed);
+                Ok(plan)
+            }
+            Some(Bug::Loaded { plan, .. }) => {
+                log::warn!(
+                    "Bug with seed {} is already loaded, returning the existing plan",
+                    seed
+                );
+                Ok(plan.clone())
+            }
+        }
+    }
+
+    pub(crate) fn remove_bug(&mut self, seed: u64) -> Result<(), String> {
+        self.bugs.remove(&seed);
+        std::fs::remove_dir_all(self.path.join(seed.to_string()))
+            .or(Err("should be able to remove bug directory".to_string()))?;
+
+        log::debug!("Removed bug with seed {}", seed);
+        Ok(())
+    }
+}
+
+impl BugBase {
+    /// Get the path to the bug base directory.
+    pub(crate) fn path(&self) -> &PathBuf {
+        &self.path
+    }
+
+    /// Get the path to the database file for a given seed.
+    pub(crate) fn db_path(&self, seed: u64) -> PathBuf {
+        self.path.join(format!("{}/test.db", seed))
+    }
+
+    /// Get paths to all the files for a given seed.
+    pub(crate) fn paths(&self, seed: u64) -> Paths {
+        let base = self.path.join(format!("{}/", seed));
+        Paths::new(&base)
+    }
+}
+
+impl BugBase {
+    pub(crate) fn get_limbo_project_dir() -> Result<PathBuf, String> {
+        Ok(PathBuf::from(
+            String::from_utf8(
+                Command::new("git")
+                    .args(["rev-parse", "--git-dir"])
+                    .output()
+                    .or(Err("should be able to get the git path".to_string()))?
+                    .stdout,
+            )
+            .or(Err("commit hash should be valid utf8".to_string()))?
+            .trim()
+            .strip_suffix(".git")
+            .ok_or("should be able to strip .git suffix".to_string())?,
+        ))
+    }
+}
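For orientation, here is a minimal sketch of how `main.rs` above is expected to drive this API. It is illustrative only: the `plan: InteractionPlan` value is assumed to exist already, and error handling is collapsed into `unwrap`/`expect`:

```rust
// Mirrors the calls made from simulator/main.rs in the diff above.
let mut bugbase = BugBase::load().expect("bug base should initialize");
let seed = 42_u64;
// A failing run records its plan under `.bugbase/42/` (plan.json + plan.sql):
bugbase.add_bug(seed, plan.clone()).unwrap();
// A later `--load 42` run replays the same plan from disk:
let replayed = bugbase.load_bug(seed).unwrap();
assert_eq!(replayed.to_string(), plan.to_string());
// A run that completes correctly clears its entry again:
bugbase.remove_bug(seed).unwrap();
```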
diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs
index a18c47212..b07b89d47 100644
--- a/simulator/runner/cli.rs
+++ b/simulator/runner/cli.rs
@@ -6,8 +6,6 @@ pub struct SimulatorCLI {
     #[clap(short, long, help = "set seed for reproducible runs", default_value = None)]
     pub seed: Option<u64>,
-    #[clap(short, long, help = "set custom output directory for produced files", default_value = None)]
-    pub output_dir: Option<String>,
     #[clap(
         short,
         long,
@@ -35,13 +33,7 @@ pub struct SimulatorCLI {
         default_value_t = 60 * 60 // default to 1 hour
     )]
     pub maximum_time: usize,
-    #[clap(
-        short = 'm',
-        long,
-        help = "minimize(shrink) the failing counterexample"
-    )]
-    pub shrink: bool,
-    #[clap(short = 'l', long, help = "load plan from a file")]
+    #[clap(short = 'l', long, help = "load plan from the bug base")]
     pub load: Option<String>,
     #[clap(
         short = 'w',
@@ -66,14 +58,8 @@ impl SimulatorCLI {
             return Err("Minimum size cannot be greater than maximum size".to_string());
         }
 
-        // Make sure incompatible options are not set
-        if self.shrink && self.doublecheck {
-            return Err("Cannot use shrink and doublecheck at the same time".to_string());
-        }
-
-        if let Some(plan_path) = &self.load {
-            std::fs::File::open(plan_path)
-                .map_err(|_| format!("Plan file '{}' could not be opened", plan_path))?;
+        if self.seed.is_some() && self.load.is_some() {
+            return Err("Cannot set seed and load plan at the same time".to_string());
         }
 
         Ok(())
diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs
index a9409ad7e..19233fc4a 100644
--- a/simulator/runner/env.rs
+++ b/simulator/runner/env.rs
@@ -85,6 +85,7 @@ impl SimulatorEnv {
         // Remove existing database file if it exists
         if db_path.exists() {
             std::fs::remove_file(db_path).unwrap();
+            std::fs::remove_file(db_path.with_extension("db-wal")).unwrap();
         }
 
         let db = match Database::open_file(io.clone(), db_path.to_str().unwrap(), false) {
diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs
index 8ae4b0cf6..757d9f3ce 100644
--- a/simulator/runner/execution.rs
+++ b/simulator/runner/execution.rs
@@ -68,7 +68,12 @@ pub(crate) fn execute_plans(
         // Pick the connection to interact with
         let connection_index = pick_index(env.connections.len(), &mut env.rng);
         let state = &mut states[connection_index];
-
+        std::thread::sleep(std::time::Duration::from_millis(
+            std::env::var("TICK_SLEEP")
+                .unwrap_or("0".into())
+                .parse()
+                .unwrap_or(0),
+        ));
         history.history.push(Execution::new(
             connection_index,
             state.interaction_pointer,
@@ -121,6 +126,7 @@ fn execute_plan(
         } else {
             match execute_interaction(env, connection_index, interaction, &mut state.stack) {
                 Ok(next_execution) => {
+                    interaction.shadow(env);
                     log::debug!("connection {} processed", connection_index);
                     // Move to the next interaction or property
                     match next_execution {
diff --git a/simulator/runner/mod.rs b/simulator/runner/mod.rs
index 36a6fbb0a..792c4bddd 100644
--- a/simulator/runner/mod.rs
+++ b/simulator/runner/mod.rs
@@ -1,3 +1,4 @@
+pub mod bugbase;
 pub mod cli;
 pub mod differential;
 pub mod env;

From 2af447128f357f5d8246e43c051dae629bde7fc4 Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Tue, 8 Apr 2025 19:32:03 -0400
Subject: [PATCH 087/425] Add tracing log file to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 8a7437707..369a9b7ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@ dist/
 
 # testing
testing/limbo_output.txt
 **/limbo_output.txt
+testing/test.log

From 570253b29ff2b839035133b6dfa926b2b40ae6f8 Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Tue, 8 Apr 2025 19:32:51 -0400
Subject: [PATCH 088/425] Adjust limbo run script to log to file during tests if RUST_LOG set

---
 scripts/limbo-sqlite3 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/limbo-sqlite3 b/scripts/limbo-sqlite3
index 8e9f0389a..d448a2d6a 100755
--- a/scripts/limbo-sqlite3
+++ b/scripts/limbo-sqlite3
@@ -1,3 +1,8 @@
 #!/bin/bash

-target/debug/limbo -m list "$@"
+# if RUST_LOG is non-empty, enable tracing output
+if [ -n "$RUST_LOG" ]; then
+  target/debug/limbo -m list -t testing/test.log "$@"
+else
+  target/debug/limbo -m list "$@"
+fi

From 01184ec1d7850d80380acd5d619f385697db49fa Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Tue, 8 Apr 2025 19:36:38 -0400
Subject: [PATCH 089/425] Add tracing-appender to log traces to file asynchronously

---
 Cargo.lock                          | 22 ++++++++++++
 cli/Cargo.toml                      |  1 +
 cli/app.rs                          | 56 +++++++++++++++++++++++------
 cli/input.rs                        | 10 +++---
 cli/main.rs                         | 10 +----
 testing/cli_tests/test_limbo_cli.py |  2 +-
 6 files changed, 77 insertions(+), 24 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2e7a615c6..96aec95db 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -583,6 +583,15 @@ dependencies = [
  "itertools",
 ]

+[[package]]
+name = "crossbeam-channel"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-deque"
 version = "0.8.6"
@@ -1678,6 +1687,7 @@ dependencies = [
  "shlex",
  "syntect",
  "tracing",
+ "tracing-appender",
  "tracing-subscriber",
 ]

@@ -3472,6 +3482,18 @@ dependencies = [
  "tracing-core",
 ]

+[[package]]
+name = "tracing-appender"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf"
+dependencies = [
+ "crossbeam-channel",
+ "thiserror 1.0.69",
+ "time",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "tracing-attributes"
 version = "0.1.28"
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index ddd44519f..2f1625420 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -39,6 +39,7 @@ rustyline = { version = "15.0.0", default-features = true, features = [
 shlex = "1.3.0"
 syntect = "5.2.0"
 tracing = "0.1.41"
+tracing-appender = "0.2.3"
 tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }


diff --git a/cli/app.rs b/cli/app.rs
index 40e187d43..e5aa851a6 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -6,6 +6,8 @@ use crate::{
 };
 use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table};
 use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult};
+use tracing_appender::non_blocking::WorkerGuard;
+use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};

 use clap::Parser;
 use rustyline::{history::DefaultHistory, Editor};
@@ -49,6 +51,8 @@ pub struct Opts {
     pub vfs: Option<String>,
     #[clap(long, help = "Enable experimental MVCC feature")]
     pub experimental_mvcc: bool,
+    #[clap(short = 't', long, help = "specify output file for log traces")]
+    pub tracing_output: Option<String>,
 }

 const PROMPT: &str = "limbo> ";
@@ -130,6 +134,8 @@ impl<'a> Limbo<'a> {
             })
             .expect("Error setting Ctrl-C handler");
         }
+        let sql = opts.sql.clone();
+        let quiet = opts.quiet;
         let mut app = Self {
             prompt: PROMPT.to_string(),
             io,
@@
-137,21 +143,25 @@ impl<'a> Limbo<'a> { conn, interrupt_count, input_buff: String::new(), - opts: Settings::from(&opts), + opts: Settings::from(opts), rl, }; - - if opts.sql.is_some() { - app.handle_first_input(opts.sql.as_ref().unwrap()); - } - if !opts.quiet { - app.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; - app.writeln("Enter \".help\" for usage hints.")?; - app.display_in_memory()?; - } + app.first_run(sql, quiet)?; Ok(app) } + fn first_run(&mut self, sql: Option, quiet: bool) -> io::Result<()> { + if let Some(sql) = sql { + self.handle_first_input(&sql); + } + if !quiet { + self.write_fmt(format_args!("Limbo v{}", env!("CARGO_PKG_VERSION")))?; + self.writeln("Enter \".help\" for usage hints.")?; + self.display_in_memory()?; + } + Ok(()) + } + fn handle_first_input(&mut self, cmd: &str) { if cmd.trim().starts_with('.') { self.handle_dot_command(&cmd[1..]); @@ -695,6 +705,32 @@ impl<'a> Limbo<'a> { Ok(()) } + pub fn init_tracing(&mut self) -> Result { + let (non_blocking, guard) = if let Some(file) = &self.opts.tracing_output { + tracing_appender::non_blocking( + std::fs::File::options() + .append(true) + .create(true) + .open(file)?, + ) + } else { + tracing_appender::non_blocking(std::io::stderr()) + }; + if let Err(e) = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_line_number(true) + .with_thread_ids(true), + ) + .with(EnvFilter::from_default_env()) + .try_init() + { + println!("Unable to setup tracing appender: {:?}", e); + } + Ok(guard) + } + fn display_schema(&mut self, table: Option<&str>) -> anyhow::Result<()> { let sql = match table { Some(table_name) => format!( diff --git a/cli/input.rs b/cli/input.rs index 4361394c0..7b505a99f 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -81,28 +81,30 @@ pub struct Settings { pub echo: bool, pub is_stdout: bool, pub io: Io, + pub tracing_output: Option, } -impl From<&Opts> for Settings { - fn from(opts: &Opts) -> Self { +impl From for Settings { + fn from(opts: Opts) -> Self { Self { null_value: String::new(), output_mode: opts.output_mode, echo: false, is_stdout: opts.output.is_empty(), - output_filename: opts.output.clone(), + output_filename: opts.output, db_file: opts .database .as_ref() .map_or(":memory:".to_string(), |p| p.to_string_lossy().to_string()), io: match opts.vfs.as_ref().unwrap_or(&String::new()).as_str() { - "memory" => Io::Memory, + "memory" | ":memory:" => Io::Memory, "syscall" => Io::Syscall, #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Io::IoUring, "" => Io::default(), vfs => Io::External(vfs.to_string()), }, + tracing_output: opts.tracing_output, } } } diff --git a/cli/main.rs b/cli/main.rs index 4e8eca02a..ec81b64af 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -7,7 +7,6 @@ mod opcodes_dictionary; use rustyline::{error::ReadlineError, Config, Editor}; use std::sync::atomic::Ordering; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; fn rustyline_config() -> Config { Config::builder() @@ -17,15 +16,8 @@ fn rustyline_config() -> Config { fn main() -> anyhow::Result<()> { let mut rl = Editor::with_config(rustyline_config())?; - tracing_subscriber::registry() - .with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_thread_ids(true), - ) - .with(EnvFilter::from_default_env()) - .init(); let mut app = app::Limbo::new(&mut rl)?; + let _guard = app.init_tracing()?; let home = dirs::home_dir().expect("Could not determine home directory"); let history_file 
= home.join(".limbo_history");
     if history_file.exists() {
diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py
index 8b6a61375..43f8d1ed2 100755
--- a/testing/cli_tests/test_limbo_cli.py
+++ b/testing/cli_tests/test_limbo_cli.py
@@ -11,7 +11,7 @@ PIPE_BUF = 4096

 class ShellConfig:
-    def __init__(self, exe_name, flags: str = "-q"):
+    def __init__(self, exe_name, flags: str = "-q -t testing/trace.log"):
         self.sqlite_exec: str = exe_name
         self.sqlite_flags: List[str] = flags.split()
         self.cwd = os.getcwd()

From 4b3c14369d4c5be1a925326d223bcf569a3b5b3d Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Tue, 8 Apr 2025 19:36:58 -0400
Subject: [PATCH 090/425] Add testing.md document

---
 docs/testing.md                     | 85 +++++++++++++++++++++++++++++
 testing/cli_tests/test_limbo_cli.py |  2 +-
 2 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 docs/testing.md

diff --git a/docs/testing.md b/docs/testing.md
new file mode 100644
index 000000000..21823957f
--- /dev/null
+++ b/docs/testing.md
@@ -0,0 +1,85 @@
+# Testing in Limbo
+
+Limbo supports a comprehensive testing system to ensure correctness, performance, and compatibility with SQLite.
+
+## 1. Compatibility Tests
+
+The `make test` target is the main entry point.
+
+Most compatibility tests live in the `testing/` directory and are written in SQLite's TCL test format. These tests ensure that Limbo matches SQLite's behavior exactly. The database used during these tests is located at `testing/testing.db`, which includes the following schema:
+
+```sql
+CREATE TABLE users (
+    id INTEGER PRIMARY KEY,
+    first_name TEXT,
+    last_name TEXT,
+    email TEXT,
+    phone_number TEXT,
+    address TEXT,
+    city TEXT,
+    state TEXT,
+    zipcode TEXT,
+    age INTEGER
+);
+CREATE TABLE products (
+    id INTEGER PRIMARY KEY,
+    name TEXT,
+    price REAL
+);
+CREATE INDEX age_idx ON users (age);
+```
+
+You can freely write queries against these tables during compatibility testing.
+
+### Shell and Python-based Tests
+
+For cases where output or behavior differs intentionally from SQLite (e.g. due to new features or limitations), tests should be placed in the `testing/cli_tests/` directory and written in Python.
+
+These tests use the `TestLimboShell` class:
+
+```python
+from cli_tests.common import TestLimboShell
+
+def test_uuid():
+    limbo = TestLimboShell()
+    limbo.run_test_fn("SELECT uuid4_str();", lambda res: len(res) == 36)
+    limbo.quit()
+```
+
+You can use `run_test`, `run_test_fn`, or `debug_print` to interact with the shell and validate results.
+The constructor takes an optional `sql` argument with the statements you want to initialize the test session with. You can also enable blob testing or override the executable and flags.
+
+Use these Python-based tests for validating:
+
+ - Output formatting
+
+ - Shell commands and dot-command interactions
+
+ - Limbo-specific extensions in `testing/cli_tests/extensions.py`
+
+ - Any known divergence from SQLite behavior
+
+
+### Logging and Tracing
+
+If you wish to trace internal events during test execution, you can set the `RUST_LOG` environment variable before running the test. For example:
+
+```bash
+RUST_LOG=none,limbo_core=trace make test
+```
+
+This enables trace-level logs for the `limbo_core` crate and disables logs elsewhere, writing all internal traces to the `testing/test.log` file.
+
+**Note:** trace logs can be very verbose; it's not uncommon for a single test run to generate megabytes of logs.
+
+
+## Deterministic Simulation Testing (DST)
+
+TODO!
+
+
+## Fuzzing
+
+TODO!
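+
+Until this section is fleshed out, note that property-style fuzz tests already live in `tests/integration/fuzz/`; they generate random queries and compare Limbo's rows against SQLite's. They read an optional `SEED` environment variable for reproducible runs, so an invocation along these lines (assuming the standard cargo test harness; adjust the test name as needed) replays a particular case:
+
+```bash
+# hypothetical example; substitute the fuzz test you want to reproduce
+SEED=12345 cargo test index_scan_compound_key_fuzz
+```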
+ + + diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 43f8d1ed2..8b6a61375 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -11,7 +11,7 @@ PIPE_BUF = 4096 class ShellConfig: - def __init__(self, exe_name, flags: str = "-q -t testing/trace.log"): + def __init__(self, exe_name, flags: str = "-q"): self.sqlite_exec: str = exe_name self.sqlite_flags: List[str] = flags.split() self.cwd = os.getcwd() From 3ad7d194cbf8b4f159cbe0cce6dd99fd3d6f1a22 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 08:38:48 -0400 Subject: [PATCH 091/425] Prevent panic on loading non-existent vtab module --- core/util.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/util.rs b/core/util.rs index f17699233..b0acb54f2 100644 --- a/core/util.rs +++ b/core/util.rs @@ -60,8 +60,14 @@ pub fn parse_schema_rows( let sql: &str = row.get::<&str>(4)?; if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; - let vtab = syms.vtabs.get(name).unwrap().clone(); - schema.add_virtual_table(vtab); + let Some(vtab) = syms.vtabs.get(name) else { + return Err(LimboError::InvalidArgument(format!( + "Virtual table Module for {} not found in symbol table, + please load extension first", + name + ))); + }; + schema.add_virtual_table(vtab.clone()); } else { let table = schema::BTreeTable::from_sql(sql, root_page as usize)?; schema.add_btree_table(Rc::new(table)); From 4b9b6c969b2095f6aa0a7fcd2b31a4baf8b5cdd6 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 09:17:06 -0400 Subject: [PATCH 092/425] Parse schema rows after extensions are loaded --- core/ext/dynamic.rs | 14 ++++++++++++-- core/lib.rs | 37 ++++++++++++++++++++++++++++++------- core/util.rs | 2 +- core/vdbe/execute.rs | 6 +++--- core/vdbe/mod.rs | 6 +++--- 5 files changed, 49 insertions(+), 16 deletions(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index df342caca..ec297bf36 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -6,6 +6,7 @@ use libloading::{Library, Symbol}; use limbo_ext::{ExtensionApi, ExtensionApiRef, ExtensionEntryPoint, ResultCode, VfsImpl}; use std::{ ffi::{c_char, CString}, + rc::Rc, sync::{Arc, Mutex, OnceLock}, }; @@ -29,7 +30,10 @@ unsafe impl Send for VfsMod {} unsafe impl Sync for VfsMod {} impl Connection { - pub fn load_extension>(&self, path: P) -> crate::Result<()> { + pub fn load_extension>( + self: &Rc, + path: P, + ) -> crate::Result<()> { use limbo_ext::ExtensionApiRef; let api = Box::new(self.build_limbo_ext()); @@ -44,7 +48,13 @@ impl Connection { let result_code = unsafe { entry(api_ptr) }; if result_code.is_ok() { let extensions = get_extension_libraries(); - extensions.lock().unwrap().push((Arc::new(lib), api_ref)); + extensions + .lock() + .map_err(|_| { + LimboError::ExtensionError("Error unlocking extension libraries".to_string()) + })? 
+ .push((Arc::new(lib), api_ref)); + self.parse_schema_rows()?; Ok(()) } else { if !api_ptr.is_null() { diff --git a/core/lib.rs b/core/lib.rs index e364b226d..7176086f0 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -70,7 +70,7 @@ use vdbe::{builder::QueryMode, VTabOpaqueCursor}; pub type Result = std::result::Result; pub static DATABASE_VERSION: OnceLock = OnceLock::new(); -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq)] enum TransactionState { Write, Read, @@ -158,7 +158,13 @@ impl Database { .try_write() .expect("lock on schema should succeed first try"); let syms = conn.syms.borrow(); - parse_schema_rows(rows, &mut schema, io, syms.deref(), None)?; + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, io, syms.deref(), None) + { + // this means that a vtab exists and we no longer have the module loaded. we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } } Ok(db) } @@ -186,9 +192,9 @@ impl Database { schema: self.schema.clone(), header: self.header.clone(), last_insert_rowid: Cell::new(0), - auto_commit: RefCell::new(true), + auto_commit: Cell::new(true), mv_transactions: RefCell::new(Vec::new()), - transaction_state: RefCell::new(TransactionState::None), + transaction_state: Cell::new(TransactionState::None), last_change: Cell::new(0), syms: RefCell::new(SymbolTable::new()), total_changes: Cell::new(0), @@ -278,9 +284,9 @@ pub struct Connection { pager: Rc, schema: Arc>, header: Arc>, - auto_commit: RefCell, + auto_commit: Cell, mv_transactions: RefCell>, - transaction_state: RefCell, + transaction_state: Cell, last_insert_rowid: Cell, last_change: Cell, total_changes: Cell, @@ -517,7 +523,24 @@ impl Connection { } pub fn get_auto_commit(&self) -> bool { - *self.auto_commit.borrow() + self.auto_commit.get() + } + + pub fn parse_schema_rows(self: &Rc) -> Result<()> { + let rows = self.query("SELECT * FROM sqlite_schema")?; + let mut schema = self + .schema + .try_write() + .expect("lock on schema should succeed first try"); + let syms = self.syms.borrow(); + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms.deref(), None) + { + // this means that a vtab exists and we no longer have the module loaded. 
we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } + Ok(()) } } diff --git a/core/util.rs b/core/util.rs index b0acb54f2..55a50b7ae 100644 --- a/core/util.rs +++ b/core/util.rs @@ -61,7 +61,7 @@ pub fn parse_schema_rows( if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; let Some(vtab) = syms.vtabs.get(name) else { - return Err(LimboError::InvalidArgument(format!( + return Err(LimboError::ExtensionError(format!( "Virtual table Module for {} not found in symbol table, please load extension first", name diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 09c283ecd..89a2d8d93 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1542,8 +1542,8 @@ pub fn op_transaction( } } else { let connection = program.connection.upgrade().unwrap(); - let current_state = connection.transaction_state.borrow().clone(); - let (new_transaction_state, updated) = match (¤t_state, write) { + let current_state = connection.transaction_state.get(); + let (new_transaction_state, updated) = match (current_state, write) { (TransactionState::Write, true) => (TransactionState::Write, false), (TransactionState::Write, false) => (TransactionState::Write, false), (TransactionState::Read, true) => (TransactionState::Write, true), @@ -1597,7 +1597,7 @@ pub fn op_auto_commit( }; } - if *auto_commit != *conn.auto_commit.borrow() { + if *auto_commit != conn.auto_commit.get() { if *rollback { todo!("Rollback is not implemented"); } else { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 8794b208a..550f21164 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -386,7 +386,7 @@ impl Program { ) -> Result { if let Some(mv_store) = mv_store { let conn = self.connection.upgrade().unwrap(); - let auto_commit = *conn.auto_commit.borrow(); + let auto_commit = conn.auto_commit.get(); if auto_commit { let mut mv_transactions = conn.mv_transactions.borrow_mut(); for tx_id in mv_transactions.iter() { @@ -400,7 +400,7 @@ impl Program { .connection .upgrade() .expect("only weak ref to connection?"); - let auto_commit = *connection.auto_commit.borrow(); + let auto_commit = connection.auto_commit.get(); tracing::trace!("Halt auto_commit {}", auto_commit); assert!( program_state.halt_state.is_none() @@ -409,7 +409,7 @@ impl Program { if program_state.halt_state.is_some() { self.step_end_write_txn(&pager, &mut program_state.halt_state, connection.deref()) } else if auto_commit { - let current_state = connection.transaction_state.borrow().clone(); + let current_state = connection.transaction_state.get(); match current_state { TransactionState::Write => self.step_end_write_txn( &pager, From c15035caf84c23ad9e61cdfbab7cafb6b1726b7d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 14:03:54 -0400 Subject: [PATCH 093/425] Add module and vtab to schema after table is reopened with proper ext --- core/lib.rs | 20 +++--- core/translate/emitter.rs | 1 + core/util.rs | 132 +++++++++++++++++++++++++++++++++++--- core/vdbe/execute.rs | 13 +++- 4 files changed, 147 insertions(+), 19 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 7176086f0..48fb2bd7c 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -157,9 +157,9 @@ impl Database { let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); - let syms = conn.syms.borrow(); + let syms = conn.syms.borrow_mut(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, io, syms.deref(), None) + parse_schema_rows(rows, 
&mut schema, io, syms, None) { // this means that a vtab exists and we no longer have the module loaded. we print // a warning to the user to load the module @@ -532,13 +532,15 @@ impl Connection { .schema .try_write() .expect("lock on schema should succeed first try"); - let syms = self.syms.borrow(); - if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms.deref(), None) { - // this means that a vtab exists and we no longer have the module loaded. we print - // a warning to the user to load the module - eprintln!("Warning: {}", e); + let syms = self.syms.borrow_mut(); + if let Err(LimboError::ExtensionError(e)) = + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms, None) + { + // this means that a vtab exists and we no longer have the module loaded. we print + // a warning to the user to load the module + eprintln!("Warning: {}", e); + } } Ok(()) } @@ -653,7 +655,7 @@ impl VirtualTable { module_name )))?; if let VTabKind::VirtualTable = kind { - if module.module_kind != VTabKind::VirtualTable { + if module.module_kind == VTabKind::TableValuedFunction { return Err(LimboError::ExtensionError(format!( "{} is not a virtual table module", module_name diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 23b937019..514bc21ab 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -135,6 +135,7 @@ fn prologue<'a>( Ok((t_ctx, init_label, start_offset)) } +#[derive(Clone, Copy, Debug)] pub enum TransactionMode { None, Read, diff --git a/core/util.rs b/core/util.rs index 55a50b7ae..f7c41348c 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,5 +1,5 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal}; -use std::{rc::Rc, sync::Arc}; +use std::{cell::RefMut, rc::Rc, sync::Arc}; use crate::{ schema::{self, Column, Schema, Type}, @@ -40,7 +40,7 @@ pub fn parse_schema_rows( rows: Option, schema: &mut Schema, io: Arc, - syms: &SymbolTable, + mut syms: RefMut, mv_tx_id: Option, ) -> Result<()> { if let Some(mut rows) = rows { @@ -60,12 +60,35 @@ pub fn parse_schema_rows( let sql: &str = row.get::<&str>(4)?; if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; - let Some(vtab) = syms.vtabs.get(name) else { - return Err(LimboError::ExtensionError(format!( - "Virtual table Module for {} not found in symbol table, - please load extension first", - name - ))); + // a virtual table is found in the sqlite_schema, but it's no + // longer in the symbol table. We need to recreate it. 
+ let vtab = if let Some(vtab) = syms.vtabs.get(name) { + vtab.clone() + } else { + // "create virtual table using mod" + let mod_name = module_name_from_sql(sql)?; + if let Some(vmod) = syms.vtab_modules.get(mod_name) { + if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind + { + let vtab = crate::VirtualTable::from_args( + Some(name), + mod_name, + module_args_from_sql(sql)?, + &syms, + vmod.module_kind, + None, + )?; + syms.vtabs.insert(name.to_string(), vtab.clone()); + vtab + } else { + return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); + } + } else { + return Err(LimboError::ExtensionError(format!( + "Virtual table module '{}' not found\nPlease load extension", + &mod_name + ))); + } }; schema.add_virtual_table(vtab.clone()); } else { @@ -138,6 +161,99 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) } +fn module_name_from_sql(sql: &str) -> Result<&str> { + if let Some(start) = sql.find("USING") { + let start = start + 6; + // stop at the first space, semicolon, or parenthesis + let end = sql[start..] + .find(|c: char| c.is_whitespace() || c == ';' || c == '(') + .unwrap_or(sql.len() - start) + + start; + Ok(sql[start..end].trim()) + } else { + Err(LimboError::InvalidArgument( + "Expected 'USING' in module name".to_string(), + )) + } +} + +// CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...); +// CREATE VIRTUAL TABLE table_name USING module_name; +fn module_args_from_sql(sql: &str) -> Result> { + if !sql.contains('(') { + return Ok(vec![]); + } + let start = sql.find('(').ok_or_else(|| { + LimboError::InvalidArgument("Expected '(' in module argument list".to_string()) + })? 
+ 1; + let end = sql.rfind(')').ok_or_else(|| { + LimboError::InvalidArgument("Expected ')' in module argument list".to_string()) + })?; + + let mut args = Vec::new(); + let mut current_arg = String::new(); + let mut chars = sql[start..end].chars().peekable(); + let mut in_quotes = false; + + while let Some(c) = chars.next() { + match c { + '\'' => { + if in_quotes { + if chars.peek() == Some(&'\'') { + // Escaped quote + current_arg.push('\''); + chars.next(); + } else { + in_quotes = false; + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + // Skip until comma or end + while let Some(&nc) = chars.peek() { + if nc == ',' { + chars.next(); // Consume comma + break; + } else if nc.is_whitespace() { + chars.next(); + } else { + return Err(LimboError::InvalidArgument( + "Unexpected characters after quoted argument".to_string(), + )); + } + } + } + } else { + in_quotes = true; + } + } + ',' => { + if !in_quotes { + if !current_arg.trim().is_empty() { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + current_arg.clear(); + } + } else { + current_arg.push(c); + } + } + _ => { + current_arg.push(c); + } + } + } + + if !current_arg.trim().is_empty() && !in_quotes { + args.push(limbo_ext::Value::from_text(current_arg.trim().to_string())); + } + + if in_quotes { + return Err(LimboError::InvalidArgument( + "Unterminated string literal in module arguments".to_string(), + )); + } + + Ok(args) +} + pub fn check_literal_equivalency(lhs: &Literal, rhs: &Literal) -> bool { match (lhs, rhs) { (Literal::Numeric(n1), Literal::Numeric(n2)) => cmp_numeric_strings(n1, n2), diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 89a2d8d93..65bf5238e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -917,12 +917,21 @@ pub fn op_vcreate( "Failed to upgrade Connection".to_string(), )); }; + let mod_type = conn + .syms + .borrow() + .vtab_modules + .get(&module_name) + .ok_or_else(|| { + crate::LimboError::ExtensionError(format!("Module {} not found", module_name)) + })? 
+ .module_kind; let table = crate::VirtualTable::from_args( Some(&table_name), &module_name, args, &conn.syms.borrow(), - limbo_ext::VTabKind::VirtualTable, + mod_type, None, )?; { @@ -4231,7 +4240,7 @@ pub fn op_parse_schema( Some(stmt), &mut schema, conn.pager.io.clone(), - &conn.syms.borrow(), + conn.syms.borrow_mut(), state.mv_tx_id, )?; state.pc += 1; From a0f71e27beece3aea2fdf24087ab14253006d29d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 14:04:28 -0400 Subject: [PATCH 094/425] Fix cli tests --- testing/cli_tests/extensions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index bab8cb74f..ac870ee4d 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -345,10 +345,10 @@ def test_kv(): limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", - lambda res: "Virtual table module not found: kv_store" in res, + lambda res: "Module kv_store not found" in res, ) limbo.execute_dot(f".load {ext_path}") - limbo.debug_print( + limbo.execute_dot( "create virtual table t using kv_store;", ) limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) From 6b5ec1f07b30a3d90af8f64daf4031fdd04139ea Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 20:55:59 -0400 Subject: [PATCH 095/425] Remove mut borrow from sym table in parse schema fn --- core/ext/dynamic.rs | 4 +++- core/lib.rs | 8 ++++---- core/util.rs | 14 ++++++-------- core/vdbe/execute.rs | 16 +++++++++------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index ec297bf36..60e4050d7 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -54,7 +54,9 @@ impl Connection { LimboError::ExtensionError("Error unlocking extension libraries".to_string()) })? .push((Arc::new(lib), api_ref)); - self.parse_schema_rows()?; + { + self.parse_schema_rows()?; + } Ok(()) } else { if !api_ptr.is_null() { diff --git a/core/lib.rs b/core/lib.rs index 48fb2bd7c..cd728fa64 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -157,9 +157,9 @@ impl Database { let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); - let syms = conn.syms.borrow_mut(); + let syms = conn.syms.borrow(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, io, syms, None) + parse_schema_rows(rows, &mut schema, io, &syms, None) { // this means that a vtab exists and we no longer have the module loaded. we print // a warning to the user to load the module @@ -533,9 +533,9 @@ impl Connection { .try_write() .expect("lock on schema should succeed first try"); { - let syms = self.syms.borrow_mut(); + let syms = self.syms.borrow(); if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, self.pager.io.clone(), syms, None) + parse_schema_rows(rows, &mut schema, self.pager.io.clone(), &syms, None) { // this means that a vtab exists and we no longer have the module loaded. 
we print // a warning to the user to load the module diff --git a/core/util.rs b/core/util.rs index f7c41348c..dab083c16 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,5 +1,5 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal}; -use std::{cell::RefMut, rc::Rc, sync::Arc}; +use std::{rc::Rc, sync::Arc}; use crate::{ schema::{self, Column, Schema, Type}, @@ -40,7 +40,7 @@ pub fn parse_schema_rows( rows: Option, schema: &mut Schema, io: Arc, - mut syms: RefMut, + syms: &SymbolTable, mv_tx_id: Option, ) -> Result<()> { if let Some(mut rows) = rows { @@ -70,16 +70,14 @@ pub fn parse_schema_rows( if let Some(vmod) = syms.vtab_modules.get(mod_name) { if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind { - let vtab = crate::VirtualTable::from_args( + crate::VirtualTable::from_args( Some(name), mod_name, module_args_from_sql(sql)?, - &syms, + syms, vmod.module_kind, None, - )?; - syms.vtabs.insert(name.to_string(), vtab.clone()); - vtab + )? } else { return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); } @@ -90,7 +88,7 @@ pub fn parse_schema_rows( ))); } }; - schema.add_virtual_table(vtab.clone()); + schema.add_virtual_table(vtab); } else { let table = schema::BTreeTable::from_sql(sql, root_page as usize)?; schema.add_btree_table(Rc::new(table)); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 65bf5238e..654e9a2c5 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4236,13 +4236,15 @@ pub fn op_parse_schema( ))?; let mut schema = conn.schema.write(); // TODO: This function below is synchronous, make it async - parse_schema_rows( - Some(stmt), - &mut schema, - conn.pager.io.clone(), - conn.syms.borrow_mut(), - state.mv_tx_id, - )?; + { + parse_schema_rows( + Some(stmt), + &mut schema, + conn.pager.io.clone(), + &conn.syms.borrow(), + state.mv_tx_id, + )?; + } state.pc += 1; Ok(InsnFunctionStepResult::Step) } From 3a7f1e4056a967f8380158973153d1134bd66716 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 3 Apr 2025 20:57:59 -0400 Subject: [PATCH 096/425] Add comments explaining flow of reloading vtabs from schema tbl --- core/util.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/util.rs b/core/util.rs index dab083c16..ca23f0e83 100644 --- a/core/util.rs +++ b/core/util.rs @@ -61,11 +61,11 @@ pub fn parse_schema_rows( if root_page == 0 && sql.to_lowercase().contains("create virtual") { let name: &str = row.get::<&str>(1)?; // a virtual table is found in the sqlite_schema, but it's no - // longer in the symbol table. We need to recreate it. + // longer in the in-memory schema. We need to recreate it if + // the module is loaded in the symbol table. let vtab = if let Some(vtab) = syms.vtabs.get(name) { vtab.clone() } else { - // "create virtual table using mod" let mod_name = module_name_from_sql(sql)?; if let Some(vmod) = syms.vtab_modules.get(mod_name) { if let limbo_ext::VTabKind::VirtualTable = vmod.module_kind @@ -82,6 +82,7 @@ pub fn parse_schema_rows( return Err(LimboError::Corrupt("Table valued function: {name} registered as virtual table in schema".to_string())); } } else { + // the extension isn't loaded, so we emit a warning. 
return Err(LimboError::ExtensionError(format!( "Virtual table module '{}' not found\nPlease load extension", &mod_name From 41ac91f14f4c528e24bcafe95f0d3409519ad75f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 09:55:43 -0400 Subject: [PATCH 097/425] Add tests for parsing vtab creation sql in ParseSchema --- core/util.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/core/util.rs b/core/util.rs index ca23f0e83..3d12a2c6e 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1753,4 +1753,88 @@ pub mod tests { Ok((OwnedValueType::Float, "1.23e4")) ); } + + #[test] + fn test_module_name_basic() { + let sql = "CREATE VIRTUAL TABLE x USING y;"; + assert_eq!(module_name_from_sql(sql).unwrap(), "y"); + } + + #[test] + fn test_module_name_with_args() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a', 'b');"; + assert_eq!(module_name_from_sql(sql).unwrap(), "modname"); + } + + #[test] + fn test_module_name_missing_using() { + let sql = "CREATE VIRTUAL TABLE x (a, b);"; + assert!(module_name_from_sql(sql).is_err()); + } + + #[test] + fn test_module_name_no_semicolon() { + let sql = "CREATE VIRTUAL TABLE x USING limbo(a, b)"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_name_no_semicolon_or_args() { + let sql = "CREATE VIRTUAL TABLE x USING limbo"; + assert_eq!(module_name_from_sql(sql).unwrap(), "limbo"); + } + + #[test] + fn test_module_args_none() { + let sql = "CREATE VIRTUAL TABLE x USING modname;"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 0); + } + + #[test] + fn test_module_args_basic() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1', 'arg2');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!("arg1", args[0].to_text().unwrap()); + assert_eq!("arg2", args[1].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn test_module_args_with_escaped_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('a''b', 'c');"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 2); + assert_eq!(args[0].to_text().unwrap(), "a'b"); + assert_eq!(args[1].to_text().unwrap(), "c"); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } + + #[test] + fn test_module_args_unterminated_string() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1, 'arg2');"; + assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_extra_garbage_after_quote() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1'x);"; + assert!(module_args_from_sql(sql).is_err()); + } + + #[test] + fn test_module_args_trailing_comma() { + let sql = "CREATE VIRTUAL TABLE x USING modname('arg1',);"; + let args = module_args_from_sql(sql).unwrap(); + assert_eq!(args.len(), 1); + assert_eq!("arg1", args[0].to_text().unwrap()); + for arg in args { + unsafe { arg.__free_internal_type() } + } + } } From 9b1e60a29c4882ab2551acaedac7b079849ce5a9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 8 Apr 2025 20:09:12 -0400 Subject: [PATCH 098/425] Fix typo in ext library lock err message --- core/ext/dynamic.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ext/dynamic.rs b/core/ext/dynamic.rs index 60e4050d7..17138f268 100644 --- a/core/ext/dynamic.rs +++ b/core/ext/dynamic.rs @@ -51,7 +51,7 @@ impl Connection { extensions .lock() .map_err(|_| { - LimboError::ExtensionError("Error unlocking extension 
libraries".to_string()) + LimboError::ExtensionError("Error locking extension libraries".to_string()) })? .push((Arc::new(lib), api_ref)); { From 431ef2fa6a49f9180f3157cf496ae40e5b452bac Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:15:06 +0300 Subject: [PATCH 099/425] Add TCL/differential fuzz tests for verifying index scan behavior --- testing/orderby.test | 59 +++++++++++++ tests/integration/fuzz/mod.rs | 158 +++++++++++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/testing/orderby.test b/testing/orderby.test index f23c41bfd..b5b56cdd4 100755 --- a/testing/orderby.test +++ b/testing/orderby.test @@ -141,3 +141,62 @@ Collin|15} do_execsql_test case-insensitive-alias { select u.first_name as fF, count(1) > 0 as cC from users u where fF = 'Jamie' group by fF order by cC; } {Jamie|1} + +do_execsql_test age_idx_order_desc { + select first_name from users order by age desc limit 3; +} {Robert +Sydney +Matthew} + +do_execsql_test rowid_or_integer_pk_desc { + select first_name from users order by id desc limit 3; +} {Nicole +Gina +Dorothy} + +# These two following tests may seem dumb but they verify that index scanning by age_idx doesn't drop any rows due to BTree bugs +do_execsql_test orderby_asc_verify_rows { + select count(1) from (select * from users order by age desc) +} {10000} + +do_execsql_test orderby_desc_verify_rows { + select count(1) from (select * from users order by age desc) +} {10000} + +do_execsql_test orderby_desc_with_offset { + select first_name, age from users order by age desc limit 3 offset 666; +} {Francis|94 +Matthew|94 +Theresa|94} + +do_execsql_test orderby_desc_with_filter { + select first_name, age from users where age <= 50 order by age desc limit 5; +} {Gerald|50 +Nicole|50 +Tammy|50 +Marissa|50 +Daniel|50} + +do_execsql_test orderby_asc_with_filter_range { + select first_name, age from users where age <= 50 and age >= 49 order by age asc limit 5; +} {William|49 +Jennifer|49 +Robert|49 +David|49 +Stephanie|49} + +do_execsql_test orderby_desc_with_filter_id_lt { + select id from users where id < 6666 order by id desc limit 5; +} {6665 +6664 +6663 +6662 +6661} + +do_execsql_test orderby_desc_with_filter_id_le { + select id from users where id <= 6666 order by id desc limit 5; +} {6666 +6665 +6664 +6663 +6662} \ No newline at end of file diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f776fc9a7..eeed31698 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -2,9 +2,9 @@ pub mod grammar_generator; #[cfg(test)] mod tests { - use std::rc::Rc; + use std::{collections::HashSet, rc::Rc}; - use rand::SeedableRng; + use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use rusqlite::params; @@ -107,6 +107,160 @@ mod tests { } } + #[test] + pub fn index_scan_fuzz() { + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)"); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + + let insert = format!( + "INSERT INTO t VALUES {}", + (0..10000) + .map(|x| format!("({})", x)) + .collect::>() + .join(", ") + ); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; + + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + for comp in 
COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=10000 { + let query = format!( + "SELECT * FROM t WHERE x {} {} {} LIMIT 3", + comp, + max, + order_by.unwrap_or(""), + ); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + } + } + + #[test] + pub fn index_scan_compound_key_fuzz() { + let (mut rng, seed) = if std::env::var("SEED").is_ok() { + let seed = std::env::var("SEED").unwrap().parse::().unwrap(); + (ChaCha8Rng::seed_from_u64(seed), seed) + } else { + rng_from_time() + }; + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y))"); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let mut pk_tuples = HashSet::new(); + while pk_tuples.len() < 100000 { + pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); + } + let mut tuples = Vec::new(); + for pk_tuple in pk_tuples { + tuples.push(format!( + "({}, {}, {})", + pk_tuple.0, + pk_tuple.1, + rng.random_range(0..2000) + )); + } + let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; + + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + let print_dump_on_fail = |insert: &str, seed: u64| { + let comment = format!("-- seed: {}; dump for manual debugging:", seed); + let pragma_journal_mode = "PRAGMA journal_mode = wal;"; + let create_table = "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y));"; + let dump = format!( + "{}\n{}\n{}\n{}\n{}", + comment, pragma_journal_mode, create_table, comment, insert + ); + println!("{}", dump); + }; + + for comp in COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=3000 { + // see comment below about ordering and the '=' comparison operator; omitting LIMIT for that reason + // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure + // correctness in the = case + let limit = if *comp == "=" { "" } else { "LIMIT 5" }; + let query = format!( + "SELECT * FROM t WHERE x {} {} {} {}", + comp, + max, + order_by.unwrap_or(""), + limit + ); + log::trace!("query: {}", query); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + let is_equal = limbo == sqlite; + if !is_equal { + // if the condition is = and the same rows are present but in different order, then we accept that + // e.g. sqlite doesn't bother iterating in reverse order if "WHERE X = 3 ORDER BY X DESC", but we currently do. 
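+                        // In that case we only require set equality: every row produced by
+                        // limbo must appear in sqlite's result and vice versa, order ignored.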
+ if *comp == "=" { + let limbo_row_count = limbo.len(); + let sqlite_row_count = sqlite.len(); + if limbo_row_count == sqlite_row_count { + for limbo_row in limbo.iter() { + if !sqlite.contains(limbo_row) { + // save insert to file and print the filename for debugging + let error_msg = format!("row not found in sqlite: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); + print_dump_on_fail(&insert, seed); + panic!("{}", error_msg); + } + } + for sqlite_row in sqlite.iter() { + if !limbo.contains(sqlite_row) { + let error_msg = format!("row not found in limbo: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); + print_dump_on_fail(&insert, seed); + panic!("{}", error_msg); + } + } + continue; + } else { + print_dump_on_fail(&insert, seed); + let error_msg = format!("row count mismatch (limbo: {}, sqlite: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed); + panic!("{}", error_msg); + } + } + print_dump_on_fail(&insert, seed); + panic!( + "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", + query, limbo, sqlite, seed + ); + } + } + } + } + } + #[test] pub fn arithmetic_expression_fuzz() { let _ = env_logger::try_init(); From 3e42a62cd08fcdbea19af9e22ce5dbe82afb5f55 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:19:38 +0300 Subject: [PATCH 100/425] Add SeekLE/SeekLT operations to VDBE --- core/types.rs | 4 +- core/vdbe/builder.rs | 6 +++ core/vdbe/execute.rs | 113 ++++++++++++++++--------------------------- core/vdbe/explain.rs | 50 +++++++++++-------- core/vdbe/insn.rs | 30 +++++++++++- core/vdbe/mod.rs | 5 +- 6 files changed, 115 insertions(+), 93 deletions(-) diff --git a/core/types.rs b/core/types.rs index 72119349f..cc4495f52 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1203,11 +1203,13 @@ pub enum CursorResult { IO, } -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum SeekOp { EQ, GE, GT, + LE, + LT, } #[derive(Clone, PartialEq, Debug)] diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 19a71a68d..78216204e 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -413,6 +413,12 @@ impl ProgramBuilder { Insn::SeekGT { target_pc, .. } => { resolve(target_pc, "SeekGT"); } + Insn::SeekLE { target_pc, .. } => { + resolve(target_pc, "SeekLE"); + } + Insn::SeekLT { target_pc, .. } => { + resolve(target_pc, "SeekLT"); + } Insn::IdxGE { target_pc, .. 
} => { resolve(target_pc, "IdxGE"); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 654e9a2c5..b282fa524 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1892,97 +1892,69 @@ pub fn op_deferred_seek( Ok(InsnFunctionStepResult::Step) } -pub fn op_seek_ge( +pub fn op_seek( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::SeekGE { + let (Insn::SeekGE { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn - else { - unreachable!("unexpected Insn {:?}", insn) - }; - assert!(target_pc.is_offset()); - if *is_index { - let found = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GE)); - found - }; - if !found { - state.pc = target_pc.to_offset_int(); - } else { - state.pc += 1; - } - } else { - let pc = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let rowid = match state.registers[*start_reg].get_owned_value() { - OwnedValue::Null => { - // All integer values are greater than null so we just rewind the cursor - return_if_io!(cursor.rewind()); - None - } - OwnedValue::Integer(rowid) => Some(*rowid as u64), - _ => { - return Err(LimboError::InternalError( - "SeekGE: the value in the register is not an integer".into(), - )); - } - }; - match rowid { - Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE)); - if !found { - target_pc.to_offset_int() - } else { - state.pc + 1 - } - } - None => state.pc + 1, - } - }; - state.pc = pc; } - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_seek_gt( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::SeekGT { + | Insn::SeekGT { cursor_id, start_reg, num_regs, target_pc, is_index, - } = insn + } + | Insn::SeekLE { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + } + | Insn::SeekLT { + cursor_id, + start_reg, + num_regs, + target_pc, + is_index, + }) = insn else { unreachable!("unexpected Insn {:?}", insn) }; - assert!(target_pc.is_offset()); + assert!( + target_pc.is_offset(), + "target_pc should be an offset, is: {:?}", + target_pc + ); + let op = match insn { + Insn::SeekGE { .. } => SeekOp::GE, + Insn::SeekGT { .. } => SeekOp::GT, + Insn::SeekLE { .. } => SeekOp::LE, + Insn::SeekLT { .. 
} => SeekOp::LT, + _ => unreachable!("unexpected Insn {:?}", insn), + }; + let op_name = match op { + SeekOp::GE => "SeekGE", + SeekOp::GT => "SeekGT", + SeekOp::LE => "SeekLE", + SeekOp::LT => "SeekLT", + _ => unreachable!("unexpected SeekOp {:?}", op), + }; if *is_index { let found = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); let record_from_regs = make_record(&state.registers, start_reg, num_regs); - let found = - return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::IndexKey(&record_from_regs), op)); found }; if !found { @@ -2002,14 +1974,15 @@ pub fn op_seek_gt( } OwnedValue::Integer(rowid) => Some(*rowid as u64), _ => { - return Err(LimboError::InternalError( - "SeekGT: the value in the register is not an integer".into(), - )); + return Err(LimboError::InternalError(format!( + "{}: the value in the register is not an integer", + op_name + ))); } }; let found = match rowid { Some(rowid) => { - let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GT)); + let found = return_if_io!(cursor.seek(SeekKey::TableRowId(rowid), op)); if !found { target_pc.to_offset_int() } else { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 66c68d9c0..550e6cb5c 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -736,23 +736,35 @@ pub fn insn_to_str( start_reg, num_regs: _, target_pc, - } => ( - "SeekGT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::SeekGE { + } + | Insn::SeekGE { + is_index: _, + cursor_id, + start_reg, + num_regs: _, + target_pc, + } + | Insn::SeekLE { + is_index: _, + cursor_id, + start_reg, + num_regs: _, + target_pc, + } + | Insn::SeekLT { is_index: _, cursor_id, start_reg, num_regs: _, target_pc, } => ( - "SeekGE", + match insn { + Insn::SeekGT { .. } => "SeekGT", + Insn::SeekGE { .. } => "SeekGE", + Insn::SeekLE { .. } => "SeekLE", + Insn::SeekLT { .. } => "SeekLT", + _ => unreachable!(), + }, *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, @@ -1213,9 +1225,9 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::LastAsync { .. } => ( + Insn::LastAsync { cursor_id } => ( "LastAsync", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), @@ -1240,27 +1252,27 @@ pub fn insn_to_str( 0, where_clause.clone(), ), - Insn::LastAwait { .. } => ( + Insn::LastAwait { cursor_id, .. } => ( "LastAwait", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), 0, "".to_string(), ), - Insn::PrevAsync { .. } => ( + Insn::PrevAsync { cursor_id } => ( "PrevAsync", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), 0, "".to_string(), ), - Insn::PrevAwait { .. } => ( + Insn::PrevAwait { cursor_id, .. } => ( "PrevAwait", - 0, + *cursor_id as i32, 0, 0, OwnedValue::build_text(""), diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 7fffb9b22..c02c78d6e 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -501,6 +501,30 @@ pub enum Insn { /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. + // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key. 
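+    // (e.g. a backwards rowid range scan such as 'WHERE rowid <= 10 ORDER BY rowid DESC' seeks
+    // with the table key 10 and then iterates with prev())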
+    // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key.
+    // Seek to the last index entry that is less than or equal to the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction.
+    SeekLE {
+        is_index: bool,
+        cursor_id: CursorID,
+        start_reg: usize,
+        num_regs: usize,
+        target_pc: BranchOffset,
+    },
+
+    // If cursor_id refers to an SQL table (B-Tree that uses integer keys), use the value in start_reg as the key.
+    // If cursor_id refers to an SQL index, then start_reg is the first in an array of num_regs registers that are used as an unpacked index key.
+    // Seek to the last index entry that is less than the given key. If not found, jump to the given PC. Otherwise, continue to the next instruction.
+    SeekLT {
+        is_index: bool,
+        cursor_id: CursorID,
+        start_reg: usize,
+        num_regs: usize,
+        target_pc: BranchOffset,
+    },
+
+    // The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end.
+    // If the P1 index entry is greater than or equal to the key value then jump to P2. Otherwise fall through to the next instruction.
     IdxGE {
         cursor_id: CursorID,
         start_reg: usize,
@@ -1306,8 +1330,10 @@ impl Insn {
             Insn::SeekRowid { .. } => execute::op_seek_rowid,
             Insn::DeferredSeek { .. } => execute::op_deferred_seek,
-            Insn::SeekGE { .. } => execute::op_seek_ge,
-            Insn::SeekGT { .. } => execute::op_seek_gt,
+            Insn::SeekGE { .. } => execute::op_seek,
+            Insn::SeekGT { .. } => execute::op_seek,
+            Insn::SeekLE { .. } => execute::op_seek,
+            Insn::SeekLT { .. } => execute::op_seek,
             Insn::SeekEnd { .. } => execute::op_seek_end,
             Insn::IdxGE { .. } => execute::op_idx_ge,
             Insn::IdxGT { .. } => execute::op_idx_gt,
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 45e656032..cf6918304 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -561,7 +561,10 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In
         | Insn::LastAwait { .. }
         | Insn::SorterSort { .. }
         | Insn::SeekGE { .. }
-        | Insn::SeekGT { .. } => indent_count + 1,
+        | Insn::SeekGT { .. }
+        | Insn::SeekLE { .. }
+        | Insn::SeekLT { .. } => indent_count + 1,
+
         _ => indent_count,
     }
     } else {

From c9190236f031fb3c56dfc61c720174b5c015ca9f Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Tue, 8 Apr 2025 10:31:05 +0300
Subject: [PATCH 101/425] btree: support backwards index seeks and iteration

---
 core/storage/btree.rs | 461 +++++++++++++++++++++++++++++++++---
 1 file changed, 430 insertions(+), 31 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index e29a9f8f7..8943d9e81 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -4,6 +4,7 @@ use crate::storage::pager::Pager;
 use crate::storage::sqlite3_ondisk::{
     read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell,
 };
+use crate::translate::plan::IterationDirection;
 use crate::MvCursor;
 
 use crate::types::{
@@ -312,6 +313,17 @@ enum OverflowState {
     Done,
 }
 
+/// Iteration state of the cursor. Can only be set once.
+/// Once a SeekGT or SeekGE is performed, the cursor must iterate forwards and calling prev() is an error.
+/// Similarly, once a SeekLT or SeekLE is performed, the cursor must iterate backwards and calling next() is an error.
+/// When a SeekEQ or SeekRowid is performed, the cursor is NOT allowed to iterate further.
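+/// (For example, a descending range scan like 'SELECT * FROM t WHERE x <= 10 ORDER BY x DESC' performs
+/// one SeekLE and then only prev() calls, so the state stays Iterating(Backwards) for the whole statement.)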
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum IterationState { + Unset, + Iterating(IterationDirection), + IterationNotAllowed, +} + pub struct BTreeCursor { /// The multi-version cursor that is used to read and write to the database file. mv_cursor: Option>>, @@ -337,6 +349,8 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell>, empty_record: Cell, + + iteration_state: IterationState, } /// Stack of pages representing the tree traversal order. @@ -385,6 +399,7 @@ impl BTreeCursor { }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), + iteration_state: IterationState::Unset, } } @@ -404,7 +419,10 @@ impl BTreeCursor { /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result>> { + fn get_prev_record( + &mut self, + predicate: Option<(SeekKey<'_>, SeekOp)>, + ) -> Result>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -418,6 +436,7 @@ impl BTreeCursor { break; } if self.stack.has_parent() { + self.going_upwards = true; self.stack.pop(); } else { // moved to begin of btree @@ -442,6 +461,19 @@ impl BTreeCursor { let contents = page.get().contents.as_ref().unwrap(); let cell_count = contents.cell_count(); + + // If we are at the end of the page and we haven't just come back from the right child, + // we now need to move to the rightmost child. + if cell_idx as i32 == i32::MAX && !self.going_upwards { + let rightmost_pointer = contents.rightmost_pointer(); + if let Some(rightmost_pointer) = rightmost_pointer { + self.stack + .push(self.pager.read_page(rightmost_pointer as usize)?); + self.stack.set_cell_index(i32::MAX); + continue; + } + } + let cell_idx = if cell_idx >= cell_count { self.stack.set_cell_index(cell_count as i32 - 1); cell_count - 1 @@ -484,8 +516,127 @@ impl BTreeCursor { self.stack.retreat(); return Ok(CursorResult::Ok(Some(_rowid))); } - BTreeCell::IndexInteriorCell(_) => todo!(), - BTreeCell::IndexLeafCell(_) => todo!(), + BTreeCell::IndexInteriorCell(IndexInteriorCell { + payload, + left_child_page, + first_overflow_page, + payload_size, + }) => { + if !self.going_upwards { + let mem_page = self.pager.read_page(left_child_page as usize)?; + self.stack.push(mem_page); + // use cell_index = i32::MAX to tell next loop to go to the end of the current page + self.stack.set_cell_index(i32::MAX); + continue; + } + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + // Going upwards = we just moved to an interior cell from a leaf. + // On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads) + // We then mark going_upwards=false so that we go back down the tree on the next invocation. 
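+                        // (e.g. after yielding an interior cell whose key is 7, the next prev() call
+                        // descends into that cell's left child and resumes from its rightmost entry,
+                        // since in sqlite btrees the left subtree holds keys <= the interior key)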
+ self.going_upwards = false; + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = + compare_immutable(record_slice_same_num_cols, index_key.get_values()); + order + }; + + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } + BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) => { + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, next_page, payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + + self.stack.retreat(); + if predicate.is_none() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + let (key, op) = predicate.as_ref().unwrap(); + let SeekKey::IndexKey(index_key) = key else { + unreachable!("index seek key should be a record"); + }; + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_values = record.get_values(); + let record_slice_same_num_cols = + &record_values[..index_key.get_values().len()]; + let order = + compare_immutable(record_slice_same_num_cols, index_key.get_values()); + order + }; + let found = match op { + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + _ => unreachable!("Seek GT/GE should not happen in get_prev_record() because we are iterating backwards"), + }; + if found { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } else { + continue; + } + } } } } @@ -720,6 +871,7 @@ impl BTreeCursor { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), + _ => unreachable!("Seek LE/LT should not happen in get_next_record() because we are iterating forwards"), }; if found { let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() @@ -771,6 +923,7 @@ impl BTreeCursor { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), SeekOp::EQ => order.is_le(), + _ => todo!("not implemented: {:?}", op), }; if found { let rowid = match 
self.get_immutable_record().as_ref().unwrap().last_value() @@ -792,6 +945,35 @@ impl BTreeCursor { /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { + assert!( + self.iteration_state != IterationState::Unset, + "iteration state must have been set before do_seek() is called" + ); + let valid_op = match (self.iteration_state, op) { + (IterationState::Iterating(IterationDirection::Forwards), SeekOp::GE | SeekOp::GT) => { + true + } + (IterationState::Iterating(IterationDirection::Backwards), SeekOp::LE | SeekOp::LT) => { + true + } + (IterationState::IterationNotAllowed, SeekOp::EQ) => true, + _ => false, + }; + assert!( + valid_op, + "invalid seek op for iteration state: {:?} {:?}", + self.iteration_state, op + ); + let cell_iter_dir = match self.iteration_state { + IterationState::Iterating(IterationDirection::Forwards) + | IterationState::IterationNotAllowed => IterationDirection::Forwards, + IterationState::Iterating(IterationDirection::Backwards) => { + IterationDirection::Backwards + } + IterationState::Unset => { + unreachable!("iteration state must have been set before do_seek() is called"); + } + }; return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -800,9 +982,27 @@ impl BTreeCursor { let contents = page.get().contents.as_ref().unwrap(); - for cell_idx in 0..contents.cell_count() { + let cell_count = contents.cell_count(); + let mut cell_idx: isize = if cell_iter_dir == IterationDirection::Forwards { + 0 + } else { + cell_count as isize - 1 + }; + let end = if cell_iter_dir == IterationDirection::Forwards { + cell_count as isize - 1 + } else { + 0 + }; + self.stack.set_cell_index(cell_idx as i32); + while cell_count > 0 + && (if cell_iter_dir == IterationDirection::Forwards { + cell_idx <= end + } else { + cell_idx >= end + }) + { let cell = contents.cell_get( - cell_idx, + cell_idx as usize, payload_overflow_threshold_max( contents.page_type(), self.usable_space() as u16, @@ -827,6 +1027,8 @@ impl BTreeCursor { SeekOp::GT => *cell_rowid > rowid_key, SeekOp::GE => *cell_rowid >= rowid_key, SeekOp::EQ => *cell_rowid == rowid_key, + SeekOp::LE => *cell_rowid <= rowid_key, + SeekOp::LT => *cell_rowid < rowid_key, }; if found { if let Some(next_page) = first_overflow_page { @@ -841,10 +1043,10 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? 
}; - self.stack.advance(); + self.stack.next(cell_iter_dir); return Ok(CursorResult::Ok(Some(*cell_rowid))); } else { - self.stack.advance(); + self.stack.next(cell_iter_dir); } } BTreeCell::IndexLeafCell(IndexLeafCell { @@ -869,14 +1071,17 @@ impl BTreeCursor { }; let record = self.get_immutable_record(); let record = record.as_ref().unwrap(); - let without_rowid = &record.get_values().as_slice()[..record.len() - 1]; - let order = without_rowid.cmp(index_key.get_values()); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let order = record_slice_equal_number_of_cols.cmp(index_key.get_values()); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), }; - self.stack.advance(); + self.stack.next(cell_iter_dir); if found { let rowid = match record.last_value() { Some(RefValue::Integer(rowid)) => *rowid as u64, @@ -889,6 +1094,11 @@ impl BTreeCursor { unreachable!("unexpected cell type: {:?}", cell_type); } } + if cell_iter_dir == IterationDirection::Forwards { + cell_idx += 1; + } else { + cell_idx -= 1; + } } } @@ -909,7 +1119,20 @@ impl BTreeCursor { // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree // and get the next matching record from there. - return self.get_next_record(Some((key, op))); + match self.iteration_state { + IterationState::Iterating(IterationDirection::Forwards) => { + return self.get_next_record(Some((key, op))); + } + IterationState::Iterating(IterationDirection::Backwards) => { + return self.get_prev_record(Some((key, op))); + } + IterationState::Unset => { + unreachable!("iteration state must not be unset"); + } + IterationState::IterationNotAllowed => { + unreachable!("iteration state must not be IterationNotAllowed"); + } + } } Ok(CursorResult::Ok(None)) @@ -994,7 +1217,7 @@ impl BTreeCursor { let mut found_cell = false; for cell_idx in 0..contents.cell_count() { - match &contents.cell_get( + let cell = contents.cell_get( cell_idx, payload_overflow_threshold_max( contents.page_type(), @@ -1005,18 +1228,61 @@ impl BTreeCursor { self.usable_space() as u16, ), self.usable_space(), - )? { + )?; + match &cell { BTreeCell::TableInteriorCell(TableInteriorCell { _left_child_page, - _rowid, + _rowid: cell_rowid, }) => { let SeekKey::TableRowId(rowid_key) = key else { unreachable!("table seek key should be a rowid"); }; - let target_leaf_page_is_in_left_subtree = match cmp { - SeekOp::GT => rowid_key < *_rowid, - SeekOp::GE => rowid_key <= *_rowid, - SeekOp::EQ => rowid_key <= *_rowid, + // in sqlite btrees left child pages have <= keys. + // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, + // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) + // the left page may have a rowid=10. + // Logic table for determining if target leaf page is in left subtree + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? 
| Explanation + // GT | > | go left | First > key is in left subtree + // GT | = or < | go right | First > key is in right subtree + // GE | > or = | go left | First >= key is in left subtree + // GE | < | go right | First >= key is in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > or = | go left | Last <= key is in left subtree + // LE | < | go right | Last <= key is in right subtree + // LT | > or = | go left | Last < key is in left subtree + // LT | < | go right | Last < key is in right subtree + // + // No iteration (point query): + // EQ | > or = | go left | Last = key is in left subtree + // EQ | < | go right | Last = key is in right subtree + let target_leaf_page_is_in_left_subtree = match (self.iteration_state, cmp) + { + ( + IterationState::Iterating(IterationDirection::Forwards), + SeekOp::GT, + ) => *cell_rowid > rowid_key, + ( + IterationState::Iterating(IterationDirection::Forwards), + SeekOp::GE, + ) => *cell_rowid >= rowid_key, + ( + IterationState::Iterating(IterationDirection::Backwards), + SeekOp::LE, + ) => *cell_rowid >= rowid_key, + ( + IterationState::Iterating(IterationDirection::Backwards), + SeekOp::LT, + ) => *cell_rowid >= rowid_key, + (_any, SeekOp::EQ) => *cell_rowid >= rowid_key, + _ => unreachable!( + "invalid combination of seek op and iteration state: {:?} {:?}", + cmp, self.iteration_state + ), }; self.stack.advance(); if target_leaf_page_is_in_left_subtree { @@ -1057,16 +1323,75 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? }; - let order = compare_immutable( + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let interior_cell_vs_index_key = compare_immutable( + record_slice_equal_number_of_cols, index_key.get_values(), - self.get_immutable_record().as_ref().unwrap().get_values(), ); - let target_leaf_page_is_in_the_left_subtree = match cmp { - SeekOp::GT => order.is_lt(), - SeekOp::GE => order.is_le(), - SeekOp::EQ => order.is_le(), + // in sqlite btrees left child pages have <= keys. + // in general, in forwards iteration we want to find the first key that matches the seek condition. + // in backwards iteration we want to find the last key that matches the seek condition. + // + // Logic table for determining if target leaf page is in left subtree. + // For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload). + // and for non-unique indexes there might be several cells with the same key. + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key could be exactly this one, or in left subtree + // GT | = or < | go right | First > key must be in right subtree + // GE | > | go left | First >= key could be exactly this one, or in left subtree + // GE | = | go left | First >= key could be exactly this one, or in left subtree + // GE | < | go right | First >= key must be in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > | go left | Last <= key must be in left subtree + // LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. 
So we need to go right to make sure + // LE | < | go right | Last <= key must be in right subtree + // LT | > | go left | Last < key must be in left subtree + // LT | = | go left | Last < key must be in left subtree since we want strictly less than + // LT | < | go right | Last < key could be exactly this one, or in right subtree + // + // No iteration (point query): + // EQ | > | go left | First = key must be in left subtree + // EQ | = | go left | First = key could be exactly this one, or in left subtree + // EQ | < | go right | First = key must be in right subtree + assert!( + self.iteration_state != IterationState::Unset, + "iteration state must have been set before move_to() is called" + ); + + let target_leaf_page_is_in_left_subtree = match (cmp, self.iteration_state) + { + ( + SeekOp::GT, + IterationState::Iterating(IterationDirection::Forwards), + ) => interior_cell_vs_index_key.is_gt(), + ( + SeekOp::GE, + IterationState::Iterating(IterationDirection::Forwards), + ) => interior_cell_vs_index_key.is_ge(), + (SeekOp::EQ, IterationState::IterationNotAllowed) => { + interior_cell_vs_index_key.is_ge() + } + ( + SeekOp::LE, + IterationState::Iterating(IterationDirection::Backwards), + ) => interior_cell_vs_index_key.is_gt(), + ( + SeekOp::LT, + IterationState::Iterating(IterationDirection::Backwards), + ) => interior_cell_vs_index_key.is_ge(), + _ => unreachable!( + "invalid combination of seek op and iteration state: {:?} {:?}", + cmp, self.iteration_state + ), }; - if target_leaf_page_is_in_the_left_subtree { + if target_leaf_page_is_in_left_subtree { // we don't advance in case of index tree internal nodes because we will visit this node going up let mem_page = self.pager.read_page(*left_child_page as usize)?; self.stack.push(mem_page); @@ -2561,6 +2886,14 @@ impl BTreeCursor { } pub fn rewind(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Unset | IterationState::Iterating(IterationDirection::Forwards) + ), + "iteration state must be unset or Iterating(Forwards) when rewind() is called" + ); + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); if self.mv_cursor.is_some() { let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); @@ -2576,6 +2909,14 @@ impl BTreeCursor { } pub fn last(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Unset | IterationState::Iterating(IterationDirection::Backwards) + ), + "iteration state must be unset or Iterating(Backwards) when last() is called" + ); + self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); assert!(self.mv_cursor.is_none()); match self.move_to_rightmost()? { CursorResult::Ok(_) => self.prev(), @@ -2584,6 +2925,13 @@ impl BTreeCursor { } pub fn next(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Forwards) + ), + "iteration state must be Iterating(Forwards) when next() is called" + ); let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -2591,8 +2939,15 @@ impl BTreeCursor { } pub fn prev(&mut self) -> Result> { + assert!( + matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Backwards) + ), + "iteration state must be Iterating(Backwards) when prev() is called" + ); assert!(self.mv_cursor.is_none()); - match self.get_prev_record()? { + match self.get_prev_record(None)? 
{ CursorResult::Ok(rowid) => { self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -2617,6 +2972,38 @@ impl BTreeCursor { pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); + match op { + SeekOp::GE | SeekOp::GT => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); + } else { + assert!(matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Forwards) + )); + } + } + SeekOp::LE | SeekOp::LT => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); + } else { + assert!(matches!( + self.iteration_state, + IterationState::Iterating(IterationDirection::Backwards) + )); + } + } + SeekOp::EQ => { + if self.iteration_state == IterationState::Unset { + self.iteration_state = IterationState::IterationNotAllowed; + } else { + assert!(matches!( + self.iteration_state, + IterationState::IterationNotAllowed + )); + } + } + }; let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -3010,7 +3397,7 @@ impl BTreeCursor { OwnedValue::Integer(i) => i, _ => unreachable!("btree tables are indexed by integers!"), }; - return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); + let _ = return_if_io!(self.move_to(SeekKey::TableRowId(*int_key as u64), SeekOp::EQ)); let page = self.stack.top(); // TODO(pere): request load return_if_locked!(page); @@ -3501,6 +3888,18 @@ impl PageStack { self.cell_indices.borrow_mut()[current] -= 1; } + /// Move the cursor to the next cell in the current page according to the iteration direction. 
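+    /// Used, for example, by do_seek() so that the in-page cell walk matches the cursor's
+    /// iteration direction: advance() when iterating forwards, retreat() when iterating backwards.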
+ fn next(&self, iteration_direction: IterationDirection) { + match iteration_direction { + IterationDirection::Forwards => { + self.advance(); + } + IterationDirection::Backwards => { + self.retreat(); + } + } + } + fn set_cell_index(&self, idx: i32) { let current = self.current(); self.cell_indices.borrow_mut()[current] = idx @@ -4767,7 +5166,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(key as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5683,7 +6082,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5763,7 +6162,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) @@ -5845,7 +6244,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.move_to(key, SeekOp::EQ) + cursor.seek(key, SeekOp::EQ) }, pager.deref(), ) From a706b7160a17a8104de0af7f856c1e5c9164bc02 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:32:31 +0300 Subject: [PATCH 102/425] planner: support index backwards seeks and iteration --- core/translate/delete.rs | 7 +- core/translate/main_loop.rs | 521 ++++++++++++++++++++++++------------ core/translate/plan.rs | 11 +- core/translate/planner.rs | 14 +- core/translate/update.rs | 24 +- 5 files changed, 386 insertions(+), 191 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 9652048fe..b8b92349d 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -7,7 +7,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::{schema::Schema, Result, SymbolTable}; use limbo_sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use super::plan::TableReference; +use super::plan::{IterationDirection, TableReference}; pub fn translate_delete( query_mode: QueryMode, @@ -53,7 +53,10 @@ pub fn prepare_delete_plan( let table_references = vec![TableReference { table, identifier: name, - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, join_info: None, }]; diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 4773879e5..ab7ae1a0e 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -79,7 +79,7 @@ pub fn init_loop( } } match &table.op { - Operation::Scan { .. } => { + Operation::Scan { index, .. 
} => { let cursor_id = program.alloc_cursor_id( Some(table.identifier.clone()), match &table.table { @@ -90,6 +90,9 @@ pub fn init_loop( other => panic!("Invalid table reference type in Scan: {:?}", other), }, ); + let index_cursor_id = index.as_ref().map(|i| { + program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone())) + }); match (mode, &table.table) { (OperationMode::SELECT, Table::BTree(btree)) => { let root_page = btree.root_page; @@ -98,6 +101,13 @@ pub fn init_loop( root_page, }); program.emit_insn(Insn::OpenReadAwait {}); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } } (OperationMode::DELETE, Table::BTree(btree)) => { let root_page = btree.root_page; @@ -113,6 +123,12 @@ pub fn init_loop( cursor_id, root_page: root_page.into(), }); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page.into(), + }); + } program.emit_insn(Insn::OpenWriteAwait {}); } (OperationMode::SELECT, Table::Virtual(_)) => { @@ -282,36 +298,35 @@ pub fn open_loop( program.resolve_label(jump_target_when_true, program.offset()); } } - Operation::Scan { iter_dir } => { + Operation::Scan { iter_dir, index } => { let cursor_id = program.resolve_cursor_id(&table.identifier); - + let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); + let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); if !matches!(&table.table, Table::Virtual(_)) { - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::LastAsync { cursor_id }); + if *iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::LastAsync { + cursor_id: iteration_cursor_id, + }); } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); + program.emit_insn(Insn::RewindAsync { + cursor_id: iteration_cursor_id, + }); } } match &table.table { - Table::BTree(_) => program.emit_insn( - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { + Table::BTree(_) => { + program.emit_insn(if *iter_dir == IterationDirection::Backwards { Insn::LastAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_empty: loop_end, } } else { Insn::RewindAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_empty: loop_end, } - }, - ), + }) + } Table::Virtual(ref table) => { let start_reg = program .alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); @@ -337,6 +352,13 @@ pub fn open_loop( } program.resolve_label(loop_start, program.offset()); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id: cursor_id, + }); + } + for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -361,139 +383,6 @@ pub fn open_loop( let table_cursor_id = program.resolve_cursor_id(&table.identifier); // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::RowidEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::RowidSearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::RowidEq { .. } => unreachable!(), - }; - - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - // If we try to seek to a key that is not present in the table/index, we exit the loop entirely. - program.emit_insn(match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }, - _ => unreachable!(), - }); - if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - cmp_reg, - &t_ctx.resolver, - )?; - } - - program.resolve_label(loop_start, program.offset()); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already sought to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already sought to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already sought to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. 
- - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - _ => {} - } - - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } - } - if let Search::RowidEq { cmp_expr } = search { let src_reg = program.alloc_register(); translate_expr( @@ -508,7 +397,280 @@ pub fn open_loop( src_reg, target_pc: next, }); + } else { + // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. + let index_cursor_id = if let Search::IndexSearch { index, .. } = search { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + let (cmp_expr, cmp_op, iter_dir) = match search { + Search::IndexSearch { + cmp_expr, + cmp_op, + iter_dir, + .. + } => (cmp_expr, cmp_op, iter_dir), + Search::RowidSearch { + cmp_expr, + cmp_op, + iter_dir, + } => (cmp_expr, cmp_op, iter_dir), + Search::RowidEq { .. } => unreachable!(), + }; + + // There are a few steps in an index seek: + // 1. Emit the comparison expression for the rowid/index seek. For example, if we a clause 'WHERE index_key >= 10', we emit the comparison expression 10 into cmp_reg. + // + // 2. Emit the seek instruction. SeekGE and SeekGT are used in forwards iteration, SeekLT and SeekLE are used in backwards iteration. + // All of the examples below assume an ascending index, because we do not support descending indexes yet. + // If we are scanning the ascending index: + // - Forwards, and have a GT/GE/EQ comparison, the comparison expression from step 1 is used as the value to seek to, because that is the lowest possible value that satisfies the clause. + // - Forwards, and have a LT/LE comparison, NULL is used as the comparison expression because we actually want to start scanning from the beginning of the index. + // - Backwards, and have a GT/GE comparison, no Seek instruction is emitted and we emit LastAsync instead, because we want to start scanning from the end of the index. + // - Backwards, and have a LT/LE/EQ comparison, we emit a Seek instruction with the comparison expression from step 1 as the value to seek to, since that is the highest possible + // value that satisfies the clause. + let seek_cmp_reg = program.alloc_register(); + let mut comparison_expr_translated = false; + match (cmp_op, iter_dir) { + // Forwards, GT/GE/EQ -> use the comparison expression (i.e. 
seek to the first key where the cmp expr is satisfied, and then scan forwards) + ( + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals, + IterationDirection::Forwards, + ) => { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + seek_cmp_reg, + &t_ctx.resolver, + )?; + comparison_expr_translated = true; + match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => { + program.emit_insn(Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + ast::Operator::Greater => { + program.emit_insn(Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + _ => unreachable!(), + } + } + // Forwards, LT/LE -> use NULL (i.e. start from the beginning of the index) + ( + ast::Operator::Less | ast::Operator::LessEquals, + IterationDirection::Forwards, + ) => { + program.emit_insn(Insn::Null { + dest: seek_cmp_reg, + dest_end: None, + }); + program.emit_insn(Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + // Backwards, GT/GE -> no seek, emit LastAsync (i.e. start from the end of the index) + ( + ast::Operator::Greater | ast::Operator::GreaterEquals, + IterationDirection::Backwards, + ) => { + program.emit_insn(Insn::LastAsync { + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + }); + program.emit_insn(Insn::LastAwait { + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + pc_if_empty: loop_end, + }); + } + // Backwards, LT/LE/EQ -> use the comparison expression (i.e. seek from the end of the index until the cmp expr is satisfied, and then scan backwards) + ( + ast::Operator::Less | ast::Operator::LessEquals | ast::Operator::Equals, + IterationDirection::Backwards, + ) => { + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + seek_cmp_reg, + &t_ctx.resolver, + )?; + comparison_expr_translated = true; + match cmp_op { + ast::Operator::Less => { + program.emit_insn(Insn::SeekLT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + ast::Operator::LessEquals | ast::Operator::Equals => { + program.emit_insn(Insn::SeekLE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: seek_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + }; + + program.resolve_label(loop_start, program.offset()); + + let scan_terminating_cmp_reg = if comparison_expr_translated { + seek_cmp_reg + } else { + let reg = program.alloc_register(); + translate_expr( + program, + Some(tables), + &cmp_expr.expr, + reg, + &t_ctx.resolver, + )?; + reg + }; + + // 3. Emit a scan-terminating comparison instruction (IdxGT, IdxGE, IdxLT, IdxLE if index; GT, GE, LT, LE if btree rowid scan). + // Here the comparison expression from step 1 is compared to the current index key and the loop is exited if the comparison is true. + // The comparison operator used in the Idx__ instruction is the inverse of the WHERE clause comparison operator. 
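+            // (The same inversion applies to backwards scans, e.g. 'WHERE index_key > 10' iterated
+            // backwards terminates with IdxLE(10), mirroring the forwards cases below.)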
+ // For example, if we are scanning forwards and have a clause 'WHERE index_key < 10', we emit IdxGE(10) since >=10 is the first key where our condition is not satisfied anymore. + match (cmp_op, iter_dir) { + // Forwards, <= -> terminate if > + ( + ast::Operator::Equals | ast::Operator::LessEquals, + IterationDirection::Forwards, + ) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Gt { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Forwards, < -> terminate if >= + (ast::Operator::Less, IterationDirection::Forwards) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Ge { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Backwards, >= -> terminate if < + ( + ast::Operator::Equals | ast::Operator::GreaterEquals, + IterationDirection::Backwards, + ) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxLT { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Lt { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Backwards, > -> terminate if <= + (ast::Operator::Greater, IterationDirection::Backwards) => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxLE { + cursor_id: index_cursor_id, + start_reg: scan_terminating_cmp_reg, + num_regs: 1, + target_pc: loop_end, + }); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn(Insn::Le { + lhs: rowid_reg, + rhs: scan_terminating_cmp_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }); + } + } + // Forwards, > and >= -> we already did a seek to the first key where the cmp expr is satisfied, so we dont have a terminating condition + // Backwards, < and <= -> we already did a seek to the last key where the cmp expr is satisfied, so we dont have a terminating condition + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + // Don't do a btree table seek until it's actually necessary to read from the table. + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } + for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -813,30 +975,33 @@ pub fn close_loop( target_pc: loop_labels.loop_start, }); } - Operation::Scan { iter_dir, .. } => { + Operation::Scan { + index, iter_dir, .. 
+ } => { program.resolve_label(loop_labels.next, program.offset()); + let cursor_id = program.resolve_cursor_id(&table.identifier); + let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); + let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); match &table.table { Table::BTree(_) => { - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); + if *iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::PrevAsync { + cursor_id: iteration_cursor_id, + }); } else { - program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn(Insn::NextAsync { + cursor_id: iteration_cursor_id, + }); } - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { + if *iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::PrevAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } else { program.emit_insn(Insn::NextAwait { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } @@ -854,17 +1019,29 @@ pub fn close_loop( program.resolve_label(loop_labels.next, program.offset()); // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. if !matches!(search, Search::RowidEq { .. }) { - let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - Search::RowidSearch { .. } => program.resolve_cursor_id(&table.identifier), + let (cursor_id, iter_dir) = match search { + Search::IndexSearch { + index, iter_dir, .. + } => (program.resolve_cursor_id(&index.name), *iter_dir), + Search::RowidSearch { iter_dir, .. } => { + (program.resolve_cursor_id(&table.identifier), *iter_dir) + } Search::RowidEq { .. } => unreachable!(), }; - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { - cursor_id, - pc_if_next: loop_labels.loop_start, - }); + if iter_dir == IterationDirection::Backwards { + program.emit_insn(Insn::PrevAsync { cursor_id }); + program.emit_insn(Insn::PrevAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + program.emit_insn(Insn::NextAwait { + cursor_id, + pc_if_next: loop_labels.loop_start, + }); + } } } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index af14f0352..3958f9f81 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -259,12 +259,11 @@ pub struct TableReference { pub enum Operation { // Scan operation // This operation is used to scan a table. - // The iter_dir are uset to indicate the direction of the iterator. - // The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes - // iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple - // assignments. for more detailed discussions, please refer to https://github.com/tursodatabase/limbo/pull/376 + // The iter_dir is used to indicate the direction of the iterator. Scan { - iter_dir: Option, + iter_dir: IterationDirection, + /// The index that we are using to scan the table, if any. 
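+        /// When set, the loop iterates the index btree and only seeks the table btree lazily
+        /// via DeferredSeek, deferring the table seek until it is actually necessary to read
+        /// from the table.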
+ index: Option>, }, // Search operation // This operation is used to search for a row in a table using an index @@ -337,12 +336,14 @@ pub enum Search { RowidSearch { cmp_op: ast::Operator, cmp_expr: WhereTerm, + iter_dir: IterationDirection, }, /// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc. IndexSearch { index: Arc, cmp_op: ast::Operator, cmp_expr: WhereTerm, + iter_dir: IterationDirection, }, } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 1b78c954c..b7b8745b0 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,7 +1,7 @@ use super::{ plan::{ - Aggregate, EvalAt, JoinInfo, Operation, Plan, ResultSetColumn, SelectPlan, SelectQueryType, - TableReference, WhereTerm, + Aggregate, EvalAt, IterationDirection, JoinInfo, Operation, Plan, ResultSetColumn, + SelectPlan, SelectQueryType, TableReference, WhereTerm, }, select::prepare_select_plan, SymbolTable, @@ -320,7 +320,10 @@ fn parse_from_clause_table<'a>( )); }; scope.tables.push(TableReference { - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, table: tbl_ref, identifier: alias.unwrap_or(normalized_qualified_name), join_info: None, @@ -399,7 +402,10 @@ fn parse_from_clause_table<'a>( .unwrap_or(normalized_name.to_string()); scope.tables.push(TableReference { - op: Operation::Scan { iter_dir: None }, + op: Operation::Scan { + iter_dir: IterationDirection::Forwards, + index: None, + }, join_info: None, table: Table::Virtual(vtab), identifier: alias, diff --git a/core/translate/update.rs b/core/translate/update.rs index f282fff24..347e36acb 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -13,7 +13,8 @@ use super::optimizer::optimize_plan; use super::plan::{ Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, }; -use super::planner::{bind_column_references, parse_limit, parse_where}; +use super::planner::bind_column_references; +use super::planner::{parse_limit, parse_where}; /* * Update is simple. 
By default we scan the table, and for each row, we check the WHERE @@ -72,18 +73,25 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< let Some(btree_table) = table.btree() else { bail_parse_error!("Error: {} is not a btree table", table_name); }; - let iter_dir: Option = body.order_by.as_ref().and_then(|order_by| { - order_by.first().and_then(|ob| { - ob.order.map(|o| match o { - SortOrder::Asc => IterationDirection::Forwards, - SortOrder::Desc => IterationDirection::Backwards, + let iter_dir = body + .order_by + .as_ref() + .and_then(|order_by| { + order_by.first().and_then(|ob| { + ob.order.map(|o| match o { + SortOrder::Asc => IterationDirection::Forwards, + SortOrder::Desc => IterationDirection::Backwards, + }) }) }) - }); + .unwrap_or(IterationDirection::Forwards); let table_references = vec![TableReference { table: Table::BTree(btree_table.clone()), identifier: table_name.0.clone(), - op: Operation::Scan { iter_dir }, + op: Operation::Scan { + iter_dir, + index: None, + }, join_info: None, }]; let set_clauses = body From 024c63f8080b46f846f1f338fbe7d9b497ec2bf5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 10:33:01 +0300 Subject: [PATCH 103/425] optimizer: remove ORDER BY if index can be used to satisfy the order --- core/translate/optimizer.rs | 191 +++++++++++++++++++++++------------- 1 file changed, 121 insertions(+), 70 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 5321e0fa0..dd3455449 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, sync::Arc}; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ schema::{Index, Schema}, @@ -9,8 +9,8 @@ use crate::{ }; use super::plan::{ - DeletePlan, Direction, IterationDirection, Operation, Plan, Search, SelectPlan, TableReference, - UpdatePlan, WhereTerm, + DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SelectPlan, + TableReference, UpdatePlan, WhereTerm, }; pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { @@ -40,10 +40,10 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &plan.group_by, )?; - eliminate_unnecessary_orderby(plan, schema)?; - eliminate_orderby_like_groupby(plan)?; Ok(()) @@ -62,6 +62,8 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) @@ -79,6 +81,8 @@ fn optimize_update_plan(plan: &mut UpdatePlan, schema: &Schema) -> Result<()> { &mut plan.table_references, &schema.indexes, &mut plan.where_clause, + &mut plan.order_by, + &None, )?; Ok(()) } @@ -93,33 +97,6 @@ fn optimize_subqueries(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { Ok(()) } -fn query_is_already_ordered_by( - table_references: &[TableReference], - key: &mut ast::Expr, - available_indexes: &HashMap>>, -) -> Result { - let first_table = table_references.first(); - if first_table.is_none() { - return Ok(false); - } - let table_reference = first_table.unwrap(); - match &table_reference.op { - Operation::Scan { .. } => Ok(key.is_rowid_alias_of(0)), - Operation::Search(search) => match search { - Search::RowidEq { .. } => Ok(key.is_rowid_alias_of(0)), - Search::RowidSearch { .. 
} => Ok(key.is_rowid_alias_of(0)), - Search::IndexSearch { index, .. } => { - let index_rc = key.check_index_scan(0, table_reference, available_indexes)?; - let index_is_the_same = index_rc - .map(|irc| Arc::ptr_eq(index, &irc)) - .unwrap_or(false); - Ok(index_is_the_same) - } - }, - _ => Ok(false), - } -} - fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { if plan.order_by.is_none() | plan.group_by.is_none() { return Ok(()); @@ -185,36 +162,117 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { Ok(()) } -fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> { - if plan.order_by.is_none() { +fn eliminate_unnecessary_orderby( + table_references: &mut [TableReference], + available_indexes: &HashMap>>, + order_by: &mut Option>, + group_by: &Option, +) -> Result<()> { + let Some(order) = order_by else { return Ok(()); - } - if plan.table_references.is_empty() { + }; + let Some(first_table_reference) = table_references.first_mut() else { return Ok(()); - } - + }; + let Some(btree_table) = first_table_reference.btree() else { + return Ok(()); + }; // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby - if plan.group_by.is_some() { + if group_by.is_some() { + return Ok(()); + } + let Operation::Scan { + index, iter_dir, .. + } = &mut first_table_reference.op + else { + return Ok(()); + }; + + assert!( + index.is_none(), + "Nothing should have transformed the scan to use an index yet" + ); + + // Special case: if ordering by just the rowid, we can remove the ORDER BY clause + if order.len() == 1 && order[0].0.is_rowid_alias_of(0) { + *iter_dir = match order[0].1 { + Direction::Ascending => IterationDirection::Forwards, + Direction::Descending => IterationDirection::Backwards, + }; + *order_by = None; + return Ok(()); } - let o = plan.order_by.as_mut().unwrap(); + // Find the best matching index for the ORDER BY columns + let table_name = &btree_table.name; + let mut best_index = (None, 0); - if o.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); + for (_, indexes) in available_indexes.iter() { + for index_candidate in indexes.iter().filter(|i| &i.table_name == table_name) { + let matching_columns = index_candidate.columns.iter().enumerate().take_while(|(i, c)| { + if let Some((Expr::Column { table, column, .. }, _)) = order.get(*i) { + let col_idx_in_table = btree_table + .columns + .iter() + .position(|tc| tc.name.as_ref() == Some(&c.name)); + matches!(col_idx_in_table, Some(col_idx) if *table == 0 && *column == col_idx) + } else { + false + } + }).count(); + + if matching_columns > best_index.1 { + best_index = (Some(index_candidate), matching_columns); + } + } } - let (key, direction) = o.first_mut().unwrap(); + let Some(matching_index) = best_index.0 else { + return Ok(()); + }; + let match_count = best_index.1; - let already_ordered = - query_is_already_ordered_by(&plan.table_references, key, &schema.indexes)?; + // If we found a matching index, use it for scanning + *index = Some(matching_index.clone()); + // If the order by direction matches the index direction, we can iterate the index in forwards order. + // If they don't, we must iterate the index in backwards order. 
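For a concrete picture of the rewrite, a minimal sketch (an illustration only, assuming a hypothetical index `CREATE INDEX t_ab ON t(a, b)` with the default ascending column order; the plan fields are the ones introduced in this patch):

```rust
// Query: SELECT * FROM t ORDER BY a DESC, b DESC
// Both ORDER BY terms match t_ab's columns and both directions are reversed
// relative to the index, so the optimizer rewrites the scan to:
Operation::Scan {
    iter_dir: IterationDirection::Backwards, // walk t_ab from the end
    index: Some(t_ab),                       // the Arc<Index> chosen above
}
// ...and clears order_by entirely. A query like ORDER BY a ASC, b DESC is
// neither all-forward nor all-reverse, so the index is still used for the
// scan but the ORDER BY (and its sorter) is kept.
```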
+ let index_direction = &matching_index.columns.first().as_ref().unwrap().order; + *iter_dir = match (index_direction, order[0].1) { + (SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => { + IterationDirection::Forwards + } + (SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => { + IterationDirection::Backwards + } + }; - if already_ordered { - push_scan_direction(&mut plan.table_references[0], direction); - plan.order_by = None; + // If the index covers all ORDER BY columns, and one of the following applies: + // - the ORDER BY directions exactly match the index orderings, + // - the ORDER BY directions are the exact opposite of the index orderings, + // we can remove the ORDER BY clause. + if match_count == order.len() { + let full_match = { + let mut all_match_forward = true; + let mut all_match_reverse = true; + for (i, (_, direction)) in order.iter().enumerate() { + match (&matching_index.columns[i].order, direction) { + (SortOrder::Asc, Direction::Ascending) + | (SortOrder::Desc, Direction::Descending) => { + all_match_reverse = false; + } + (SortOrder::Asc, Direction::Descending) + | (SortOrder::Desc, Direction::Ascending) => { + all_match_forward = false; + } + } + } + all_match_forward || all_match_reverse + }; + if full_match { + *order_by = None; + } } Ok(()) @@ -222,24 +280,25 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu /** * Use indexes where possible. - * Right now we make decisions about using indexes ONLY based on condition expressions, not e.g. ORDER BY or others. - * This is just because we are WIP. * * When this function is called, condition expressions from both the actual WHERE clause and the JOIN clauses are in the where_clause vector. * If we find a condition that can be used to index scan, we pop it off from the where_clause vector and put it into a Search operation. * We put it there simply because it makes it a bit easier to track during translation. + * + * In this function we also try to eliminate ORDER BY clauses if there is an index that satisfies the ORDER BY clause. */ fn use_indexes( table_references: &mut [TableReference], available_indexes: &HashMap>>, where_clause: &mut Vec, + order_by: &mut Option>, + group_by: &Option, ) -> Result<()> { - if where_clause.is_empty() { - return Ok(()); - } - + // Try to use indexes for eliminating ORDER BY clauses + eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + // Try to use indexes for WHERE conditions 'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() { - if let Operation::Scan { .. } = &mut table_reference.op { + if let Operation::Scan { iter_dir, .. } = &table_reference.op { let mut i = 0; while i < where_clause.len() { let cond = where_clause.get_mut(i).unwrap(); @@ -248,6 +307,7 @@ fn use_indexes( table_index, table_reference, available_indexes, + iter_dir.clone(), )? { where_clause.remove(i); table_reference.op = Operation::Search(index_search); @@ -296,20 +356,6 @@ fn eliminate_constant_conditions( Ok(ConstantConditionEliminationResult::Continue) } -fn push_scan_direction(table: &mut TableReference, direction: &Direction) { - if let Operation::Scan { - ref mut iter_dir, .. 
- } = table.op - { - if iter_dir.is_none() { - match direction { - Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), - Direction::Descending => *iter_dir = Some(IterationDirection::Backwards), - } - } - } -} - fn rewrite_exprs_select(plan: &mut SelectPlan) -> Result<()> { for rc in plan.result_columns.iter_mut() { rewrite_expr(&mut rc.expr)?; @@ -611,6 +657,7 @@ pub fn try_extract_index_search_expression( table_index: usize, table_reference: &TableReference, available_indexes: &HashMap>>, + iter_dir: IterationDirection, ) -> Result> { if !cond.should_eval_at_loop(table_index) { return Ok(None); @@ -641,6 +688,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -671,6 +719,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -695,6 +744,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} @@ -719,6 +769,7 @@ pub fn try_extract_index_search_expression( from_outer_join: cond.from_outer_join, eval_at: cond.eval_at, }, + iter_dir, })); } _ => {} From fa295af635a68caf3d2a13c235c32dd9c60b77fd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 11:05:30 +0300 Subject: [PATCH 104/425] Fix insert fuzz test by bypassing internal invariant --- core/storage/btree.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 8943d9e81..39404ec8f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -318,7 +318,7 @@ enum OverflowState { /// Similarly, once a SeekLT or SeekLE is performed, the cursor must iterate backwards and calling next() is an error. /// When a SeekEQ or SeekRowid is performed, the cursor is NOT allowed to iterate further. #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum IterationState { +pub enum IterationState { Unset, Iterating(IterationDirection), IterationNotAllowed, @@ -350,7 +350,7 @@ pub struct BTreeCursor { reusable_immutable_record: RefCell>, empty_record: Cell, - iteration_state: IterationState, + pub iteration_state: IterationState, } /// Stack of pages representing the tree traversal order. 
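The invariant being exposed here can be read straight off the seek API: a GE/GT or LE/LT seek pins the iteration direction, and an EQ seek forbids further iteration. A test-style sketch (assuming the `run_until_done` helper these btree tests already use; not runnable on its own):

```rust
// A GE seek leaves the cursor in Iterating(Forwards), so next() is legal:
run_until_done(|| cursor.seek(SeekKey::TableRowId(42), SeekOp::GE), pager.deref()).unwrap();
run_until_done(|| cursor.next(), pager.deref()).unwrap(); // fine

// A point query (SeekOp::EQ) leaves no legal iteration direction, which is
// why the tests below overwrite the now-public field before walking the tree:
cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards);
```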
@@ -2930,7 +2930,8 @@ impl BTreeCursor { self.iteration_state, IterationState::Iterating(IterationDirection::Forwards) ), - "iteration state must be Iterating(Forwards) when next() is called" + "iteration state must be Iterating(Forwards) when next() is called, but it was {:?}", + self.iteration_state ); let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); @@ -5183,6 +5184,8 @@ mod tests { // FIXME: add sorted vector instead, should be okay for small amounts of keys for now :P, too lazy to fix right now keys.sort(); cursor.move_to_root(); + // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ + cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); let mut valid = true; for key in keys.iter() { tracing::trace!("seeking key: {}", key); @@ -5194,6 +5197,7 @@ mod tests { break; } } + cursor.iteration_state = IterationState::Unset; // let's validate btree too so that we understand where the btree failed if matches!(validate_btree(pager.clone(), root_page), (_, false)) || !valid { let btree_after = format_btree(pager.clone(), root_page, 0); @@ -5211,6 +5215,8 @@ mod tests { } keys.sort(); cursor.move_to_root(); + // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ + cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); for key in keys.iter() { tracing::trace!("seeking key: {}", key); run_until_done(|| cursor.next(), pager.deref()).unwrap(); From f5220d281df9d8c263720f1a22d471da6203c910 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 14:57:26 +0300 Subject: [PATCH 105/425] Fix off-by-one logic in btree table traversal --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 39404ec8f..97c1c5126 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1255,7 +1255,7 @@ impl BTreeCursor { // LE | > or = | go left | Last <= key is in left subtree // LE | < | go right | Last <= key is in right subtree // LT | > or = | go left | Last < key is in left subtree - // LT | < | go right | Last < key is in right subtree + // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less // // No iteration (point query): // EQ | > or = | go left | Last = key is in left subtree @@ -1277,7 +1277,7 @@ impl BTreeCursor { ( IterationState::Iterating(IterationDirection::Backwards), SeekOp::LT, - ) => *cell_rowid >= rowid_key, + ) => *cell_rowid >= rowid_key || *cell_rowid == rowid_key - 1, (_any, SeekOp::EQ) => *cell_rowid >= rowid_key, _ => unreachable!( "invalid combination of seek op and iteration state: {:?} {:?}", From d9bae633c06836c8bc70014ef884ee499445a7e9 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 14:59:39 +0300 Subject: [PATCH 106/425] Add rowid_seek_fuzz() test --- tests/integration/fuzz/mod.rs | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index eeed31698..5df3b49b4 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -107,6 +107,53 @@ mod tests { } } + #[test] + pub fn rowid_seek_fuzz() { + let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x INTEGER PRIMARY KEY)"); // INTEGER PRIMARY KEY is a rowid alias, so an index is not created + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + + let insert = 
format!( + "INSERT INTO t VALUES {}", + (1..10000) + .map(|x| format!("({})", x)) + .collect::>() + .join(", ") + ); + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + let limbo_conn = db.connect_limbo(); + + const COMPARISONS: [&str; 4] = ["<", "<=", ">", ">="]; + const ORDER_BY: [Option<&str>; 4] = [ + None, + Some("ORDER BY x"), + Some("ORDER BY x DESC"), + Some("ORDER BY x ASC"), + ]; + + for comp in COMPARISONS.iter() { + for order_by in ORDER_BY.iter() { + for max in 0..=10000 { + let query = format!( + "SELECT * FROM t WHERE x {} {} {} LIMIT 3", + comp, + max, + order_by.unwrap_or("") + ); + log::trace!("query: {}", query); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!( + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite + ); + } + } + } + } + #[test] pub fn index_scan_fuzz() { let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)"); From 0bb87b060a87b2b166d956a38e672f4c40b6d572 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:05:52 +0300 Subject: [PATCH 107/425] Fix existing table btree backwards iteration logic --- core/storage/btree.rs | 67 +++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 97c1c5126..028e60206 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -431,8 +431,7 @@ impl BTreeCursor { // todo: find a better way to flag moved to end or begin of page if self.stack.current_cell_index_less_than_min() { loop { - if self.stack.current_cell_index() > 0 { - self.stack.retreat(); + if self.stack.current_cell_index() >= 0 { break; } if self.stack.has_parent() { @@ -448,11 +447,6 @@ impl BTreeCursor { } let cell_idx = cell_idx as usize; - tracing::trace!( - "get_prev_record current id={} cell={}", - page.get().id, - cell_idx - ); return_if_locked!(page); if !page.is_loaded() { self.pager.load_page(page.clone())?; @@ -468,8 +462,7 @@ impl BTreeCursor { let rightmost_pointer = contents.rightmost_pointer(); if let Some(rightmost_pointer) = rightmost_pointer { self.stack - .push(self.pager.read_page(rightmost_pointer as usize)?); - self.stack.set_cell_index(i32::MAX); + .push_backwards(self.pager.read_page(rightmost_pointer as usize)?); continue; } } @@ -480,7 +473,6 @@ impl BTreeCursor { } else { cell_idx }; - let cell = contents.cell_get( cell_idx, payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), @@ -494,9 +486,7 @@ impl BTreeCursor { _rowid, }) => { let mem_page = self.pager.read_page(_left_child_page as usize)?; - self.stack.push(mem_page); - // use cell_index = i32::MAX to tell next loop to go to the end of the current page - self.stack.set_cell_index(i32::MAX); + self.stack.push_backwards(mem_page); continue; } BTreeCell::TableLeafCell(TableLeafCell { @@ -523,6 +513,14 @@ impl BTreeCursor { payload_size, }) => { if !self.going_upwards { + // In backwards iteration, if we haven't just moved to this interior node from the + // right child, but instead are about to move to the left child, we need to retreat + // so that we don't come back to this node again. + // For example: + // this parent: key 666 + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards. 
+ self.stack.retreat(); let mem_page = self.pager.read_page(left_child_page as usize)?; self.stack.push(mem_page); // use cell_index = i32::MAX to tell next loop to go to the end of the current page @@ -538,7 +536,7 @@ impl BTreeCursor { )? }; - // Going upwards = we just moved to an interior cell from a leaf. + // Going upwards = we just moved to an interior cell from the right child. // On the first pass we must take the record from the interior cell (since unlike table btrees, index interior cells have payloads) // We then mark going_upwards=false so that we go back down the tree on the next invocation. self.going_upwards = false; @@ -1206,6 +1204,13 @@ impl BTreeCursor { // 6. If we find the cell, we return the record. Otherwise, we return an empty result. self.move_to_root(); + let iter_dir = match self.iteration_state { + IterationState::Iterating(IterationDirection::Backwards) => { + IterationDirection::Backwards + } + _ => IterationDirection::Forwards, + }; + loop { let page = self.stack.top(); return_if_locked!(page); @@ -1284,12 +1289,22 @@ impl BTreeCursor { cmp, self.iteration_state ), }; - self.stack.advance(); if target_leaf_page_is_in_left_subtree { + // If we found our target rowid in the left subtree, + // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. + // For example: since the internal node contains the max rowid of the left subtree, we need to move the + // parent pointer backwards in backwards iteration so that we don't come back to the parent again. + // E.g. + // this parent: rowid 666 + // left child has: 664,665,666 + // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. + self.stack.next(iter_dir); let mem_page = self.pager.read_page(*_left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; break; + } else { + self.stack.advance(); } } BTreeCell::TableLeafCell(TableLeafCell { @@ -1392,7 +1407,15 @@ impl BTreeCursor { ), }; if target_leaf_page_is_in_left_subtree { - // we don't advance in case of index tree internal nodes because we will visit this node going up + // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. + // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. + // Example: + // this parent: key 666, and we found the target key in the left child. + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. + if iter_dir == IterationDirection::Backwards { + self.stack.retreat(); + } let mem_page = self.pager.read_page(*left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; @@ -3816,7 +3839,7 @@ impl PageStack { } /// Push a new page onto the stack. /// This effectively means traversing to a child page. 
- fn push(&self, page: PageRef) { + fn _push(&self, page: PageRef, starting_cell_idx: i32) { tracing::trace!( "pagestack::push(current={}, new_page_id={})", self.current_page.get(), @@ -3829,7 +3852,15 @@ impl PageStack { "corrupted database, stack is bigger than expected" ); self.stack.borrow_mut()[current as usize] = Some(page); - self.cell_indices.borrow_mut()[current as usize] = 0; + self.cell_indices.borrow_mut()[current as usize] = starting_cell_idx; + } + + fn push(&self, page: PageRef) { + self._push(page, 0); + } + + fn push_backwards(&self, page: PageRef) { + self._push(page, i32::MAX); } /// Pop a page off the stack. From 3124fca5b72c72d13ea866ab1f6524a3c6482d1a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:15:57 +0300 Subject: [PATCH 108/425] Dereference instead of explicit clone --- core/translate/optimizer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dd3455449..772ed81e7 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -307,7 +307,7 @@ fn use_indexes( table_index, table_reference, available_indexes, - iter_dir.clone(), + *iter_dir, )? { where_clause.remove(i); table_reference.op = Operation::Search(index_search); From 5e3a37a1921e74e9dc7f417731f927b9d17d02d2 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 8 Apr 2025 17:25:07 +0300 Subject: [PATCH 109/425] Try to name iteration direction sensitive method better --- core/storage/btree.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 028e60206..9d487243d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1041,10 +1041,10 @@ impl BTreeCursor { self.get_immutable_record_or_create().as_mut().unwrap(), )? }; - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); return Ok(CursorResult::Ok(Some(*cell_rowid))); } else { - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); } } BTreeCell::IndexLeafCell(IndexLeafCell { @@ -1079,7 +1079,7 @@ impl BTreeCursor { SeekOp::LE => order.is_le(), SeekOp::LT => order.is_lt(), }; - self.stack.next(cell_iter_dir); + self.stack.next_cell_in_direction(cell_iter_dir); if found { let rowid = match record.last_value() { Some(RefValue::Integer(rowid)) => *rowid as u64, @@ -1298,7 +1298,7 @@ impl BTreeCursor { // this parent: rowid 666 // left child has: 664,665,666 // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. - self.stack.next(iter_dir); + self.stack.next_cell_in_direction(iter_dir); let mem_page = self.pager.read_page(*_left_child_page as usize)?; self.stack.push(mem_page); found_cell = true; @@ -3921,7 +3921,7 @@ impl PageStack { } /// Move the cursor to the next cell in the current page according to the iteration direction. 
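Together with `push_backwards()` above, the rename below gives backward traversal a shape symmetric to forward traversal. A hedged summary of the contract (names as in these patches; the sentinel semantics are the ones spelled out in the comments earlier in the diff):

```rust
// Forward descent starts at a page's first cell; backward descent starts at
// the i32::MAX sentinel, meaning "clamp to the page's last cell":
self.stack.push(left_child);            // cell index = 0
self.stack.push_backwards(right_child); // cell index = i32::MAX

// The renamed helper then advances or retreats that per-page cell index:
self.stack.next_cell_in_direction(IterationDirection::Forwards);  // == advance()
self.stack.next_cell_in_direction(IterationDirection::Backwards); // == retreat()
```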
- fn next(&self, iteration_direction: IterationDirection) { + fn next_cell_in_direction(&self, iteration_direction: IterationDirection) { match iteration_direction { IterationDirection::Forwards => { self.advance(); From 0888c71ba08756ba48cbdf99c550b49fcc2017d8 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 9 Apr 2025 10:26:02 +0300 Subject: [PATCH 110/425] use seek() instead of do_seek() to set iteration state --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 9d487243d..dac04d960 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3390,7 +3390,7 @@ impl BTreeCursor { /// Search for a key in an Index Btree. Looking up indexes that need to be unique, we cannot compare the rowid pub fn key_exists_in_index(&mut self, key: &ImmutableRecord) -> Result> { - return_if_io!(self.do_seek(SeekKey::IndexKey(key), SeekOp::GE)); + return_if_io!(self.seek(SeekKey::IndexKey(key), SeekOp::GE)); let record_opt = self.record(); match record_opt.as_ref() { From edc3a420fbeb7f46d2dc789dabdcc28b79d31bd1 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 11:02:49 +0200 Subject: [PATCH 111/425] comment how page count is decreased while balancing --- core/storage/btree.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index befb43189..b843440cc 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1657,7 +1657,6 @@ impl BTreeCursor { while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { - // FIXME: this doesn't remove pages if not needed sibling_count_new += 1; new_page_sizes.push(0); cell_array @@ -1715,16 +1714,24 @@ impl BTreeCursor { new_page_sizes[i + 1] -= size_of_cell_to_remove_from_right; } - let we_still_need_another_page = + // Check if this page contains up to the last cell. If this happens it means we really just need up to this page. 
+ // Let's update the number of new pages to be up to this page (i+1) + let page_completes_all_cells = cell_array.number_of_cells_per_page[i] >= cell_array.cells.len() as u16; - if we_still_need_another_page { + if page_completes_all_cells { sibling_count_new = i + 1; + break; } i += 1; if i >= sibling_count_new { break; } } + new_page_sizes.truncate(sibling_count_new); + cell_array + .number_of_cells_per_page + .truncate(sibling_count_new); + tracing::debug!( "balance_non_root(sibling_count={}, sibling_count_new={}, cells={})", balance_info.sibling_count, From f1df09ffd9cfda029f05b385eac5d61d6fef93ae Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 11:05:41 +0200 Subject: [PATCH 112/425] free no longer used pages after balance --- core/storage/btree.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b843440cc..5030abf03 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2045,7 +2045,11 @@ impl BTreeCursor { rightmost_pointer, ); // TODO: balance root - // TODO: free pages + // We have to free pages that are not used anymore + for i in sibling_count_new..balance_info.sibling_count { + let page = &balance_info.pages_to_balance[i]; + self.pager.free_page(Some(page.clone()), page.get().id)?; + } (WriteState::BalanceStart, Ok(CursorResult::Ok(()))) } WriteState::Finish => todo!(), From d9453f6e0695f673737303fbd2c025a47aacd194 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:01:18 +0200 Subject: [PATCH 113/425] fix `cell_get_raw_region` length calculation --- core/storage/sqlite3_ondisk.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index d48f5b61b..b8373514f 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -626,9 +626,9 @@ impl PageContent { usable_size, ); if overflows { - 4 + to_read + n_payload + 4 + 4 + to_read + n_payload } else { - 4 + len_payload as usize + n_payload + 4 + 4 + len_payload as usize + n_payload } } PageType::TableInterior => { @@ -644,9 +644,9 @@ impl PageContent { usable_size, ); if overflows { - to_read + n_payload + 4 + to_read + n_payload } else { - len_payload as usize + n_payload + 4 + len_payload as usize + n_payload } } PageType::TableLeaf => { From 12899034c9c814ce01fb0e8cb95c3969a38b5e0f Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:01:40 +0200 Subject: [PATCH 114/425] make insert idx re-entrant --- core/storage/btree.rs | 7 +++++++ core/vdbe/execute.rs | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0446b95ec..cd97412ab 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3885,6 +3885,13 @@ impl BTreeCursor { fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option> { self.reusable_immutable_record.borrow_mut() } + + pub fn is_write_in_progress(&self) -> bool { + match self.state { + CursorState::Write(_) => true, + _ => false, + } + } } #[cfg(debug_assertions)] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index b282fa524..b82b13f3b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3761,7 +3761,17 @@ pub fn op_idx_insert_async( } else { flags.has(IdxInsertFlags::USE_SEEK) }; - // insert record as key + + // To make this reentrant in case of `moved_before` = false, we need to check if the previous cursor.insert started + // a write/balancing operation. 
If it did, it means we already moved to the place we wanted. + let moved_before = if cursor.is_write_in_progress() { + true + } else { + moved_before + }; + // Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages, + // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to `Await` opcode + // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before)); } state.pc += 1; From f2d9e1e8f55d40eb6a7ae916945460e68c129187 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:02:24 +0200 Subject: [PATCH 115/425] fix divider cell in index --- core/storage/btree.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index cd97412ab..9e9d6800c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1928,7 +1928,7 @@ impl BTreeCursor { if i < balance_info.pages_to_balance.len() - 1 && !leaf_data { // If we are a index page or a interior table page we need to take the divider cell too. // But we don't need the last divider as it will remain the same. - let divider_cell = &mut balance_info.divider_cells[i]; + let mut divider_cell = balance_info.divider_cells[i].as_mut_slice(); // TODO(pere): in case of old pages are leaf pages, so index leaf page, we need to strip page pointers // from divider cells in index interior pages (parent) because those should not be included. cells_inserted += 1; @@ -1936,8 +1936,13 @@ impl BTreeCursor { // This divider cell needs to be updated with new left pointer, let right_pointer = old_page_contents.rightmost_pointer().unwrap(); divider_cell[..4].copy_from_slice(&right_pointer.to_be_bytes()); + } else { + // index leaf + assert!(divider_cell.len() >= 4); + // let's strip the page pointer + divider_cell = &mut divider_cell[4..]; } - cell_array.cells.push(to_static_buf(divider_cell.as_mut())); + cell_array.cells.push(to_static_buf(divider_cell)); } total_cells_inserted += cells_inserted; } @@ -1955,6 +1960,9 @@ impl BTreeCursor { { for cell in &cell_array.cells { cells_debug.push(cell.to_vec()); + if leaf { + assert!(cell[0] != 0) + } } } @@ -2778,7 +2786,17 @@ impl BTreeCursor { valid = false; } } - PageType::IndexLeaf => todo!(), + PageType::IndexLeaf => { + let parent_cell_buf = + &parent_buf[parent_cell_start..parent_cell_start + parent_cell_len]; + if parent_cell_buf[4..] != cell_buf_in_array[..] 
{ tracing::error!("balance_non_root(cell_divider_cell_index_leaf, page_id={}, cell_divider_idx={})", page.get().id, cell_divider_idx, ); valid = false; } } _ => { unreachable!() } From 6b7575bf5e27aee6a01b63b47a0b5f0b6ef42f25 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:03:23 +0200 Subject: [PATCH 116/425] fix tree traversal assumptions on traversal --- core/storage/btree.rs | 46 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 9e9d6800c..95dcad520 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1630,7 +1630,12 @@ impl BTreeCursor { let write_info = self.state.mut_write_info().unwrap(); write_info.state = WriteState::BalanceNonRoot; self.stack.pop(); - self.stack.retreat(); + // with `move_to` we advance the current cell idx of TableInterior once we move to left subtree. + // On the other hand, with IndexInterior, we do not because we traverse in-order. In the latter case + // since we haven't consumed the cell we can avoid retreating the current cell index. + if matches!(current_page.get_contents().page_type(), PageType::TableLeaf) { + self.stack.retreat(); + } return_if_io!(self.balance_non_root()); } WriteState::BalanceNonRoot | WriteState::BalanceNonRootWaitLoadPages => { @@ -1682,7 +1687,12 @@ impl BTreeCursor { parent_contents.overflow_cells.is_empty(), "balancing child page with overflowed parent not yet implemented" ); - assert!(page_to_balance_idx <= parent_contents.cell_count()); + assert!( + page_to_balance_idx <= parent_contents.cell_count(), + "page_to_balance_idx={} is out of bounds for parent cell count {}", + page_to_balance_idx, + number_of_cells_in_parent + ); // As there will be at maximum 3 pages used to balance: // sibling_pointer is the index representing one of those 3 pages, and we initialize it to the last possible page. // next_divider is the first divider that contains the first page of the 3 pages. @@ -1813,6 +1823,7 @@ impl BTreeCursor { // Now do real balancing let parent_page = self.stack.top(); let parent_contents = parent_page.get_contents(); + assert!( parent_contents.overflow_cells.is_empty(), "overflow parent not yet implemented" @@ -2259,6 +2270,7 @@ impl BTreeCursor { write_varint_to_vec(rowid, &mut new_divider_cell); } else { // Leaf index + new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes()); new_divider_cell.extend_from_slice(divider_cell); } @@ -2855,7 +2867,10 @@ impl BTreeCursor { child_buf[0..root_contents.header_size()] .copy_from_slice(&root_buf[offset..offset + root_contents.header_size()]); // Copy overflow cells - child_contents.overflow_cells = root_contents.overflow_cells.clone(); + std::mem::swap( + &mut child_contents.overflow_cells, + &mut root_contents.overflow_cells, + ); // 2. 
Modify root let new_root_page_type = match root_contents.page_type() { @@ -2878,7 +2893,9 @@ impl BTreeCursor { self.root_page = root.get().id; self.stack.clear(); self.stack.push(root.clone()); - self.stack.advance(); + if matches!(root_contents.page_type(), PageType::TableInterior) { + self.stack.advance(); + } self.stack.push(child.clone()); } @@ -2933,6 +2950,7 @@ impl BTreeCursor { } cell_idx += 1; } + assert!(cell_idx <= cell_count); cell_idx } @@ -4449,10 +4467,21 @@ fn debug_validate_cells_core(page: &PageContent, usable_space: u16) { payload_overflow_threshold_min(page.page_type(), usable_space), usable_space as usize, ); + let buf = &page.as_ptr()[offset..offset + size]; + assert!( + size >= 4, + "cell size should be at least 4 bytes idx={}, cell={:?}, offset={}", + i, + buf, + offset + ); if page.is_leaf() { assert!(page.as_ptr()[offset] != 0); } - assert!(size >= 4, "cell size should be at least 4 bytes idx={}", i); + assert!( + offset + size <= usable_space as usize, + "cell spans out of usable space" + ); } } @@ -4467,6 +4496,7 @@ fn insert_into_cell( cell_idx: usize, usable_space: u16, ) -> Result<()> { + debug_validate_cells!(page, usable_space); assert!( cell_idx <= page.cell_count() + page.overflow_cells.len(), "attempting to add cell to an incorrect place cell_idx={} cell_count={}", @@ -4487,10 +4517,12 @@ fn insert_into_cell( let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?; tracing::debug!( - "insert_into_cell(idx={}, pc={})", + "insert_into_cell(idx={}, pc={}, size={})", cell_idx, - new_cell_data_pointer + new_cell_data_pointer, + payload.len() ); + assert!(new_cell_data_pointer + payload.len() as u16 <= usable_space); let buf = page.as_ptr(); // copy data From 7b384f8e5c27e6172dbfb897ff6e814f7f00938b Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:29:06 +0200 Subject: [PATCH 117/425] set iteration_state for insert --- core/storage/btree.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 95dcad520..620dd7662 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3148,6 +3148,7 @@ impl BTreeCursor { }, None => { if !moved_before { + self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); match key { BTreeKey::IndexKey(_) => { return_if_io!(self From 6a02730c1ac12616c7f0cf671352f6631828d9df Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 15:56:04 +0200 Subject: [PATCH 118/425] rebase fixes --- core/storage/btree.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 620dd7662..8f4afb090 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2893,9 +2893,6 @@ impl BTreeCursor { self.root_page = root.get().id; self.stack.clear(); self.stack.push(root.clone()); - if matches!(root_contents.page_type(), PageType::TableInterior) { - self.stack.advance(); - } self.stack.push(child.clone()); } @@ -5324,7 +5321,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(key as u64); - cursor.seek(key, SeekOp::EQ) + cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) From 3b98675aa0a711111279725cfeb0c0558c70ca4f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:02:25 +0300 Subject: [PATCH 119/425] Update COMPAT.md --- COMPAT.md | 1 + 1 file changed, 1 insertion(+) diff --git a/COMPAT.md b/COMPAT.md index f541c1f61..7013b2427 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -58,6 +58,7 @@ Limbo aims to be fully 
compatible with SQLite, with opt-in features not supporte | COMMIT TRANSACTION | Partial | Transaction names are not supported. | | CREATE INDEX | Yes | | | CREATE TABLE | Partial | | +| CREATE TABLE ... STRICT | Yes | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | | CREATE VIRTUAL TABLE | No | | From dbb346ba2810a3a246aeae2856ea1836c5742296 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:03:53 +0300 Subject: [PATCH 120/425] Update COMPAT.md --- COMPAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 7013b2427..5e6dc0499 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -61,7 +61,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | CREATE TABLE ... STRICT | Yes | | | CREATE TRIGGER | No | | | CREATE VIEW | No | | -| CREATE VIRTUAL TABLE | No | | +| CREATE VIRTUAL TABLE | Yes | | | DELETE | Yes | | | DETACH DATABASE | No | | | DROP INDEX | No | | From 5de2d91d04f6ecbe7245d4c3163177c1da28ae81 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 17:07:24 +0300 Subject: [PATCH 121/425] Update COMPAT.md --- COMPAT.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 5e6dc0499..e85a47725 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -41,7 +41,6 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte * ⛔️ Concurrent access from multiple processes is not supported. * ⛔️ Savepoints are not supported. * ⛔️ Triggers are not supported. -* ⛔️ Indexes are not supported. * ⛔️ Views are not supported. * ⛔️ Vacuum is not supported. @@ -65,7 +64,7 @@ Limbo aims to be fully compatible with SQLite, with opt-in features not supporte | DELETE | Yes | | | DETACH DATABASE | No | | | DROP INDEX | No | | -| DROP TABLE | No | | +| DROP TABLE | Yes | | | DROP TRIGGER | No | | | DROP VIEW | No | | | END TRANSACTION | Partial | Alias for `COMMIT TRANSACTION` | From 2316d7ebf1ee5d682d2332d57141a3596646c242 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Wed, 9 Apr 2025 16:31:08 +0200 Subject: [PATCH 122/425] add .timer command with fine grained statistics about limbo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` Limbo v0.0.19-pre.4 Enter ".help" for usage hints. 
limbo> .timer on limbo> select count(1) from users; ┌───────────┐ │ count (1) │ ├───────────┤ │ 10000 │ └───────────┘ Command stats: ---------------------------- total: 35 ms (this includes parsing/coloring of cli app) query execution stats: ---------------------------- Execution: avg=16 us, total=33 us I/O: avg=123 ns, total=3 us limbo> select 1; ┌───┐ │ 1 │ ├───┤ │ 1 │ └───┘ Command stats: ---------------------------- total: 282 us (this includes parsing/coloring of cli app) query execution stats: ---------------------------- Execution: avg=2 us, total=4 us I/O: No samples available ``` --- cli/app.rs | 119 +++++++++++++++++++++++++++++++++++++++++-- cli/commands/args.rs | 12 +++++ cli/commands/mod.rs | 4 +- cli/input.rs | 2 + 4 files changed, 131 insertions(+), 6 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index e5aa851a6..cffe9022f 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,5 +1,9 @@ use crate::{ - commands::{args::EchoMode, import::ImportFile, Command, CommandParser}, + commands::{ + args::{EchoMode, TimerMode}, + import::ImportFile, + Command, CommandParser, + }, helper::LimboHelper, input::{get_io, get_writer, DbLocation, OutputMode, Settings}, opcodes_dictionary::OPCODE_DESCRIPTIONS, @@ -20,6 +24,7 @@ use std::{ atomic::{AtomicUsize, Ordering}, Arc, }, + time::{Duration, Instant}, }; #[derive(Parser)] @@ -68,6 +73,11 @@ pub struct Limbo<'a> { pub rl: &'a mut Editor, } +struct QueryStatistics { + io_time_elapsed_samples: Vec, + execute_time_elapsed_samples: Vec, +} + macro_rules! query_internal { ($self:expr, $query:expr, $body:expr) => {{ let rows = $self.conn.query($query)?; @@ -391,6 +401,11 @@ impl<'a> Limbo<'a> { let _ = self.writeln(input); } + let start = Instant::now(); + let mut stats = QueryStatistics { + io_time_elapsed_samples: vec![], + execute_time_elapsed_samples: vec![], + }; if input.trim_start().starts_with("explain") { if let Ok(Some(stmt)) = self.conn.query(input) { let _ = self.writeln(stmt.explain().as_bytes()); @@ -399,14 +414,59 @@ impl<'a> Limbo<'a> { let conn = self.conn.clone(); let runner = conn.query_runner(input.as_bytes()); for output in runner { - if self.print_query_result(input, output).is_err() { + if self + .print_query_result(input, output, Some(&mut stats)) + .is_err() + { break; } } } + self.print_query_performance_stats(start, stats); self.reset_input(); } + fn print_query_performance_stats(&mut self, start: Instant, stats: QueryStatistics) { + let elapsed_as_str = |duration: Duration| { + if duration.as_secs() >= 1 { + format!("{} s", duration.as_secs_f64()) + } else if duration.as_millis() >= 1 { + format!("{} ms", duration.as_millis() as f64) + } else if duration.as_micros() >= 1 { + format!("{} us", duration.as_micros() as f64) + } else { + format!("{} ns", duration.as_nanos()) + } + }; + let sample_stats_as_str = |name: &str, samples: Vec| { + if samples.is_empty() { + return format!("{}: No samples available", name); + } + let avg_time_spent = samples.iter().sum::() / samples.len() as u32; + let total_time = samples.iter().fold(Duration::ZERO, |acc, x| acc + *x); + format!( + "{}: avg={}, total={}", + name, + elapsed_as_str(avg_time_spent), + elapsed_as_str(total_time), + ) + }; + if self.opts.timer { + let _ = self.writeln("Command stats:\n----------------------------"); + let _ = self.writeln(format!( + "total: {} (this includes parsing/coloring of cli app)\n", + elapsed_as_str(start.elapsed()) + )); + + let _ = self.writeln("query execution stats:\n----------------------------"); + let _ = self.writeln(sample_stats_as_str( + 
"Execution", + stats.execute_time_elapsed_samples, + )); + let _ = self.writeln(sample_stats_as_str("I/O", stats.io_time_elapsed_samples)); + } + } + fn reset_line(&mut self, line: &str) -> rustyline::Result<()> { self.rl.add_history_entry(line.to_owned())?; self.interrupt_count.store(0, Ordering::SeqCst); @@ -436,7 +496,7 @@ impl<'a> Limbo<'a> { let conn = self.conn.clone(); let runner = conn.query_runner(after_comment.as_bytes()); for output in runner { - if let Err(e) = self.print_query_result(after_comment, output) { + if let Err(e) = self.print_query_result(after_comment, output, None) { let _ = self.writeln(e.to_string()); } } @@ -565,6 +625,12 @@ impl<'a> Limbo<'a> { let _ = self.writeln(v); }); } + Command::Timer(timer_mode) => { + self.opts.timer = match timer_mode.mode { + TimerMode::On => true, + TimerMode::Off => false, + }; + } }, } } @@ -573,6 +639,7 @@ impl<'a> Limbo<'a> { &mut self, sql: &str, mut output: Result, LimboError>, + mut statistics: Option<&mut QueryStatistics>, ) -> anyhow::Result<()> { match output { Ok(Some(ref mut rows)) => match self.opts.output_mode { @@ -582,8 +649,13 @@ impl<'a> Limbo<'a> { return Ok(()); } + let start = Instant::now(); + match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let row = rows.row().unwrap(); for (i, value) in row.get_values().enumerate() { if i > 0 { @@ -598,17 +670,30 @@ impl<'a> Limbo<'a> { let _ = self.writeln(""); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } } Ok(StepResult::Interrupt) => break, Ok(StepResult::Done) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } break; } Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln(err.to_string()); break; } @@ -636,8 +721,12 @@ impl<'a> Limbo<'a> { table.set_header(header); } loop { + let start = Instant::now(); match rows.step() { Ok(StepResult::Row) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let record = rows.row().unwrap(); let mut row = Row::new(); row.max_height(1); @@ -668,15 +757,35 @@ impl<'a> Limbo<'a> { table.add_row(row); } Ok(StepResult::IO) => { + let start = Instant::now(); self.io.run_once()?; + if let Some(ref mut stats) = statistics { + stats.io_time_elapsed_samples.push(start.elapsed()); + } + } + Ok(StepResult::Interrupt) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; + } + Ok(StepResult::Done) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } + break; } - Ok(StepResult::Interrupt) => break, - Ok(StepResult::Done) => break, Ok(StepResult::Busy) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.writeln("database is busy"); break; } Err(err) => { + if let Some(ref mut stats) = statistics { + stats.execute_time_elapsed_samples.push(start.elapsed()); + } let _ = self.write_fmt(format_args!( "{:?}", 
miette::Error::from(err).with_source_code(sql.to_owned()) diff --git a/cli/commands/args.rs b/cli/commands/args.rs index 3bb78d0b8..750895049 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -106,3 +106,15 @@ pub struct LoadExtensionArgs { #[arg(add = ArgValueCompleter::new(PathCompleter::file()))] pub path: String, } + +#[derive(Debug, ValueEnum, Clone)] +pub enum TimerMode { + On, + Off, +} + +#[derive(Debug, Clone, Args)] +pub struct TimerArgs { + #[arg(value_enum)] + pub mode: TimerMode, +} diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index 757cee530..e01828517 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -3,7 +3,7 @@ pub mod import; use args::{ CwdArgs, EchoArgs, ExitArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, OpenArgs, - OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, + OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs, }; use clap::Parser; use import::ImportArgs; @@ -72,6 +72,8 @@ pub enum Command { /// List vfs modules available #[command(name = "vfslist", display_name = ".vfslist")] ListVfs, + #[command(name = "timer", display_name = ".timer")] + Timer(TimerArgs), } const _HELP_TEMPLATE: &str = "{before-help}{name} diff --git a/cli/input.rs b/cli/input.rs index 7b505a99f..e352899c9 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -82,6 +82,7 @@ pub struct Settings { pub is_stdout: bool, pub io: Io, pub tracing_output: Option, + pub timer: bool, } impl From for Settings { @@ -105,6 +106,7 @@ impl From for Settings { vfs => Io::External(vfs.to_string()), }, tracing_output: opts.tracing_output, + timer: false, } } } From 2d7a27fbfa557cc315b8e85149d874c9c4be0a3c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:03:32 -0400 Subject: [PATCH 123/425] Prevent panic in extension by out of bounds cursor idx --- extensions/tests/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index 92e4f874f..df8e8bca0 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -112,11 +112,14 @@ impl VTabModule for KVStoreVTab { if cursor.index.is_some_and(|c| c >= cursor.rows.len()) { return Err("cursor out of range".into()); } - let (_, ref key, ref val) = cursor.rows[cursor.index.unwrap_or(0)]; - match idx { - 0 => Ok(Value::from_text(key.clone())), // key - 1 => Ok(Value::from_text(val.clone())), // value - _ => Err("Invalid column".into()), + if let Some((_, ref key, ref val)) = cursor.rows.get(cursor.index.unwrap_or(0)) { + match idx { + 0 => Ok(Value::from_text(key.clone())), // key + 1 => Ok(Value::from_text(val.clone())), // value + _ => Err("Invalid column".into()), + } + } else { + Err("cursor out of range".into()) } } } From b685086cadca38f1c83151237ab68cf394a8c096 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:14:48 -0400 Subject: [PATCH 124/425] Support UPDATE for virtual tables --- core/translate/emitter.rs | 85 +++++++++++++++------ core/translate/main_loop.rs | 28 ++----- core/translate/update.rs | 145 +++++++++++++++++++++++++++++++++++- 3 files changed, 210 insertions(+), 48 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 1ecc16bff..215c6b3ac 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -6,6 +6,7 @@ use std::rc::Rc; use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; +use crate::schema::Table; use crate::translate::plan::{DeletePlan, Plan, Search}; use 
crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -600,20 +601,41 @@ fn emit_update_insns( if table_column.primary_key { program.emit_null(dest, None); } else { - program.emit_insn(Insn::Column { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); + match &table_ref.table { + Table::BTree(_) => { + program.emit_insn(Insn::Column { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + column: column_idx_in_index.unwrap_or(idx), + dest, + }); + } + Table::Virtual(_) => { + program.emit_insn(Insn::VColumn { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + column: column_idx_in_index.unwrap_or(idx), + dest, + }); + } + typ => unreachable!("query plan generated on unexpected table type {:?}", typ), + } } } } @@ -633,13 +655,34 @@ fn emit_update_insns( count: table_ref.columns().len(), dest_reg: record_reg, }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); + match &table_ref.table { + Table::BTree(_) => { + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + } + Table::Virtual(vtab) => { + let new_rowid = program.alloc_register(); + program.emit_insn(Insn::Copy { + src_reg: rowid_reg, + dst_reg: new_rowid, + amount: 0, + }); + let arg_count = table_ref.columns().len() + 2; + program.emit_insn(Insn::VUpdate { + cursor_id, + arg_count, + start_reg: record_reg, + vtab_ptr: vtab.implementation.as_ref().ctx as usize, + conflict_action: 0u16, + }); + } + _ => unreachable!("unexpected table type"), + } if let Some(limit_reg) = t_ctx.reg_limit { program.emit_insn(Insn::DecrJumpZero { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index ab7ae1a0e..9575eb900 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -109,7 +109,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenReadAwait {}); } } - (OperationMode::DELETE, Table::BTree(btree)) => { + (OperationMode::DELETE | OperationMode::UPDATE, Table::BTree(btree)) => { let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, @@ -131,11 +131,7 @@ pub fn init_loop( } program.emit_insn(Insn::OpenWriteAwait {}); } - (OperationMode::SELECT, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpenAsync { cursor_id }); - program.emit_insn(Insn::VOpenAwait {}); - } - (OperationMode::DELETE, Table::Virtual(_)) => { + (_, Table::Virtual(_)) => { program.emit_insn(Insn::VOpenAsync { cursor_id }); program.emit_insn(Insn::VOpenAwait {}); } @@ -158,14 +154,7 @@ pub fn init_loop( }); program.emit_insn(Insn::OpenReadAwait {}); } - OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page().into(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - OperationMode::UPDATE => { + OperationMode::DELETE | OperationMode::UPDATE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: table_cursor_id, root_page: table.table.get_root_page().into(), @@ -191,17 +180,10 @@ pub fn 
init_loop( }); program.emit_insn(Insn::OpenReadAwait); } - OperationMode::DELETE => { + OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page.into(), - }); - program.emit_insn(Insn::OpenWriteAwait {}); - } - OperationMode::UPDATE => { - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: index_cursor_id, - root_page: index.root_page.into(), + root_page: index.root_page, }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/update.rs b/core/translate/update.rs index 347e36acb..de1a568a8 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -1,4 +1,7 @@ +use std::sync::Arc; + use crate::translate::plan::Operation; +use crate::vdbe::BranchOffset; use crate::{ bail_parse_error, schema::{Schema, Table}, @@ -8,7 +11,7 @@ use crate::{ }; use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; -use super::emitter::emit_program; +use super::emitter::{emit_program, Resolver}; use super::optimizer::optimize_plan; use super::plan::{ Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, @@ -53,6 +56,7 @@ pub fn translate_update( ) -> crate::Result { let mut plan = prepare_update_plan(schema, body)?; optimize_plan(&mut plan, schema)?; + let resolver = Resolver::new(syms); // TODO: freestyling these numbers let mut program = ProgramBuilder::new(ProgramBuilderOpts { query_mode, @@ -65,6 +69,12 @@ pub fn translate_update( } pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result { + if body.with.is_some() { + bail_parse_error!("WITH clause is not supported"); + } + if body.or_conflict.is_some() { + bail_parse_error!("ON CONFLICT clause is not supported"); + } let table_name = &body.tbl_name.name; let table = match schema.get_table(table_name.0.as_str()) { Some(table) => table, @@ -86,7 +96,11 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< }) .unwrap_or(IterationDirection::Forwards); let table_references = vec![TableReference { - table: Table::BTree(btree_table.clone()), + table: match table.as_ref() { + Table::Virtual(vtab) => Table::Virtual(vtab.clone()), + Table::BTree(btree_table) => Table::BTree(btree_table.clone()), + _ => unreachable!(), + }, identifier: table_name.0.clone(), op: Operation::Scan { iter_dir, @@ -99,8 +113,8 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< .iter_mut() .map(|set| { let ident = normalize_ident(set.col_names[0].0.as_str()); - let col_index = btree_table - .columns + let col_index = table + .columns() .iter() .enumerate() .find_map(|(i, col)| { @@ -185,3 +199,126 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< contains_constant_false_condition: false, })) } + +// fn translate_vtab_update( +// mut program: ProgramBuilder, +// body: &mut Update, +// table: Arc, +// resolver: &Resolver, +// ) -> crate::Result { +// let start_label = program.allocate_label(); +// program.emit_insn(Insn::Init { +// target_pc: start_label, +// }); +// let start_offset = program.offset(); +// let vtab = table.virtual_table().unwrap(); +// let cursor_id = program.alloc_cursor_id( +// Some(table.get_name().to_string()), +// CursorType::VirtualTable(vtab.clone()), +// ); +// let referenced_tables = vec![TableReference { +// table: Table::Virtual(table.virtual_table().unwrap().clone()), +// identifier: table.get_name().to_string(), +// op: Operation::Scan { iter_dir: None }, +// 
join_info: None, +// }]; +// program.emit_insn(Insn::VOpenAsync { cursor_id }); +// program.emit_insn(Insn::VOpenAwait {}); +// +// let argv_start = program.alloc_registers(0); +// let end_label = program.allocate_label(); +// let skip_label = program.allocate_label(); +// program.emit_insn(Insn::VFilter { +// cursor_id, +// pc_if_empty: end_label, +// args_reg: argv_start, +// arg_count: 0, +// }); +// +// let loop_start = program.offset(); +// let start_reg = program.alloc_registers(2 + table.columns().len()); +// let old_rowid = start_reg; +// let new_rowid = start_reg + 1; +// let column_regs = start_reg + 2; +// +// program.emit_insn(Insn::RowId { +// cursor_id, +// dest: old_rowid, +// }); +// program.emit_insn(Insn::RowId { +// cursor_id, +// dest: new_rowid, +// }); +// +// for (i, _) in table.columns().iter().enumerate() { +// let dest = column_regs + i; +// program.emit_insn(Insn::VColumn { +// cursor_id, +// column: i, +// dest, +// }); +// } +// +// if let Some(ref mut where_clause) = body.where_clause { +// bind_column_references(where_clause, &referenced_tables, None)?; +// translate_condition_expr( +// &mut program, +// &referenced_tables, +// where_clause, +// ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true: BranchOffset::Placeholder, +// jump_target_when_false: skip_label, +// }, +// resolver, +// )?; +// } +// // prepare updated columns in place +// for expr in body.sets.iter() { +// let Some(col_index) = table.columns().iter().position(|t| { +// t.name +// .as_ref() +// .unwrap() +// .eq_ignore_ascii_case(&expr.col_names[0].0) +// }) else { +// bail_parse_error!("column {} not found", expr.col_names[0].0); +// }; +// translate_expr( +// &mut program, +// Some(&referenced_tables), +// &expr.expr, +// column_regs + col_index, +// resolver, +// )?; +// } +// +// let arg_count = 2 + table.columns().len(); +// program.emit_insn(Insn::VUpdate { +// cursor_id, +// arg_count, +// start_reg: old_rowid, +// vtab_ptr: vtab.implementation.ctx as usize, +// conflict_action: 0, +// }); +// +// program.resolve_label(skip_label, program.offset()); +// program.emit_insn(Insn::VNext { +// cursor_id, +// pc_if_next: loop_start, +// }); +// +// program.resolve_label(end_label, program.offset()); +// program.emit_insn(Insn::Halt { +// err_code: 0, +// description: String::new(), +// }); +// program.resolve_label(start_label, program.offset()); +// program.emit_insn(Insn::Transaction { write: true }); +// +// program.emit_constant_insns(); +// program.emit_insn(Insn::Goto { +// target_pc: start_offset, +// }); +// program.table_references = referenced_tables.clone(); +// Ok(program) +// } From 7993857020529f0a108e5e02c0ecfd82d857b8c2 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 24 Mar 2025 22:29:26 -0400 Subject: [PATCH 125/425] Add py tests for vtab update behavior --- testing/cli_tests/extensions.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index ac870ee4d..4d289f311 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -398,10 +398,35 @@ def test_kv(): limbo.run_test_fn( "select count(*) from t;", lambda res: "100" == res, "can insert 100 rows" ) + limbo.run_test_fn("update t set value = 'updated' where key = 'key33';", null) + limbo.run_test_fn( + "select * from t where key = 'key33';", + lambda res: res == "key33|updated", + "can update single row", + ) + limbo.run_test_fn( + "select COUNT(*) from t where value = 
'updated';",
+        lambda res: res == "1",
+        "only updated a single row",
+    )
+    limbo.run_test_fn("update t set value = 'updated2';", null)
+    limbo.run_test_fn(
+        "select COUNT(*) from t where value = 'updated2';",
+        lambda res: res == "100",
+        "can update all rows",
+    )
     limbo.run_test_fn("delete from t limit 96;", null, "can delete 96 rows")
     limbo.run_test_fn(
         "select count(*) from t;", lambda res: "4" == res, "four rows remain"
     )
+    limbo.run_test_fn(
+        "update t set key = '100' where 1;", null, "where clause evaluates properly"
+    )
+    limbo.run_test_fn(
+        "select * from t where key = '100';",
+        lambda res: res == "100|updated2",
+        "there is only 1 key remaining after setting all keys to same value",
+    )
     limbo.quit()

From 0ffecb3021addea05db1a8af937a6a550ab6a761 Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Tue, 25 Mar 2025 10:47:05 -0400
Subject: [PATCH 126/425] Add comments to document update on vtabs

---
 core/translate/update.rs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/core/translate/update.rs b/core/translate/update.rs
index de1a568a8..296d8a6ff 100644
--- a/core/translate/update.rs
+++ b/core/translate/update.rs
@@ -1,7 +1,4 @@
-use std::sync::Arc;
-
 use crate::translate::plan::Operation;
-use crate::vdbe::BranchOffset;
 use crate::{
     bail_parse_error,
     schema::{Schema, Table},
@@ -11,7 +8,7 @@ use crate::{
 };
 use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update};
 
-use super::emitter::{emit_program, Resolver};
+use super::emitter::emit_program;
 use super::optimizer::optimize_plan;
 use super::plan::{
     Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan,
@@ -56,7 +53,6 @@ pub fn translate_update(
 ) -> crate::Result {
     let mut plan = prepare_update_plan(schema, body)?;
     optimize_plan(&mut plan, schema)?;
-    let resolver = Resolver::new(syms);
     // TODO: freestyling these numbers
     let mut program = ProgramBuilder::new(ProgramBuilderOpts {
         query_mode,

From 62d1447cd681dd67f3571ca1225e97857103d9de Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Wed, 2 Apr 2025 23:48:14 -0400
Subject: [PATCH 127/425] Adapt query plan to handle vtabs for updates

---
 core/translate/emitter.rs | 183 ++++++++++++++++++++------------------
 core/translate/update.rs  | 123 -------------------------
 core/vdbe/execute.rs      |  13 ++-
 3 files changed, 103 insertions(+), 216 deletions(-)

diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs
index 215c6b3ac..7106bc14a 100644
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -6,7 +6,6 @@ use std::rc::Rc;
 use limbo_sqlite3_parser::ast::{self};
 
 use crate::function::Func;
-use crate::schema::Table;
 use crate::translate::plan::{DeletePlan, Plan, Search};
 use crate::util::exprs_are_equivalent;
 use crate::vdbe::builder::ProgramBuilder;
@@ -531,29 +530,67 @@ fn emit_update_insns(
 ) -> crate::Result<()> {
     let table_ref = &plan.table_references.first().unwrap();
     let loop_labels = t_ctx.labels_main_loop.first().unwrap();
-    let (cursor_id, index) = match &table_ref.op {
-        Operation::Scan { .. } => (program.resolve_cursor_id(&table_ref.identifier), None),
+    let (cursor_id, index, is_virtual) = match &table_ref.op {
+        Operation::Scan { .. } => (
+            program.resolve_cursor_id(&table_ref.identifier),
+            None,
+            table_ref.virtual_table().is_some(),
+        ),
         Operation::Search(search) => match search {
-            &Search::RowidEq { .. } | Search::RowidSearch { .. } => {
-                (program.resolve_cursor_id(&table_ref.identifier), None)
-            }
+            &Search::RowidEq { .. } | Search::RowidSearch { .. 
} => ( + program.resolve_cursor_id(&table_ref.identifier), + None, + false, + ), Search::IndexSearch { index, .. } => ( program.resolve_cursor_id(&table_ref.identifier), Some((index.clone(), program.resolve_cursor_id(&index.name))), + false, ), }, _ => return Ok(()), }; - let rowid_reg = program.alloc_register(); + + for cond in plan.where_clause.iter().filter(|c| c.is_constant()) { + let jump_target = program.allocate_label(); + let meta = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true: jump_target, + jump_target_when_false: t_ctx.label_main_loop_end.unwrap(), + }; + translate_condition_expr( + program, + &plan.table_references, + &cond.expr, + meta, + &t_ctx.resolver, + )?; + program.resolve_label(jump_target, program.offset()); + } + let mut beg = program.alloc_registers( + table_ref.table.columns().len() + + if is_virtual { + 2 // two args before the relevant columns for VUpdate + } else { + 1 // rowid reg + }, + ); program.emit_insn(Insn::RowId { cursor_id, - dest: rowid_reg, + dest: beg, }); // if no rowid, we're done program.emit_insn(Insn::IsNull { - reg: rowid_reg, + reg: beg, target_pc: t_ctx.label_main_loop_end.unwrap(), }); + if is_virtual { + program.emit_insn(Insn::Copy { + src_reg: beg, + dst_reg: beg + 1, + amount: 0, + }) + } if let Some(offset) = t_ctx.reg_offset { program.emit_insn(Insn::IfPos { @@ -577,12 +614,13 @@ fn emit_update_insns( &t_ctx.resolver, )?; } - let first_col_reg = program.alloc_registers(table_ref.table.columns().len()); + // we scan a column at a time, loading either the column's values, or the new value // from the Set expression, into registers so we can emit a MakeRecord and update the row. + let start = if is_virtual { beg + 2 } else { beg + 1 }; for idx in 0..table_ref.columns().len() { - if let Some((idx, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { - let target_reg = first_col_reg + idx; + let target_reg = start + idx; + if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { translate_expr( program, Some(&plan.table_references), @@ -597,91 +635,66 @@ fn emit_update_insns( .iter() .position(|c| Some(&c.name) == table_column.name.as_ref()) }); - let dest = first_col_reg + idx; - if table_column.primary_key { - program.emit_null(dest, None); + + // don't emit null for pkey of virtual tables. 
they require first two args + // before the 'record' to be explicitly non-null + if table_column.primary_key && !is_virtual { + program.emit_null(target_reg, None); + } else if is_virtual { + program.emit_insn(Insn::VColumn { + cursor_id, + column: idx, + dest: target_reg, + }); } else { - match &table_ref.table { - Table::BTree(_) => { - program.emit_insn(Insn::Column { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); - } - Table::Virtual(_) => { - program.emit_insn(Insn::VColumn { - cursor_id: *index - .as_ref() - .and_then(|(_, id)| { - if column_idx_in_index.is_some() { - Some(id) - } else { - None - } - }) - .unwrap_or(&cursor_id), - column: column_idx_in_index.unwrap_or(idx), - dest, - }); - } - typ => unreachable!("query plan generated on unexpected table type {:?}", typ), - } + program.emit_insn(Insn::Column { + cursor_id: *index + .as_ref() + .and_then(|(_, id)| { + if column_idx_in_index.is_some() { + Some(id) + } else { + None + } + }) + .unwrap_or(&cursor_id), + column: column_idx_in_index.unwrap_or(idx), + dest: target_reg, + }); } } } if let Some(btree_table) = table_ref.btree() { if btree_table.is_strict { program.emit_insn(Insn::TypeCheck { - start_reg: first_col_reg, + start_reg: start, count: table_ref.columns().len(), check_generated: true, table_reference: Rc::clone(&btree_table), }); } - } - let record_reg = program.alloc_register(); - program.emit_insn(Insn::MakeRecord { - start_reg: first_col_reg, - count: table_ref.columns().len(), - dest_reg: record_reg, - }); - match &table_ref.table { - Table::BTree(_) => { - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); - } - Table::Virtual(vtab) => { - let new_rowid = program.alloc_register(); - program.emit_insn(Insn::Copy { - src_reg: rowid_reg, - dst_reg: new_rowid, - amount: 0, - }); - let arg_count = table_ref.columns().len() + 2; - program.emit_insn(Insn::VUpdate { - cursor_id, - arg_count, - start_reg: record_reg, - vtab_ptr: vtab.implementation.as_ref().ctx as usize, - conflict_action: 0u16, - }); - } - _ => unreachable!("unexpected table type"), + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: start, + count: table_ref.columns().len(), + dest_reg: record_reg, + }); + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: beg, + record_reg, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + } else if let Some(vtab) = table_ref.virtual_table() { + let arg_count = table_ref.columns().len() + 2; + program.emit_insn(Insn::VUpdate { + cursor_id, + arg_count, + start_reg: beg, + vtab_ptr: vtab.implementation.as_ref().ctx as usize, + conflict_action: 0u16, + }); } if let Some(limit_reg) = t_ctx.reg_limit { diff --git a/core/translate/update.rs b/core/translate/update.rs index 296d8a6ff..71293483c 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -195,126 +195,3 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< contains_constant_false_condition: false, })) } - -// fn translate_vtab_update( -// mut program: ProgramBuilder, -// body: &mut Update, -// table: Arc
, -// resolver: &Resolver, -// ) -> crate::Result { -// let start_label = program.allocate_label(); -// program.emit_insn(Insn::Init { -// target_pc: start_label, -// }); -// let start_offset = program.offset(); -// let vtab = table.virtual_table().unwrap(); -// let cursor_id = program.alloc_cursor_id( -// Some(table.get_name().to_string()), -// CursorType::VirtualTable(vtab.clone()), -// ); -// let referenced_tables = vec![TableReference { -// table: Table::Virtual(table.virtual_table().unwrap().clone()), -// identifier: table.get_name().to_string(), -// op: Operation::Scan { iter_dir: None }, -// join_info: None, -// }]; -// program.emit_insn(Insn::VOpenAsync { cursor_id }); -// program.emit_insn(Insn::VOpenAwait {}); -// -// let argv_start = program.alloc_registers(0); -// let end_label = program.allocate_label(); -// let skip_label = program.allocate_label(); -// program.emit_insn(Insn::VFilter { -// cursor_id, -// pc_if_empty: end_label, -// args_reg: argv_start, -// arg_count: 0, -// }); -// -// let loop_start = program.offset(); -// let start_reg = program.alloc_registers(2 + table.columns().len()); -// let old_rowid = start_reg; -// let new_rowid = start_reg + 1; -// let column_regs = start_reg + 2; -// -// program.emit_insn(Insn::RowId { -// cursor_id, -// dest: old_rowid, -// }); -// program.emit_insn(Insn::RowId { -// cursor_id, -// dest: new_rowid, -// }); -// -// for (i, _) in table.columns().iter().enumerate() { -// let dest = column_regs + i; -// program.emit_insn(Insn::VColumn { -// cursor_id, -// column: i, -// dest, -// }); -// } -// -// if let Some(ref mut where_clause) = body.where_clause { -// bind_column_references(where_clause, &referenced_tables, None)?; -// translate_condition_expr( -// &mut program, -// &referenced_tables, -// where_clause, -// ConditionMetadata { -// jump_if_condition_is_true: false, -// jump_target_when_true: BranchOffset::Placeholder, -// jump_target_when_false: skip_label, -// }, -// resolver, -// )?; -// } -// // prepare updated columns in place -// for expr in body.sets.iter() { -// let Some(col_index) = table.columns().iter().position(|t| { -// t.name -// .as_ref() -// .unwrap() -// .eq_ignore_ascii_case(&expr.col_names[0].0) -// }) else { -// bail_parse_error!("column {} not found", expr.col_names[0].0); -// }; -// translate_expr( -// &mut program, -// Some(&referenced_tables), -// &expr.expr, -// column_regs + col_index, -// resolver, -// )?; -// } -// -// let arg_count = 2 + table.columns().len(); -// program.emit_insn(Insn::VUpdate { -// cursor_id, -// arg_count, -// start_reg: old_rowid, -// vtab_ptr: vtab.implementation.ctx as usize, -// conflict_action: 0, -// }); -// -// program.resolve_label(skip_label, program.offset()); -// program.emit_insn(Insn::VNext { -// cursor_id, -// pc_if_next: loop_start, -// }); -// -// program.resolve_label(end_label, program.offset()); -// program.emit_insn(Insn::Halt { -// err_code: 0, -// description: String::new(), -// }); -// program.resolve_label(start_label, program.offset()); -// program.emit_insn(Insn::Transaction { write: true }); -// -// program.emit_constant_insns(); -// program.emit_insn(Insn::Goto { -// target_pc: start_offset, -// }); -// program.table_references = referenced_tables.clone(); -// Ok(program) -// } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index a09da4ac0..0e7fc583b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1848,17 +1848,14 @@ pub fn op_row_id( let rowid = { let mut index_cursor = state.get_cursor(index_cursor_id); let index_cursor = 
index_cursor.as_btree_mut(); - let rowid = index_cursor.rowid()?; - rowid + index_cursor.rowid()? }; let mut table_cursor = state.get_cursor(table_cursor_id); let table_cursor = table_cursor.as_btree_mut(); - let deferred_seek = - match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? { - CursorResult::Ok(_) => None, - CursorResult::IO => Some((index_cursor_id, table_cursor_id)), - }; - deferred_seek + match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? { + CursorResult::Ok(_) => None, + CursorResult::IO => Some((index_cursor_id, table_cursor_id)), + } }; if let Some(deferred_seek) = deferred_seek { state.deferred_seek = Some(deferred_seek); From 13ae19c78c19c115a9452a19c8c30854bf74f720 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 07:08:01 -0400 Subject: [PATCH 128/425] Remove unnecessary clones from mc cursors --- core/vdbe/execute.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0e7fc583b..5cf2e6cd2 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1582,7 +1582,7 @@ pub fn op_halt( ))); } } - match program.halt(pager.clone(), state, mv_store.clone())? { + match program.halt(pager.clone(), state, mv_store)? { StepResult::Done => Ok(InsnFunctionStepResult::Done), StepResult::IO => Ok(InsnFunctionStepResult::IO), StepResult::Row => Ok(InsnFunctionStepResult::Row), @@ -1661,7 +1661,7 @@ pub fn op_auto_commit( }; let conn = program.connection.upgrade().unwrap(); if matches!(state.halt_state, Some(HaltState::Checkpointing)) { - return match program.halt(pager.clone(), state, mv_store.clone())? { + return match program.halt(pager.clone(), state, mv_store)? { super::StepResult::Done => Ok(InsnFunctionStepResult::Done), super::StepResult::IO => Ok(InsnFunctionStepResult::IO), super::StepResult::Row => Ok(InsnFunctionStepResult::Row), @@ -1689,7 +1689,7 @@ pub fn op_auto_commit( "cannot commit - no transaction is active".to_string(), )); } - return match program.halt(pager.clone(), state, mv_store.clone())? { + return match program.halt(pager.clone(), state, mv_store)? 
{ super::StepResult::Done => Ok(InsnFunctionStepResult::Done), super::StepResult::IO => Ok(InsnFunctionStepResult::IO), super::StepResult::Row => Ok(InsnFunctionStepResult::Row), From f223e66c82a4f862749b6a9c14fcb48d630f6fab Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 4 Apr 2025 07:29:16 -0400 Subject: [PATCH 129/425] Remove unused mut and fix merge conflict issues --- core/translate/emitter.rs | 2 +- core/translate/main_loop.rs | 4 ++-- core/translate/update.rs | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 7106bc14a..5049bb738 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -567,7 +567,7 @@ fn emit_update_insns( )?; program.resolve_label(jump_target, program.offset()); } - let mut beg = program.alloc_registers( + let beg = program.alloc_registers( table_ref.table.columns().len() + if is_virtual { 2 // two args before the relevant columns for VUpdate diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 9575eb900..51bd05382 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -109,7 +109,7 @@ pub fn init_loop( program.emit_insn(Insn::OpenReadAwait {}); } } - (OperationMode::DELETE | OperationMode::UPDATE, Table::BTree(btree)) => { + (OperationMode::DELETE, Table::BTree(btree)) => { let root_page = btree.root_page; program.emit_insn(Insn::OpenWriteAsync { cursor_id, @@ -183,7 +183,7 @@ pub fn init_loop( OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWriteAsync { cursor_id: index_cursor_id, - root_page: index.root_page, + root_page: index.root_page.into(), }); program.emit_insn(Insn::OpenWriteAwait {}); } diff --git a/core/translate/update.rs b/core/translate/update.rs index 71293483c..62c6c6f9f 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -76,9 +76,6 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< Some(table) => table, None => bail_parse_error!("Parse error: no such table: {}", table_name), }; - let Some(btree_table) = table.btree() else { - bail_parse_error!("Error: {} is not a btree table", table_name); - }; let iter_dir = body .order_by .as_ref() From 2d009083bae7682cbb6736e56b11269b71ee34e4 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 9 Apr 2025 19:27:58 +0300 Subject: [PATCH 130/425] core: Fix syscall VFS on Linux Fix the syscall VFS on Linux not to use `PlatformIO`, which is just an alias for `io_uring`. --- core/io/mod.rs | 4 ++++ core/lib.rs | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/io/mod.rs b/core/io/mod.rs index 7eb8845bb..b5321637b 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -191,6 +191,7 @@ cfg_block! { mod unix; #[cfg(feature = "fs")] pub use unix::UnixIO; + pub use unix::UnixIO as SyscallIO; pub use io_uring::UringIO as PlatformIO; } @@ -199,16 +200,19 @@ cfg_block! 
{ #[cfg(feature = "fs")] pub use unix::UnixIO; pub use unix::UnixIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(target_os = "windows")] { mod windows; pub use windows::WindowsIO as PlatformIO; + pub use PlatformIO as SyscallIO; } #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] { mod generic; pub use generic::GenericIO as PlatformIO; + pub use PlatformIO as SyscallIO; } } diff --git a/core/lib.rs b/core/lib.rs index e827c3d0d..7ccfea4fe 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -31,7 +31,9 @@ use fallible_iterator::FallibleIterator; pub use io::UnixIO; #[cfg(all(feature = "fs", target_os = "linux", feature = "io_uring"))] pub use io::UringIO; -pub use io::{Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, WriteCompletion, IO}; +pub use io::{ + Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, +}; use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; @@ -209,7 +211,7 @@ impl Database { Some(vfs) => vfs, None => match vfs.trim() { "memory" => Arc::new(MemoryIO::new()), - "syscall" => Arc::new(PlatformIO::new()?), + "syscall" => Arc::new(SyscallIO::new()?), #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Arc::new(UringIO::new()?), other => { From 94217319a2a7d8cffdff0451f89ec3603417d62b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 9 Apr 2025 14:21:18 -0300 Subject: [PATCH 131/425] Fix Explain to be case insensitive --- cli/app.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/app.rs b/cli/app.rs index cffe9022f..c5cb2ff4f 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -406,7 +406,10 @@ impl<'a> Limbo<'a> { io_time_elapsed_samples: vec![], execute_time_elapsed_samples: vec![], }; - if input.trim_start().starts_with("explain") { + // TODO this is a quickfix. Some ideas to do case insensitive comparisons is to use + // Uncased or Unicase. + let temp = input.to_lowercase(); + if temp.trim_start().starts_with("explain") { if let Ok(Some(stmt)) = self.conn.query(input) { let _ = self.writeln(stmt.explain().as_bytes()); } From d96906ebc31045cf3fe5ea6dc7eeffc8046f9434 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Wed, 9 Apr 2025 14:02:58 -0400 Subject: [PATCH 132/425] update simulator readme --- simulator/README.md | 90 +++++++++++++++++++++++++++++++-------------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/simulator/README.md b/simulator/README.md index 4e9081bd7..87d61479d 100644 --- a/simulator/README.md +++ b/simulator/README.md @@ -15,20 +15,18 @@ Based on these parameters, we randomly generate **interaction plans**. Interacti An example of a property is the following: -```json -{ - "name": "Read your own writes", - "queries": [ - "INSERT INTO t1 (id) VALUES (1)", - "SELECT * FROM t1 WHERE id = 1" - ], - "assertions": [ - "result.rows.length == 1", - "result.rows[0].id == 1" - ] -} +```sql +-- begin testing 'Select-Select-Optimizer' +-- ASSUME table marvelous_ideal exists; +SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE; +SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486); +-- ASSERT select queries should return the same amount of results; +-- end testing 'Select-Select-Optimizer' ``` +The simulator starts from an initially empty database, adding random interactions based on the workload distribution. 
+
 The simulator executes the interaction plans in a loop, and checks the assertions. It can add random queries unrelated to the properties without
 breaking the property invariants to reach more diverse states and respect the configured workload distribution.
 
@@ -44,36 +42,72 @@ The simulator code is broken into 4 main parts:
 To run the simulator, you can use the following command:
 
 ```bash
-cargo run
+RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim
 ```
 
-This prompt (in the future) will invoke a clap command line interface to configure the simulator. For now, the simulator runs with the default configurations changing the `main.rs` file. If you want to see the logs, you can change the `RUST_LOG` environment variable.
+The simulator CLI has a few configuration options that you can explore via `--help` flag.
 
-```bash
-RUST_LOG=info cargo run --bin limbo_sim
+```txt
+The Limbo deterministic simulator
+
+Usage: limbo_sim [OPTIONS]
+
+Options:
+  -s, --seed <SEED>                  set seed for reproducible runs
+  -d, --doublecheck                  enable doublechecking, run the simulator with the plan twice and check output equality
+  -n, --maximum-size <MAXIMUM_SIZE>  change the maximum size of the randomly generated sequence of interactions [default: 5000]
+  -k, --minimum-size <MINIMUM_SIZE>  change the minimum size of the randomly generated sequence of interactions [default: 1000]
+  -t, --maximum-time <MAXIMUM_TIME>  change the maximum time of the simulation(in seconds) [default: 3600]
+  -l, --load <LOAD>                  load plan from the bug base
+  -w, --watch                        enable watch mode that reruns the simulation on file changes
+      --differential                 run differential testing between sqlite and Limbo
+  -h, --help                         Print help
+  -V, --version                      Print version
 ```
 
 ## Adding new properties
 
-Todo
+The properties are defined in `simulator/generation/property.rs` in the `Property` enum. Each property is documented with
+inline doc comments; an example is given below:
+
+```rust
+/// Insert-Select is a property in which the inserted row
+/// must be in the resulting rows of a select query that has a
+/// where clause that matches the inserted row.
+/// The execution of the property is as follows
+/// INSERT INTO <t> VALUES (...)
+/// I_0
+/// I_1
+/// ...
+/// I_n
+/// SELECT * FROM <t> WHERE <predicate>
+/// The interactions in the middle have the following constraints:
+/// - There will be no errors in the middle interactions.
+/// - The inserted row will not be deleted.
+/// - The inserted row will not be updated.
+/// - The table `t` will not be renamed, dropped, or altered.
+InsertValuesSelect {
+    /// The insert query
+    insert: Insert,
+    /// Selected row index
+    row_index: usize,
+    /// Additional interactions in the middle of the property
+    queries: Vec<Query>,
+    /// The select query
+    select: Select,
+},
+```
 
-## Adding new generation functions
+If you would like to add a new property, you can add a new variant to the `Property` enum, and the corresponding
+generation function in `simulator/generation/property.rs`. The generation function should return a `Property` instance, and
+it should generate the necessary queries and assertions for the property.
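+
+As an illustration, the overall shape of such a generation function is sketched below (the helper functions and the
+exact fields on `Insert` are illustrative placeholders, not the simulator's real API):
+
+```rust
+// A minimal sketch, not the real implementation: build an
+// InsertValuesSelect property by pairing a random INSERT with a SELECT
+// that must find one of the inserted rows.
+fn generate_insert_values_select(rng: &mut impl rand::Rng, env: &SimulatorEnv) -> Property {
+    // Placeholder helpers: choose an existing table and generate a
+    // random INSERT statement for it.
+    let (table, insert) = arbitrary_insert_for(rng, env);
+    // Remember which inserted row the final SELECT must return.
+    let row_index = rng.gen_range(0..insert.values.len());
+    // Build a SELECT whose WHERE clause matches exactly that row.
+    let select = select_matching(rng, &table, &insert.values[row_index]);
+    Property::InsertValuesSelect {
+        insert,
+        row_index,
+        // Unrelated interactions in the middle are filled in afterwards,
+        // subject to the constraints documented on the variant above.
+        queries: Vec::new(),
+        select,
+    }
+}
+```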
 
-Todo
-
-## Adding new models
-
-Todo
-
-## Coverage with Limbo
-
-Todo
 
 ## Automatic Compatibility Testing with SQLite
 
-Todo
+You can use the `--differential` flag to run the simulator in differential testing mode. This mode will run the same interaction plan on both Limbo and SQLite and compare the results. It will also check for any panics or errors in either database.
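+
+For example, to start a reproducible differential run (flags as listed in the `--help` output above; the `--`
+separates cargo's own arguments from the simulator's):
+
+```bash
+cargo run --bin limbo_sim -- --differential --seed 42
+```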
 
 ## Resources
+
 - [(reading) TigerBeetle Deterministic Simulation Testing](https://docs.tigerbeetle.com/about/vopr/)
 - [(reading) sled simulation guide (jepsen-proof engineering)](https://sled.rs/simulation.html)
 - [(video) "Testing Distributed Systems w/ Deterministic Simulation" by Will Wilson](https://www.youtube.com/watch?v=4fFDFbi3toc)

From 0bee24e7adc3c6f723019364d87857b72b3c7601 Mon Sep 17 00:00:00 2001
From: alpaylan 
Date: Wed, 9 Apr 2025 14:05:22 -0400
Subject: [PATCH 133/425] update dst section of testing.md

---
 docs/testing.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/docs/testing.md b/docs/testing.md
index 21823957f..399cc53fe 100644
--- a/docs/testing.md
+++ b/docs/testing.md
@@ -74,8 +74,63 @@ This will enable trace-level logs for the limbo_core crate and disable logs else
 
 ## Deterministic Simulation Testing (DST):
 
-TODO!
+The Limbo simulator uses randomized deterministic simulations to test the Limbo database's behavior.
+Each simulation begins with a random configuration:
+
+- the database workload distribution (percentages of reads, writes, deletes...),
+- database parameters (page size),
+- number of readers or writers, etc.
+
+Based on these parameters, we randomly generate **interaction plans**. Interaction plans consist of statements/queries, and assertions that will be executed in order. The building blocks of interaction plans are:
+
+- Randomly generated SQL queries satisfying the workload distribution,
+- Properties, which contain multiple matching queries with assertions indicating the expected result.
+
+An example of a property is the following:
+
+```sql
+-- begin testing 'Select-Select-Optimizer'
+-- ASSUME table marvelous_ideal exists;
+SELECT ((devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486)) FROM marvelous_ideal WHERE TRUE;
+SELECT * FROM marvelous_ideal WHERE (devoted_ahmed = -9142609771.541502 AND loving_wicker = -1246708244.164486);
+-- ASSERT select queries should return the same amount of results;
+-- end testing 'Select-Select-Optimizer'
+```
+
+The simulator starts from an initially empty database, adding random interactions based on the workload distribution. It can
+add random queries unrelated to the properties without breaking the property invariants, in order to reach more diverse states while respecting the configured workload distribution.
+
+The simulator executes the interaction plans in a loop and checks the assertions.
+
+## Usage
+
+To run the simulator, you can use the following command:
+
+```bash
+RUST_LOG=limbo_sim=debug cargo run --bin limbo_sim
+```
+
+The simulator CLI has a few configuration options that you can explore via `--help` flag.
+
+```txt
+The Limbo deterministic simulator
+
+Usage: limbo_sim [OPTIONS]
+
+Options:
+  -s, --seed <SEED>                  set seed for reproducible runs
+  -d, --doublecheck                  enable doublechecking, run the simulator with the plan twice and check output equality
+  -n, --maximum-size <MAXIMUM_SIZE>  change the maximum size of the randomly generated sequence of interactions [default: 5000]
+  -k, --minimum-size <MINIMUM_SIZE>  change the minimum size of the randomly generated sequence of interactions [default: 1000]
+  -t, --maximum-time <MAXIMUM_TIME>  change the maximum time of the simulation(in seconds) [default: 3600]
+  -l, --load <LOAD>                  load plan from the bug base
+  -w, --watch                        enable watch mode that reruns the simulation on file changes
+      --differential                 run differential testing between sqlite and Limbo
+  -h, --help                         Print help
+  -V, --version                      Print version
+```
 
 ## Fuzzing

From 5643a0abba793fe93799109438b680bff264e49e Mon Sep 17 00:00:00 2001
From: PThorpe92 
Date: Wed, 9 Apr 2025 19:31:35 -0400
Subject: [PATCH 134/425] Don't emit ansi codes when outputting logs to a file

---
 cli/app.rs                  | 27 ++++++++++++++++-----------
 testing/cli_tests/memory.py |  2 --
 testing/testing             |  0
 3 files changed, 16 insertions(+), 13 deletions(-)
 delete mode 100644 testing/testing

diff --git a/cli/app.rs b/cli/app.rs
index c5cb2ff4f..3f04ab9fe 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -818,22 +818,27 @@ impl<'a> Limbo<'a> {
     }
 
     pub fn init_tracing(&mut self) -> Result {
-        let (non_blocking, guard) = if let Some(file) = &self.opts.tracing_output {
-            tracing_appender::non_blocking(
-                std::fs::File::options()
-                    .append(true)
-                    .create(true)
-                    .open(file)?,
-            )
-        } else {
-            tracing_appender::non_blocking(std::io::stderr())
-        };
+        let ((non_blocking, guard), should_emit_ansi) =
+            if let Some(file) = &self.opts.tracing_output {
+                (
+                    tracing_appender::non_blocking(
+                        std::fs::File::options()
+                            .append(true)
+                            .create(true)
+                            .open(file)?,
+                    ),
+                    false,
+                )
+            } else {
+                (tracing_appender::non_blocking(std::io::stderr()), true)
+            };
         if let Err(e) = tracing_subscriber::registry()
             .with(
                 tracing_subscriber::fmt::layer()
                     .with_writer(non_blocking)
                     .with_line_number(true)
-                    .with_thread_ids(true),
+                    .with_thread_ids(true)
+                    .with_ansi(should_emit_ansi),
             )
             .with(EnvFilter::from_default_env())
             .try_init()
diff --git a/testing/cli_tests/memory.py b/testing/cli_tests/memory.py
index e96df3475..da98bcc1d 100755
--- a/testing/cli_tests/memory.py
+++ b/testing/cli_tests/memory.py
@@ -2,8 +2,6 @@
 import os
 from test_limbo_cli import TestLimboShell
 
-
-sqlite_exec = "./target/debug/limbo"
 sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
diff --git a/testing/testing b/testing/testing
deleted file mode 100644
index e69de29bb..000000000

From 11782cbff8bf3b2f8352218a25bc208919e0c312 Mon Sep 17 00:00:00 2001
From: Pekka Enberg 
Date: Thu, 10 Apr 2025 07:52:10 +0300
Subject: [PATCH 135/425] core/btree: Clean up imports

---
 core/storage/btree.rs | 85 ++++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 8f4afb090..e77ab88bb 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -1,28 +1,40 @@
 use tracing::debug;
 
-use crate::storage::pager::Pager;
-use crate::storage::sqlite3_ondisk::{
-    read_u32, read_varint, BTreeCell, PageContent, PageType, TableInteriorCell, TableLeafCell,
-};
-use 
crate::translate::plan::IterationDirection; -use crate::MvCursor; -use crate::types::{ - compare_immutable, CursorResult, ImmutableRecord, OwnedValue, RefValue, SeekKey, SeekOp, +use crate::{ + return_corrupt, + types::{ + compare_immutable, CursorResult, ImmutableRecord, OwnedValue, RefValue, SeekKey, SeekOp, + }, + LimboError, Result, }; -use crate::{return_corrupt, LimboError, Result}; -use std::cell::{Cell, Ref, RefCell}; -use std::cmp::Ordering; #[cfg(debug_assertions)] use std::collections::HashSet; -use std::pin::Pin; -use std::rc::Rc; +use std::{ + cell::{Cell, Ref, RefCell}, + cmp::Ordering, + pin::Pin, + rc::Rc, +}; -use super::pager::PageRef; -use super::sqlite3_ondisk::{ - read_record, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, - DATABASE_HEADER_SIZE, +use super::{ + pager::PageRef, + sqlite3_ondisk::{ + read_record, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, + DATABASE_HEADER_SIZE, + }, }; /* @@ -4844,31 +4856,28 @@ fn shift_pointers_left(page: &mut PageContent, cell_idx: usize) { #[cfg(test)] mod tests { - use rand::thread_rng; - use rand::Rng; - use rand_chacha::rand_core::RngCore; - use rand_chacha::rand_core::SeedableRng; - use rand_chacha::ChaCha8Rng; + use rand::{thread_rng, Rng}; + use rand_chacha::{ + rand_core::{RngCore, SeedableRng}, + ChaCha8Rng, + }; use test_log::test; use super::*; - use crate::fast_lock::SpinLock; - use crate::io::{Buffer, Completion, MemoryIO, OpenFlags, IO}; - use crate::storage::database::DatabaseFile; - use crate::storage::page_cache::DumbLruPageCache; - use crate::storage::sqlite3_ondisk; - use crate::storage::sqlite3_ondisk::DatabaseHeader; - use crate::types::Text; - use crate::vdbe::Register; - use crate::Connection; - use crate::{BufferPool, DatabaseStorage, WalFile, WalFileShared, WriteCompletion}; - use std::cell::RefCell; - use std::collections::HashSet; - use std::mem::transmute; - use std::ops::Deref; - use std::panic; - use std::rc::Rc; - use std::sync::Arc; + use crate::{ + fast_lock::SpinLock, + io::{Buffer, Completion, MemoryIO, OpenFlags, IO}, + storage::{ + database::DatabaseFile, page_cache::DumbLruPageCache, sqlite3_ondisk, + sqlite3_ondisk::DatabaseHeader, + }, + types::Text, + vdbe::Register, + BufferPool, Connection, DatabaseStorage, WalFile, WalFileShared, WriteCompletion, + }; + use std::{ + cell::RefCell, collections::HashSet, mem::transmute, ops::Deref, panic, rc::Rc, sync::Arc, + }; use tempfile::TempDir; From 86a4d3e33b732fd990e0b184c761d3f66996f2d7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 08:19:08 +0300 Subject: [PATCH 136/425] core/btree: Move B-Tree header offsets in a module The grouping (with a fancy comment) makes the code a bit more readable. --- core/storage/btree.rs | 129 +++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 59 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e77ab88bb..0e4398ca3 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -37,32 +37,43 @@ use super::{ }, }; -/* - These are offsets of fields in the header of a b-tree page. -*/ - -/// type of btree page -> u8 -const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0; -/// pointer to first freeblock -> u16 -/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. -/// A freeblock is a structure used to identify unallocated space within a b-tree page. -/// Freeblocks are organized as a chain. 
+/// The B-Tree page header is 12 bytes for interior pages and 8 bytes for leaf pages. /// -/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead -/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. -const PAGE_HEADER_OFFSET_FIRST_FREEBLOCK: usize = 1; -/// number of cells in the page -> u16 -const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3; -/// pointer to first byte of cell allocated content from top -> u16 -/// SQLite strives to place cells as far toward the end of the b-tree page as it can, -/// in order to leave space for future growth of the cell pointer array. -/// = the cell content area pointer moves leftward as cells are added to the page -const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5; -/// number of fragmented bytes -> u8 -/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. -const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7; -/// if internalnode, pointer right most pointer (saved separately from cells) -> u32 -const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8; +/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+ +/// | Page | First Freeblock | Cell Count | Cell Content | Frag. | Right-most | +/// | Type | Offset | | Area Start | Bytes | pointer | +/// +--------+-----------------+-----------------+-----------------+--------+----- ..... ----+ +/// 0 1 2 3 4 5 6 7 8 11 +/// +pub mod offset { + /// type of btree page -> u8 + pub const BTREE_PAGE_TYPE: usize = 0; + + /// pointer to first freeblock -> u16 + /// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. + /// A freeblock is a structure used to identify unallocated space within a b-tree page. + /// Freeblocks are organized as a chain. + /// + /// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead + /// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. + pub const BTREE_FIRST_FREEBLOCK: usize = 1; + + /// number of cells in the page -> u16 + pub const BTREE_CELL_COUNT: usize = 3; + + /// pointer to first byte of cell allocated content from top -> u16 + /// SQLite strives to place cells as far toward the end of the b-tree page as it can, + /// in order to leave space for future growth of the cell pointer array. + /// = the cell content area pointer moves leftward as cells are added to the page + pub const BTREE_CELL_CONTENT_AREA: usize = 5; + + /// number of fragmented bytes -> u8 + /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. + pub const BTREE_FRAGMENTED_BYTES_COUNT: usize = 7; + + /// if internalnode, pointer right most pointer (saved separately from cells) -> u32 + pub const BTREE_RIGHTMOST_PTR: usize = 8; +} /// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than /// this will be declared corrupt. This value is calculated based on a @@ -241,7 +252,7 @@ impl BTreeKey<'_> { struct BalanceInfo { /// Old pages being balanced. pages_to_balance: Vec, - /// Bookkeeping of the rightmost pointer so the PAGE_HEADER_OFFSET_RIGHTMOST_PTR can be updated. + /// Bookkeeping of the rightmost pointer so the offset::BTREE_RIGHTMOST_PTR can be updated. 
rightmost_pointer: *mut u8, /// Divider cells of old pages divider_cells: Vec>, @@ -2240,7 +2251,7 @@ impl BTreeCursor { let new_last_page = pages_to_balance_new.last().unwrap(); new_last_page .get_contents() - .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, right_pointer); + .write_u32(offset::BTREE_RIGHTMOST_PTR, right_pointer); } // TODO: pointer map update (vacuum support) // Update divider cells in parent @@ -2259,7 +2270,7 @@ impl BTreeCursor { // Make this page's rightmost pointer point to pointer of divider cell before modification let previous_pointer_divider = read_u32(÷r_cell, 0); page.get_contents() - .write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, previous_pointer_divider); + .write_u32(offset::BTREE_RIGHTMOST_PTR, previous_pointer_divider); // divider cell now points to this page new_divider_cell.extend_from_slice(&(page.get().id as u32).to_be_bytes()); // now copy the rest of the divider cell: @@ -2891,16 +2902,16 @@ impl BTreeCursor { other => other, } as u8; // set new page type - root_contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, new_root_page_type); - root_contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, child.get().id as u32); + root_contents.write_u8(offset::BTREE_PAGE_TYPE, new_root_page_type); + root_contents.write_u32(offset::BTREE_RIGHTMOST_PTR, child.get().id as u32); root_contents.write_u16( - PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, + offset::BTREE_CELL_CONTENT_AREA, self.usable_space() as u16, ); - root_contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); - root_contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); + root_contents.write_u16(offset::BTREE_CELL_COUNT, 0); + root_contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); - root_contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + root_contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); root_contents.overflow_cells.clear(); self.root_page = root.get().id; self.stack.clear(); @@ -4081,7 +4092,7 @@ impl CellArray { fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> Result { // NOTE: freelist is in ascending order of keys and pc // unuse_space is reserved bytes at the end of page, therefore we must substract from maxpc - let mut prev_pc = page_ref.offset + PAGE_HEADER_OFFSET_FIRST_FREEBLOCK; + let mut prev_pc = page_ref.offset + offset::BTREE_FIRST_FREEBLOCK; let mut pc = page_ref.first_freeblock() as usize; let maxpc = usable_space as usize - amount; @@ -4105,7 +4116,7 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> R // Delete the slot from freelist and update the page's fragment count. 
page_ref.write_u16(prev_pc, next); let frag = page_ref.num_frag_free_bytes() + new_size as u8; - page_ref.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, frag); + page_ref.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag); return Ok(pc); } else if new_size + pc > maxpc { return_corrupt!("Free block extends beyond page end"); @@ -4139,14 +4150,14 @@ pub fn btree_init_page(page: &PageRef, page_type: PageType, offset: usize, usabl let contents = contents.contents.as_mut().unwrap(); contents.offset = offset; let id = page_type as u8; - contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id); - contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0); + contents.write_u8(offset::BTREE_PAGE_TYPE, id); + contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + contents.write_u16(offset::BTREE_CELL_COUNT, 0); - contents.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, usable_space); + contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space); - contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); - contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0); + contents.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); + contents.write_u32(offset::BTREE_RIGHTMOST_PTR, 0); } fn to_static_buf(buf: &mut [u8]) -> &'static mut [u8] { @@ -4243,7 +4254,7 @@ fn edit_page( )?; debug_validate_cells!(page, usable_space); // TODO: noverflow - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, number_new_cells as u16); + page.write_u16(offset::BTREE_CELL_COUNT, number_new_cells as u16); Ok(()) } @@ -4273,7 +4284,7 @@ fn page_free_array( let offset = (cell_pointer.start as usize - buf_range.start as usize) as u16; let len = (cell_pointer.end as usize - cell_pointer.start as usize) as u16; free_cell_range(page, offset, len, usable_space)?; - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); + page.write_u16(offset::BTREE_CELL_COUNT, page.cell_count() as u16 - 1); number_of_cells_removed += 1; } } @@ -4383,7 +4394,7 @@ fn free_cell_range( return_corrupt!("Invalid fragmentation count"); } let frag = page.num_frag_free_bytes() - removed_fragmentation; - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, frag); + page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag); pc }; @@ -4391,11 +4402,11 @@ fn free_cell_range( if offset < page.cell_content_area() { return_corrupt!("Free block before content area"); } - if pointer_to_pc != page.offset as u16 + PAGE_HEADER_OFFSET_FIRST_FREEBLOCK as u16 { + if pointer_to_pc != page.offset as u16 + offset::BTREE_FIRST_FREEBLOCK as u16 { return_corrupt!("Invalid content area merge"); } - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, pc); - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, end); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, pc); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, end); } else { page.write_u16_no_offset(pointer_to_pc as usize, offset); page.write_u16_no_offset(offset as usize, pc); @@ -4460,10 +4471,10 @@ fn defragment_page(page: &PageContent, usable_space: u16) { assert!(cbrk >= first_cell); // set new first byte of cell content - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, cbrk); // set free block to 0, unused spaced can be retrieved from gap between cell pointer end and content start - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + 
page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); debug_validate_cells!(page, usable_space); } @@ -4556,7 +4567,7 @@ fn insert_into_cell( // update cell count let new_n_cells = (page.cell_count() + 1) as u16; - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, new_n_cells); + page.write_u16(offset::BTREE_CELL_COUNT, new_n_cells); debug_validate_cells!(page, usable_space); Ok(()) } @@ -4668,12 +4679,12 @@ fn allocate_cell_space(page_ref: &PageContent, amount: u16, usable_space: u16) - if gap + 2 + amount > top { // defragment defragment_page(page_ref, usable_space); - top = page_ref.read_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA) as usize; + top = page_ref.read_u16(offset::BTREE_CELL_CONTENT_AREA) as usize; } top -= amount; - page_ref.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, top as u16); + page_ref.write_u16(offset::BTREE_CELL_CONTENT_AREA, top as u16); assert!(top + amount <= usable_space as usize); Ok(top as u16) @@ -4832,11 +4843,11 @@ fn drop_cell(page: &mut PageContent, cell_idx: usize, usable_space: u16) -> Resu if page.cell_count() > 1 { shift_pointers_left(page, cell_idx); } else { - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, usable_space); - page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0); - page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0); + page.write_u16(offset::BTREE_CELL_CONTENT_AREA, usable_space); + page.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0); + page.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, 0); } - page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1); + page.write_u16(offset::BTREE_CELL_COUNT, page.cell_count() as u16 - 1); debug_validate_cells!(page, usable_space); Ok(()) } @@ -5695,7 +5706,7 @@ mod tests { let contents = root_page.get().contents.as_mut().unwrap(); // Set rightmost pointer to page4 - contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, page4.get().id as u32); + contents.write_u32(offset::BTREE_RIGHTMOST_PTR, page4.get().id as u32); // Create a cell with pointer to page3 let cell_content = vec![ From 761c03f7c5acc312fa89f933184e9b48d0d181b7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 08:27:55 +0300 Subject: [PATCH 137/425] core/btree: Clean up B-Tree offset comments --- core/storage/btree.rs | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0e4398ca3..37d092758 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -46,32 +46,37 @@ use super::{ /// 0 1 2 3 4 5 6 7 8 11 /// pub mod offset { - /// type of btree page -> u8 + /// Type of the B-Tree page (u8). pub const BTREE_PAGE_TYPE: usize = 0; - /// pointer to first freeblock -> u16 - /// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page. - /// A freeblock is a structure used to identify unallocated space within a b-tree page. - /// Freeblocks are organized as a chain. + /// A pointer to the first freeblock (u16). /// - /// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead - /// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions. + /// This field of the B-Tree page header is an offset to the first freeblock, or zero if + /// there are no freeblocks on the page. A freeblock is a structure used to identify + /// unallocated space within a B-Tree page, organized as a chain. 
+    ///
+    /// Note that freeblocks are not the regular unallocated free space to the left
+    /// of the cell content area pointer, but blocks of at least 4 bytes WITHIN
+    /// the cell content area that are no longer in use, e.g. due to deletions.
     pub const BTREE_FIRST_FREEBLOCK: usize = 1;
-    /// number of cells in the page -> u16
+    /// The number of cells in the page (u16).
     pub const BTREE_CELL_COUNT: usize = 3;
-    /// pointer to first byte of cell allocated content from top -> u16
-    /// SQLite strives to place cells as far toward the end of the b-tree page as it can,
-    /// in order to leave space for future growth of the cell pointer array.
-    /// = the cell content area pointer moves leftward as cells are added to the page
+    /// A pointer to the first byte of allocated cell content, counted from the top of the page (u16).
+    ///
+    /// SQLite strives to place cells as far toward the end of the b-tree page as it can, in
+    /// order to leave space for future growth of the cell pointer array. This means that the
+    /// cell content area pointer moves leftward as cells are added to the page.
     pub const BTREE_CELL_CONTENT_AREA: usize = 5;
-    /// number of fragmented bytes -> u8
+    /// The number of fragmented bytes (u8).
+    ///
     /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
     pub const BTREE_FRAGMENTED_BYTES_COUNT: usize = 7;
-    /// if internalnode, pointer right most pointer (saved separately from cells) -> u32
+    /// For interior pages, the right-most pointer (stored separately from the cells) (u32).
    pub const BTREE_RIGHTMOST_PTR: usize = 8;
 }

@@ -2904,10 +2909,7 @@ impl BTreeCursor {
         // set new page type
         root_contents.write_u8(offset::BTREE_PAGE_TYPE, new_root_page_type);
         root_contents.write_u32(offset::BTREE_RIGHTMOST_PTR, child.get().id as u32);
-        root_contents.write_u16(
-            offset::BTREE_CELL_CONTENT_AREA,
-            self.usable_space() as u16,
-        );
+        root_contents.write_u16(offset::BTREE_CELL_CONTENT_AREA, self.usable_space() as u16);
         root_contents.write_u16(offset::BTREE_CELL_COUNT, 0);
         root_contents.write_u16(offset::BTREE_FIRST_FREEBLOCK, 0);

From a7fa7f7c6212ef34b76d69c90fedd279873a0fa6 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 08:39:07 +0300
Subject: [PATCH 138/425] core/btree: Unify debug() tracing

---
 core/storage/btree.rs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 37d092758..b56b01ecd 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -1,5 +1,3 @@
-use tracing::debug;
-
 use crate::{
     storage::{
         pager::Pager,
@@ -801,7 +799,7 @@ impl BTreeCursor {
                 // end
                 let has_parent = self.stack.current() > 0;
                 if has_parent {
-                    debug!("moving upwards");
+                    tracing::debug!("moving upwards");
                     self.going_upwards = true;
                     self.stack.pop();
                     continue;
@@ -1577,7 +1575,7 @@ impl BTreeCursor {
         // insert
         let overflow = {
             let contents = page.get().contents.as_mut().unwrap();
-            debug!(
+            tracing::debug!(
                 "insert_into_page(overflow, cell_count={})",
                 contents.cell_count()
            );
@@ -1697,7 +1695,7 @@ impl BTreeCursor {
         let parent_contents = parent_page.get().contents.as_ref().unwrap();
         let page_to_balance_idx = self.stack.current_cell_index() as usize;

-        debug!(
+        tracing::debug!(
             "balance_non_root(parent_id={} page_to_balance_idx={})",
             parent_page.get().id,
             page_to_balance_idx
@@ -4148,7 +4146,7 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> R
 pub fn btree_init_page(page: &PageRef, page_type: PageType, offset: usize, usable_space: u16) {
     // setup btree page
     let contents = page.get();
-    debug!("btree_init_page(id={}, offset={})", contents.id, offset);
+    tracing::debug!("btree_init_page(id={}, offset={})", contents.id, offset);
     let contents = contents.contents.as_mut().unwrap();
     contents.offset = offset;
     let id = page_type as u8;
@@ -4719,7 +4717,7 @@ fn fill_cell_payload(
     }
     let payload_overflow_threshold_max = payload_overflow_threshold_max(page_type, usable_space);
-    debug!(
+    tracing::debug!(
         "fill_cell_payload(record_size={}, payload_overflow_threshold_max={})",
         record_buf.len(),
         payload_overflow_threshold_max

From 5906d7971a33a059970cf78575b0fc12ab2d270d Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 09:25:15 +0300
Subject: [PATCH 139/425] core/vdbe: Clean up imports

---
 core/vdbe/execute.rs | 71 ++++++++++++++++++++++++++------------------
 core/vdbe/insn.rs    | 11 +++----
 core/vdbe/mod.rs     | 46 +++++++++++++++-------------
 3 files changed, 73 insertions(+), 55 deletions(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 5cf2e6cd2..c974b143c 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -1,46 +1,59 @@
 #![allow(unused_variables)]
-use crate::error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY};
-use crate::ext::ExtValue;
-use crate::function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc};
-use crate::functions::datetime::{
-    exec_date, exec_datetime_full, exec_julianday, exec_strftime, exec_time, exec_unixepoch,
+use crate::{
+    error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY},
+    ext::ExtValue,
+    function::{AggFunc, ExtFunc, MathFunc, MathFuncArity, ScalarFunc, VectorFunc},
+    functions::{
+        datetime::{
+            exec_date, exec_datetime_full, exec_julianday, exec_strftime, exec_time, exec_unixepoch,
+        },
+        printf::exec_printf,
+    },
 };
-use crate::functions::printf::exec_printf;
 use std::{borrow::BorrowMut, rc::Rc};

-use crate::pseudo::PseudoCursor;
-use crate::result::LimboResult;
+use crate::{pseudo::PseudoCursor, result::LimboResult};

-use crate::schema::{affinity, Affinity};
-use crate::storage::btree::{BTreeCursor, BTreeKey};
+use crate::{
+    schema::{affinity, Affinity},
+    storage::btree::{BTreeCursor, BTreeKey},
+};

-use crate::storage::wal::CheckpointResult;
-use crate::types::{
-    AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey, SeekOp,
+use crate::{
+    storage::wal::CheckpointResult,
+    types::{
+        AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, OwnedValueType, SeekKey,
+        SeekOp,
+    },
+    util::{
+        cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real,
+        checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision,
+    },
+    vdbe::{
+        builder::CursorType,
+        insn::{IdxInsertFlags, Insn},
+    },
+    vector::{vector32, vector64, vector_distance_cos, vector_extract},
 };
-use crate::util::{
-    cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real,
-    checked_cast_text_to_numeric, parse_schema_rows, RoundToPrecision,
-};
-use crate::vdbe::builder::CursorType;
-use crate::vdbe::insn::{IdxInsertFlags, Insn};
-use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract};
 use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState};

-use super::insn::{
-    exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat,
-    exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right,
-    exec_subtract, Cookie, RegisterOrLiteral,
+use super::{
+    insn::{
+        exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat,
+        exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right,
+        exec_subtract, Cookie, RegisterOrLiteral,
+    },
+    HaltState,
 };
-use super::HaltState;
 use rand::thread_rng;

-use super::likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape};
-use super::sorter::Sorter;
+use super::{
+    likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape},
+    sorter::Sorter,
+};
 use regex::{Regex, RegexBuilder};
-use std::cell::RefCell;
-use std::collections::HashMap;
+use std::{cell::RefCell, collections::HashMap};

 #[cfg(feature = "json")]
 use crate::{
diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 607949efb..c573869cf 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -1,12 +1,13 @@
-use std::num::NonZero;
-use std::rc::Rc;
+use std::{num::NonZero, rc::Rc};

 use super::{
     cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx,
 };
-use crate::schema::BTreeTable;
-use crate::storage::wal::CheckpointMode;
-use crate::types::{OwnedValue, Record};
+use crate::{
+    schema::BTreeTable,
+    storage::wal::CheckpointMode,
+    types::{OwnedValue, Record},
+};
 use limbo_macros::Description;

 /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values.
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index cf6918304..fe145b56b 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -24,19 +24,19 @@ pub mod insn;
 pub mod likeop;
 pub mod sorter;

-use crate::error::LimboError;
-use crate::fast_lock::SpinLock;
-use crate::function::{AggFunc, FuncCtx};
-
-use crate::storage::sqlite3_ondisk::DatabaseHeader;
-use crate::storage::{btree::BTreeCursor, pager::Pager};
-use crate::translate::plan::{ResultSetColumn, TableReference};
-use crate::types::{
-    AggContext, Cursor, CursorResult, ImmutableRecord, OwnedValue, SeekKey, SeekOp,
+use crate::{
+    error::LimboError,
+    fast_lock::SpinLock,
+    function::{AggFunc, FuncCtx},
+};
+
+use crate::{
+    storage::{btree::BTreeCursor, pager::Pager, sqlite3_ondisk::DatabaseHeader},
+    translate::plan::{ResultSetColumn, TableReference},
+    types::{AggContext, Cursor, CursorResult, ImmutableRecord, OwnedValue, SeekKey, SeekOp},
+    util::cast_text_to_numeric,
+    vdbe::{builder::CursorType, insn::Insn},
 };
-use crate::util::cast_text_to_numeric;
-use crate::vdbe::builder::CursorType;
-use crate::vdbe::insn::Insn;

 use crate::CheckpointStatus;

@@ -45,16 +45,20 @@ use crate::json::JsonCacheCell;
 use crate::{Connection, MvStore, Result, TransactionState};

 use execute::{InsnFunction, InsnFunctionStepResult};
-use rand::distributions::{Distribution, Uniform};
-use rand::Rng;
+use rand::{
+    distributions::{Distribution, Uniform},
+    Rng,
+};
 use regex::Regex;
-use std::cell::{Cell, RefCell};
-use std::collections::HashMap;
-use std::ffi::c_void;
-use std::num::NonZero;
-use std::ops::Deref;
-use std::rc::{Rc, Weak};
-use std::sync::Arc;
+use std::{
+    cell::{Cell, RefCell},
+    collections::HashMap,
+    ffi::c_void,
+    num::NonZero,
+    ops::Deref,
+    rc::{Rc, Weak},
+    sync::Arc,
+};

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 /// Represents a target for a jump instruction.
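A quick note on the convention the patch above applies: Rust lets `use` paths that share a common prefix be merged into a single nested use-tree, which is all this cleanup does; no imports are added or removed. Below is a minimal sketch of the two styles. The `std` paths are chosen purely for illustration and are not taken from the diffs above.

// Flat style, one `use` statement per path (the "before" side of the diffs):
//
//     use std::cell::RefCell;
//     use std::collections::HashMap;
//     use std::rc::Rc;
//
// Nested style, paths sharing a prefix merged into one use-tree (the "after" side):
use std::{cell::RefCell, collections::HashMap, rc::Rc};

fn main() {
    // Exercise the merged imports so the sketch compiles and runs as-is.
    let cache: Rc<RefCell<HashMap<String, i64>>> = Rc::new(RefCell::new(HashMap::new()));
    cache.borrow_mut().insert("pages".into(), 1);
    println!("{:?}", cache.borrow().get("pages"));
}

Both styles compile to identical imports; rustfmt can produce the nested form via its (still unstable) `imports_granularity = "Crate"` option, which is plausibly how a mechanical change of this size was generated, though the patches themselves do not say.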
From 3fd51cdf06229aad2e18add1b95c9d3bd8580134 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 09:28:43 +0300 Subject: [PATCH 140/425] core/vdbe: Move Insn implementation close to struct definition --- core/vdbe/insn.rs | 340 +++++++++++++++++++++++----------------------- 1 file changed, 170 insertions(+), 170 deletions(-) diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index c573869cf..cffd587e0 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -816,6 +816,176 @@ pub enum Insn { }, } +impl Insn { + pub fn to_function(&self) -> InsnFunction { + match self { + Insn::Init { .. } => execute::op_init, + + Insn::Null { .. } => execute::op_null, + + Insn::NullRow { .. } => execute::op_null_row, + + Insn::Add { .. } => execute::op_add, + + Insn::Subtract { .. } => execute::op_subtract, + + Insn::Multiply { .. } => execute::op_multiply, + + Insn::Divide { .. } => execute::op_divide, + + Insn::Compare { .. } => execute::op_compare, + Insn::BitAnd { .. } => execute::op_bit_and, + + Insn::BitOr { .. } => execute::op_bit_or, + + Insn::BitNot { .. } => execute::op_bit_not, + + Insn::Checkpoint { .. } => execute::op_checkpoint, + Insn::Remainder { .. } => execute::op_remainder, + + Insn::Jump { .. } => execute::op_jump, + Insn::Move { .. } => execute::op_move, + Insn::IfPos { .. } => execute::op_if_pos, + Insn::NotNull { .. } => execute::op_not_null, + + Insn::Eq { .. } => execute::op_eq, + Insn::Ne { .. } => execute::op_ne, + Insn::Lt { .. } => execute::op_lt, + Insn::Le { .. } => execute::op_le, + Insn::Gt { .. } => execute::op_gt, + Insn::Ge { .. } => execute::op_ge, + Insn::If { .. } => execute::op_if, + Insn::IfNot { .. } => execute::op_if_not, + Insn::OpenReadAsync { .. } => execute::op_open_read_async, + Insn::OpenReadAwait => execute::op_open_read_await, + + Insn::VOpenAsync { .. } => execute::op_vopen_async, + + Insn::VOpenAwait => execute::op_vopen_await, + + Insn::VCreate { .. } => execute::op_vcreate, + Insn::VFilter { .. } => execute::op_vfilter, + Insn::VColumn { .. } => execute::op_vcolumn, + Insn::VUpdate { .. } => execute::op_vupdate, + Insn::VNext { .. } => execute::op_vnext, + Insn::OpenPseudo { .. } => execute::op_open_pseudo, + Insn::RewindAsync { .. } => execute::op_rewind_async, + + Insn::RewindAwait { .. } => execute::op_rewind_await, + Insn::LastAsync { .. } => execute::op_last_async, + + Insn::LastAwait { .. } => execute::op_last_await, + Insn::Column { .. } => execute::op_column, + Insn::TypeCheck { .. } => execute::op_type_check, + Insn::MakeRecord { .. } => execute::op_make_record, + Insn::ResultRow { .. } => execute::op_result_row, + + Insn::NextAsync { .. } => execute::op_next_async, + + Insn::NextAwait { .. } => execute::op_next_await, + Insn::PrevAsync { .. } => execute::op_prev_async, + + Insn::PrevAwait { .. } => execute::op_prev_await, + Insn::Halt { .. } => execute::op_halt, + Insn::Transaction { .. } => execute::op_transaction, + + Insn::AutoCommit { .. } => execute::op_auto_commit, + Insn::Goto { .. } => execute::op_goto, + + Insn::Gosub { .. } => execute::op_gosub, + Insn::Return { .. } => execute::op_return, + + Insn::Integer { .. } => execute::op_integer, + + Insn::Real { .. } => execute::op_real, + + Insn::RealAffinity { .. } => execute::op_real_affinity, + + Insn::String8 { .. } => execute::op_string8, + + Insn::Blob { .. } => execute::op_blob, + + Insn::RowId { .. } => execute::op_row_id, + + Insn::SeekRowid { .. } => execute::op_seek_rowid, + Insn::DeferredSeek { .. } => execute::op_deferred_seek, + Insn::SeekGE { .. 
} => execute::op_seek, + Insn::SeekGT { .. } => execute::op_seek, + Insn::SeekLE { .. } => execute::op_seek, + Insn::SeekLT { .. } => execute::op_seek, + Insn::SeekEnd { .. } => execute::op_seek_end, + Insn::IdxGE { .. } => execute::op_idx_ge, + Insn::IdxGT { .. } => execute::op_idx_gt, + Insn::IdxLE { .. } => execute::op_idx_le, + Insn::IdxLT { .. } => execute::op_idx_lt, + Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, + + Insn::AggStep { .. } => execute::op_agg_step, + Insn::AggFinal { .. } => execute::op_agg_final, + + Insn::SorterOpen { .. } => execute::op_sorter_open, + Insn::SorterInsert { .. } => execute::op_sorter_insert, + Insn::SorterSort { .. } => execute::op_sorter_sort, + Insn::SorterData { .. } => execute::op_sorter_data, + Insn::SorterNext { .. } => execute::op_sorter_next, + Insn::Function { .. } => execute::op_function, + Insn::InitCoroutine { .. } => execute::op_init_coroutine, + Insn::EndCoroutine { .. } => execute::op_end_coroutine, + + Insn::Yield { .. } => execute::op_yield, + Insn::InsertAsync { .. } => execute::op_insert_async, + Insn::InsertAwait { .. } => execute::op_insert_await, + Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, + Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, + Insn::DeleteAsync { .. } => execute::op_delete_async, + + Insn::DeleteAwait { .. } => execute::op_delete_await, + + Insn::NewRowid { .. } => execute::op_new_rowid, + Insn::MustBeInt { .. } => execute::op_must_be_int, + + Insn::SoftNull { .. } => execute::op_soft_null, + + Insn::NotExists { .. } => execute::op_not_exists, + Insn::OffsetLimit { .. } => execute::op_offset_limit, + Insn::OpenWriteAsync { .. } => execute::op_open_write_async, + Insn::OpenWriteAwait { .. } => execute::op_open_write_await, + + Insn::Copy { .. } => execute::op_copy, + Insn::CreateBtree { .. } => execute::op_create_btree, + + Insn::Destroy { .. } => execute::op_destroy, + Insn::DropTable { .. } => execute::op_drop_table, + Insn::Close { .. } => execute::op_close, + + Insn::IsNull { .. } => execute::op_is_null, + + Insn::ParseSchema { .. } => execute::op_parse_schema, + + Insn::ShiftRight { .. } => execute::op_shift_right, + + Insn::ShiftLeft { .. } => execute::op_shift_left, + + Insn::Variable { .. } => execute::op_variable, + + Insn::ZeroOrNull { .. } => execute::op_zero_or_null, + + Insn::Not { .. } => execute::op_not, + + Insn::Concat { .. } => execute::op_concat, + + Insn::And { .. } => execute::op_and, + + Insn::Or { .. } => execute::op_or, + + Insn::Noop => execute::op_noop, + Insn::PageCount { .. } => execute::op_page_count, + + Insn::ReadCookie { .. } => execute::op_read_cookie, + } + } +} + // TODO: Add remaining cookies. #[derive(Description, Debug, Clone, Copy)] pub enum Cookie { @@ -1250,176 +1420,6 @@ pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { } } -impl Insn { - pub fn to_function(&self) -> InsnFunction { - match self { - Insn::Init { .. } => execute::op_init, - - Insn::Null { .. } => execute::op_null, - - Insn::NullRow { .. } => execute::op_null_row, - - Insn::Add { .. } => execute::op_add, - - Insn::Subtract { .. } => execute::op_subtract, - - Insn::Multiply { .. } => execute::op_multiply, - - Insn::Divide { .. } => execute::op_divide, - - Insn::Compare { .. } => execute::op_compare, - Insn::BitAnd { .. } => execute::op_bit_and, - - Insn::BitOr { .. } => execute::op_bit_or, - - Insn::BitNot { .. } => execute::op_bit_not, - - Insn::Checkpoint { .. } => execute::op_checkpoint, - Insn::Remainder { .. 
} => execute::op_remainder, - - Insn::Jump { .. } => execute::op_jump, - Insn::Move { .. } => execute::op_move, - Insn::IfPos { .. } => execute::op_if_pos, - Insn::NotNull { .. } => execute::op_not_null, - - Insn::Eq { .. } => execute::op_eq, - Insn::Ne { .. } => execute::op_ne, - Insn::Lt { .. } => execute::op_lt, - Insn::Le { .. } => execute::op_le, - Insn::Gt { .. } => execute::op_gt, - Insn::Ge { .. } => execute::op_ge, - Insn::If { .. } => execute::op_if, - Insn::IfNot { .. } => execute::op_if_not, - Insn::OpenReadAsync { .. } => execute::op_open_read_async, - Insn::OpenReadAwait => execute::op_open_read_await, - - Insn::VOpenAsync { .. } => execute::op_vopen_async, - - Insn::VOpenAwait => execute::op_vopen_await, - - Insn::VCreate { .. } => execute::op_vcreate, - Insn::VFilter { .. } => execute::op_vfilter, - Insn::VColumn { .. } => execute::op_vcolumn, - Insn::VUpdate { .. } => execute::op_vupdate, - Insn::VNext { .. } => execute::op_vnext, - Insn::OpenPseudo { .. } => execute::op_open_pseudo, - Insn::RewindAsync { .. } => execute::op_rewind_async, - - Insn::RewindAwait { .. } => execute::op_rewind_await, - Insn::LastAsync { .. } => execute::op_last_async, - - Insn::LastAwait { .. } => execute::op_last_await, - Insn::Column { .. } => execute::op_column, - Insn::TypeCheck { .. } => execute::op_type_check, - Insn::MakeRecord { .. } => execute::op_make_record, - Insn::ResultRow { .. } => execute::op_result_row, - - Insn::NextAsync { .. } => execute::op_next_async, - - Insn::NextAwait { .. } => execute::op_next_await, - Insn::PrevAsync { .. } => execute::op_prev_async, - - Insn::PrevAwait { .. } => execute::op_prev_await, - Insn::Halt { .. } => execute::op_halt, - Insn::Transaction { .. } => execute::op_transaction, - - Insn::AutoCommit { .. } => execute::op_auto_commit, - Insn::Goto { .. } => execute::op_goto, - - Insn::Gosub { .. } => execute::op_gosub, - Insn::Return { .. } => execute::op_return, - - Insn::Integer { .. } => execute::op_integer, - - Insn::Real { .. } => execute::op_real, - - Insn::RealAffinity { .. } => execute::op_real_affinity, - - Insn::String8 { .. } => execute::op_string8, - - Insn::Blob { .. } => execute::op_blob, - - Insn::RowId { .. } => execute::op_row_id, - - Insn::SeekRowid { .. } => execute::op_seek_rowid, - Insn::DeferredSeek { .. } => execute::op_deferred_seek, - Insn::SeekGE { .. } => execute::op_seek, - Insn::SeekGT { .. } => execute::op_seek, - Insn::SeekLE { .. } => execute::op_seek, - Insn::SeekLT { .. } => execute::op_seek, - Insn::SeekEnd { .. } => execute::op_seek_end, - Insn::IdxGE { .. } => execute::op_idx_ge, - Insn::IdxGT { .. } => execute::op_idx_gt, - Insn::IdxLE { .. } => execute::op_idx_le, - Insn::IdxLT { .. } => execute::op_idx_lt, - Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, - - Insn::AggStep { .. } => execute::op_agg_step, - Insn::AggFinal { .. } => execute::op_agg_final, - - Insn::SorterOpen { .. } => execute::op_sorter_open, - Insn::SorterInsert { .. } => execute::op_sorter_insert, - Insn::SorterSort { .. } => execute::op_sorter_sort, - Insn::SorterData { .. } => execute::op_sorter_data, - Insn::SorterNext { .. } => execute::op_sorter_next, - Insn::Function { .. } => execute::op_function, - Insn::InitCoroutine { .. } => execute::op_init_coroutine, - Insn::EndCoroutine { .. } => execute::op_end_coroutine, - - Insn::Yield { .. } => execute::op_yield, - Insn::InsertAsync { .. } => execute::op_insert_async, - Insn::InsertAwait { .. } => execute::op_insert_await, - Insn::IdxInsertAsync { .. 
} => execute::op_idx_insert_async,
-            Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await,
-            Insn::DeleteAsync { .. } => execute::op_delete_async,
-
-            Insn::DeleteAwait { .. } => execute::op_delete_await,
-
-            Insn::NewRowid { .. } => execute::op_new_rowid,
-            Insn::MustBeInt { .. } => execute::op_must_be_int,
-
-            Insn::SoftNull { .. } => execute::op_soft_null,
-
-            Insn::NotExists { .. } => execute::op_not_exists,
-            Insn::OffsetLimit { .. } => execute::op_offset_limit,
-            Insn::OpenWriteAsync { .. } => execute::op_open_write_async,
-            Insn::OpenWriteAwait { .. } => execute::op_open_write_await,
-
-            Insn::Copy { .. } => execute::op_copy,
-            Insn::CreateBtree { .. } => execute::op_create_btree,
-
-            Insn::Destroy { .. } => execute::op_destroy,
-            Insn::DropTable { .. } => execute::op_drop_table,
-            Insn::Close { .. } => execute::op_close,
-
-            Insn::IsNull { .. } => execute::op_is_null,
-
-            Insn::ParseSchema { .. } => execute::op_parse_schema,
-
-            Insn::ShiftRight { .. } => execute::op_shift_right,
-
-            Insn::ShiftLeft { .. } => execute::op_shift_left,
-
-            Insn::Variable { .. } => execute::op_variable,
-
-            Insn::ZeroOrNull { .. } => execute::op_zero_or_null,
-
-            Insn::Not { .. } => execute::op_not,
-
-            Insn::Concat { .. } => execute::op_concat,
-
-            Insn::And { .. } => execute::op_and,
-
-            Insn::Or { .. } => execute::op_or,
-
-            Insn::Noop => execute::op_noop,
-            Insn::PageCount { .. } => execute::op_page_count,
-
-            Insn::ReadCookie { .. } => execute::op_read_cookie,
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use crate::{

From 31f0d174d76b4015aafb15ff61ffc91674c9a7b9 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 09:31:40 +0300
Subject: [PATCH 141/425] core/vdbe: Move `exec_*()` functions to execute.rs

---
 core/vdbe/execute.rs | 888 +++++++++++++++++++++++++++++++++++++++++-
 core/vdbe/insn.rs    | 896 +------------------------------------------
 core/vdbe/mod.rs     |   1 -
 3 files changed, 884 insertions(+), 901 deletions(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index c974b143c..2b71ee716 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -39,11 +39,7 @@ use crate::{
 use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState};

 use super::{
-    insn::{
-        exec_add, exec_and, exec_bit_and, exec_bit_not, exec_bit_or, exec_boolean_not, exec_concat,
-        exec_divide, exec_multiply, exec_or, exec_remainder, exec_shift_left, exec_shift_right,
-        exec_subtract, Cookie, RegisterOrLiteral,
-    },
+    insn::{Cookie, RegisterOrLiteral},
     HaltState,
 };
 use rand::thread_rng;
@@ -5368,8 +5364,888 @@ fn exec_likely(reg: &OwnedValue) -> OwnedValue {
     reg.clone()
 }

+pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
+    let result = match (lhs, rhs) {
+        (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
+            let result = lhs.overflowing_add(*rhs);
+            if result.1 {
+                OwnedValue::Float(*lhs as f64 + *rhs as f64)
+            } else {
+                OwnedValue::Integer(result.0)
+            }
+        }
+        (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs + rhs),
+        (OwnedValue::Float(f), OwnedValue::Integer(i))
+        | (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64),
+        (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
+        (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add(
+            &cast_text_to_numeric(lhs.as_str()),
+            &cast_text_to_numeric(rhs.as_str()),
+        ),
+        (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
+            exec_add(&cast_text_to_numeric(text.as_str()), other)
+        }
+        _ => todo!(),
+    };
+    match result {
+
OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, + _ => result, + } +} + +pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + let result = match (lhs, rhs) { + (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { + let result = lhs.overflowing_sub(*rhs); + if result.1 { + OwnedValue::Float(*lhs as f64 - *rhs as f64) + } else { + OwnedValue::Integer(result.0) + } + } + (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs - rhs), + (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs - *rhs as f64), + (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs), + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) => { + exec_subtract(&cast_text_to_numeric(text.as_str()), other) + } + (other, OwnedValue::Text(text)) => { + exec_subtract(other, &cast_text_to_numeric(text.as_str())) + } + _ => todo!(), + }; + match result { + OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, + _ => result, + } +} + +pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + let result = match (lhs, rhs) { + (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { + let result = lhs.overflowing_mul(*rhs); + if result.1 { + OwnedValue::Float(*lhs as f64 * *rhs as f64) + } else { + OwnedValue::Integer(result.0) + } + } + (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs * rhs), + (OwnedValue::Integer(i), OwnedValue::Float(f)) + | (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }), + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + exec_multiply(&cast_text_to_numeric(text.as_str()), other) + } + + _ => todo!(), + }; + match result { + OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, + _ => result, + } +} + +pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + let result = match (lhs, rhs) { + (_, OwnedValue::Integer(0)) | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, + (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { + let result = lhs.overflowing_div(*rhs); + if result.1 { + OwnedValue::Float(*lhs as f64 / *rhs as f64) + } else { + OwnedValue::Integer(result.0) + } + } + (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs / rhs), + (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs / *rhs as f64), + (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs), + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other), + (other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())), + _ => todo!(), + }; + match result { + OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, + _ => result, + } +} + +pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + 
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (_, OwnedValue::Integer(0)) + | (OwnedValue::Integer(0), _) + | (_, OwnedValue::Float(0.0)) + | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), + (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh & rh), + (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(*lh as i64 & *rh as i64) + } + (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh), + (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64), + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + exec_bit_and(&cast_text_to_numeric(text.as_str()), other) + } + _ => todo!(), + } +} + +pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh | rh), + (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 | rh), + (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh | *rh as i64), + (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(*lh as i64 | *rh as i64) + } + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + exec_bit_or(&cast_text_to_numeric(text.as_str()), other) + } + _ => todo!(), + } +} + +pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Null, _) + | (_, OwnedValue::Null) + | (_, OwnedValue::Integer(0)) + | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, + (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { + if rhs == &0 { + OwnedValue::Null + } else { + OwnedValue::Integer(lhs % rhs.abs()) + } + } + (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { + let rhs_int = *rhs as i64; + if rhs_int == 0 { + OwnedValue::Null + } else { + OwnedValue::Float(((*lhs as i64) % rhs_int.abs()) as f64) + } + } + (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { + if rhs == &0 { + OwnedValue::Null + } else { + OwnedValue::Float(((*lhs as i64) % rhs.abs()) as f64) + } + } + (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { + let rhs_int = *rhs as i64; + if rhs_int == 0 { + OwnedValue::Null + } else { + OwnedValue::Float((lhs % rhs_int.abs()) as f64) + } + } + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) => { + exec_remainder(&cast_text_to_numeric(text.as_str()), other) + } + (other, OwnedValue::Text(text)) => { + exec_remainder(other, &cast_text_to_numeric(text.as_str())) + } + other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), + } +} + +pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue { + match reg { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Integer(i) => OwnedValue::Integer(!i), + OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), + OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numeric(text.as_str())), + _ => todo!(), + } +} + +pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + 
match (lhs, rhs) { + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { + OwnedValue::Integer(compute_shl(*lh, *rh)) + } + (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { + OwnedValue::Integer(compute_shl(*lh as i64, *rh)) + } + (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(compute_shl(*lh, *rh as i64)) + } + (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64)) + } + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) => { + exec_shift_left(&cast_text_to_numeric(text.as_str()), other) + } + (other, OwnedValue::Text(text)) => { + exec_shift_left(other, &cast_text_to_numeric(text.as_str())) + } + _ => todo!(), + } +} + +fn compute_shl(lhs: i64, rhs: i64) -> i64 { + if rhs == 0 { + lhs + } else if rhs > 0 { + // for positive shifts, if it's too large return 0 + if rhs >= 64 { + 0 + } else { + lhs << rhs + } + } else { + // for negative shifts, check if it's i64::MIN to avoid overflow on negation + if rhs == i64::MIN || rhs <= -64 { + if lhs < 0 { + -1 + } else { + 0 + } + } else { + lhs >> (-rhs) + } + } +} + +pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { + OwnedValue::Integer(compute_shr(*lh, *rh)) + } + (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { + OwnedValue::Integer(compute_shr(*lh as i64, *rh)) + } + (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(compute_shr(*lh, *rh as i64)) + } + (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { + OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64)) + } + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) => { + exec_shift_right(&cast_text_to_numeric(text.as_str()), other) + } + (other, OwnedValue::Text(text)) => { + exec_shift_right(other, &cast_text_to_numeric(text.as_str())) + } + _ => todo!(), + } +} + +// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0 +// note, that binary shift to the right is sign-extended +fn compute_shr(lhs: i64, rhs: i64) -> i64 { + if rhs == 0 { + lhs + } else if rhs > 0 { + // for positive right shifts + if rhs >= 64 { + if lhs < 0 { + -1 + } else { + 0 + } + } else { + lhs >> rhs + } + } else { + // for negative right shifts, check if it's i64::MIN to avoid overflow + if rhs == i64::MIN || -rhs >= 64 { + 0 + } else { + lhs << (-rhs) + } + } +} + +pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue { + match reg { + OwnedValue::Null => OwnedValue::Null, + OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), + OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), + OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numeric(text.as_str())), + _ => todo!(), + } +} +pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Text(lhs_text), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_text.as_str().to_string() + rhs_text.as_str())) + } + (OwnedValue::Text(lhs_text), OwnedValue::Integer(rhs_int)) => { + 
OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_int.to_string())) + } + (OwnedValue::Text(lhs_text), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_float.to_string())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_int.to_string() + rhs_text.as_str())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Integer(rhs_int)) => { + OwnedValue::build_text(&(lhs_int.to_string() + &rhs_int.to_string())) + } + (OwnedValue::Integer(lhs_int), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_int.to_string() + &rhs_float.to_string())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Text(rhs_text)) => { + OwnedValue::build_text(&(lhs_float.to_string() + rhs_text.as_str())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Integer(rhs_int)) => { + OwnedValue::build_text(&(lhs_float.to_string() + &rhs_int.to_string())) + } + (OwnedValue::Float(lhs_float), OwnedValue::Float(rhs_float)) => { + OwnedValue::build_text(&(lhs_float.to_string() + &rhs_float.to_string())) + } + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Blob(_), _) | (_, OwnedValue::Blob(_)) => { + todo!("TODO: Handle Blob conversion to String") + } + } +} + +pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (_, OwnedValue::Integer(0)) + | (OwnedValue::Integer(0), _) + | (_, OwnedValue::Float(0.0)) + | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + exec_and(&cast_text_to_numeric(text.as_str()), other) + } + _ => OwnedValue::Integer(1), + } +} + +pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { + match (lhs, rhs) { + (OwnedValue::Null, OwnedValue::Null) + | (OwnedValue::Null, OwnedValue::Float(0.0)) + | (OwnedValue::Float(0.0), OwnedValue::Null) + | (OwnedValue::Null, OwnedValue::Integer(0)) + | (OwnedValue::Integer(0), OwnedValue::Null) => OwnedValue::Null, + (OwnedValue::Float(0.0), OwnedValue::Integer(0)) + | (OwnedValue::Integer(0), OwnedValue::Float(0.0)) + | (OwnedValue::Float(0.0), OwnedValue::Float(0.0)) + | (OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0), + (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or( + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), + ), + (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { + exec_or(&cast_text_to_numeric(text.as_str()), other) + } + _ => OwnedValue::Integer(1), + } +} + #[cfg(test)] mod tests { + use crate::types::{OwnedValue, Text}; + + use super::{exec_add, exec_or}; + + #[test] + fn test_exec_add() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(1)), + (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(1)), + (OwnedValue::Integer(3), OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("2"))), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("1")), OwnedValue::Null), + ( + 
OwnedValue::Text(Text::from_str("1")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(1.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("3")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(4), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(4), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + OwnedValue::Float(4.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_add(lhs, rhs), + outputs[i], + "Wrong ADD for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_subtract; + + #[test] + fn test_exec_subtract() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(1)), + (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(1)), + (OwnedValue::Integer(3), OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("1")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Text(Text::from_str("1.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(1.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("3")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(2), + OwnedValue::Float(2.0), + OwnedValue::Float(2.0), + OwnedValue::Float(2.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(-2), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + OwnedValue::Float(-2.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_subtract(lhs, rhs), + outputs[i], + "Wrong subtract for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + use super::exec_multiply; + + #[test] + fn test_exec_multiply() { + let inputs = vec![ + (OwnedValue::Integer(3), OwnedValue::Integer(2)), + (OwnedValue::Float(3.0), OwnedValue::Float(2.0)), + (OwnedValue::Float(3.0), OwnedValue::Integer(2)), + (OwnedValue::Integer(3), OwnedValue::Float(2.0)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Integer(1), 
OwnedValue::Null), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("2")), + OwnedValue::Text(Text::from_str("3")), + ), + ( + OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Text(Text::from_str("2.0")), + OwnedValue::Integer(3), + ), + ( + OwnedValue::Float(2.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Integer(2), + OwnedValue::Text(Text::from_str("3.0")), + ), + ]; + + let outputs = [ + OwnedValue::Integer(6), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(6), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + OwnedValue::Float(6.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_multiply(lhs, rhs), + outputs[i], + "Wrong multiply for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + use super::exec_divide; + + #[test] + fn test_exec_divide() { + let inputs = vec![ + (OwnedValue::Integer(1), OwnedValue::Integer(0)), + (OwnedValue::Float(1.0), OwnedValue::Float(0.0)), + (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), + (OwnedValue::Float(6.0), OwnedValue::Float(2.0)), + (OwnedValue::Float(6.0), OwnedValue::Integer(2)), + (OwnedValue::Integer(6), OwnedValue::Integer(2)), + (OwnedValue::Null, OwnedValue::Integer(2)), + (OwnedValue::Integer(2), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Null), + ( + OwnedValue::Text(Text::from_str("6")), + OwnedValue::Text(Text::from_str("2")), + ), + ( + OwnedValue::Text(Text::from_str("6")), + OwnedValue::Integer(2), + ), + ]; + + let outputs = [ + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(9.223372036854776e18), + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(3.0), + OwnedValue::Float(3.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_divide(lhs, rhs), + outputs[i], + "Wrong divide for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_remainder; + #[test] + fn test_exec_remainder() { + let inputs = vec![ + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Float(1.0)), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), + (OwnedValue::Float(1.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Null), + (OwnedValue::Integer(12), OwnedValue::Integer(0)), + (OwnedValue::Float(12.0), OwnedValue::Float(0.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(0)), + (OwnedValue::Integer(12), OwnedValue::Float(0.0)), + (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), + (OwnedValue::Integer(12), OwnedValue::Integer(3)), + (OwnedValue::Float(12.0), OwnedValue::Float(3.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(3)), + (OwnedValue::Integer(12), OwnedValue::Float(3.0)), + (OwnedValue::Integer(12), OwnedValue::Integer(-3)), + (OwnedValue::Float(12.0), 
OwnedValue::Float(-3.0)), + (OwnedValue::Float(12.0), OwnedValue::Integer(-3)), + (OwnedValue::Integer(12), OwnedValue::Float(-3.0)), + ( + OwnedValue::Text(Text::from_str("12.0")), + OwnedValue::Text(Text::from_str("3.0")), + ), + ( + OwnedValue::Text(Text::from_str("12.0")), + OwnedValue::Float(3.0), + ), + ( + OwnedValue::Float(12.0), + OwnedValue::Text(Text::from_str("3.0")), + ), + ]; + let outputs = vec![ + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Float(0.0), + OwnedValue::Integer(0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Integer(0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + OwnedValue::Float(0.0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_remainder(lhs, rhs), + outputs[i], + "Wrong remainder for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + use super::exec_and; + + #[test] + fn test_exec_and() { + let inputs = vec![ + (OwnedValue::Integer(0), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Float(0.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Float(2.2)), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("string")), + ), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("1")), + ), + ( + OwnedValue::Integer(1), + OwnedValue::Text(Text::from_str("1")), + ), + ]; + let outputs = [ + OwnedValue::Integer(0), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(0), + OwnedValue::Integer(1), + OwnedValue::Integer(0), + OwnedValue::Integer(0), + OwnedValue::Integer(1), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_and(lhs, rhs), + outputs[i], + "Wrong AND for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + + #[test] + fn test_exec_or() { + let inputs = vec![ + (OwnedValue::Integer(0), OwnedValue::Null), + (OwnedValue::Null, OwnedValue::Integer(1)), + (OwnedValue::Null, OwnedValue::Null), + (OwnedValue::Float(0.0), OwnedValue::Null), + (OwnedValue::Integer(1), OwnedValue::Float(2.2)), + (OwnedValue::Float(0.0), OwnedValue::Integer(0)), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("string")), + ), + ( + OwnedValue::Integer(0), + OwnedValue::Text(Text::from_str("1")), + ), + (OwnedValue::Integer(0), OwnedValue::Text(Text::from_str(""))), + ]; + let outputs = [ + OwnedValue::Null, + OwnedValue::Integer(1), + OwnedValue::Null, + OwnedValue::Null, + OwnedValue::Integer(1), + OwnedValue::Integer(0), + OwnedValue::Integer(0), + OwnedValue::Integer(1), + OwnedValue::Integer(0), + ]; + + assert_eq!( + inputs.len(), + outputs.len(), + "Inputs and Outputs should have same size" + ); + for (i, (lhs, rhs)) in inputs.iter().enumerate() { + assert_eq!( + exec_or(lhs, rhs), + outputs[i], + "Wrong OR for lhs: {}, rhs: {}", + lhs, + rhs + ); + } + } + use crate::vdbe::{ execute::{exec_likely, exec_replace}, Bitfield, Register, @@ -5379,7 +6255,7 @@ mod tests { exec_abs, exec_char, exec_hex, exec_if, exec_instr, exec_length, exec_like, exec_lower, exec_ltrim, exec_max, exec_min, exec_nullif, 
exec_quote, exec_random, exec_randomblob, exec_round, exec_rtrim, exec_sign, exec_soundex, exec_substring, exec_trim, exec_typeof, - exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, OwnedValue, + exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, }; use std::collections::HashMap; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index cffd587e0..0047f9d11 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,13 +1,7 @@ use std::{num::NonZero, rc::Rc}; -use super::{ - cast_text_to_numeric, execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx, -}; -use crate::{ - schema::BTreeTable, - storage::wal::CheckpointMode, - types::{OwnedValue, Record}, -}; +use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx}; +use crate::{schema::BTreeTable, storage::wal::CheckpointMode, types::Record}; use limbo_macros::Description; /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values. @@ -1002,889 +996,3 @@ pub enum Cookie { /// The "user version" as read and set by the user_version pragma. UserVersion = 6, } - -pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_add(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 + *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs + rhs), - (OwnedValue::Float(f), OwnedValue::Integer(i)) - | (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_add(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_sub(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 - *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs - rhs), - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs - *rhs as f64), - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_subtract(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_subtract(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_mul(*rhs); - if result.1 { - 
OwnedValue::Float(*lhs as f64 * *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs * rhs), - (OwnedValue::Integer(i), OwnedValue::Float(f)) - | (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_multiply(&cast_text_to_numeric(text.as_str()), other) - } - - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - let result = match (lhs, rhs) { - (_, OwnedValue::Integer(0)) | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_div(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 / *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs / rhs), - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs / *rhs as f64), - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other), - (other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())), - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } -} - -pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh & rh), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 & *rh as i64) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_and(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } -} - -pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh | rh), - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 | rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh | *rh as i64), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 | *rh as i64) - } - 
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } -} - -pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) - | (_, OwnedValue::Null) - | (_, OwnedValue::Integer(0)) - | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null - } else { - OwnedValue::Integer(lhs % rhs.abs()) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs_int.abs()) as f64) - } - } - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs.abs()) as f64) - } - } - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - OwnedValue::Null - } else { - OwnedValue::Float((lhs % rhs_int.abs()) as f64) - } - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_remainder(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_remainder(other, &cast_text_to_numeric(text.as_str())) - } - other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), - } -} - -pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer(!i), - OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), - OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), - } -} - -pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh)) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_left(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_left(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -fn compute_shl(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive shifts, if it's too large return 0 - if rhs >= 64 { - 0 - } else { - lhs << rhs - } - } else { - // for negative shifts, check if it's i64::MIN to avoid overflow on negation - if rhs == i64::MIN || rhs <= -64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> (-rhs) - } - } -} - -pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | 
(_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh)) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_right(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_right(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0 -// note, that binary shift to the right is sign-extended -fn compute_shr(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive right shifts - if rhs >= 64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> rhs - } - } else { - // for negative right shifts, check if it's i64::MIN to avoid overflow - if rhs == i64::MIN || -rhs >= 64 { - 0 - } else { - lhs << (-rhs) - } - } -} - -pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), - OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), - OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), - } -} -pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Text(lhs_text), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + rhs_text.as_str())) - } - (OwnedValue::Text(lhs_text), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_int.to_string())) - } - (OwnedValue::Text(lhs_text), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_text.as_str().to_string() + &rhs_float.to_string())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_int.to_string() + rhs_text.as_str())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_int.to_string() + &rhs_int.to_string())) - } - (OwnedValue::Integer(lhs_int), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_int.to_string() + &rhs_float.to_string())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Text(rhs_text)) => { - OwnedValue::build_text(&(lhs_float.to_string() + rhs_text.as_str())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Integer(rhs_int)) => { - OwnedValue::build_text(&(lhs_float.to_string() + &rhs_int.to_string())) - } - (OwnedValue::Float(lhs_float), OwnedValue::Float(rhs_float)) => { - OwnedValue::build_text(&(lhs_float.to_string() + &rhs_float.to_string())) - } - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Blob(_), _) | (_, OwnedValue::Blob(_)) => { - todo!("TODO: Handle Blob conversion to String") - } - } -} - -pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | 
(OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_and(&cast_text_to_numeric(text.as_str()), other) - } - _ => OwnedValue::Integer(1), - } -} - -pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Float(0.0), OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Float(0.0)) - | (OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => OwnedValue::Integer(1), - } -} - -#[cfg(test)] -mod tests { - use crate::{ - types::{OwnedValue, Text}, - vdbe::insn::exec_or, - }; - - use super::exec_add; - - #[test] - fn test_exec_add() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(1)), - (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(1)), - (OwnedValue::Integer(3), OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("2"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("1")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("1")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(1.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("3")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(4), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(4), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - OwnedValue::Float(4.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_add(lhs, rhs), - outputs[i], - "Wrong ADD for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_subtract; - - #[test] - fn test_exec_subtract() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(1)), - (OwnedValue::Float(3.0), OwnedValue::Float(1.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(1)), - (OwnedValue::Integer(3), OwnedValue::Float(1.0)), 
- (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("1")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("1.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(1.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("3")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(2), - OwnedValue::Float(2.0), - OwnedValue::Float(2.0), - OwnedValue::Float(2.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(-2), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - OwnedValue::Float(-2.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_subtract(lhs, rhs), - outputs[i], - "Wrong subtract for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - use super::exec_multiply; - - #[test] - fn test_exec_multiply() { - let inputs = vec![ - (OwnedValue::Integer(3), OwnedValue::Integer(2)), - (OwnedValue::Float(3.0), OwnedValue::Float(2.0)), - (OwnedValue::Float(3.0), OwnedValue::Integer(2)), - (OwnedValue::Integer(3), OwnedValue::Float(2.0)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Text(Text::from_str("4")), OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("2")), - OwnedValue::Text(Text::from_str("3")), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Text(Text::from_str("2.0")), - OwnedValue::Integer(3), - ), - ( - OwnedValue::Float(2.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Integer(2), - OwnedValue::Text(Text::from_str("3.0")), - ), - ]; - - let outputs = [ - OwnedValue::Integer(6), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(6), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - OwnedValue::Float(6.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_multiply(lhs, rhs), - outputs[i], - "Wrong multiply for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - use super::exec_divide; - - #[test] - fn test_exec_divide() { - let inputs = vec![ - (OwnedValue::Integer(1), OwnedValue::Integer(0)), - 
(OwnedValue::Float(1.0), OwnedValue::Float(0.0)), - (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), - (OwnedValue::Float(6.0), OwnedValue::Float(2.0)), - (OwnedValue::Float(6.0), OwnedValue::Integer(2)), - (OwnedValue::Integer(6), OwnedValue::Integer(2)), - (OwnedValue::Null, OwnedValue::Integer(2)), - (OwnedValue::Integer(2), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Null), - ( - OwnedValue::Text(Text::from_str("6")), - OwnedValue::Text(Text::from_str("2")), - ), - ( - OwnedValue::Text(Text::from_str("6")), - OwnedValue::Integer(2), - ), - ]; - - let outputs = [ - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Float(9.223372036854776e18), - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Float(3.0), - OwnedValue::Float(3.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_divide(lhs, rhs), - outputs[i], - "Wrong divide for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_remainder; - #[test] - fn test_exec_remainder() { - let inputs = vec![ - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Float(1.0)), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Text(Text::from_str("1"))), - (OwnedValue::Float(1.0), OwnedValue::Null), - (OwnedValue::Integer(1), OwnedValue::Null), - (OwnedValue::Integer(12), OwnedValue::Integer(0)), - (OwnedValue::Float(12.0), OwnedValue::Float(0.0)), - (OwnedValue::Float(12.0), OwnedValue::Integer(0)), - (OwnedValue::Integer(12), OwnedValue::Float(0.0)), - (OwnedValue::Integer(i64::MIN), OwnedValue::Integer(-1)), - (OwnedValue::Integer(12), OwnedValue::Integer(3)), - (OwnedValue::Float(12.0), OwnedValue::Float(3.0)), - (OwnedValue::Float(12.0), OwnedValue::Integer(3)), - (OwnedValue::Integer(12), OwnedValue::Float(3.0)), - (OwnedValue::Integer(12), OwnedValue::Integer(-3)), - (OwnedValue::Float(12.0), OwnedValue::Float(-3.0)), - (OwnedValue::Float(12.0), OwnedValue::Integer(-3)), - (OwnedValue::Integer(12), OwnedValue::Float(-3.0)), - ( - OwnedValue::Text(Text::from_str("12.0")), - OwnedValue::Text(Text::from_str("3.0")), - ), - ( - OwnedValue::Text(Text::from_str("12.0")), - OwnedValue::Float(3.0), - ), - ( - OwnedValue::Float(12.0), - OwnedValue::Text(Text::from_str("3.0")), - ), - ]; - let outputs = vec![ - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Float(0.0), - OwnedValue::Integer(0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Integer(0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - OwnedValue::Float(0.0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_remainder(lhs, rhs), - outputs[i], - "Wrong remainder for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - use super::exec_and; - - #[test] - fn test_exec_and() { - let inputs = vec![ - (OwnedValue::Integer(0), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Float(0.0), OwnedValue::Null), - 
(OwnedValue::Integer(1), OwnedValue::Float(2.2)), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("string")), - ), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("1")), - ), - ( - OwnedValue::Integer(1), - OwnedValue::Text(Text::from_str("1")), - ), - ]; - let outputs = [ - OwnedValue::Integer(0), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(0), - OwnedValue::Integer(1), - OwnedValue::Integer(0), - OwnedValue::Integer(0), - OwnedValue::Integer(1), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_and(lhs, rhs), - outputs[i], - "Wrong AND for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } - - #[test] - fn test_exec_or() { - let inputs = vec![ - (OwnedValue::Integer(0), OwnedValue::Null), - (OwnedValue::Null, OwnedValue::Integer(1)), - (OwnedValue::Null, OwnedValue::Null), - (OwnedValue::Float(0.0), OwnedValue::Null), - (OwnedValue::Integer(1), OwnedValue::Float(2.2)), - (OwnedValue::Float(0.0), OwnedValue::Integer(0)), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("string")), - ), - ( - OwnedValue::Integer(0), - OwnedValue::Text(Text::from_str("1")), - ), - (OwnedValue::Integer(0), OwnedValue::Text(Text::from_str(""))), - ]; - let outputs = [ - OwnedValue::Null, - OwnedValue::Integer(1), - OwnedValue::Null, - OwnedValue::Null, - OwnedValue::Integer(1), - OwnedValue::Integer(0), - OwnedValue::Integer(0), - OwnedValue::Integer(1), - OwnedValue::Integer(0), - ]; - - assert_eq!( - inputs.len(), - outputs.len(), - "Inputs and Outputs should have same size" - ); - for (i, (lhs, rhs)) in inputs.iter().enumerate() { - assert_eq!( - exec_or(lhs, rhs), - outputs[i], - "Wrong OR for lhs: {}, rhs: {}", - lhs, - rhs - ); - } - } -} diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index fe145b56b..daad191b4 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -34,7 +34,6 @@ use crate::{ storage::{btree::BTreeCursor, pager::Pager, sqlite3_ondisk::DatabaseHeader}, translate::plan::{ResultSetColumn, TableReference}, types::{AggContext, Cursor, CursorResult, ImmutableRecord, OwnedValue, SeekKey, SeekOp}, - util::cast_text_to_numeric, vdbe::{builder::CursorType, insn::Insn}, }; From 6aaa105321f4b67e53838e36c161a8ef298ba9b6 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 11:23:53 +0300 Subject: [PATCH 142/425] stress: Add schema generation support --- Cargo.lock | 1 + stress/Cargo.toml | 1 + stress/main.rs | 183 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 96aec95db..f9f912ea3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1894,6 +1894,7 @@ dependencies = [ name = "limbo_stress" version = "0.0.19-pre.4" dependencies = [ + "anarchist-readable-name-generator-lib", "antithesis_sdk", "clap", "limbo", diff --git a/stress/Cargo.toml b/stress/Cargo.toml index 59c4e8256..3bd7c295b 100644 --- a/stress/Cargo.toml +++ b/stress/Cargo.toml @@ -20,3 +20,4 @@ clap = { version = "4.5", features = ["derive"] } limbo = { path = "../bindings/rust" } serde_json = "1.0.139" tokio = { version = "1.29.1", features = ["full"] } +anarchist-readable-name-generator-lib = "0.1.0" diff --git a/stress/main.rs b/stress/main.rs index c62714e63..c46215481 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -1,12 +1,188 @@ mod opts; +use anarchist_readable_name_generator_lib::readable_name_custom; +use antithesis_sdk::random::{get_random, AntithesisRng}; 
 use antithesis_sdk::*;
 use clap::Parser;
 use limbo::{Builder, Value};
 use opts::Opts;
 use serde_json::json;
+use std::collections::HashSet;
 use std::sync::Arc;
 
+/// Represents a column in a SQLite table
+#[derive(Debug, Clone)]
+pub struct Column {
+    pub name: String,
+    pub data_type: DataType,
+    pub constraints: Vec<Constraint>,
+}
+
+/// Represents SQLite data types
+#[derive(Debug, Clone)]
+pub enum DataType {
+    Integer,
+    Real,
+    Text,
+    Blob,
+    Numeric,
+}
+
+/// Represents column constraints
+#[derive(Debug, Clone)]
+pub enum Constraint {
+    PrimaryKey,
+    NotNull,
+    Unique,
+}
+
+/// Represents a table in a SQLite schema
+#[derive(Debug, Clone)]
+pub struct Table {
+    pub name: String,
+    pub columns: Vec<Column>,
+}
+
+/// Represents a complete SQLite schema
+#[derive(Debug, Clone)]
+pub struct ArbitrarySchema {
+    pub tables: Vec<Table>,
+}
+
+// Helper functions for generating random data
+fn generate_random_identifier() -> String {
+    readable_name_custom("_", AntithesisRng).replace('-', "_")
+}
+
+fn generate_random_data_type() -> DataType {
+    match get_random() % 5 {
+        0 => DataType::Integer,
+        1 => DataType::Real,
+        2 => DataType::Text,
+        3 => DataType::Blob,
+        _ => DataType::Numeric,
+    }
+}
+
+fn generate_random_constraint() -> Constraint {
+    match get_random() % 2 {
+        0 => Constraint::NotNull,
+        _ => Constraint::Unique,
+    }
+}
+
+fn generate_random_column() -> Column {
+    let name = generate_random_identifier();
+    let data_type = generate_random_data_type();
+
+    let constraint_count = (get_random() % 3) as usize;
+    let mut constraints = Vec::with_capacity(constraint_count);
+
+    for _ in 0..constraint_count {
+        constraints.push(generate_random_constraint());
+    }
+
+    Column {
+        name,
+        data_type,
+        constraints,
+    }
+}
+
+fn generate_random_table() -> Table {
+    let name = generate_random_identifier();
+    let column_count = (get_random() % 10 + 1) as usize;
+    let mut columns = Vec::with_capacity(column_count);
+    let mut column_names = HashSet::new();
+
+    // First, generate all columns without primary keys
+    for _ in 0..column_count {
+        let mut column = generate_random_column();
+
+        // Ensure column names are unique within the table
+        while column_names.contains(&column.name) {
+            column.name = generate_random_identifier();
+        }
+
+        column_names.insert(column.name.clone());
+        columns.push(column);
+    }
+
+    // Then, randomly select one column to be the primary key
+    let pk_index = (get_random() % column_count as u64) as usize;
+    columns[pk_index].constraints.push(Constraint::PrimaryKey);
+
+    Table { name, columns }
+}
+
+pub fn gen_schema() -> ArbitrarySchema {
+    let table_count = (get_random() % 10 + 1) as usize;
+    let mut tables = Vec::with_capacity(table_count);
+    let mut table_names = HashSet::new();
+
+    for _ in 0..table_count {
+        let mut table = generate_random_table();
+
+        // Ensure table names are unique
+        while table_names.contains(&table.name) {
+            table.name = generate_random_identifier();
+        }
+
+        table_names.insert(table.name.clone());
+        tables.push(table);
+    }
+
+    ArbitrarySchema { tables }
+}
+
+impl ArbitrarySchema {
+    pub fn to_sql(&self) -> String {
+        let mut sql = String::new();
+
+        for table in &self.tables {
+            sql.push_str(&format!("CREATE TABLE {} (\n", table.name));
+
+            for (i, column) in table.columns.iter().enumerate() {
+                if i > 0 {
+                    sql.push_str(",\n");
+                }
+
+                sql.push_str(&format!(
+                    " {} {}",
+                    column.name,
+                    data_type_to_sql(&column.data_type)
+                ));
+
+                for constraint in &column.constraints {
+                    sql.push_str(&format!(" {}", constraint_to_sql(constraint)));
+                }
+            }
+
+            sql.push_str("\n);\n\n");
+        }
+
+        sql
+    }
+}
+
+fn data_type_to_sql(data_type: &DataType) -> &'static str {
+    match data_type {
+        DataType::Integer => "INTEGER",
+        DataType::Real => "REAL",
+        DataType::Text => "TEXT",
+        DataType::Blob => "BLOB",
+        DataType::Numeric => "NUMERIC",
+    }
+}
+
+fn constraint_to_sql(constraint: &Constraint) -> String {
+    match constraint {
+        Constraint::PrimaryKey => "PRIMARY KEY".to_string(),
+        Constraint::NotNull => "NOT NULL".to_string(),
+        Constraint::Unique => "UNIQUE".to_string(),
+    }
+}
+
 #[tokio::main]
 async fn main() {
     let (num_nodes, main_id) = (1, "n-001");
@@ -17,12 +193,19 @@ async fn main() {
     lifecycle::setup_complete(&startup_data);
     antithesis_init();
 
+    let schema = gen_schema();
+
+    let schema_sql = schema.to_sql();
+
+    println!("{}", schema_sql);
     let opts = Opts::parse();
 
     let mut handles = Vec::new();
     for _ in 0..opts.nr_threads {
         // TODO: share the database between threads
         let db = Arc::new(Builder::new_local(":memory:").build().await.unwrap());
+        let conn = db.connect().unwrap();
+        conn.execute(&schema_sql, ()).await.unwrap();
         let nr_iterations = opts.nr_iterations;
         let db = db.clone();
         let handle = tokio::spawn(async move {

From 207563208f1734380b7a09e021f43c7e45a4a375 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 11:51:04 +0300
Subject: [PATCH 143/425] stress: Add support for INSERT, DELETE, and UPDATE

---
 Cargo.lock               |   1 +
 bindings/rust/src/lib.rs |   6 +-
 stress/Cargo.toml        |   1 +
 stress/main.rs           | 142 ++++++++++++++++++++++++++++++++++-----
 4 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f9f912ea3..14c1df80e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1897,6 +1897,7 @@ dependencies = [
  "anarchist-readable-name-generator-lib",
  "antithesis_sdk",
  "clap",
+ "hex",
  "limbo",
  "serde_json",
  "tokio",
diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs
index 60a7ffd77..61e6271c9 100644
--- a/bindings/rust/src/lib.rs
+++ b/bindings/rust/src/lib.rs
@@ -17,11 +17,13 @@ pub enum Error {
     ToSqlConversionFailure(BoxError),
     #[error("Mutex lock error: {0}")]
     MutexError(String),
+    #[error("SQL execution failure: `{0}`")]
+    SqlExecutionFailure(String),
 }
 
 impl From<limbo_core::LimboError> for Error {
-    fn from(_err: limbo_core::LimboError) -> Self {
-        todo!();
+    fn from(err: limbo_core::LimboError) -> Self {
+        Error::SqlExecutionFailure(err.to_string())
     }
 }
 
diff --git a/stress/Cargo.toml b/stress/Cargo.toml
index 3bd7c295b..6f7a0a9e9 100644
--- a/stress/Cargo.toml
+++ b/stress/Cargo.toml
@@ -21,3 +21,4 @@ limbo = { path = "../bindings/rust" }
 serde_json = "1.0.139"
 tokio = { version = "1.29.1", features = ["full"] }
 anarchist-readable-name-generator-lib = "0.1.0"
+hex = "0.4"
diff --git a/stress/main.rs b/stress/main.rs
index c46215481..3b1f53cb8 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -4,7 +4,8 @@ use anarchist_readable_name_generator_lib::readable_name_custom;
 use antithesis_sdk::random::{get_random, AntithesisRng};
 use antithesis_sdk::*;
 use clap::Parser;
-use limbo::{Builder, Value};
+use hex;
+use limbo::Builder;
 use opts::Opts;
 use serde_json::json;
 use std::collections::HashSet;
@@ -29,7 +30,7 @@ pub enum DataType {
 }
 
 /// Represents column constraints
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
 pub enum Constraint {
     PrimaryKey,
     NotNull,
@@ -183,8 +184,112 @@ fn constraint_to_sql(constraint: &Constraint) -> String {
     }
 }
 
+/// Generate a random value for a given data type
+fn generate_random_value(data_type: &DataType) -> String {
+    match data_type {
+        DataType::Integer => (get_random() % 1000).to_string(),
+        DataType::Real => format!("{:.2}", (get_random() % 1000) as f64 / 100.0),
+        DataType::Text => format!("'{}'", generate_random_identifier()),
+        DataType::Blob => format!("x'{}'", hex::encode(generate_random_identifier())),
+        DataType::Numeric => (get_random() % 1000).to_string(),
+    }
+}
+
+/// Generate a random INSERT statement for a table
+fn generate_insert(table: &Table) -> String {
+    let columns = table
+        .columns
+        .iter()
+        .map(|col| col.name.clone())
+        .collect::<Vec<_>>()
+        .join(", ");
+
+    let values = table
+        .columns
+        .iter()
+        .map(|col| generate_random_value(&col.data_type))
+        .collect::<Vec<_>>()
+        .join(", ");
+
+    format!(
+        "INSERT INTO {} ({}) VALUES ({});",
+        table.name, columns, values
+    )
+}
+
+/// Generate a random UPDATE statement for a table
+fn generate_update(table: &Table) -> String {
+    // Find the primary key column
+    let pk_column = table
+        .columns
+        .iter()
+        .find(|col| col.constraints.contains(&Constraint::PrimaryKey))
+        .expect("Table should have a primary key");
+
+    // Get all non-primary key columns
+    let non_pk_columns: Vec<_> = table
+        .columns
+        .iter()
+        .filter(|col| col.name != pk_column.name)
+        .collect();
+
+    // If we have no non-PK columns, just update the primary key itself
+    let set_clause = if non_pk_columns.is_empty() {
+        format!(
+            "{} = {}",
+            pk_column.name,
+            generate_random_value(&pk_column.data_type)
+        )
+    } else {
+        non_pk_columns
+            .iter()
+            .map(|col| format!("{} = {}", col.name, generate_random_value(&col.data_type)))
+            .collect::<Vec<_>>()
+            .join(", ")
+    };
+
+    let where_clause = format!(
+        "{} = {}",
+        pk_column.name,
+        generate_random_value(&pk_column.data_type)
+    );
+
+    format!(
+        "UPDATE {} SET {} WHERE {};",
+        table.name, set_clause, where_clause
+    )
+}
+
+/// Generate a random DELETE statement for a table
+fn generate_delete(table: &Table) -> String {
+    // Find the primary key column
+    let pk_column = table
+        .columns
+        .iter()
+        .find(|col| col.constraints.contains(&Constraint::PrimaryKey))
+        .expect("Table should have a primary key");
+
+    let where_clause = format!(
+        "{} = {}",
+        pk_column.name,
+        generate_random_value(&pk_column.data_type)
+    );
+
+    format!("DELETE FROM {} WHERE {};", table.name, where_clause)
+}
+
+/// Generate a random SQL statement for a schema
+fn generate_random_statement(schema: &ArbitrarySchema) -> String {
+    let table = &schema.tables[get_random() as usize % schema.tables.len()];
+    match get_random() % 3 {
+        0 => generate_insert(table),
+        1 => generate_update(table),
+        _ => generate_delete(table),
+    }
+}
+
 #[tokio::main]
-async fn main() {
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let (num_nodes, main_id) = (1, "n-001");
     let startup_data = json!({
         "num_nodes": num_nodes,
@@ -194,34 +299,37 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     antithesis_init();
 
     let schema = gen_schema();
-    let schema_sql = schema.to_sql();
-    println!("{}", schema_sql);
+    let schema_sql = schema.to_sql();
+
     let opts = Opts::parse();
 
-    let mut handles = Vec::new();
+    let mut handles = Vec::with_capacity(opts.nr_threads);
 
     for _ in 0..opts.nr_threads {
-        // TODO: share the database between threads
-        let db = Arc::new(Builder::new_local(":memory:").build().await.unwrap());
-        let conn = db.connect().unwrap();
-        conn.execute(&schema_sql, ()).await.unwrap();
+        let db = Arc::new(Builder::new_local(":memory:").build().await?);
+        let conn = db.connect()?;
+        conn.execute(&schema_sql, ()).await?;
 
         let nr_iterations = opts.nr_iterations;
         let db = db.clone();
+        let schema = schema.clone();
+
         let handle = tokio::spawn(async move {
-            let conn = db.connect().unwrap();
+            let conn = db.connect()?;
 
             for _ in 0..nr_iterations {
-                let mut rows = conn.query("select 1", ()).await.unwrap();
-                let row = rows.next().await.unwrap().unwrap();
-                let value = row.get_value(0).unwrap();
-                assert_always!(matches!(value, Value::Integer(1)), "value is incorrect");
+                let sql = generate_random_statement(&schema);
+                println!("{}", sql);
+                if let Err(e) = conn.execute(&sql, ()).await {
+                    println!("Error: {}", e);
+                }
             }
+            Ok::<_, Box<dyn std::error::Error + Send + Sync>>(())
         });
         handles.push(handle);
     }
+
     for handle in handles {
-        handle.await.unwrap();
+        handle.await??;
     }
     println!("Done.");
+    Ok(())
 }

From f50662205e93dcf45bbb5c4f09790eb14f95c831 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 11:52:53 +0300
Subject: [PATCH 144/425] stress: Fix schema creation

---
 stress/main.rs | 62 +++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/stress/main.rs b/stress/main.rs
index 3b1f53cb8..6bd7a6bef 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -137,32 +137,29 @@ pub fn gen_schema() -> ArbitrarySchema {
 }
 
 impl ArbitrarySchema {
-    pub fn to_sql(&self) -> String {
-        let mut sql = String::new();
+    /// Convert the schema to a vector of SQL DDL statements
+    pub fn to_sql(&self) -> Vec<String> {
+        self.tables
+            .iter()
+            .map(|table| {
+                let columns = table
+                    .columns
+                    .iter()
+                    .map(|col| {
+                        let mut col_def =
+                            format!(" {} {}", col.name, data_type_to_sql(&col.data_type));
+                        for constraint in &col.constraints {
+                            col_def.push(' ');
+                            col_def.push_str(&constraint_to_sql(constraint));
+                        }
+                        col_def
+                    })
+                    .collect::<Vec<_>>()
+                    .join(",\n");
 
-        for table in &self.tables {
-            sql.push_str(&format!("CREATE TABLE {} (\n", table.name));
-
-            for (i, column) in table.columns.iter().enumerate() {
-                if i > 0 {
-                    sql.push_str(",\n");
-                }
-
-                sql.push_str(&format!(
-                    " {} {}",
-                    column.name,
-                    data_type_to_sql(&column.data_type)
-                ));
-
-                for constraint in &column.constraints {
-                    sql.push_str(&format!(" {}", constraint_to_sql(constraint)));
-                }
-            }
-
-            sql.push_str("\n);\n\n");
-        }
-
-        sql
+                format!("CREATE TABLE {} (\n{}\n);", table.name, columns)
+            })
+            .collect()
     }
 }
 
@@ -299,8 +296,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     antithesis_init();
 
     let schema = gen_schema();
-    let schema_sql = schema.to_sql();
-    println!("{}", schema_sql);
+    let ddl_statements = schema.to_sql();
+    for stmt in &ddl_statements {
+        println!("{}", stmt);
+    }
 
     let opts = Opts::parse();
 
     let mut handles = Vec::with_capacity(opts.nr_threads);
@@ -308,7 +307,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     for _ in 0..opts.nr_threads {
         let db = Arc::new(Builder::new_local(":memory:").build().await?);
         let conn = db.connect()?;
-        conn.execute(&schema_sql, ()).await?;
+
+        // Apply each DDL statement individually
+        for stmt in &ddl_statements {
+            if let Err(e) = conn.execute(stmt, ()).await {
+                println!("Error creating table: {}", e);
+            }
+        }
+
         let nr_iterations = opts.nr_iterations;
         let db = db.clone();
         let schema = schema.clone();
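[Editorial note, not part of the patch series: with the generators above, a run produces a schema and workload of roughly the following shape. All table names, column names, types, constraints, and values are chosen at random via readable_name_custom and get_random, so this sample is illustrative only:

CREATE TABLE quiet_river (
 solid_lake INTEGER PRIMARY KEY,
 warm_hill TEXT NOT NULL,
 pale_sea REAL
);

INSERT INTO quiet_river (solid_lake, warm_hill, pale_sea) VALUES (42, 'bold_fox', 3.14);
UPDATE quiet_river SET warm_hill = 'calm_owl', pale_sea = 2.71 WHERE solid_lake = 17;
DELETE FROM quiet_river WHERE solid_lake = 903;

Note that UPDATE and DELETE always target the primary key with an equality predicate on a freshly generated random value, so many of them are expected to match zero rows.]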
From 39cee1b1465bea41eb24a1885717f27eab7b3d96 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 11:55:03 +0300
Subject: [PATCH 145/425] stress: Increase default number of iterations

---
 stress/opts.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stress/opts.rs b/stress/opts.rs
index 392d79448..da00e1a00 100644
--- a/stress/opts.rs
+++ b/stress/opts.rs
@@ -10,7 +10,7 @@ pub struct Opts {
         short = 'i',
         long,
         help = "the number of iterations",
-        default_value_t = 1000
+        default_value_t = 100000
     )]
     pub nr_iterations: usize,
 }

From c4d983bcfed58980262abfa45d88f59557e17d8f Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 11:57:01 +0300
Subject: [PATCH 146/425] stress: Log SQL statements to a file

---
 stress/main.rs | 26 ++++++++++++++++++++------
 stress/opts.rs | 12 ++++++++++++
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/stress/main.rs b/stress/main.rs
index 6bd7a6bef..6180faf30 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -9,7 +9,9 @@ use limbo::Builder;
 use opts::Opts;
 use serde_json::json;
 use std::collections::HashSet;
-use std::sync::Arc;
+use std::fs::File;
+use std::io::Write;
+use std::sync::{Arc, Mutex};
 
 /// Represents a column in a SQLite table
 #[derive(Debug, Clone)]
 pub struct Column {
@@ -297,11 +299,19 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let schema = gen_schema();
     let ddl_statements = schema.to_sql();
-    for stmt in &ddl_statements {
-        println!("{}", stmt);
-    }
 
     let opts = Opts::parse();
+    let log_file = File::create(&opts.log_file)?;
+    let log_file = Arc::new(Mutex::new(log_file));
+
+    // Write DDL statements to log file
+    {
+        let mut file = log_file.lock().unwrap();
+        for stmt in &ddl_statements {
+            writeln!(file, "{}", stmt)?;
+        }
+    }
 
     let mut handles = Vec::with_capacity(opts.nr_threads);
 
@@ -318,12 +328,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         let nr_iterations = opts.nr_iterations;
         let db = db.clone();
         let schema = schema.clone();
+        let log_file = log_file.clone();
 
         let handle = tokio::spawn(async move {
             let conn = db.connect()?;
 
             for _ in 0..nr_iterations {
                 let sql = generate_random_statement(&schema);
-                println!("{}", sql);
+                {
+                    let mut file = log_file.lock().unwrap();
+                    writeln!(file, "{}", sql)?;
+                }
                 if let Err(e) = conn.execute(&sql, ()).await {
                     println!("Error: {}", e);
                 }
@@ -336,6 +350,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     for handle in handles {
         handle.await??;
     }
-    println!("Done.");
+    println!("Done. SQL statements written to {}", opts.log_file);
     Ok(())
 }
diff --git a/stress/opts.rs b/stress/opts.rs
index da00e1a00..e7799d29a 100644
--- a/stress/opts.rs
+++ b/stress/opts.rs
@@ -4,8 +4,11 @@ use clap::{command, Parser};
 #[command(name = "limbo_stress")]
 #[command(author, version, about, long_about = None)]
 pub struct Opts {
+    /// Number of threads to run
     #[clap(short = 't', long, help = "the number of threads", default_value_t = 8)]
     pub nr_threads: usize,
+
+    /// Number of iterations per thread
     #[clap(
         short = 'i',
         long,
         help = "the number of iterations",
         default_value_t = 100000
     )]
     pub nr_iterations: usize,
+
+    /// Log file for SQL statements
+    #[clap(
+        short = 'l',
+        long,
+        help = "log file for SQL statements",
+        default_value = "limbostress.log"
+    )]
+    pub log_file: String,
 }

From 441cd637b5f71384bc0ebbd610482dd356425d33 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Thu, 10 Apr 2025 11:58:52 +0300
Subject: [PATCH 147/425] stress: Make database file configurable

---
 stress/main.rs | 3 ++-
 stress/opts.rs | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/stress/main.rs b/stress/main.rs
index 6180faf30..c0c42449d 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -315,7 +315,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let mut handles = Vec::with_capacity(opts.nr_threads);
 
     for _ in 0..opts.nr_threads {
-        let db = Arc::new(Builder::new_local(":memory:").build().await?);
+        let db = Arc::new(Builder::new_local(&opts.db_file).build().await?);
         let conn = db.connect()?;
 
         // Apply each DDL statement individually
@@ -351,5 +351,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         handle.await??;
     }
     println!("Done. 
SQL statements written to {}", opts.log_file); + println!("Database file: {}", opts.db_file); Ok(()) } diff --git a/stress/opts.rs b/stress/opts.rs index e7799d29a..3084431c5 100644 --- a/stress/opts.rs +++ b/stress/opts.rs @@ -25,4 +25,13 @@ pub struct Opts { default_value = "limbostress.log" )] pub log_file: String, + + /// Database file + #[clap( + short = 'd', + long, + help = "database file", + default_value = "limbostress.db" + )] + pub db_file: String, } From 3fd378cf9fbb8ebda59240a2830e15a28773b522 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 12:04:49 +0300 Subject: [PATCH 148/425] Fix Antithesis Dockerfile to include JavaScript bindings --- Dockerfile.antithesis | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 056ad0947..1f4f3ba10 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -14,6 +14,7 @@ COPY ./Cargo.lock ./Cargo.lock COPY ./Cargo.toml ./Cargo.toml COPY ./bindings/go ./bindings/go/ COPY ./bindings/java ./bindings/java/ +COPY ./bindings/javascript ./bindings/javascript/ COPY ./bindings/python ./bindings/python/ COPY ./bindings/rust ./bindings/rust/ COPY ./bindings/wasm ./bindings/wasm/ From 53633e8b6ffd1e902863c54f8cdb4ce535f2ab3a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 13:14:38 +0300 Subject: [PATCH 149/425] core/btree: Add PageContent::new() helper --- core/storage/btree.rs | 9 ++++----- core/storage/pager.rs | 6 +----- core/storage/sqlite3_ondisk.rs | 14 +++++++++----- core/storage/wal.rs | 9 ++++----- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 8f4afb090..361534c5c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4893,14 +4893,13 @@ mod tests { let page = Arc::new(Page::new(id)); let drop_fn = Rc::new(|_| {}); - let inner = PageContent { - offset: 0, - buffer: Arc::new(RefCell::new(Buffer::new( + let inner = PageContent::new( + 0, + Arc::new(RefCell::new(Buffer::new( BufferData::new(vec![0; 4096]), drop_fn, ))), - overflow_cells: Vec::new(), - }; + ); page.get().contents.replace(inner); btree_init_page(&page, PageType::TableLeaf, 0, 4096); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 70af1c8d2..9d6d90c00 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -637,11 +637,7 @@ pub fn allocate_page(page_id: usize, buffer_pool: &Rc, offset: usize }); let buffer = Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))); page.set_loaded(); - page.get().contents = Some(PageContent { - offset, - buffer, - overflow_cells: Vec::new(), - }); + page.get().contents = Some(PageContent::new(offset, buffer)); } page } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index b8373514f..10251ca51 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -413,6 +413,14 @@ impl Clone for PageContent { } impl PageContent { + pub fn new(offset: usize, buffer: Arc>) -> Self { + Self { + offset, + buffer, + overflow_cells: Vec::new(), + } + } + pub fn page_type(&self) -> PageType { self.read_u8(0).try_into().unwrap() } @@ -741,11 +749,7 @@ fn finish_read_page( } else { 0 }; - let inner = PageContent { - offset: pos, - buffer: buffer_ref.clone(), - overflow_cells: Vec::new(), - }; + let inner = PageContent::new(pos, buffer_ref.clone()); { page.get().contents.replace(inner); page.set_uptodate(); diff --git a/core/storage/wal.rs b/core/storage/wal.rs index b56246a78..2d1f17776 100644 --- a/core/storage/wal.rs +++ 
b/core/storage/wal.rs @@ -698,11 +698,10 @@ impl WalFile { let drop_fn = Rc::new(move |buf| { buffer_pool.put(buf); }); - checkpoint_page.get().contents = Some(PageContent { - offset: 0, - buffer: Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))), - overflow_cells: Vec::new(), - }); + checkpoint_page.get().contents = Some(PageContent::new( + 0, + Arc::new(RefCell::new(Buffer::new(buffer, drop_fn))), + )); } Self { io, From 60a13c129fdcc9ea02197ad44f901abe395040f6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 10 Apr 2025 13:28:53 +0300 Subject: [PATCH 150/425] io/linux: make syscallio the default (io_uring is really slow) --- cli/input.rs | 2 +- core/ext/mod.rs | 4 ++-- core/io/mod.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/input.rs b/cli/input.rs index e352899c9..eac5312dc 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -43,7 +43,7 @@ impl Default for Io { true => { #[cfg(all(target_os = "linux", feature = "io_uring"))] { - Io::IoUring + Io::Syscall // FIXME: make io_uring faster so it can be the default } #[cfg(any( not(target_os = "linux"), diff --git a/core/ext/mod.rs b/core/ext/mod.rs index 270bee682..939fe3e05 100644 --- a/core/ext/mod.rs +++ b/core/ext/mod.rs @@ -89,12 +89,12 @@ impl Database { path: &str, vfs: &str, ) -> crate::Result<(Arc, Arc)> { - use crate::{MemoryIO, PlatformIO}; + use crate::{MemoryIO, SyscallIO}; use dynamic::get_vfs_modules; let io: Arc = match vfs { "memory" => Arc::new(MemoryIO::new()), - "syscall" => Arc::new(PlatformIO::new()?), + "syscall" => Arc::new(SyscallIO::new()?), #[cfg(all(target_os = "linux", feature = "io_uring"))] "io_uring" => Arc::new(UringIO::new()?), other => match get_vfs_modules().iter().find(|v| v.0 == vfs) { diff --git a/core/io/mod.rs b/core/io/mod.rs index 1d3223128..1cda42380 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -190,7 +190,7 @@ cfg_block! 
{
     #[cfg(feature = "fs")]
     pub use unix::UnixIO;
     pub use unix::UnixIO as SyscallIO;
-    pub use io_uring::UringIO as PlatformIO;
+    pub use unix::UnixIO as PlatformIO;
 }
 #[cfg(any(all(target_os = "linux",not(feature = "io_uring")), target_os = "macos"))]
 {

From f795a9e331b4b337e6df6ebcadc25d2b0a36ac61 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Thu, 10 Apr 2025 13:41:10 +0200
Subject: [PATCH 151/425] Add support to load log file with stress test

---
 stress/main.rs | 120 ++++++++++++++++++++++++++++++++++++++-----------
 stress/opts.rs |   9 ++++
 2 files changed, 103 insertions(+), 26 deletions(-)

diff --git a/stress/main.rs b/stress/main.rs
index c0c42449d..cbef2d42e 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -10,9 +10,16 @@ use opts::Opts;
 use serde_json::json;
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::Write;
+use std::io::{Read, Write};
 use std::sync::{Arc, Mutex};
 
+pub struct Plan {
+    pub ddl_statements: Vec<String>,
+    pub queries_per_thread: Vec<Vec<String>>,
+    pub nr_iterations: usize,
+    pub nr_threads: usize,
+}
+
 /// Represents a column in a SQLite table
 #[derive(Debug, Clone)]
 pub struct Column {
@@ -157,9 +164,9 @@ impl ArbitrarySchema {
                         col_def
                     })
                     .collect::<Vec<_>>()
-                    .join(",\n");
+                    .join(",");
 
-                format!("CREATE TABLE {} (\n{}\n);", table.name, columns)
+                format!("CREATE TABLE {} ({});", table.name, columns)
             })
             .collect()
     }
}
@@ -287,6 +294,76 @@ fn generate_random_statement(schema: &ArbitrarySchema) -> String {
     }
 }
 
+fn generate_plan(opts: &Opts) -> Result<Plan, Box<dyn std::error::Error>> {
+    let schema = gen_schema();
+    // Write DDL statements to log file
+    let mut log_file = File::create(&opts.log_file)?;
+    let ddl_statements = schema.to_sql();
+    let mut plan = Plan {
+        ddl_statements: vec![],
+        queries_per_thread: vec![],
+        nr_iterations: opts.nr_iterations,
+        nr_threads: opts.nr_threads,
+    };
+    writeln!(log_file, "{}", opts.nr_threads)?;
+    writeln!(log_file, "{}", opts.nr_iterations)?;
+    writeln!(log_file, "{}", ddl_statements.len())?;
+    for stmt in &ddl_statements {
+        writeln!(log_file, "{}", stmt)?;
+    }
+    plan.ddl_statements = ddl_statements;
+    for _ in 0..opts.nr_threads {
+        let mut queries = vec![];
+        for _ in 0..opts.nr_iterations {
+            let sql = generate_random_statement(&schema);
+            writeln!(log_file, "{}", sql)?;
+            queries.push(sql);
+        }
+    }
+    Ok(plan)
+}
+
+fn read_plan_from_log_file(opts: &Opts) -> Result<Plan, Box<dyn std::error::Error>> {
+    let mut file = File::open(&opts.log_file)?;
+    let mut buf = String::new();
+    let mut plan = Plan {
+        ddl_statements: vec![],
+        queries_per_thread: vec![],
+        nr_iterations: 0,
+        nr_threads: 0,
+    };
+    file.read_to_string(&mut buf).unwrap();
+    let mut lines = buf.lines();
+    plan.nr_threads = lines.next().expect("missing threads").parse().unwrap();
+    plan.nr_iterations = lines
+        .next()
+        .expect("missing nr_iterations")
+        .parse()
+        .unwrap();
+    let nr_ddl = lines
+        .next()
+        .expect("number of ddl statements")
+        .parse()
+        .unwrap();
+    for _ in 0..nr_ddl {
+        plan.ddl_statements
+            .push(lines.next().expect("expected ddl statement").to_string());
+    }
+    for i in 0..plan.nr_threads {
+        let mut queries = vec![];
+        for _ in 0..plan.nr_iterations {
+            queries.push(
+                lines
+                    .next()
+                    .expect(format!("missing query for thread {}", i).as_str())
+                    .to_string(),
+            );
+        }
+        plan.queries_per_thread.push(queries);
+    }
+    Ok(plan)
+}
+
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let (num_nodes, main_id) = (1, "n-001");
@@ -297,29 +374,25 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     lifecycle::setup_complete(&startup_data);
     antithesis_init();
 
-    let schema = gen_schema();
-    let ddl_statements = schema.to_sql();
+    let mut opts = Opts::parse();
 
-    let opts = Opts::parse();
-    let log_file = File::create(&opts.log_file)?;
-    let log_file = Arc::new(Mutex::new(log_file));
-
-    // Write DDL statements to log file
-    {
-        let mut file = log_file.lock().unwrap();
-        for stmt in &ddl_statements {
-            writeln!(file, "{}", stmt)?;
-        }
-    }
+    let plan = if opts.load_log {
+        read_plan_from_log_file(&mut opts)?
+    } else {
+        generate_plan(&opts)?
+    };
 
     let mut handles = Vec::with_capacity(opts.nr_threads);
+    let plan = Arc::new(plan);
 
-    for _ in 0..opts.nr_threads {
+    for thread in 0..opts.nr_threads {
         let db = Arc::new(Builder::new_local(&opts.db_file).build().await?);
+        let plan = plan.clone();
         let conn = db.connect()?;
 
         // Apply each DDL statement individually
-        for stmt in &ddl_statements {
+        for stmt in &plan.ddl_statements {
+            println!("executing ddl {}", stmt);
             if let Err(e) = conn.execute(stmt, ()).await {
                 println!("Error creating table: {}", e);
             }
@@ -327,17 +400,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         let nr_iterations = opts.nr_iterations;
         let db = db.clone();
-        let schema = schema.clone();
-        let log_file = log_file.clone();
 
         let handle = tokio::spawn(async move {
             let conn = db.connect()?;
 
-            for _ in 0..nr_iterations {
-                let sql = generate_random_statement(&schema);
-                {
-                    let mut file = log_file.lock().unwrap();
-                    writeln!(file, "{}", sql)?;
-                }
+            for query_index in 0..nr_iterations {
+                let sql = &plan.queries_per_thread[thread][query_index];
+                println!("executing: {}", sql);
                 if let Err(e) = conn.execute(&sql, ()).await {
                     println!("Error: {}", e);
                 }
diff --git a/stress/opts.rs b/stress/opts.rs
index 3084431c5..a8cbb5b2a 100644
--- a/stress/opts.rs
+++ b/stress/opts.rs
@@ -26,6 +26,15 @@ pub struct Opts {
     )]
     pub log_file: String,
 
+    /// Load log file instead of creating a new one
+    #[clap(
+        short = 'L',
+        long = "load-log",
+        help = "load log file instead of creating a new one",
+        default_value_t = false
+    )]
+    pub load_log: bool,
+
     /// Database file
     #[clap(
         short = 'd',

From cdcbcafbdda681a4d2a978eb3264e166f2fcb855 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Thu, 10 Apr 2025 13:46:40 +0200
Subject: [PATCH 152/425] clippy

---
 stress/main.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stress/main.rs b/stress/main.rs
index cbef2d42e..bd687a231 100644
--- a/stress/main.rs
+++ b/stress/main.rs
@@ -11,7 +11,7 @@ use serde_json::json;
 use std::collections::HashSet;
 use std::fs::File;
 use std::io::{Read, Write};
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 
 pub struct Plan {
     pub ddl_statements: Vec<String>,
     pub queries_per_thread: Vec<Vec<String>>,
@@ -349,13 +349,13 @@ fn read_plan_from_log_file(opts: &Opts) -> Result<Plan, Box<dyn std::error::Error>>
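[Editorial note, not part of the patch series: the plan log written by generate_plan and parsed back by read_plan_from_log_file above is a plain line-oriented format — the thread count, the per-thread iteration count, the DDL statement count, the DDL statements themselves, and then one generated statement per iteration per thread. An illustrative file for 2 threads and 2 iterations each (all names and values are random at runtime):

2
2
1
CREATE TABLE quiet_river (solid_lake INTEGER PRIMARY KEY, warm_hill TEXT NOT NULL);
INSERT INTO quiet_river (solid_lake, warm_hill) VALUES (42, 'bold_fox');
DELETE FROM quiet_river WHERE solid_lake = 17;
UPDATE quiet_river SET warm_hill = 'calm_owl' WHERE solid_lake = 903;
INSERT INTO quiet_river (solid_lake, warm_hill) VALUES (7, 'pale_sea');

The single-line CREATE TABLE shape follows the join(",") change to to_sql in the patch above, which keeps each DDL statement on one log line so the loader can parse the file line by line.]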
From 8f202e80155bacfb00310a46030fa39a4b6b8c99 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Thu, 10 Apr 2025 15:06:18 +0300
Subject: [PATCH 153/425] test/fuzz: modify compound index scan fuzz to
 utilize both pk columns in where clause

---
 tests/integration/fuzz/mod.rs | 125 ++++++++++++++++++++++------------
 1 file changed, 83 insertions(+), 42 deletions(-)

diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs
index 5df3b49b4..73263e5f1 100644
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@@ -232,12 +232,8 @@ mod tests {
 
         const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="];
 
-        const ORDER_BY: [Option<&str>; 4] = [
-            None,
-            Some("ORDER BY x"),
-            Some("ORDER BY x DESC"),
-            Some("ORDER BY x ASC"),
-        ];
+        const ORDER_BY: [Option<&str>; 3] = [None, Some("ORDER BY x DESC"), Some("ORDER BY x ASC")];
+        const SECONDARY_ORDER_BY: [Option<&str>; 3] = [None, Some(", y DESC"), Some(", y ASC")];
 
         let print_dump_on_fail = |insert: &str, seed: u64| {
             let comment = format!("-- seed: {}; dump for manual debugging:", seed);
@@ -252,56 +248,101 @@ mod tests {
 
         for comp in COMPARISONS.iter() {
             for order_by in ORDER_BY.iter() {
-                for max in 0..=3000 {
-                    // see comment below about ordering and the '=' comparison operator; omitting LIMIT for that reason
-                    // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure
-                    // correctness in the = case
-                    let limit = if *comp == "=" { "" } else { "LIMIT 5" };
+                // make it more likely that the full 2-column index is utilized for seeking
+                let iter_count_per_permutation = if *comp == "=" { 2000 } else { 500 };
+                println!(
+                    "fuzzing {} iterations with comp: {:?}, order_by: {:?}",
+                    iter_count_per_permutation, comp, order_by
+                );
+                for _ in 0..iter_count_per_permutation {
+                    let first_col_val = rng.random_range(0..=3000);
+                    let mut limit = "LIMIT 5";
+                    let mut second_idx_col_cond = "".to_string();
+                    let mut second_idx_col_comp = "".to_string();
+
+                    // sometimes include the second index column in the where clause.
+                    // make it more probable when first column has '=' constraint since those queries are usually faster to run
+                    let second_col_prob = if *comp == "=" { 0.7 } else { 0.02 };
+                    if rng.random_bool(second_col_prob) {
+                        let second_idx_col = rng.random_range(0..3000);
+
+                        second_idx_col_comp =
+                            COMPARISONS[rng.random_range(0..COMPARISONS.len())].to_string();
+                        second_idx_col_cond =
+                            format!(" AND y {} {}", second_idx_col_comp, second_idx_col);
+                    }
+
+                    // if the first constraint is =, then half the time, use the second index column in the ORDER BY too
+                    let mut secondary_order_by = None;
+                    let use_secondary_order_by = order_by.is_some()
+                        && *comp == "="
+                        && second_idx_col_comp != ""
+                        && rng.random_bool(0.5);
+                    let full_order_by = if use_secondary_order_by {
+                        secondary_order_by =
+                            SECONDARY_ORDER_BY[rng.random_range(0..SECONDARY_ORDER_BY.len())];
+                        if let Some(secondary) = secondary_order_by {
+                            format!("{}{}", order_by.unwrap_or(""), secondary,)
+                        } else {
+                            order_by.unwrap_or("").to_string()
+                        }
+                    } else {
+                        order_by.unwrap_or("").to_string()
+                    };
+
+                    // There are certain cases where SQLite does not bother iterating in reverse order despite the ORDER BY.
+                    // These cases include e.g.
+                    // SELECT * FROM t WHERE x = 3 ORDER BY x DESC
+                    // SELECT * FROM t WHERE x = 3 and y < 100 ORDER BY x DESC
+                    //
+                    // The common thread being that the ORDER BY column is also constrained by an equality predicate, meaning
+                    // that it doesn't semantically matter what the ordering is.
+                    //
+                    // We do not currently replicate this "lazy" behavior, so in these cases we want the full result set and ensure
+                    // that if the result is not exactly equal, then the ordering must be the exact reverse.
+                    let allow_reverse_ordering = {
+                        if *comp != "=" {
+                            false
+                        } else if secondary_order_by.is_some() {
+                            second_idx_col_comp == "="
+                        } else {
+                            true
+                        }
+                    };
+                    if allow_reverse_ordering {
+                        // see comment above about ordering and the '=' comparison operator; omitting LIMIT for that reason
+                        // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure
+                        // correctness in the = case
+                        limit = "";
+                    }
                     let query = format!(
-                        "SELECT * FROM t WHERE x {} {} {} {}",
-                        comp,
-                        max,
-                        order_by.unwrap_or(""),
-                        limit
+                        // e.g. 
SELECT * FROM t WHERE x = 1 AND y > 2 ORDER BY x DESC LIMIT 5 + "SELECT * FROM t WHERE x {} {} {} {} {}", + comp, first_col_val, second_idx_col_cond, full_order_by, limit, ); - log::trace!("query: {}", query); + log::debug!("query: {}", query); let limbo = limbo_exec_rows(&db, &limbo_conn, &query); let sqlite = sqlite_exec_rows(&sqlite_conn, &query); let is_equal = limbo == sqlite; if !is_equal { - // if the condition is = and the same rows are present but in different order, then we accept that - // e.g. sqlite doesn't bother iterating in reverse order if "WHERE X = 3 ORDER BY X DESC", but we currently do. - if *comp == "=" { + if allow_reverse_ordering { let limbo_row_count = limbo.len(); let sqlite_row_count = sqlite.len(); if limbo_row_count == sqlite_row_count { - for limbo_row in limbo.iter() { - if !sqlite.contains(limbo_row) { - // save insert to file and print the filename for debugging - let error_msg = format!("row not found in sqlite: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); - print_dump_on_fail(&insert, seed); - panic!("{}", error_msg); - } - } - for sqlite_row in sqlite.iter() { - if !limbo.contains(sqlite_row) { - let error_msg = format!("row not found in limbo: query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", query, limbo, sqlite, seed); - print_dump_on_fail(&insert, seed); - panic!("{}", error_msg); - } - } - continue; + let limbo_rev = limbo.iter().cloned().rev().collect::>(); + assert_eq!(limbo_rev, sqlite, "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", query, limbo, sqlite, seed, allow_reverse_ordering); } else { print_dump_on_fail(&insert, seed); - let error_msg = format!("row count mismatch (limbo: {}, sqlite: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed); + let error_msg = format!("row count mismatch (limbo row count: {}, sqlite row count: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed, allow_reverse_ordering); panic!("{}", error_msg); } + } else { + print_dump_on_fail(&insert, seed); + panic!( + "query: {}, limbo row count: {}, limbo: {:?}, sqlite row count: {}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", + query, limbo.len(), limbo, sqlite.len(), sqlite, seed, allow_reverse_ordering + ); } - print_dump_on_fail(&insert, seed); - panic!( - "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}", - query, limbo, sqlite, seed - ); } } } From afad06fb2388c68034f7f0336e52b1ddb8a917ae Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 10 Apr 2025 15:06:45 +0300 Subject: [PATCH 154/425] vdbe/explain: add key info to Seek/Idx insns --- core/vdbe/explain.rs | 64 ++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 41 deletions(-) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 3d46bc41b..3ce60f5db 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -748,28 +748,28 @@ pub fn insn_to_str( is_index: _, cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } | Insn::SeekGE { is_index: _, cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } | Insn::SeekLE { is_index: _, cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } | Insn::SeekLT { is_index: _, cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } => ( match insn { @@ -784,7 +784,7 @@ pub fn insn_to_str( *start_reg as i32, OwnedValue::build_text(""), 0, - "".to_string(), + format!("key=[{}..{}]", 
start_reg, start_reg + num_regs - 1), ), Insn::SeekEnd { cursor_id } => ( "SeekEnd", @@ -822,58 +822,40 @@ pub fn insn_to_str( Insn::IdxGT { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxGT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxGE { + } + | Insn::IdxGE { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxGE", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxLT { + } + | Insn::IdxLE { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, - } => ( - "IdxLT", - *cursor_id as i32, - target_pc.to_debug_int(), - *start_reg as i32, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::IdxLE { + } + | Insn::IdxLT { cursor_id, start_reg, - num_regs: _, + num_regs, target_pc, } => ( - "IdxLE", + match insn { + Insn::IdxGT { .. } => "IdxGT", + Insn::IdxGE { .. } => "IdxGE", + Insn::IdxLE { .. } => "IdxLE", + Insn::IdxLT { .. } => "IdxLT", + _ => unreachable!(), + }, *cursor_id as i32, target_pc.to_debug_int(), *start_reg as i32, OwnedValue::build_text(""), 0, - "".to_string(), + format!("key=[{}..{}]", start_reg, start_reg + num_regs - 1), ), Insn::DecrJumpZero { reg, target_pc } => ( "DecrJumpZero", From 457bded14da4f8cf87b55cf2543352c6d2cfa0a4 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 10 Apr 2025 15:09:18 +0300 Subject: [PATCH 155/425] optimizer: refactor optimizer to support multicolumn index scans --- core/translate/emitter.rs | 12 +- core/translate/main_loop.rs | 515 +++++++++++++------------- core/translate/optimizer.rs | 714 +++++++++++++++++++++++++++--------- core/translate/plan.rs | 95 ++++- core/translate/select.rs | 4 +- 5 files changed, 884 insertions(+), 456 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5049bb738..21e311bba 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -397,10 +397,12 @@ fn emit_delete_insns( let cursor_id = match &table_reference.op { Operation::Scan { .. } => program.resolve_cursor_id(&table_reference.identifier), Operation::Search(search) => match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { + Search::RowidEq { .. } | Search::Seek { index: None, .. } => { program.resolve_cursor_id(&table_reference.identifier) } - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + Search::Seek { + index: Some(index), .. + } => program.resolve_cursor_id(&index.name), }, _ => return Ok(()), }; @@ -537,12 +539,14 @@ fn emit_update_insns( table_ref.virtual_table().is_some(), ), Operation::Search(search) => match search { - &Search::RowidEq { .. } | Search::RowidSearch { .. } => ( + &Search::RowidEq { .. } | Search::Seek { index: None, .. } => ( program.resolve_cursor_id(&table_ref.identifier), None, false, ), - Search::IndexSearch { index, .. } => ( + Search::Seek { + index: Some(index), .. 
+ } => ( program.resolve_cursor_id(&table_ref.identifier), Some((index.clone(), program.resolve_cursor_id(&index.name))), false, diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 51bd05382..8409e31c9 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,8 +1,7 @@ -use limbo_sqlite3_parser::ast; - use crate::{ schema::Table, translate::result_row::emit_select_result, + types::SeekOp, vdbe::{ builder::{CursorType, ProgramBuilder}, insn::{CmpInsFlags, Insn}, @@ -18,8 +17,8 @@ use super::{ group_by::is_column_in_group_by, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Operation, Search, SelectPlan, SelectQueryType, TableReference, - WhereTerm, + IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType, + TableReference, WhereTerm, }, }; @@ -166,7 +165,10 @@ pub fn init_loop( } } - if let Search::IndexSearch { index, .. } = search { + if let Search::Seek { + index: Some(index), .. + } = search + { let index_cursor_id = program.alloc_cursor_id( Some(index.name.clone()), CursorType::BTreeIndex(index.clone()), @@ -381,268 +383,42 @@ pub fn open_loop( }); } else { // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. - let index_cursor_id = if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = if let Search::Seek { + index: Some(index), .. + } = search + { Some(program.resolve_cursor_id(&index.name)) } else { None }; - let (cmp_expr, cmp_op, iter_dir) = match search { - Search::IndexSearch { - cmp_expr, - cmp_op, - iter_dir, - .. - } => (cmp_expr, cmp_op, iter_dir), - Search::RowidSearch { - cmp_expr, - cmp_op, - iter_dir, - } => (cmp_expr, cmp_op, iter_dir), - Search::RowidEq { .. } => unreachable!(), + let is_index = index_cursor_id.is_some(); + let seek_cursor_id = index_cursor_id.unwrap_or(table_cursor_id); + let Search::Seek { seek_def, .. } = search else { + unreachable!("Rowid equality point lookup should have been handled above"); }; - // There are a few steps in an index seek: - // 1. Emit the comparison expression for the rowid/index seek. For example, if we a clause 'WHERE index_key >= 10', we emit the comparison expression 10 into cmp_reg. - // - // 2. Emit the seek instruction. SeekGE and SeekGT are used in forwards iteration, SeekLT and SeekLE are used in backwards iteration. - // All of the examples below assume an ascending index, because we do not support descending indexes yet. - // If we are scanning the ascending index: - // - Forwards, and have a GT/GE/EQ comparison, the comparison expression from step 1 is used as the value to seek to, because that is the lowest possible value that satisfies the clause. - // - Forwards, and have a LT/LE comparison, NULL is used as the comparison expression because we actually want to start scanning from the beginning of the index. - // - Backwards, and have a GT/GE comparison, no Seek instruction is emitted and we emit LastAsync instead, because we want to start scanning from the end of the index. - // - Backwards, and have a LT/LE/EQ comparison, we emit a Seek instruction with the comparison expression from step 1 as the value to seek to, since that is the highest possible - // value that satisfies the clause. - let seek_cmp_reg = program.alloc_register(); - let mut comparison_expr_translated = false; - match (cmp_op, iter_dir) { - // Forwards, GT/GE/EQ -> use the comparison expression (i.e. 
seek to the first key where the cmp expr is satisfied, and then scan forwards) - ( - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals, - IterationDirection::Forwards, - ) => { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - seek_cmp_reg, - &t_ctx.resolver, - )?; - comparison_expr_translated = true; - match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => { - program.emit_insn(Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: seek_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } - ast::Operator::Greater => { - program.emit_insn(Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: seek_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } - _ => unreachable!(), - } - } - // Forwards, LT/LE -> use NULL (i.e. start from the beginning of the index) - ( - ast::Operator::Less | ast::Operator::LessEquals, - IterationDirection::Forwards, - ) => { - program.emit_insn(Insn::Null { - dest: seek_cmp_reg, - dest_end: None, - }); - program.emit_insn(Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: seek_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } - // Backwards, GT/GE -> no seek, emit LastAsync (i.e. start from the end of the index) - ( - ast::Operator::Greater | ast::Operator::GreaterEquals, - IterationDirection::Backwards, - ) => { - program.emit_insn(Insn::LastAsync { - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - }); - program.emit_insn(Insn::LastAwait { - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - pc_if_empty: loop_end, - }); - } - // Backwards, LT/LE/EQ -> use the comparison expression (i.e. seek from the end of the index until the cmp expr is satisfied, and then scan backwards) - ( - ast::Operator::Less | ast::Operator::LessEquals | ast::Operator::Equals, - IterationDirection::Backwards, - ) => { - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - seek_cmp_reg, - &t_ctx.resolver, - )?; - comparison_expr_translated = true; - match cmp_op { - ast::Operator::Less => { - program.emit_insn(Insn::SeekLT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: seek_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } - ast::Operator::LessEquals | ast::Operator::Equals => { - program.emit_insn(Insn::SeekLE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: seek_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } - _ => unreachable!(), - } - } - _ => unreachable!(), - }; - - program.resolve_label(loop_start, program.offset()); - - let scan_terminating_cmp_reg = if comparison_expr_translated { - seek_cmp_reg - } else { - let reg = program.alloc_register(); - translate_expr( - program, - Some(tables), - &cmp_expr.expr, - reg, - &t_ctx.resolver, - )?; - reg - }; - - // 3. Emit a scan-terminating comparison instruction (IdxGT, IdxGE, IdxLT, IdxLE if index; GT, GE, LT, LE if btree rowid scan). - // Here the comparison expression from step 1 is compared to the current index key and the loop is exited if the comparison is true. - // The comparison operator used in the Idx__ instruction is the inverse of the WHERE clause comparison operator. 
- // For example, if we are scanning forwards and have a clause 'WHERE index_key < 10', we emit IdxGE(10) since >=10 is the first key where our condition is not satisfied anymore. - match (cmp_op, iter_dir) { - // Forwards, <= -> terminate if > - ( - ast::Operator::Equals | ast::Operator::LessEquals, - IterationDirection::Forwards, - ) => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: scan_terminating_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Gt { - lhs: rowid_reg, - rhs: scan_terminating_cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - // Forwards, < -> terminate if >= - (ast::Operator::Less, IterationDirection::Forwards) => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: scan_terminating_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Ge { - lhs: rowid_reg, - rhs: scan_terminating_cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - // Backwards, >= -> terminate if < - ( - ast::Operator::Equals | ast::Operator::GreaterEquals, - IterationDirection::Backwards, - ) => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxLT { - cursor_id: index_cursor_id, - start_reg: scan_terminating_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Lt { - lhs: rowid_reg, - rhs: scan_terminating_cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - // Backwards, > -> terminate if <= - (ast::Operator::Greater, IterationDirection::Backwards) => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::IdxLE { - cursor_id: index_cursor_id, - start_reg: scan_terminating_cmp_reg, - num_regs: 1, - target_pc: loop_end, - }); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn(Insn::Le { - lhs: rowid_reg, - rhs: scan_terminating_cmp_reg, - target_pc: loop_end, - flags: CmpInsFlags::default(), - }); - } - } - // Forwards, > and >= -> we already did a seek to the first key where the cmp expr is satisfied, so we dont have a terminating condition - // Backwards, < and <= -> we already did a seek to the last key where the cmp expr is satisfied, so we dont have a terminating condition - _ => {} - } + let start_reg = program.alloc_registers(seek_def.key.len()); + emit_seek( + program, + tables, + seek_def, + t_ctx, + seek_cursor_id, + start_reg, + loop_end, + is_index, + )?; + emit_seek_termination( + program, + tables, + seek_def, + t_ctx, + seek_cursor_id, + start_reg, + loop_start, + loop_end, + is_index, + )?; if let Some(index_cursor_id) = index_cursor_id { // Don't do a btree table seek until it's actually necessary to read from the table. 
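To make the new seek machinery concrete, here is a sketch (illustrative only, not code from this patch; `expr(...)` is a hypothetical helper standing in for the translated `ast::Expr` literals) of the `SeekDef` that the optimizer builds, via `build_seek_def` later in this series, for `CREATE INDEX i ON t (x, y)` and `SELECT * FROM t WHERE x = 1 AND y >= 30` scanned forwards:

    let seek_def = SeekDef {
        // The full index key [1, 30].
        key: vec![expr("1"), expr("30")],
        // Position the cursor at the first index entry >= (1, 30).
        seek: Some(SeekKey { len: 2, op: SeekOp::GE }),
        // Stop the scan at the first entry > (1), i.e. once x moves past 1.
        termination: Some(TerminationKey { len: 1, op: SeekOp::GT }),
        iter_dir: IterationDirection::Forwards,
    };

`emit_seek` above lowers the seek to a single `SeekGE` over both key registers, and `emit_seek_termination` lowers the termination to an `IdxGT` comparison against the first key register at the top of the loop.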
@@ -1002,12 +778,19 @@ pub fn close_loop( // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. if !matches!(search, Search::RowidEq { .. }) { let (cursor_id, iter_dir) = match search { - Search::IndexSearch { - index, iter_dir, .. - } => (program.resolve_cursor_id(&index.name), *iter_dir), - Search::RowidSearch { iter_dir, .. } => { - (program.resolve_cursor_id(&table.identifier), *iter_dir) - } + Search::Seek { + index: Some(index), + seek_def, + .. + } => (program.resolve_cursor_id(&index.name), seek_def.iter_dir), + Search::Seek { + index: None, + seek_def, + .. + } => ( + program.resolve_cursor_id(&table.identifier), + seek_def.iter_dir, + ), Search::RowidEq { .. } => unreachable!(), }; @@ -1074,3 +857,201 @@ pub fn close_loop( } Ok(()) } + +/// Emits instructions for an index seek. See e.g. [crate::translate::plan::SeekDef] +/// for more details about the seek definition. +/// +/// Index seeks always position the cursor to the first row that matches the seek key, +/// and then continue to emit rows until the termination condition is reached, +/// see [emit_seek_termination] below. +/// +/// If either 1. the seek finds no rows or 2. the termination condition is reached, +/// the loop for that given table/index is fully exited. +#[allow(clippy::too_many_arguments)] +fn emit_seek( + program: &mut ProgramBuilder, + tables: &[TableReference], + seek_def: &SeekDef, + t_ctx: &mut TranslateCtx, + seek_cursor_id: usize, + start_reg: usize, + loop_end: BranchOffset, + is_index: bool, +) -> Result<()> { + let Some(seek) = seek_def.seek.as_ref() else { + assert!(seek_def.iter_dir == IterationDirection::Backwards, "A SeekDef without a seek operation should only be used in backwards iteration direction"); + program.emit_insn(Insn::LastAsync { + cursor_id: seek_cursor_id, + }); + program.emit_insn(Insn::LastAwait { + cursor_id: seek_cursor_id, + pc_if_empty: loop_end, + }); + return Ok(()); + }; + // We allocated registers for the full index key, but our seek key might not use the full index key. + // Later on for the termination condition we will overwrite the NULL registers. + // See [crate::translate::optimizer::build_seek_def] for more details about in which cases we do and don't use the full index key. + for i in 0..seek_def.key.len() { + let reg = start_reg + i; + if i >= seek.len { + if seek_def.null_pad_unset_cols() { + program.emit_insn(Insn::Null { + dest: reg, + dest_end: None, + }); + } + } else { + translate_expr( + program, + Some(tables), + &seek_def.key[i], + reg, + &t_ctx.resolver, + )?; + } + } + let num_regs = if seek_def.null_pad_unset_cols() { + seek_def.key.len() + } else { + seek.len + }; + match seek.op { + SeekOp::GE => program.emit_insn(Insn::SeekGE { + is_index, + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + SeekOp::GT => program.emit_insn(Insn::SeekGT { + is_index, + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + SeekOp::LE => program.emit_insn(Insn::SeekLE { + is_index, + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + SeekOp::LT => program.emit_insn(Insn::SeekLT { + is_index, + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + SeekOp::EQ => panic!("An index seek is never EQ"), + }; + + Ok(()) +} + +/// Emits instructions for an index seek termination. See e.g. 
[crate::translate::plan::SeekDef] +/// for more details about the seek definition. +/// +/// Index seeks always position the cursor to the first row that matches the seek key +/// (see [emit_seek] above), and then continue to emit rows until the termination condition +/// (if any) is reached. +/// +/// If the termination condition is not present, the cursor is fully scanned to the end. +#[allow(clippy::too_many_arguments)] +fn emit_seek_termination( + program: &mut ProgramBuilder, + tables: &[TableReference], + seek_def: &SeekDef, + t_ctx: &mut TranslateCtx, + seek_cursor_id: usize, + start_reg: usize, + loop_start: BranchOffset, + loop_end: BranchOffset, + is_index: bool, +) -> Result<()> { + let Some(termination) = seek_def.termination.as_ref() else { + program.resolve_label(loop_start, program.offset()); + return Ok(()); + }; + let num_regs = termination.len; + // If the seek termination was preceded by a seek (which happens in most cases), + // we can re-use the registers that were allocated for the full index key. + let start_idx = seek_def.seek.as_ref().map_or(0, |seek| seek.len); + for i in start_idx..termination.len { + let reg = start_reg + i; + translate_expr( + program, + Some(tables), + &seek_def.key[i], + reg, + &t_ctx.resolver, + )?; + } + program.resolve_label(loop_start, program.offset()); + let mut rowid_reg = None; + if !is_index { + rowid_reg = Some(program.alloc_register()); + program.emit_insn(Insn::RowId { + cursor_id: seek_cursor_id, + dest: rowid_reg.unwrap(), + }); + } + + match (is_index, termination.op) { + (true, SeekOp::GE) => program.emit_insn(Insn::IdxGE { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::GT) => program.emit_insn(Insn::IdxGT { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::LE) => program.emit_insn(Insn::IdxLE { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (true, SeekOp::LT) => program.emit_insn(Insn::IdxLT { + cursor_id: seek_cursor_id, + start_reg, + num_regs, + target_pc: loop_end, + }), + (false, SeekOp::GE) => program.emit_insn(Insn::Ge { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::GT) => program.emit_insn(Insn::Gt { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::LE) => program.emit_insn(Insn::Le { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (false, SeekOp::LT) => program.emit_insn(Insn::Lt { + lhs: rowid_reg.unwrap(), + rhs: start_reg, + target_pc: loop_end, + flags: CmpInsFlags::default(), + }), + (_, SeekOp::EQ) => { + panic!("An index termination condition is never EQ") + } + }; + + Ok(()) +} diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 772ed81e7..3483cd913 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -4,13 +4,15 @@ use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ schema::{Index, Schema}, + translate::plan::TerminationKey, + types::SeekOp, util::exprs_are_equivalent, Result, }; use super::plan::{ - DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SelectPlan, - TableReference, UpdatePlan, WhereTerm, + DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey, + SelectPlan, TableReference, UpdatePlan, WhereTerm, }; pub fn 
optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { @@ -296,24 +298,57 @@ fn use_indexes( ) -> Result<()> { // Try to use indexes for eliminating ORDER BY clauses eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + // Try to use indexes for WHERE conditions - 'outer: for (table_index, table_reference) in table_references.iter_mut().enumerate() { - if let Operation::Scan { iter_dir, .. } = &table_reference.op { - let mut i = 0; - while i < where_clause.len() { - let cond = where_clause.get_mut(i).unwrap(); - if let Some(index_search) = try_extract_index_search_expression( - cond, - table_index, - table_reference, - available_indexes, - *iter_dir, - )? { - where_clause.remove(i); - table_reference.op = Operation::Search(index_search); - continue 'outer; + for (table_index, table_reference) in table_references.iter_mut().enumerate() { + if let Operation::Scan { + iter_dir, index, .. + } = &table_reference.op + { + match index { + // If we decided to eliminate ORDER BY using an index, let's constrain our search to only that index + Some(index) => { + let available_indexes = available_indexes + .values() + .flatten() + .filter(|i| i.name == index.name) + .cloned() + .collect::>(); + if let Some(search) = try_extract_index_search_from_where_clause( + where_clause, + table_index, + table_reference, + &available_indexes, + *iter_dir, + )? { + table_reference.op = Operation::Search(search); + } + } + None => { + let table_name = table_reference.table.get_name(); + + // If we can utilize the rowid alias of the table, let's preferentially always use it for now. + for (i, term) in where_clause.iter_mut().enumerate() { + if let Some(search) = + try_extract_rowid_search_expression(term, table_index, *iter_dir)? + { + where_clause.remove(i); + table_reference.op = Operation::Search(search); + return Ok(()); + } + } + if let Some(indexes) = available_indexes.get(table_name) { + if let Some(search) = try_extract_index_search_from_where_clause( + where_clause, + table_index, + table_reference, + indexes, + *iter_dir, + )? { + table_reference.op = Operation::Search(search); + } + } } - i += 1; } } } @@ -431,12 +466,6 @@ pub trait Optimizable { .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } fn is_rowid_alias_of(&self, table_index: usize) -> bool; - fn check_index_scan( - &mut self, - table_index: usize, - table_reference: &TableReference, - available_indexes: &HashMap>>, - ) -> Result>>; } impl Optimizable for ast::Expr { @@ -450,79 +479,6 @@ impl Optimizable for ast::Expr { _ => false, } } - fn check_index_scan( - &mut self, - table_index: usize, - table_reference: &TableReference, - available_indexes: &HashMap>>, - ) -> Result>> { - match self { - Self::Column { table, column, .. } => { - if *table != table_index { - return Ok(None); - } - let Some(available_indexes_for_table) = - available_indexes.get(table_reference.table.get_name()) - else { - return Ok(None); - }; - let Some(column) = table_reference.table.get_column_at(*column) else { - return Ok(None); - }; - for index in available_indexes_for_table.iter() { - if let Some(name) = column.name.as_ref() { - if &index.columns.first().unwrap().name == name { - return Ok(Some(index.clone())); - } - } - } - Ok(None) - } - Self::Binary(lhs, op, rhs) => { - // Only consider index scans for binary ops that are comparisons. - // e.g. "t1.id = t2.id" is a valid index scan, but "t1.id + 1" is not. - // - // TODO/optimization: consider detecting index scan on e.g. 
table t1 in - // "WHERE t1.id + 1 = t2.id" - // here the Expr could be rewritten to "t1.id = t2.id - 1" - // and then t1.id could be used as an index key. - if !matches!( - *op, - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - ) { - return Ok(None); - } - let lhs_index = - lhs.check_index_scan(table_index, &table_reference, available_indexes)?; - if lhs_index.is_some() { - return Ok(lhs_index); - } - let rhs_index = - rhs.check_index_scan(table_index, &table_reference, available_indexes)?; - if rhs_index.is_some() { - // swap lhs and rhs - let swapped_operator = match *op { - ast::Operator::Equals => ast::Operator::Equals, - ast::Operator::Greater => ast::Operator::Less, - ast::Operator::GreaterEquals => ast::Operator::LessEquals, - ast::Operator::Less => ast::Operator::Greater, - ast::Operator::LessEquals => ast::Operator::GreaterEquals, - _ => unreachable!(), - }; - let lhs_new = rhs.take_ownership(); - let rhs_new = lhs.take_ownership(); - *self = Self::Binary(Box::new(lhs_new), swapped_operator, Box::new(rhs_new)); - return Ok(rhs_index); - } - Ok(None) - } - _ => Ok(None), - } - } fn check_constant(&self) -> Result> { match self { Self::Literal(lit) => match lit { @@ -652,11 +608,494 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator { } } -pub fn try_extract_index_search_expression( - cond: &mut WhereTerm, +/// Struct used for scoring index scans +/// Currently we just score by the number of index columns that can be utilized +/// in the scan, i.e. no statistics are used. +struct IndexScore { + index: Option>, + score: usize, + constraints: Vec, +} + +/// Try to extract an index search from the WHERE clause +/// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None. +pub fn try_extract_index_search_from_where_clause( + where_clause: &mut Vec, table_index: usize, table_reference: &TableReference, - available_indexes: &HashMap>>, + table_indexes: &[Arc], + iter_dir: IterationDirection, +) -> Result> { + // If there are no WHERE terms, we can't extract a search + if where_clause.is_empty() { + return Ok(None); + } + // If there are no indexes, we can't extract a search + if table_indexes.is_empty() { + return Ok(None); + } + + // Find all potential index constraints + // For WHERE terms to be used to constrain an index scan, they must: + // 1. refer to columns in the table that the index is on + // 2. be a binary comparison expression + // 3. constrain the index columns in the order that they appear in the index + // - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan, + // - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1". + let mut constraints_cur = vec![]; + let mut best_index = IndexScore { + index: None, + score: 0, + constraints: vec![], + }; + + for index in table_indexes { + // Check how many terms in the where clause constrain the index in column order + find_index_constraints( + where_clause, + table_index, + table_reference, + index, + &mut constraints_cur, + )?; + // naive scoring since we don't have statistics: prefer the index where we can use the most columns + // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c. 
+ let score = constraints_cur.len(); + if score > best_index.score { + best_index.index = Some(Arc::clone(index)); + best_index.score = score; + best_index.constraints.clear(); + best_index.constraints.append(&mut constraints_cur); + } + } + + if best_index.index.is_none() { + return Ok(None); + } + + // Build the seek definition + let seek_def = + build_seek_def_from_index_constraints(&best_index.constraints, iter_dir, where_clause)?; + + // Remove the used terms from the where_clause since they are now part of the seek definition + // Sort terms by position in descending order to avoid shifting indices during removal + best_index.constraints.sort_by(|a, b| { + b.position_in_where_clause + .0 + .cmp(&a.position_in_where_clause.0) + }); + + for constraint in best_index.constraints.iter() { + where_clause.remove(constraint.position_in_where_clause.0); + } + + return Ok(Some(Search::Seek { + index: best_index.index, + seek_def, + })); +} + +#[derive(Debug, Clone)] +/// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key. +/// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z", +/// the index constraints are: +/// - x > 10 ==> IndexConstraint { position_in_where_clause: (0, [BinaryExprSide::Rhs]), operator: [ast::Operator::Greater] } +/// - 20 = z ==> IndexConstraint { position_in_where_clause: (1, [BinaryExprSide::Lhs]), operator: [ast::Operator::Equals] } +pub struct IndexConstraint { + position_in_where_clause: (usize, BinaryExprSide), + operator: ast::Operator, +} + +/// Helper enum for [IndexConstraint] to indicate which side of a binary comparison expression is being compared to the index column. +/// For example, if the where clause is "WHERE x = 10" and there's an index on x, +/// the [IndexConstraint] for the where clause term "x = 10" will have a [BinaryExprSide::Rhs] +/// because the right hand side expression "10" is being compared to the index column "x". +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BinaryExprSide { + Lhs, + Rhs, +} + +/// Get the position of a column in an index +/// For example, if there is an index on table T(x,y) then y's position in the index is 1. +fn get_column_position_in_index( + expr: &ast::Expr, + table_index: usize, + table_reference: &TableReference, + index: &Arc, +) -> Option { + let ast::Expr::Column { table, column, .. } = expr else { + return None; + }; + if *table != table_index { + return None; + } + let Some(column) = table_reference.table.get_column_at(*column) else { + return None; + }; + index + .columns + .iter() + .position(|col| Some(&col.name) == column.name.as_ref()) +} + +/// Find all [IndexConstraint]s for a given WHERE clause +/// Constraints are appended as long as they constrain the index in column order. +/// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c. +/// If e.g. only a and c are present, then only the first column 'a' of the index will be used. 
+fn find_index_constraints( + where_clause: &mut Vec, + table_index: usize, + table_reference: &TableReference, + index: &Arc, + out_constraints: &mut Vec, +) -> Result<()> { + for position_in_index in 0..index.columns.len() { + let mut found = false; + for (position_in_where_clause, term) in where_clause.iter().enumerate() { + // Skip terms that cannot be evaluated at this table's loop level + if !term.should_eval_at_loop(table_index) { + continue; + } + // Skip terms that are not binary comparisons + let ast::Expr::Binary(lhs, operator, rhs) = &term.expr else { + continue; + }; + // Only consider index scans for binary ops that are comparisons + if !matches!( + *operator, + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals + | ast::Operator::Less + | ast::Operator::LessEquals + ) { + continue; + } + + // Check if lhs is a column that is in the i'th position of the index + if Some(position_in_index) + == get_column_position_in_index(lhs, table_index, table_reference, index) + { + out_constraints.push(IndexConstraint { + operator: *operator, + position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs), + }); + found = true; + break; + } + // Check if rhs is a column that is in the i'th position of the index + if Some(position_in_index) + == get_column_position_in_index(rhs, table_index, table_reference, index) + { + out_constraints.push(IndexConstraint { + operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5 + position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs), + }); + found = true; + break; + } + } + if !found { + // Expressions must constrain index columns in index definition order. If we didn't find a constraint for the i'th index column, + // then we stop here and return the constraints we have found so far. + break; + } + } + + // In a multicolumn index, only the last term can have a nonequality expression. + // For example, imagine an index on (x,y) and the where clause is "WHERE x > 10 AND y > 20"; + // We can't use GT(x: 10,y: 20) as the seek key, because the first row greater than (x: 10,y: 20) + // might be e.g. (x: 10,y: 21), which does not satisfy the where clause, but a row after that e.g. (x: 11,y: 21) does. + // So: + // - in this case only GT(x: 10) can be used as the seek key, and we must emit a regular condition expression for y > 20 while scanning. + // On the other hand, if the where clause is "WHERE x = 10 AND y > 20", we can use GT(x=10,y=20) as the seek key, + // because any rows where (x=10,y=20) < ROW < (x=11) will match the where clause. + for i in 0..out_constraints.len() { + if out_constraints[i].operator != ast::Operator::Equals { + out_constraints.truncate(i + 1); + break; + } + } + + Ok(()) +} + +/// Build a [SeekDef] for a given list of [IndexConstraint]s +pub fn build_seek_def_from_index_constraints( + constraints: &[IndexConstraint], + iter_dir: IterationDirection, + where_clause: &mut Vec, +) -> Result { + assert!( + !constraints.is_empty(), + "cannot build seek def from empty list of index constraints" + ); + // Extract the key values and operators + let mut key = Vec::with_capacity(constraints.len()); + + for constraint in constraints { + // Extract the other expression from the binary WhereTerm (i.e. 
the one being compared to the index column) + let (idx, side) = constraint.position_in_where_clause; + let where_term = &mut where_clause[idx]; + let ast::Expr::Binary(lhs, _, rhs) = where_term.expr.take_ownership() else { + crate::bail_parse_error!("expected binary expression"); + }; + let cmp_expr = if side == BinaryExprSide::Lhs { + *lhs + } else { + *rhs + }; + key.push(cmp_expr); + } + + // We know all but potentially the last term is an equality, so we can use the operator of the last term + // to form the SeekOp + let op = constraints.last().unwrap().operator; + + build_seek_def(op, iter_dir, key) +} + +/// Build a [SeekDef] for a given comparison operator and index key. +/// To be usable as a seek key, all but potentially the last term must be equalities. +/// The last term can be a nonequality. +/// The comparison operator referred to by `op` is the operator of the last term. +/// +/// There are two parts to the seek definition: +/// 1. The [SeekKey], which specifies the key that we will use to seek to the first row that matches the index key. +/// 2. The [TerminationKey], which specifies the key that we will use to terminate the index scan that follows the seek. +/// +/// There are some nuances to how, and which parts of, the index key can be used in the [SeekKey] and [TerminationKey], +/// depending on the operator and iteration direction. This function explains those nuances inline when dealing with +/// each case. +/// +/// But to illustrate the general idea, consider the following examples: +/// +/// 1. For example, having two conditions like (x>10 AND y>20) cannot be used as a valid [SeekKey] GT(x:10, y:20) +/// because the first row greater than (x:10, y:20) might be (x:10, y:21), which does not satisfy the where clause. +/// In this case, only GT(x:10) must be used as the [SeekKey], and rows with y <= 20 must be filtered as a regular condition expression for each value of x. +/// +/// 2. In contrast, having (x=10 AND y>20) forms a valid index key GT(x:10, y:20) because after the seek, we can simply terminate as soon as x > 10, +/// i.e. use GT(x:10, y:20) as the [SeekKey] and GT(x:10) as the [TerminationKey]. 
+/// +fn build_seek_def( + op: ast::Operator, + iter_dir: IterationDirection, + key: Vec, +) -> Result { + let key_len = key.len(); + Ok(match (iter_dir, op) { + // Forwards, EQ: + // Example: (x=10 AND y=20) + // Seek key: GE(x:10, y:20) + // Termination key: GT(x:10, y:20) + (IterationDirection::Forwards, ast::Operator::Equals) => SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::GE, + }), + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::GT, + }), + }, + // Forwards, GT: + // Example: (x=10 AND y>20) + // Seek key: GT(x:10, y:20) + // Termination key: GT(x:10) + (IterationDirection::Forwards, ast::Operator::Greater) => { + let termination_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::GT, + }), + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: SeekOp::GT, + }) + } else { + None + }, + } + } + // Forwards, GE: + // Example: (x=10 AND y>=20) + // Seek key: GE(x:10, y:20) + // Termination key: GT(x:10) + (IterationDirection::Forwards, ast::Operator::GreaterEquals) => { + let termination_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::GE, + }), + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: SeekOp::GT, + }) + } else { + None + }, + } + } + // Forwards, LT: + // Example: (x=10 AND y<20) + // Seek key: GT(x:10, y: NULL) // NULL is always LT, indicating we only care about x + // Termination key: GE(x:10, y:20) + (IterationDirection::Forwards, ast::Operator::Less) => SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len - 1, + op: SeekOp::GT, + }), + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::GE, + }), + }, + // Forwards, LE: + // Example: (x=10 AND y<=20) + // Seek key: GE(x:10, y:NULL) // NULL is always LT, indicating we only care about x + // Termination key: GT(x:10, y:20) + (IterationDirection::Forwards, ast::Operator::LessEquals) => SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len - 1, + op: SeekOp::GE, + }), + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::GT, + }), + }, + // Backwards, EQ: + // Example: (x=10 AND y=20) + // Seek key: LE(x:10, y:20) + // Termination key: LT(x:10, y:20) + (IterationDirection::Backwards, ast::Operator::Equals) => SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::LE, + }), + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::LT, + }), + }, + // Backwards, LT: + // Example: (x=10 AND y<20) + // Seek key: LT(x:10, y:20) + // Termination key: LT(x:10) + (IterationDirection::Backwards, ast::Operator::Less) => { + let termination_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::LT, + }), + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: SeekOp::LT, + }) + } else { + None + }, + } + } + // Backwards, LE: + // Example: (x=10 AND y<=20) + // Seek key: LE(x:10, y:20) + // Termination key: LT(x:10) + (IterationDirection::Backwards, ast::Operator::LessEquals) => { + let termination_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: Some(SeekKey { + len: key_len, + op: SeekOp::LE, + }), + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: SeekOp::LT, + }) + } else { + None + }, + } + } + // Backwards, GT: + // 
Example: (x=10 AND y>20) + // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. + // Termination key: LE(x:10, y:20) + (IterationDirection::Backwards, ast::Operator::Greater) => { + let seek_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: SeekOp::LE, + }) + } else { + None + }, + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::LE, + }), + } + } + // Backwards, GE: + // Example: (x=10 AND y>=20) + // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. + // Termination key: LT(x:10, y:20) + (IterationDirection::Backwards, ast::Operator::GreaterEquals) => { + let seek_key_len = key_len - 1; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: SeekOp::LE, + }) + } else { + None + }, + termination: Some(TerminationKey { + len: key_len, + op: SeekOp::LT, + }), + } + } + (_, op) => { + crate::bail_parse_error!("build_seek_def: invalid operator: {:?}", op,) + } + }) +} + +pub fn try_extract_rowid_search_expression( + cond: &mut WhereTerm, + table_index: usize, iter_dir: IterationDirection, ) -> Result> { if !cond.should_eval_at_loop(table_index) { @@ -681,14 +1120,10 @@ pub fn try_extract_index_search_expression( | ast::Operator::Less | ast::Operator::LessEquals => { let rhs_owned = rhs.take_ownership(); - return Ok(Some(Search::RowidSearch { - cmp_op: *operator, - cmp_expr: WhereTerm { - expr: rhs_owned, - from_outer_join: cond.from_outer_join, - eval_at: cond.eval_at, - }, - iter_dir, + let seek_def = build_seek_def(*operator, iter_dir, vec![rhs_owned])?; + return Ok(Some(Search::Seek { + index: None, + seek_def, })); } _ => {} @@ -712,64 +1147,11 @@ pub fn try_extract_index_search_expression( | ast::Operator::Less | ast::Operator::LessEquals => { let lhs_owned = lhs.take_ownership(); - return Ok(Some(Search::RowidSearch { - cmp_op: opposite_cmp_op(*operator), - cmp_expr: WhereTerm { - expr: lhs_owned, - from_outer_join: cond.from_outer_join, - eval_at: cond.eval_at, - }, - iter_dir, - })); - } - _ => {} - } - } - - if let Some(index_rc) = - lhs.check_index_scan(table_index, &table_reference, available_indexes)? - { - match operator { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals => { - let rhs_owned = rhs.take_ownership(); - return Ok(Some(Search::IndexSearch { - index: index_rc, - cmp_op: *operator, - cmp_expr: WhereTerm { - expr: rhs_owned, - from_outer_join: cond.from_outer_join, - eval_at: cond.eval_at, - }, - iter_dir, - })); - } - _ => {} - } - } - - if let Some(index_rc) = - rhs.check_index_scan(table_index, &table_reference, available_indexes)? 
- { - match operator { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals => { - let lhs_owned = lhs.take_ownership(); - return Ok(Some(Search::IndexSearch { - index: index_rc, - cmp_op: opposite_cmp_op(*operator), - cmp_expr: WhereTerm { - expr: lhs_owned, - from_outer_join: cond.from_outer_join, - eval_at: cond.eval_at, - }, - iter_dir, + let op = opposite_cmp_op(*operator); + let seek_def = build_seek_def(op, iter_dir, vec![lhs_owned])?; + return Ok(Some(Search::Seek { + index: None, + seek_def, })); } _ => {} diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3958f9f81..ab7bc893c 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -7,13 +7,16 @@ use std::{ sync::Arc, }; -use crate::schema::{PseudoTable, Type}; use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index, Table}, vdbe::BranchOffset, VirtualTable, }; +use crate::{ + schema::{PseudoTable, Type}, + types::SeekOp, +}; #[derive(Debug, Clone)] pub struct ResultSetColumn { @@ -325,6 +328,68 @@ impl TableReference { } } +/// A definition of a rowid/index search. +/// +/// [SeekKey] is the condition that is used to seek to a specific row in a table/index. +/// [TerminationKey] is the condition that is used to terminate the search after a seek. +#[derive(Debug, Clone)] +pub struct SeekDef { + /// The key to use when seeking and when terminating the scan that follows the seek. + /// For example, given: + /// - CREATE INDEX i ON t (x, y) + /// - SELECT * FROM t WHERE x = 1 AND y >= 30 + /// The key is [1, 30] + pub key: Vec, + /// The condition to use when seeking. See [SeekKey] for more details. + pub seek: Option, + /// The condition to use when terminating the scan that follows the seek. See [TerminationKey] for more details. + pub termination: Option, + /// The direction of the scan that follows the seek. + pub iter_dir: IterationDirection, +} + +impl SeekDef { + /// Whether we should null pad unset columns when seeking. + /// This is only done for forward seeks. + /// The reason it is done is that sometimes our full index key is not used in seeking. + /// See [SeekKey] for more details. + /// + /// For example, given: + /// - CREATE INDEX i ON t (x, y) + /// - SELECT * FROM t WHERE x = 1 AND y < 30 + /// We want to seek to the first row where x = 1, and then iterate forwards. + /// In this case, the seek key is GT(1, NULL) since '30' cannot be used to seek (since we want y < 30), + /// and any value of y will be greater than NULL. + /// + /// In backwards iteration direction, we do not null pad because we want to seek to the last row that matches the seek key. + /// For example, given: + /// - CREATE INDEX i ON t (x, y) + /// - SELECT * FROM t WHERE x = 1 AND y > 30 ORDER BY y + /// We want to seek to the last row where x = 1, and then iterate backwards. + /// In this case, the seek key is just LE(1) so any row with x = 1 will be a match. + pub fn null_pad_unset_cols(&self) -> bool { + self.iter_dir == IterationDirection::Forwards + } +} + +/// A condition to use when seeking. +#[derive(Debug, Clone)] +pub struct SeekKey { + /// How many columns from [SeekDef::key] are used in seeking. + pub len: usize, + /// The comparison operator to use when seeking. + pub op: SeekOp, +} + +#[derive(Debug, Clone)] +/// A condition to use when terminating the scan that follows a seek. +pub struct TerminationKey { + /// How many columns from [SeekDef::key] are used in terminating the scan that follows the seek. 
+ pub len: usize, + /// The comparison operator to use when terminating the scan that follows the seek. + pub op: SeekOp, +} + /// An enum that represents a search operation that can be used to search for a row in a table using an index /// (i.e. a primary key or a secondary index) #[allow(clippy::enum_variant_names)] @@ -332,18 +397,10 @@ impl TableReference { pub enum Search { /// A rowid equality point lookup. This is a special case that uses the SeekRowid bytecode instruction and does not loop. RowidEq { cmp_expr: WhereTerm }, - /// A rowid search. Uses bytecode instructions like SeekGT, SeekGE etc. - RowidSearch { - cmp_op: ast::Operator, - cmp_expr: WhereTerm, - iter_dir: IterationDirection, - }, - /// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc. - IndexSearch { - index: Arc, - cmp_op: ast::Operator, - cmp_expr: WhereTerm, - iter_dir: IterationDirection, + /// A search on a table btree (via `rowid`) or a secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc. + Seek { + index: Option>, + seek_def: SeekDef, }, } @@ -420,14 +477,16 @@ impl Display for SelectPlan { writeln!(f, "{}SCAN {}", indent, table_name)?; } Operation::Search(search) => match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { + Search::RowidEq { .. } | Search::Seek { index: None, .. } => { writeln!( f, "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)", indent, reference.identifier )?; } - Search::IndexSearch { index, .. } => { + Search::Seek { + index: Some(index), .. + } => { writeln!( f, "{}SEARCH {} USING INDEX {}", @@ -509,14 +568,16 @@ impl fmt::Display for UpdatePlan { } } Operation::Search(search) => match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { + Search::RowidEq { .. } | Search::Seek { index: None, .. } => { writeln!( f, "{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)", indent, reference.identifier )?; } - Search::IndexSearch { index, .. } => { + Search::Seek { + index: Some(index), .. + } => { writeln!( f, "{}SEARCH {} USING INDEX {}", diff --git a/core/translate/select.rs b/core/translate/select.rs index bde61880f..24a6331e5 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -411,8 +411,8 @@ fn count_plan_required_cursors(plan: &SelectPlan) -> usize { .map(|t| match &t.op { Operation::Scan { .. } => 1, Operation::Search(search) => match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => 1, - Search::IndexSearch { .. } => 2, // btree cursor and index cursor + Search::RowidEq { .. } => 1, + Search::Seek { index, .. } => 1 + index.is_some() as usize, }, Operation::Subquery { plan, .. 
} => count_plan_required_cursors(plan), }) From 4755acb5712572ce105143497bec7f44549dbbe6 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 15:03:56 +0200 Subject: [PATCH 156/425] init tracing in stress tool --- stress/Cargo.toml | 3 +++ stress/main.rs | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/stress/Cargo.toml b/stress/Cargo.toml index 6f7a0a9e9..9c0097d45 100644 --- a/stress/Cargo.toml +++ b/stress/Cargo.toml @@ -22,3 +22,6 @@ serde_json = "1.0.139" tokio = { version = "1.29.1", features = ["full"] } anarchist-readable-name-generator-lib = "0.1.0" hex = "0.4" +tracing = "0.1.41" +tracing-appender = "0.2.3" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/stress/main.rs b/stress/main.rs index bd687a231..1dd0943c2 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -12,6 +12,10 @@ use std::collections::HashSet; use std::fs::File; use std::io::{Read, Write}; use std::sync::Arc; +use tracing_appender::non_blocking::WorkerGuard; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::EnvFilter; pub struct Plan { pub ddl_statements: Vec, @@ -364,8 +368,27 @@ fn read_plan_from_log_file(opts: &Opts) -> Result Result { + let (non_blocking, guard) = tracing_appender::non_blocking(std::io::stderr()); + if let Err(e) = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_writer(non_blocking) + .with_ansi(false) + .with_line_number(true) + .with_thread_ids(true), + ) + .with(EnvFilter::from_default_env()) + .try_init() + { + println!("Unable to setup tracing appender: {:?}", e); + } + Ok(guard) +} + #[tokio::main] async fn main() -> Result<(), Box> { + let _g = init_tracing()?; let (num_nodes, main_id) = (1, "n-001"); let startup_data = json!({ "num_nodes": num_nodes, From 8e93471d0095211ca3c4d2a358b42bbbd16dbfd2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 15:05:20 +0200 Subject: [PATCH 157/425] fix cell index selection while balancing Cell index doesn't move in `move_to` unless we don't need to check next cell. On the other hand, with rightmost pointer, we advance cell index by 1 even though where we are moving to was to that page --- core/storage/btree.rs | 20 +++++++++++--------- core/vdbe/execute.rs | 35 ++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 8f4afb090..7b722d348 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1179,6 +1179,7 @@ impl BTreeCursor { pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp); + tracing::trace!("backtrace: {}", std::backtrace::Backtrace::force_capture()); // For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers. // B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data. // @@ -1630,12 +1631,6 @@ impl BTreeCursor { let write_info = self.state.mut_write_info().unwrap(); write_info.state = WriteState::BalanceNonRoot; self.stack.pop(); - // with `move_to` we advance the current cell idx of TableInterior once we move to left subtree. - // On the other hand, with IndexInterior, we do not because we tranver in-order. 
In the latter case - // since we haven't consumed the cell we can avoid retreating the current cell index. - if matches!(current_page.get_contents().page_type(), PageType::TableLeaf) { - self.stack.retreat(); - } return_if_io!(self.balance_non_root()); } WriteState::BalanceNonRoot | WriteState::BalanceNonRootWaitLoadPages => { @@ -1660,10 +1655,14 @@ impl BTreeCursor { WriteState::BalanceStart => todo!(), WriteState::BalanceNonRoot => { let parent_page = self.stack.top(); - if parent_page.is_locked() { - return Ok(CursorResult::IO); - } return_if_locked_maybe_load!(self.pager, parent_page); + // If `move_to` moved to rightmost page, cell index will be out of bounds. Meaning cell_count+1. + // In any other case, `move_to` will stay in the correct index. + if self.stack.current_cell_index() as usize + == parent_page.get_contents().cell_count() + 1 + { + self.stack.retreat(); + } parent_page.set_dirty(); self.pager.add_dirty(parent_page.get().id); let parent_contents = parent_page.get().contents.as_ref().unwrap(); @@ -2871,6 +2870,7 @@ impl BTreeCursor { &mut child_contents.overflow_cells, &mut root_contents.overflow_cells, ); + root_contents.overflow_cells.clear(); // 2. Modify root let new_root_page_type = match root_contents.page_type() { @@ -3133,6 +3133,7 @@ impl BTreeCursor { key: &BTreeKey, moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */ ) -> Result> { + tracing::trace!("insert"); match &self.mv_cursor { Some(mv_cursor) => match key.maybe_rowid() { Some(rowid) => { @@ -3144,6 +3145,7 @@ impl BTreeCursor { None => todo!("Support mvcc inserts with index btrees"), }, None => { + tracing::trace!("moved {}", moved_before); if !moved_before { self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); match key { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 2b71ee716..6827ba83b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3798,6 +3798,7 @@ pub fn op_idx_insert_async( pager: &Rc, mv_store: Option<&Rc>, ) -> Result { + dbg!("op_idx_insert_async"); if let Insn::IdxInsertAsync { cursor_id, record_reg, @@ -3816,29 +3817,29 @@ pub fn op_idx_insert_async( Register::Record(ref r) => r, _ => return Err(LimboError::InternalError("expected record".into())), }; - let moved_before = if index_meta.unique { - // check for uniqueness violation - match cursor.key_exists_in_index(record)? { - CursorResult::Ok(true) => { - return Err(LimboError::Constraint( - "UNIQUE constraint failed: duplicate key".into(), - )) - } - CursorResult::IO => return Ok(InsnFunctionStepResult::IO), - CursorResult::Ok(false) => {} - }; - false - } else { - flags.has(IdxInsertFlags::USE_SEEK) - }; - // To make this reentrant in case of `moved_before` = false, we need to check if the previous cursor.insert started // a write/balancing operation. If it did, it means we already moved to the place we wanted. let moved_before = if cursor.is_write_in_progress() { true } else { - moved_before + if index_meta.unique { + // check for uniqueness violation + match cursor.key_exists_in_index(record)? { + CursorResult::Ok(true) => { + return Err(LimboError::Constraint( + "UNIQUE constraint failed: duplicate key".into(), + )) + } + CursorResult::IO => return Ok(InsnFunctionStepResult::IO), + CursorResult::Ok(false) => {} + }; + false + } else { + flags.has(IdxInsertFlags::USE_SEEK) + } }; + + dbg!(moved_before); // Start insertion of row. 
This might trigger a balance procedure which will take care of moving to different pages, // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to `Await` opcode // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. From b35d805a81e54ece743abe4f6b5e8820d70287a2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 15:05:27 +0200 Subject: [PATCH 158/425] tracing lock stress --- Cargo.lock | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 14c1df80e..73c0fccc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1901,6 +1901,9 @@ dependencies = [ "limbo", "serde_json", "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", ] [[package]] From 62d0febdb64d96f3bacacea1251ab80d7da5de3d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 15:59:47 +0200 Subject: [PATCH 159/425] panic on corruption --- core/storage/btree.rs | 7 ++++++- stress/main.rs | 23 +++++++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 7b722d348..31aa6b2f8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2534,6 +2534,7 @@ impl BTreeCursor { // Let's now make a in depth check that we in fact added all possible cells somewhere and they are not lost for (page_idx, page) in pages_to_balance_new.iter().enumerate() { let contents = page.get_contents(); + debug_validate_cells!(contents, self.usable_space() as u16); // Cells are distributed in order for cell_idx in 0..contents.cell_count() { let (cell_start, cell_len) = contents.cell_get_raw_region( @@ -4370,7 +4371,11 @@ fn free_cell_range( } } if removed_fragmentation > page.num_frag_free_bytes() { - return_corrupt!("Invalid fragmentation count"); + return_corrupt!(format!( + "Invalid fragmentation count. 
Had {} and removed {}", + page.num_frag_free_bytes(), + removed_fragmentation + )); } let frag = page.num_frag_free_bytes() - removed_fragmentation; page.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, frag); diff --git a/stress/main.rs b/stress/main.rs index 1dd0943c2..75a61c4ba 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -4,6 +4,7 @@ use anarchist_readable_name_generator_lib::readable_name_custom; use antithesis_sdk::random::{get_random, AntithesisRng}; use antithesis_sdk::*; use clap::Parser; +use core::panic; use hex; use limbo::Builder; use opts::Opts; @@ -417,7 +418,16 @@ async fn main() -> Result<(), Box> { for stmt in &plan.ddl_statements { println!("executing ddl {}", stmt); if let Err(e) = conn.execute(stmt, ()).await { - println!("Error creating table: {}", e); + match e { + limbo::Error::SqlExecutionFailure(e) => { + if e.contains("Corrupt database") { + panic!("Error creating table: {}", e); + } else { + println!("Error creating table: {}", e); + } + } + _ => panic!("Error creating table: {}", e), + } } } @@ -430,7 +440,16 @@ async fn main() -> Result<(), Box> { let sql = &plan.queries_per_thread[thread][query_index]; println!("executing: {}", sql); if let Err(e) = conn.execute(&sql, ()).await { - println!("Error: {}", e); + match e { + limbo::Error::SqlExecutionFailure(e) => { + if e.contains("Corrupt database") { + panic!("Error executing query: {}", e); + } else { + println!("Error executing query: {}", e); + } + } + _ => panic!("Error executing query: {}", e), + } } } Ok::<_, Box>(()) From 712a4caa22807321a599c176800bf07f007fb1c4 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 10 Apr 2025 18:39:20 +0300 Subject: [PATCH 160/425] stress: Fix per-thread query generation --- stress/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/stress/main.rs b/stress/main.rs index bd687a231..40b041ea1 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -319,6 +319,7 @@ fn generate_plan(opts: &Opts) -> Result Date: Thu, 10 Apr 2025 18:17:46 +0300 Subject: [PATCH 161/425] Fix bug: accidentally skipped index selection for other tables except first found --- core/translate/optimizer.rs | 41 +++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 3483cd913..609acd906 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -301,10 +301,12 @@ fn use_indexes( // Try to use indexes for WHERE conditions for (table_index, table_reference) in table_references.iter_mut().enumerate() { - if let Operation::Scan { - iter_dir, index, .. - } = &table_reference.op - { + if matches!(table_reference.op, Operation::Scan { .. }) { + let index = if let Operation::Scan { index, .. } = &table_reference.op { + Option::clone(index) + } else { + None + }; match index { // If we decided to eliminate ORDER BY using an index, let's constrain our search to only that index Some(index) => { @@ -319,7 +321,6 @@ fn use_indexes( table_index, table_reference, &available_indexes, - *iter_dir, )? { table_reference.op = Operation::Search(search); } @@ -328,13 +329,18 @@ fn use_indexes( let table_name = table_reference.table.get_name(); // If we can utilize the rowid alias of the table, let's preferentially always use it for now. - for (i, term) in where_clause.iter_mut().enumerate() { - if let Some(search) = - try_extract_rowid_search_expression(term, table_index, *iter_dir)? 
- { + let mut i = 0; + while i < where_clause.len() { + if let Some(search) = try_extract_rowid_search_expression( + &mut where_clause[i], + table_index, + table_reference, + )? { where_clause.remove(i); table_reference.op = Operation::Search(search); - return Ok(()); + continue; + } else { + i += 1; } } if let Some(indexes) = available_indexes.get(table_name) { @@ -343,7 +349,6 @@ fn use_indexes( table_index, table_reference, indexes, - *iter_dir, )? { table_reference.op = Operation::Search(search); } @@ -624,7 +629,6 @@ pub fn try_extract_index_search_from_where_clause( table_index: usize, table_reference: &TableReference, table_indexes: &[Arc], - iter_dir: IterationDirection, ) -> Result> { // If there are no WHERE terms, we can't extract a search if where_clause.is_empty() { @@ -635,6 +639,12 @@ pub fn try_extract_index_search_from_where_clause( return Ok(None); } + let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op { + *iter_dir + } else { + return Ok(None); + }; + // Find all potential index constraints // For WHERE terms to be used to constrain an index scan, they must: // 1. refer to columns in the table that the index is on @@ -1096,8 +1106,13 @@ fn build_seek_def( pub fn try_extract_rowid_search_expression( cond: &mut WhereTerm, table_index: usize, - iter_dir: IterationDirection, + table_reference: &TableReference, ) -> Result> { + let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op { + *iter_dir + } else { + return Ok(None); + }; if !cond.should_eval_at_loop(table_index) { return Ok(None); } From 506c1a236cf00b741a5c1b4795b56698e0ca9841 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 18:08:29 +0200 Subject: [PATCH 162/425] find_free_cell fix use of `no_offset` writes --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 001049dfb..e3405322a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4117,7 +4117,7 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> R return Ok(0); } // Delete the slot from freelist and update the page's fragment count. - page_ref.write_u16(prev_pc, next); + page_ref.write_u16_no_offset(prev_pc, next); let frag = page_ref.num_frag_free_bytes() + new_size as u8; page_ref.write_u8(offset::BTREE_FRAGMENTED_BYTES_COUNT, frag); return Ok(pc); @@ -4126,7 +4126,7 @@ fn find_free_cell(page_ref: &PageContent, usable_space: u16, amount: usize) -> R } else { // Requested amount fits inside the current free slot so we reduce its size // to account for newly allocated space. - page_ref.write_u16(pc + 2, new_size as u16); + page_ref.write_u16_no_offset(pc + 2, new_size as u16); return Ok(pc + new_size); } } From 038d78f0960949d35f2994090740d14ee212d517 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 22:14:30 +0200 Subject: [PATCH 163/425] overwrite when payload is equal size as current cell only Prevoiusly we would overwrite even though size less than cell size. This was wrong because it didn't update any fragment size or free blocks it could. To be safe let's just overwrite only if local size is the same amount. 
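To make the failure mode concrete (illustrative sizes, not taken from a real trace): if the existing cell stores 100 bytes locally and the new payload is only 60 bytes, an in-place overwrite leaves 40 trailing bytes that sit neither on the page's free list nor in the fragmented-bytes counter, so a later consistency check such as the "Invalid fragmentation count" guard in `free_cell_range` can flag the page as corrupt. Restricting the in-place path to payloads whose length equals the old local size exactly sidesteps that bookkeeping; every other size takes the drop-and-reinsert path, which updates the free-space accounting as usual.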
--- core/storage/btree.rs | 47 ++++--------------------------------------- 1 file changed, 4 insertions(+), 43 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e3405322a..dc6e24e58 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1924,6 +1924,7 @@ impl BTreeCursor { let mut count_cells_in_old_pages = Vec::new(); let page_type = balance_info.pages_to_balance[0].get_contents().page_type(); + tracing::debug!("balance_non_root(page_type={:?})", page_type); let leaf_data = matches!(page_type, PageType::TableLeaf); let leaf = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); for (i, old_page) in balance_info.pages_to_balance.iter().enumerate() { @@ -3859,25 +3860,8 @@ impl BTreeCursor { }; // if it all fits in local space and old_local_size is enough, do an in-place overwrite - if new_payload.len() <= old_local_size { - self.overwrite_content( - page_ref.clone(), - old_offset, - &new_payload, - 0, - new_payload.len(), - )?; - let remaining = old_local_size - new_payload.len(); - if remaining > 0 { - // fill the rest with zeros - self.overwrite_content( - page_ref.clone(), - old_offset + new_payload.len(), - &[0; 1], - 0, - remaining, - )?; - } + if new_payload.len() == old_local_size { + self.overwrite_content(page_ref.clone(), old_offset, &new_payload)?; Ok(CursorResult::Ok(())) } else { // doesn't fit, drop it and insert a new one @@ -3901,36 +3885,13 @@ impl BTreeCursor { page_ref: PageRef, dest_offset: usize, new_payload: &[u8], - src_offset: usize, - amount: usize, ) -> Result> { return_if_locked!(page_ref); page_ref.set_dirty(); self.pager.add_dirty(page_ref.get().id); let buf = page_ref.get().contents.as_mut().unwrap().as_ptr(); + buf[dest_offset..dest_offset + new_payload.len()].copy_from_slice(&new_payload); - // if new_payload doesn't have enough data, we fill with zeros - let n_data = new_payload.len().saturating_sub(src_offset); - if n_data == 0 { - // everything is zeros - for i in 0..amount { - if buf[dest_offset + i] != 0 { - buf[dest_offset + i] = 0; - } - } - } else { - let copy_len = n_data.min(amount); - // copy the overlapping portion - buf[dest_offset..dest_offset + copy_len] - .copy_from_slice(&new_payload[src_offset..src_offset + copy_len]); - - // if copy_len < amount => fill remainder with 0 - if copy_len < amount { - for i in copy_len..amount { - buf[dest_offset + i] = 0; - } - } - } Ok(CursorResult::Ok(())) } From 745c2b92d0baa81b7242048c6880efe75b2292de Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 10 Apr 2025 22:19:09 +0200 Subject: [PATCH 164/425] unnecessary dirty set on overwrite --- core/storage/btree.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index dc6e24e58..9ab993d2d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3887,8 +3887,6 @@ impl BTreeCursor { new_payload: &[u8], ) -> Result> { return_if_locked!(page_ref); - page_ref.set_dirty(); - self.pager.add_dirty(page_ref.get().id); let buf = page_ref.get().contents.as_mut().unwrap().as_ptr(); buf[dest_offset..dest_offset + new_payload.len()].copy_from_slice(&new_payload); From b7acfa490c88cb4cd3aa70aaf1791ce83d6768e3 Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 04:30:57 +0530 Subject: [PATCH 165/425] feat: add timediff data and time function --- core/function.rs | 8 +++ core/functions/datetime.rs | 118 +++++++++++++++++++++++++++++++++++++ core/translate/expr.rs | 27 +++++++++ core/vdbe/execute.rs | 15 +++++ 4 files changed, 168 insertions(+) diff 
--git a/core/function.rs b/core/function.rs index 4c235cca5..41613c8c8 100644 --- a/core/function.rs +++ b/core/function.rs @@ -293,6 +293,7 @@ pub enum ScalarFunc { StrfTime, Printf, Likely, + TimeDiff, } impl Display for ScalarFunc { @@ -348,6 +349,7 @@ impl Display for ScalarFunc { Self::StrfTime => "strftime".to_string(), Self::Printf => "printf".to_string(), Self::Likely => "likely".to_string(), + Self::TimeDiff => "timediff".to_string(), }; write!(f, "{}", str) } @@ -555,6 +557,12 @@ impl Func { } Ok(Self::Agg(AggFunc::Total)) } + "timediff" => { + if arg_count != 2 { + crate::bail_parse_error!("wrong number of arguments to function {}()", name) + } + Ok(Self::Scalar(ScalarFunc::TimeDiff)) + } #[cfg(feature = "json")] "jsonb_group_array" => Ok(Self::Agg(AggFunc::JsonbGroupArray)), #[cfg(feature = "json")] diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index 294fbfb2d..e9988f03f 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -656,6 +656,61 @@ fn parse_modifier(modifier: &str) -> Result { } } +pub fn exec_timediff(values: &[Register]) -> OwnedValue { + if values.len() < 2 { + return OwnedValue::Null; + } + + let start = parse_naive_date_time(values[0].get_owned_value()); + let end = parse_naive_date_time(values[1].get_owned_value()); + + match (start, end) { + (Some(start), Some(end)) => { + let duration = start.signed_duration_since(end); + format_time_duration(&duration) + } + _ => OwnedValue::Null, + } +} + +fn format_time_duration(duration: &chrono::Duration) -> OwnedValue { + let is_negative = duration.num_seconds() < 0; + + let abs_duration = if is_negative { + -duration.clone() + } else { + duration.clone() + }; + let total_seconds = abs_duration.num_seconds(); + let hours = total_seconds / 3600; + let minutes = (total_seconds % 3600) / 60; + let seconds = total_seconds % 60; + + let total_millis = abs_duration.num_milliseconds(); + let millis = total_millis % 1000; + + let result = if millis > 0 { + format!( + "{}{:02}:{:02}:{:02}.{:03}", + if is_negative { "-" } else { "" }, + hours, + minutes, + seconds, + millis + ) + } else { + format!( + "{}{:02}:{:02}:{:02}", + if is_negative { "-" } else { "" }, + hours, + minutes, + seconds + ) + }; + + OwnedValue::build_text(&result) +} + #[cfg(test)] mod tests { use super::*; @@ -1642,4 +1697,67 @@ mod tests { #[test] fn test_strftime() {} + + #[test] + fn test_exec_timediff() { + let start = OwnedValue::build_text("12:00:00"); + let end = OwnedValue::build_text("14:30:45"); + let expected = OwnedValue::build_text("-02:30:45"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("14:30:45"); + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::build_text("02:30:45"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("12:00:01.300"); + let end = OwnedValue::build_text("12:00:00.500"); + let expected = OwnedValue::build_text("00:00:00.800"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("13:30:00"); + let end = OwnedValue::build_text("16:45:30"); + let expected = OwnedValue::build_text("-03:15:30"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("2023-05-10 23:30:00"); + let end = 
OwnedValue::build_text("2023-05-11 01:15:00"); + let expected = OwnedValue::build_text("-01:45:00"); + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::Null; + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("not a time"); + let end = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!( + exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), + expected + ); + + let start = OwnedValue::build_text("12:00:00"); + let expected = OwnedValue::Null; + assert_eq!(exec_timediff(&[Register::OwnedValue(start)]), expected); + } } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 7bb0dc228..fbd7680f4 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1309,6 +1309,33 @@ pub fn translate_expr( }); Ok(target_register) } + ScalarFunc::TimeDiff => { + let args = expect_arguments_exact!(args, 2, srf); + + let start_reg = program.alloc_registers(2); + translate_expr( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + translate_expr( + program, + referenced_tables, + &args[1], + start_reg + 1, + resolver, + )?; + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } ScalarFunc::TotalChanges => { if args.is_some() { crate::bail_parse_error!( diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 6827ba83b..e5194536d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3406,6 +3406,21 @@ pub fn op_function( let result = exec_time(values); state.registers[*dest] = Register::OwnedValue(result); } + ScalarFunc::TimeDiff => { + if arg_count != 2 { + state.registers[*dest] = Register::OwnedValue(OwnedValue::Null); + } else { + let start = state.registers[*start_reg].get_owned_value().clone(); + let end = state.registers[*start_reg + 1].get_owned_value().clone(); + + let result = crate::functions::datetime::exec_timediff(&[ + Register::OwnedValue(start), + Register::OwnedValue(end), + ]); + + state.registers[*dest] = Register::OwnedValue(result); + } + } ScalarFunc::TotalChanges => { let res = &program.connection.upgrade().unwrap().total_changes; let total_changes = res.get(); From ded308ccfadbaa79346e652e5aeb2cc5e40c6b38 Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 04:40:09 +0530 Subject: [PATCH 166/425] additional tests --- testing/scalar-functions-datetime.test | 68 ++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index fd450dc02..db09d27fe 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -589,3 +589,71 @@ set FMT [list %S.%3f %C %y %b %B %h %a %A %D %x %v %.f %.3f %.6f %.9f %3f %6f %9 foreach i $FMT { do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {} } + +# Tests for the TIMEDIFF function + +do_execsql_test timediff-basic-positive { + SELECT timediff('14:30:45', '12:00:00'); +} {02:30:45} + +do_execsql_test timediff-basic-negative { + SELECT timediff('12:00:00', '14:30:45'); +} {-02:30:45} + +do_execsql_test timediff-with-milliseconds-positive { + SELECT timediff('12:00:01.300', '12:00:00.500'); +} {00:00:00.800} + + 
+do_execsql_test timediff-same-time { + SELECT timediff('12:00:00', '12:00:00'); +} {00:00:00} + +do_execsql_test timediff-across-dates { + SELECT timediff('2023-05-11 01:15:00', '2023-05-10 23:30:00'); +} {01:45:00} + +do_execsql_test timediff-across-dates-negative { + SELECT timediff('2023-05-10 23:30:00', '2023-05-11 01:15:00'); +} {-01:45:00} + +do_execsql_test timediff-different-formats { + SELECT timediff('2023-05-10T23:30:00', '2023-05-10 14:15:00'); +} {09:15:00} + +do_execsql_test timediff-with-timezone { + SELECT timediff('2023-05-10 23:30:00+02:00', '2023-05-10 18:30:00Z'); +} {03:00:00} + +do_execsql_test timediff-large-difference { + SELECT timediff('2023-05-12 10:00:00', '2023-05-10 08:00:00'); +} {50:00:00} + +do_execsql_test timediff-with-seconds-precision { + SELECT timediff('12:30:45.123', '12:30:44.987'); +} {00:00:00.136} + +do_execsql_test timediff-null-first-arg { + SELECT timediff(NULL, '12:00:00'); +} {{}} + +do_execsql_test timediff-null-second-arg { + SELECT timediff('12:00:00', NULL); +} {{}} + +do_execsql_test timediff-invalid-first-arg { + SELECT timediff('not-a-time', '12:00:00'); +} {{}} + +do_execsql_test timediff-invalid-second-arg { + SELECT timediff('12:00:00', 'not-a-time'); +} {{}} + + +do_execsql_test timediff-julian-day { + SELECT timediff(2460000, 2460000.5); +} {-12:00:00} + +do_execsql_test timediff-different-time-formats { + SELECT timediff('23:59:59', '00:00:00'); +} {23:59:59} From 05b4b7b9f196f555ed0fcd07b8319232b94d8f14 Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 04:41:59 +0530 Subject: [PATCH 167/425] edit compat.md --- COMPAT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index e85a47725..2f9a954f7 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -328,7 +328,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). 
| julianday() | Partial | does not support modifiers | | unixepoch() | Partial | does not support modifiers | | strftime() | Yes | partially supports modifiers | -| timediff() | No | | +| timediff() | Yes | partially supports modifiers | Modifiers: From 482e93bfd0b99b225a07d110aba98e2138e7e7ba Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 05:54:23 +0530 Subject: [PATCH 168/425] feat: add likelihood scalar function --- core/function.rs | 3 +++ core/translate/expr.rs | 47 +++++++++++++++++++++++++++++++++++++++ core/vdbe/execute.rs | 50 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 99 insertions(+), 1 deletion(-) diff --git a/core/function.rs b/core/function.rs index 4c235cca5..904ca7b93 100644 --- a/core/function.rs +++ b/core/function.rs @@ -293,6 +293,7 @@ pub enum ScalarFunc { StrfTime, Printf, Likely, + Likelihood, } impl Display for ScalarFunc { @@ -348,6 +349,7 @@ impl Display for ScalarFunc { Self::StrfTime => "strftime".to_string(), Self::Printf => "printf".to_string(), Self::Likely => "likely".to_string(), + Self::Likelihood => "likelihood".to_string(), }; write!(f, "{}", str) } @@ -599,6 +601,7 @@ impl Func { "sqlite_source_id" => Ok(Self::Scalar(ScalarFunc::SqliteSourceId)), "replace" => Ok(Self::Scalar(ScalarFunc::Replace)), "likely" => Ok(Self::Scalar(ScalarFunc::Likely)), + "likelihood" => Ok(Self::Scalar(ScalarFunc::Likelihood)), #[cfg(feature = "json")] "json" => Ok(Self::Json(JsonFunc::Json)), #[cfg(feature = "json")] diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 7bb0dc228..3827dac63 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1598,6 +1598,53 @@ pub fn translate_expr( }); Ok(target_register) } + ScalarFunc::Likelihood => { + let args = if let Some(args) = args { + if args.len() != 2 { + crate::bail_parse_error!( + "likelihood function must have exactly 2 arguments", + ); + } + args + } else { + crate::bail_parse_error!("likelihood function with no arguments",); + }; + + if let ast::Expr::Literal(ast::Literal::Numeric(ref value)) = args[1] { + if let Ok(probability) = value.parse::<f64>() { + if probability < 0.0 || probability > 1.0 { + crate::bail_parse_error!( + "likelihood second argument must be between 0.0 and 1.0", + ); + } + } else { + crate::bail_parse_error!( + "likelihood second argument must be a floating point constant", + ); + } + } else { + crate::bail_parse_error!( + "likelihood second argument must be a numeric literal", + ); + } + + let start_reg = program.alloc_register(); + translate_and_mark( + program, + referenced_tables, + &args[0], + start_reg, + resolver, + )?; + + program.emit_insn(Insn::Copy { + src_reg: start_reg, + dst_reg: target_register, + amount: 0, + }); + + Ok(target_register) + } } } Func::Math(math_func) => match math_func.arity() { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 6827ba83b..0dafeada2 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3505,6 +3505,14 @@ pub fn op_function( let result = exec_likely(value.get_owned_value()); state.registers[*dest] = Register::OwnedValue(result); } + ScalarFunc::Likelihood => { + assert_eq!(arg_count, 2); + let value = &state.registers[*start_reg]; + let probability = &state.registers[*start_reg + 1]; + let result = + exec_likelihood(value.get_owned_value(), probability.get_owned_value()); + state.registers[*dest] = Register::OwnedValue(result); + } }, crate::function::Func::Vector(vector_func) => match vector_func { VectorFunc::Vector => { @@ -5365,6 +5373,10 @@ fn exec_likely(reg:
&OwnedValue) -> OwnedValue { reg.clone() } +fn exec_likelihood(reg: &OwnedValue, _probability: &OwnedValue) -> OwnedValue { + reg.clone() +} + pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { let result = match (lhs, rhs) { (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { @@ -6248,7 +6260,7 @@ mod tests { } use crate::vdbe::{ - execute::{exec_likely, exec_replace}, + execute::{exec_likelihood, exec_likely, exec_replace}, Bitfield, Register, }; @@ -7165,6 +7177,42 @@ mod tests { assert_eq!(exec_likely(&input), expected); } + #[test] + fn test_likelihood() { + let value = OwnedValue::build_text("limbo"); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::build_text("database"); + let prob = OwnedValue::Float(0.9375); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Integer(100); + let prob = OwnedValue::Float(0.0625); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Float(12.34); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Null; + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let value = OwnedValue::Blob(vec![1, 2, 3, 4]); + let prob = OwnedValue::Float(0.5); + assert_eq!(exec_likelihood(&value, &prob), value); + + let prob = OwnedValue::Integer(1); + assert_eq!(exec_likelihood(&value, &prob), value); + + let prob = OwnedValue::build_text("0.5"); + assert_eq!(exec_likelihood(&value, &prob), value); + + let prob = OwnedValue::Null; + assert_eq!(exec_likelihood(&value, &prob), value); + } + #[test] fn test_bitfield() { let mut bitfield = Bitfield::<4>::new(); From 5ffdd42f12a22353e45c91100b2ced8269a9373a Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 06:02:07 +0530 Subject: [PATCH 169/425] Additional tests --- COMPAT.md | 2 +- testing/scalar-functions.test | 36 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index e85a47725..d1ce96b96 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -226,7 +226,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). 
| length(X) | Yes | | | like(X,Y) | Yes | | | like(X,Y,Z) | Yes | | -| likelihood(X,Y) | No | | +| likelihood(X,Y) | Yes | | | likely(X) | Yes | | | load_extension(X) | Yes | sqlite3 extensions not yet supported | | load_extension(X,Y) | No | | diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index 09e99a8f3..a63e80467 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -211,6 +211,42 @@ do_execsql_test likely-null { select likely(NULL) } {} +do_execsql_test likelihood-string { + SELECT likelihood('limbo', 0.5); +} {limbo} + +do_execsql_test likelihood-string-high-probability { + SELECT likelihood('database', 0.9375); +} {database} + +do_execsql_test likelihood-integer { + SELECT likelihood(100, 0.0625); +} {100} + +do_execsql_test likelihood-integer-probability-1 { + SELECT likelihood(42, 1); +} {42} + +do_execsql_test likelihood-decimal { + SELECT likelihood(12.34, 0.5); +} {12.34} + +do_execsql_test likelihood-null { + SELECT likelihood(NULL, 0.5); +} {} + +do_execsql_test likelihood-blob { + SELECT hex(likelihood(x'01020304', 0.5)); +} {01020304} + +do_execsql_test likelihood-zero-probability { + SELECT likelihood(999, 0); +} {999} + +do_execsql_test likelihood-extreme-probability { + SELECT likelihood(999, 1); +} {999} + do_execsql_test unhex-str-ab { SELECT unhex('6162'); } {ab} From 01fa02364d19b558fc83fdb6695de0fb0d8a5f63 Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 08:34:29 +0530 Subject: [PATCH 170/425] correctly handle edge cases --- core/translate/expr.rs | 17 +++++++++++------ core/vdbe/execute.rs | 5 +---- testing/scalar-functions.test | 8 ++------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 3827dac63..02be1db8d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1602,29 +1602,34 @@ pub fn translate_expr( let args = if let Some(args) = args { if args.len() != 2 { crate::bail_parse_error!( - "likelihood function must have exactly 2 arguments", + "likelihood() function must have exactly 2 arguments", ); } args } else { - crate::bail_parse_error!("likelihood function with no arguments",); + crate::bail_parse_error!("likelihood() function with no arguments",); }; if let ast::Expr::Literal(ast::Literal::Numeric(ref value)) = args[1] { if let Ok(probability) = value.parse::<f64>() { - if probability < 0.0 || probability > 1.0 { + if !(0.0..=1.0).contains(&probability) { crate::bail_parse_error!( - "likelihood second argument must be between 0.0 and 1.0", + "second argument of likelihood() must be between 0.0 and 1.0", ); } + if !value.contains('.') { + crate::bail_parse_error!( + "second argument of likelihood() must be a floating point number with decimal point", + ); + } } else { crate::bail_parse_error!( - "likelihood second argument must be a floating point constant", + "second argument of likelihood() must be a floating point constant", ); } } else { crate::bail_parse_error!( - "likelihood second argument must be a numeric literal", + "second argument of likelihood() must be a numeric literal", ); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0dafeada2..8017e8c96 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -7188,7 +7188,7 @@ mod tests { assert_eq!(exec_likelihood(&value, &prob), value); let value = OwnedValue::Integer(100); - let prob = OwnedValue::Float(0.0625); + let prob = OwnedValue::Float(1.0); assert_eq!(exec_likelihood(&value, &prob), value); let value =
OwnedValue::Float(12.34); @@ -7203,9 +7203,6 @@ mod tests { let prob = OwnedValue::Float(0.5); assert_eq!(exec_likelihood(&value, &prob), value); - let prob = OwnedValue::Integer(1); - assert_eq!(exec_likelihood(&value, &prob), value); - let prob = OwnedValue::build_text("0.5"); assert_eq!(exec_likelihood(&value, &prob), value); diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index a63e80467..807c4971d 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -224,7 +224,7 @@ do_execsql_test likelihood-integer { } {100} do_execsql_test likelihood-integer-probability-1 { - SELECT likelihood(42, 1); + SELECT likelihood(42, 1.0); } {42} do_execsql_test likelihood-decimal { @@ -240,11 +240,7 @@ do_execsql_test likelihood-blob { } {01020304} do_execsql_test likelihood-zero-probability { - SELECT likelihood(999, 0); -} {999} - -do_execsql_test likelihood-extreme-probability { - SELECT likelihood(999, 1); + SELECT likelihood(999, 0.0); } {999} do_execsql_test unhex-str-ab { From 23ab387143ea60e4705c7cef821e50a2709c2f49 Mon Sep 17 00:00:00 2001 From: Sachin Singh Date: Fri, 11 Apr 2025 09:59:27 +0530 Subject: [PATCH 171/425] handle formatting issues --- core/functions/datetime.rs | 48 +++++++++++++------------- testing/scalar-functions-datetime.test | 27 +++++++-------- 2 files changed, 37 insertions(+), 38 deletions(-) diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index e9988f03f..864f61787 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -673,6 +673,7 @@ pub fn exec_timediff(values: &[Register]) -> OwnedValue { } } +/// Format the time duration as +/-YYYY-MM-DD HH:MM:SS.SSS as per SQLite's timediff() function fn format_time_duration(duration: &chrono::Duration) -> OwnedValue { let is_negative = duration.num_seconds() < 0; @@ -681,32 +682,31 @@ fn format_time_duration(duration: &chrono::Duration) -> OwnedValue { } else { duration.clone() }; + let total_seconds = abs_duration.num_seconds(); - let hours = total_seconds / 3600; + let hours = (total_seconds % 86400) / 3600; let minutes = (total_seconds % 3600) / 60; let seconds = total_seconds % 60; + let days = total_seconds / 86400; + let years = days / 365; + let remaining_days = days % 365; + let months = 0; + let total_millis = abs_duration.num_milliseconds(); let millis = total_millis % 1000; - let result = if millis > 0 { - format!( - "{}{:02}:{:02}:{:02}.{:03}", - if is_negative { "-" } else { "" }, - hours, - minutes, - seconds, - millis - ) - } else { - format!( - "{}{:02}:{:02}:{:02}", - if is_negative { "-" } else { "" }, - hours, - minutes, - seconds - ) - }; + let result = format!( + "{}{:04}-{:02}-{:02} {:02}:{:02}:{:02}.{:03}", + if is_negative { "-" } else { "+" }, + years, + months, + remaining_days, + hours, + minutes, + seconds, + millis + ); OwnedValue::build_text(&result) } @@ -1702,7 +1702,7 @@ mod tests { fn test_exec_timediff() { let start = OwnedValue::build_text("12:00:00"); let end = OwnedValue::build_text("14:30:45"); - let expected = OwnedValue::build_text("-02:30:45"); + let expected = OwnedValue::build_text("-0000-00-00 02:30:45.000"); assert_eq!( exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), expected @@ -1710,7 +1710,7 @@ mod tests { let start = OwnedValue::build_text("14:30:45"); let end = OwnedValue::build_text("12:00:00"); - let expected = OwnedValue::build_text("02:30:45"); + let expected = OwnedValue::build_text("+0000-00-00 02:30:45.000"); assert_eq!( 
exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), expected @@ -1718,7 +1718,7 @@ mod tests { let start = OwnedValue::build_text("12:00:01.300"); let end = OwnedValue::build_text("12:00:00.500"); - let expected = OwnedValue::build_text("00:00:00.800"); + let expected = OwnedValue::build_text("+0000-00-00 00:00:00.800"); assert_eq!( exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), expected @@ -1726,7 +1726,7 @@ mod tests { let start = OwnedValue::build_text("13:30:00"); let end = OwnedValue::build_text("16:45:30"); - let expected = OwnedValue::build_text("-03:15:30"); + let expected = OwnedValue::build_text("-0000-00-00 03:15:30.000"); assert_eq!( exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), expected @@ -1734,7 +1734,7 @@ mod tests { let start = OwnedValue::build_text("2023-05-10 23:30:00"); let end = OwnedValue::build_text("2023-05-11 01:15:00"); - let expected = OwnedValue::build_text("-01:45:00"); + let expected = OwnedValue::build_text("-0000-00-00 01:45:00.000"); assert_eq!( exec_timediff(&[Register::OwnedValue(start), Register::OwnedValue(end)]), expected diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index db09d27fe..f6441384f 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -590,48 +590,48 @@ foreach i $FMT { do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {} } + # Tests for the TIMEDIFF function do_execsql_test timediff-basic-positive { SELECT timediff('14:30:45', '12:00:00'); -} {02:30:45} +} {"+0000-00-00 02:30:45.000"} do_execsql_test timediff-basic-negative { SELECT timediff('12:00:00', '14:30:45'); -} {-02:30:45} +} {"-0000-00-00 02:30:45.000"} do_execsql_test timediff-with-milliseconds-positive { SELECT timediff('12:00:01.300', '12:00:00.500'); -} {00:00:00.800} - +} {"+0000-00-00 00:00:00.800"} do_execsql_test timediff-same-time { SELECT timediff('12:00:00', '12:00:00'); -} {00:00:00} +} {"+0000-00-00 00:00:00.000"} do_execsql_test timediff-across-dates { SELECT timediff('2023-05-11 01:15:00', '2023-05-10 23:30:00'); -} {01:45:00} +} {"+0000-00-00 01:45:00.000"} do_execsql_test timediff-across-dates-negative { SELECT timediff('2023-05-10 23:30:00', '2023-05-11 01:15:00'); -} {-01:45:00} +} {"-0000-00-00 01:45:00.000"} do_execsql_test timediff-different-formats { SELECT timediff('2023-05-10T23:30:00', '2023-05-10 14:15:00'); -} {09:15:00} +} {"+0000-00-00 09:15:00.000"} do_execsql_test timediff-with-timezone { SELECT timediff('2023-05-10 23:30:00+02:00', '2023-05-10 18:30:00Z'); -} {03:00:00} +} {"+0000-00-00 03:00:00.000"} do_execsql_test timediff-large-difference { SELECT timediff('2023-05-12 10:00:00', '2023-05-10 08:00:00'); -} {50:00:00} +} {"+0000-00-02 02:00:00.000"} do_execsql_test timediff-with-seconds-precision { SELECT timediff('12:30:45.123', '12:30:44.987'); -} {00:00:00.136} +} {"+0000-00-00 00:00:00.136"} do_execsql_test timediff-null-first-arg { SELECT timediff(NULL, '12:00:00'); @@ -649,11 +649,10 @@ do_execsql_test timediff-invalid-second-arg { SELECT timediff('12:00:00', 'not-a-time'); } {{}} - do_execsql_test timediff-julian-day { SELECT timediff(2460000, 2460000.5); -} {-12:00:00} +} {"-0000-00-00 12:00:00.000"} do_execsql_test timediff-different-time-formats { SELECT timediff('23:59:59', '00:00:00'); -} {23:59:59} +} {"+0000-00-00 23:59:59.000"} \ No newline at end of file From a56e6ebc7d92c4d2ccff56ac34f6a76dbe303417 Mon Sep 17 00:00:00 2001 
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Apr 2025 07:01:07 +0000 Subject: [PATCH 172/425] build(deps): bump pyo3 from 0.24.0 to 0.24.1 Bumps [pyo3](https://github.com/pyo3/pyo3) from 0.24.0 to 0.24.1. - [Release notes](https://github.com/pyo3/pyo3/releases) - [Changelog](https://github.com/PyO3/pyo3/blob/v0.24.1/CHANGELOG.md) - [Commits](https://github.com/pyo3/pyo3/compare/v0.24.0...v0.24.1) --- updated-dependencies: - dependency-name: pyo3 dependency-version: 0.24.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.lock | 20 ++++++++++---------- bindings/python/Cargo.toml | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a85ec67df..af3033499 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2591,9 +2591,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1c6c3591120564d64db2261bec5f910ae454f01def849b9c22835a84695e86" +checksum = "17da310086b068fbdcefbba30aeb3721d5bb9af8db4987d6735b2183ca567229" dependencies = [ "anyhow", "cfg-if", @@ -2610,9 +2610,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9b6c2b34cf71427ea37c7001aefbaeb85886a074795e35f161f5aecc7620a7a" +checksum = "e27165889bd793000a098bb966adc4300c312497ea25cf7a690a9f0ac5aa5fc1" dependencies = [ "once_cell", "target-lexicon", @@ -2620,9 +2620,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5507651906a46432cdda02cd02dd0319f6064f1374c9147c45b978621d2c3a9c" +checksum = "05280526e1dbf6b420062f3ef228b78c0c54ba94e157f5cb724a609d0f2faabc" dependencies = [ "libc", "pyo3-build-config", @@ -2630,9 +2630,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d394b5b4fd8d97d48336bb0dd2aebabad39f1d294edd6bcd2cccf2eefe6f42" +checksum = "5c3ce5686aa4d3f63359a5100c62a127c9f15e8398e5fdeb5deef1fed5cd5f44" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -2642,9 +2642,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd72da09cfa943b1080f621f024d2ef7e2773df7badd51aa30a2be1f8caa7c8e" +checksum = "f4cf6faa0cbfb0ed08e89beb8103ae9724eb4750e3a78084ba4017cbe94f3855" dependencies = [ "heck", "proc-macro2", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 402a3a760..4a8eaef59 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -18,7 +18,7 @@ extension-module = ["pyo3/extension-module"] [dependencies] anyhow = "1.0" limbo_core = { path = "../../core", features = ["io_uring"] } -pyo3 = { version = "0.24.0", features = ["anyhow"] } +pyo3 = { version = "0.24.1", features = ["anyhow"] } [build-dependencies] version_check = "0.9.5" From 9d7a7797572be9fdea81eba8f921fc50678253ec Mon Sep 17 00:00:00 2001 From: TcMits Date: Fri, 11 Apr 2025 14:41:56 +0700 Subject: [PATCH 173/425] Fix drop empty page in balancing --- core/storage/btree.rs | 52 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs 
b/core/storage/btree.rs index 9ab993d2d..734f96569 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1641,9 +1641,20 @@ impl BTreeCursor { let current_page = self.stack.top(); { // check if we don't need to balance - // don't continue if there are no overflow cells + // don't continue if: + // - current page is not overfull root + // OR + // - current page is not overfull and the amount of free space on the page + // is less than 2/3rds of the total usable space on the page + // + // https://github.com/sqlite/sqlite/blob/0aa95099f5003dc99f599ab77ac0004950b281ef/src/btree.c#L9064-L9071 let page = current_page.get().contents.as_mut().unwrap(); - if page.overflow_cells.is_empty() { + let usable_space = self.usable_space(); + let free_space = compute_free_space(page, usable_space as u16); + if page.overflow_cells.is_empty() + && (!self.stack.has_parent() + || free_space as usize * 3 <= usable_space * 2) + { let write_info = self.state.mut_write_info().unwrap(); write_info.state = WriteState::Finish; return Ok(CursorResult::Ok(())); @@ -4850,7 +4861,8 @@ mod tests { }, types::Text, vdbe::Register, - BufferPool, Connection, DatabaseStorage, WalFile, WalFileShared, WriteCompletion, + BufferPool, Connection, DatabaseStorage, StepResult, WalFile, WalFileShared, + WriteCompletion, }; use std::{ cell::RefCell, collections::HashSet, mem::transmute, ops::Deref, panic, rc::Rc, sync::Arc, @@ -5961,6 +5973,40 @@ mod tests { } } + #[test] + pub fn test_drop_page_in_balancing_issue_1203() { + let db = get_database(); + let conn = db.connect().unwrap(); + + let queries = vec![ +"CREATE TABLE lustrous_petit (awesome_nomous TEXT,ambitious_amargi TEXT,fantastic_daniels BLOB,stupendous_highleyman TEXT,relaxed_crane TEXT,elegant_bromma INTEGER,proficient_castro BLOB,ambitious_liman TEXT,responsible_lusbert BLOB);", +"INSERT INTO lustrous_petit VALUES ('funny_sarambi', 'hardworking_naoumov', X'666561726C6573735F68696C6C', 'elegant_iafd', 'rousing_flag', 681399778772406122, X'706572736F6E61626C655F676F6477696E6772696D6D', 'insightful_anonymous', X'706F77657266756C5F726F636861'), ('personable_holmes', 'diligent_pera', X'686F6E6573745F64696D656E73696F6E', 'energetic_raskin', 'gleaming_federasyon', -2778469859573362611, X'656666696369656E745F6769617A', 'sensible_skirda', X'66616E7461737469635F6B656174696E67'), ('inquisitive_baedan', 'brave_sphinx', X'67656E65726F75735F6D6F6E7473656E79', 'inquisitive_syndicate', 'amiable_room', 6954857961525890638, X'7374756E6E696E675F6E6965747A73636865', 'glowing_coordinator', X'64617A7A6C696E675F7365766572696E65'), ('upbeat_foxtale', 'engaging_aktimon', X'63726561746976655F6875746368696E6773', 'ample_locura', 'creative_barrett', 6413352509911171593, X'6772697070696E675F6D696E7969', 'competitive_parissi', X'72656D61726B61626C655F77696E7374616E6C6579');", +"INSERT INTO lustrous_petit VALUES ('ambitious_berry', 'devoted_marshall', X'696E7175697369746976655F6C6172657661', 'flexible_pramen', 'outstanding_stauch', 6936508362673228293, X'6C6F76696E675F6261756572', 'charming_anonymous', X'68617264776F726B696E675F616E6E6973'), ('enchanting_cohen', 'engaging_rubel', X'686F6E6573745F70726F766F63617A696F6E65', 'humorous_robin', 'imaginative_shuzo', 4762266264295288131, X'726F7573696E675F6261796572', 'vivid_bolling', X'6F7267616E697A65645F7275696E73'), ('affectionate_resistance', 'gripping_rustamova', X'6B696E645F6C61726B696E', 'bright_boulanger', 'upbeat_ashirov', -1726815435854320541, X'61646570745F66646361', 'dazzling_tashjian', X'68617264776F726B696E675F6D6F72656C'), 
('zestful_ewald', 'favorable_lewis', X'73747570656E646F75735F7368616C6966', 'bright_combustion', 'blithesome_harding', 8408539013935554176, X'62726176655F737079726F706F756C6F75', 'hilarious_finnegan', X'676976696E675F6F7267616E697A696E67'), ('blithesome_picqueray', 'sincere_william', X'636F75726167656F75735F6D69746368656C6C', 'rousing_atan', 'mirthful_katie', -429232313453215091, X'6C6F76656C795F776174616E616265', 'stupendous_mcmillan', X'666F63757365645F6B61666568'), ('incredible_kid', 'friendly_yvetot', X'706572666563745F617A697A', 'helpful_manhattan', 'shining_horrox', -4318061095860308846, X'616D626974696F75735F726F7765', 'twinkling_anarkiya', X'696D6167696E61746976655F73756D6E6572');", +"INSERT INTO lustrous_petit VALUES ('sleek_graeber', 'approachable_ghazzawi', X'62726176655F6865776974747768697465', 'adaptable_zimmer', 'polite_cohn', -5464225138957223865, X'68756D6F726F75735F736E72', 'adaptable_igualada', X'6C6F76656C795F7A686F75'), ('imaginative_rautiainen', 'magnificent_ellul', X'73706C656E6469645F726F6361', 'responsible_brown', 'upbeat_uruguaya', -1185340834321792223, X'616D706C655F6D6470', 'philosophical_kelly', X'676976696E675F6461676865726D6172676F7369616E'), ('blithesome_darkness', 'creative_newell', X'6C757374726F75735F61706174726973', 'engaging_kids', 'charming_wark', -1752453819873942466, X'76697669645F6162657273', 'independent_barricadas', X'676C697374656E696E675F64686F6E6474'), ('productive_chardronnet', 'optimistic_karnage', X'64696C6967656E745F666F72657374', 'engaging_beggar', 'sensible_wolke', 784341549042407442, X'656E676167696E675F6265726B6F7769637A', 'blithesome_zuzenko', X'6E6963655F70726F766F63617A696F6E65');", +"INSERT INTO lustrous_petit VALUES ('shining_sagris', 'considerate_mother', X'6F70656E5F6D696E6465645F72696F74', 'polite_laufer', 'patient_mink', 2240393952789100851, X'636F75726167656F75735F6D636D696C6C616E', 'glowing_robertson', X'68656C7066756C5F73796D6F6E6473'), ('dazzling_glug', 'stupendous_poznan', X'706572736F6E61626C655F6672616E6B73', 'open_minded_ruins', 'qualified_manes', 2937238916206423261, X'696E736967687466756C5F68616B69656C', 'passionate_borl', X'616D6961626C655F6B7570656E647561'), ('wondrous_parry', 'knowledgeable_giovanni', X'6D6F76696E675F77696E6E', 'shimmering_aberlin', 'affectionate_calhoun', 702116954493913499, X'7265736F7572636566756C5F62726F6D6D61', 'propitious_mezzagarcia', X'746563686E6F6C6F676963616C5F6E6973686974616E69');", +"INSERT INTO lustrous_petit VALUES ('kind_room', 'hilarious_crow', X'6F70656E5F6D696E6465645F6B6F74616E7969', 'hardworking_petit', 'adaptable_zarrow', 2491343172109894986, X'70726F647563746976655F646563616C6F677565', 'willing_sindikalis', X'62726561746874616B696E675F6A6F7264616E');", +"INSERT INTO lustrous_petit VALUES ('confident_etrebilal', 'agreeable_shifu', X'726F6D616E7469635F7363687765697A6572', 'loving_debs', 'gripping_spooner', -3136910055229112693, X'677265676172696F75735F736B726F7A6974736B79', 'ample_ontiveros', X'7175616C69666965645F726F6D616E69656E6B6F'), ('competitive_call', 'technological_egoumenides', X'6469706C6F6D617469635F6D6F6E616768616E', 'willing_stew', 'frank_neal', -5973720171570031332, X'6C6F76696E675F6465737461', 'dazzling_gambone', X'70726F647563746976655F6D656E64656C676C6565736F6E'), ('favorable_delesalle', 'sensible_atterbury', X'666169746866756C5F64617861', 'bountiful_aldred', 'marvelous_malgraith', 5330463874397264493, X'706572666563745F7765726265', 'lustrous_anti', X'6C6F79616C5F626F6F6B6368696E'), ('stellar_corlu', 'loyal_espana', X'6D6F76696E675F7A6167', 'efficient_nelson', 
'qualified_shepard', 1015518116803600464, X'737061726B6C696E675F76616E6469766572', 'loving_scoffer', X'686F6E6573745F756C72696368'), ('adaptable_taylor', 'shining_yasushi', X'696D6167696E61746976655F776974746967', 'alluring_blackmore', 'zestful_coeurderoy', -7094136731216188999, X'696D6167696E61746976655F757A63617465677569', 'gleaming_hernandez', X'6672616E6B5F646F6D696E69636B'), ('competitive_luis', 'stellar_fredericks', X'616772656561626C655F6D696368656C', 'optimistic_navarro', 'funny_hamilton', 4003895682491323194, X'6F70656E5F6D696E6465645F62656C6D6173', 'incredible_thorndycraft', X'656C6567616E745F746F6C6B69656E'), ('remarkable_parsons', 'sparkling_ulrich', X'737061726B6C696E675F6D6172696E636561', 'technological_leighlais', 'warmhearted_konok', -5789111414354869563, X'676976696E675F68657272696E67', 'adept_dabtara', X'667269656E646C795F72617070');", +"INSERT INTO lustrous_petit VALUES ('hardworking_norberg', 'approachable_winter', X'62726176655F68617474696E6768', 'imaginative_james', 'open_minded_capital', -5950508516718821688, X'6C757374726F75735F72616E7473', 'warmhearted_limanov', X'696E736967687466756C5F646F637472696E65'), ('generous_shatz', 'generous_finley', X'726176697368696E675F6B757A6E6574736F76', 'stunning_arrigoni', 'favorable_volcano', -8442328990977069526, X'6D6972746866756C5F616C7467656C64', 'thoughtful_zurbrugg', X'6D6972746866756C5F6D6F6E726F65'), ('frank_kerr', 'splendid_swain', X'70617373696F6E6174655F6D6470', 'flexible_dubey', 'sensible_tj', 6352949260574274181, X'656666696369656E745F6B656D736B79', 'vibrant_ege', X'736C65656B5F6272696768746F6E'), ('organized_neal', 'glistening_sugar', X'656E676167696E675F6A6F72616D', 'romantic_krieger', 'qualified_corr', -4774868512022958085, X'706572666563745F6B6F7A6172656B', 'bountiful_zaikowska', X'74686F7567687466756C5F6C6F6767616E73'), ('excellent_lydiettcarrion', 'diligent_denslow', X'666162756C6F75735F6D616E68617474616E', 'confident_tomar', 'glistening_ligt', -1134906665439009896, X'7175616C69666965645F6F6E6B656E', 'remarkable_anarkiya', X'6C6F79616C5F696E64616261'), ('passionate_melis', 'loyal_xsilent', X'68617264776F726B696E675F73637564', 'lustrous_barnes', 'nice_sugako', -4097897163377829983, X'726F6D616E7469635F6461686572', 'bright_imrie', X'73656E7369626C655F6D61726B'), ('giving_mlb', 'breathtaking_fourier', X'736C65656B5F616E61726368697374', 'glittering_malet', 'brilliant_crew', 8791228049111405793, X'626F756E746966756C5F626576656E736565', 'lovely_swords', X'70726F706974696F75735F696E656469746173'), ('honest_wright', 'qualified_rabble', X'736C65656B5F6D6172656368616C', 'shimmering_marius', 'blithesome_mckelvie', -1330737263592370654, X'6F70656E5F6D696E6465645F736D616C6C', 'energetic_gorman', X'70726F706974696F75735F6B6F74616E7969');", +"DELETE FROM lustrous_petit WHERE (ambitious_liman > 'adept_dabtaqu');", +"INSERT INTO lustrous_petit VALUES ('technological_dewey', 'fabulous_st', X'6F7074696D69737469635F73687562', 'considerate_levy', 'adaptable_kernis', 4195134012457716562, X'61646570745F736F6C6964617269646164', 'vibrant_crump', X'6C6F79616C5F72796E6572'), ('super_marjan', 'awesome_gethin', X'736C65656B5F6F737465727765696C', 'diplomatic_loidl', 'qualified_bokani', -2822676417968234733, X'6272696768745F64756E6C6170', 'creative_en', X'6D6972746866756C5F656C6F6666'), ('philosophical_malet', 'unique_garcia', X'76697669645F6E6F7262657267', 'spellbinding_fire', 'faithful_barringtonbush', -7293711848773657758, X'6272696C6C69616E745F6F6B65656665', 'gripping_guillon', X'706572736F6E61626C655F6D61726C696E7370696B65'), 
('thoughtful_morefus', 'lustrous_rodriguez', X'636F6E666964656E745F67726F73736D616E726F73686368696E', 'devoted_jackson', 'propitious_karnage', -7802999054396485709, X'63617061626C655F64', 'enchanting_orwell', X'7477696E6B6C696E675F64616C616B6F676C6F75'), ('alluring_guillon', 'brilliant_pinotnoir', X'706572736F6E61626C655F6A6165636B6C65', 'open_minded_azeez', 'courageous_romania', 2126962403055072268, X'746563686E6F6C6F676963616C5F6962616E657A', 'open_minded_rosa', X'6C757374726F75735F6575726F7065'), ('courageous_kolokotronis', 'inquisitive_gahman', X'677265676172696F75735F626172726574', 'ambitious_shakur', 'fantastic_apatris', -1232732971861520864, X'737061726B6C696E675F7761746368', 'captivating_clover', X'636F6E666964656E745F736574686E65737363617374726F'), ('charming_sullivan', 'focused_congress', X'7368696D6D6572696E675F636C7562', 'wondrous_skrbina', 'giving_mendanlioglu', -6837337053772308333, X'636861726D696E675F73616C696E6173', 'rousing_hedva', X'6469706C6F6D617469635F7061796E');", + ]; + + for query in queries { + let mut stmt = conn.query(query).unwrap().unwrap(); + loop { + let row = stmt.step().expect("step"); + match row { + StepResult::Done => { + break; + } + _ => { + tracing::debug!("row {:?}", row); + } + } + } + } + } + #[test] pub fn test_free_space() { let db = get_database(); From a4a4879f3b7395ab329fa003962251aca633376e Mon Sep 17 00:00:00 2001 From: TcMits Date: Fri, 11 Apr 2025 14:53:10 +0700 Subject: [PATCH 174/425] fix cargo fmt check --- core/storage/btree.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 734f96569..bae2e85e2 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1644,9 +1644,9 @@ impl BTreeCursor { // don't continue if: // - current page is not overfull root // OR - // - current page is not overfull and the amount of free space on the page + // - current page is not overfull and the amount of free space on the page // is less than 2/3rds of the total usable space on the page - // + // // https://github.com/sqlite/sqlite/blob/0aa95099f5003dc99f599ab77ac0004950b281ef/src/btree.c#L9064-L9071 let page = current_page.get().contents.as_mut().unwrap(); let usable_space = self.usable_space(); From 029a0c86b216e9791fa56110c99be7d187f3baa6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 13:27:27 +0300 Subject: [PATCH 175/425] btree: remove IterationState iteration direction must be known when seeking, and transitively when using move_to() since seek() uses it, but otherwise IterationState just brings way too much noise to the code -- it was meant to encode invariants about how a cursor can be iterated, but it's not worth it. iteration direction for seek()/move_to() can be inferred from the SeekOp: GE/GT/EQ: forward LT/LE: backward and get_next_record()/get_prev_record() already have different logic for their respective iteration directions. --- core/storage/btree.rs | 202 +++++------------------------------------- core/types.rs | 10 +++ 2 files changed, 30 insertions(+), 182 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 9ab993d2d..5d07b6b82 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -339,17 +339,6 @@ enum OverflowState { Done, } -/// Iteration state of the cursor. Can only be set once. -/// Once a SeekGT or SeekGE is performed, the cursor must iterate forwards and calling prev() is an error. 
-/// Similarly, once a SeekLT or SeekLE is performed, the cursor must iterate backwards and calling next() is an error. -/// When a SeekEQ or SeekRowid is performed, the cursor is NOT allowed to iterate further. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum IterationState { - Unset, - Iterating(IterationDirection), - IterationNotAllowed, -} - pub struct BTreeCursor { /// The multi-version cursor that is used to read and write to the database file. mv_cursor: Option<Rc<RefCell<MvCursor>>>, @@ -375,8 +364,6 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell<Option<ImmutableRecord>>, empty_record: Cell<bool>, - - pub iteration_state: IterationState, } /// Stack of pages representing the tree traversal order. @@ -425,7 +412,6 @@ impl BTreeCursor { }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), - iteration_state: IterationState::Unset, } } @@ -969,35 +955,7 @@ impl BTreeCursor { /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<Option<u64>>> { - assert!( - self.iteration_state != IterationState::Unset, - "iteration state must have been set before do_seek() is called" - ); - let valid_op = match (self.iteration_state, op) { - (IterationState::Iterating(IterationDirection::Forwards), SeekOp::GE | SeekOp::GT) => { - true - } - (IterationState::Iterating(IterationDirection::Backwards), SeekOp::LE | SeekOp::LT) => { - true - } - (IterationState::IterationNotAllowed, SeekOp::EQ) => true, - _ => false, - }; - assert!( - valid_op, - "invalid seek op for iteration state: {:?} {:?}", - self.iteration_state, op - ); - let cell_iter_dir = match self.iteration_state { - IterationState::Iterating(IterationDirection::Forwards) - | IterationState::IterationNotAllowed => IterationDirection::Forwards, - IterationState::Iterating(IterationDirection::Backwards) => { - IterationDirection::Backwards - } - IterationState::Unset => { - unreachable!("iteration state must have been set before do_seek() is called"); - } - }; + let cell_iter_dir = op.iteration_direction(); return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -1143,19 +1101,13 @@ impl BTreeCursor { // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree // and get the next matching record from there. - match self.iteration_state { - IterationState::Iterating(IterationDirection::Forwards) => { + match op.iteration_direction() { + IterationDirection::Forwards => { return self.get_next_record(Some((key, op))); } - IterationState::Iterating(IterationDirection::Backwards) => { + IterationDirection::Backwards => { return self.get_prev_record(Some((key, op))); } - IterationState::Unset => { - unreachable!("iteration state must not be unset"); - } - IterationState::IterationNotAllowed => { - unreachable!("iteration state must not be IterationNotAllowed"); - } } } @@ -1231,12 +1183,7 @@ impl BTreeCursor { // 6. If we find the cell, we return the record. Otherwise, we return an empty result.
self.move_to_root(); - let iter_dir = match self.iteration_state { - IterationState::Iterating(IterationDirection::Backwards) => { - IterationDirection::Backwards - } - _ => IterationDirection::Forwards, - }; + let iter_dir = cmp.iteration_direction(); loop { let page = self.stack.top(); @@ -1292,29 +1239,12 @@ impl BTreeCursor { // No iteration (point query): // EQ | > or = | go left | Last = key is in left subtree // EQ | < | go right | Last = key is in right subtree - let target_leaf_page_is_in_left_subtree = match (self.iteration_state, cmp) - { - ( - IterationState::Iterating(IterationDirection::Forwards), - SeekOp::GT, - ) => *cell_rowid > rowid_key, - ( - IterationState::Iterating(IterationDirection::Forwards), - SeekOp::GE, - ) => *cell_rowid >= rowid_key, - ( - IterationState::Iterating(IterationDirection::Backwards), - SeekOp::LE, - ) => *cell_rowid >= rowid_key, - ( - IterationState::Iterating(IterationDirection::Backwards), - SeekOp::LT, - ) => *cell_rowid >= rowid_key || *cell_rowid == rowid_key - 1, - (_any, SeekOp::EQ) => *cell_rowid >= rowid_key, - _ => unreachable!( - "invalid combination of seek op and iteration state: {:?} {:?}", - cmp, self.iteration_state - ), + let target_leaf_page_is_in_left_subtree = match cmp { + SeekOp::GT => *cell_rowid > rowid_key, + SeekOp::GE => *cell_rowid >= rowid_key, + SeekOp::LE => *cell_rowid >= rowid_key, + SeekOp::LT => *cell_rowid + 1 >= rowid_key, + SeekOp::EQ => *cell_rowid >= rowid_key, }; if target_leaf_page_is_in_left_subtree { // If we found our target rowid in the left subtree, @@ -1402,36 +1332,13 @@ impl BTreeCursor { // EQ | > | go left | First = key must be in left subtree // EQ | = | go left | First = key could be exactly this one, or in left subtree // EQ | < | go right | First = key must be in right subtree - assert!( - self.iteration_state != IterationState::Unset, - "iteration state must have been set before move_to() is called" - ); - let target_leaf_page_is_in_left_subtree = match (cmp, self.iteration_state) - { - ( - SeekOp::GT, - IterationState::Iterating(IterationDirection::Forwards), - ) => interior_cell_vs_index_key.is_gt(), - ( - SeekOp::GE, - IterationState::Iterating(IterationDirection::Forwards), - ) => interior_cell_vs_index_key.is_ge(), - (SeekOp::EQ, IterationState::IterationNotAllowed) => { - interior_cell_vs_index_key.is_ge() - } - ( - SeekOp::LE, - IterationState::Iterating(IterationDirection::Backwards), - ) => interior_cell_vs_index_key.is_gt(), - ( - SeekOp::LT, - IterationState::Iterating(IterationDirection::Backwards), - ) => interior_cell_vs_index_key.is_ge(), - _ => unreachable!( - "invalid combination of seek op and iteration state: {:?} {:?}", - cmp, self.iteration_state - ), + let target_leaf_page_is_in_left_subtree = match cmp { + SeekOp::GT => interior_cell_vs_index_key.is_gt(), + SeekOp::GE => interior_cell_vs_index_key.is_ge(), + SeekOp::EQ => interior_cell_vs_index_key.is_ge(), + SeekOp::LE => interior_cell_vs_index_key.is_gt(), + SeekOp::LT => interior_cell_vs_index_key.is_ge(), }; if target_leaf_page_is_in_left_subtree { // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. 
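The inference described in the commit message is small enough to state on its own: every SeekOp maps to exactly one IterationDirection, which is why the call sites below can drop the shared iteration state entirely. A minimal, self-contained sketch of that mapping (the enums mirror what this patch adds to core/types.rs; the main() driver is illustration only, not part of the patch):

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IterationDirection {
    Forwards,
    Backwards,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SeekOp {
    EQ,
    GE,
    GT,
    LE,
    LT,
}

impl SeekOp {
    // GT/GE/EQ look for the first matching key, so they imply a forward scan;
    // LT/LE look for the last matching key, so they imply a backward scan.
    fn iteration_direction(&self) -> IterationDirection {
        match self {
            SeekOp::EQ | SeekOp::GE | SeekOp::GT => IterationDirection::Forwards,
            SeekOp::LE | SeekOp::LT => IterationDirection::Backwards,
        }
    }
}

fn main() {
    // A range scan like `WHERE col > 10` seeks with GT and then iterates forwards.
    assert_eq!(SeekOp::GT.iteration_direction(), IterationDirection::Forwards);
    // A descending scan like `WHERE col < 10 ORDER BY col DESC` seeks with LT.
    assert_eq!(SeekOp::LT.iteration_direction(), IterationDirection::Backwards);
    println!("SeekOp direction inference holds");
}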
@@ -3024,14 +2931,6 @@ impl BTreeCursor { } pub fn rewind(&mut self) -> Result<CursorResult<()>> { - assert!( - matches!( - self.iteration_state, - IterationState::Unset | IterationState::Iterating(IterationDirection::Forwards) - ), - "iteration state must be unset or Iterating(Forwards) when rewind() is called" - ); - self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); if self.mv_cursor.is_some() { let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); @@ -3047,14 +2946,6 @@ impl BTreeCursor { } pub fn last(&mut self) -> Result<CursorResult<()>> { - assert!( - matches!( - self.iteration_state, - IterationState::Unset | IterationState::Iterating(IterationDirection::Backwards) - ), - "iteration state must be unset or Iterating(Backwards) when last() is called" - ); - self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); assert!(self.mv_cursor.is_none()); match self.move_to_rightmost()? { CursorResult::Ok(_) => self.prev(), @@ -3063,14 +2954,6 @@ impl BTreeCursor { } pub fn next(&mut self) -> Result<CursorResult<()>> { - assert!( - matches!( - self.iteration_state, - IterationState::Iterating(IterationDirection::Forwards) - ), - "iteration state must be Iterating(Forwards) when next() is called, but it was {:?}", - self.iteration_state - ); let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -3078,13 +2961,6 @@ impl BTreeCursor { } pub fn prev(&mut self) -> Result<CursorResult<()>> { - assert!( - matches!( - self.iteration_state, - IterationState::Iterating(IterationDirection::Backwards) - ), - "iteration state must be Iterating(Backwards) when prev() is called" - ); assert!(self.mv_cursor.is_none()); match self.get_prev_record(None)? { CursorResult::Ok(rowid) => { @@ -3111,38 +2987,6 @@ impl BTreeCursor { pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result<CursorResult<bool>> { assert!(self.mv_cursor.is_none()); - match op { - SeekOp::GE | SeekOp::GT => { - if self.iteration_state == IterationState::Unset { - self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); - } else { - assert!(matches!( - self.iteration_state, - IterationState::Iterating(IterationDirection::Forwards) - )); - } - } - SeekOp::LE | SeekOp::LT => { - if self.iteration_state == IterationState::Unset { - self.iteration_state = IterationState::Iterating(IterationDirection::Backwards); - } else { - assert!(matches!( - self.iteration_state, - IterationState::Iterating(IterationDirection::Backwards) - )); - } - } - SeekOp::EQ => { - if self.iteration_state == IterationState::Unset { - self.iteration_state = IterationState::IterationNotAllowed; - } else { - assert!(matches!( - self.iteration_state, - IterationState::IterationNotAllowed - )); - } - } - }; let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -3172,7 +3016,6 @@ impl BTreeCursor { None => { tracing::trace!("moved {}", moved_before); if !moved_before { - self.iteration_state = IterationState::Iterating(IterationDirection::Forwards); match key { BTreeKey::IndexKey(_) => { return_if_io!(self @@ -5323,8 +5166,6 @@ mod tests { // FIXME: add sorted vector instead, should be okay for small amounts of keys for now :P, too lazy to fix right now keys.sort(); cursor.move_to_root(); - // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ - cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); let mut valid = true; for key in
keys.iter() { tracing::trace!("seeking key: {}", key); @@ -5336,7 +5177,6 @@ mod tests { break; } } - cursor.iteration_state = IterationState::Unset; // let's validate btree too so that we undertsand where the btree failed if matches!(validate_btree(pager.clone(), root_page), (_, false)) || !valid { let btree_after = format_btree(pager.clone(), root_page, 0); @@ -5354,8 +5194,6 @@ mod tests { } keys.sort(); cursor.move_to_root(); - // hack to allow bypassing our internal invariant of not allowing cursor iteration after SeekOp::EQ - cursor.iteration_state = IterationState::Iterating(IterationDirection::Forwards); for key in keys.iter() { tracing::trace!("seeking key: {}", key); run_until_done(|| cursor.next(), pager.deref()).unwrap(); @@ -6227,7 +6065,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.seek(key, SeekOp::EQ) + cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) @@ -6307,7 +6145,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.seek(key, SeekOp::EQ) + cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) @@ -6389,7 +6227,7 @@ mod tests { run_until_done( || { let key = SeekKey::TableRowId(i as u64); - cursor.seek(key, SeekOp::EQ) + cursor.move_to(key, SeekOp::EQ) }, pager.deref(), ) diff --git a/core/types.rs b/core/types.rs index da6b778cc..c26e96a41 100644 --- a/core/types.rs +++ b/core/types.rs @@ -5,6 +5,7 @@ use crate::ext::{ExtValue, ExtValueType}; use crate::pseudo::PseudoCursor; use crate::storage::btree::BTreeCursor; use crate::storage::sqlite3_ondisk::write_varint; +use crate::translate::plan::IterationDirection; use crate::vdbe::sorter::Sorter; use crate::vdbe::{Register, VTabOpaqueCursor}; use crate::Result; @@ -1235,6 +1236,15 @@ pub enum SeekOp { LT, } +impl SeekOp { + pub fn iteration_direction(&self) -> IterationDirection { + match self { + SeekOp::EQ | SeekOp::GE | SeekOp::GT => IterationDirection::Forwards, + SeekOp::LE | SeekOp::LT => IterationDirection::Backwards, + } + } +} + #[derive(Clone, PartialEq, Debug)] pub enum SeekKey<'a> { TableRowId(u64), From 2cbb903b06c57e07428c6e3e0bedfb264ae75a3c Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 13:45:10 +0300 Subject: [PATCH 176/425] Add doc comments to SeekOp --- core/types.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/types.rs b/core/types.rs index c26e96a41..3d531adfe 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1228,6 +1228,7 @@ pub enum CursorResult { } #[derive(Clone, Copy, PartialEq, Eq, Debug)] +/// The match condition of a table/index seek. pub enum SeekOp { EQ, GE, @@ -1237,6 +1238,15 @@ pub enum SeekOp { } impl SeekOp { + /// A given seek op implies an iteration direction. + /// + /// For example, a seek with SeekOp::GT implies: + /// Find the first table/index key that compares greater than the seek key + /// -> used in forwards iteration. + /// + /// A seek with SeekOp::LE implies: + /// Find the last table/index key that compares less than or equal to the seek key + /// -> used in backwards iteration. 
pub fn iteration_direction(&self) -> IterationDirection { match self { SeekOp::EQ | SeekOp::GE | SeekOp::GT => IterationDirection::Forwards, SeekOp::LE | SeekOp::LT => IterationDirection::Backwards, } } From 4d1ecd2d50149c0dd182b2113c53e2bbf3bc780c Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 01:36:36 -0300 Subject: [PATCH 177/425] better MalformedHexInteger --- cli/app.rs | 13 ++--- vendored/sqlite3-parser/src/lexer/scan.rs | 4 +- .../sqlite3-parser/src/lexer/sql/error.rs | 51 +++++++++++++++---- vendored/sqlite3-parser/src/lexer/sql/mod.rs | 21 ++++++-- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 3f04ab9fe..bb9515660 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -789,10 +789,9 @@ impl<'a> Limbo<'a> { if let Some(ref mut stats) = statistics { stats.execute_time_elapsed_samples.push(start.elapsed()); } - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + let report = + miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); break; } } @@ -805,10 +804,8 @@ }, Ok(None) => {} Err(err) => { - let _ = self.write_fmt(format_args!( - "{:?}", - miette::Error::from(err).with_source_code(sql.to_owned()) - )); + let report = miette::Error::from(err).with_source_code(sql.to_owned()); + let _ = self.write_fmt(format_args!("{:?}", report)); anyhow::bail!("We have to throw here, even if we printed error"); } } diff --git a/vendored/sqlite3-parser/src/lexer/scan.rs b/vendored/sqlite3-parser/src/lexer/scan.rs index e0d22cbd5..6c0085b29 100644 --- a/vendored/sqlite3-parser/src/lexer/scan.rs +++ b/vendored/sqlite3-parser/src/lexer/scan.rs @@ -9,7 +9,7 @@ use std::io; /// Error with position pub trait ScanError: Error + From<io::Error> + Sized { /// Update the position where the error occurs - fn position(&mut self, line: u64, column: usize); + fn position(&mut self, line: u64, column: usize, offset: usize); } /// The `(&[u8], TokenType)` is the token.
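The reason position() now also receives the byte offset is that miette builds its labeled diagnostics from byte spans rather than line/column pairs. A rough sketch of the pattern under stated assumptions (the MiniError type, its fixed span length of 2, and the sample SQL are hypothetical; the real variants and span lengths are wired up in lexer/sql/error.rs below), using the miette and thiserror crates that the CLI already depends on:

use miette::{Diagnostic, SourceSpan};
use thiserror::Error;

#[derive(Debug, Error, Diagnostic)]
#[error("malformed hex integer")]
struct MiniError {
    // miette renders this field as a "here" label pointing into the source.
    #[label("here")]
    src: Option<SourceSpan>,
}

impl MiniError {
    // Mirrors ScanError::position: the scanner hands over (line, column, offset)
    // and only the byte offset is needed to build the span.
    fn position(&mut self, _line: u64, _column: usize, offset: usize) {
        // SourceSpan implements From<(usize, usize)> as (byte offset, length).
        self.src = Some((offset, 2usize).into());
    }
}

fn main() {
    let mut err = MiniError { src: None };
    // Pretend the scanner stopped at line 1, column 8, byte offset 7.
    err.position(1, 8, 7);
    let report = miette::Error::from(err).with_source_code("SELECT 0xZZ;".to_string());
    // Debug-printing the report draws the source snippet with the label attached.
    println!("{:?}", report);
}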
@@ -126,7 +126,7 @@ impl Scanner { let data = &input[self.offset..]; match self.splitter.split(data) { Err(mut e) => { - e.position(self.line, self.column); + e.position(self.line, self.column, self.offset); return Err(e); } Ok((None, 0)) => { diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index d3e3ac345..fb6d6c32e 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -56,6 +56,8 @@ pub enum Error { MalformedHexInteger( Option<(u64, usize)>, #[label("here")] Option, + Option, + #[help] Option<&'static str>, ), /// Grammar error ParserError( @@ -87,7 +89,7 @@ impl fmt::Display for Error { Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) } - Self::MalformedHexInteger(pos, _) => { + Self::MalformedHexInteger(pos, _, _, _) => { write!(f, "malformed hex integer at {:?}", pos.unwrap()) } Self::ParserError(ref msg, Some(pos), _) => write!(f, "{msg} at {pos:?}"), @@ -111,18 +113,45 @@ impl From for Error { } impl ScanError for Error { - fn position(&mut self, line: u64, column: usize) { + fn position(&mut self, line: u64, column: usize, offset: usize) { match *self { Self::Io(_) => {} - Self::UnrecognizedToken(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedLiteral(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedBracket(ref mut pos, _) => *pos = Some((line, column)), - Self::UnterminatedBlockComment(ref mut pos, _) => *pos = Some((line, column)), - Self::BadVariableName(ref mut pos, _) => *pos = Some((line, column)), - Self::BadNumber(ref mut pos, _) => *pos = Some((line, column)), - Self::ExpectedEqualsSign(ref mut pos, _) => *pos = Some((line, column)), - Self::MalformedBlobLiteral(ref mut pos, _) => *pos = Some((line, column)), - Self::MalformedHexInteger(ref mut pos, _) => *pos = Some((line, column)), + Self::UnrecognizedToken(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedLiteral(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedBracket(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::UnterminatedBlockComment(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::BadVariableName(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::BadNumber(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::ExpectedEqualsSign(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::MalformedBlobLiteral(ref mut pos, ref mut src) => { + *pos = Some((line, column)); + *src = Some((offset).into()); + } + Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) => { + *pos = Some((line, column)); + *src = Some((offset, len.unwrap_or(0)).into()); + } Self::ParserError(_, ref mut pos, _) => *pos = Some((line, column)), } } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index fba3d72d1..ccb05bd01 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -172,7 +172,7 @@ macro_rules! 
try_with_position { Ok(val) => val, Err(err) => { let mut err = Error::from(err); - err.position($scanner.line(), $scanner.column()); + err.position($scanner.line(), $scanner.column(), $scanner.offset() - 1); return Err(err); } } @@ -610,13 +610,28 @@ fn hex_integer(data: &[u8]) -> Result<(Option>, usize), Error> { if let Some((i, b)) = find_end_of_number(data, 2, u8::is_ascii_hexdigit)? { // Must not be empty (Ox is invalid) if i == 2 || is_identifier_start(b) { - return Err(Error::MalformedHexInteger(None, None)); + let (len, help) = if i == 2 && !is_identifier_start(b) { + (i, "Did you forget to add digits after '0x' or '0X'?") + } else { + (i + 1, "There are some invalid digits after '0x' or '0X'") + }; + return Err(Error::MalformedHexInteger( + None, + None, + Some(len), // Length of the malformed hex + Some(help), // Help Message + )); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { // Must not be empty (Ox is invalid) if data.len() == 2 { - return Err(Error::MalformedHexInteger(None, None)); + return Err(Error::MalformedHexInteger( + None, + None, + Some(2), // Length of the malformed hex + Some("Did you forget to add digits after '0x' or '0X'?"), // Help Message + )); } Ok((Some((data, TK_INTEGER)), data.len())) } From a2ca9e5a4670a867230e55fbf1a589c321806d04 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 10:09:00 -0300 Subject: [PATCH 178/425] better BadNumber --- vendored/sqlite3-parser/src/lexer/sql/error.rs | 11 +++++------ vendored/sqlite3-parser/src/lexer/sql/mod.rs | 15 ++++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index fb6d6c32e..d94e529a1 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -41,6 +41,7 @@ pub enum Error { BadNumber( Option<(u64, usize)>, #[label("here")] Option, + Option, ), /// Invalid or missing sign after `!` ExpectedEqualsSign( @@ -84,7 +85,7 @@ impl fmt::Display for Error { write!(f, "non-terminated block comment at {:?}", pos.unwrap()) } Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()), - Self::BadNumber(pos, _) => write!(f, "bad number at {:?}", pos.unwrap()), + Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()), Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) @@ -136,10 +137,6 @@ impl ScanError for Error { *pos = Some((line, column)); *src = Some((offset).into()); } - Self::BadNumber(ref mut pos, ref mut src) => { - *pos = Some((line, column)); - *src = Some((offset).into()); - } Self::ExpectedEqualsSign(ref mut pos, ref mut src) => { *pos = Some((line, column)); *src = Some((offset).into()); @@ -148,7 +145,9 @@ impl ScanError for Error { *pos = Some((line, column)); *src = Some((offset).into()); } - Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) => { + // Exact same handling here + Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) + | Self::BadNumber(ref mut pos, ref mut src, len) => { *pos = Some((line, column)); *src = Some((offset, len.unwrap_or(0)).into()); } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index ccb05bd01..b65d09863 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -596,7 +596,7 
@@ fn number(data: &[u8]) -> Result<(Option>, usize), Error> { } else if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { @@ -643,7 +643,7 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option>, usize), if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((&data[..i], TK_FLOAT)), i)) } else { @@ -658,17 +658,18 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option>, usize), let i = if *b == b'+' || *b == b'-' { i + 1 } else { i }; if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? { if j == i + 1 || is_identifier_start(b) { - return Err(Error::BadNumber(None, None)); + let len = if is_identifier_start(b) { j + 1 } else { j }; + return Err(Error::BadNumber(None, None, Some(len))); } Ok((Some((&data[..j], TK_FLOAT)), j)) } else { if data.len() == i + 1 { - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(i + 1))); } Ok((Some((data, TK_FLOAT)), data.len())) } } else { - Err(Error::BadNumber(None, None)) + Err(Error::BadNumber(None, None, Some(data.len()))) } } @@ -685,7 +686,7 @@ fn find_end_of_number( { continue; } - return Err(Error::BadNumber(None, None)); + return Err(Error::BadNumber(None, None, Some(j))); } else { return Ok(Some((j, b))); } @@ -739,7 +740,7 @@ mod tests { let mut s = Scanner::new(tokenizer); expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?; let err = s.scan(input).unwrap_err(); - assert!(matches!(err, Error::BadNumber(_, _))); + assert!(matches!(err, Error::BadNumber(_, _, _))); Ok(()) } From 946b59f4ee696f7677b4b142d2131b927e724a6d Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 11:00:06 -0300 Subject: [PATCH 179/425] even better BadNumber --- .../sqlite3-parser/src/lexer/sql/error.rs | 6 +++-- vendored/sqlite3-parser/src/lexer/sql/mod.rs | 26 ++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/vendored/sqlite3-parser/src/lexer/sql/error.rs b/vendored/sqlite3-parser/src/lexer/sql/error.rs index d94e529a1..b85dad504 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/error.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/error.rs @@ -38,10 +38,12 @@ pub enum Error { #[label("here")] Option, ), /// Invalid number format + #[diagnostic(help("Invalid digit in `{3}`"))] BadNumber( Option<(u64, usize)>, #[label("here")] Option, Option, + String, // Holds the offending number as a string ), /// Invalid or missing sign after `!` ExpectedEqualsSign( @@ -85,7 +87,7 @@ impl fmt::Display for Error { write!(f, "non-terminated block comment at {:?}", pos.unwrap()) } Self::BadVariableName(pos, _) => write!(f, "bad variable name at {:?}", pos.unwrap()), - Self::BadNumber(pos, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), + Self::BadNumber(pos, _, _, _) => write!(f, "bad number at {:?}", pos.unwrap()), Self::ExpectedEqualsSign(pos, _) => write!(f, "expected = sign at {:?}", pos.unwrap()), Self::MalformedBlobLiteral(pos, _) => { write!(f, "malformed blob literal at {:?}", pos.unwrap()) @@ -147,7 +149,7 @@ impl ScanError for Error { } // Exact same handling here Self::MalformedHexInteger(ref mut pos, ref mut src, len, _) - | Self::BadNumber(ref mut pos, ref mut src, len) => { + | 
Self::BadNumber(ref mut pos, ref mut src, len, _) => { *pos = Some((line, column)); *src = Some((offset, len.unwrap_or(0)).into()); } diff --git a/vendored/sqlite3-parser/src/lexer/sql/mod.rs b/vendored/sqlite3-parser/src/lexer/sql/mod.rs index b65d09863..84f59eaeb 100644 --- a/vendored/sqlite3-parser/src/lexer/sql/mod.rs +++ b/vendored/sqlite3-parser/src/lexer/sql/mod.rs @@ -596,7 +596,9 @@ fn number(data: &[u8]) -> Result<(Option>, usize), Error> { } else if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((&data[..i], TK_INTEGER)), i)) } else { @@ -643,7 +645,9 @@ fn fractional_part(data: &[u8], i: usize) -> Result<(Option>, usize), if b == b'e' || b == b'E' { return exponential_part(data, i); } else if is_identifier_start(b) { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((&data[..i], TK_FLOAT)), i)) } else { @@ -659,17 +663,23 @@ fn exponential_part(data: &[u8], i: usize) -> Result<(Option>, usize), if let Some((j, b)) = find_end_of_number(data, i + 1, u8::is_ascii_digit)? { if j == i + 1 || is_identifier_start(b) { let len = if is_identifier_start(b) { j + 1 } else { j }; - return Err(Error::BadNumber(None, None, Some(len))); + return Err(Error::BadNumber(None, None, Some(len), unsafe { + String::from_utf8_unchecked(data[..len].to_vec()) + })); } Ok((Some((&data[..j], TK_FLOAT)), j)) } else { if data.len() == i + 1 { - return Err(Error::BadNumber(None, None, Some(i + 1))); + return Err(Error::BadNumber(None, None, Some(i + 1), unsafe { + String::from_utf8_unchecked(data[..i + 1].to_vec()) + })); } Ok((Some((data, TK_FLOAT)), data.len())) } } else { - Err(Error::BadNumber(None, None, Some(data.len()))) + Err(Error::BadNumber(None, None, Some(data.len()), unsafe { + String::from_utf8_unchecked(data.to_vec()) + })) } } @@ -686,7 +696,9 @@ fn find_end_of_number( { continue; } - return Err(Error::BadNumber(None, None, Some(j))); + return Err(Error::BadNumber(None, None, Some(j), unsafe { + String::from_utf8_unchecked(data[..j].to_vec()) + })); } else { return Ok(Some((j, b))); } @@ -740,7 +752,7 @@ mod tests { let mut s = Scanner::new(tokenizer); expect_token(&mut s, input, b"SELECT", TokenType::TK_SELECT)?; let err = s.scan(input).unwrap_err(); - assert!(matches!(err, Error::BadNumber(_, _, _))); + assert!(matches!(err, Error::BadNumber(_, _, _, _))); Ok(()) } From 6bea4de30fd37f1cf8431c98b3aa8c5221cd2202 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 17:22:46 +0300 Subject: [PATCH 180/425] Check that index seek key members are not null --- core/translate/main_loop.rs | 20 +++++++---- core/translate/optimizer.rs | 68 +++++++++++++++++++++++++++++++++++++ testing/where.test | 4 +++ 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 8409e31c9..6521b9c24 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -15,6 +15,7 @@ use super::{ emitter::{OperationMode, TranslateCtx}, expr::{translate_condition_expr, translate_expr, ConditionMetadata}, group_by::is_column_in_group_by, + optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ IterationDirection, Operation, Search, 
SeekDef, SelectPlan, SelectQueryType, @@ -902,13 +903,18 @@ fn emit_seek( }); } } else { - translate_expr( - program, - Some(tables), - &seek_def.key[i], - reg, - &t_ctx.resolver, - )?; + let expr = &seek_def.key[i]; + translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?; + // If the seek key column is not verifiably non-NULL, we need check whether it is NULL, + // and if so, jump to the loop end. + // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL, + // which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons. + if !expr.is_nonnull() { + program.emit_insn(Insn::IsNull { + reg, + target_pc: loop_end, + }); + } } } let num_regs = if seek_def.null_pad_unset_cols() { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 609acd906..49543fd6b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -471,6 +471,7 @@ pub trait Optimizable { .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } fn is_rowid_alias_of(&self, table_index: usize) -> bool; + fn is_nonnull(&self) -> bool; } impl Optimizable for ast::Expr { @@ -484,6 +485,73 @@ impl Optimizable for ast::Expr { _ => false, } } + /// Returns true if the expressions is (verifiably) non-NULL. + /// It might still be non-NULL even if we return false; we just + /// weren't able to prove it. + /// This function is currently very conservative, and will return false + /// for any expression where we aren't sure and didn't bother to find out + /// by writing more complex code. + fn is_nonnull(&self) -> bool { + match self { + Expr::Between { + lhs, start, end, .. + } => lhs.is_nonnull() && start.is_nonnull() && end.is_nonnull(), + Expr::Binary(expr, _, expr1) => expr.is_nonnull() && expr1.is_nonnull(), + Expr::Case { + base, + when_then_pairs, + else_expr, + .. + } => { + base.as_ref().map_or(true, |base| base.is_nonnull()) + && when_then_pairs.iter().all(|(_, then)| then.is_nonnull()) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_nonnull()) + } + Expr::Cast { expr, .. } => expr.is_nonnull(), + Expr::Collate(expr, _) => expr.is_nonnull(), + Expr::DoublyQualified(..) => { + panic!("Do not call is_nonnull before DoublyQualified has been rewritten as Column") + } + Expr::Exists(..) => false, + Expr::FunctionCall { .. } => false, + Expr::FunctionCallStar { .. } => false, + Expr::Id(..) => panic!("Do not call is_nonnull before Id has been rewritten as Column"), + Expr::Column { is_rowid_alias, .. } => *is_rowid_alias, + Expr::RowId { .. } => true, + Expr::InList { lhs, rhs, .. } => { + lhs.is_nonnull() + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull())) + } + Expr::InSelect { .. } => false, + Expr::InTable { .. } => false, + Expr::IsNull(..) => true, + Expr::Like { lhs, rhs, .. } => lhs.is_nonnull() && rhs.is_nonnull(), + Expr::Literal(literal) => match literal { + ast::Literal::Numeric(_) => true, + ast::Literal::String(_) => true, + ast::Literal::Blob(_) => true, + ast::Literal::Keyword(_) => true, + ast::Literal::Null => false, + ast::Literal::CurrentDate => true, + ast::Literal::CurrentTime => true, + ast::Literal::CurrentTimestamp => true, + }, + Expr::Name(..) => false, + Expr::NotNull(..) => true, + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull()), + Expr::Qualified(..) => { + panic!("Do not call is_nonnull before Qualified has been rewritten as Column") + } + Expr::Raise(..) => false, + Expr::Subquery(..) 
=> false,
+            Expr::Unary(_, expr) => expr.is_nonnull(),
+            Expr::Variable(..) => false,
+        }
+    }
     fn check_constant(&self) -> Result<Option<ConstantPredicate>> {
         match self {
             Self::Literal(lit) => match lit {
diff --git a/testing/where.test b/testing/where.test
index a5bdc91e8..a149e85f2 100755
--- a/testing/where.test
+++ b/testing/where.test
@@ -572,3 +572,7 @@ do_execsql_test where-constant-condition-no-tables {
 do_execsql_test where-constant-condition-no-tables-2 {
   select 1 where 1 IS NOT NULL;
 } {1}
+
+do_execsql_test where-null-comparison-index-seek-regression-test {
+  select age from users where age > NULL;
+} {}
\ No newline at end of file

From d4707fe3916297ff8219a50614d4695ce9bba834 Mon Sep 17 00:00:00 2001
From: alpaylan
Date: Fri, 11 Apr 2025 11:23:03 -0400
Subject: [PATCH 181/425] add non-zero exit code in case of failures, remove
 the interactive initialization option in bug base for now, fix bugs in
 differential mode, add detailed information regarding runs to the bug base

---
 Cargo.lock                       |   6 +-
 simulator/Cargo.toml             |   1 +
 simulator/generation/plan.rs     |  23 ++-
 simulator/generation/property.rs |  40 +++---
 simulator/main.rs                | 134 ++++++++++++-----
 simulator/runner/bugbase.rs      | 222 ++++++++++++++++++++------
 simulator/runner/cli.rs          |   3 +-
 simulator/runner/differential.rs | 238 +++++++++++++++++--------------
 simulator/runner/env.rs          |  83 +++++++----
 simulator/runner/execution.rs    |   9 +-
 simulator/runner/watch.rs        |   3 +-
 11 files changed, 517 insertions(+), 245 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 781c3c7b5..eb7943c70 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -344,6 +344,7 @@ dependencies = [
  "iana-time-zone",
  "js-sys",
  "num-traits",
+ "serde",
  "wasm-bindgen",
  "windows-link",
 ]
@@ -1866,6 +1867,7 @@ name = "limbo_sim"
 version = "0.0.19-pre.4"
 dependencies = [
  "anarchist-readable-name-generator-lib",
+ "chrono",
  "clap",
  "dirs 6.0.0",
  "env_logger 0.10.2",
@@ -2292,9 +2294,9 @@ dependencies = [
 
 [[package]]
 name = "once_cell"
-version = "1.21.1"
+version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
 [[package]]
 name = "onig"
diff --git a/simulator/Cargo.toml b/simulator/Cargo.toml
index 285604094..991f10866 100644
--- a/simulator/Cargo.toml
+++ b/simulator/Cargo.toml
@@ -31,3 +31,4 @@ serde_json = { version = "1.0" }
 notify = "8.0.0"
 rusqlite = { version = "0.34", features = ["bundled"] }
 dirs = "6.0.0"
+chrono = { version = "0.4.40", features = ["serde"] }
diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs
index 4f4900b34..da2dd3c78 100644
--- a/simulator/generation/plan.rs
+++ b/simulator/generation/plan.rs
@@ -11,7 +11,7 @@ use crate::{
         },
         table::Value,
     },
-    runner::env::{SimConnection, SimulatorEnvTrait},
+    runner::env::SimConnection,
     SimulatorEnv,
 };
 
@@ -238,7 +238,7 @@ impl Display for Interaction {
     }
 }
 
-type AssertionFunc = dyn Fn(&Vec<ResultSet>, &dyn SimulatorEnvTrait) -> Result<bool>;
+type AssertionFunc = dyn Fn(&Vec<ResultSet>, &SimulatorEnv) -> Result<bool>;
 
 enum AssertionAST {
     Pick(),
@@ -523,7 +523,7 @@ impl Interaction {
     pub(crate) fn execute_assertion(
         &self,
         stack: &Vec<ResultSet>,
-        env: &impl SimulatorEnvTrait,
+        env: &SimulatorEnv,
     ) -> Result<()> {
         match self {
             Self::Query(_) => {
@@ -554,7 +554,7 @@
     pub(crate) fn execute_assumption(
         &self,
         stack: &Vec<ResultSet>,
-        env: &dyn SimulatorEnvTrait,
+        env: &SimulatorEnv,
     ) -> Result<()> {
         match self {
             Self::Query(_) => {
@@ -596,15 +596,12 @@ impl Interaction 
{ Self::Fault(fault) => { match fault { Fault::Disconnect => { - match env.connections[conn_index] { - SimConnection::Connected(ref mut conn) => { - conn.close()?; - } - SimConnection::Disconnected => { - return Err(limbo_core::LimboError::InternalError( - "Tried to disconnect a disconnected connection".to_string(), - )); - } + if env.connections[conn_index].is_connected() { + env.connections[conn_index].disconnect(); + } else { + return Err(limbo_core::LimboError::InternalError( + "connection already disconnected".into(), + )); } env.connections[conn_index] = SimConnection::Disconnected; } diff --git a/simulator/generation/property.rs b/simulator/generation/property.rs index d73f17f96..a876a833d 100644 --- a/simulator/generation/property.rs +++ b/simulator/generation/property.rs @@ -9,7 +9,7 @@ use crate::{ }, table::Value, }, - runner::env::{SimulatorEnv, SimulatorEnvTrait}, + runner::env::SimulatorEnv, }; use super::{ @@ -170,8 +170,8 @@ impl Property { message: format!("table {} exists", insert.table()), func: Box::new({ let table_name = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table_name)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table_name)) } }), }); @@ -182,7 +182,7 @@ impl Property { row.iter().map(|v| v.to_string()).collect::>(), insert.table(), ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let rows = stack.last().unwrap(); match rows { Ok(rows) => Ok(rows.iter().any(|r| r == &row)), @@ -206,8 +206,8 @@ impl Property { let assumption = Interaction::Assumption(Assertion { message: "Double-Create-Failure should not be called on an existing table" .to_string(), - func: Box::new(move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(!env.tables().iter().any(|t| t.name == table_name)) + func: Box::new(move |_: &Vec, env: &SimulatorEnv| { + Ok(!env.tables.iter().any(|t| t.name == table_name)) }), }); @@ -220,11 +220,11 @@ impl Property { message: "creating two tables with the name should result in a failure for the second query" .to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(_) => Ok(false), - Err(e) => Ok(e.to_string().contains(&format!("Table {table_name} already exists"))), + Err(e) => Ok(e.to_string().to_lowercase().contains(&format!("table {table_name} already exists"))), } }), }); @@ -245,8 +245,8 @@ impl Property { message: format!("table {} exists", table_name), func: Box::new({ let table_name = table_name.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table_name)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table_name)) } }), }); @@ -257,7 +257,7 @@ impl Property { let assertion = Interaction::Assertion(Assertion { message: "select query should respect the limit clause".to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(rows) => Ok(limit >= rows.len()), @@ -281,8 +281,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name 
== table)) } }), }); @@ -292,7 +292,7 @@ impl Property { "select '{}' should return no values for table '{}'", predicate, table, ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let rows = stack.last().unwrap(); match rows { Ok(rows) => Ok(rows.is_empty()), @@ -332,8 +332,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table)) } }), }); @@ -345,7 +345,7 @@ impl Property { "select query should result in an error for table '{}'", table ), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let last = stack.last().unwrap(); match last { Ok(_) => Ok(false), @@ -377,8 +377,8 @@ impl Property { message: format!("table {} exists", table), func: Box::new({ let table = table.clone(); - move |_: &Vec, env: &dyn SimulatorEnvTrait| { - Ok(env.tables().iter().any(|t| t.name == table)) + move |_: &Vec, env: &SimulatorEnv| { + Ok(env.tables.iter().any(|t| t.name == table)) } }), }); @@ -401,7 +401,7 @@ impl Property { let assertion = Interaction::Assertion(Assertion { message: "select queries should return the same amount of results".to_string(), - func: Box::new(move |stack: &Vec, _: &dyn SimulatorEnvTrait| { + func: Box::new(move |stack: &Vec, _: &SimulatorEnv| { let select_star = stack.last().unwrap(); let select_predicate = stack.get(stack.len() - 2).unwrap(); match (select_predicate, select_star) { diff --git a/simulator/main.rs b/simulator/main.rs index ef22853f4..9a0345be7 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -5,7 +5,7 @@ use generation::ArbitraryFrom; use notify::event::{DataChange, ModifyKind}; use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; -use runner::bugbase::{Bug, BugBase}; +use runner::bugbase::{Bug, BugBase, LoadedBug}; use runner::cli::SimulatorCLI; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; @@ -28,6 +28,7 @@ struct Paths { history: PathBuf, doublecheck_db: PathBuf, shrunk_db: PathBuf, + diff_db: PathBuf, } impl Paths { @@ -40,6 +41,7 @@ impl Paths { history: PathBuf::from(output_dir).join("history.txt"), doublecheck_db: PathBuf::from(output_dir).join("double.db"), shrunk_db: PathBuf::from(output_dir).join("shrunk.db"), + diff_db: PathBuf::from(output_dir).join("diff.db"), } } } @@ -52,7 +54,6 @@ fn main() -> Result<(), String> { let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; banner(); - // let paths = Paths::new(&output_dir, cli_opts.doublecheck); let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db); @@ -66,8 +67,18 @@ fn main() -> Result<(), String> { if cli_opts.watch { watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); - } else if cli_opts.differential { - differential_testing(env, plans, last_execution.clone()) + return Ok(()); + } + + let result = if cli_opts.differential { + differential_testing( + seed, + &mut bugbase, + &cli_opts, + &paths, + plans, + last_execution.clone(), + ) } else { run_simulator( seed, @@ -77,13 +88,14 @@ fn main() -> Result<(), String> { env, plans, last_execution.clone(), - ); - } + ) + }; // Print the seed, the 
locations of the database and the plan file at the end again for easily accessing them. println!("seed: {}", seed); + println!("path: {}", paths.base.display()); - Ok(()) + result } fn watch_mode( @@ -120,7 +132,7 @@ fn watch_mode( i.shadow(&mut env); }); }); - let env = Arc::new(Mutex::new(env.clone())); + let env = Arc::new(Mutex::new(env.clone_without_connections())); watch::run_simulation(env, &mut [plan], last_execution.clone()) }), last_execution.clone(), @@ -133,7 +145,6 @@ fn watch_mode( SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => { log::error!("simulation failed: '{}'", error); - println!("simulation failed: '{}'", error); } } } @@ -153,7 +164,7 @@ fn run_simulator( env: SimulatorEnv, plans: Vec, last_execution: Arc>, -) { +) -> Result<(), String> { std::panic::set_hook(Box::new(move |info| { log::error!("panic occurred"); @@ -181,15 +192,15 @@ fn run_simulator( if cli_opts.doublecheck { let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); let env = Arc::new(Mutex::new(env)); - doublecheck(env, paths, &plans, last_execution.clone(), result); + doublecheck(env, paths, &plans, last_execution.clone(), result) } else { // No doublecheck, run shrinking if panicking or found a bug. match &result { SandboxedResult::Correct => { log::info!("simulation succeeded"); println!("simulation succeeded"); - // remove the bugbase entry - bugbase.remove_bug(seed).unwrap(); + bugbase.mark_successful_run(seed, cli_opts).unwrap(); + Ok(()) } SandboxedResult::Panicked { error, @@ -217,8 +228,6 @@ fn run_simulator( } log::error!("simulation failed: '{}'", error); - println!("simulation failed: '{}'", error); - log::info!("Starting to shrink"); let shrunk_plans = plans @@ -260,12 +269,21 @@ fn run_simulator( ) => { if e1 != e2 { log::error!("shrinking failed, the error was not properly reproduced"); - bugbase.add_bug(seed, plans[0].clone()).unwrap(); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", error)) } else { - log::info!("shrinking succeeded"); - println!("shrinking succeeded"); + log::info!( + "shrinking succeeded, reduced the plan from {} to {}", + plans[0].plan.len(), + shrunk_plans[0].plan.len() + ); // Save the shrunk database - bugbase.add_bug(seed, shrunk_plans[0].clone()).unwrap(); + bugbase + .add_bug(seed, shrunk_plans[0].clone(), Some(e1.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", e1)) } } (_, SandboxedResult::Correct) => { @@ -273,7 +291,10 @@ fn run_simulator( } _ => { log::error!("shrinking failed, the error was not properly reproduced"); - bugbase.add_bug(seed, plans[0].clone()).unwrap(); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("failed with error: '{}'", error)) } } } @@ -287,7 +308,7 @@ fn doublecheck( plans: &[InteractionPlan], last_execution: Arc>, result: SandboxedResult, -) { +) -> Result<(), String> { // Run the simulation again let result2 = SandboxedResult::from( std::panic::catch_unwind(|| { @@ -299,29 +320,47 @@ fn doublecheck( match (result, result2) { (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => { log::error!("doublecheck failed! first run succeeded, but second run panicked."); + Err("doublecheck failed! first run succeeded, but second run panicked.".to_string()) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => { log::error!( "doublecheck failed! first run failed an assertion, but second run panicked." 
); + Err( + "doublecheck failed! first run failed an assertion, but second run panicked." + .to_string(), + ) } (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => { log::error!("doublecheck failed! first run panicked, but second run succeeded."); + Err("doublecheck failed! first run panicked, but second run succeeded.".to_string()) } (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => { log::error!( "doublecheck failed! first run panicked, but second run failed an assertion." ); + Err( + "doublecheck failed! first run panicked, but second run failed an assertion." + .to_string(), + ) } (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => { log::error!( "doublecheck failed! first run succeeded, but second run failed an assertion." ); + Err( + "doublecheck failed! first run succeeded, but second run failed an assertion." + .to_string(), + ) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => { log::error!( "doublecheck failed! first run failed an assertion, but second run succeeded." ); + Err( + "doublecheck failed! first run failed an assertion, but second run succeeded." + .to_string(), + ) } (SandboxedResult::Correct, SandboxedResult::Correct) | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. }) @@ -331,33 +370,62 @@ fn doublecheck( let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap(); if db_bytes != doublecheck_db_bytes { log::error!("doublecheck failed! database files are different."); + log::error!("current: {}", paths.db.display()); + log::error!("doublecheck: {}", paths.doublecheck_db.display()); + Err( + "doublecheck failed! database files are different, check binary diffs for more details.".to_string() + ) } else { log::info!("doublecheck succeeded! database files are the same."); + println!("doublecheck succeeded! database files are the same."); + Ok(()) } } } } fn differential_testing( - env: SimulatorEnv, + seed: u64, + bugbase: &mut BugBase, + cli_opts: &SimulatorCLI, + paths: &Paths, plans: Vec, last_execution: Arc>, -) { - let env = Arc::new(Mutex::new(env)); +) -> Result<(), String> { + let env = Arc::new(Mutex::new(SimulatorEnv::new(seed, cli_opts, &paths.db))); + let rusqlite_env = Arc::new(Mutex::new(SimulatorEnv::new( + seed, + cli_opts, + &paths.diff_db, + ))); + let result = SandboxedResult::from( std::panic::catch_unwind(|| { let plan = plans[0].clone(); - differential::run_simulation(env, &mut [plan], last_execution.clone()) + differential::run_simulation( + env, + rusqlite_env, + &|| rusqlite::Connection::open(paths.diff_db.clone()).unwrap(), + &mut [plan], + last_execution.clone(), + ) }), last_execution.clone(), ); - if let SandboxedResult::Correct = result { - log::info!("simulation succeeded"); - println!("simulation succeeded"); - } else { - log::error!("simulation failed"); - println!("simulation failed"); + match result { + SandboxedResult::Correct => { + log::info!("simulation succeeded, output of Limbo conforms to SQLite"); + println!("simulation succeeded, output of Limbo conforms to SQLite"); + Ok(()) + } + SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. } => { + log::error!("simulation failed: '{}'", error); + bugbase + .add_bug(seed, plans[0].clone(), Some(error.clone()), cli_opts) + .unwrap(); + Err(format!("simulation failed: '{}'", error)) + } } } @@ -433,12 +501,14 @@ fn setup_simulation( let env = SimulatorEnv::new(bug.seed(), cli_opts, db_path(&paths)); let plan = match bug { - Bug::Loaded { plan, .. 
} => plan.clone(),
+        Bug::Loaded(LoadedBug { plan, .. }) => plan.clone(),
         Bug::Unloaded { seed } => {
             let seed = *seed;
             bugbase
                 .load_bug(seed)
                 .unwrap_or_else(|_| panic!("could not load bug '{}' in bug base", seed))
+                .plan
+                .clone()
         }
     };
 
diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs
index 83c4273b3..e131304c3 100644
--- a/simulator/runner/bugbase.rs
+++ b/simulator/runner/bugbase.rs
@@ -3,15 +3,44 @@ use std::{
     io::{self, Write},
     path::PathBuf,
     process::Command,
+    time::SystemTime,
 };
 
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+
 use crate::{InteractionPlan, Paths};
 
+use super::cli::SimulatorCLI;
+
 /// A bug is a run that has been identified as buggy.
 #[derive(Clone)]
 pub(crate) enum Bug {
     Unloaded { seed: u64 },
-    Loaded { seed: u64, plan: InteractionPlan },
+    Loaded(LoadedBug),
+}
+
+#[derive(Clone)]
+pub struct LoadedBug {
+    /// The seed of the bug.
+    pub seed: u64,
+    /// The plan of the bug.
+    pub plan: InteractionPlan,
+    /// The runs of the bug.
+    pub runs: Vec<BugRun>,
+}
+
+#[derive(Clone, Serialize, Deserialize)]
+pub(crate) struct BugRun {
+    /// Commit hash of the current version of Limbo.
+    hash: String,
+    /// Timestamp of the run.
+    #[serde(with = "chrono::serde::ts_seconds")]
+    timestamp: DateTime<Utc>,
+    /// Error message of the run.
+    error: Option<String>,
+    /// Options
+    cli_options: SimulatorCLI,
 }
 
 impl Bug {
@@ -27,7 +56,7 @@ impl Bug {
     pub(crate) fn seed(&self) -> u64 {
         match self {
             Bug::Unloaded { seed } => *seed,
-            Bug::Loaded { seed, .. } => *seed,
+            Bug::Loaded(LoadedBug { seed, .. }) => *seed,
         }
     }
 }
@@ -77,6 +106,36 @@ impl BugBase {
             .or(Err("should be able to get current directory".to_string()))?,
     ];
 
+        for path in &potential_paths {
+            let path = path.join(".bugbase");
+            if path.exists() {
+                return BugBase::new(path);
+            }
+        }
+
+        for path in potential_paths {
+            let path = path.join(".bugbase");
+            if std::fs::create_dir_all(&path).is_ok() {
+                log::info!("bug base created at {}", path.display());
+                return BugBase::new(path);
+            }
+        }
+
+        Err("failed to create bug base".to_string())
+    }
+
+    /// Load the bug base from one of the potential paths.
+    pub(crate) fn interactive_load() -> Result {
+        let potential_paths = vec![
+            // limbo project directory
+            BugBase::get_limbo_project_dir()?,
+            // home directory
+            dirs::home_dir().ok_or("should be able to get home directory".to_string())?,
+            // current directory
+            std::env::current_dir()
+                .or(Err("should be able to get current directory".to_string()))?,
+        ];
+
     for path in potential_paths {
         let path = path.join(".bugbase");
         if path.exists() {
@@ -119,14 +178,41 @@ impl BugBase {
     }
 
     /// Add a new bug to the bug base. 
- pub(crate) fn add_bug(&mut self, seed: u64, plan: InteractionPlan) -> Result<(), String> { + pub(crate) fn add_bug( + &mut self, + seed: u64, + plan: InteractionPlan, + error: Option, + cli_options: &SimulatorCLI, + ) -> Result<(), String> { log::debug!("adding bug with seed {}", seed); - if self.bugs.contains_key(&seed) { - return Err(format!("Bug with hash {} already exists", seed)); + let bug = self.get_bug(seed); + + if bug.is_some() { + let mut bug = self.load_bug(seed)?; + bug.plan = plan.clone(); + bug.runs.push(BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error, + cli_options: cli_options.clone(), + }); + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + } else { + let bug = LoadedBug { + seed, + plan: plan.clone(), + runs: vec![BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error, + cli_options: cli_options.clone(), + }], + }; + self.bugs.insert(seed, Bug::Loaded(bug.clone())); } - self.save_bug(seed, &plan)?; - self.bugs.insert(seed, Bug::Loaded { seed, plan }); - Ok(()) + // Save the bug to the bug base. + self.save_bug(seed) } /// Get a bug from the bug base. @@ -135,36 +221,48 @@ impl BugBase { } /// Save a bug to the bug base. - pub(crate) fn save_bug(&self, seed: u64, plan: &InteractionPlan) -> Result<(), String> { - let bug_path = self.path.join(seed.to_string()); - std::fs::create_dir_all(&bug_path) - .or(Err("should be able to create bug directory".to_string()))?; + fn save_bug(&self, seed: u64) -> Result<(), String> { + let bug = self.get_bug(seed); - let seed_path = bug_path.join("seed.txt"); - std::fs::write(&seed_path, seed.to_string()) - .or(Err("should be able to write seed file".to_string()))?; + match bug { + None | Some(Bug::Unloaded { .. }) => { + unreachable!("save should only be called within add_bug"); + } + Some(Bug::Loaded(bug)) => { + let bug_path = self.path.join(seed.to_string()); + std::fs::create_dir_all(&bug_path) + .or(Err("should be able to create bug directory".to_string()))?; - // At some point we might want to save the commit hash of the current - // version of Limbo. 
- // let commit_hash = Self::get_current_commit_hash()?; - // let commit_hash_path = bug_path.join("commit_hash.txt"); - // std::fs::write(&commit_hash_path, commit_hash) - // .or(Err("should be able to write commit hash file".to_string()))?; + let seed_path = bug_path.join("seed.txt"); + std::fs::write(&seed_path, seed.to_string()) + .or(Err("should be able to write seed file".to_string()))?; - let plan_path = bug_path.join("plan.json"); - std::fs::write( - &plan_path, - serde_json::to_string(plan).or(Err("should be able to serialize plan".to_string()))?, - ) - .or(Err("should be able to write plan file".to_string()))?; + let plan_path = bug_path.join("plan.json"); + std::fs::write( + &plan_path, + serde_json::to_string_pretty(&bug.plan) + .or(Err("should be able to serialize plan".to_string()))?, + ) + .or(Err("should be able to write plan file".to_string()))?; + + let readable_plan_path = bug_path.join("plan.sql"); + std::fs::write(&readable_plan_path, bug.plan.to_string()) + .or(Err("should be able to write readable plan file".to_string()))?; + + let runs_path = bug_path.join("runs.json"); + std::fs::write( + &runs_path, + serde_json::to_string_pretty(&bug.runs) + .or(Err("should be able to serialize runs".to_string()))?, + ) + .or(Err("should be able to write runs file".to_string()))?; + } + } - let readable_plan_path = bug_path.join("plan.sql"); - std::fs::write(&readable_plan_path, plan.to_string()) - .or(Err("should be able to write readable plan file".to_string()))?; Ok(()) } - pub(crate) fn load_bug(&mut self, seed: u64) -> Result { + pub(crate) fn load_bug(&mut self, seed: u64) -> Result { let seed_match = self.bugs.get(&seed); match seed_match { @@ -176,30 +274,60 @@ impl BugBase { let plan: InteractionPlan = serde_json::from_str(&plan) .or(Err("should be able to deserialize plan".to_string()))?; - let bug = Bug::Loaded { + let runs = + std::fs::read_to_string(self.path.join(seed.to_string()).join("runs.json")) + .or(Err("should be able to read runs file".to_string()))?; + let runs: Vec = serde_json::from_str(&runs) + .or(Err("should be able to deserialize runs".to_string()))?; + + let bug = LoadedBug { seed, plan: plan.clone(), + runs, }; - self.bugs.insert(seed, bug); + + self.bugs.insert(seed, Bug::Loaded(bug.clone())); log::debug!("Loaded bug with seed {}", seed); - Ok(plan) + Ok(bug) } - Some(Bug::Loaded { plan, .. }) => { + Some(Bug::Loaded(bug)) => { log::warn!( "Bug with seed {} is already loaded, returning the existing plan", seed ); - Ok(plan.clone()) + Ok(bug.clone()) } } } - pub(crate) fn remove_bug(&mut self, seed: u64) -> Result<(), String> { - self.bugs.remove(&seed); - std::fs::remove_dir_all(self.path.join(seed.to_string())) - .or(Err("should be able to remove bug directory".to_string()))?; + pub(crate) fn mark_successful_run( + &mut self, + seed: u64, + cli_options: &SimulatorCLI, + ) -> Result<(), String> { + let bug = self.get_bug(seed); + match bug { + None => { + log::debug!("removing bug base entry for {}", seed); + std::fs::remove_dir_all(self.path.join(seed.to_string())) + .or(Err("should be able to remove bug directory".to_string()))?; + } + Some(_) => { + let mut bug = self.load_bug(seed)?; + bug.runs.push(BugRun { + hash: Self::get_current_commit_hash()?, + timestamp: SystemTime::now().into(), + error: None, + cli_options: cli_options.clone(), + }); + self.bugs.insert(seed, Bug::Loaded(bug.clone())); + // Save the bug to the bug base. 
+ self.save_bug(seed) + .or(Err("should be able to save bug".to_string()))?; + log::debug!("Updated bug with seed {}", seed); + } + } - log::debug!("Removed bug with seed {}", seed); Ok(()) } } @@ -223,6 +351,18 @@ impl BugBase { } impl BugBase { + pub(crate) fn get_current_commit_hash() -> Result { + let output = Command::new("git") + .args(["rev-parse", "HEAD"]) + .output() + .or(Err("should be able to get the commit hash".to_string()))?; + let commit_hash = String::from_utf8(output.stdout) + .or(Err("commit hash should be valid utf8".to_string()))? + .trim() + .to_string(); + Ok(commit_hash) + } + pub(crate) fn get_limbo_project_dir() -> Result { Ok(PathBuf::from( String::from_utf8( diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index b07b89d47..5933be437 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -1,6 +1,7 @@ use clap::{command, Parser}; +use serde::{Deserialize, Serialize}; -#[derive(Parser)] +#[derive(Parser, Clone, Serialize, Deserialize)] #[command(name = "limbo-simulator")] #[command(author, version, about, long_about = None)] pub struct SimulatorCLI { diff --git a/simulator/runner/differential.rs b/simulator/runner/differential.rs index 1242c5307..0db6b8ecd 100644 --- a/simulator/runner/differential.rs +++ b/simulator/runner/differential.rs @@ -5,54 +5,20 @@ use crate::{ pick_index, plan::{Interaction, InteractionPlanState, ResultSet}, }, - model::{ - query::Query, - table::{Table, Value}, - }, + model::{query::Query, table::Value}, runner::execution::ExecutionContinuation, InteractionPlan, }; use super::{ - env::{ConnectionTrait, SimConnection, SimulatorEnv, SimulatorEnvTrait}, + env::{SimConnection, SimulatorEnv}, execution::{execute_interaction, Execution, ExecutionHistory, ExecutionResult}, }; -pub(crate) struct SimulatorEnvRusqlite { - pub(crate) tables: Vec
<Table>,
-    pub(crate) connections: Vec<RusqliteConnection>,
-}
-
-pub(crate) enum RusqliteConnection {
-    Connected(rusqlite::Connection),
-    Disconnected,
-}
-
-impl ConnectionTrait for RusqliteConnection {
-    fn is_connected(&self) -> bool {
-        match self {
-            RusqliteConnection::Connected(_) => true,
-            RusqliteConnection::Disconnected => false,
-        }
-    }
-
-    fn disconnect(&mut self) {
-        *self = RusqliteConnection::Disconnected;
-    }
-}
-
-impl SimulatorEnvTrait for SimulatorEnvRusqlite {
-    fn tables(&self) -> &Vec<Table> {
-        &self.tables
-    }
-
-    fn tables_mut(&mut self) -> &mut Vec<Table> 
{ - &mut self.tables - } -} - pub(crate) fn run_simulation( env: Arc>, + rusqlite_env: Arc>, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, plans: &mut [InteractionPlan], last_execution: Arc>, ) -> ExecutionResult { @@ -66,14 +32,7 @@ pub(crate) fn run_simulation( secondary_pointer: 0, }) .collect::>(); - let env = env.lock().unwrap(); - let rusqlite_env = SimulatorEnvRusqlite { - tables: env.tables.clone(), - connections: (0..env.connections.len()) - .map(|_| RusqliteConnection::Connected(rusqlite::Connection::open_in_memory().unwrap())) - .collect::>(), - }; let mut rusqlite_states = plans .iter() .map(|_| InteractionPlanState { @@ -84,16 +43,15 @@ pub(crate) fn run_simulation( .collect::>(); let result = execute_plans( - Arc::new(Mutex::new(env.clone())), + env, rusqlite_env, + rusqlite_conn, plans, &mut states, &mut rusqlite_states, last_execution, ); - env.io.print_stats(); - log::info!("Simulation completed"); result @@ -148,7 +106,8 @@ fn execute_query_rusqlite( pub(crate) fn execute_plans( env: Arc>, - mut rusqlite_env: SimulatorEnvRusqlite, + rusqlite_env: Arc>, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, plans: &mut [InteractionPlan], states: &mut [InteractionPlanState], rusqlite_states: &mut [InteractionPlanState], @@ -158,6 +117,8 @@ pub(crate) fn execute_plans( let now = std::time::Instant::now(); let mut env = env.lock().unwrap(); + let mut rusqlite_env = rusqlite_env.lock().unwrap(); + for _tick in 0..env.opts.ticks { // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); @@ -176,6 +137,7 @@ pub(crate) fn execute_plans( match execute_plan( &mut env, &mut rusqlite_env, + rusqlite_conn, connection_index, plans, states, @@ -202,13 +164,15 @@ pub(crate) fn execute_plans( fn execute_plan( env: &mut SimulatorEnv, - rusqlite_env: &mut SimulatorEnvRusqlite, + rusqlite_env: &mut SimulatorEnv, + rusqlite_conn: &dyn Fn() -> rusqlite::Connection, connection_index: usize, plans: &mut [InteractionPlan], states: &mut [InteractionPlanState], rusqlite_states: &mut [InteractionPlanState], ) -> limbo_core::Result<()> { let connection = &env.connections[connection_index]; + let rusqlite_connection = &rusqlite_env.connections[connection_index]; let plan = &mut plans[connection_index]; let state = &mut states[connection_index]; let rusqlite_state = &mut rusqlite_states[connection_index]; @@ -218,83 +182,141 @@ fn execute_plan( let interaction = &plan.plan[state.interaction_pointer].interactions()[state.secondary_pointer]; - if let SimConnection::Disconnected = connection { - log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); - } else { - let limbo_result = - execute_interaction(env, connection_index, interaction, &mut state.stack); - let ruqlite_result = execute_interaction_rusqlite( - rusqlite_env, - connection_index, - interaction, - &mut rusqlite_state.stack, - ); + match (connection, rusqlite_connection) { + (SimConnection::Disconnected, SimConnection::Disconnected) => { + log::debug!("connecting {}", connection_index); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); + rusqlite_env.connections[connection_index] = + SimConnection::SQLiteConnection(rusqlite_conn()); + } + (SimConnection::LimboConnection(_), SimConnection::SQLiteConnection(_)) => { + let limbo_result = + execute_interaction(env, connection_index, interaction, &mut state.stack); + let ruqlite_result = 
execute_interaction_rusqlite( + rusqlite_env, + connection_index, + interaction, + &mut rusqlite_state.stack, + ); + match (limbo_result, ruqlite_result) { + (Ok(next_execution), Ok(next_execution_rusqlite)) => { + if next_execution != next_execution_rusqlite { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } - match (limbo_result, ruqlite_result) { - (Ok(next_execution), Ok(next_execution_rusqlite)) => { - if next_execution != next_execution_rusqlite { - log::error!("limbo and rusqlite results do not match"); - return Err(limbo_core::LimboError::InternalError( - "limbo and rusqlite results do not match".into(), - )); - } - log::debug!("connection {} processed", connection_index); - // Move to the next interaction or property - match next_execution { - ExecutionContinuation::NextInteraction => { - if state.secondary_pointer + 1 - >= plan.plan[state.interaction_pointer].interactions().len() - { - // If we have reached the end of the interactions for this property, move to the next property - state.interaction_pointer += 1; - state.secondary_pointer = 0; - } else { - // Otherwise, move to the next interaction - state.secondary_pointer += 1; + let limbo_values = state.stack.last(); + let rusqlite_values = rusqlite_state.stack.last(); + match (limbo_values, rusqlite_values) { + (Some(limbo_values), Some(rusqlite_values)) => { + match (limbo_values, rusqlite_values) { + (Ok(limbo_values), Ok(rusqlite_values)) => { + if limbo_values != rusqlite_values { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + } + (Err(limbo_err), Err(rusqlite_err)) => { + log::warn!( + "limbo and rusqlite both fail, requires manual check" + ); + log::warn!("limbo error {}", limbo_err); + log::warn!("rusqlite error {}", rusqlite_err); + } + (Ok(limbo_result), Err(rusqlite_err)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo values {:?}", limbo_result); + log::error!("rusqlite error {}", rusqlite_err); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + (Err(limbo_err), Ok(_)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo error {}", limbo_err); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); + } + } + } + (None, None) => {} + _ => { + log::error!("limbo and rusqlite results do not match"); + return Err(limbo_core::LimboError::InternalError( + "limbo and rusqlite results do not match".into(), + )); } } - ExecutionContinuation::NextProperty => { - // Skip to the next property - state.interaction_pointer += 1; - state.secondary_pointer = 0; + + // Move to the next interaction or property + match next_execution { + ExecutionContinuation::NextInteraction => { + if state.secondary_pointer + 1 + >= plan.plan[state.interaction_pointer].interactions().len() + { + // If we have reached the end of the interactions for this property, move to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } else { + // Otherwise, move to the next interaction + state.secondary_pointer += 1; + } + } + ExecutionContinuation::NextProperty => { + // Skip to the next property + state.interaction_pointer += 1; + state.secondary_pointer = 0; + } } } - } - (Err(err), Ok(_)) => { - 
log::error!("limbo and rusqlite results do not match"); - log::error!("limbo error {}", err); - return Err(err); - } - (Ok(_), Err(err)) => { - log::error!("limbo and rusqlite results do not match"); - log::error!("rusqlite error {}", err); - return Err(err); - } - (Err(err), Err(err_rusqlite)) => { - log::error!("limbo and rusqlite both fail, requires manual check"); - log::error!("limbo error {}", err); - log::error!("rusqlite error {}", err_rusqlite); - return Err(err); + (Err(err), Ok(_)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo error {}", err); + return Err(err); + } + (Ok(val), Err(err)) => { + log::error!("limbo and rusqlite results do not match"); + log::error!("limbo {:?}", val); + log::error!("rusqlite error {}", err); + return Err(err); + } + (Err(err), Err(err_rusqlite)) => { + log::error!("limbo and rusqlite both fail, requires manual check"); + log::error!("limbo error {}", err); + log::error!("rusqlite error {}", err_rusqlite); + return Err(err); + } } } + _ => unreachable!("{} vs {}", connection, rusqlite_connection), } Ok(()) } fn execute_interaction_rusqlite( - env: &mut SimulatorEnvRusqlite, + env: &mut SimulatorEnv, connection_index: usize, interaction: &Interaction, stack: &mut Vec, ) -> limbo_core::Result { - log::info!("executing in rusqlite: {}", interaction); + log::trace!( + "execute_interaction_rusqlite(connection_index={}, interaction={})", + connection_index, + interaction + ); match interaction { Interaction::Query(query) => { let conn = match &mut env.connections[connection_index] { - RusqliteConnection::Connected(conn) => conn, - RusqliteConnection::Disconnected => unreachable!(), + SimConnection::SQLiteConnection(conn) => conn, + SimConnection::LimboConnection(_) => unreachable!(), + SimConnection::Disconnected => unreachable!(), }; log::debug!("{}", interaction); @@ -318,7 +340,7 @@ fn execute_interaction_rusqlite( } } Interaction::Fault(_) => { - log::debug!("faults are not supported in differential testing mode"); + interaction.execute_fault(env, connection_index)?; } } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 19233fc4a..742502b7b 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -1,8 +1,10 @@ +use std::fmt::Display; +use std::mem; use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use limbo_core::{Connection, Database}; +use limbo_core::Database; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -12,12 +14,6 @@ use crate::runner::io::SimulatorIO; use super::cli::SimulatorCLI; -pub trait SimulatorEnvTrait { - fn tables(&self) -> &Vec
<Table>;
-    fn tables_mut(&mut self) -> &mut Vec<Table>;
-}
-
-#[derive(Clone)]
 pub(crate) struct SimulatorEnv {
     pub(crate) opts: SimulatorOpts,
     pub(crate) tables: Vec<Table>,
@@ -27,13 +23,18 @@ pub(crate) struct SimulatorEnv {
     pub(crate) rng: ChaCha8Rng,
 }
 
-impl SimulatorEnvTrait for SimulatorEnv {
-    fn tables(&self) -> &Vec<Table> {
-        &self.tables
-    }
-
-    fn tables_mut(&mut self) -> &mut Vec<Table> 
{ - &mut self.tables +impl SimulatorEnv { + pub(crate) fn clone_without_connections(&self) -> Self { + SimulatorEnv { + opts: self.opts.clone(), + tables: self.tables.clone(), + connections: (0..self.connections.len()) + .map(|_| SimConnection::Disconnected) + .collect(), + io: self.io.clone(), + db: self.db.clone(), + rng: self.rng.clone(), + } } } @@ -85,7 +86,11 @@ impl SimulatorEnv { // Remove existing database file if it exists if db_path.exists() { std::fs::remove_file(db_path).unwrap(); - std::fs::remove_file(db_path.with_extension("db-wal")).unwrap(); + } + + let wal_path = db_path.with_extension("db-wal"); + if wal_path.exists() { + std::fs::remove_file(wal_path).unwrap(); } let db = match Database::open_file(io.clone(), db_path.to_str().unwrap(), false) { @@ -95,7 +100,9 @@ impl SimulatorEnv { } }; - let connections = vec![SimConnection::Disconnected; opts.max_connections]; + let connections = (0..opts.max_connections) + .map(|_| SimConnection::Disconnected) + .collect::>(); SimulatorEnv { opts, @@ -108,27 +115,55 @@ impl SimulatorEnv { } } -pub trait ConnectionTrait { +pub trait ConnectionTrait +where + Self: std::marker::Sized + Clone, +{ fn is_connected(&self) -> bool; fn disconnect(&mut self); } -#[derive(Clone)] pub(crate) enum SimConnection { - Connected(Rc), + LimboConnection(Rc), + SQLiteConnection(rusqlite::Connection), Disconnected, } -impl ConnectionTrait for SimConnection { - fn is_connected(&self) -> bool { +impl SimConnection { + pub(crate) fn is_connected(&self) -> bool { match self { - SimConnection::Connected(_) => true, + SimConnection::LimboConnection(_) | SimConnection::SQLiteConnection(_) => true, SimConnection::Disconnected => false, } } + pub(crate) fn disconnect(&mut self) { + let conn = mem::replace(self, SimConnection::Disconnected); - fn disconnect(&mut self) { - *self = SimConnection::Disconnected; + match conn { + SimConnection::LimboConnection(conn) => { + conn.close().unwrap(); + } + SimConnection::SQLiteConnection(conn) => { + conn.close().unwrap(); + } + SimConnection::Disconnected => {} + } + } +} + +impl Display for SimConnection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SimConnection::LimboConnection(_) => { + write!(f, "LimboConnection") + } + SimConnection::SQLiteConnection(_) => { + write!(f, "SQLiteConnection") + } + SimConnection::Disconnected => { + write!(f, "Disconnected") + } + } } } diff --git a/simulator/runner/execution.rs b/simulator/runner/execution.rs index 757d9f3ce..7b8204604 100644 --- a/simulator/runner/execution.rs +++ b/simulator/runner/execution.rs @@ -122,8 +122,10 @@ fn execute_plan( if let SimConnection::Disconnected = connection { log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); } else { + log::debug!("connection {} already connected", connection_index); match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { interaction.shadow(env); @@ -163,7 +165,7 @@ fn execute_plan( /// `execute_interaction` uses this type in conjunction with a result, where /// the `Err` case indicates a full-stop due to a bug, and the `Ok` case /// indicates the next step in the plan. -#[derive(PartialEq)] +#[derive(PartialEq, Debug)] pub(crate) enum ExecutionContinuation { /// Default continuation, execute the next interaction. 
NextInteraction, @@ -185,7 +187,8 @@ pub(crate) fn execute_interaction( match interaction { Interaction::Query(_) => { let conn = match &mut env.connections[connection_index] { - SimConnection::Connected(conn) => conn, + SimConnection::LimboConnection(conn) => conn, + SimConnection::SQLiteConnection(_) => unreachable!(), SimConnection::Disconnected => unreachable!(), }; diff --git a/simulator/runner/watch.rs b/simulator/runner/watch.rs index cd0e645b8..cb7648fc3 100644 --- a/simulator/runner/watch.rs +++ b/simulator/runner/watch.rs @@ -98,7 +98,8 @@ fn execute_plan( if let SimConnection::Disconnected = connection { log::debug!("connecting {}", connection_index); - env.connections[connection_index] = SimConnection::Connected(env.db.connect().unwrap()); + env.connections[connection_index] = + SimConnection::LimboConnection(env.db.connect().unwrap()); } else { match execute_interaction(env, connection_index, interaction, &mut state.stack) { Ok(next_execution) => { From 5d85ec0d2ab2366d5b9e1ab174c9d94829c79ec2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 11 Apr 2025 15:57:58 +0000 Subject: [PATCH 182/425] build(deps-dev): bump vite in /bindings/wasm/test-limbo-pkg Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 6.0.7 to 6.2.6. - [Release notes](https://github.com/vitejs/vite/releases) - [Changelog](https://github.com/vitejs/vite/blob/v6.2.6/packages/vite/CHANGELOG.md) - [Commits](https://github.com/vitejs/vite/commits/v6.2.6/packages/vite) --- updated-dependencies: - dependency-name: vite dependency-version: 6.2.6 dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- .../wasm/test-limbo-pkg/package-lock.json | 240 +++++++++--------- bindings/wasm/test-limbo-pkg/package.json | 2 +- 2 files changed, 121 insertions(+), 121 deletions(-) diff --git a/bindings/wasm/test-limbo-pkg/package-lock.json b/bindings/wasm/test-limbo-pkg/package-lock.json index 48584c018..4500e3f1c 100644 --- a/bindings/wasm/test-limbo-pkg/package-lock.json +++ b/bindings/wasm/test-limbo-pkg/package-lock.json @@ -6,17 +6,18 @@ "": { "name": "test-limbo", "dependencies": { - "limbo-wasm": "file:../limbo-wasm-0.0.11.tgz" + "limbo-wasm": ".." 
}, "devDependencies": { - "vite": "^6.0.7", + "vite": "^6.2.6", "vite-plugin-wasm": "^3.4.1" } }, + "..": {}, "node_modules/@esbuild/aix-ppc64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.24.2.tgz", - "integrity": "sha512-thpVCb/rhxE/BnMLQ7GReQLLN8q9qbHmI55F4489/ByVg2aQaQ6kbcLb6FHkocZzQhxc4gx0sCk0tJkKBFzDhA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.2.tgz", + "integrity": "sha512-wCIboOL2yXZym2cgm6mlA742s9QeJ8DjGVaL39dLN4rRwrOgOyYSnOaFPhKZGLb2ngj4EyfAFjsNJwPXZvseag==", "cpu": [ "ppc64" ], @@ -31,9 +32,9 @@ } }, "node_modules/@esbuild/android-arm": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.24.2.tgz", - "integrity": "sha512-tmwl4hJkCfNHwFB3nBa8z1Uy3ypZpxqxfTQOcHX+xRByyYgunVbZ9MzUUfb0RxaHIMnbHagwAxuTL+tnNM+1/Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.2.tgz", + "integrity": "sha512-NQhH7jFstVY5x8CKbcfa166GoV0EFkaPkCKBQkdPJFvo5u+nGXLEH/ooniLb3QI8Fk58YAx7nsPLozUWfCBOJA==", "cpu": [ "arm" ], @@ -48,9 +49,9 @@ } }, "node_modules/@esbuild/android-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.24.2.tgz", - "integrity": "sha512-cNLgeqCqV8WxfcTIOeL4OAtSmL8JjcN6m09XIgro1Wi7cF4t/THaWEa7eL5CMoMBdjoHOTh/vwTO/o2TRXIyzg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.2.tgz", + "integrity": "sha512-5ZAX5xOmTligeBaeNEPnPaeEuah53Id2tX4c2CVP3JaROTH+j4fnfHCkr1PjXMd78hMst+TlkfKcW/DlTq0i4w==", "cpu": [ "arm64" ], @@ -65,9 +66,9 @@ } }, "node_modules/@esbuild/android-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.24.2.tgz", - "integrity": "sha512-B6Q0YQDqMx9D7rvIcsXfmJfvUYLoP722bgfBlO5cGvNVb5V/+Y7nhBE3mHV9OpxBf4eAS2S68KZztiPaWq4XYw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.2.tgz", + "integrity": "sha512-Ffcx+nnma8Sge4jzddPHCZVRvIfQ0kMsUsCMcJRHkGJ1cDmhe4SsrYIjLUKn1xpHZybmOqCWwB0zQvsjdEHtkg==", "cpu": [ "x64" ], @@ -82,9 +83,9 @@ } }, "node_modules/@esbuild/darwin-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.24.2.tgz", - "integrity": "sha512-kj3AnYWc+CekmZnS5IPu9D+HWtUI49hbnyqk0FLEJDbzCIQt7hg7ucF1SQAilhtYpIujfaHr6O0UHlzzSPdOeA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.2.tgz", + "integrity": "sha512-MpM6LUVTXAzOvN4KbjzU/q5smzryuoNjlriAIx+06RpecwCkL9JpenNzpKd2YMzLJFOdPqBpuub6eVRP5IgiSA==", "cpu": [ "arm64" ], @@ -99,9 +100,9 @@ } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.24.2.tgz", - "integrity": "sha512-WeSrmwwHaPkNR5H3yYfowhZcbriGqooyu3zI/3GGpF8AyUdsrrP0X6KumITGA9WOyiJavnGZUwPGvxvwfWPHIA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.2.tgz", + "integrity": "sha512-5eRPrTX7wFyuWe8FqEFPG2cU0+butQQVNcT4sVipqjLYQjjh8a8+vUTfgBKM88ObB85ahsnTwF7PSIt6PG+QkA==", "cpu": [ "x64" ], @@ -116,9 +117,9 @@ } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.24.2.tgz", - "integrity": 
"sha512-UN8HXjtJ0k/Mj6a9+5u6+2eZ2ERD7Edt1Q9IZiB5UZAIdPnVKDoG7mdTVGhHJIeEml60JteamR3qhsr1r8gXvg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.2.tgz", + "integrity": "sha512-mLwm4vXKiQ2UTSX4+ImyiPdiHjiZhIaE9QvC7sw0tZ6HoNMjYAqQpGyui5VRIi5sGd+uWq940gdCbY3VLvsO1w==", "cpu": [ "arm64" ], @@ -133,9 +134,9 @@ } }, "node_modules/@esbuild/freebsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.24.2.tgz", - "integrity": "sha512-TvW7wE/89PYW+IevEJXZ5sF6gJRDY/14hyIGFXdIucxCsbRmLUcjseQu1SyTko+2idmCw94TgyaEZi9HUSOe3Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.2.tgz", + "integrity": "sha512-6qyyn6TjayJSwGpm8J9QYYGQcRgc90nmfdUb0O7pp1s4lTY+9D0H9O02v5JqGApUyiHOtkz6+1hZNvNtEhbwRQ==", "cpu": [ "x64" ], @@ -150,9 +151,9 @@ } }, "node_modules/@esbuild/linux-arm": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.24.2.tgz", - "integrity": "sha512-n0WRM/gWIdU29J57hJyUdIsk0WarGd6To0s+Y+LwvlC55wt+GT/OgkwoXCXvIue1i1sSNWblHEig00GBWiJgfA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.2.tgz", + "integrity": "sha512-UHBRgJcmjJv5oeQF8EpTRZs/1knq6loLxTsjc3nxO9eXAPDLcWW55flrMVc97qFPbmZP31ta1AZVUKQzKTzb0g==", "cpu": [ "arm" ], @@ -167,9 +168,9 @@ } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.24.2.tgz", - "integrity": "sha512-7HnAD6074BW43YvvUmE/35Id9/NB7BeX5EoNkK9obndmZBUk8xmJJeU7DwmUeN7tkysslb2eSl6CTrYz6oEMQg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.2.tgz", + "integrity": "sha512-gq/sjLsOyMT19I8obBISvhoYiZIAaGF8JpeXu1u8yPv8BE5HlWYobmlsfijFIZ9hIVGYkbdFhEqC0NvM4kNO0g==", "cpu": [ "arm64" ], @@ -184,9 +185,9 @@ } }, "node_modules/@esbuild/linux-ia32": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.24.2.tgz", - "integrity": "sha512-sfv0tGPQhcZOgTKO3oBE9xpHuUqguHvSo4jl+wjnKwFpapx+vUDcawbwPNuBIAYdRAvIDBfZVvXprIj3HA+Ugw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.2.tgz", + "integrity": "sha512-bBYCv9obgW2cBP+2ZWfjYTU+f5cxRoGGQ5SeDbYdFCAZpYWrfjjfYwvUpP8MlKbP0nwZ5gyOU/0aUzZ5HWPuvQ==", "cpu": [ "ia32" ], @@ -201,9 +202,9 @@ } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.24.2.tgz", - "integrity": "sha512-CN9AZr8kEndGooS35ntToZLTQLHEjtVB5n7dl8ZcTZMonJ7CCfStrYhrzF97eAecqVbVJ7APOEe18RPI4KLhwQ==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.2.tgz", + "integrity": "sha512-SHNGiKtvnU2dBlM5D8CXRFdd+6etgZ9dXfaPCeJtz+37PIUlixvlIhI23L5khKXs3DIzAn9V8v+qb1TRKrgT5w==", "cpu": [ "loong64" ], @@ -218,9 +219,9 @@ } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.24.2.tgz", - "integrity": "sha512-iMkk7qr/wl3exJATwkISxI7kTcmHKE+BlymIAbHO8xanq/TjHaaVThFF6ipWzPHryoFsesNQJPE/3wFJw4+huw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.2.tgz", + "integrity": 
"sha512-hDDRlzE6rPeoj+5fsADqdUZl1OzqDYow4TB4Y/3PlKBD0ph1e6uPHzIQcv2Z65u2K0kpeByIyAjCmjn1hJgG0Q==", "cpu": [ "mips64el" ], @@ -235,9 +236,9 @@ } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.24.2.tgz", - "integrity": "sha512-shsVrgCZ57Vr2L8mm39kO5PPIb+843FStGt7sGGoqiiWYconSxwTiuswC1VJZLCjNiMLAMh34jg4VSEQb+iEbw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.2.tgz", + "integrity": "sha512-tsHu2RRSWzipmUi9UBDEzc0nLc4HtpZEI5Ba+Omms5456x5WaNuiG3u7xh5AO6sipnJ9r4cRWQB2tUjPyIkc6g==", "cpu": [ "ppc64" ], @@ -252,9 +253,9 @@ } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.24.2.tgz", - "integrity": "sha512-4eSFWnU9Hhd68fW16GD0TINewo1L6dRrB+oLNNbYyMUAeOD2yCK5KXGK1GH4qD/kT+bTEXjsyTCiJGHPZ3eM9Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.2.tgz", + "integrity": "sha512-k4LtpgV7NJQOml/10uPU0s4SAXGnowi5qBSjaLWMojNCUICNu7TshqHLAEbkBdAszL5TabfvQ48kK84hyFzjnw==", "cpu": [ "riscv64" ], @@ -269,9 +270,9 @@ } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.24.2.tgz", - "integrity": "sha512-S0Bh0A53b0YHL2XEXC20bHLuGMOhFDO6GN4b3YjRLK//Ep3ql3erpNcPlEFed93hsQAjAQDNsvcK+hV90FubSw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.2.tgz", + "integrity": "sha512-GRa4IshOdvKY7M/rDpRR3gkiTNp34M0eLTaC1a08gNrh4u488aPhuZOCpkF6+2wl3zAN7L7XIpOFBhnaE3/Q8Q==", "cpu": [ "s390x" ], @@ -286,9 +287,9 @@ } }, "node_modules/@esbuild/linux-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.24.2.tgz", - "integrity": "sha512-8Qi4nQcCTbLnK9WoMjdC9NiTG6/E38RNICU6sUNqK0QFxCYgoARqVqxdFmWkdonVsvGqWhmm7MO0jyTqLqwj0Q==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.2.tgz", + "integrity": "sha512-QInHERlqpTTZ4FRB0fROQWXcYRD64lAoiegezDunLpalZMjcUcld3YzZmVJ2H/Cp0wJRZ8Xtjtj0cEHhYc/uUg==", "cpu": [ "x64" ], @@ -303,9 +304,9 @@ } }, "node_modules/@esbuild/netbsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.24.2.tgz", - "integrity": "sha512-wuLK/VztRRpMt9zyHSazyCVdCXlpHkKm34WUyinD2lzK07FAHTq0KQvZZlXikNWkDGoT6x3TD51jKQ7gMVpopw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.2.tgz", + "integrity": "sha512-talAIBoY5M8vHc6EeI2WW9d/CkiO9MQJ0IOWX8hrLhxGbro/vBXJvaQXefW2cP0z0nQVTdQ/eNyGFV1GSKrxfw==", "cpu": [ "arm64" ], @@ -320,9 +321,9 @@ } }, "node_modules/@esbuild/netbsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.24.2.tgz", - "integrity": "sha512-VefFaQUc4FMmJuAxmIHgUmfNiLXY438XrL4GDNV1Y1H/RW3qow68xTwjZKfj/+Plp9NANmzbH5R40Meudu8mmw==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.2.tgz", + "integrity": "sha512-voZT9Z+tpOxrvfKFyfDYPc4DO4rk06qamv1a/fkuzHpiVBMOhpjK+vBmWM8J1eiB3OLSMFYNaOaBNLXGChf5tg==", "cpu": [ "x64" ], @@ -337,9 +338,9 @@ } }, "node_modules/@esbuild/openbsd-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.24.2.tgz", - "integrity": 
"sha512-YQbi46SBct6iKnszhSvdluqDmxCJA+Pu280Av9WICNwQmMxV7nLRHZfjQzwbPs3jeWnuAhE9Jy0NrnJ12Oz+0A==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.2.tgz", + "integrity": "sha512-dcXYOC6NXOqcykeDlwId9kB6OkPUxOEqU+rkrYVqJbK2hagWOMrsTGsMr8+rW02M+d5Op5NNlgMmjzecaRf7Tg==", "cpu": [ "arm64" ], @@ -354,9 +355,9 @@ } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.24.2.tgz", - "integrity": "sha512-+iDS6zpNM6EnJyWv0bMGLWSWeXGN/HTaF/LXHXHwejGsVi+ooqDfMCCTerNFxEkM3wYVcExkeGXNqshc9iMaOA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.2.tgz", + "integrity": "sha512-t/TkWwahkH0Tsgoq1Ju7QfgGhArkGLkF1uYz8nQS/PPFlXbP5YgRpqQR3ARRiC2iXoLTWFxc6DJMSK10dVXluw==", "cpu": [ "x64" ], @@ -371,9 +372,9 @@ } }, "node_modules/@esbuild/sunos-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.24.2.tgz", - "integrity": "sha512-hTdsW27jcktEvpwNHJU4ZwWFGkz2zRJUz8pvddmXPtXDzVKTTINmlmga3ZzwcuMpUvLw7JkLy9QLKyGpD2Yxig==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.2.tgz", + "integrity": "sha512-cfZH1co2+imVdWCjd+D1gf9NjkchVhhdpgb1q5y6Hcv9TP6Zi9ZG/beI3ig8TvwT9lH9dlxLq5MQBBgwuj4xvA==", "cpu": [ "x64" ], @@ -388,9 +389,9 @@ } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.24.2.tgz", - "integrity": "sha512-LihEQ2BBKVFLOC9ZItT9iFprsE9tqjDjnbulhHoFxYQtQfai7qfluVODIYxt1PgdoyQkz23+01rzwNwYfutxUQ==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.2.tgz", + "integrity": "sha512-7Loyjh+D/Nx/sOTzV8vfbB3GJuHdOQyrOryFdZvPHLf42Tk9ivBU5Aedi7iyX+x6rbn2Mh68T4qq1SDqJBQO5Q==", "cpu": [ "arm64" ], @@ -405,9 +406,9 @@ } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.24.2.tgz", - "integrity": "sha512-q+iGUwfs8tncmFC9pcnD5IvRHAzmbwQ3GPS5/ceCyHdjXubwQWI12MKWSNSMYLJMq23/IUCvJMS76PDqXe1fxA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.2.tgz", + "integrity": "sha512-WRJgsz9un0nqZJ4MfhabxaD9Ft8KioqU3JMinOTvobbX6MOSUigSBlogP8QB3uxpJDsFS6yN+3FDBdqE5lg9kg==", "cpu": [ "ia32" ], @@ -422,9 +423,9 @@ } }, "node_modules/@esbuild/win32-x64": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.24.2.tgz", - "integrity": "sha512-7VTgWzgMGvup6aSqDPLiW5zHaxYJGTO4OokMjIlrCtf+VpEL+cXKtCvg723iguPYI5oaUNdS+/V7OU2gvXVWEg==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.2.tgz", + "integrity": "sha512-kM3HKb16VIXZyIeVrM1ygYmZBKybX8N4p754bw390wGO3Tf2j4L2/WYL+4suWujpgf6GBYs3jv7TyUivdd05JA==", "cpu": [ "x64" ], @@ -712,9 +713,9 @@ "license": "MIT" }, "node_modules/esbuild": { - "version": "0.24.2", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.24.2.tgz", - "integrity": "sha512-+9egpBW8I3CD5XPe0n6BfT5fxLzxrlDzqydF3aviG+9ni1lDC/OvMHcxqEFV0+LANZG5R1bFMWfUrjVsdwxJvA==", + "version": "0.25.2", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.2.tgz", + "integrity": "sha512-16854zccKPnC+toMywC+uKNeYSv+/eXkevRAfwRD/G9Cleq66m8XFIrigkbvauLLlCfDL45Q2cWegSg53gGBnQ==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ 
-725,31 +726,31 @@ "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.24.2", - "@esbuild/android-arm": "0.24.2", - "@esbuild/android-arm64": "0.24.2", - "@esbuild/android-x64": "0.24.2", - "@esbuild/darwin-arm64": "0.24.2", - "@esbuild/darwin-x64": "0.24.2", - "@esbuild/freebsd-arm64": "0.24.2", - "@esbuild/freebsd-x64": "0.24.2", - "@esbuild/linux-arm": "0.24.2", - "@esbuild/linux-arm64": "0.24.2", - "@esbuild/linux-ia32": "0.24.2", - "@esbuild/linux-loong64": "0.24.2", - "@esbuild/linux-mips64el": "0.24.2", - "@esbuild/linux-ppc64": "0.24.2", - "@esbuild/linux-riscv64": "0.24.2", - "@esbuild/linux-s390x": "0.24.2", - "@esbuild/linux-x64": "0.24.2", - "@esbuild/netbsd-arm64": "0.24.2", - "@esbuild/netbsd-x64": "0.24.2", - "@esbuild/openbsd-arm64": "0.24.2", - "@esbuild/openbsd-x64": "0.24.2", - "@esbuild/sunos-x64": "0.24.2", - "@esbuild/win32-arm64": "0.24.2", - "@esbuild/win32-ia32": "0.24.2", - "@esbuild/win32-x64": "0.24.2" + "@esbuild/aix-ppc64": "0.25.2", + "@esbuild/android-arm": "0.25.2", + "@esbuild/android-arm64": "0.25.2", + "@esbuild/android-x64": "0.25.2", + "@esbuild/darwin-arm64": "0.25.2", + "@esbuild/darwin-x64": "0.25.2", + "@esbuild/freebsd-arm64": "0.25.2", + "@esbuild/freebsd-x64": "0.25.2", + "@esbuild/linux-arm": "0.25.2", + "@esbuild/linux-arm64": "0.25.2", + "@esbuild/linux-ia32": "0.25.2", + "@esbuild/linux-loong64": "0.25.2", + "@esbuild/linux-mips64el": "0.25.2", + "@esbuild/linux-ppc64": "0.25.2", + "@esbuild/linux-riscv64": "0.25.2", + "@esbuild/linux-s390x": "0.25.2", + "@esbuild/linux-x64": "0.25.2", + "@esbuild/netbsd-arm64": "0.25.2", + "@esbuild/netbsd-x64": "0.25.2", + "@esbuild/openbsd-arm64": "0.25.2", + "@esbuild/openbsd-x64": "0.25.2", + "@esbuild/sunos-x64": "0.25.2", + "@esbuild/win32-arm64": "0.25.2", + "@esbuild/win32-ia32": "0.25.2", + "@esbuild/win32-x64": "0.25.2" } }, "node_modules/fsevents": { @@ -768,14 +769,13 @@ } }, "node_modules/limbo-wasm": { - "version": "0.0.11", - "resolved": "file:../limbo-wasm-0.0.11.tgz", - "integrity": "sha512-Gxs1kqnCKbfwWjTSWaNQzh954DltmDK28j4EmzDEm/7NZtmwnbfeBj92pS3yJVeQpXuu6zQtaDAS0pYAhi3Q0w==" + "resolved": "..", + "link": true }, "node_modules/nanoid": { - "version": "3.3.8", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", - "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", "dev": true, "funding": [ { @@ -799,9 +799,9 @@ "license": "ISC" }, "node_modules/postcss": { - "version": "8.5.1", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.1.tgz", - "integrity": "sha512-6oz2beyjc5VMn/KV1pPw8fliQkhBXrVn1Z3TVyqZxU8kZpzEKhBdmCFqI6ZbmGtamQvQGuU1sgPTk8ZrXDD7jQ==", + "version": "8.5.3", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.3.tgz", + "integrity": "sha512-dle9A3yYxlBSrt8Fu+IpjGT8SY8hN0mlaA6GY8t0P5PjIOZemULz/E2Bnm/2dcUOena75OTNkHI76uZBNUUq3A==", "dev": true, "funding": [ { @@ -877,15 +877,15 @@ } }, "node_modules/vite": { - "version": "6.0.7", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.0.7.tgz", - "integrity": "sha512-RDt8r/7qx9940f8FcOIAH9PTViRrghKaK2K1jY3RaAURrEUbm9Du1mJ72G+jlhtG3WwodnfzY8ORQZbBavZEAQ==", + "version": "6.2.6", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.6.tgz", + "integrity": 
"sha512-9xpjNl3kR4rVDZgPNdTL0/c6ao4km69a/2ihNQbcANz8RuCOK3hQBmLSJf3bRKVQjVMda+YvizNE8AwvogcPbw==", "dev": true, "license": "MIT", "dependencies": { - "esbuild": "^0.24.2", - "postcss": "^8.4.49", - "rollup": "^4.23.0" + "esbuild": "^0.25.0", + "postcss": "^8.5.3", + "rollup": "^4.30.1" }, "bin": { "vite": "bin/vite.js" diff --git a/bindings/wasm/test-limbo-pkg/package.json b/bindings/wasm/test-limbo-pkg/package.json index 9d64cc94c..96bbaa647 100644 --- a/bindings/wasm/test-limbo-pkg/package.json +++ b/bindings/wasm/test-limbo-pkg/package.json @@ -9,7 +9,7 @@ "dev": "vite" }, "devDependencies": { - "vite": "^6.0.7", + "vite": "^6.2.6", "vite-plugin-wasm": "^3.4.1" } } From c99c6a4be54e3866179f24d038b1eca317c73c06 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 11 Apr 2025 13:40:56 -0300 Subject: [PATCH 183/425] Activate Bench for comparison --- Cargo.lock | 9 +++++++++ Cargo.toml | 1 + vendored/sqlite3-parser/sqlparser_bench/Cargo.toml | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index af3033499..e3b2cd455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3163,6 +3163,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "sqlparser_bench" +version = "0.1.0" +dependencies = [ + "criterion", + "fallible-iterator", + "limbo_sqlite3_parser", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index ac7490880..44c621be1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ members = [ "sqlite3", "stress", "tests", + "vendored/sqlite3-parser/sqlparser_bench", ] exclude = ["perf/latency/limbo"] diff --git a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml index 1ec87cbe4..0bb6e16c5 100644 --- a/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml +++ b/vendored/sqlite3-parser/sqlparser_bench/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Dandandan "] edition = "2018" [dependencies] -sqlite3-parser = { path = "..", default-features = false, features = [ +limbo_sqlite3_parser = { path = "..", default-features = false, features = [ "YYNOERRORRECOVERY", "NDEBUG", ] } From 9d08693e8f53020b614c4d40517f4b5e0c5e0c60 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 11 Apr 2025 14:10:12 -0400 Subject: [PATCH 184/425] add simulator subcommands --- simulator/main.rs | 170 +++++++++++++++++++++++++----------- simulator/runner/bugbase.rs | 52 +++++++++-- simulator/runner/cli.rs | 36 +++++++- 3 files changed, 202 insertions(+), 56 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 9a0345be7..3d49fbd36 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -6,7 +6,7 @@ use notify::event::{DataChange, ModifyKind}; use notify::{EventKind, RecursiveMode, Watcher}; use rand::prelude::*; use runner::bugbase::{Bug, BugBase, LoadedBug}; -use runner::cli::SimulatorCLI; +use runner::cli::{SimulatorCLI, SimulatorCommand}; use runner::env::SimulatorEnv; use runner::execution::{execute_plans, Execution, ExecutionHistory, ExecutionResult}; use runner::{differential, watch}; @@ -48,15 +48,87 @@ impl Paths { fn main() -> Result<(), String> { init_logger(); - let cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; + match cli_opts.subcommand { + Some(SimulatorCommand::List) => { + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; + bugbase.list_bugs() + } + Some(SimulatorCommand::Loop { n, short_circuit }) => { + banner(); + for i in 0..n { + println!("iteration {}", i); + let result = testing_main(&cli_opts); + if 
result.is_err() && short_circuit { + println!("short circuiting after {} iterations", i); + return result; + } else if result.is_err() { + println!("iteration {} failed", i); + } else { + println!("iteration {} succeeded", i); + } + } + Ok(()) + } + Some(SimulatorCommand::Test { filter }) => { + let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; + let bugs = bugbase.load_bugs()?; + let mut bugs = bugs + .into_iter() + .flat_map(|bug| { + let runs = bug + .runs + .into_iter() + .filter_map(|run| run.error.clone().map(|_| run)) + .filter(|run| run.error.as_ref().unwrap().contains(&filter)) + .map(|run| run.cli_options) + .collect::<Vec<_>>(); + + runs.into_iter() + .map(|mut cli_opts| { + cli_opts.seed = Some(bug.seed); + cli_opts.load = None; + cli_opts + }) + .collect::<Vec<_>>() + }) + .collect::<Vec<_>>(); + + bugs.sort(); + bugs.dedup_by(|a, b| a == b); + + println!( + "found {} previously triggered configurations with {}", + bugs.len(), + filter + ); + + let results = bugs + .into_iter() + .map(|cli_opts| testing_main(&cli_opts)) + .collect::<Vec<_>>(); + + let (successes, failures): (Vec<_>, Vec<_>) = + results.into_iter().partition(|result| result.is_ok()); + println!("the results of the change are:"); + println!("\t{} successful runs", successes.len()); + println!("\t{} failed runs", failures.len()); + Ok(()) + } + None => { + banner(); + testing_main(&cli_opts) + } + } +} + +fn testing_main(cli_opts: &SimulatorCLI) -> Result<(), String> { let mut bugbase = BugBase::load().map_err(|e| format!("{:?}", e))?; - banner(); let last_execution = Arc::new(Mutex::new(Execution::new(0, 0, 0))); - let (seed, env, plans) = setup_simulation(&mut bugbase, &cli_opts, |p| &p.plan, |p| &p.db); + let (seed, env, plans) = setup_simulation(&mut bugbase, cli_opts, |p| &p.plan, |p| &p.db); let paths = bugbase.paths(seed); @@ -66,7 +138,7 @@ fn main() -> Result<(), String> { } if cli_opts.watch { - watch_mode(seed, &cli_opts, &paths, last_execution.clone()).unwrap(); + watch_mode(seed, cli_opts, &paths, last_execution.clone()).unwrap(); return Ok(()); } @@ -74,7 +146,7 @@ fn main() -> Result<(), String> { differential_testing( seed, &mut bugbase, - &cli_opts, + cli_opts, &paths, plans, last_execution.clone(), @@ -83,7 +155,7 @@ fn main() -> Result<(), String> { run_simulator( seed, &mut bugbase, - &cli_opts, + cli_opts, &paths, env, plans, @@ -190,9 +262,15 @@ fn run_simulator( ); if cli_opts.doublecheck { - let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); - let env = Arc::new(Mutex::new(env)); - doublecheck(env, paths, &plans, last_execution.clone(), result) + doublecheck( + seed, + bugbase, + cli_opts, + paths, + &plans, + last_execution.clone(), + result, + ) } else { // No doublecheck, run shrinking if panicking or found a bug. match &result { @@ -303,12 +381,17 @@ fn run_simulator( } fn doublecheck( - env: Arc<Mutex<SimulatorEnv>>, + seed: u64, + bugbase: &mut BugBase, + cli_opts: &SimulatorCLI, paths: &Paths, plans: &[InteractionPlan], last_execution: Arc<Mutex<Execution>>, result: SandboxedResult, ) -> Result<(), String> { + let env = SimulatorEnv::new(seed, cli_opts, &paths.doublecheck_db); + let env = Arc::new(Mutex::new(env)); + // Run the simulation again let result2 = SandboxedResult::from( std::panic::catch_unwind(|| { @@ -317,50 +400,24 @@ fn doublecheck( last_execution.clone(), ); - match (result, result2) { + let doublecheck_result = match (result, result2) { (SandboxedResult::Correct, SandboxedResult::Panicked { .. }) => { - log::error!("doublecheck failed!
first run succeeded, but second run panicked."); - Err("doublecheck failed! first run succeeded, but second run panicked.".to_string()) + Err("first run succeeded, but second run panicked.".to_string()) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Panicked { .. }) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run panicked." - ); - Err( - "doublecheck failed! first run failed an assertion, but second run panicked." - .to_string(), - ) + Err("first run failed an assertion, but second run panicked.".to_string()) } (SandboxedResult::Panicked { .. }, SandboxedResult::Correct) => { - log::error!("doublecheck failed! first run panicked, but second run succeeded."); - Err("doublecheck failed! first run panicked, but second run succeeded.".to_string()) + Err("first run panicked, but second run succeeded.".to_string()) } (SandboxedResult::Panicked { .. }, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run panicked, but second run failed an assertion." - ); - Err( - "doublecheck failed! first run panicked, but second run failed an assertion." - .to_string(), - ) + Err("first run panicked, but second run failed an assertion.".to_string()) } (SandboxedResult::Correct, SandboxedResult::FoundBug { .. }) => { - log::error!( - "doublecheck failed! first run succeeded, but second run failed an assertion." - ); - Err( - "doublecheck failed! first run succeeded, but second run failed an assertion." - .to_string(), - ) + Err("first run succeeded, but second run failed an assertion.".to_string()) } (SandboxedResult::FoundBug { .. }, SandboxedResult::Correct) => { - log::error!( - "doublecheck failed! first run failed an assertion, but second run succeeded." - ); - Err( - "doublecheck failed! first run failed an assertion, but second run succeeded." - .to_string(), - ) + Err("first run failed an assertion, but second run succeeded.".to_string()) } (SandboxedResult::Correct, SandboxedResult::Correct) | (SandboxedResult::FoundBug { .. }, SandboxedResult::FoundBug { .. }) @@ -369,18 +426,30 @@ fn doublecheck( let db_bytes = std::fs::read(&paths.db).unwrap(); let doublecheck_db_bytes = std::fs::read(&paths.doublecheck_db).unwrap(); if db_bytes != doublecheck_db_bytes { - log::error!("doublecheck failed! database files are different."); - log::error!("current: {}", paths.db.display()); - log::error!("doublecheck: {}", paths.doublecheck_db.display()); Err( - "doublecheck failed! database files are different, check binary diffs for more details.".to_string() + "database files are different, check binary diffs for more details." + .to_string(), ) } else { - log::info!("doublecheck succeeded! database files are the same."); - println!("doublecheck succeeded! database files are the same."); Ok(()) } } + }; + + match doublecheck_result { + Ok(_) => { + log::info!("doublecheck succeeded"); + println!("doublecheck succeeded"); + bugbase.mark_successful_run(seed, cli_opts)?; + Ok(()) + } + Err(e) => { + log::error!("doublecheck failed: '{}'", e); + bugbase + .add_bug(seed, plans[0].clone(), Some(e.clone()), cli_opts) + .unwrap(); + Err(format!("doublecheck failed: '{}'", e)) + } } } @@ -417,6 +486,7 @@ fn differential_testing( SandboxedResult::Correct => { log::info!("simulation succeeded, output of Limbo conforms to SQLite"); println!("simulation succeeded, output of Limbo conforms to SQLite"); + bugbase.mark_successful_run(seed, cli_opts).unwrap(); Ok(()) } SandboxedResult::Panicked { error, .. } | SandboxedResult::FoundBug { error, .. 
} => { diff --git a/simulator/runner/bugbase.rs b/simulator/runner/bugbase.rs index e131304c3..c59744046 100644 --- a/simulator/runner/bugbase.rs +++ b/simulator/runner/bugbase.rs @@ -33,14 +33,14 @@ pub struct LoadedBug { #[derive(Clone, Serialize, Deserialize)] pub(crate) struct BugRun { /// Commit hash of the current version of Limbo. - hash: String, + pub(crate) hash: String, /// Timestamp of the run. #[serde(with = "chrono::serde::ts_seconds")] - timestamp: DateTime<Utc>, + pub(crate) timestamp: DateTime<Utc>, /// Error message of the run. - error: Option<String>, + pub(crate) error: Option<String>, /// Options - cli_options: SimulatorCLI, + pub(crate) cli_options: SimulatorCLI, } impl Bug { @@ -270,7 +270,10 @@ impl BugBase { Some(Bug::Unloaded { .. }) => { let plan = std::fs::read_to_string(self.path.join(seed.to_string()).join("plan.json")) - .or(Err("should be able to read plan file".to_string()))?; + .or(Err(format!( + "should be able to read plan file at {}", + self.path.join(seed.to_string()).join("plan.json").display() + )))?; let plan: InteractionPlan = serde_json::from_str(&plan) .or(Err("should be able to deserialize plan".to_string()))?; @@ -330,6 +333,45 @@ impl BugBase { Ok(()) } + + pub(crate) fn load_bugs(&mut self) -> Result<Vec<LoadedBug>, String> { + let seeds = self.bugs.keys().map(|seed| *seed).collect::<Vec<_>>(); + + seeds + .iter() + .map(|seed| self.load_bug(*seed)) + .collect::<Result<Vec<_>, _>>() + } + + pub(crate) fn list_bugs(&mut self) -> Result<(), String> { + let bugs = self.load_bugs()?; + for bug in bugs { + println!("seed: {}", bug.seed); + println!("plan: {}", bug.plan.stats()); + println!("runs:"); + println!(" ------------------"); + for run in &bug.runs { + println!(" - hash: {}", run.hash); + println!(" timestamp: {}", run.timestamp); + println!( + " type: {}", + if run.cli_options.differential { + "differential" + } else if run.cli_options.doublecheck { + "doublecheck" + } else { + "default" + } + ); + if let Some(error) = &run.error { + println!(" error: {}", error); + } + } + println!(" ------------------"); + } + + Ok(()) + } } impl BugBase { diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 5933be437..5d9bcd9a6 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -1,7 +1,7 @@ use clap::{command, Parser}; use serde::{Deserialize, Serialize}; -#[derive(Parser, Clone, Serialize, Deserialize)] +#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] #[command(name = "limbo-simulator")] #[command(author, version, about, long_about = None)] pub struct SimulatorCLI { @@ -44,6 +44,40 @@ pub struct SimulatorCLI { pub watch: bool, #[clap(long, help = "run differential testing between sqlite and Limbo")] pub differential: bool, + #[clap(subcommand)] + pub subcommand: Option<SimulatorCommand>, +} + +#[derive(Parser, Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] +pub enum SimulatorCommand { + #[clap(about = "run the simulator in a loop")] + Loop { + #[clap( + short = 'n', + long, + help = "number of iterations to run the simulator", + default_value_t = 5 + )] + n: usize, + #[clap( + short = 's', + long, + help = "short circuit the simulator, stop on the first failure", + default_value_t = false + )] + short_circuit: bool, + }, + #[clap(about = "list all the bugs in the base")] + List, + #[clap(about = "run the simulator against a specific bug")] + Test { + #[clap( + short = 'b', + long, + help = "run the simulator with previous buggy runs for the specific filter" + )] + filter: String, + }, } impl SimulatorCLI { From
e13b5bc69833e36765e0d51ebc69806d75ac4936 Mon Sep 17 00:00:00 2001 From: alpaylan Date: Fri, 11 Apr 2025 15:33:32 -0400 Subject: [PATCH 185/425] fix min/max-tests bug --- simulator/main.rs | 2 +- simulator/runner/cli.rs | 22 ++++++++++++++-------- simulator/runner/env.rs | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/simulator/main.rs b/simulator/main.rs index 3d49fbd36..34fdac17b 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -48,7 +48,7 @@ impl Paths { fn main() -> Result<(), String> { init_logger(); - let cli_opts = SimulatorCLI::parse(); + let mut cli_opts = SimulatorCLI::parse(); cli_opts.validate()?; match cli_opts.subcommand { diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index 5d9bcd9a6..c62c023bb 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -19,14 +19,14 @@ pub struct SimulatorCLI { help = "change the maximum size of the randomly generated sequence of interactions", default_value_t = 5000 )] - pub maximum_size: usize, + pub maximum_tests: usize, #[clap( short = 'k', long, help = "change the minimum size of the randomly generated sequence of interactions", default_value_t = 1000 )] - pub minimum_size: usize, + pub minimum_tests: usize, #[clap( short = 't', long, @@ -81,16 +81,22 @@ pub enum SimulatorCommand { } impl SimulatorCLI { - pub fn validate(&self) -> Result<(), String> { - if self.minimum_size < 1 { + pub fn validate(&mut self) -> Result<(), String> { + if self.minimum_tests < 1 { return Err("minimum size must be at least 1".to_string()); } - if self.maximum_size < 1 { + if self.maximum_tests < 1 { return Err("maximum size must be at least 1".to_string()); } - // todo: fix an issue here where if minimum size is not defined, it prevents setting low maximum sizes. 
- if self.minimum_size > self.maximum_size { - return Err("Minimum size cannot be greater than maximum size".to_string()); + + if self.minimum_tests > self.maximum_tests { + log::warn!( + "minimum tests '{}' is greater than maximum tests '{}', setting minimum to '{}'", + self.minimum_tests, + self.maximum_tests, + self.maximum_tests - 1 + ); + self.minimum_tests = self.maximum_tests - 1; } if self.seed.is_some() && self.load.is_some() { diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 742502b7b..8a7a6533a 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -67,7 +67,7 @@ impl SimulatorEnv { }; let opts = SimulatorOpts { - ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + ticks: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions processing max_tables: rng.gen_range(0..128), @@ -77,7 +77,7 @@ impl SimulatorEnv { delete_percent, drop_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + max_interactions: rng.gen_range(cli_opts.minimum_tests..=cli_opts.maximum_tests), max_time_simulation: cli_opts.maximum_time, }; From 1141cbaf3b6b104cc4c48f96b0a362c6d613825e Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 10:51:36 +0300 Subject: [PATCH 186/425] logical_expr_fuzz: add primary keys to test table to detect issues w index usage --- tests/integration/fuzz/mod.rs | 61 ++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 73263e5f1..3814be97b 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1114,19 +1114,38 @@ mod tests { let limbo_conn = db.connect_limbo(); let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); for table in tables.iter() { - let query = format!("CREATE TABLE {} ({})", table.name, table.columns.join(", ")); + let columns_with_first_column_as_pk = { + let mut columns = vec![]; + columns.push(format!("{} PRIMARY KEY", table.columns[0])); + columns.extend(table.columns[1..].iter().map(|c| c.to_string())); + columns.join(", ") + }; + let query = format!( + "CREATE TABLE {} ({})", + table.name, columns_with_first_column_as_pk + ); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!( - limbo_exec_rows(&db, &limbo_conn, &query), - sqlite_exec_rows(&sqlite_conn, &query) + limbo, sqlite, + "query: {}, limbo: {:?}, sqlite: {:?}", + query, limbo, sqlite ); } let (mut rng, seed) = rng_from_time(); log::info!("seed: {}", seed); - for _ in 0..100 { - let (x, y, z) = ( - g.generate(&mut rng, builders.number, 1), + let mut i = 0; + let mut primary_key_set = HashSet::with_capacity(100); + while i < 100 { + let x = g.generate(&mut rng, builders.number, 1); + if primary_key_set.contains(&x) { + continue; + } + primary_key_set.insert(x.clone()); + let (y, z) = ( g.generate(&mut rng, builders.number, 1), g.generate(&mut rng, builders.number, 1), ); @@ -1138,7 +1157,13 @@ mod tests { "seed: {}", seed, ); + i += 1; } + // verify the same number of rows in both tables + let query = format!("SELECT COUNT(*) FROM t"); + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + assert_eq!(limbo, sqlite, "seed: {}", seed); let sql = g .create() @@ -1152,11
+1177,25 @@ mod tests { log::info!("query: {}", query); let limbo = limbo_exec_rows(&db, &limbo_conn, &query); let sqlite = sqlite_exec_rows(&sqlite_conn, &query); - assert_eq!( - limbo, sqlite, - "query: {}, limbo: {:?}, sqlite: {:?} seed: {}", - query, limbo, sqlite, seed - ); + + if limbo.len() != sqlite.len() { + panic!("MISMATCHING ROW COUNT (limbo: {}, sqlite: {}) for query: {}\n\n limbo: {:?}\n\n sqlite: {:?}", limbo.len(), sqlite.len(), query, limbo, sqlite); + } + // find rows where limbo and sqlite differ + let diff_rows = limbo + .iter() + .zip(sqlite.iter()) + .filter(|(l, s)| l != s) + .collect::<Vec<_>>(); + if !diff_rows.is_empty() { + // due to different choices in index usage (usually in these cases sqlite is smart enough to use an index and we aren't), + // sqlite might return rows in a different order + // check if all limbo rows are present in sqlite + let all_present = limbo.iter().all(|l| sqlite.iter().any(|s| l == s)); + if !all_present { + panic!("MISMATCHING ROWS (limbo: {}, sqlite: {}) for query: {}\n\n limbo: {:?}\n\n sqlite: {:?}\n\n differences: {:?}", limbo.len(), sqlite.len(), query, limbo, sqlite, diff_rows); + } + } } } } From 973696228a3dafdb8960c0dc66b26300abef0ad9 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 10:51:50 +0300 Subject: [PATCH 187/425] Add TCL regression test --- testing/where.test | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/testing/where.test b/testing/where.test index a149e85f2..958d7825f 100755 --- a/testing/where.test +++ b/testing/where.test @@ -573,6 +573,12 @@ do_execsql_test where-constant-condition-no-tables-2 { select 1 where 1 IS NOT NULL; } {1} # We had a bug where NULL was incorrectly used as a seek key, returning all rows (because NULL < everything in index keys) do_execsql_test where-null-comparison-index-seek-regression-test { select age from users where age > NULL; -} {} \ No newline at end of file +} {} + +# We had a bug where Limbo tried to use an index when there was a WHERE term like 't.x = t.x' +do_execsql_test where-self-referential-regression { + select count(1) from users where id = id; +} {10000} From c6bea835f9f2698c9105c4d03b3dab53c04cc627 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 10:53:00 +0300 Subject: [PATCH 188/425] Fix trying to use index when both sides of comparison refer to same table --- core/translate/optimizer.rs | 31 ++++++++++++++++++++++++++++--- core/translate/planner.rs | 2 +- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 49543fd6b..f71cb9728 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -10,9 +10,12 @@ use crate::{ Result, }; -use super::plan::{ - DeletePlan, Direction, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey, - SelectPlan, TableReference, UpdatePlan, WhereTerm, +use super::{ + plan::{ + DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, + SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm, + }, + planner::determine_where_to_eval_expr, }; pub fn optimize_plan(plan: &mut Plan, schema: &Schema) -> Result<()> { @@ -851,6 +854,18 @@ fn find_index_constraints( continue; } + // If both lhs and rhs refer to columns from this table, we can't use this constraint + // because we can't use the index to satisfy the condition.
+ // Examples: + // - WHERE t.x > t.y + // - WHERE t.x + 1 > t.y - 5 + // - WHERE t.x = (t.x) + if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index) + && determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index) + { + continue; + } + // Check if lhs is a column that is in the i'th position of the index if Some(position_in_index) == get_column_position_in_index(lhs, table_index, table_reference, index) @@ -1186,6 +1201,16 @@ pub fn try_extract_rowid_search_expression( } match &mut cond.expr { ast::Expr::Binary(lhs, operator, rhs) => { + // If both lhs and rhs refer to columns from this table, we can't perform a rowid seek + // Examples: + // - WHERE t.x > t.y + // - WHERE t.x + 1 > t.y - 5 + // - WHERE t.x = (t.x) + if determine_where_to_eval_expr(lhs)? == EvalAt::Loop(table_index) + && determine_where_to_eval_expr(rhs)? == EvalAt::Loop(table_index) + { + return Ok(None); + } if lhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { diff --git a/core/translate/planner.rs b/core/translate/planner.rs index b7b8745b0..2d9246666 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -570,7 +570,7 @@ pub fn parse_where( For expressions not referencing any tables (e.g. constants), this is before the main loop is opened, because they do not need any table data. */ -fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result<EvalAt> { +pub fn determine_where_to_eval_expr<'a>(predicate: &'a ast::Expr) -> Result<EvalAt> { let mut eval_at: EvalAt = EvalAt::BeforeLoop; match predicate { ast::Expr::Binary(e1, _, e2) => { From 0d97e2a311d10e94f35a84e01189b1e10403f218 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 11 Apr 2025 10:53:35 +0300 Subject: [PATCH 189/425] Fix not using index when expr is paren-wrapped: e.g. SELECT * FROM t WHERE (x > 5) --- core/translate/optimizer.rs | 64 ++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index f71cb9728..7c29b8834 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -797,6 +797,47 @@ enum BinaryExprSide { Rhs, } +/// Recursively unwrap parentheses from an expression +/// e.g. (((t.x > 5))) -> t.x > 5 +fn unwrap_parens<T>(expr: T) -> Result<T> +where + T: UnwrapParens, +{ + expr.unwrap_parens() +} + +trait UnwrapParens { + fn unwrap_parens(self) -> Result<Self> + where + Self: Sized; +} + +impl UnwrapParens for &ast::Expr { + fn unwrap_parens(self) -> Result<Self> { + match self { + ast::Expr::Column { .. } => Ok(self), + ast::Expr::Parenthesized(exprs) => match exprs.len() { + 1 => unwrap_parens(exprs.first().unwrap()), + _ => crate::bail_parse_error!("expected single expression in parentheses"), + }, + _ => Ok(self), + } + } +} + +impl UnwrapParens for ast::Expr { + fn unwrap_parens(self) -> Result<Self> { + match self { + ast::Expr::Column { .. } => Ok(self), + ast::Expr::Parenthesized(mut exprs) => match exprs.len() { + 1 => unwrap_parens(exprs.pop().unwrap()), + _ => crate::bail_parse_error!("expected single expression in parentheses"), + }, + _ => Ok(self), + } + } +} + /// Get the position of a column in an index /// For example, if there is an index on table T(x,y) then y's position in the index is 1. fn get_column_position_in_index( @@ -804,20 +845,20 @@ fn get_column_position_in_index( table_index: usize, table_reference: &TableReference, index: &Arc<Index>, -) -> Option<usize> { - let ast::Expr::Column { table, column, ..
} = expr else { - return None; +) -> Result<Option<usize>> { + let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else { + return Ok(None); }; if *table != table_index { - return None; + return Ok(None); } let Some(column) = table_reference.table.get_column_at(*column) else { - return None; + return Ok(None); }; - index + Ok(index .columns .iter() - .position(|col| Some(&col.name) == column.name.as_ref()) + .position(|col| Some(&col.name) == column.name.as_ref())) } /// Find all [IndexConstraint]s for a given WHERE clause @@ -839,7 +880,7 @@ fn find_index_constraints( continue; } // Skip terms that are not binary comparisons - let ast::Expr::Binary(lhs, operator, rhs) = &term.expr else { + let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else { continue; }; // Only consider index scans for binary ops that are comparisons @@ -868,7 +909,7 @@ fn find_index_constraints( // Check if lhs is a column that is in the i'th position of the index if Some(position_in_index) - == get_column_position_in_index(lhs, table_index, table_reference, index) + == get_column_position_in_index(lhs, table_index, table_reference, index)? { out_constraints.push(IndexConstraint { operator: *operator, @@ -879,7 +920,7 @@ fn find_index_constraints( } // Check if rhs is a column that is in the i'th position of the index if Some(position_in_index) - == get_column_position_in_index(rhs, table_index, table_reference, index) + == get_column_position_in_index(rhs, table_index, table_reference, index)? { out_constraints.push(IndexConstraint { operator: opposite_cmp_op(*operator), // swap the operator since e.g. if condition is 5 >= x, we want to use x <= 5 @@ -931,7 +972,8 @@ pub fn build_seek_def_from_index_constraints( // Extract the other expression from the binary WhereTerm (i.e. the one being compared to the index column) let (idx, side) = constraint.position_in_where_clause; let where_term = &mut where_clause[idx]; - let ast::Expr::Binary(lhs, _, rhs) = where_term.expr.take_ownership() else { + let ast::Expr::Binary(lhs, _, rhs) = unwrap_parens(where_term.expr.take_ownership())? + else { crate::bail_parse_error!("expected binary expression"); }; let cmp_expr = if side == BinaryExprSide::Lhs { From 8200b328d8b6d4b59ff84312faf6c9d5b6e63c7d Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 12 Apr 2025 19:29:20 +0800 Subject: [PATCH 190/425] support modifiers for julianday() --- COMPAT.md | 2 +- core/functions/datetime.rs | 32 ++++++++++----------- core/translate/expr.rs | 6 ++-- core/vdbe/execute.rs | 22 ++------------- testing/scalar-functions-datetime.test | 39 +++++++++++++++----------- 5 files changed, 44 insertions(+), 57 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 3d07558c8..68acde763 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -325,7 +325,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
| date() | Yes | partially supports modifiers | | time() | Yes | partially supports modifiers | | datetime() | Yes | partially supports modifiers | -| julianday() | Partial | does not support modifiers | +| julianday() | Yes | partially supports modifiers | | unixepoch() | Partial | does not support modifiers | | strftime() | Yes | partially supports modifiers | | timediff() | Yes | partially supports modifiers | diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index 864f61787..55675efc4 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -46,20 +46,13 @@ enum DateTimeOutput { DateTime, // Holds the format string StrfTime(String), + JuliaDay, } fn exec_datetime(values: &[Register], output_type: DateTimeOutput) -> OwnedValue { if values.is_empty() { let now = parse_naive_date_time(&OwnedValue::build_text("now")).unwrap(); - - let formatted_str = match output_type { - DateTimeOutput::DateTime => now.format("%Y-%m-%d %H:%M:%S").to_string(), - DateTimeOutput::Time => now.format("%H:%M:%S").to_string(), - DateTimeOutput::Date => now.format("%Y-%m-%d").to_string(), - DateTimeOutput::StrfTime(ref format_str) => strftime_format(&now, format_str), - }; - - // Parse here + let formatted_str = format_dt(now, output_type, false); return OwnedValue::build_text(&formatted_str); } if let Some(mut dt) = parse_naive_date_time(values[0].get_owned_value()) { @@ -113,6 +106,7 @@ fn format_dt(dt: NaiveDateTime, output_type: DateTimeOutput, subsec: bool) -> St } } DateTimeOutput::StrfTime(format_str) => strftime_format(&dt, &format_str), + DateTimeOutput::JuliaDay => format_julian_day(to_julian_day_exact(&dt)), } } @@ -325,14 +319,8 @@ fn last_day_in_month(year: i32, month: u32) -> u32 { 28 } -pub fn exec_julianday(time_value: &OwnedValue) -> Result<String> { - let dt = parse_naive_date_time(time_value); - match dt { - // if we did something heinous like: parse::<f64>().unwrap().to_string() - // that would solve the precision issue, but dear lord... - Some(dt) => Ok(format!("{:.1$}", to_julian_day_exact(&dt), 8)), - None => Ok(String::new()), - } +pub fn exec_julianday(values: &[Register]) -> OwnedValue { + exec_datetime(values, DateTimeOutput::JuliaDay) } fn to_julian_day_exact(dt: &NaiveDateTime) -> f64 { @@ -362,6 +350,16 @@ fn to_julian_day_exact(dt: &NaiveDateTime) -> f64 { jd_days + jd_fraction } +fn format_julian_day(days: f64) -> String { + let t = (days * 100_000_000.0).round() / 100_000_000.0; + let mut ret = format!("{}", t); + if !ret.contains('.') { + ret += ".0"; + } + + ret +} + pub fn exec_unixepoch(time_value: &OwnedValue) -> Result<String> { let dt = parse_naive_date_time(time_value); match dt { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 958005259..3882568a7 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1159,7 +1159,7 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::Date | ScalarFunc::DateTime => { + ScalarFunc::Date | ScalarFunc::DateTime | ScalarFunc::JulianDay => { let start_reg = program .alloc_registers(args.as_ref().map(|x| x.len()).unwrap_or(1)); if let Some(args) = args { @@ -1259,11 +1259,11 @@ pub fn translate_expr( }); Ok(target_register) } - ScalarFunc::UnixEpoch | ScalarFunc::JulianDay => { + ScalarFunc::UnixEpoch => { let mut start_reg = 0; match args { Some(args) if args.len() > 1 => { - crate::bail_parse_error!("epoch or julianday function with > 1 arguments. Modifiers are not yet supported."); + crate::bail_parse_error!("epoch function with > 1 arguments.
Modifiers are not yet supported."); } Some(args) if args.len() == 1 => { let arg_reg = program.alloc_register(); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4d2a96d10..233b225b2 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3432,26 +3432,8 @@ pub fn op_function( state.registers[*dest] = Register::OwnedValue(result); } ScalarFunc::JulianDay => { - if *start_reg == 0 { - let julianday: String = exec_julianday(&OwnedValue::build_text("now"))?; - state.registers[*dest] = - Register::OwnedValue(OwnedValue::build_text(&julianday)); - } else { - let datetime_value = &state.registers[*start_reg]; - let julianday = exec_julianday(datetime_value.get_owned_value()); - match julianday { - Ok(time) => { - state.registers[*dest] = - Register::OwnedValue(OwnedValue::build_text(&time)) - } - Err(e) => { - return Err(LimboError::ParseError(format!( - "Error encountered while parsing datetime value: {}", - e - ))); - } - } - } + let result = exec_julianday(&state.registers[*start_reg..*start_reg + arg_count]); + state.registers[*dest] = Register::OwnedValue(result); } ScalarFunc::UnixEpoch => { if *start_reg == 0 { diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index f6441384f..3c2f7b771 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -423,26 +423,33 @@ do_execsql_test julianday-time-only { SELECT julianday('15:30:45'); } {2451545.14635417} -# -# TODO: fix precision issue -# -#do_execsql_test julianday-midnight { -# SELECT julianday('2023-05-18 00:00:00'); -#} {2460082.5} +do_execsql_test julianday-midnight { + SELECT julianday('2023-05-18 00:00:00'); +} {2460082.5} -#do_execsql_test julianday-noon { -# SELECT julianday('2023-05-18 12:00:00'); -#} {2460083.0} +do_execsql_test julianday-noon { + SELECT julianday('2023-05-18 12:00:00'); +} {2460083.0} -#do_execsql_test julianday-fractional-zero { -# SELECT julianday('2023-05-18 00:00:00.000'); -#} {2460082.5} +do_execsql_test julianday-fractional-zero { + SELECT julianday('2023-05-18 00:00:00.000'); +} {2460082.5} -# same issue as above, we return .5000000 because we are using fmt precision -#do_execsql_test julianday-date-only { -# SELECT julianday('2023-05-18'); -#} {2460082.5} +do_execsql_test julianday-date-only { + SELECT julianday('2023-05-18'); +} {2460082.5} +do_execsql_test julianday-with-modifier-day { + SELECT julianday(2454832.5,'+1 day'); +} {2454833.5} + +do_execsql_test julianday-with-modifier-hour { + SELECT julianday(2454832.5,'-3 hours'); +} {2454832.375} + +do_execsql_test julianday-max-day { + SELECT julianday('9999-12-31 23:59:59'); +} {5373484.49998843} From fd8f629ee4a4da131224b3148e74e198a887c1cd Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 12 Apr 2025 22:39:35 +0800 Subject: [PATCH 191/425] comment format_julian_day() --- core/functions/datetime.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index 55675efc4..d30cf4015 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -350,8 +350,11 @@ fn to_julian_day_exact(dt: &NaiveDateTime) -> f64 { jd_days + jd_fraction } +// Format the Julian day to a maximum of 8 decimal places. If it's an integer, +// append `.0` to the end to stay consistent with SQLite.
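+// For illustration only, a rough sketch of the intended mapping (expected strings +// taken from the julianday tests earlier in this series, not an authoritative spec): +//   format_julian_day(2460082.5) == "2460082.5" +//   format_julian_day(2460083.0) == "2460083.0" (integer values get ".0" appended) +//   format_julian_day(2451545.1463541665) == "2451545.14635417" (rounded to 8 decimal places)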
fn format_julian_day(days: f64) -> String { - let t = (days * 100_000_000.0).round() / 100_000_000.0; + const DECIMAL_PRECISION: f64 = 100_000_000.0; + let t = (days * DECIMAL_PRECISION).round() / DECIMAL_PRECISION; let mut ret = format!("{}", t); if !ret.contains('.') { ret += ".0"; } From 73764e198e8dafef9e7f55cf7acc43482f97a2b1 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 16:34:39 -0300 Subject: [PATCH 192/425] core: Fix equivalence between variable expressions to be always false Since, until they are bound to a value, they are treated as NULL. https://sqlite.org/lang_expr.html#varparam --- core/util.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/core/util.rs b/core/util.rs index 3d12a2c6e..aa13e2edc 100644 --- a/core/util.rs +++ b/core/util.rs @@ -399,7 +399,9 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { (Expr::Unary(op1, expr1), Expr::Unary(op2, expr2)) => { op1 == op2 && exprs_are_equivalent(expr1, expr2) } - (Expr::Variable(var1), Expr::Variable(var2)) => var1 == var2, + // Variables that are not bound to a specific value, are treated as NULL + // https://sqlite.org/lang_expr.html#varparam + (Expr::Variable(..), Expr::Variable(..)) => false, (Expr::Parenthesized(exprs1), Expr::Parenthesized(exprs2)) => { exprs1.len() == exprs2.len() && exprs1 @@ -945,6 +947,13 @@ pub mod tests { assert_eq!(normalize_ident("\"foo\""), "foo"); } + #[test] + fn test_variable_comparison() { + let expr1 = Expr::Variable("?".to_string()); + let expr2 = Expr::Variable("?".to_string()); + assert!(!exprs_are_equivalent(&expr1, &expr2)); + } + #[test] fn test_basic_addition_exprs_are_equivalent() { let expr1 = Expr::Binary( From db0f07499da129d447b93c7faca5c8217bff3b5c Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 16:39:30 -0300 Subject: [PATCH 193/425] core/translate: Fix naive comparison between Binary expressions during register optimization --- core/translate/expr.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 958005259..6a9b7040b 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -4,7 +4,7 @@ use limbo_sqlite3_parser::ast::{self, UnaryOperator}; use crate::function::JsonFunc; use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc}; use crate::schema::{Table, Type}; -use crate::util::normalize_ident; +use crate::util::{exprs_are_equivalent, normalize_ident}; use crate::vdbe::{ builder::ProgramBuilder, insn::{CmpInsFlags, Insn}, @@ -494,8 +494,8 @@ pub fn translate_expr( match expr { ast::Expr::Between { .. } => todo!(), ast::Expr::Binary(e1, op, e2) => { - // Check if both sides of the expression are identical and reuse the same register if so - if e1 == e2 { + // Check if both sides of the expression are equivalent and reuse the same register if so + if exprs_are_equivalent(e1, e2) { let shared_reg = program.alloc_register(); translate_expr(program, referenced_tables, e1, shared_reg, resolver)?; From 5c0b112125e59a780236fc654848afa563314aa7 Mon Sep 17 00:00:00 2001 From: "Levy A."
Date: Sat, 12 Apr 2025 17:40:29 -0300 Subject: [PATCH 194/425] fix: return null when parameter is unbound --- core/error.rs | 4 ---- core/vdbe/execute.rs | 7 +------ core/vdbe/mod.rs | 4 ++-- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/core/error.rs b/core/error.rs index 3a1fd8112..e8eb83a5a 100644 --- a/core/error.rs +++ b/core/error.rs @@ -1,5 +1,3 @@ -use std::num::NonZero; - use thiserror::Error; #[derive(Debug, Error, miette::Diagnostic)] @@ -49,8 +47,6 @@ pub enum LimboError { Constraint(String), #[error("Extension error: {0}")] ExtensionError(String), - #[error("Unbound parameter at index {0}")] - Unbound(NonZero<usize>), #[error("Runtime error: integer overflow")] IntegerOverflow, #[error("Schema is locked for write")] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4d2a96d10..21a8d208d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4395,12 +4395,7 @@ pub fn op_variable( let Insn::Variable { index, dest } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - state.registers[*dest] = Register::OwnedValue( - state - .get_parameter(*index) - .ok_or(LimboError::Unbound(*index))? - .clone(), - ); + state.registers[*dest] = Register::OwnedValue(state.get_parameter(*index)); state.pc += 1; Ok(InsnFunctionStepResult::Step) } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index daad191b4..c5ab69eb9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -284,8 +284,8 @@ impl ProgramState { self.parameters.insert(index, value); } - pub fn get_parameter(&self, index: NonZero<usize>) -> Option<&OwnedValue> { - self.parameters.get(&index) + pub fn get_parameter(&self, index: NonZero<usize>) -> OwnedValue { + self.parameters.get(&index).cloned().unwrap_or(OwnedValue::Null) } pub fn reset(&mut self) { From 029d1d7cce8886b0db752a2b9607ecc028996afb Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 17:47:16 -0300 Subject: [PATCH 195/425] Ignore any .log file inside testing --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 726ec8796..893d3fd3f 100644 --- a/.gitignore +++ b/.gitignore @@ -34,5 +34,5 @@ dist/ # testing testing/limbo_output.txt **/limbo_output.txt -testing/test.log +testing/*.log .bugbase From 64f8aca823f08c12b05a9478f1e67b629c7b4de6 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 17:55:57 -0300 Subject: [PATCH 196/425] core: Improve readability of Affinity's doc comment --- core/schema.rs | 70 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 21bed120d..70cc726c6 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -483,26 +483,72 @@ pub enum Type { Blob, } +/// # SQLite Column Type Affinities +/// /// Each column in an SQLite 3 database is assigned one of the following type affinities: /// -/// TEXT -/// NUMERIC -/// INTEGER -/// REAL -/// BLOB -/// (Historical note: The "BLOB" type affinity used to be called "NONE". But that term was easy to confuse with "no affinity" and so it was renamed.) +/// - **TEXT** +/// - **NUMERIC** +/// - **INTEGER** +/// - **REAL** +/// - **BLOB** /// +/// > **Note:** Historically, the "BLOB" type affinity was called "NONE". However, this term was renamed to avoid confusion with "no affinity".
/// -/// A column with NUMERIC affinity may contain values using all five storage classes. When text data is inserted into a NUMERIC column, the storage class of the text is converted to INTEGER or REAL (in order of preference) if the text is a well-formed integer or real literal, respectively. If the TEXT value is a well-formed integer literal that is too large to fit in a 64-bit signed integer, it is converted to REAL. For conversions between TEXT and REAL storage classes, only the first 15 significant decimal digits of the number are preserved. If the TEXT value is not a well-formed integer or real literal, then the value is stored as TEXT. For the purposes of this paragraph, hexadecimal integer literals are not considered well-formed and are stored as TEXT. (This is done for historical compatibility with versions of SQLite prior to version 3.8.6 2014-08-15 where hexadecimal integer literals were first introduced into SQLite.) If a floating point value that can be represented exactly as an integer is inserted into a column with NUMERIC affinity, the value is converted into an integer. No attempt is made to convert NULL or BLOB values. +/// ## Affinity Descriptions /// -/// A string might look like a floating-point literal with a decimal point and/or exponent notation but as long as the value can be expressed as an integer, the NUMERIC affinity will convert it into an integer. Hence, the string '3.0e+5' is stored in a column with NUMERIC affinity as the integer 300000, not as the floating point value 300000.0. +/// ### **TEXT** +/// - Stores data using the NULL, TEXT, or BLOB storage classes. +/// - Numerical data inserted into a column with TEXT affinity is converted into text form before being stored. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col TEXT); +/// INSERT INTO example (col) VALUES (123); -- Stored as '123' (text) +/// SELECT typeof(col) FROM example; -- Returns 'text' +/// ``` /// -/// A column that uses INTEGER affinity behaves the same as a column with NUMERIC affinity. The difference between INTEGER and NUMERIC affinity is only evident in a CAST expression: The expression "CAST(4.0 AS INT)" returns an integer 4, whereas "CAST(4.0 AS NUMERIC)" leaves the value as a floating-point 4.0. +/// ### **NUMERIC** +/// - Can store values using all five storage classes. +/// - Text data is converted to INTEGER or REAL (in that order of preference) if it is a well-formed integer or real literal. +/// - If the text represents an integer too large for a 64-bit signed integer, it is converted to REAL. +/// - If the text is not a well-formed literal, it is stored as TEXT. +/// - Hexadecimal integer literals are stored as TEXT for historical compatibility. +/// - Floating-point values that can be exactly represented as integers are converted to integers. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col NUMERIC); +/// INSERT INTO example (col) VALUES ('3.0e+5'); -- Stored as 300000 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` /// -/// A column with REAL affinity behaves like a column with NUMERIC affinity except that it forces integer values into floating point representation. (As an internal optimization, small floating point values with no fractional component and stored in columns with REAL affinity are written to disk as integers in order to take up less space and are automatically converted back into floating point as the value is read out. 
This optimization is completely invisible at the SQL level and can only be detected by examining the raw bits of the database file.) +/// ### **INTEGER** +/// - Behaves like NUMERIC affinity but differs in `CAST` expressions. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col INTEGER); +/// INSERT INTO example (col) VALUES (4.0); -- Stored as 4 (integer) +/// SELECT typeof(col) FROM example; -- Returns 'integer' +/// ``` /// -/// A column with affinity BLOB does not prefer one storage class over another and no attempt is made to coerce data from one storage class into another. +/// ### **REAL** +/// - Similar to NUMERIC affinity but forces integer values into floating-point representation. +/// - **Optimization:** Small floating-point values with no fractional component may be stored as integers on disk to save space. This is invisible at the SQL level. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col REAL); +/// INSERT INTO example (col) VALUES (4); -- Stored as 4.0 (real) +/// SELECT typeof(col) FROM example; -- Returns 'real' +/// ``` +/// +/// ### **BLOB** +/// - Does not prefer any storage class. +/// - No coercion is performed between storage classes. +/// - **Example:** +/// ```sql +/// CREATE TABLE example (col BLOB); +/// INSERT INTO example (col) VALUES (x'1234'); -- Stored as a binary blob +/// SELECT typeof(col) FROM example; -- Returns 'blob' +/// ``` #[derive(Debug, Clone, Copy, PartialEq)] pub enum Affinity { Integer, From ff7a4e8297164455b9b5708298cbffc7acd56db8 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 16:51:47 -0300 Subject: [PATCH 197/425] core: Change always falseness of equivalence between variables expressions to be only on anonymous variables Named variables are compared by name --- core/util.rs | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/core/util.rs b/core/util.rs index aa13e2edc..80a52f387 100644 --- a/core/util.rs +++ b/core/util.rs @@ -401,7 +401,9 @@ pub fn exprs_are_equivalent(expr1: &Expr, expr2: &Expr) -> bool { } // Variables that are not bound to a specific value, are treated as NULL // https://sqlite.org/lang_expr.html#varparam - (Expr::Variable(..), Expr::Variable(..)) => false, + (Expr::Variable(var), Expr::Variable(var2)) if var == "" && var2 == "" => false, + // Named variables can be compared by their name + (Expr::Variable(val), Expr::Variable(val2)) => val == val2, (Expr::Parenthesized(exprs1), Expr::Parenthesized(exprs2)) => { exprs1.len() == exprs2.len() && exprs1 @@ -948,9 +950,20 @@ pub mod tests { } #[test] - fn test_variable_comparison() { - let expr1 = Expr::Variable("?".to_string()); - let expr2 = Expr::Variable("?".to_string()); + fn test_anonymous_variable_comparison() { + let expr1 = Expr::Variable("".to_string()); + let expr2 = Expr::Variable("".to_string()); + assert!(!exprs_are_equivalent(&expr1, &expr2)); + } + + #[test] + fn test_named_variable_comparison() { + let expr1 = Expr::Variable("1".to_string()); + let expr2 = Expr::Variable("1".to_string()); + assert!(exprs_are_equivalent(&expr1, &expr2)); + + let expr1 = Expr::Variable("1".to_string()); + let expr2 = Expr::Variable("2".to_string()); assert!(!exprs_are_equivalent(&expr1, &expr2)); } From d210ee149783cf1a25a3e5606a2589e645c0f677 Mon Sep 17 00:00:00 2001 From: "Levy A." 
Date: Sat, 12 Apr 2025 18:55:48 -0300 Subject: [PATCH 198/425] cargo fmt --- core/vdbe/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index c5ab69eb9..a95cc7fba 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -285,7 +285,10 @@ impl ProgramState { } pub fn get_parameter(&self, index: NonZero) -> OwnedValue { - self.parameters.get(&index).cloned().unwrap_or(OwnedValue::Null) + self.parameters + .get(&index) + .cloned() + .unwrap_or(OwnedValue::Null) } pub fn reset(&mut self) { From da484b33aa97bde149fc6c431a066432595ebc8e Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 21:29:41 -0300 Subject: [PATCH 199/425] core/storage: Add asserts to check invariants for current_page While developing, I had an underflow issue during the casting: `current as usize`. Which could be found way faster with proper asserts. --- core/storage/btree.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 5d07b6b82..dc512afd5 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3772,6 +3772,7 @@ impl PageStack { self.current_page.set(self.current_page.get() + 1); } fn decrement_current(&self) { + assert!(self.current_page.get() > 0); self.current_page.set(self.current_page.get() - 1); } /// Push a new page onto the stack. @@ -3788,6 +3789,7 @@ impl PageStack { current < BTCURSOR_MAX_DEPTH as i32, "corrupted database, stack is bigger than expected" ); + assert!(current >= 0); self.stack.borrow_mut()[current as usize] = Some(page); self.cell_indices.borrow_mut()[current as usize] = starting_cell_idx; } @@ -3804,6 +3806,7 @@ impl PageStack { /// This effectively means traversing back up to a parent page. fn pop(&self) { let current = self.current_page.get(); + assert!(current >= 0); tracing::trace!("pagestack::pop(current={})", current); self.cell_indices.borrow_mut()[current as usize] = 0; self.stack.borrow_mut()[current as usize] = None; @@ -3827,7 +3830,9 @@ impl PageStack { /// Current page pointer being used fn current(&self) -> usize { - self.current_page.get() as usize + let current = self.current_page.get() as usize; + assert!(self.current_page.get() >= 0); + current } /// Cell index of the current page @@ -3847,13 +3852,13 @@ impl PageStack { /// We usually advance after going traversing a new page fn advance(&self) { let current = self.current(); - tracing::trace!("advance {}", self.cell_indices.borrow()[current],); + tracing::trace!("pagestack::advance {}", self.cell_indices.borrow()[current],); self.cell_indices.borrow_mut()[current] += 1; } fn retreat(&self) { let current = self.current(); - tracing::trace!("retreat {}", self.cell_indices.borrow()[current]); + tracing::trace!("pagestack::retreat {}", self.cell_indices.borrow()[current]); self.cell_indices.borrow_mut()[current] -= 1; } From 51eb2af06ab316e9709c6372e309847fc097600f Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 22:24:22 -0300 Subject: [PATCH 200/425] core(refactor): Add CreateBTreeFlags Passing 1s and 0s with comments is not rustacean, and since we already follow the pattern of struct flags in other sections of the codebase it's better use it here too. 
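To make the refactor concrete, a minimal before/after sketch of one call site (the register name follows translate/index.rs in the diff below):

// before: a bare magic number whose meaning lives in a comment
program.emit_insn(Insn::CreateBtree { db: 0, root: root_page_reg, flags: 2 }); // index leaf
// after: the flag constructor names its own intent
program.emit_insn(Insn::CreateBtree { db: 0, root: root_page_reg, flags: CreateBTreeFlags::new_index() });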
--- core/storage/pager.rs | 38 +++++++++++++++++++++++++++++++------- core/translate/index.rs | 3 ++- core/translate/schema.rs | 5 +++-- core/vdbe/execute.rs | 2 +- core/vdbe/explain.rs | 4 ++-- core/vdbe/insn.rs | 8 ++++++-- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 9d6d90c00..5d9554198 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -206,14 +206,11 @@ impl Pager { }) } - pub fn btree_create(&self, flags: usize) -> u32 { + pub fn btree_create(&self, flags: &CreateBTreeFlags) -> u32 { let page_type = match flags { - 1 => PageType::TableLeaf, - 2 => PageType::IndexLeaf, - _ => unreachable!( - "wrong create table flags, should be 1 for table and 2 for index, got {}", - flags, - ), + _ if flags.is_table() => PageType::TableLeaf, + _ if flags.is_index() => PageType::IndexLeaf, + _ => unreachable!("Invalid flags state"), }; let page = self.do_allocate_page(page_type, 0); let id = page.get().id; @@ -642,6 +639,33 @@ pub fn allocate_page(page_id: usize, buffer_pool: &Rc, offset: usize page } +#[derive(Debug)] +pub struct CreateBTreeFlags(pub u8); +impl CreateBTreeFlags { + pub const TABLE: u8 = 0b0001; + pub const INDEX: u8 = 0b0010; + + pub fn new_table() -> Self { + Self(CreateBTreeFlags::TABLE) + } + + pub fn new_index() -> Self { + Self(CreateBTreeFlags::INDEX) + } + + pub fn is_table(&self) -> bool { + (self.0 & CreateBTreeFlags::TABLE) != 0 + } + + pub fn is_index(&self) -> bool { + (self.0 & CreateBTreeFlags::INDEX) != 0 + } + + pub fn get_flags(&self) -> u8 { + self.0 + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/core/translate/index.rs b/core/translate/index.rs index 32d7cd2e9..20647d15c 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::{ schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema}, + storage::pager::CreateBTreeFlags, types::Record, util::normalize_ident, vdbe::{ @@ -91,7 +92,7 @@ pub fn translate_create_index( program.emit_insn(Insn::CreateBtree { db: 0, root: root_page_reg, - flags: 2, // index leaf + flags: CreateBTreeFlags::new_index(), }); // open the sqlite schema table for writing and create a new entry for the index diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 3d5aa79db..449d1e0e8 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -2,6 +2,7 @@ use std::fmt::Display; use crate::ast; use crate::schema::Schema; +use crate::storage::pager::CreateBTreeFlags; use crate::translate::ProgramBuilder; use crate::translate::ProgramBuilderOpts; use crate::translate::QueryMode; @@ -60,7 +61,7 @@ pub fn translate_create_table( program.emit_insn(Insn::CreateBtree { db: 0, root: table_root_reg, - flags: 1, // Table leaf page + flags: CreateBTreeFlags::new_table(), }); // Create an automatic index B-tree if needed @@ -92,7 +93,7 @@ pub fn translate_create_table( program.emit_insn(Insn::CreateBtree { db: 0, root: index_root_reg, - flags: 2, // Index leaf page + flags: CreateBTreeFlags::new_index(), }); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4d2a96d10..37c66b111 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4171,7 +4171,7 @@ pub fn op_create_btree( // TODO: implement temp databases todo!("temp databases not implemented yet"); } - let root_page = pager.btree_create(*flags); + let root_page = pager.btree_create(flags); state.registers[*root] = Register::OwnedValue(OwnedValue::Integer(root_page as i64)); state.pc 
+= 1; Ok(InsnFunctionStepResult::Step) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 3ce60f5db..79b0f3ded 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1177,10 +1177,10 @@ pub fn insn_to_str( "CreateBtree", *db as i32, *root as i32, - *flags as i32, + flags.get_flags() as i32, OwnedValue::build_text(""), 0, - format!("r[{}]=root iDb={} flags={}", root, db, flags), + format!("r[{}]=root iDb={} flags={}", root, db, flags.get_flags()), ), Insn::Destroy { root, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 0047f9d11..acd8166c3 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,7 +1,11 @@ use std::{num::NonZero, rc::Rc}; use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx}; -use crate::{schema::BTreeTable, storage::wal::CheckpointMode, types::Record}; +use crate::{ + schema::BTreeTable, + storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, + types::Record, +}; use limbo_macros::Description; /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values. @@ -703,7 +707,7 @@ pub enum Insn { /// The root page of the new b-tree (P2). root: usize, /// Flags (P3). - flags: usize, + flags: CreateBTreeFlags, }, /// Deletes an entire database table or index whose root page in the database file is given by P1. From 1297cb107c36e01433946481f015363503cfc053 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 02:25:28 -0300 Subject: [PATCH 201/425] bit-not and boolean-not Co-authored-by: Diego Reis <79876389+diegoreis42@users.noreply.github.com> --- core/vdbe/execute.rs | 14 ++- fuzz/Cargo.lock | 213 +++---------------------------------------- testing/math.test | 32 +++++++ 3 files changed, 55 insertions(+), 204 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4d2a96d10..e6495a4fe 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -5611,8 +5611,11 @@ pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue { OwnedValue::Null => OwnedValue::Null, OwnedValue::Integer(i) => OwnedValue::Integer(!i), OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), - OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), + OwnedValue::Text(text) => exec_bit_not(&cast_text_to_integer(text.as_str())), + OwnedValue::Blob(blob) => { + let text = String::from_utf8_lossy(blob); + exec_bit_not(&cast_text_to_integer(&text)) + } } } @@ -5729,8 +5732,11 @@ pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue { OwnedValue::Null => OwnedValue::Null, OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), - OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numeric(text.as_str())), - _ => todo!(), + OwnedValue::Text(text) => exec_boolean_not(&&cast_text_to_real(text.as_str())), + OwnedValue::Blob(blob) => { + let text = String::from_utf8_lossy(blob); + exec_boolean_not(&cast_text_to_real(&text)) + } } } pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index f32c94005..2556485fb 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -47,15 +47,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "built" version = "0.7.7" @@ -72,12 +63,6 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "cc" version = "1.2.16" @@ -130,15 +115,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "crossbeam-epoch" version = "0.9.18" @@ -164,16 +140,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "derive_arbitrary" version = "1.4.1" @@ -185,16 +151,6 @@ dependencies = [ "syn", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -234,12 +190,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fast-float2" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" - [[package]] name = "foldhash" version = "0.1.4" @@ -255,16 +205,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -507,15 +447,8 @@ checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", "hashbrown", - "serde", ] -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - [[package]] name = "jobserver" version = "0.1.32" @@ -535,22 +468,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "jsonb" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd7dc2490b13d09367f5dc4bf202a5d70958dd5b9b2758e2708ee062752a824" -dependencies = [ - "byteorder", - "fast-float2", - "itoa", - "nom", - "ordered-float", - "rand", - "ryu", - "serde_json", -] - [[package]] name = "julian_day_converter" version = "0.4.4" @@ -598,6 +515,12 @@ 
dependencies = [ "windows-targets", ] +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + [[package]] name = "libmimalloc-sys" version = "0.1.39" @@ -643,7 +566,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "built", "cfg_block", @@ -652,10 +575,9 @@ dependencies = [ "fallible-iterator", "getrandom 0.2.15", "hex", - "indexmap", - "jsonb", "julian_day_converter", "libloading", + "libm", "limbo_ext", "limbo_macros", "limbo_sqlite3_parser", @@ -664,15 +586,12 @@ dependencies = [ "miette", "mimalloc", "parking_lot", - "pest", - "pest_derive", "polling", "rand", "regex", "regex-syntax", "rustix", "ryu", - "serde", "strum", "thiserror 1.0.69", "tracing", @@ -680,7 +599,7 @@ dependencies = [ [[package]] name = "limbo_ext" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "chrono", "getrandom 0.3.1", @@ -689,7 +608,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "proc-macro2", "quote", @@ -698,7 +617,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "bitflags", "cc", @@ -717,7 +636,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "chrono", "limbo_ext", @@ -729,7 +648,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.18-pre.3" +version = "0.0.19-pre.4" dependencies = [ "limbo_ext", "mimalloc", @@ -802,22 +721,6 @@ dependencies = [ "libmimalloc-sys", ] -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -833,15 +736,6 @@ version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad" -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - [[package]] name = "parking_lot" version = "0.12.3" @@ -871,51 +765,6 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "pest" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" -dependencies = [ - "memchr", - "thiserror 2.0.12", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.15" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pest_meta" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" -dependencies = [ - "once_cell", - "pest", - "sha2", -] - [[package]] name = "phf" version = "0.11.3" @@ -1142,30 +991,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "indexmap", - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1315,18 +1140,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "uncased" version = "0.9.10" diff --git a/testing/math.test b/testing/math.test index afa0e29d1..7eb15b2bc 100755 --- a/testing/math.test +++ b/testing/math.test @@ -627,6 +627,38 @@ do_execsql_test bitwise-not-zero { SELECT ~0 } {-1} +do_execsql_test bitwise-not-empty-blob { + SELECT ~x'' +} {-1} + +do_execsql_test bitwise-not-cast-blob { + SELECT ~ CAST ('af' AS BLOB); +} {-1} + +do_execsql_test bitwise-not-blob { + SELECT ~ x'0000'; +} {-1} + +do_execsql_test bitwise-not-blob-2 { + SELECT ~ x'0001'; +} {-1} + +do_execsql_test boolean-not-empty-blob { + SELECT NOT x'' +} {1} + +do_execsql_test boolean-not-cast-blob { + SELECT NOT CAST ('af' AS BLOB); +} {1} + +do_execsql_test boolean-not-blob { + SELECT NOT x'0000'; +} {1} + +do_execsql_test boolean-not-blob-2 { + SELECT NOT x'0001'; +} {1} + foreach {testname lhs ans} { int-1 1 0 int-2 2 0 From f3f7a722a77608bced26726ed143ca4bc2d93c85 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sun, 13 Apr 2025 16:41:30 +0800 Subject: [PATCH 202/425] avoid converting double to string in datetime --- core/functions/datetime.rs | 39 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/core/functions/datetime.rs b/core/functions/datetime.rs index d30cf4015..b58527f40 100644 --- a/core/functions/datetime.rs +++ b/core/functions/datetime.rs @@ -52,8 +52,7 @@ enum DateTimeOutput { fn exec_datetime(values: &[Register], output_type: DateTimeOutput) -> OwnedValue { if values.is_empty() { let now = parse_naive_date_time(&OwnedValue::build_text("now")).unwrap(); - let formatted_str = format_dt(now, output_type, false); - return OwnedValue::build_text(&formatted_str); + return format_dt(now, output_type, false); } if let Some(mut dt) = parse_naive_date_time(values[0].get_owned_value()) { // if successful, treat subsequent entries as modifiers @@ -84,29 +83,32 @@ fn modify_dt(dt: &mut NaiveDateTime, mods: &[Register], 
output_type: DateTimeOut if is_leap_second(dt) || *dt > get_max_datetime_exclusive() { return OwnedValue::build_text(""); } - let formatted = format_dt(*dt, output_type, subsec_requested); - OwnedValue::build_text(&formatted) + format_dt(*dt, output_type, subsec_requested) } -fn format_dt(dt: NaiveDateTime, output_type: DateTimeOutput, subsec: bool) -> String { +fn format_dt(dt: NaiveDateTime, output_type: DateTimeOutput, subsec: bool) -> OwnedValue { match output_type { - DateTimeOutput::Date => dt.format("%Y-%m-%d").to_string(), + DateTimeOutput::Date => OwnedValue::from_text(dt.format("%Y-%m-%d").to_string().as_str()), DateTimeOutput::Time => { - if subsec { + let t = if subsec { dt.format("%H:%M:%S%.3f").to_string() } else { dt.format("%H:%M:%S").to_string() - } + }; + OwnedValue::from_text(t.as_str()) } DateTimeOutput::DateTime => { - if subsec { + let t = if subsec { dt.format("%Y-%m-%d %H:%M:%S%.3f").to_string() } else { dt.format("%Y-%m-%d %H:%M:%S").to_string() - } + }; + OwnedValue::from_text(t.as_str()) } - DateTimeOutput::StrfTime(format_str) => strftime_format(&dt, &format_str), - DateTimeOutput::JuliaDay => format_julian_day(to_julian_day_exact(&dt)), + DateTimeOutput::StrfTime(format_str) => { + OwnedValue::from_text(strftime_format(&dt, &format_str).as_str()) + } + DateTimeOutput::JuliaDay => OwnedValue::Float(to_julian_day_exact(&dt)), } } @@ -350,19 +352,6 @@ fn to_julian_day_exact(dt: &NaiveDateTime) -> f64 { jd_days + jd_fraction } -// Format the Julian day to a maximum of 8 decimal places. if it's an integer, -// append `.0` to the end to stay consistent with SQLite. -fn format_julian_day(days: f64) -> String { - const DECIMAL_PRECISION: f64 = 100_000_000.0; - let t = (days * DECIMAL_PRECISION).round() / DECIMAL_PRECISION; - let mut ret = format!("{}", t); - if !ret.contains('.') { - ret += ".0"; - } - - ret -} - pub fn exec_unixepoch(time_value: &OwnedValue) -> Result<String> { let dt = parse_naive_date_time(time_value); match dt { From 9ed34924e638b79a844fff1ab53cce7d62a9d00b Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Sun, 13 Apr 2025 11:52:25 +0300 Subject: [PATCH 203/425] Parse hexadecimal integers --- core/translate/expr.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 958005259..d49254f03 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1850,8 +1850,14 @@ pub fn translate_expr( } ast::Expr::Literal(lit) => match lit { ast::Literal::Numeric(val) => { - let maybe_int = val.parse::<i64>(); - if let Ok(int_value) = maybe_int { + if val.starts_with("0x") { + // must be a hexadecimal integer + let int_value = i64::from_str_radix(&val[2..], 16)?; + program.emit_insn(Insn::Integer { + value: int_value, + dest: target_register, + }); + } else if let Ok(int_value) = val.parse::<i64>() { program.emit_insn(Insn::Integer { value: int_value, dest: target_register, From 65d4c68cf2d47bd775bf77b068a8434040399753 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 3 Apr 2025 14:39:20 -0300 Subject: [PATCH 204/425] core/pager: Wrap wal with Option --- core/lib.rs | 2 +- core/storage/btree.rs | 4 +- core/storage/pager.rs | 141 ++++++++++++++++++++++++++---------------- 3 files changed, 90 insertions(+), 57 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index b66ef4c23..1ac907af7 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -183,7 +183,7 @@ impl Database { let pager = Rc::new(Pager::finish_open( self.header.clone(), self.db_file.clone(), - wal, + Some(wal),
self.shared_page_cache.clone(), buffer_pool, diff --git a/core/storage/btree.rs b/core/storage/btree.rs index dc512afd5..ba121d86d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -5012,7 +5012,7 @@ mod tests { let page_cache = Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))); let pager = { let db_header = Arc::new(SpinLock::new(db_header.clone())); - Pager::finish_open(db_header, db_file, wal, io, page_cache, buffer_pool).unwrap() + Pager::finish_open(db_header, db_file, Some(wal), io, page_cache, buffer_pool).unwrap() }; let pager = Rc::new(pager); let page1 = pager.allocate_page().unwrap(); @@ -5329,7 +5329,7 @@ mod tests { Pager::finish_open( db_header.clone(), db_file, - wal, + Some(wal), io, Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))), buffer_pool, diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 5d9554198..e89b659bf 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -157,7 +157,7 @@ pub struct Pager { /// Source of the database pages. pub db_file: Arc, /// The write-ahead log (WAL) for the database. - wal: Rc>, + wal: Option>>, /// A page cache for the database. page_cache: Arc>, /// Buffer pool for temporary data storage. @@ -183,7 +183,7 @@ impl Pager { pub fn finish_open( db_header_ref: Arc>, db_file: Arc, - wal: Rc>, + wal: Option>>, io: Arc, page_cache: Arc>, buffer_pool: Rc, @@ -241,28 +241,42 @@ impl Pager { #[inline(always)] pub fn begin_read_tx(&self) -> Result { - self.wal.borrow_mut().begin_read_tx() + if let Some(wal) = &self.wal { + return wal.borrow_mut().begin_read_tx(); + } + + Ok(LimboResult::Ok) } #[inline(always)] pub fn begin_write_tx(&self) -> Result { - self.wal.borrow_mut().begin_write_tx() + if let Some(wal) = &self.wal { + return wal.borrow_mut().begin_write_tx(); + } + + Ok(LimboResult::Ok) } pub fn end_tx(&self) -> Result { + if let Some(wal) = &self.wal { let checkpoint_status = self.cacheflush()?; - match checkpoint_status { + return match checkpoint_status { CheckpointStatus::IO => Ok(checkpoint_status), CheckpointStatus::Done(_) => { - self.wal.borrow().end_write_tx()?; - self.wal.borrow().end_read_tx()?; + wal.borrow().end_write_tx()?; + wal.borrow().end_read_tx()?; Ok(checkpoint_status) } } } + Ok(CheckpointStatus::Done(CheckpointResult::default())) + } + pub fn end_read_tx(&self) -> Result<()> { - self.wal.borrow().end_read_tx()?; + if let Some(wal) = &self.wal { + wal.borrow().end_read_tx()?; + } Ok(()) } @@ -270,7 +284,11 @@ impl Pager { pub fn read_page(&self, page_idx: usize) -> Result { tracing::trace!("read_page(page_idx = {})", page_idx); let mut page_cache = self.page_cache.write(); - let page_key = PageCacheKey::new(page_idx, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(page_idx, Some(max_frame)); if let Some(page) = page_cache.get(&page_key) { tracing::trace!("read_page(page_idx = {}) = cached", page_idx); return Ok(page.clone()); @@ -278,8 +296,9 @@ impl Pager { let page = Arc::new(Page::new(page_idx)); page.set_locked(); - if let Some(frame_id) = self.wal.borrow().find_frame(page_idx as u64)? { - self.wal + if let Some(wal) = &self.wal { + if let Some(frame_id) = wal.borrow().find_frame(page_idx as u64)? 
{ + wal .borrow() .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; { @@ -290,6 +309,7 @@ impl Pager { page_cache.insert(page_key, page.clone()); return Ok(page); } + } sqlite3_ondisk::begin_read_page( self.db_file.clone(), self.buffer_pool.clone(), @@ -307,32 +327,30 @@ impl Pager { trace!("load_page(page_idx = {})", id); let mut page_cache = self.page_cache.write(); page.set_locked(); - let page_key = PageCacheKey::new(id, Some(self.wal.borrow().get_max_frame())); - if let Some(frame_id) = self.wal.borrow().find_frame(id as u64)? { - self.wal - .borrow() - .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; - { - page.set_uptodate(); - } - // TODO(pere) ensure page is inserted - if !page_cache.contains_key(&page_key) { - page_cache.insert(page_key, page.clone()); - } - return Ok(()); + if let Some(wal) = &self.wal { + let page_key = PageCacheKey::new(id, Some(wal.borrow().get_max_frame())); + if let Some(frame_id) = wal.borrow().find_frame(id as u64)? { + wal.borrow() + .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; + { + page.set_uptodate(); + } + // TODO(pere) ensure page is inserted + if !page_cache.contains_key(&page_key) { + page_cache.insert(page_key, page.clone()); + } + return Ok(()); } + } sqlite3_ondisk::begin_read_page( self.db_file.clone(), self.buffer_pool.clone(), page.clone(), id, )?; - // TODO(pere) ensure page is inserted - if !page_cache.contains_key(&page_key) { - page_cache.insert(page_key, page.clone()); - } + Ok(()) - } +} /// Writes the database header. pub fn write_database_header(&self, header: &DatabaseHeader) { @@ -361,20 +379,22 @@ impl Pager { let db_size = self.db_header.lock().database_size; for page_id in self.dirty_pages.borrow().iter() { let mut cache = self.page_cache.write(); - let page_key = - PageCacheKey::new(*page_id, Some(self.wal.borrow().get_max_frame())); - let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); - let page_type = page.get().contents.as_ref().unwrap().maybe_page_type(); - trace!("cacheflush(page={}, page_type={:?}", page_id, page_type); - self.wal.borrow_mut().append_frame( - page.clone(), - db_size, - self.flush_info.borrow().in_flight_writes.clone(), - )?; - // This page is no longer valid. - // For example: - // We took page with key (page_num, max_frame) -- this page is no longer valid for that max_frame so it must be invalidated. - cache.delete(page_key); + if let Some(wal) = &self.wal { + let page_key = + PageCacheKey::new(*page_id, Some(wal.borrow().get_max_frame())); + let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); + let page_type = page.get().contents.as_ref().unwrap().maybe_page_type(); + trace!("cacheflush(page={}, page_type={:?}", page_id, page_type); + wal.borrow_mut().append_frame( + page.clone(), + db_size, + self.flush_info.borrow().in_flight_writes.clone(), + )?; + // This page is no longer valid. + // For example: + // We took page with key (page_num, max_frame) -- this page is no longer valid for that max_frame so it must be invalidated. 
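// (Editorial illustration with made-up numbers: a page cached under key
// (page 5, max_frame 7) before this append no longer describes the WAL state
// once the append moves past frame 7, hence the delete that follows.)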
+ cache.delete(page_key); + } } self.dirty_pages.borrow_mut().clear(); self.flush_info.borrow_mut().state = FlushState::WaitAppendFrames; @@ -389,13 +409,16 @@ impl Pager { } } FlushState::SyncWal => { - match self.wal.borrow_mut().sync() { + let wal = self.wal.clone().ok_or(LimboError::InternalError( + "SyncWal was called without an existing wal".to_string(), + ))?; + match wal.borrow_mut().sync() { Ok(CheckpointStatus::IO) => return Ok(CheckpointStatus::IO), Ok(CheckpointStatus::Done(res)) => checkpoint_result = res, Err(e) => return Err(e), } - let should_checkpoint = self.wal.borrow().should_checkpoint(); + let should_checkpoint = wal.borrow().should_checkpoint(); if should_checkpoint { self.flush_info.borrow_mut().state = FlushState::Checkpoint; } else { @@ -437,11 +460,13 @@ impl Pager { match state { CheckpointState::Checkpoint => { let in_flight = self.checkpoint_inflight.clone(); - match self.wal.borrow_mut().checkpoint( - self, - in_flight, - CheckpointMode::Passive, - )? { + let wal = self.wal.clone().ok_or(LimboError::InternalError( + "Checkpoint was called without an existing wal".to_string(), + ))?; + match wal + .borrow_mut() + .checkpoint(self, in_flight, CheckpointMode::Passive)? + { CheckpointStatus::IO => return Ok(CheckpointStatus::IO), CheckpointStatus::Done(res) => { checkpoint_result = res; @@ -478,7 +503,7 @@ impl Pager { pub fn clear_page_cache(&self) -> CheckpointResult { let checkpoint_result: CheckpointResult; loop { - match self.wal.borrow_mut().checkpoint( + match self.wal.clone().unwrap().borrow_mut().checkpoint( self, Rc::new(RefCell::new(0)), CheckpointMode::Passive, @@ -603,8 +628,12 @@ impl Pager { page.set_dirty(); self.add_dirty(page.get().id); let mut cache = self.page_cache.write(); - let page_key = - PageCacheKey::new(page.get().id, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + + let page_key = PageCacheKey::new(page.get().id, Some(max_frame)); cache.insert(page_key, page.clone()); } Ok(page) @@ -613,7 +642,11 @@ impl Pager { pub fn put_loaded_page(&self, id: usize, page: PageRef) { let mut cache = self.page_cache.write(); // cache insert invalidates previous page - let page_key = PageCacheKey::new(id, Some(self.wal.borrow().get_max_frame())); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(id, Some(max_frame)); cache.insert(page_key, page.clone()); page.set_loaded(); } From b519509349d77203eaed0e54dec09c6ef566c2b8 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 3 Apr 2025 15:16:33 -0300 Subject: [PATCH 205/425] core/io: Add internal in-memory MemoryIO to all IO layers Honestly I'm not 100% sure this is a good idea; the reasoning is that in any IO we'll want to do memory-only operations like creating tables etc, so we may want a common way to access it --- core/io/generic.rs | 7 +++++++ core/io/io_uring.rs | 8 +++++++- core/io/memory.rs | 4 ++++ core/io/mod.rs | 2 ++ core/io/unix.rs | 8 +++++++- core/io/vfs.rs | 10 +++++++++- core/io/windows.rs | 11 +++++++++-- 7 files changed, 45 insertions(+), 5 deletions(-) diff --git a/core/io/generic.rs b/core/io/generic.rs index e1c7eb1f1..b17a0ea5a 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -3,6 +3,7 @@ use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; use tracing::{debug, trace}; +use super::MemoryIO; pub struct GenericIO {}
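// (For context, a sketch of how a later patch in this series consumes the new
// accessor: `match conn.pager.io.get_memory_io() { Some(io) => io, None =>
// conn.pager.io.clone() }`, i.e. use the internal MemoryIO when one exists and
// fall back to the real IO otherwise.)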
.open(path)?; Ok(Arc::new(GenericFile { file: RefCell::new(file), + memory_io: Arc::new(MemoryIO::new()), })) } @@ -48,10 +50,15 @@ impl Clock for GenericIO { micros: now.timestamp_subsec_micros(), } } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + Some(self.memory_io.clone()) + } } pub struct GenericFile { file: RefCell<std::fs::File>, + memory_io: Arc<MemoryIO>, } unsafe impl Send for GenericFile {} diff --git a/core/io/io_uring.rs b/core/io/io_uring.rs index 77d574639..7d73461a0 100644 --- a/core/io/io_uring.rs +++ b/core/io/io_uring.rs @@ -1,5 +1,5 @@ use super::{common, Completion, File, OpenFlags, WriteCompletion, IO}; -use crate::{LimboError, Result}; +use crate::{LimboError, MemoryIO, Result}; use rustix::fs::{self, FlockOperation, OFlags}; use rustix::io_uring::iovec; use std::cell::RefCell; @@ -35,6 +35,7 @@ impl fmt::Display for UringIOError { pub struct UringIO { inner: Rc<RefCell<InnerUringIO>>, + memory_io: Arc<MemoryIO>, } unsafe impl Send for UringIO {} @@ -78,6 +79,7 @@ impl UringIO { debug!("Using IO backend 'io-uring'"); Ok(Self { inner: Rc::new(RefCell::new(inner)), + memory_io: Arc::new(MemoryIO::new()), }) } } @@ -207,6 +209,10 @@ impl Clock for UringIO { micros: now.timestamp_subsec_micros(), } } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + Some(self.memory_io.clone()) + } } pub struct UringFile { diff --git a/core/io/memory.rs b/core/io/memory.rs index 92a61bba7..d573c443a 100644 --- a/core/io/memory.rs +++ b/core/io/memory.rs @@ -58,6 +58,10 @@ impl IO for MemoryIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + None + } } pub struct MemoryFile { diff --git a/core/io/mod.rs b/core/io/mod.rs index 1cda42380..36c39e013 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -40,6 +40,8 @@ pub trait IO: Clock + Send + Sync { fn run_once(&self) -> Result<()>; fn generate_random_number(&self) -> i64; + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>>; } pub type Complete = dyn Fn(Arc<RefCell<Buffer>>); diff --git a/core/io/unix.rs b/core/io/unix.rs index 32054e2d5..9c8ac6fed 100644 --- a/core/io/unix.rs +++ b/core/io/unix.rs @@ -2,7 +2,7 @@ use crate::error::LimboError; use crate::io::common; use crate::Result; -use super::{Completion, File, OpenFlags, IO}; +use super::{Completion, File, MemoryIO, OpenFlags, IO}; use polling::{Event, Events, Poller}; use rustix::{ fd::{AsFd, AsRawFd}, @@ -167,6 +167,7 @@ pub struct UnixIO { poller: PollHandler, events: EventsHandler, callbacks: OwnedCallbacks, + memory_io: Arc<MemoryIO>, } unsafe impl Send for UnixIO {} @@ -180,6 +181,7 @@ impl UnixIO { poller: PollHandler::new(), events: EventsHandler::new(), callbacks: OwnedCallbacks::new(), + memory_io: Arc::new(MemoryIO::new()), }) } } @@ -258,6 +260,10 @@ impl IO for UnixIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + Some(self.memory_io.clone()) + } } enum CompletionCallback { diff --git a/core/io/vfs.rs b/core/io/vfs.rs index 4d9a6d6e2..ede08c7cf 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -1,4 +1,4 @@ -use super::{Buffer, Completion, File, OpenFlags, IO}; +use super::{Buffer, Completion, File, MemoryIO, OpenFlags, IO}; use crate::ext::VfsMod; use crate::io::clock::{Clock, Instant}; use crate::{LimboError, Result}; @@ -50,6 +50,10 @@ impl IO for VfsMod { let vfs = unsafe { &*self.ctx }; unsafe { (vfs.gen_random_number)() } } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + Some(Arc::new(MemoryIO::new())) + } } impl VfsMod { @@ -65,6 +69,10 @@ impl VfsMod { cstr.to_string_lossy().into_owned() } } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + None + } }
impl File for VfsFileImpl { diff --git a/core/io/windows.rs b/core/io/windows.rs index 2887ea308..7c3c2e015 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -3,8 +3,10 @@ use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; use tracing::{debug, trace}; - -pub struct WindowsIO {} +use super::MemoryIO; +pub struct WindowsIO { + memory_io: Arc<MemoryIO>, +} impl WindowsIO { pub fn new() -> Result<Self> { @@ -26,6 +28,7 @@ impl IO for WindowsIO { .open(path)?; Ok(Arc::new(WindowsFile { file: RefCell::new(file), + memory_io: Arc::new(MemoryIO::new()), })) } @@ -48,6 +51,10 @@ impl Clock for WindowsIO { micros: now.timestamp_subsec_micros(), } } + + fn get_memory_io(&self) -> Option<Arc<MemoryIO>> { + Some(self.memory_io.clone()) + } } pub struct WindowsFile { From e5144bb6a9cba73764fc5ba35ad69fcec990db84 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 3 Apr 2025 15:47:19 -0300 Subject: [PATCH 206/425] core/storage: Create FileMemoryStorage This is basically a copy of DatabaseStorage but outside the fs compilation flag; this way, we can access MemoryIO regardless of the storage medium. --- core/storage/database.rs | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/core/storage/database.rs b/core/storage/database.rs index f23d2d3ee..33ca2ac18 100644 --- a/core/storage/database.rs +++ b/core/storage/database.rs @@ -70,3 +70,52 @@ impl DatabaseFile { Self { file } } } + +pub struct FileMemoryStorage { + file: Arc<dyn File>, +} + +unsafe impl Send for FileMemoryStorage {} +unsafe impl Sync for FileMemoryStorage {} + +impl DatabaseStorage for FileMemoryStorage { + fn read_page(&self, page_idx: usize, c: Completion) -> Result<()> { + let r = match c { + Completion::Read(ref r) => r, + _ => unreachable!(), + }; + let size = r.buf().len(); + assert!(page_idx > 0); + if !(512..=65536).contains(&size) || size & (size - 1) != 0 { + return Err(LimboError::NotADB); + } + let pos = (page_idx - 1) * size; + self.file.pread(pos, c)?; + Ok(()) + } + + fn write_page( + &self, + page_idx: usize, + buffer: Arc<RefCell<Buffer>>, + c: Completion, + ) -> Result<()> { + let buffer_size = buffer.borrow().len(); + assert!(buffer_size >= 512); + assert!(buffer_size <= 65536); + assert_eq!(buffer_size & (buffer_size - 1), 0); + let pos = (page_idx - 1) * buffer_size; + self.file.pwrite(pos, buffer, c)?; + Ok(()) + } + + fn sync(&self, c: Completion) -> Result<()> { + self.file.sync(c) + } +} + +impl FileMemoryStorage { + pub fn new(file: Arc<dyn File>) -> Self { + Self { file } + } } From 66e12e1c2dd493dd2785cd80fcb1230fd299a0a5 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 3 Apr 2025 15:56:51 -0300 Subject: [PATCH 207/425] core/vdbe: Create OpenEphemeral bytecode "Open a new cursor P1 to a transient table. The cursor is always opened read/write even if the main database is read-only. The ephemeral table is deleted automatically when the cursor is closed. If the cursor P1 is already opened on an ephemeral table, the table is cleared (all content is erased)."
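(For orientation, one query shape SQLite typically serves with this opcode, assuming its usual codegen: SELECT * FROM t WHERE x IN (SELECT y FROM u); materializes the subquery rows into exactly such a transient table before probing it.)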
There is still some work to do, but this is a basic setup --- core/storage/pager.rs | 57 ++++++++++++++------------- core/vdbe/execute.rs | 90 ++++++++++++++++++++++++++++++++++++++++++- core/vdbe/explain.rs | 16 ++++++++ core/vdbe/insn.rs | 56 +++------------------------ 4 files changed, 138 insertions(+), 81 deletions(-) diff --git a/core/storage/pager.rs b/core/storage/pager.rs index e89b659bf..af574053b 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -258,23 +258,23 @@ impl Pager { } pub fn end_tx(&self) -> Result { - if let Some(wal) = &self.wal { - let checkpoint_status = self.cacheflush()?; - return match checkpoint_status { - CheckpointStatus::IO => Ok(checkpoint_status), - CheckpointStatus::Done(_) => { - wal.borrow().end_write_tx()?; - wal.borrow().end_read_tx()?; - Ok(checkpoint_status) - } + if let Some(wal) = &self.wal { + let checkpoint_status = self.cacheflush()?; + return match checkpoint_status { + CheckpointStatus::IO => Ok(checkpoint_status), + CheckpointStatus::Done(_) => { + wal.borrow().end_write_tx()?; + wal.borrow().end_read_tx()?; + Ok(checkpoint_status) + } + }; } - } - Ok(CheckpointStatus::Done(CheckpointResult::default())) + Ok(CheckpointStatus::Done(CheckpointResult::default())) } pub fn end_read_tx(&self) -> Result<()> { - if let Some(wal) = &self.wal { + if let Some(wal) = &self.wal { wal.borrow().end_read_tx()?; } Ok(()) @@ -297,19 +297,18 @@ impl Pager { page.set_locked(); if let Some(wal) = &self.wal { - if let Some(frame_id) = wal.borrow().find_frame(page_idx as u64)? { - wal - .borrow() - .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; - { - page.set_uptodate(); + if let Some(frame_id) = wal.borrow().find_frame(page_idx as u64)? { + wal.borrow() + .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; + { + page.set_uptodate(); + } + // TODO(pere) ensure page is inserted, we should probably first insert to page cache + // and if successful, read frame or page + page_cache.insert(page_key, page.clone()); + return Ok(page); } - // TODO(pere) ensure page is inserted, we should probably first insert to page cache - // and if successful, read frame or page - page_cache.insert(page_key, page.clone()); - return Ok(page); } - } sqlite3_ondisk::begin_read_page( self.db_file.clone(), self.buffer_pool.clone(), @@ -327,7 +326,7 @@ impl Pager { trace!("load_page(page_idx = {})", id); let mut page_cache = self.page_cache.write(); page.set_locked(); - if let Some(wal) = &self.wal { + if let Some(wal) = &self.wal { let page_key = PageCacheKey::new(id, Some(wal.borrow().get_max_frame())); if let Some(frame_id) = wal.borrow().find_frame(id as u64)? { wal.borrow() @@ -340,8 +339,8 @@ impl Pager { page_cache.insert(page_key, page.clone()); } return Ok(()); + } } - } sqlite3_ondisk::begin_read_page( self.db_file.clone(), self.buffer_pool.clone(), @@ -350,7 +349,7 @@ impl Pager { )?; Ok(()) -} + } /// Writes the database header. 
pub fn write_database_header(&self, header: &DatabaseHeader) { @@ -379,7 +378,7 @@ impl Pager { let db_size = self.db_header.lock().database_size; for page_id in self.dirty_pages.borrow().iter() { let mut cache = self.page_cache.write(); - if let Some(wal) = &self.wal { + if let Some(wal) = &self.wal { let page_key = PageCacheKey::new(*page_id, Some(wal.borrow().get_max_frame())); let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); @@ -412,7 +411,7 @@ impl Pager { let wal = self.wal.clone().ok_or(LimboError::InternalError( "SyncWal was called without a existing wal".to_string(), ))?; - match wal.borrow_mut().sync() { + match wal.borrow_mut().sync() { Ok(CheckpointStatus::IO) => return Ok(CheckpointStatus::IO), Ok(CheckpointStatus::Done(res)) => checkpoint_result = res, Err(e) => return Err(e), @@ -628,7 +627,7 @@ impl Pager { page.set_dirty(); self.add_dirty(page.get().id); let mut cache = self.page_cache.write(); - let max_frame = match &self.wal { + let max_frame = match &self.wal { Some(wal) => wal.borrow().get_max_frame(), None => 0, }; diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index ecc97d088..fb23141d0 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1,4 +1,6 @@ #![allow(unused_variables)] +use crate::storage::database::FileMemoryStorage; +use crate::storage::page_cache::DumbLruPageCache; use crate::{ error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}, ext::ExtValue, @@ -10,7 +12,7 @@ use crate::{ printf::exec_printf, }, }; -use std::{borrow::BorrowMut, rc::Rc}; +use std::{borrow::BorrowMut, rc::Rc, sync::Arc}; use crate::{pseudo::PseudoCursor, result::LimboResult}; @@ -36,12 +38,13 @@ use crate::{ vector::{vector32, vector64, vector_distance_cos, vector_extract}, }; -use crate::{info, MvCursor, RefValue, Row, StepResult, TransactionState}; +use crate::{info, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, TransactionState}; use super::{ insn::{Cookie, RegisterOrLiteral}, HaltState, }; +use parking_lot::RwLock; use rand::thread_rng; use super::{ @@ -4504,6 +4507,89 @@ pub fn op_noop( Ok(InsnFunctionStepResult::Step) } +pub fn op_open_ephemeral( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::OpenEphemeral { + cursor_id, + is_btree, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + + let conn = program.connection.upgrade().unwrap(); + // Only memory and vfs IOs returns None, so cloning is safe + let io = match conn.pager.io.get_memory_io() { + Some(io) => io, + None => conn.pager.io.clone(), + }; + + let file = io.open_file("", OpenFlags::Create, true)?; + let page_io = Arc::new(FileMemoryStorage::new(file)); + + let db_header = Pager::begin_open(page_io.clone())?; + let buffer_pool = Rc::new(BufferPool::new(512)); + let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); + + let pager = Rc::new(Pager::finish_open( + db_header, + page_io, + None, + io, + page_cache, + buffer_pool, + )?); + + let root_page = pager.btree_create(*is_btree as usize); + + let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); + let mv_cursor = match state.mv_tx_id { + Some(tx_id) => { + let table_id = root_page as u64; + let mv_store = mv_store.as_ref().unwrap().clone(); + let mv_cursor = Rc::new(RefCell::new( + MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(), + )); + Some(mv_cursor) + } + None => None, + }; + let cursor = 
BTreeCursor::new(mv_cursor, pager, root_page as usize); + let mut cursors: std::cell::RefMut<'_, Vec>> = state.cursors.borrow_mut(); + // Table content is erased if the cursor already exists + match cursor_type { + CursorType::BTreeTable(_) => { + cursors + .get_mut(*cursor_id) + .unwrap() + .replace(Cursor::new_btree(cursor)); + } + CursorType::BTreeIndex(_) => { + cursors + .get_mut(*cursor_id) + .unwrap() + .replace(Cursor::new_btree(cursor)); + } + CursorType::Pseudo(_) => { + panic!("OpenEphemeral on pseudo cursor"); + } + CursorType::Sorter => { + panic!("OpenEphemeral on sorter cursor"); + } + CursorType::VirtualTable(_) => { + panic!("OpenEphemeral on virtual table cursor, use Insn::VOpenAsync instead"); + } + } + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + fn exec_lower(reg: &OwnedValue) -> Option { match reg { OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 79b0f3ded..40854da24 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1389,6 +1389,22 @@ pub fn insn_to_str( 0, format!("auto_commit={}, rollback={}", auto_commit, rollback), ), + Insn::OpenEphemeral { + cursor_id, + is_btree, + } => ( + "OpenEphemeral", + *cursor_id as i32, + *is_btree as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "cursor={} is_btree={}", + cursor_id, + if *is_btree { "true" } else { "false" } + ), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index acd8166c3..83bac3701 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -812,40 +812,33 @@ pub enum Insn { dest: usize, cookie: Cookie, }, + /// Open a new cursor P1 to a transient table. + OpenEphemeral { + cursor_id: usize, + is_btree: bool, + }, } impl Insn { pub fn to_function(&self) -> InsnFunction { match self { Insn::Init { .. } => execute::op_init, - Insn::Null { .. } => execute::op_null, - Insn::NullRow { .. } => execute::op_null_row, - Insn::Add { .. } => execute::op_add, - Insn::Subtract { .. } => execute::op_subtract, - Insn::Multiply { .. } => execute::op_multiply, - Insn::Divide { .. } => execute::op_divide, - Insn::Compare { .. } => execute::op_compare, Insn::BitAnd { .. } => execute::op_bit_and, - Insn::BitOr { .. } => execute::op_bit_or, - Insn::BitNot { .. } => execute::op_bit_not, - Insn::Checkpoint { .. } => execute::op_checkpoint, Insn::Remainder { .. } => execute::op_remainder, - Insn::Jump { .. } => execute::op_jump, Insn::Move { .. } => execute::op_move, Insn::IfPos { .. } => execute::op_if_pos, Insn::NotNull { .. } => execute::op_not_null, - Insn::Eq { .. } => execute::op_eq, Insn::Ne { .. } => execute::op_ne, Insn::Lt { .. } => execute::op_lt, @@ -856,11 +849,8 @@ impl Insn { Insn::IfNot { .. } => execute::op_if_not, Insn::OpenReadAsync { .. } => execute::op_open_read_async, Insn::OpenReadAwait => execute::op_open_read_await, - Insn::VOpenAsync { .. } => execute::op_vopen_async, - Insn::VOpenAwait => execute::op_vopen_await, - Insn::VCreate { .. } => execute::op_vcreate, Insn::VFilter { .. } => execute::op_vfilter, Insn::VColumn { .. } => execute::op_vcolumn, @@ -868,43 +858,29 @@ impl Insn { Insn::VNext { .. } => execute::op_vnext, Insn::OpenPseudo { .. } => execute::op_open_pseudo, Insn::RewindAsync { .. } => execute::op_rewind_async, - Insn::RewindAwait { .. } => execute::op_rewind_await, Insn::LastAsync { .. } => execute::op_last_async, - Insn::LastAwait { .. } => execute::op_last_await, Insn::Column { .. 
} => execute::op_column, Insn::TypeCheck { .. } => execute::op_type_check, Insn::MakeRecord { .. } => execute::op_make_record, Insn::ResultRow { .. } => execute::op_result_row, - Insn::NextAsync { .. } => execute::op_next_async, - Insn::NextAwait { .. } => execute::op_next_await, Insn::PrevAsync { .. } => execute::op_prev_async, - Insn::PrevAwait { .. } => execute::op_prev_await, Insn::Halt { .. } => execute::op_halt, Insn::Transaction { .. } => execute::op_transaction, - Insn::AutoCommit { .. } => execute::op_auto_commit, Insn::Goto { .. } => execute::op_goto, - Insn::Gosub { .. } => execute::op_gosub, Insn::Return { .. } => execute::op_return, - Insn::Integer { .. } => execute::op_integer, - Insn::Real { .. } => execute::op_real, - Insn::RealAffinity { .. } => execute::op_real_affinity, - Insn::String8 { .. } => execute::op_string8, - Insn::Blob { .. } => execute::op_blob, - Insn::RowId { .. } => execute::op_row_id, - Insn::SeekRowid { .. } => execute::op_seek_rowid, Insn::DeferredSeek { .. } => execute::op_deferred_seek, Insn::SeekGE { .. } => execute::op_seek, @@ -917,10 +893,8 @@ impl Insn { Insn::IdxLE { .. } => execute::op_idx_le, Insn::IdxLT { .. } => execute::op_idx_lt, Insn::DecrJumpZero { .. } => execute::op_decr_jump_zero, - Insn::AggStep { .. } => execute::op_agg_step, Insn::AggFinal { .. } => execute::op_agg_final, - Insn::SorterOpen { .. } => execute::op_sorter_open, Insn::SorterInsert { .. } => execute::op_sorter_insert, Insn::SorterSort { .. } => execute::op_sorter_sort, @@ -929,57 +903,39 @@ impl Insn { Insn::Function { .. } => execute::op_function, Insn::InitCoroutine { .. } => execute::op_init_coroutine, Insn::EndCoroutine { .. } => execute::op_end_coroutine, - Insn::Yield { .. } => execute::op_yield, Insn::InsertAsync { .. } => execute::op_insert_async, Insn::InsertAwait { .. } => execute::op_insert_await, Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, Insn::DeleteAsync { .. } => execute::op_delete_async, - Insn::DeleteAwait { .. } => execute::op_delete_await, - Insn::NewRowid { .. } => execute::op_new_rowid, Insn::MustBeInt { .. } => execute::op_must_be_int, - Insn::SoftNull { .. } => execute::op_soft_null, - Insn::NotExists { .. } => execute::op_not_exists, Insn::OffsetLimit { .. } => execute::op_offset_limit, Insn::OpenWriteAsync { .. } => execute::op_open_write_async, Insn::OpenWriteAwait { .. } => execute::op_open_write_await, - Insn::Copy { .. } => execute::op_copy, Insn::CreateBtree { .. } => execute::op_create_btree, - Insn::Destroy { .. } => execute::op_destroy, Insn::DropTable { .. } => execute::op_drop_table, Insn::Close { .. } => execute::op_close, - Insn::IsNull { .. } => execute::op_is_null, - Insn::ParseSchema { .. } => execute::op_parse_schema, - Insn::ShiftRight { .. } => execute::op_shift_right, - Insn::ShiftLeft { .. } => execute::op_shift_left, - Insn::Variable { .. } => execute::op_variable, - Insn::ZeroOrNull { .. } => execute::op_zero_or_null, - Insn::Not { .. } => execute::op_not, - Insn::Concat { .. } => execute::op_concat, - Insn::And { .. } => execute::op_and, - Insn::Or { .. } => execute::op_or, - Insn::Noop => execute::op_noop, Insn::PageCount { .. } => execute::op_page_count, - Insn::ReadCookie { .. } => execute::op_read_cookie, + Insn::OpenEphemeral { .. 
} => execute::op_open_ephemeral, } } } From 79f8b83cbe17457125cf05d953bb445b7dd04db3 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Thu, 3 Apr 2025 16:25:13 -0300 Subject: [PATCH 208/425] Fix dumb clippy errors --- bindings/javascript/src/lib.rs | 4 ++++ bindings/wasm/lib.rs | 4 ++++ core/io/generic.rs | 8 ++++++-- core/io/windows.rs | 5 +++-- core/storage/database.rs | 1 - core/vdbe/execute.rs | 2 +- simulator/runner/io.rs | 4 ++++ 7 files changed, 22 insertions(+), 6 deletions(-) diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index a9c0d72a5..eaa2fc3a6 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -176,4 +176,8 @@ impl limbo_core::IO for IO { fn generate_random_number(&self) -> i64 { todo!(); } + + fn get_memory_io(&self) -> Option> { + todo!() + } } diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 91680dc96..6b4173c90 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -305,6 +305,10 @@ impl limbo_core::IO for PlatformIO { let random_f64 = Math_random(); (random_f64 * i64::MAX as f64) as i64 } + + fn get_memory_io(&self) -> Option> { + None // TODO: Make sure if memory isn't needed here + } } #[wasm_bindgen] diff --git a/core/io/generic.rs b/core/io/generic.rs index b17a0ea5a..03d5fdd3d 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -5,12 +5,16 @@ use std::sync::Arc; use tracing::{debug, trace}; use super::MemoryIO; -pub struct GenericIO {} +pub struct GenericIO { + memory_io: Arc, +} impl GenericIO { pub fn new() -> Result { debug!("Using IO backend 'generic'"); - Ok(Self {}) + Ok(Self { + memory_io: Arc::new(MemoryIO::new()), + }) } } diff --git a/core/io/windows.rs b/core/io/windows.rs index 7c3c2e015..af36119d0 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -11,7 +11,9 @@ pub struct WindowsIO { impl WindowsIO { pub fn new() -> Result { debug!("Using IO backend 'syscall'"); - Ok(Self {}) + Ok(Self { + memory_io: Arc::new(MemoryIO::new()), + }) } } @@ -28,7 +30,6 @@ impl IO for WindowsIO { .open(path)?; Ok(Arc::new(WindowsFile { file: RefCell::new(file), - memory_io: Arc::new(MemoryIO::new()), })) } diff --git a/core/storage/database.rs b/core/storage/database.rs index 33ca2ac18..cf8b57d8e 100644 --- a/core/storage/database.rs +++ b/core/storage/database.rs @@ -1,4 +1,3 @@ -#[cfg(feature = "fs")] use crate::error::LimboError; use crate::{io::Completion, Buffer, Result}; use std::{cell::RefCell, sync::Arc}; diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index fb23141d0..3ad0a91f8 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4551,7 +4551,7 @@ pub fn op_open_ephemeral( let mv_cursor = match state.mv_tx_id { Some(tx_id) => { let table_id = root_page as u64; - let mv_store = mv_store.as_ref().unwrap().clone(); + let mv_store = mv_store.unwrap().clone(); let mv_cursor = Rc::new(RefCell::new( MvCursor::new(mv_store.clone(), tx_id, table_id).unwrap(), )); diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index d1c280b4e..0a7ff3b3a 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -97,4 +97,8 @@ impl IO for SimulatorIO { fn generate_random_number(&self) -> i64 { self.rng.borrow_mut().next_u64() as i64 } + + fn get_memory_io(&self) -> Option> { + todo!() + } } From d9bf38350773789fe01df952e27ba4cadd2a56a1 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 4 Apr 2025 00:35:36 -0300 Subject: [PATCH 209/425] core/io: Untie MemoryIO's lifetime of the IO layer --- bindings/javascript/src/lib.rs | 4 ++-- 
bindings/wasm/lib.rs | 4 ++-- core/io/generic.rs | 8 +++----- core/io/io_uring.rs | 6 ++---- core/io/memory.rs | 4 ++-- core/io/mod.rs | 2 +- core/io/unix.rs | 6 ++---- core/io/vfs.rs | 4 ++-- core/io/windows.rs | 6 ++---- core/vdbe/execute.rs | 10 ++++------ simulator/runner/io.rs | 2 +- 11 files changed, 23 insertions(+), 33 deletions(-) diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index eaa2fc3a6..2e0054358 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -177,7 +177,7 @@ impl limbo_core::IO for IO { todo!(); } - fn get_memory_io(&self) -> Option> { - todo!() + fn get_memory_io(&self) -> Arc { + Arc::new(limbo_core::MemoryIO::new()) } } diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 6b4173c90..a704706be 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -306,8 +306,8 @@ impl limbo_core::IO for PlatformIO { (random_f64 * i64::MAX as f64) as i64 } - fn get_memory_io(&self) -> Option> { - None // TODO: Make sure if memory isn't needed here + fn get_memory_io(&self) -> Arc { + Arc::new(limbo_core::MemoryIO::new()) } } diff --git a/core/io/generic.rs b/core/io/generic.rs index 03d5fdd3d..d67a93dd7 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -6,14 +6,12 @@ use tracing::{debug, trace}; use super::MemoryIO; pub struct GenericIO { - memory_io: Arc, } impl GenericIO { pub fn new() -> Result { debug!("Using IO backend 'generic'"); Ok(Self { - memory_io: Arc::new(MemoryIO::new()), }) } } @@ -55,9 +53,9 @@ impl Clock for GenericIO { } } - fn get_memory_io(&self) -> Option> { - Some(self.memory_io.clone()) - } + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } } pub struct GenericFile { diff --git a/core/io/io_uring.rs b/core/io/io_uring.rs index 7d73461a0..6e2fc1e7e 100644 --- a/core/io/io_uring.rs +++ b/core/io/io_uring.rs @@ -35,7 +35,6 @@ impl fmt::Display for UringIOError { pub struct UringIO { inner: Rc>, - memory_io: Arc, } unsafe impl Send for UringIO {} @@ -79,7 +78,6 @@ impl UringIO { debug!("Using IO backend 'io-uring'"); Ok(Self { inner: Rc::new(RefCell::new(inner)), - memory_io: Arc::new(MemoryIO::new()), }) } } @@ -210,8 +208,8 @@ impl Clock for UringIO { } } - fn get_memory_io(&self) -> Option> { - Some(self.memory_io.clone()) + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/memory.rs b/core/io/memory.rs index d573c443a..9cc56a5e3 100644 --- a/core/io/memory.rs +++ b/core/io/memory.rs @@ -59,8 +59,8 @@ impl IO for MemoryIO { i64::from_ne_bytes(buf) } - fn get_memory_io(&self) -> Option> { - None + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/mod.rs b/core/io/mod.rs index 36c39e013..6f161d114 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -41,7 +41,7 @@ pub trait IO: Clock + Send + Sync { fn generate_random_number(&self) -> i64; - fn get_memory_io(&self) -> Option>; + fn get_memory_io(&self) -> Arc; } pub type Complete = dyn Fn(Arc>); diff --git a/core/io/unix.rs b/core/io/unix.rs index 9c8ac6fed..c232ed3ad 100644 --- a/core/io/unix.rs +++ b/core/io/unix.rs @@ -167,7 +167,6 @@ pub struct UnixIO { poller: PollHandler, events: EventsHandler, callbacks: OwnedCallbacks, - memory_io: Arc, } unsafe impl Send for UnixIO {} @@ -181,7 +180,6 @@ impl UnixIO { poller: PollHandler::new(), events: EventsHandler::new(), callbacks: OwnedCallbacks::new(), - memory_io: Arc::new(MemoryIO::new()), }) } } @@ -261,8 +259,8 @@ impl IO for UnixIO { i64::from_ne_bytes(buf) } - fn get_memory_io(&self) 
-> Option> { - Some(self.memory_io.clone()) + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/vfs.rs b/core/io/vfs.rs index ede08c7cf..6af47b176 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -70,8 +70,8 @@ impl VfsMod { } } - fn get_memory_io(&self) -> Option> { - None + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/io/windows.rs b/core/io/windows.rs index af36119d0..f970ef02d 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -5,14 +5,12 @@ use std::sync::Arc; use tracing::{debug, trace}; use super::MemoryIO; pub struct WindowsIO { - memory_io: Arc, } impl WindowsIO { pub fn new() -> Result { debug!("Using IO backend 'syscall'"); Ok(Self { - memory_io: Arc::new(MemoryIO::new()), }) } } @@ -53,8 +51,8 @@ impl Clock for WindowsIO { } } - fn get_memory_io(&self) -> Option> { - Some(self.memory_io.clone()) + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 3ad0a91f8..4f40f0d21 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -38,7 +38,9 @@ use crate::{ vector::{vector32, vector64, vector_distance_cos, vector_extract}, }; -use crate::{info, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, TransactionState}; +use crate::{ + info, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, TransactionState, IO, +}; use super::{ insn::{Cookie, RegisterOrLiteral}, @@ -4523,11 +4525,7 @@ pub fn op_open_ephemeral( }; let conn = program.connection.upgrade().unwrap(); - // Only memory and vfs IOs returns None, so cloning is safe - let io = match conn.pager.io.get_memory_io() { - Some(io) => io, - None => conn.pager.io.clone(), - }; + let io = conn.pager.io.get_memory_io(); let file = io.open_file("", OpenFlags::Create, true)?; let page_io = Arc::new(FileMemoryStorage::new(file)); diff --git a/simulator/runner/io.rs b/simulator/runner/io.rs index 0a7ff3b3a..c775b3f9e 100644 --- a/simulator/runner/io.rs +++ b/simulator/runner/io.rs @@ -98,7 +98,7 @@ impl IO for SimulatorIO { self.rng.borrow_mut().next_u64() as i64 } - fn get_memory_io(&self) -> Option> { + fn get_memory_io(&self) -> Arc { todo!() } } From 09d83aadf3c0b49d00299b1595fe8d1e7b6adf04 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 7 Apr 2025 23:05:01 -0300 Subject: [PATCH 210/425] Fix dumb conflict errors --- core/io/generic.rs | 16 +++++++--------- core/io/io_uring.rs | 10 +++++----- core/io/vfs.rs | 8 ++------ core/io/windows.rs | 16 +++++++--------- 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/core/io/generic.rs b/core/io/generic.rs index d67a93dd7..fd59ece88 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -1,18 +1,16 @@ +use super::MemoryIO; use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; use tracing::{debug, trace}; -use super::MemoryIO; -pub struct GenericIO { -} +pub struct GenericIO {} impl GenericIO { pub fn new() -> Result { debug!("Using IO backend 'generic'"); - Ok(Self { - }) + Ok(Self {}) } } @@ -42,6 +40,10 @@ impl IO for GenericIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } + + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } } impl Clock for GenericIO { @@ -52,10 +54,6 @@ impl Clock for GenericIO { micros: now.timestamp_subsec_micros(), } } - - fn get_memory_io(&self) -> Arc { - Arc::new(MemoryIO::new()) - } } pub struct GenericFile { diff --git 
a/core/io/io_uring.rs b/core/io/io_uring.rs index 6e2fc1e7e..b4b21aca8 100644 --- a/core/io/io_uring.rs +++ b/core/io/io_uring.rs @@ -1,4 +1,5 @@ use super::{common, Completion, File, OpenFlags, WriteCompletion, IO}; +use crate::io::clock::{Clock, Instant}; use crate::{LimboError, MemoryIO, Result}; use rustix::fs::{self, FlockOperation, OFlags}; use rustix::io_uring::iovec; @@ -11,7 +12,6 @@ use std::rc::Rc; use std::sync::Arc; use thiserror::Error; use tracing::{debug, trace}; -use crate::io::clock::{Clock, Instant}; const MAX_IOVECS: u32 = 128; const SQPOLL_IDLE: u32 = 1000; @@ -197,6 +197,10 @@ impl IO for UringIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } + + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } } impl Clock for UringIO { @@ -207,10 +211,6 @@ impl Clock for UringIO { micros: now.timestamp_subsec_micros(), } } - - fn get_memory_io(&self) -> Arc { - Arc::new(MemoryIO::new()) - } } pub struct UringFile { diff --git a/core/io/vfs.rs b/core/io/vfs.rs index 6af47b176..95b4055d0 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -51,8 +51,8 @@ impl IO for VfsMod { unsafe { (vfs.gen_random_number)() } } - fn get_memory_io(&self) -> Option> { - Some(Arc::new(MemoryIO::new())) + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) } } @@ -69,10 +69,6 @@ impl VfsMod { cstr.to_string_lossy().into_owned() } } - - fn get_memory_io(&self) -> Arc { - Arc::new(MemoryIO::new()) - } } impl File for VfsFileImpl { diff --git a/core/io/windows.rs b/core/io/windows.rs index f970ef02d..6c46d1973 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -1,17 +1,15 @@ +use super::MemoryIO; use crate::{Clock, Completion, File, Instant, LimboError, OpenFlags, Result, IO}; use std::cell::RefCell; use std::io::{Read, Seek, Write}; use std::sync::Arc; use tracing::{debug, trace}; -use super::MemoryIO; -pub struct WindowsIO { -} +pub struct WindowsIO {} impl WindowsIO { pub fn new() -> Result { debug!("Using IO backend 'syscall'"); - Ok(Self { - }) + Ok(Self {}) } } @@ -40,6 +38,10 @@ impl IO for WindowsIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } + + fn get_memory_io(&self) -> Arc { + Arc::new(MemoryIO::new()) + } } impl Clock for WindowsIO { @@ -50,10 +52,6 @@ impl Clock for WindowsIO { micros: now.timestamp_subsec_micros(), } } - - fn get_memory_io(&self) -> Arc { - Arc::new(MemoryIO::new()) - } } pub struct WindowsFile { From bcac1fe778a862864b7a7ceb833b0eaa88bb16c8 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 11 Apr 2025 07:24:42 -0300 Subject: [PATCH 211/425] core/vdbe: Rename page_io to db_file in OpenEphemeral --- core/vdbe/execute.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 4f40f0d21..1ddc8cdfc 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4528,15 +4528,15 @@ pub fn op_open_ephemeral( let io = conn.pager.io.get_memory_io(); let file = io.open_file("", OpenFlags::Create, true)?; - let page_io = Arc::new(FileMemoryStorage::new(file)); + let db_file = Arc::new(FileMemoryStorage::new(file)); - let db_header = Pager::begin_open(page_io.clone())?; + let db_header = Pager::begin_open(db_file.clone())?; let buffer_pool = Rc::new(BufferPool::new(512)); let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); let pager = Rc::new(Pager::finish_open( db_header, - page_io, + db_file, None, io, page_cache, From 61c324cca558972d7db5b165e2c95941fe7475e9 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 11 Apr 
2025 07:29:51 -0300 Subject: [PATCH 212/425] core/vdbe: Add missing work to get cursor and transient table usable --- core/vdbe/execute.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 1ddc8cdfc..cac1344ed 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -39,7 +39,8 @@ use crate::{ }; use crate::{ - info, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, TransactionState, IO, + info, maybe_init_database_file, BufferPool, MvCursor, OpenFlags, RefValue, Row, StepResult, + TransactionState, IO, }; use super::{ @@ -4528,10 +4529,11 @@ pub fn op_open_ephemeral( let io = conn.pager.io.get_memory_io(); let file = io.open_file("", OpenFlags::Create, true)?; + maybe_init_database_file(&file, &(io.clone() as Arc))?; let db_file = Arc::new(FileMemoryStorage::new(file)); let db_header = Pager::begin_open(db_file.clone())?; - let buffer_pool = Rc::new(BufferPool::new(512)); + let buffer_pool = Rc::new(BufferPool::new(db_header.lock().page_size as usize)); let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); let pager = Rc::new(Pager::finish_open( @@ -4557,8 +4559,11 @@ pub fn op_open_ephemeral( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize); + let mut cursor = BTreeCursor::new(mv_cursor, pager, root_page as usize); + cursor.rewind()?; // Will never return io + let mut cursors: std::cell::RefMut<'_, Vec>> = state.cursors.borrow_mut(); + // Table content is erased if the cursor already exists match cursor_type { CursorType::BTreeTable(_) => { From 035e6dcef4dac473e77a7ac725d565aeac2eaacc Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 11 Apr 2025 07:32:31 -0300 Subject: [PATCH 213/425] core/vdbe: Fix logic error during btree creation I do think we should change this 1,2 flag to 0,1 or just an enum, to be more Rustacean. The current state can be very misleading --- core/vdbe/execute.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index cac1344ed..d1db2e5ee 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4545,7 +4545,8 @@ pub fn op_open_ephemeral( buffer_pool, )?); - let root_page = pager.btree_create(*is_btree as usize); + let flag = if *is_btree { 1 } else { 0 }; + let root_page = pager.btree_create(flag); From 135330b7361ac9cbabbc6200a15c917c49e446e5 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sat, 12 Apr 2025 20:56:08 -0300 Subject: [PATCH 214/425] core/pager: Fix page handling issue due to change in WAL type --- core/storage/pager.rs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/core/storage/pager.rs b/core/storage/pager.rs index af574053b..9d7affa95 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -326,8 +326,12 @@ impl Pager { trace!("load_page(page_idx = {})", id); let mut page_cache = self.page_cache.write(); page.set_locked(); + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; + let page_key = PageCacheKey::new(id, Some(max_frame)); if let Some(wal) = &self.wal { - let page_key = PageCacheKey::new(id, Some(wal.borrow().get_max_frame())); if let Some(frame_id) = wal.borrow().find_frame(id as u64)?
{ wal.borrow() .read_frame(frame_id, page.clone(), self.buffer_pool.clone())?; @@ -341,6 +345,11 @@ impl Pager { return Ok(()); } } + + // TODO(pere) ensure page is inserted + if !page_cache.contains_key(&page_key) { + page_cache.insert(page_key, page.clone()); + } sqlite3_ondisk::begin_read_page( self.db_file.clone(), self.buffer_pool.clone(), @@ -376,11 +385,14 @@ impl Pager { match state { FlushState::Start => { let db_size = self.db_header.lock().database_size; + let max_frame = match &self.wal { + Some(wal) => wal.borrow().get_max_frame(), + None => 0, + }; for page_id in self.dirty_pages.borrow().iter() { let mut cache = self.page_cache.write(); + let page_key = PageCacheKey::new(*page_id, Some(max_frame)); if let Some(wal) = &self.wal { - let page_key = - PageCacheKey::new(*page_id, Some(wal.borrow().get_max_frame())); let page = cache.get(&page_key).expect("we somehow added a page to dirty list but we didn't mark it as dirty, causing cache to drop it."); let page_type = page.get().contents.as_ref().unwrap().maybe_page_type(); trace!("cacheflush(page={}, page_type={:?}", page_id, page_type); @@ -389,11 +401,11 @@ impl Pager { db_size, self.flush_info.borrow().in_flight_writes.clone(), )?; - // This page is no longer valid. - // For example: - // We took page with key (page_num, max_frame) -- this page is no longer valid for that max_frame so it must be invalidated. - cache.delete(page_key); } + // This page is no longer valid. + // For example: + // We took page with key (page_num, max_frame) -- this page is no longer valid for that max_frame so it must be invalidated. + cache.delete(page_key); } self.dirty_pages.borrow_mut().clear(); self.flush_info.borrow_mut().state = FlushState::WaitAppendFrames; From 4c315e1bb6b5b6e2011c6a689727b3ccfa40baa9 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sun, 13 Apr 2025 11:13:25 -0300 Subject: [PATCH 215/425] core/vdbe: Update OpenEphemeral to use CreateBtreeFlags --- core/vdbe/execute.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index d1db2e5ee..b496ac199 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1,6 +1,7 @@ #![allow(unused_variables)] use crate::storage::database::FileMemoryStorage; use crate::storage::page_cache::DumbLruPageCache; +use crate::storage::pager::CreateBTreeFlags; use crate::{ error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}, ext::ExtValue, @@ -4545,7 +4546,12 @@ pub fn op_open_ephemeral( buffer_pool, )?); - let flag = if *is_btree { 1 } else { 0 }; + let flag = if *is_btree { + &CreateBTreeFlags::new_table() + } else { + &CreateBTreeFlags::new_index() + }; + let root_page = pager.btree_create(flag); let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); From fd79ad2644fd04dceb9e4f76d7f7aeabd3c3103b Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Sun, 13 Apr 2025 11:15:01 -0300 Subject: [PATCH 216/425] core/vdbe: Change `is_btree` to `is_table` in OpenEphemeral --- core/vdbe/execute.rs | 4 ++-- core/vdbe/explain.rs | 8 ++++---- core/vdbe/insn.rs | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index b496ac199..0df9afcc4 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4520,7 +4520,7 @@ pub fn op_open_ephemeral( ) -> Result { let Insn::OpenEphemeral { cursor_id, - is_btree, + is_table, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -4546,7 +4546,7 @@ pub fn op_open_ephemeral( buffer_pool, )?); - let flag = 
if *is_btree { + let flag = if *is_table { &CreateBTreeFlags::new_table() } else { &CreateBTreeFlags::new_index() }; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 40854da24..d4a766d1d 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1391,18 +1391,18 @@ pub fn insn_to_str( ), Insn::OpenEphemeral { cursor_id, - is_btree, + is_table, } => ( "OpenEphemeral", *cursor_id as i32, - *is_btree as i32, + *is_table as i32, 0, OwnedValue::build_text(""), 0, format!( - "cursor={} is_btree={}", + "cursor={} is_table={}", cursor_id, - if *is_btree { "true" } else { "false" } + if *is_table { "true" } else { "false" } ), ), }; diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 83bac3701..e12293a71 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -815,7 +815,7 @@ pub enum Insn { /// Open a new cursor P1 to a transient table. OpenEphemeral { cursor_id: usize, - is_btree: bool, + is_table: bool, }, } From 499d9b8d4597e8479bf813abf2857bb5466b0c53 Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Sun, 13 Apr 2025 21:50:48 +0300 Subject: [PATCH 217/425] Add integration tests for hex numbers --- testing/math.test | 17 +++++++++++++++-- testing/select.test | 4 ++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/testing/math.test b/testing/math.test index afa0e29d1..c5e85186a 100755 --- a/testing/math.test +++ b/testing/math.test @@ -8,18 +8,31 @@ do_execsql_test fuzz-test-failure { SELECT mod(atanh(tanh(-1.0)), ((1.0))) / ((asinh(-1.0) / 2.0 * 1.0) + pow(0.0, 1.0) + 0.5); } {-16.8596516555675} -do_execsql_test add-int { +do_execsql_test add-int-1 { SELECT 10 + 1 } {11} +do_execsql_test add-int-2 { + SELECT 0xA + 0xFF +} {265} + +do_execsql_test add-int-3 { + SELECT 0xA + 1 +} {11} + + do_execsql_test add-float { SELECT 10.1 + 0.3 } {10.4} -do_execsql_test add-int-float { +do_execsql_test add-int-float-1 { SELECT 10 + 0.1 } {10.1} +do_execsql_test add-int-float-2 { + SELECT 0xa + 0.1 +} {10.1} + do_execsql_test add-agg-int-agg-int { SELECT sum(1) + sum(2) } {3} diff --git a/testing/select.test b/testing/select.test index 02236159a..e9d119f51 100755 --- a/testing/select.test +++ b/testing/select.test @@ -11,6 +11,10 @@ do_execsql_test select-const-2 { SELECT 2 } {2} +do_execsql_test select-const-3 { + SELECT 0xDEAF +} {57007} + do_execsql_test select-true { SELECT true } {1} From 000d8756ec02e7024f7e6d13a4e56a9e23ad2171 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 8 Apr 2025 00:33:11 -0300 Subject: [PATCH 218/425] Implement VDestroy opcode --- core/lib.rs | 2 ++ core/translate/schema.rs | 34 +++++++++++++++++++++++++++------ core/vdbe/execute.rs | 24 +++++++++++++++++++++++ core/vdbe/explain.rs | 9 +++++++++ core/vdbe/insn.rs | 11 +++++++++++ testing/cli_tests/extensions.py | 27 ++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 6 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 1ac907af7..839562f5b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -634,6 +634,7 @@ pub struct VirtualTable { args: Option>, pub implementation: Rc, columns: Vec, + kind: VTabKind, } impl VirtualTable { @@ -675,6 +676,7 @@ impl VirtualTable { implementation: module.implementation.clone(), columns, args: exprs, + kind, }); return Ok(vtab); } diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 449d1e0e8..c4d570b5c 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -3,6 +3,7 @@ use std::fmt::Display; use crate::ast; use crate::schema::Schema; use crate::storage::pager::CreateBTreeFlags; use crate::schema::Table;
use crate::translate::ProgramBuilder; use crate::translate::ProgramBuilderOpts; use crate::translate::QueryMode; @@ -543,7 +544,7 @@ pub fn translate_drop_table( approx_num_insns: 30, approx_num_labels: 1, }); - let table = schema.get_btree_table(tbl_name.name.0.as_str()); + let table = schema.get_table(tbl_name.name.0.as_str()); if table.is_none() { if if_exists { let init_label = program.emit_init(); @@ -558,6 +559,7 @@ pub fn translate_drop_table( } bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); } + let table = table.unwrap(); // safe since we just checked for None let init_label = program.emit_init(); @@ -663,11 +665,31 @@ pub fn translate_drop_table( } // 3. Destroy the table structure - program.emit_insn(Insn::Destroy { - root: table.root_page, - former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy) - is_temp: 0, - }); + match table.as_ref() { + Table::BTree(table) => { + program.emit_insn(Insn::Destroy { + root: table.root_page, + former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy) + is_temp: 0, + }); + } + Table::Virtual(vtab) => { + // From what I see, TableValuedFunction is not stored in the schema as a table. + // But this line here below is a safeguard in case this behavior changes in the future + // And mirrors what SQLite does. + if matches!(vtab.kind, limbo_ext::VTabKind::TableValuedFunction) { + return Err(crate::LimboError::ParseError(format!( + "table {} may not be dropped", + vtab.name + ))); + } + program.emit_insn(Insn::VDestroy { + table_name: vtab.name.clone(), + db: 0, // TODO change this for multiple databases + }); + } + Table::Pseudo(..) => unimplemented!(), + }; let r6 = program.alloc_register(); let r7 = program.alloc_register(); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0df9afcc4..40559035d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1134,6 +1134,30 @@ pub fn op_vnext( Ok(InsnFunctionStepResult::Step) } +pub fn op_vdestroy( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::VDestroy { db, table_name } = insn else { + unreachable!("unexpected Insn {:?}", insn) + }; + let Some(conn) = program.connection.upgrade() else { + return Err(crate::LimboError::ExtensionError( + "Failed to upgrade Connection".to_string(), + )); + }; + + { + conn.syms.borrow_mut().vtabs.remove(table_name); + } + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_open_pseudo( program: &Program, state: &mut ProgramState, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index d4a766d1d..a62ca310b 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -449,6 +449,15 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::VDestroy { db, table_name } => ( + "VDestroy", + *db as i32, + 0, + 0, + OwnedValue::build_text(table_name), + 0, + "".to_string(), + ), Insn::OpenPseudo { cursor_id, content_reg, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index e12293a71..608030195 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -311,6 +311,14 @@ pub enum Insn { pc_if_next: BranchOffset, }, + /// P4 is the name of a virtual table in database P1. Call the xDestroy method of that table. + VDestroy { + /// Name of a virtual table being destroyed + table_name: String, + /// The database within which this virtual table needs to be destroyed (P1). + db: usize, + }, + /// Open a cursor for a pseudo-table that contains a single row. 
OpenPseudo { cursor_id: CursorID, @@ -856,6 +864,8 @@ impl Insn { Insn::VColumn { .. } => execute::op_vcolumn, Insn::VUpdate { .. } => execute::op_vupdate, Insn::VNext { .. } => execute::op_vnext, + Insn::VDestroy { .. } => execute::op_vdestroy, + Insn::OpenPseudo { .. } => execute::op_open_pseudo, Insn::RewindAsync { .. } => execute::op_rewind_async, Insn::RewindAwait { .. } => execute::op_rewind_await, @@ -920,6 +930,7 @@ impl Insn { Insn::Copy { .. } => execute::op_copy, Insn::CreateBtree { .. } => execute::op_create_btree, Insn::Destroy { .. } => execute::op_destroy, + Insn::DropTable { .. } => execute::op_drop_table, Insn::Close { .. } => execute::op_close, Insn::IsNull { .. } => execute::op_is_null, diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 4d289f311..21ef1a7c2 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -526,6 +526,32 @@ def test_vfs(): limbo.quit() +def test_drop_virtual_table(): + ext_path = "target/debug/liblimbo_ext_tests" + limbo = TestLimboShell() + limbo.execute_dot(f".load {ext_path}") + limbo.debug_print( + "create virtual table t using kv_store;", + ) + limbo.run_test_fn(".schema", lambda res: "CREATE VIRTUAL TABLE t" in res) + limbo.run_test_fn( + "insert into t values ('hello', 'world');", + null, + "can insert into kv_store vtable", + ) + limbo.run_test_fn( + "DROP TABLE t;", + null, + "can drop kv_store vtable", + ) + limbo.run_test_fn( + "DROP TABLE t;", + lambda res: "× Parse error: No such table: t" == res, + "should error when drop kv_store vtable", + ) + limbo.quit() + + def test_sqlite_vfs_compat(): sqlite = TestLimboShell( init_commands="", @@ -573,6 +599,7 @@ if __name__ == "__main__": test_vfs() test_sqlite_vfs_compat() test_kv() + test_drop_virtual_table() except Exception as e: print(f"Test FAILED: {e}") cleanup() From c0747e8064fc99442941826dbc879d5e783c6aa4 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 8 Apr 2025 02:04:43 -0300 Subject: [PATCH 219/425] update COMPAT.md --- COMPAT.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 3d07558c8..5571a3cb2 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -4,6 +4,8 @@ This document describes the compatibility of Limbo with SQLite. 
## Table of contents +- [Limbo compatibility with SQLite](#limbo-compatibility-with-sqlite) + - [Table of contents](#table-of-contents) - [Overview](#overview) - [Features](#features) - [Limitations](#limitations) @@ -577,7 +579,7 @@ Modifiers: | VBegin | No | | | VColumn | Yes | | | VCreate | Yes | | -| VDestroy | No | | +| VDestroy | Yes | | | VFilter | Yes | | | VNext | Yes | | | VOpen | Yes |VOpenAsync| From 2181de79deb1c25da13ef103da611aab01e1123b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Tue, 8 Apr 2025 21:29:31 -0300 Subject: [PATCH 220/425] add destroy function to vtab --- core/lib.rs | 13 +++++++++++++ core/vdbe/execute.rs | 7 ++++++- extensions/core/src/vtabs.rs | 6 ++++++ macros/src/lib.rs | 24 +++++++++++++++++++++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 839562f5b..353789839 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -754,6 +754,19 @@ impl VirtualTable { _ => Err(LimboError::ExtensionError(rc.to_string())), } } + + pub fn destroy(&self) -> Result<()> { + let implementation = self.implementation.as_ref(); + let rc = unsafe { + (self.implementation.destroy)( + implementation as *const VTabModuleImpl as *const std::ffi::c_void, + ) + }; + match rc { + ResultCode::OK => Ok(()), + _ => Err(LimboError::ExtensionError(rc.to_string())), + } + } } pub(crate) struct SymbolTable { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 40559035d..f3243d14a 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1151,7 +1151,12 @@ pub fn op_vdestroy( }; { - conn.syms.borrow_mut().vtabs.remove(table_name); + let Some(vtab) = conn.syms.borrow_mut().vtabs.remove(table_name) else { + return Err(crate::LimboError::InternalError( + "Could not find Virtual Table to Destroy".to_string(), + )); + }; + vtab.destroy()?; } state.pc += 1; diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index b6e70b8bb..83b3dae78 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -21,6 +21,7 @@ pub struct VTabModuleImpl { pub eof: VtabFnEof, pub update: VtabFnUpdate, pub rowid: VtabRowIDFn, + pub destroy: VtabFnDestroy, } #[cfg(feature = "core_only")] @@ -60,6 +61,8 @@ pub type VtabFnUpdate = unsafe extern "C" fn( p_out_rowid: *mut i64, ) -> ResultCode; +pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode; + #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq)] pub enum VTabKind { @@ -88,6 +91,9 @@ pub trait VTabModule: 'static { fn delete(&mut self, _rowid: i64) -> Result<(), Self::Error> { Ok(()) } + fn destroy(&mut self) -> Result<(), Self::Error> { + Ok(()) + } } pub trait VTabCursor: Sized { diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 3de2797cb..c03788c7c 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -404,7 +404,11 @@ pub fn derive_agg_func(input: TokenStream) -> TokenStream { /// /// Delete the row with the provided rowid /// fn delete(&mut self, rowid: i64) -> Result<(), Self::Error> { /// Ok(()) -/// } +/// } +/// /// Destroy the virtual table. 
Any cleanup logic for when the table is deleted comes here +/// fn destroy(&mut self) -> Result<(), Self::Error> { +/// Ok(()) +/// } /// /// #[derive(Debug)] /// struct CsvCursor { @@ -450,6 +454,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { let eof_fn_name = format_ident!("eof_{}", struct_name); let update_fn_name = format_ident!("update_{}", struct_name); let rowid_fn_name = format_ident!("rowid_{}", struct_name); + let destroy_fn_name = format_ident!("destroy_{}", struct_name); let expanded = quote! { impl #struct_name { @@ -592,6 +597,22 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { <<#struct_name as ::limbo_ext::VTabModule>::VCursor as ::limbo_ext::VTabCursor>::rowid(cursor) } + #[no_mangle] + unsafe extern "C" fn #destroy_fn_name( + vtab: *const ::std::ffi::c_void, + ) -> ::limbo_ext::ResultCode { + if vtab.is_null() { + return ::limbo_ext::ResultCode::Error; + } + + let vtab = &mut *(vtab as *mut #struct_name); + if <#struct_name as VTabModule>::destroy(vtab).is_err() { + return ::limbo_ext::ResultCode::Error; + } + + return ::limbo_ext::ResultCode::OK; + } + #[no_mangle] pub unsafe extern "C" fn #register_fn_name( api: *const ::limbo_ext::ExtensionApi @@ -614,6 +635,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { eof: Self::#eof_fn_name, update: Self::#update_fn_name, rowid: Self::#rowid_fn_name, + destroy: Self::#destroy_fn_name, }; (api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND) } From 6249cd67e99b61cbe1a5d6531247364d8415587a Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 17:10:43 -0300 Subject: [PATCH 221/425] added print statement to test that VDestroy is being called --- core/translate/schema.rs | 2 +- extensions/tests/src/lib.rs | 5 +++++ testing/cli_tests/extensions.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/core/translate/schema.rs b/core/translate/schema.rs index c4d570b5c..a5318cee9 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -2,8 +2,8 @@ use std::fmt::Display; use crate::ast; use crate::schema::Schema; -use crate::storage::pager::CreateBTreeFlags; use crate::schema::Table; +use crate::storage::pager::CreateBTreeFlags; use crate::translate::ProgramBuilder; use crate::translate::ProgramBuilderOpts; use crate::translate::QueryMode; diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index df8e8bca0..beff17004 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -122,6 +122,11 @@ impl VTabModule for KVStoreVTab { Err("cursor out of range".into()) } } + + fn destroy(&mut self) -> Result<(), Self::Error> { + println!("VDestroy called"); + Ok(()) + } } fn hash_key(key: &str) -> i64 { diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 21ef1a7c2..6d252c543 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -541,7 +541,7 @@ def test_drop_virtual_table(): ) limbo.run_test_fn( "DROP TABLE t;", - null, + lambda res: "VDestroy called" in res, "can drop kv_store vtable", ) limbo.run_test_fn( From af456513d17d16b73c4b9056778726a58f5b4d60 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 22:38:43 -0300 Subject: [PATCH 222/425] Fix incompatibility in AND expression --- core/vdbe/execute.rs | 12 ++++++---- testing/all.test | 1 + testing/boolean.test | 56 ++++++++++++++++++++++++++++++++++++++++++++ testing/math.test | 33 --------------------------- 4 files changed, 65
insertions(+), 37 deletions(-) create mode 100755 testing/boolean.test diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0df9afcc4..716ec77e9 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -5872,14 +5872,18 @@ pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { | (OwnedValue::Integer(0), _) | (_, OwnedValue::Float(0.0)) | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), + &cast_text_to_real(lhs.as_str()), + &cast_text_to_real(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_and(&cast_text_to_numeric(text.as_str()), other) + exec_and(&cast_text_to_real(text.as_str()), other) } + (OwnedValue::Blob(blob), other) | (other, OwnedValue::Blob(blob)) => { + let text = String::from_utf8_lossy(blob); + exec_and(&cast_text_to_real(&text), other) + } + (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, _ => OwnedValue::Integer(1), } } diff --git a/testing/all.test b/testing/all.test index dc12c331a..3bdc1f98b 100755 --- a/testing/all.test +++ b/testing/all.test @@ -29,3 +29,4 @@ source $testdir/transactions.test source $testdir/update.test source $testdir/drop_table.test source $testdir/default_value.test +source $testdir/boolean.test diff --git a/testing/boolean.test b/testing/boolean.test new file mode 100755 index 000000000..51d6633e4 --- /dev/null +++ b/testing/boolean.test @@ -0,0 +1,56 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +foreach {testname lhs ans} { + int-1 1 0 + int-2 2 0 + int-3 0 1 + float-1 1.0 0 + float-2 2.0 0 + float-3 0.0 1 + text 'a' 1 + text-int-1 '0' 1 + text-int-2 '1' 0 + text-float-1 '1.0' 0 + text-float-2 '0.0' 1 + text-float-edge '12-23.0' 0 + null NULL {} + empty-blob x'' 1 + cast-blob "CAST ('af' AS BLOB)" 1 + blob x'0000' 1 + blob-2 x'0001' 1 +} { + do_execsql_test boolean-not-$testname "SELECT not $lhs" $::ans +} + +foreach {testname lhs rhs ans} { + + blob-blob x'' x'' 0 + 1-blob 1 x'' 0 + 0-blob 0 x'' 0 + 0-1 0 1 0 + 1-1 1 1 1 + int-int 20 1000 1 + int-float 20 1.0 1 + int-0.0 20 0.0 0 + 0.0-0.0 0.0 0.0 0 + text 'a' 1 0 + text-int-1 '0' 1 0 + text-int-2 '1' 0 0 + text-float-1 '1.0' 0 0 + text-float-2 '0.0' 1 0 + text-float-3 '1.0' 1 1 + text-float-edge '12-23.0' 0 0 + null-null NULL NULL "" + 1-null 1 NULL "" + 1.0-null 1.0 NULL "" + blob-null x'' NULL 0 + blob2-null x'0001' NULL 0 + 0-null 0 NULL 0 + 0.0-null 0.0 NULL 0 + '0.0'-null '0.0' NULL 0 +} { + do_execsql_test boolean-and-$testname "SELECT $lhs AND $rhs" $::ans +} \ No newline at end of file diff --git a/testing/math.test b/testing/math.test index 7eb15b2bc..384cab4f1 100755 --- a/testing/math.test +++ b/testing/math.test @@ -643,39 +643,6 @@ do_execsql_test bitwise-not-blob-2 { SELECT ~ x'0001'; } {-1} -do_execsql_test boolean-not-empty-blob { - SELECT NOT x'' -} {1} - -do_execsql_test boolean-not-cast-blob { - SELECT NOT CAST ('af' AS BLOB); -} {1} - -do_execsql_test boolean-not-blob { - SELECT NOT x'0000'; -} {1} - -do_execsql_test boolean-not-blob-2 { - SELECT NOT x'0001'; -} {1} - -foreach {testname lhs ans} { - int-1 1 0 - int-2 2 0 - int-3 0 1 - float-1 1.0 0 - float-2 2.0 0 - float-3 0.0 1 - text 'a' 1 - text-int-1 '0' 1 - text-int-2 '1' 0 - text-float-1 '1.0' 0 - text-float-2 '0.0' 1 - text-float-edge '12-23.0' 0 - null NULL {} -} { - 
do_execsql_test boolean-not "SELECT not $lhs" $::ans -} do_execsql_test pi { SELECT pi() From f3a07c86a016eae6237761a9b42b7a8567f29bd3 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 23:36:09 -0300 Subject: [PATCH 223/425] Add Ansi Colors to tcl test runner --- testing/tester.tcl | 46 +++++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/testing/tester.tcl b/testing/tester.tcl index 41117ed37..d739b2a39 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -2,6 +2,14 @@ set sqlite_exec [expr {[info exists env(SQLITE_EXEC)] ? $env(SQLITE_EXEC) : "sql set test_dbs [list "testing/testing.db" "testing/testing_norowidalias.db"] set test_small_dbs [list "testing/testing_small.db" ] +proc error_put {sql} { + puts [format "\033\[1;31mTest FAILED:\033\[0m %s" $sql ] +} + +proc test_put {msg db test_name} { + puts [format "\033\[1;34m(%s)\033\[0m %s $msg: \033\[1;32m%s\033\[0m" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] +} + proc evaluate_sql {sqlite_exec db_name sql} { set command [list $sqlite_exec $db_name $sql] set output [exec {*}$command] @@ -11,7 +19,7 @@ proc evaluate_sql {sqlite_exec db_name sql} { proc run_test {sqlite_exec db_name sql expected_output} { set actual_output [evaluate_sql $sqlite_exec $db_name $sql] if {$actual_output ne $expected_output} { - puts "Test FAILED: '$sql'" + error_put $sql puts "returned '$actual_output'" puts "expected '$expected_output'" exit 1 @@ -20,7 +28,7 @@ proc run_test {sqlite_exec db_name sql expected_output} { proc do_execsql_test {test_name sql_statements expected_outputs} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db $combined_sql $combined_expected_output @@ -29,7 +37,7 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { proc do_execsql_test_small {test_name sql_statements expected_outputs} { foreach db $::test_small_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db $combined_sql $combined_expected_output @@ -39,13 +47,13 @@ proc do_execsql_test_small {test_name sql_statements expected_outputs} { proc do_execsql_test_regex {test_name sql_statements expected_regex} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] # Validate the actual output against the regular expression if {![regexp $expected_regex $actual_output]} { - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual_output'" puts "expected to match regex '$expected_regex'" exit 1 @@ -55,7 +63,7 @@ proc do_execsql_test_regex {test_name sql_statements expected_regex} { proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_outputs} { - puts [format "(%s) %s Running test: %s" $db_name [string repeat " " [expr {40 - [string length $db_name]}]] $test_name] + test_put "Running 
test" $db_name $test_name set combined_sql [string trim $sql_statements] set combined_expected_output [join $expected_outputs "\n"] run_test $::sqlite_exec $db_name $combined_sql $combined_expected_output @@ -69,14 +77,14 @@ proc within_tolerance {actual expected tolerance} { # FIXME: When Limbo's floating point presentation matches to SQLite, this could/should be removed proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolerance} { foreach db $::test_dbs { - puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running test" $db $test_name set combined_sql [string trim $sql_statements] set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] set actual_values [split $actual_output "\n"] set expected_values [split $expected_outputs "\n"] if {[llength $actual_values] != [llength $expected_values]} { - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual_output'" puts "expected '$expected_outputs'" exit 1 @@ -89,7 +97,7 @@ proc do_execsql_test_tolerance {test_name sql_statements expected_outputs tolera if {![within_tolerance $actual $expected $tolerance]} { set lower_bound [expr {$expected - $tolerance}] set upper_bound [expr {$expected + $tolerance}] - puts "Test FAILED: '$sql_statements'" + error_put $sql_statements puts "returned '$actual'" puts "expected a value within the range \[$lower_bound, $upper_bound\]" exit 1 @@ -108,12 +116,12 @@ proc run_test_expecting_any_error {sqlite_exec db_name sql} { # Check if the output contains error indicators (×, error, syntax error, etc.) if {[regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { # Error found in output - test passed - puts "Test PASSED: Got expected error" + puts "\033\[1;32mTest PASSED:\033\[0m Got expected error" return 1 } # No error indicators in output - puts "Test FAILED: '$sql'" + error_put $sql puts "Expected an error but command output didn't indicate any error: '$result'" exit 1 } @@ -128,7 +136,7 @@ proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} { # Check if the output contains error indicators first if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { - puts "Test FAILED: '$sql'" + error_put $sql puts "Expected an error matching '$expected_error_pattern'" puts "But command output didn't indicate any error: '$result'" exit 1 @@ -136,7 +144,7 @@ proc run_test_expecting_error {sqlite_exec db_name sql expected_error_pattern} { # Now check if the error message matches the expected pattern if {![regexp $expected_error_pattern $result]} { - puts "Test FAILED: '$sql'" + error_put $sql puts "Error occurred but didn't match expected pattern." puts "Output was: '$result'" puts "Expected pattern: '$expected_error_pattern'" @@ -157,7 +165,7 @@ proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_te # Check if the output contains error indicators first if {![regexp {(error|ERROR|Error|×|syntax error|failed)} $result]} { - puts "Test FAILED: '$sql'" + error_put $sql puts "Expected an error with text: '$expected_error_text'" puts "But command output didn't indicate any error: '$result'" exit 1 @@ -174,7 +182,7 @@ proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_te # Check if the normalized strings contain the same text if {[string first $normalized_expected $normalized_actual] == -1} { - puts "Test FAILED: '$sql'" + error_put $sql puts "Error occurred but content didn't match." 
puts "Output was: '$result'" puts "Expected text: '$expected_error_text'" @@ -187,7 +195,7 @@ proc run_test_expecting_error_content {sqlite_exec db_name sql expected_error_te proc do_execsql_test_error {test_name sql_statements expected_error_pattern} { foreach db $::test_dbs { - puts [format "(%s) %s Running error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running error test" $db $test_name set combined_sql [string trim $sql_statements] run_test_expecting_error $::sqlite_exec $db $combined_sql $expected_error_pattern } @@ -195,7 +203,7 @@ proc do_execsql_test_error {test_name sql_statements expected_error_pattern} { proc do_execsql_test_error_content {test_name sql_statements expected_error_text} { foreach db $::test_dbs { - puts [format "(%s) %s Running error content test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running error content test" $db $test_name set combined_sql [string trim $sql_statements] run_test_expecting_error_content $::sqlite_exec $db $combined_sql $expected_error_text } @@ -203,14 +211,14 @@ proc do_execsql_test_error_content {test_name sql_statements expected_error_text proc do_execsql_test_any_error {test_name sql_statements} { foreach db $::test_dbs { - puts [format "(%s) %s Running any-error test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + test_put "Running any-error test" $db $test_name set combined_sql [string trim $sql_statements] run_test_expecting_any_error $::sqlite_exec $db $combined_sql } } proc do_execsql_test_in_memory_any_error {test_name sql_statements} { - puts [format "(in-memory) %s Running any-error test: %s" [string repeat " " 31] $test_name] + test_put "Running any-error test" in-memory $test_name # Use ":memory:" special filename for in-memory database set db_name ":memory:" From b3c25939803606536bd3c721a67010b04a7e5a65 Mon Sep 17 00:00:00 2001 From: TcMits Date: Mon, 14 Apr 2025 12:49:30 +0700 Subject: [PATCH 224/425] btree balance-shallower --- core/storage/btree.rs | 113 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bae2e85e2..457115201 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1859,6 +1859,7 @@ impl BTreeCursor { // Now do real balancing let parent_page = self.stack.top(); let parent_contents = parent_page.get_contents(); + let parent_is_root = !self.stack.has_parent(); assert!( parent_contents.overflow_cells.is_empty(), @@ -2442,14 +2443,54 @@ impl BTreeCursor { &parent_page, balance_info, parent_contents, - pages_to_balance_new, + &pages_to_balance_new, page_type, leaf_data, cells_debug, sibling_count_new, rightmost_pointer, ); - // TODO: balance root + + // TODO: vacuum support + if parent_is_root && parent_contents.cell_count() == 0 { + // From SQLite: + // The root page of the b-tree now contains no cells. The only sibling + // page is the right-child of the parent. Copy the contents of the + // child page into the parent, decreasing the overall height of the + // b-tree structure by one. 
This is described as the "balance-shallower" + assert!(sibling_count_new == 1); + let parent_offset = if parent_page.get().id == 1 { + DATABASE_HEADER_SIZE + } else { + 0 + }; + let child_page = &pages_to_balance_new[0]; + let child_contents = child_page.get_contents(); + let child_top = child_contents.cell_content_area() as usize; + let parent_buf = parent_contents.as_ptr(); + let child_buf = child_contents.as_ptr(); + let content_size = self.usable_space() - child_top; + + // Copy cell contents + parent_buf[child_top..child_top + content_size] + .copy_from_slice(&child_buf[child_top..child_top + content_size]); + + // Copy header and pointer + // NOTE: don't use .cell_pointer_array_offset_and_size() because of different + // header size + let header_and_pointer_size = + child_contents.header_size() + child_contents.cell_pointer_array_size(); + parent_buf[parent_offset..parent_offset + header_and_pointer_size] + .copy_from_slice( + &child_buf[child_contents.offset + ..child_contents.offset + header_and_pointer_size], + ); + + self.stack.set_cell_index(0); // reset cell index, top is already parent + self.pager + .free_page(Some(child_page.clone()), child_page.get().id)?; + } + // We have to free pages that are not used anymore for i in sibling_count_new..balance_info.sibling_count { let page = &balance_info.pages_to_balance[i]; @@ -2521,7 +2562,7 @@ impl BTreeCursor { parent_page: &PageRef, balance_info: &mut BalanceInfo, parent_contents: &mut PageContent, - pages_to_balance_new: Vec>, + pages_to_balance_new: &Vec>, page_type: PageType, leaf_data: bool, mut cells_debug: Vec>, @@ -5973,6 +6014,13 @@ mod tests { } } + // this test will create a tree like this: + // -page:2, ptr(right):4 + // +cells:node[rowid:14, ptr(<=):3] + // -page:3, ptr(right):0 + // +cells:leaf[rowid:11, len(payload):137, overflow:false] + // -page:4, ptr(right):0 + // +cells: #[test] pub fn test_drop_page_in_balancing_issue_1203() { let db = get_database(); @@ -6007,6 +6055,65 @@ mod tests { } } + // this test will create a tree like this: + // -page:2, ptr(right):3 + // +cells: + // -page:3, ptr(right):0 + // +cells: + #[test] + pub fn test_drop_page_in_balancing_issue_1203_2() { + let db = get_database(); + let conn = db.connect().unwrap(); + + let queries = vec![ +"CREATE TABLE super_becky (engrossing_berger BLOB,plucky_chai BLOB,mirthful_asbo REAL,bountiful_jon REAL,competitive_petit REAL,engrossing_rexroth REAL);", +"INSERT INTO super_becky VALUES (X'636861726D696E675F6261796572', X'70726F647563746976655F70617269737369', 6847793643.408741, 7330361375.924953, -6586051582.891455, -6921021872.711397), (X'657863656C6C656E745F6F7267616E697A696E67', X'6C757374726F75735F73696E64696B616C6973', 9905774996.48619, 570325205.2246342, 5852346465.53047, 728566012.1968269), (X'7570626561745F73656174746C65', X'62726176655F6661756E', -2202725836.424899, 5424554426.388281, 2625872085.917082, -6657362503.808359), (X'676C6F77696E675F6D617877656C6C', X'7761726D686561727465645F726F77616E', -9610936969.793116, 4886606277.093559, -3414536174.7928505, 6898267795.317778), (X'64796E616D69635F616D616E', X'7374656C6C61725F7374657073', 3918935692.153696, 151068445.947237, 4582065669.356403, -3312668220.4789667), (X'64696C6967656E745F64757272757469', X'7175616C69666965645F6D726163686E696B', 5527271629.262201, 6068855126.044355, 289904657.13490677, 2975774820.0877323), (X'6469706C6F6D617469635F726F76657363696F', X'616C6C7572696E675F626F7474696369', 9844748192.66119, -6180276383.305578, -4137330511.025565, -478754566.79494476), 
(X'776F6E64726F75735F6173686572', X'6465766F7465645F6176657273696F6E', 2310211470.114773, -6129166761.628184, -2865371645.3145514, 7542428654.8645935), (X'617070726F61636861626C655F6B686F6C61', X'6C757374726F75735F6C696E6E656C6C', -4993113161.458349, 7356727284.362968, -3228937035.568404, -1779334005.5067253);", +"INSERT INTO super_becky VALUES (X'74686F7567687466756C5F726576696577', X'617765736F6D655F63726F73736579', 9401977997.012783, 8428201961.643898, 2822821303.052643, 4555601220.718847), (X'73706563746163756C61725F6B686179617469', X'616772656561626C655F61646F6E696465', 7414547022.041355, 365016845.73330307, 50682963.055828094, -9258802584.962656), (X'6C6F79616C5F656D6572736F6E', X'676C6F77696E675F626174616C6F', -5522070106.765736, 2712536599.6384163, 6631385631.869345, 1242757880.7583427), (X'68617264776F726B696E675F6F6B656C6C79', X'666162756C6F75735F66696C697373', 6682622809.9778805, 4233900041.917185, 9017477903.795563, -756846353.6034946), (X'68617264776F726B696E675F626C61756D616368656E', X'616666656374696F6E6174655F6B6F736D616E', -1146438175.3174362, -7545123696.438596, -6799494012.403366, 5646913977.971333), (X'66616E7461737469635F726F77616E', X'74686F7567687466756C5F7465727269746F72696573', -4414529784.916277, -6209371635.279242, 4491104121.288605, 2590223842.117277);", +"INSERT INTO super_becky VALUES (X'676C697374656E696E675F706F72746572', X'696E7175697369746976655F656D', 2986144164.3676434, 3495899172.5935287, -849280584.9386635, 6869709150.2699375), (X'696D6167696E61746976655F6D65726C696E6F', X'676C6F77696E675F616B74696D6F6E', 8733490615.829357, 6782649864.719433, 6926744218.74107, 1532081022.4379768), (X'6E6963655F726F73736574', X'626C69746865736F6D655F66696C697373', -839304300.0706863, 6155504968.705227, -2951592321.950267, -6254186334.572437), (X'636F6E666964656E745F6C69626574', X'676C696D6D6572696E675F6B6F74616E7969', -5344675223.37533, -8703794729.211002, 3987472096.020382, -7678989974.961197), (X'696D6167696E61746976655F6B61726162756C7574', X'64796E616D69635F6D6367697272', 2028227065.6995697, -7435689525.030833, 7011220815.569796, 5526665697.213846), (X'696E7175697369746976655F636C61726B', X'616666656374696F6E6174655F636C6561766572', 3016598350.546356, -3686782925.383732, 9671422351.958004, 9099319829.078941), (X'63617061626C655F746174616E6B61', X'696E6372656469626C655F6F746F6E6F6D61', 6339989259.432795, -8888997534.102034, 6855868409.475763, -2565348887.290493), (X'676F7267656F75735F6265726E657269', X'65647563617465645F6F6D6F77616C69', 6992467657.527826, -3538089391.748543, -7103111660.146708, 4019283237.3740463), (X'616772656561626C655F63756C74757265', X'73706563746163756C61725F657370616E61', 189387871.06959534, 6211851191.361202, 1786455196.9768047, 7966404387.318119);", +"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F6C656967686C616973', X'666162756C6F75735F73656D696E61746F7265', 8688321500.141502, -7855144036.024546, -5234949709.573349, -9937638367.366447), (X'617070726F61636861626C655F726F677565', X'676C65616D696E675F6D7574696E79', -5351540099.744092, -3614025150.9013805, -2327775310.276925, 2223379997.077526), (X'676C696D6D6572696E675F63617263686961', X'696D6167696E61746976655F61737379616E6E', 4104832554.8371887, -5531434716.627781, 1652773397.4099865, 3884980522.1830273);", +"DELETE FROM super_becky WHERE (plucky_chai != X'7761726D686561727465645F6877616E67' AND mirthful_asbo != 9537234687.183533 AND bountiful_jon = -3538089391.748543);", +"INSERT INTO super_becky VALUES (X'706C75636B795F6D617263616E74656C', X'696D6167696E61746976655F73696D73', 
9535651632.375484, 92270815.0720501, 1299048084.6248207, 6460855331.572151), (X'726F6D616E7469635F706F746C61746368', X'68756D6F726F75735F63686165686F', 9345375719.265533, 7825332230.247925, -7133157299.39028, -6939677879.6597), (X'656666696369656E745F6261676E696E69', X'63726561746976655F67726168616D', -2615470560.1954746, 6790849074.977201, -8081732985.448849, -8133707792.312794), (X'677265676172696F75735F73637564', X'7368696E696E675F67726F7570', -7996394978.2610035, -9734939565.228964, 1108439333.8481388, -5420483517.169478), (X'6C696B61626C655F6B616E6176616C6368796B', X'636F75726167656F75735F7761726669656C64', -1959869609.656724, 4176668769.239971, -8423220404.063669, 9987687878.685959), (X'657863656C6C656E745F68696C6473646F74746572', X'676C6974746572696E675F7472616D7564616E61', -5220160777.908238, 3892402687.8826714, 9803857762.617172, -1065043714.0265541), (X'6D61676E69666963656E745F717565657273', X'73757065725F717565657273', -700932053.2006226, -4706306995.253335, -5286045811.046467, 1954345265.5250092), (X'676976696E675F6275636B65726D616E6E', X'667269656E646C795F70697A7A6F6C61746F', -2186859620.9089565, -6098492099.446075, -7456845586.405931, 8796967674.444252);", +"DELETE FROM super_becky WHERE TRUE;", +"INSERT INTO super_becky VALUES (X'6F7074696D69737469635F6368616E69616C', X'656E657267657469635F6E65677261', 1683345860.4208698, 4163199322.9289455, -4192968616.7868404, -7253371206.571701), (X'616C6C7572696E675F686176656C', X'7477696E6B6C696E675F626965627579636B', -9947019174.287437, 5975899640.893995, 3844707723.8570194, -9699970750.513876), (X'6F7074696D69737469635F7A686F75', X'616D626974696F75735F636F6E6772657373', 4143738484.1081524, -2138255286.170598, 9960750454.03466, 5840575852.80299), (X'73706563746163756C61725F6A6F6E67', X'73656E7369626C655F616269646F72', -1767611042.9716015, -7684260477.580351, 4570634429.188147, -9222640121.140202), (X'706F6C6974655F6B657272', X'696E736967687466756C5F63686F646F726B6F6666', -635016769.5123329, -4359901288.494518, -7531565119.905825, -1180410948.6572971), (X'666C657869626C655F636F6D756E69656C6C6F', X'6E6963655F6172636F73', 8708423014.802425, -6276712625.559328, -771680766.2485523, 8639486874.113342);", +"DELETE FROM super_becky WHERE (mirthful_asbo < 9730384310.536528 AND plucky_chai < X'6E6963655F61726370B2');", +"DELETE FROM super_becky WHERE (mirthful_asbo > 6248699554.426553 AND bountiful_jon > 4124481472.333034);", +"INSERT INTO super_becky VALUES (X'676C696D6D6572696E675F77656C7368', X'64696C6967656E745F636F7262696E', 8217054003.369003, 8745594518.77864, 1928172803.2261295, -8375115534.050233), (X'616772656561626C655F6463', X'6C6F76696E675F666F72656D616E', -5483889804.871533, -8264576639.127487, 4770567289.404846, -3409172927.2573576), (X'6D617276656C6F75735F6173696D616B6F706F756C6F73', X'746563686E6F6C6F676963616C5F6A61637175696572', 2694858779.206814, -1703227425.3442516, -4504989231.263319, -3097265869.5230227), (X'73747570656E646F75735F64757075697364657269', X'68696C6172696F75735F6D75697268656164', 568174708.66469, -4878260547.265669, -9579691520.956625, 73507727.8100338), (X'626C69746865736F6D655F626C6F6B', X'61646570745F6C65696572', 7772117077.916897, 4590608571.321514, -881713470.657032, -9158405774.647465);", +"INSERT INTO super_becky VALUES (X'6772697070696E675F6573736578', X'67656E65726F75735F636875726368696C6C', -4180431825.598956, 7277443000.677654, 2499796052.7878246, -2858339306.235305), (X'756E697175655F6D6172656368616C', X'62726561746874616B696E675F636875726368696C6C', 1401354536.7625294, -611427440.2796707, 
-4621650430.463729, 1531473111.7482872), (X'657863656C6C656E745F66696E6C6579', X'666169746866756C5F62726F636B', -4020697828.0073624, -2833530733.19637, -7766170050.654022, 8661820959.434689);", +"INSERT INTO super_becky VALUES (X'756E697175655F6C617061797265', X'6C6F76696E675F7374617465', 7063237787.258968, -5425712581.365798, -7750509440.0141945, -7570954710.892544), (X'62726561746874616B696E675F6E65616C', X'636F75726167656F75735F61727269676F6E69', 289862394.2028198, 9690362375.014446, -4712463267.033899, 2474917855.0973473), (X'7477696E6B6C696E675F7368616B7572', X'636F75726167656F75735F636F6D6D6974746565', 5449035403.229155, -2159678989.597906, 3625606019.1150894, -3752010405.4475393);", +"INSERT INTO super_becky VALUES (X'70617373696F6E6174655F73686970776179', X'686F6E6573745F7363687765697A6572', 4193384746.165228, -2232151704.896323, 8615245520.962444, -9789090953.995636);", +"INSERT INTO super_becky VALUES (X'6C696B61626C655F69', X'6661766F7261626C655F6D626168', 6581403690.769894, 3260059398.9544716, -407118859.046051, -3155853965.2700634), (X'73696E636572655F6F72', X'616772656561626C655F617070656C6261756D', 9402938544.308651, -7595112171.758331, -7005316716.211025, -8368210960.419411);", +"INSERT INTO super_becky VALUES (X'6D617276656C6F75735F6B61736864616E', X'6E6963655F636F7272', -5976459640.85817, -3177550476.2092276, 2073318650.736992, -1363247319.9978447);", +"INSERT INTO super_becky VALUES (X'73706C656E6469645F6C616D656E646F6C61', X'677265676172696F75735F766F6E6E65677574', 6898259773.050102, 8973519699.707073, -25070632.280548096, -1845922497.9676847), (X'617765736F6D655F7365766572', X'656E657267657469635F706F746C61746368', -8750678407.717808, 5130907533.668898, -6778425327.111566, 3718982135.202587);", +"INSERT INTO super_becky VALUES (X'70726F706974696F75735F6D616C617465737461', X'657863656C6C656E745F65766572657474', -8846855772.62094, -6168969732.697067, -8796372709.125793, 9983557891.544613), (X'73696E636572655F6C6177', X'696E7175697369746976655F73616E647374726F6D', -6366985697.975358, 3838628702.6652164, 3680621713.3371124, -786796486.8049564), (X'706F6C6974655F676C6561736F6E', X'706C75636B795F677579616E61', -3987946379.104308, -2119148244.413993, -1448660343.6888638, -1264195510.1611118), (X'676C6974746572696E675F6C6975', X'70657273697374656E745F6F6C6976696572', 6741779968.943846, -3239809989.227495, -1026074003.5506897, 4654600514.871752);", +"DELETE FROM super_becky WHERE (engrossing_berger < X'6566651A3C70278D4E200657551D8071A1' AND competitive_petit > 1236742147.9451914);", +"INSERT INTO super_becky VALUES (X'6661766F7261626C655F726569746D616E', X'64657465726D696E65645F726974746572', -7412553243.829927, -7572665195.290464, 7879603411.222157, 3706943306.5691853), (X'70657273697374656E745F6E6F6C616E', X'676C6974746572696E675F73686570617264', 7028261282.277422, -2064164782.3494844, -5244048504.507779, -2399526243.005843), (X'6B6E6F776C6564676561626C655F70617474656E', X'70726F66696369656E745F726F7365627261756768', 3713056763.583538, 3919834206.566164, -6306779387.430006, -9939464323.995546), (X'616461707461626C655F7172757A', X'696E7175697369746976655F68617261776179', 6519349690.299835, -9977624623.820414, 7500579325.440605, -8118341251.362242);", +"INSERT INTO super_becky VALUES (X'636F6E73696465726174655F756E696F6E', X'6E6963655F6573736578', -1497385534.8720198, 9957688503.242973, 9191804202.566128, -179015615.7117195), (X'666169746866756C5F626F776C656773', X'6361707469766174696E675F6D6367697272', 893707300.1576138, 3381656294.246702, 6884723724.381908, 
6248331214.701559), (X'6B6E6F776C6564676561626C655F70656E6E61', X'6B696E645F616A697468', -3335162603.6574974, 1812878172.8505402, 5115606679.658335, -5690100280.808182), (X'617765736F6D655F77696E7374616E6C6579', X'70726F706974696F75735F6361726173736F', -7395576292.503981, 4956546102.029215, -1468521769.7486448, -2968223925.60355), (X'636F75726167656F75735F77617266617265', X'74686F7567687466756C5F7361707068697265', 7052982930.566017, -9806098174.104418, -6910398936.377775, -4041963031.766964), (X'657863656C6C656E745F6B62', X'626C69746865736F6D655F666F75747A6F706F756C6F73', 6142173202.994768, 5193126957.544125, -7522202722.983735, -1659088056.594862), (X'7374756E6E696E675F6E6576616461', X'626F756E746966756C5F627572746F6E', -3822097036.7628613, -3458840259.240303, 2544472236.86788, 6928890176.466003);", +"INSERT INTO super_becky VALUES (X'706572736F6E61626C655F646D69747269', X'776F6E64726F75735F6133796F', 2651932559.0077076, 811299402.3174248, -8271909238.671928, 6761098864.189909);", +"INSERT INTO super_becky VALUES (X'726F7573696E675F6B6C6166657461', X'64617A7A6C696E675F6B6E617070', 9370628891.439335, -5923332007.253168, -2763161830.5880013, -9156194881.875952), (X'656666696369656E745F6C6576656C6C6572', X'616C6C7572696E675F706561636F7474', 3102641409.8314342, 2838360181.628153, 2466271662.169607, 1015942181.844162), (X'6469706C6F6D617469635F7065726B696E73', X'726F7573696E675F6172616269', -1551071129.022499, -8079487600.186886, 7832984580.070087, -6785993247.895652), (X'626F756E746966756C5F6D656D62657273', X'706F77657266756C5F70617269737369', 9226031830.72445, 7012021503.536997, -2297349030.108919, -2738320055.4710903), (X'676F7267656F75735F616E6172636F7469636F', X'68656C7066756C5F7765696C616E64', -8394163480.676959, -2978605095.699134, -6439355448.021704, 9137308022.281273), (X'616666656374696F6E6174655F70726F6C65696E666F', X'706C75636B795F73616E7A', 3546758708.3524914, -1870964264.9353771, 338752565.3643894, -3908023657.299715), (X'66756E6E795F706F70756C61697265', X'6F75747374616E64696E675F626576696E67746F6E', -1533858145.408224, 6164225076.710373, 8419445987.622173, 584555253.6852646), (X'76697669645F6D7474', X'7368696D6D6572696E675F70616F6E65737361', 5512251366.193035, -8680583180.123213, -4445968638.153208, -3274009935.4229546);", +"INSERT INTO super_becky VALUES (X'7068696C6F736F70686963616C5F686F7264', X'657863656C6C656E745F67757373656C7370726F757473', -816909447.0240917, -3614686681.8786583, 7701617524.26067, -4541962047.183721), (X'616D6961626C655F69676E6174696576', X'6D61676E69666963656E745F70726F76696E6369616C69', -1318532883.847702, -4918966075.976474, -7601723171.33518, -3515747704.3847466), (X'70726F66696369656E745F32303137', X'66756E6E795F6E77', -1264540201.518032, 8227396547.578808, 6245093925.183641, -8368355328.110817);", +"INSERT INTO super_becky VALUES (X'77696C6C696E675F6E6F6B6B65', X'726F6D616E7469635F677579616E61', 6618610796.3707695, -3814565359.1524105, 1663106272.4565296, -4175107840.768817), (X'72656C617865645F7061766C6F76', X'64657465726D696E65645F63686F646F726B6F6666', -3350029338.034504, -3520837855.4619064, 3375167499.631817, -8866806483.714607), (X'616D706C655F67696464696E6773', X'667269656E646C795F6A6F686E', 1458864959.9942684, 1344208968.0486107, 9335156635.91314, -6180643697.918882), (X'72656C617865645F6C65726F79', X'636F75726167656F75735F6E6F72646772656E', -5164986537.499656, 8820065797.720875, 6146530425.891005, 6949241471.958189), (X'666F63757365645F656D6D61', X'696D6167696E61746976655F6C6F6E67', -9587619060.80035, 6128068142.184402, 6765196076.956905, 
800226302.7983418);", +"INSERT INTO super_becky VALUES (X'616D626974696F75735F736F6E67', X'706572666563745F6761686D616E', 4989979180.706432, -9374266591.537058, 314459621.2820797, -3200029490.9553604), (X'666561726C6573735F626C6174', X'676C697374656E696E675F616374696F6E', -8512203612.903147, -7625581186.013805, -9711122307.234787, -301590929.32751083), (X'617765736F6D655F6669646573', X'666169746866756C5F63756E6E696E6768616D', -1428228887.9205084, 7669883854.400173, 5604446195.905277, -1509311057.9653416), (X'68756D6F726F75735F77697468647261776E', X'62726561746874616B696E675F7472617562656C', -7292778713.676636, -6728132503.529593, 2805341768.7252483, 330416975.2300949);", +"INSERT INTO super_becky VALUES (X'677265676172696F75735F696873616E', X'7374656C6C61725F686172746D616E', 8819210651.1988, 5298459883.813452, 7293544377.958424, 460475869.72971725), (X'696E736967687466756C5F62657765726E69747A', X'676C65616D696E675F64656E736C6F77', -6911957282.193239, 1754196756.2193146, -6316860403.693853, -3094020672.236368), (X'6D6972746866756C5F616D6265727261656B656C6C79', X'68756D6F726F75735F6772617665', 1785574023.0269203, -372056983.82761574, 4133719439.9538956, 9374053482.066044), (X'76697669645F736169747461', X'7761726D686561727465645F696E656469746173', 2787071361.6099434, 9663839418.553448, -5934098589.901047, -9774745509.608858), (X'61646570745F6F6375727279', X'6C696B61626C655F726569746D616E', -3098540915.1310825, 5460848322.672174, -6012867197.519758, 6769770087.661135), (X'696E646570656E64656E745F6F', X'656C6567616E745F726F6F726461', 1462542860.3143978, 3360904654.2464733, 5458876201.665213, -5522844849.529962), (X'72656D61726B61626C655F626F6B616E69', X'6F70656E5F6D696E6465645F686F72726F78', 7589481760.867031, 7970075121.546291, 7513467575.5213585, 9663061478.289227), (X'636F6E666964656E745F6C616479', X'70617373696F6E6174655F736B726F7A6974736B79', 8266917234.53915, -7172933478.625412, 309854059.94031143, -8309837814.497616);", +"DELETE FROM super_becky WHERE (competitive_petit != 8725256604.165474 OR engrossing_rexroth > -3607424615.7839313 OR plucky_chai < X'726F7573696E675F6216E20375');", +"INSERT INTO super_becky VALUES (X'7368696E696E675F736F6C69646169726573', X'666561726C6573735F63617264616E', -170727879.20838165, 2744601113.384678, 5676912434.941502, 6757573601.657997), (X'636F75726167656F75735F706C616E636865', X'696E646570656E64656E745F636172736F6E', -6271723086.761938, -180566679.7470188, -1285774632.134449, 1359665735.7842407), (X'677265676172696F75735F7374616D61746F76', X'7374756E6E696E675F77696C64726F6F7473', -6210238866.953484, 2492683045.8287067, -9688894361.68205, 5420275482.048567), (X'696E646570656E64656E745F6F7267616E697A6572', X'676C6974746572696E675F736F72656C', 9291163783.3073, -6843003475.769236, -1320245894.772686, -5023483808.044955), (X'676C6F77696E675F6E65736963', X'676C65616D696E675F746F726D6579', 829526382.8027191, 9365690945.1316, 4761505764.826195, -4149154965.0024815), (X'616C6C7572696E675F646F637472696E65', X'6E6963655F636C6561766572', 3896644979.981762, -288600448.8016701, 9462856570.130062, -909633752.5993862);", + ]; + + for query in queries { + let mut stmt = conn.query(query).unwrap().unwrap(); + loop { + let row = stmt.step().expect("step"); + match row { + StepResult::Done => { + break; + } + _ => { + tracing::debug!("row {:?}", row); + } + } + } + } + } + #[test] pub fn test_free_space() { let db = get_database(); From d286a56e1580ff51c612c1577ced075b88c69fa5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 09:40:20 +0300 Subject: [PATCH 
225/425] refactor: fold Async/Await insns into a single instruction --- COMPAT.md | 25 +--- core/storage/btree.rs | 5 - core/translate/emitter.rs | 6 +- core/translate/index.rs | 24 +--- core/translate/insert.rs | 17 +-- core/translate/main_loop.rs | 126 ++++++----------- core/translate/schema.rs | 29 +--- core/vdbe/builder.rs | 22 ++- core/vdbe/execute.rs | 260 +++++++----------------------------- core/vdbe/explain.rs | 142 ++++---------------- core/vdbe/insn.rs | 89 +++--------- core/vdbe/mod.rs | 8 +- 12 files changed, 177 insertions(+), 576 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 3d07558c8..2300a6f07 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -460,9 +460,7 @@ Modifiers: | HaltIfNull | No | | | IdxDelete | No | | | IdxGE | Yes | | -| IdxInsert | No | | -| IdxInsertAsync | Yes | | -| IdxInsertAwait | Yes | | +| IdxInsert | Yes | | | IdxLE | Yes | | | IdxLT | Yes | | | IdxRowid | No | | @@ -474,9 +472,7 @@ Modifiers: | IncrVacuum | No | | | Init | Yes | | | InitCoroutine | Yes | | -| Insert | No | | -| InsertAsync | Yes | | -| InsertAwait | Yes | | +| Insert | Yes | | | InsertInt | No | | | Int64 | No | | | Integer | Yes | | @@ -497,9 +493,7 @@ Modifiers: | MustBeInt | Yes | | | Ne | Yes | | | NewRowid | Yes | | -| Next | No | | -| NextAsync | Yes | | -| NextAwait | Yes | | +| Next | Yes | | | Noop | Yes | | | Not | Yes | | | NotExists | Yes | | @@ -512,18 +506,13 @@ Modifiers: | OpenEphemeral | No | | | OpenPseudo | Yes | | | OpenRead | Yes | | -| OpenReadAsync | Yes | | -| OpenWrite | No | | -| OpenWriteAsync | Yes | | -| OpenWriteAwait | Yes | | +| OpenWrite | Yes | | | Or | Yes | | | Pagecount | Partial| no temp databases | | Param | No | | | ParseSchema | No | | | Permutation | No | | -| Prev | No | | -| PrevAsync | Yes | | -| PrevAwait | Yes | | +| Prev | Yes | | | Program | No | | | ReadCookie | Partial| no temp databases, only user_version supported | | Real | Yes | | @@ -533,8 +522,6 @@ Modifiers: | ResultRow | Yes | | | Return | Yes | | | Rewind | Yes | | -| RewindAsync | Yes | | -| RewindAwait | Yes | | | RowData | No | | | RowId | Yes | | | RowKey | No | | @@ -580,7 +567,7 @@ Modifiers: | VDestroy | No | | | VFilter | Yes | | | VNext | Yes | | -| VOpen | Yes |VOpenAsync| +| VOpen | Yes | | | VRename | No | | | VUpdate | Yes | | | Vacuum | No | | diff --git a/core/storage/btree.rs b/core/storage/btree.rs index ba121d86d..3af102854 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2972,11 +2972,6 @@ impl BTreeCursor { } } - pub fn wait_for_completion(&mut self) -> Result<()> { - // TODO: Wait for pager I/O to complete - Ok(()) - } - pub fn rowid(&self) -> Result> { if let Some(mv_cursor) = &self.mv_cursor { let mv_cursor = mv_cursor.borrow(); diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 21e311bba..e2914bbd0 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -431,8 +431,7 @@ fn emit_delete_insns( conflict_action, }); } else { - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); + program.emit_insn(Insn::Delete { cursor_id }); } if let Some(limit) = limit { let limit_reg = program.alloc_register(); @@ -683,13 +682,12 @@ fn emit_update_insns( count: table_ref.columns().len(), dest_reg: record_reg, }); - program.emit_insn(Insn::InsertAsync { + program.emit_insn(Insn::Insert { cursor: cursor_id, key_reg: beg, record_reg, flag: 0, }); - program.emit_insn(Insn::InsertAwait { cursor_id }); } else if let Some(vtab) = table_ref.virtual_table() { let 
arg_count = table_ref.columns().len() + 2; program.emit_insn(Insn::VUpdate { diff --git a/core/translate/index.rs b/core/translate/index.rs index 20647d15c..366b986e7 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -96,11 +96,10 @@ pub fn translate_create_index( }); // open the sqlite schema table for writing and create a new entry for the index - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, root_page: RegisterOrLiteral::Literal(sqlite_table.root_page), }); - program.emit_insn(Insn::OpenWriteAwait {}); let sql = create_idx_stmt_to_sql(&tbl_name, &idx_name, unique_if_not_exists, &columns); emit_schema_entry( &mut program, @@ -137,18 +136,14 @@ pub fn translate_create_index( }); // open the table we are creating the index on for reading - program.emit_insn(Insn::OpenReadAsync { + program.emit_insn(Insn::OpenRead { cursor_id: table_cursor_id, root_page: tbl.root_page, }); - program.emit_insn(Insn::OpenReadAwait {}); - program.emit_insn(Insn::RewindAsync { - cursor_id: table_cursor_id, - }); let loop_start_label = program.allocate_label(); let loop_end_label = program.allocate_label(); - program.emit_insn(Insn::RewindAwait { + program.emit_insn(Insn::Rewind { cursor_id: table_cursor_id, pc_if_empty: loop_end_label, }); @@ -184,10 +179,7 @@ pub fn translate_create_index( record_reg, }); - program.emit_insn(Insn::NextAsync { - cursor_id: table_cursor_id, - }); - program.emit_insn(Insn::NextAwait { + program.emit_insn(Insn::Next { cursor_id: table_cursor_id, pc_if_next: loop_start_label, }); @@ -195,11 +187,10 @@ pub fn translate_create_index( // Open the index btree we created for writing to insert the // newly sorted index records. - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: btree_cursor_id, root_page: RegisterOrLiteral::Register(root_page_reg), }); - program.emit_insn(Insn::OpenWriteAwait {}); let sorted_loop_start = program.allocate_label(); let sorted_loop_end = program.allocate_label(); @@ -222,16 +213,13 @@ pub fn translate_create_index( cursor_id: btree_cursor_id, }); // insert new index record - program.emit_insn(Insn::IdxInsertAsync { + program.emit_insn(Insn::IdxInsert { cursor_id: btree_cursor_id, record_reg: sorted_record_reg, unpacked_start: None, // TODO: optimize with these to avoid decoding record twice unpacked_count: None, flags: IdxInsertFlags::new().use_seek(false), }); - program.emit_insn(Insn::IdxInsertAwait { - cursor_id: btree_cursor_id, - }); program.emit_insn(Insn::SorterNext { cursor_id: sorter_cursor_id, pc_if_next: sorted_loop_start, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index af6065733..4ca7e6fca 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -168,11 +168,10 @@ pub fn translate_insert( program.emit_insn(Insn::EndCoroutine { yield_reg }); program.resolve_label(jump_on_definition_label, program.offset()); - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, root_page: RegisterOrLiteral::Literal(root_page), }); - program.emit_insn(Insn::OpenWriteAwait {}); // Main loop // FIXME: rollback is not implemented. E.g. 
if you insert 2 rows and one fails to unique constraint violation, @@ -184,11 +183,10 @@ pub fn translate_insert( }); } else { // Single row - populate registers directly - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, root_page: RegisterOrLiteral::Literal(root_page), }); - program.emit_insn(Insn::OpenWriteAwait {}); populate_column_registers( &mut program, @@ -202,11 +200,10 @@ pub fn translate_insert( } // Open all the index btrees for writing for idx_cursor in idx_cursors.iter() { - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: idx_cursor.2, root_page: idx_cursor.1.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); } // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); @@ -293,13 +290,12 @@ pub fn translate_insert( dest_reg: record_register, }); - program.emit_insn(Insn::InsertAsync { + program.emit_insn(Insn::Insert { cursor: cursor_id, key_reg: rowid_reg, record_reg: record_register, flag: 0, }); - program.emit_insn(Insn::InsertAwait { cursor_id }); for index_col_mapping in index_col_mappings.iter() { // find which cursor we opened earlier for this index let idx_cursor_id = idx_cursors @@ -337,7 +333,7 @@ pub fn translate_insert( }); // now do the actual index insertion using the unpacked registers - program.emit_insn(Insn::IdxInsertAsync { + program.emit_insn(Insn::IdxInsert { cursor_id: idx_cursor_id, record_reg, unpacked_start: Some(idx_start_reg), // TODO: enable optimization @@ -345,9 +341,6 @@ pub fn translate_insert( // TODO: figure out how to determine whether or not we need to seek prior to insert. flags: IdxInsertFlags::new(), }); - program.emit_insn(Insn::IdxInsertAwait { - cursor_id: idx_cursor_id, - }); } if inserting_multiple_rows { // For multiple rows, loop back diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 6521b9c24..1b709e0d3 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -39,7 +39,7 @@ pub struct LeftJoinMetadata { pub struct LoopLabels { /// jump to the start of the loop body pub loop_start: BranchOffset, - /// jump to the NextAsync instruction (or equivalent) + /// jump to the Next instruction (or equivalent) pub next: BranchOffset, /// jump to the end of the loop, exiting it pub loop_end: BranchOffset, @@ -96,44 +96,39 @@ pub fn init_loop( match (mode, &table.table) { (OperationMode::SELECT, Table::BTree(btree)) => { let root_page = btree.root_page; - program.emit_insn(Insn::OpenReadAsync { + program.emit_insn(Insn::OpenRead { cursor_id, root_page, }); - program.emit_insn(Insn::OpenReadAwait {}); if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenReadAsync { + program.emit_insn(Insn::OpenRead { cursor_id: index_cursor_id, root_page: index.as_ref().unwrap().root_page, }); - program.emit_insn(Insn::OpenReadAwait {}); } } (OperationMode::DELETE, Table::BTree(btree)) => { let root_page = btree.root_page; - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, root_page: root_page.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); } (OperationMode::UPDATE, Table::BTree(btree)) => { let root_page = btree.root_page; - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id, root_page: root_page.into(), }); if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenWriteAsync { + 
program.emit_insn(Insn::OpenWrite { cursor_id: index_cursor_id, root_page: index.as_ref().unwrap().root_page.into(), }); } - program.emit_insn(Insn::OpenWriteAwait {}); } (_, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpenAsync { cursor_id }); - program.emit_insn(Insn::VOpenAwait {}); + program.emit_insn(Insn::VOpen { cursor_id }); } _ => { unimplemented!() @@ -148,18 +143,16 @@ pub fn init_loop( match mode { OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { + program.emit_insn(Insn::OpenRead { cursor_id: table_cursor_id, root_page: table.table.get_root_page(), }); - program.emit_insn(Insn::OpenReadAwait {}); } OperationMode::DELETE | OperationMode::UPDATE => { - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: table_cursor_id, root_page: table.table.get_root_page().into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); } _ => { unimplemented!() @@ -177,18 +170,16 @@ pub fn init_loop( match mode { OperationMode::SELECT => { - program.emit_insn(Insn::OpenReadAsync { + program.emit_insn(Insn::OpenRead { cursor_id: index_cursor_id, root_page: index.root_page, }); - program.emit_insn(Insn::OpenReadAwait); } OperationMode::UPDATE | OperationMode::DELETE => { - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: index_cursor_id, root_page: index.root_page.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); } _ => { unimplemented!() @@ -204,7 +195,7 @@ pub fn init_loop( } /// Set up the main query execution loop -/// For example in the case of a nested table scan, this means emitting the RewindAsync instruction +/// For example in the case of a nested table scan, this means emitting the Rewind instruction /// for all tables involved, outermost first. 
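// A hand-written sketch of how this refactor changes the emitted programs
// (not actual EXPLAIN output; cursor ids and labels invented). A plain table
// scan that previously needed paired opcodes now needs one per step:
//
//   before                        after
//   ------                        -----
//   OpenReadAsync  c0             OpenRead  c0
//   OpenReadAwait                 Rewind    c0 -> end
//   RewindAsync    c0             ...loop body...
//   RewindAwait    c0 -> end      Next      c0 -> loop
//   ...loop body...
//   NextAsync      c0
//   NextAwait      c0 -> loop
//
// The folded opcodes suspend themselves on pending I/O (via return_if_io!)
// and are simply re-executed, so a separate Await step is no longer needed.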
pub fn open_loop( program: &mut ProgramBuilder, @@ -289,51 +280,36 @@ pub fn open_loop( let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); if !matches!(&table.table, Table::Virtual(_)) { if *iter_dir == IterationDirection::Backwards { - program.emit_insn(Insn::LastAsync { + program.emit_insn(Insn::Last { cursor_id: iteration_cursor_id, + pc_if_empty: loop_end, }); } else { - program.emit_insn(Insn::RewindAsync { + program.emit_insn(Insn::Rewind { cursor_id: iteration_cursor_id, + pc_if_empty: loop_end, }); } } - match &table.table { - Table::BTree(_) => { - program.emit_insn(if *iter_dir == IterationDirection::Backwards { - Insn::LastAwait { - cursor_id: iteration_cursor_id, - pc_if_empty: loop_end, - } - } else { - Insn::RewindAwait { - cursor_id: iteration_cursor_id, - pc_if_empty: loop_end, - } - }) + if let Table::Virtual(ref table) = table.table { + let start_reg = + program.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); + let mut cur_reg = start_reg; + let args = match table.args.as_ref() { + Some(args) => args, + None => &vec![], + }; + for arg in args { + let reg = cur_reg; + cur_reg += 1; + let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; } - Table::Virtual(ref table) => { - let start_reg = program - .alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match table.args.as_ref() { - Some(args) => args, - None => &vec![], - }; - for arg in args { - let reg = cur_reg; - cur_reg += 1; - let _ = - translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; - } - program.emit_insn(Insn::VFilter { - cursor_id, - pc_if_empty: loop_end, - arg_count: table.args.as_ref().map_or(0, |args| args.len()), - args_reg: start_reg, - }); - } - other => panic!("Unsupported table reference type: {:?}", other), + program.emit_insn(Insn::VFilter { + cursor_id, + pc_if_empty: loop_end, + arg_count: table.args.as_ref().map_or(0, |args| args.len()), + args_reg: start_reg, + }); } program.resolve_label(loop_start, program.offset()); @@ -702,7 +678,7 @@ fn emit_loop_source( } /// Closes the loop for a given source operator. -/// For example in the case of a nested table scan, this means emitting the NextAsync instruction +/// For example in the case of a nested table scan, this means emitting the Next instruction /// for all tables involved, innermost first. pub fn close_loop( program: &mut ProgramBuilder, @@ -727,7 +703,7 @@ pub fn close_loop( match &table.op { Operation::Subquery { .. } => { program.resolve_label(loop_labels.next, program.offset()); - // A subquery has no cursor to call NextAsync on, so it just emits a Goto + // A subquery has no cursor to call Next on, so it just emits a Goto // to the Yield instruction, which in turn jumps back to the main loop of the subquery, // so that the next row from the subquery can be read. 
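// A rough picture of the control flow this emits for a subquery source
// (a hand sketch; labels and register numbers are invented):
//
//   InitCoroutine r1 -> after_def     ; define the subquery coroutine
//   ...subquery body...EndCoroutine
// yield_point:
//   Yield r1 -> loop_end              ; produce the next subquery row
//   ...outer loop body...
//   Goto -> yield_point               ; the Goto emitted just below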
program.emit_insn(Insn::Goto { @@ -745,21 +721,12 @@ pub fn close_loop( match &table.table { Table::BTree(_) => { if *iter_dir == IterationDirection::Backwards { - program.emit_insn(Insn::PrevAsync { + program.emit_insn(Insn::Prev { cursor_id: iteration_cursor_id, + pc_if_prev: loop_labels.loop_start, }); } else { - program.emit_insn(Insn::NextAsync { - cursor_id: iteration_cursor_id, - }); - } - if *iter_dir == IterationDirection::Backwards { - program.emit_insn(Insn::PrevAwait { - cursor_id: iteration_cursor_id, - pc_if_next: loop_labels.loop_start, - }); - } else { - program.emit_insn(Insn::NextAwait { + program.emit_insn(Insn::Next { cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); @@ -776,7 +743,7 @@ pub fn close_loop( } Operation::Search(search) => { program.resolve_label(loop_labels.next, program.offset()); - // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a Next instruction. if !matches!(search, Search::RowidEq { .. }) { let (cursor_id, iter_dir) = match search { Search::Seek { @@ -796,14 +763,12 @@ pub fn close_loop( }; if iter_dir == IterationDirection::Backwards { - program.emit_insn(Insn::PrevAsync { cursor_id }); - program.emit_insn(Insn::PrevAwait { + program.emit_insn(Insn::Prev { cursor_id, - pc_if_next: loop_labels.loop_start, + pc_if_prev: loop_labels.loop_start, }); } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - program.emit_insn(Insn::NextAwait { + program.emit_insn(Insn::Next { cursor_id, pc_if_next: loop_labels.loop_start, }); @@ -881,10 +846,7 @@ fn emit_seek( ) -> Result<()> { let Some(seek) = seek_def.seek.as_ref() else { assert!(seek_def.iter_dir == IterationDirection::Backwards, "A SeekDef without a seek operation should only be used in backwards iteration direction"); - program.emit_insn(Insn::LastAsync { - cursor_id: seek_cursor_id, - }); - program.emit_insn(Insn::LastAwait { + program.emit_insn(Insn::Last { cursor_id: seek_cursor_id, pc_if_empty: loop_end, }); diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 449d1e0e8..cc5df1f3d 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -102,11 +102,10 @@ pub fn translate_create_table( Some(SQLITE_TABLEID.to_owned()), CursorType::BTreeTable(table.clone()), ); - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, root_page: 1usize.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); // Add the table entry to sqlite_schema emit_schema_entry( @@ -220,15 +219,12 @@ pub fn emit_schema_entry( dest_reg: record_reg, }); - program.emit_insn(Insn::InsertAsync { + program.emit_insn(Insn::Insert { cursor: sqlite_schema_cursor_id, key_reg: rowid_reg, record_reg, flag: 0, }); - program.emit_insn(Insn::InsertAwait { - cursor_id: sqlite_schema_cursor_id, - }); } struct PrimaryKeyColumnInfo<'a> { @@ -499,11 +495,10 @@ pub fn translate_create_virtual_table( Some(SQLITE_TABLEID.to_owned()), CursorType::BTreeTable(table.clone()), ); - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, root_page: 1usize.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); let sql = create_vtable_body_to_str(&vtab); emit_schema_entry( @@ -578,19 +573,15 @@ pub fn translate_drop_table( Some(table_name.to_string()), 
CursorType::BTreeTable(schema_table.clone()), ); - program.emit_insn(Insn::OpenWriteAsync { + program.emit_insn(Insn::OpenWrite { cursor_id: sqlite_schema_cursor_id, root_page: 1usize.into(), }); - program.emit_insn(Insn::OpenWriteAwait {}); // 1. Remove all entries from the schema table related to the table we are dropping, except for triggers // loop to beginning of schema table - program.emit_insn(Insn::RewindAsync { - cursor_id: sqlite_schema_cursor_id, - }); let end_metadata_label = program.allocate_label(); - program.emit_insn(Insn::RewindAwait { + program.emit_insn(Insn::Rewind { cursor_id: sqlite_schema_cursor_id, pc_if_empty: end_metadata_label, }); @@ -625,18 +616,12 @@ pub fn translate_drop_table( cursor_id: sqlite_schema_cursor_id, dest: row_id_reg, }); - program.emit_insn(Insn::DeleteAsync { - cursor_id: sqlite_schema_cursor_id, - }); - program.emit_insn(Insn::DeleteAwait { + program.emit_insn(Insn::Delete { cursor_id: sqlite_schema_cursor_id, }); program.resolve_label(next_label, program.offset()); - program.emit_insn(Insn::NextAsync { - cursor_id: sqlite_schema_cursor_id, - }); - program.emit_insn(Insn::NextAwait { + program.emit_insn(Insn::Next { cursor_id: sqlite_schema_cursor_id, pc_if_next: metadata_loop, }); diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 78216204e..03c634d07 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -327,17 +327,11 @@ impl ProgramBuilder { } => { resolve(target_pc, "IfNot"); } - Insn::RewindAwait { - cursor_id: _cursor_id, - pc_if_empty, - } => { - resolve(pc_if_empty, "RewindAwait"); + Insn::Rewind { pc_if_empty, .. } => { + resolve(pc_if_empty, "Rewind"); } - Insn::LastAwait { - cursor_id: _cursor_id, - pc_if_empty, - } => { - resolve(pc_if_empty, "LastAwait"); + Insn::Last { pc_if_empty, .. } => { + resolve(pc_if_empty, "Last"); } Insn::Goto { target_pc } => { resolve(target_pc, "Goto"); @@ -366,11 +360,11 @@ impl ProgramBuilder { Insn::IfPos { target_pc, .. } => { resolve(target_pc, "IfPos"); } - Insn::NextAwait { pc_if_next, .. } => { - resolve(pc_if_next, "NextAwait"); + Insn::Next { pc_if_next, .. } => { + resolve(pc_if_next, "Next"); } - Insn::PrevAwait { pc_if_next, .. } => { - resolve(pc_if_next, "PrevAwait"); + Insn::Prev { pc_if_prev, .. 
} => { + resolve(pc_if_prev, "Prev"); } Insn::InitCoroutine { yield_reg: _, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0df9afcc4..be7caa078 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -813,14 +813,14 @@ pub fn op_if_not( Ok(InsnFunctionStepResult::Step) } -pub fn op_open_read_async( +pub fn op_open_read( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::OpenReadAsync { + let Insn::OpenRead { cursor_id, root_page, } = insn @@ -855,43 +855,32 @@ pub fn op_open_read_async( .replace(Cursor::new_btree(cursor)); } CursorType::Pseudo(_) => { - panic!("OpenReadAsync on pseudo cursor"); + panic!("OpenRead on pseudo cursor"); } CursorType::Sorter => { - panic!("OpenReadAsync on sorter cursor"); + panic!("OpenRead on sorter cursor"); } CursorType::VirtualTable(_) => { - panic!("OpenReadAsync on virtual table cursor, use Insn:VOpenAsync instead"); + panic!("OpenRead on virtual table cursor, use Insn:VOpen instead"); } } state.pc += 1; Ok(InsnFunctionStepResult::Step) } -pub fn op_open_read_await( +pub fn op_vopen( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_vopen_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::VOpenAsync { cursor_id } = insn else { + let Insn::VOpen { cursor_id } = insn else { unreachable!("unexpected Insn {:?}", insn) }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let CursorType::VirtualTable(virtual_table) = cursor_type else { - panic!("VOpenAsync on non-virtual table cursor"); + panic!("VOpen on non-virtual table cursor"); }; let cursor = virtual_table.open()?; state @@ -962,17 +951,6 @@ pub fn op_vcreate( Ok(InsnFunctionStepResult::Step) } -pub fn op_vopen_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - pub fn op_vfilter( program: &Program, state: &mut ProgramState, @@ -1119,7 +1097,7 @@ pub fn op_vnext( }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let CursorType::VirtualTable(virtual_table) = cursor_type else { - panic!("VNextAsync on non-virtual table cursor"); + panic!("VNext on non-virtual table cursor"); }; let has_more = { let mut cursor = state.get_cursor(*cursor_id); @@ -1161,53 +1139,14 @@ pub fn op_open_pseudo( Ok(InsnFunctionStepResult::Step) } -pub fn op_rewind_async( +pub fn op_rewind( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::RewindAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = - must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "RewindAsync"); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.rewind()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_last_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::LastAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "LastAsync"); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.last()); - } - state.pc += 1; - 
Ok(InsnFunctionStepResult::Step) -} - -pub fn op_last_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::LastAwait { + let Insn::Rewind { cursor_id, pc_if_empty, } = insn @@ -1216,9 +1155,9 @@ pub fn op_last_await( }; assert!(pc_if_empty.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "LastAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Rewind"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + return_if_io!(cursor.rewind()); cursor.is_empty() }; if is_empty { @@ -1229,14 +1168,14 @@ pub fn op_last_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_rewind_await( +pub fn op_last( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::RewindAwait { + let Insn::Last { cursor_id, pc_if_empty, } = insn @@ -1245,10 +1184,9 @@ pub fn op_rewind_await( }; assert!(pc_if_empty.is_offset()); let is_empty = { - let mut cursor = - must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "RewindAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Last"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + return_if_io!(cursor.last()); cursor.is_empty() }; if is_empty { @@ -1471,54 +1409,14 @@ pub fn op_result_row( return Ok(InsnFunctionStepResult::Row); } -pub fn op_next_async( +pub fn op_next( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::NextAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "NextAsync"); - let cursor = cursor.as_btree_mut(); - cursor.set_null_flag(false); - return_if_io!(cursor.next()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_prev_async( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::PrevAsync { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "PrevAsync"); - let cursor = cursor.as_btree_mut(); - cursor.set_null_flag(false); - return_if_io!(cursor.prev()); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_prev_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::PrevAwait { + let Insn::Next { cursor_id, pc_if_next, } = insn @@ -1527,9 +1425,11 @@ pub fn op_prev_await( }; assert!(pc_if_next.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "PrevAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Next"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + cursor.set_null_flag(false); + return_if_io!(cursor.next()); + cursor.is_empty() }; if !is_empty { @@ -1540,29 +1440,31 @@ pub fn op_prev_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_next_await( +pub fn op_prev( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::NextAwait { + let Insn::Prev { cursor_id, - pc_if_next, + pc_if_prev, } = insn else { unreachable!("unexpected Insn {:?}", insn) }; - 
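// The fold of Next/NextAwait into one opcode leans on the interpreter's
// re-entry contract: returning I/O leaves state.pc untouched, the statement
// is stepped again, and this same handler re-runs until cursor.next()
// completes. Assumed shape of the macro that implements this (simplified):
//
//   macro_rules! return_if_io {
//       ($expr:expr) => {
//           match $expr? {
//               CursorResult::Ok(v) => v,
//               CursorResult::IO => return Ok(InsnFunctionStepResult::IO),
//           }
//       };
//   }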
assert!(pc_if_next.is_offset()); + assert!(pc_if_prev.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "NextAwait"); + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Prev"); let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; + cursor.set_null_flag(false); + return_if_io!(cursor.prev()); + cursor.is_empty() }; if !is_empty { - state.pc = pc_if_next.to_offset_int(); + state.pc = pc_if_prev.to_offset_int(); } else { state.pc += 1; } @@ -3737,14 +3639,14 @@ pub fn op_yield( Ok(InsnFunctionStepResult::Step) } -pub fn op_insert_async( +pub fn op_insert( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::InsertAsync { + let Insn::Insert { cursor, key_reg, record_reg, @@ -3768,25 +3670,6 @@ pub fn op_insert_async( // if we were to set to false after starting a balance procedure, it might // leave undefined state. return_if_io!(cursor.insert(&BTreeKey::new_table_rowid(key as u64, Some(record)), true)); - } - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_insert_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::InsertAwait { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; // Only update last_insert_rowid for regular table inserts, not schema modifications if cursor.root_page() != 1 { if let Some(rowid) = cursor.rowid()? { @@ -3802,14 +3685,14 @@ pub fn op_insert_await( Ok(InsnFunctionStepResult::Step) } -pub fn op_delete_async( +pub fn op_delete( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::DeleteAsync { cursor_id } = insn else { + let Insn::Delete { cursor_id } = insn else { unreachable!("unexpected Insn {:?}", insn) }; { @@ -3817,19 +3700,21 @@ pub fn op_delete_async( let cursor = cursor.as_btree_mut(); return_if_io!(cursor.delete()); } + let prev_changes = program.n_change.get(); + program.n_change.set(prev_changes + 1); state.pc += 1; Ok(InsnFunctionStepResult::Step) } -pub fn op_idx_insert_async( +pub fn op_idx_insert( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - dbg!("op_idx_insert_async"); - if let Insn::IdxInsertAsync { + dbg!("op_idx_insert_"); + if let Insn::IdxInsert { cursor_id, record_reg, flags, @@ -3871,55 +3756,16 @@ pub fn op_idx_insert_async( dbg!(moved_before); // Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages, - // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to `Await` opcode + // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. 
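// Put differently: cursor.insert() may start a balance that replaces the
// current root, so if this opcode were re-entered and seeked again it could
// land on a page that no longer corresponds to the key. Passing moved_before
// tells the cursor the seek has already happened, so retries skip it.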
return_if_io!(cursor.insert(&BTreeKey::new_index_key(record), moved_before)); } - state.pc += 1; - } - Ok(InsnFunctionStepResult::Step) -} - -pub fn op_idx_insert_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - if let Insn::IdxInsertAwait { cursor_id } = *insn { - { - let mut cursor = state.get_cursor(cursor_id); - let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; - } // TODO: flag optimizations, update n_change if OPFLAG_NCHANGE state.pc += 1; } Ok(InsnFunctionStepResult::Step) } -pub fn op_delete_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::DeleteAwait { cursor_id } = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - cursor.wait_for_completion()?; - } - let prev_changes = program.n_change.get(); - program.n_change.set(prev_changes + 1); - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - pub fn op_new_rowid( program: &Program, state: &mut ProgramState, @@ -4072,14 +3918,14 @@ pub fn op_offset_limit( // this cursor may be reused for next insert // Update: tablemoveto is used to travers on not exists, on insert depending on flags if nonseek it traverses again. // If not there might be some optimizations obviously. -pub fn op_open_write_async( +pub fn op_open_write( program: &Program, state: &mut ProgramState, insn: &Insn, pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::OpenWriteAsync { + let Insn::OpenWrite { cursor_id, root_page, .. @@ -4093,7 +3939,7 @@ pub fn op_open_write_async( OwnedValue::Integer(val) => *val as u64, _ => { return Err(LimboError::InternalError( - "OpenWriteAsync: the value in root_page is not an integer".into(), + "OpenWrite: the value in root_page is not an integer".into(), )); } }, @@ -4128,20 +3974,6 @@ pub fn op_open_write_async( Ok(InsnFunctionStepResult::Step) } -pub fn op_open_write_await( - program: &Program, - state: &mut ProgramState, - insn: &Insn, - pager: &Rc, - mv_store: Option<&Rc>, -) -> Result { - let Insn::OpenWriteAwait {} = insn else { - unreachable!("unexpected Insn {:?}", insn) - }; - state.pc += 1; - Ok(InsnFunctionStepResult::Step) -} - pub fn op_copy( program: &Program, state: &mut ProgramState, @@ -4592,7 +4424,7 @@ pub fn op_open_ephemeral( panic!("OpenEphemeral on sorter cursor"); } CursorType::VirtualTable(_) => { - panic!("OpenEphemeral on virtual table cursor, use Insn::VOpenAsync instead"); + panic!("OpenEphemeral on virtual table cursor, use Insn::VOpen instead"); } } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index d4a766d1d..51b298816 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -336,11 +336,11 @@ pub fn insn_to_str( 0, format!("if !r[{}] goto {}", reg, target_pc.to_debug_int()), ), - Insn::OpenReadAsync { + Insn::OpenRead { cursor_id, root_page, } => ( - "OpenReadAsync", + "OpenRead", *cursor_id as i32, *root_page as i32, 0, @@ -355,17 +355,8 @@ pub fn insn_to_str( root_page ), ), - Insn::OpenReadAwait => ( - "OpenReadAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::VOpenAsync { cursor_id } => ( - "VOpenAsync", + Insn::VOpen { cursor_id } => ( + "VOpen", *cursor_id as i32, 0, 0, @@ -373,15 +364,6 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::VOpenAwait => ( - "VOpenAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), 
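// Each arm above fills the usual EXPLAIN tuple
// (opcode, p1, p2, p3, p4, p5, comment). For example, the folded Rewind
// opcode might now render as (illustrative addresses and values):
//
//   addr  opcode  p1  p2  p3  p4  p5  comment
//   ----  ------  --  --  --  --  --  -------
//   3     Rewind  0   12  0       0   Rewind users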
Insn::VCreate { table_name, module_name, @@ -462,27 +444,18 @@ pub fn insn_to_str( 0, format!("{} columns in r[{}]", num_fields, content_reg), ), - Insn::RewindAsync { cursor_id } => ( - "RewindAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::RewindAwait { + Insn::Rewind { cursor_id, pc_if_empty, } => ( - "RewindAwait", + "Rewind", *cursor_id as i32, pc_if_empty.to_debug_int(), 0, OwnedValue::build_text(""), 0, format!( - "Rewind table {}", + "Rewind {}", program.cursor_ref[*cursor_id] .0 .as_ref() @@ -573,20 +546,11 @@ pub fn insn_to_str( format!("output=r[{}..{}]", start_reg, start_reg + count - 1) }, ), - Insn::NextAsync { cursor_id } => ( - "NextAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::NextAwait { + Insn::Next { cursor_id, pc_if_next, } => ( - "NextAwait", + "Next", *cursor_id as i32, pc_if_next.to_debug_int(), 0, @@ -795,14 +759,14 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::IdxInsertAsync { + Insn::IdxInsert { cursor_id, record_reg, unpacked_start, flags, .. } => ( - "IdxInsertAsync", + "IdxInsert", *cursor_id as i32, *record_reg as i32, unpacked_start.unwrap_or(0) as i32, @@ -810,15 +774,6 @@ pub fn insn_to_str( flags.0 as u16, format!("key=r[{}]", record_reg), ), - Insn::IdxInsertAwait { cursor_id } => ( - "IdxInsertAwait", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), Insn::IdxGT { cursor_id, start_reg, @@ -1034,13 +989,13 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::InsertAsync { + Insn::Insert { cursor, key_reg, record_reg, flag, } => ( - "InsertAsync", + "Insert", *cursor as i32, *record_reg as i32, *key_reg as i32, @@ -1048,26 +1003,8 @@ pub fn insn_to_str( *flag as u16, "".to_string(), ), - Insn::InsertAwait { cursor_id } => ( - "InsertAwait", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::DeleteAsync { cursor_id } => ( - "DeleteAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::DeleteAwait { cursor_id } => ( - "DeleteAwait", + Insn::Delete { cursor_id } => ( + "Delete", *cursor_id as i32, 0, 0, @@ -1135,12 +1072,12 @@ pub fn insn_to_str( limit_reg, combined_reg, limit_reg, offset_reg, combined_reg ), ), - Insn::OpenWriteAsync { + Insn::OpenWrite { cursor_id, root_page, .. } => ( - "OpenWriteAsync", + "OpenWrite", *cursor_id as i32, match root_page { RegisterOrLiteral::Literal(i) => *i as _, @@ -1151,15 +1088,6 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::OpenWriteAwait {} => ( - "OpenWriteAwait", - 0, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), Insn::Copy { src_reg, dst_reg, @@ -1221,10 +1149,13 @@ pub fn insn_to_str( 0, "".to_string(), ), - Insn::LastAsync { cursor_id } => ( - "LastAsync", + Insn::Last { + cursor_id, + pc_if_empty, + } => ( + "Last", *cursor_id as i32, - 0, + pc_if_empty.to_debug_int(), 0, OwnedValue::build_text(""), 0, @@ -1248,28 +1179,13 @@ pub fn insn_to_str( 0, where_clause.clone(), ), - Insn::LastAwait { cursor_id, .. } => ( - "LastAwait", + Insn::Prev { + cursor_id, + pc_if_prev, + } => ( + "Prev", *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::PrevAsync { cursor_id } => ( - "PrevAsync", - *cursor_id as i32, - 0, - 0, - OwnedValue::build_text(""), - 0, - "".to_string(), - ), - Insn::PrevAwait { cursor_id, .. 
} => ( - "PrevAwait", - *cursor_id as i32, - 0, + pc_if_prev.to_debug_int(), 0, OwnedValue::build_text(""), 0, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index e12293a71..57c327b8c 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -257,22 +257,16 @@ pub enum Insn { jump_if_null: bool, }, /// Open a cursor for reading. - OpenReadAsync { + OpenRead { cursor_id: CursorID, root_page: PageIdx, }, - /// Await for the completion of open cursor. - OpenReadAwait, - /// Open a cursor for a virtual table. - VOpenAsync { + VOpen { cursor_id: CursorID, }, - /// Await for the completion of open cursor for a virtual table. - VOpenAwait, - /// Create a new virtual table. VCreate { module_name: usize, // P1: Name of the module that contains the virtual table implementation @@ -319,21 +313,12 @@ pub enum Insn { }, /// Rewind the cursor to the beginning of the B-Tree. - RewindAsync { - cursor_id: CursorID, - }, - - /// Await for the completion of cursor rewind. - RewindAwait { + Rewind { cursor_id: CursorID, pc_if_empty: BranchOffset, }, - LastAsync { - cursor_id: CursorID, - }, - - LastAwait { + Last { cursor_id: CursorID, pc_if_empty: BranchOffset, }, @@ -368,23 +353,14 @@ pub enum Insn { }, /// Advance the cursor to the next row. - NextAsync { - cursor_id: CursorID, - }, - - /// Await for the completion of cursor advance. - NextAwait { + Next { cursor_id: CursorID, pc_if_next: BranchOffset, }, - PrevAsync { + Prev { cursor_id: CursorID, - }, - - PrevAwait { - cursor_id: CursorID, - pc_if_next: BranchOffset, + pc_if_prev: BranchOffset, }, /// Halt the program. @@ -498,16 +474,13 @@ pub enum Insn { /// P3 + P4 are for the original column values that make up that key in unpacked (pre-serialized) form. /// If P5 has the OPFLAG_APPEND bit set, that is a hint to the b-tree layer that this insert is likely to be an append. /// OPFLAG_NCHANGE bit set, then the change counter is incremented by this instruction. If the OPFLAG_NCHANGE bit is clear, then the change counter is unchanged - IdxInsertAsync { + IdxInsert { cursor_id: CursorID, record_reg: usize, // P2 the register containing the record to insert unpacked_start: Option, // P3 the index of the first register for the unpacked key unpacked_count: Option, // P4 # of unpacked values in the key in P2 flags: IdxInsertFlags, // TODO: optimization }, - IdxInsertAwait { - cursor_id: CursorID, - }, /// The P4 register values beginning with P3 form an unpacked index key that omits the PRIMARY KEY. Compare this key value against the index that P1 is currently pointing to, ignoring the PRIMARY KEY or ROWID fields at the end. /// If the P1 index entry is greater or equal than the key value then jump to P2. Otherwise fall through to the next instruction. @@ -642,22 +615,14 @@ pub enum Insn { end_offset: BranchOffset, }, - InsertAsync { + Insert { cursor: CursorID, key_reg: usize, // Must be int. record_reg: usize, // Blob of record data. flag: usize, // Flags used by insert, for now not used. }, - InsertAwait { - cursor_id: usize, - }, - - DeleteAsync { - cursor_id: CursorID, - }, - - DeleteAwait { + Delete { cursor_id: CursorID, }, @@ -687,13 +652,11 @@ pub enum Insn { offset_reg: usize, }, - OpenWriteAsync { + OpenWrite { cursor_id: CursorID, root_page: RegisterOrLiteral, }, - OpenWriteAwait {}, - Copy { src_reg: usize, dst_reg: usize, @@ -847,28 +810,22 @@ impl Insn { Insn::Ge { .. } => execute::op_ge, Insn::If { .. } => execute::op_if, Insn::IfNot { .. } => execute::op_if_not, - Insn::OpenReadAsync { .. 
} => execute::op_open_read_async, - Insn::OpenReadAwait => execute::op_open_read_await, - Insn::VOpenAsync { .. } => execute::op_vopen_async, - Insn::VOpenAwait => execute::op_vopen_await, + Insn::OpenRead { .. } => execute::op_open_read, + Insn::VOpen { .. } => execute::op_vopen, Insn::VCreate { .. } => execute::op_vcreate, Insn::VFilter { .. } => execute::op_vfilter, Insn::VColumn { .. } => execute::op_vcolumn, Insn::VUpdate { .. } => execute::op_vupdate, Insn::VNext { .. } => execute::op_vnext, Insn::OpenPseudo { .. } => execute::op_open_pseudo, - Insn::RewindAsync { .. } => execute::op_rewind_async, - Insn::RewindAwait { .. } => execute::op_rewind_await, - Insn::LastAsync { .. } => execute::op_last_async, - Insn::LastAwait { .. } => execute::op_last_await, + Insn::Rewind { .. } => execute::op_rewind, + Insn::Last { .. } => execute::op_last, Insn::Column { .. } => execute::op_column, Insn::TypeCheck { .. } => execute::op_type_check, Insn::MakeRecord { .. } => execute::op_make_record, Insn::ResultRow { .. } => execute::op_result_row, - Insn::NextAsync { .. } => execute::op_next_async, - Insn::NextAwait { .. } => execute::op_next_await, - Insn::PrevAsync { .. } => execute::op_prev_async, - Insn::PrevAwait { .. } => execute::op_prev_await, + Insn::Next { .. } => execute::op_next, + Insn::Prev { .. } => execute::op_prev, Insn::Halt { .. } => execute::op_halt, Insn::Transaction { .. } => execute::op_transaction, Insn::AutoCommit { .. } => execute::op_auto_commit, @@ -904,19 +861,15 @@ impl Insn { Insn::InitCoroutine { .. } => execute::op_init_coroutine, Insn::EndCoroutine { .. } => execute::op_end_coroutine, Insn::Yield { .. } => execute::op_yield, - Insn::InsertAsync { .. } => execute::op_insert_async, - Insn::InsertAwait { .. } => execute::op_insert_await, - Insn::IdxInsertAsync { .. } => execute::op_idx_insert_async, - Insn::IdxInsertAwait { .. } => execute::op_idx_insert_await, - Insn::DeleteAsync { .. } => execute::op_delete_async, - Insn::DeleteAwait { .. } => execute::op_delete_await, + Insn::Insert { .. } => execute::op_insert, + Insn::IdxInsert { .. } => execute::op_idx_insert, + Insn::Delete { .. } => execute::op_delete, Insn::NewRowid { .. } => execute::op_new_rowid, Insn::MustBeInt { .. } => execute::op_must_be_int, Insn::SoftNull { .. } => execute::op_soft_null, Insn::NotExists { .. } => execute::op_not_exists, Insn::OffsetLimit { .. } => execute::op_offset_limit, - Insn::OpenWriteAsync { .. } => execute::op_open_write_async, - Insn::OpenWriteAwait { .. } => execute::op_open_write_await, + Insn::OpenWrite { .. } => execute::op_open_write, Insn::Copy { .. } => execute::op_copy, Insn::CreateBtree { .. } => execute::op_create_btree, Insn::Destroy { .. } => execute::op_destroy, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a95cc7fba..c9444dcc5 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -563,8 +563,8 @@ fn print_insn(program: &Program, addr: InsnReference, insn: &Insn, indent: Strin fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&Insn>) -> usize { let indent_count = if let Some(insn) = prev_insn { match insn { - Insn::RewindAwait { .. } - | Insn::LastAwait { .. } + Insn::Rewind { .. } + | Insn::Last { .. } | Insn::SorterSort { .. } | Insn::SeekGE { .. } | Insn::SeekGT { .. } @@ -578,9 +578,7 @@ fn get_indent_count(indent_count: usize, curr_insn: &Insn, prev_insn: Option<&In }; match curr_insn { - Insn::NextAsync { .. } | Insn::SorterNext { .. } | Insn::PrevAsync { .. } => { - indent_count - 1 - } + Insn::Next { .. 
} | Insn::SorterNext { .. } | Insn::Prev { .. } => indent_count - 1,
        _ => indent_count,
    }
}

From ee660187dce581d2ac9ac2d3e65a6a5a33cb6562 Mon Sep 17 00:00:00 2001
From: TcMits
Date: Mon, 14 Apr 2025 14:25:18 +0700
Subject: [PATCH 226/425] fix negative free space after balance-shallower

---
 core/storage/btree.rs | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 457115201..3e5b70b7f 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -2451,24 +2451,41 @@ impl BTreeCursor {
             rightmost_pointer,
         );

+        let first_child_page = &pages_to_balance_new[0];
+        let first_child_contents = first_child_page.get_contents();
+
         // TODO: vacuum support
-        if parent_is_root && parent_contents.cell_count() == 0 {
+        if parent_is_root
+            && parent_contents.cell_count() == 0
+
+            // this check makes sure we do not end up with negative free space
+            && parent_contents.offset
+                <= compute_free_space(first_child_contents, self.usable_space() as u16)
+                    as usize
+        {
             // From SQLite:
             // The root page of the b-tree now contains no cells. The only sibling
             // page is the right-child of the parent. Copy the contents of the
             // child page into the parent, decreasing the overall height of the
             // b-tree structure by one. This is described as the "balance-shallower"
+            // sub-algorithm in some documentation.
             assert!(sibling_count_new == 1);
             let parent_offset = if parent_page.get().id == 1 {
                 DATABASE_HEADER_SIZE
             } else {
                 0
             };
-            let child_page = &pages_to_balance_new[0];
-            let child_contents = child_page.get_contents();
-            let child_top = child_contents.cell_content_area() as usize;
+
+            // From SQLite:
+            // It is critical that the child page be defragmented before being
+            // copied into the parent, because if the parent is page 1 then it will
+            // be smaller than the child due to the database header, and so
+            // all the free space needs to be up front.
+            defragment_page(first_child_contents, self.usable_space() as u16);
+
+            let child_top = first_child_contents.cell_content_area() as usize;
             let parent_buf = parent_contents.as_ptr();
-            let child_buf = child_contents.as_ptr();
+            let child_buf = first_child_contents.as_ptr();
             let content_size = self.usable_space() - child_top;

             // Copy cell contents
@@ -2478,17 +2495,17 @@ impl BTreeCursor {
             // Copy header and pointer
             // NOTE: don't use .cell_pointer_array_offset_and_size() because of different
             // header size
-            let header_and_pointer_size =
-                child_contents.header_size() + child_contents.cell_pointer_array_size();
+            let header_and_pointer_size = first_child_contents.header_size()
+                + first_child_contents.cell_pointer_array_size();
             parent_buf[parent_offset..parent_offset + header_and_pointer_size]
                 .copy_from_slice(
-                    &child_buf[child_contents.offset
-                        ..child_contents.offset + header_and_pointer_size],
+                    &child_buf[first_child_contents.offset
+                        ..first_child_contents.offset + header_and_pointer_size],
                 );

             self.stack.set_cell_index(0); // reset cell index, top is already parent
             self.pager
-                .free_page(Some(child_page.clone()), child_page.get().id)?;
+                .free_page(Some(first_child_page.clone()), first_child_page.get().id)?;
         }

         // We have to free pages that are not used anymore

From f79da7194f12a1093b7e5bdb922d87cdcd4a3311 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Mon, 14 Apr 2025 11:02:15 +0300
Subject: [PATCH 227/425] implement Iterator for SmallVec and add const generic
 for array size

---
 core/storage/sqlite3_ondisk.rs | 84 ++++++++++++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 4 deletions(-)

diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs
index 10251ca51..5f742887e 100644
--- a/core/storage/sqlite3_ondisk.rs
+++ b/core/storage/sqlite3_ondisk.rs
@@ -1057,13 +1057,16 @@ pub fn validate_serial_type(value: u64) -> Result {
     }
 }

-struct SmallVec<T> {
-    pub data: [std::mem::MaybeUninit<T>; 64],
+pub struct SmallVec<T, const N: usize> {
+    /// Stack allocated data
+    pub data: [std::mem::MaybeUninit<T>; N],
+    /// Length of the vector, accounting for both stack and heap allocated data
     pub len: usize,
+    /// Extra data on heap
     pub extra_data: Option<Vec<T>>,
 }

-impl SmallVec {
+impl SmallVec {
     pub fn new() -> Self {
         Self {
             data: unsafe { std::mem::MaybeUninit::uninit().assume_init() },
@@ -1084,6 +1087,50 @@ impl SmallVec {
             self.len += 1;
         }
     }
+
+    fn get_from_heap(&self, index: usize) -> T {
+        assert!(self.extra_data.is_some());
+        assert!(index >= self.data.len());
+        let extra_data_index = index - self.data.len();
+        let extra_data = self.extra_data.as_ref().unwrap();
+        assert!(extra_data_index < extra_data.len());
+        extra_data[extra_data_index]
+    }
+
+    pub fn get(&self, index: usize) -> Option<T> {
+        if index >= self.len {
+            return None;
+        }
+        let data_is_on_stack = index < self.data.len();
+        if data_is_on_stack {
+            // SAFETY: We know this index is initialized because we checked index < self.len earlier above.
+ unsafe { Some(self.data[index].assume_init()) } + } else { + Some(self.get_from_heap(index)) + } + } +} + +impl SmallVec { + pub fn iter(&self) -> SmallVecIter<'_, T, N> { + SmallVecIter { vec: self, pos: 0 } + } +} + +pub struct SmallVecIter<'a, T, const N: usize> { + vec: &'a SmallVec, + pos: usize, +} + +impl<'a, T: Default + Copy, const N: usize> Iterator for SmallVecIter<'a, T, N> { + type Item = T; + + fn next(&mut self) -> Option { + self.vec.get(self.pos).map(|item| { + self.pos += 1; + item + }) + } } pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Result<()> { @@ -1099,7 +1146,7 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res let mut header_size = (header_size as usize) - nr; pos += nr; - let mut serial_types = SmallVec::new(); + let mut serial_types = SmallVec::::new(); while header_size > 0 { let (serial_type, nr) = read_varint(&reuse_immutable.get_payload()[pos..])?; let serial_type = validate_serial_type(serial_type)?; @@ -1685,4 +1732,33 @@ mod tests { let result = validate_serial_type(10); assert!(result.is_err()); } + + #[test] + fn test_smallvec_iter() { + let mut small_vec = SmallVec::::new(); + (0..8).for_each(|i| small_vec.push(i)); + + let mut iter = small_vec.iter(); + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next(), Some(1)); + assert_eq!(iter.next(), Some(2)); + assert_eq!(iter.next(), Some(3)); + assert_eq!(iter.next(), Some(4)); + assert_eq!(iter.next(), Some(5)); + assert_eq!(iter.next(), Some(6)); + assert_eq!(iter.next(), Some(7)); + assert_eq!(iter.next(), None); + } + + #[test] + fn test_smallvec_get() { + let mut small_vec = SmallVec::::new(); + (0..8).for_each(|i| small_vec.push(i)); + + (0..8).for_each(|i| { + assert_eq!(small_vec.get(i), Some(i as i32)); + }); + + assert_eq!(small_vec.get(8), None); + } } From 9dadc58194a4025132709623d3536dd66988000d Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 10:33:42 +0300 Subject: [PATCH 228/425] Add support for Insn::Once --- COMPAT.md | 2 +- core/vdbe/execute.rs | 28 ++++++++++++++++++++++++++++ core/vdbe/explain.rs | 11 +++++++++++ core/vdbe/insn.rs | 5 +++++ core/vdbe/mod.rs | 4 ++++ 5 files changed, 49 insertions(+), 1 deletion(-) diff --git a/COMPAT.md b/COMPAT.md index 2300a6f07..14199f709 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -501,7 +501,7 @@ Modifiers: | NotNull | Yes | | | Null | Yes | | | NullRow | Yes | | -| Once | No | | +| Once | Yes | | | OpenAutoindex | No | | | OpenEphemeral | No | | | OpenPseudo | Yes | | diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index be7caa078..41ae9bbe6 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4432,6 +4432,34 @@ pub fn op_open_ephemeral( Ok(InsnFunctionStepResult::Step) } +/// Execute the [Insn::Once] instruction. +/// +/// This instruction is used to execute a block of code only once. +/// If the instruction is executed again, it will jump to the target program counter. 
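+///
+/// A rough sketch of the bytecode shape this enables (the addresses and the
+/// one-time-initialization use case are illustrative assumptions, not taken
+/// from this patch):
+///
+///   0  Once  target=3   ; falls through on the first pass, jumps afterwards
+///   1  ...               ; one-time work, e.g. materializing a subquery
+///   2  ...
+///   3  ...               ; re-entry point on every later pass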
+pub fn op_once( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::Once { + target_pc_when_reentered, + } = insn + else { + unreachable!("unexpected Insn: {:?}", insn) + }; + assert!(target_pc_when_reentered.is_offset()); + let offset = state.pc; + if state.once.iter().any(|o| o == offset) { + state.pc = target_pc_when_reentered.to_offset_int(); + return Ok(InsnFunctionStepResult::Step); + } + state.once.push(offset); + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + fn exec_lower(reg: &OwnedValue) -> Option { match reg { OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 51b298816..2d7050004 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1321,6 +1321,17 @@ pub fn insn_to_str( if *is_table { "true" } else { "false" } ), ), + Insn::Once { + target_pc_when_reentered, + } => ( + "Once", + target_pc_when_reentered.to_debug_int(), + 0, + 0, + OwnedValue::build_text(""), + 0, + format!("goto {}", target_pc_when_reentered.to_debug_int()), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 57c327b8c..f9a8b5e76 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -780,6 +780,10 @@ pub enum Insn { cursor_id: usize, is_table: bool, }, + /// Fall through to the next instruction on the first invocation, otherwise jump to target_pc + Once { + target_pc_when_reentered: BranchOffset, + }, } impl Insn { @@ -889,6 +893,7 @@ impl Insn { Insn::PageCount { .. } => execute::op_page_count, Insn::ReadCookie { .. } => execute::op_read_cookie, Insn::OpenEphemeral { .. } => execute::op_open_ephemeral, + Insn::Once { .. } => execute::op_once, } } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index c9444dcc5..1d1ad0b77 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -28,6 +28,7 @@ use crate::{ error::LimboError, fast_lock::SpinLock, function::{AggFunc, FuncCtx}, + storage::sqlite3_ondisk::SmallVec, }; use crate::{ @@ -232,6 +233,8 @@ pub struct ProgramState { last_compare: Option, deferred_seek: Option<(CursorID, CursorID)>, ended_coroutine: Bitfield<4>, // flag to indicate that a coroutine has ended (key is the yield register. currently we assume that the yield register is always between 0-255, YOLO) + /// Indicate whether an [Insn::Once] instruction at a given program counter position has already been executed, well, once. 
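+    /// Membership is checked with a linear scan over the recorded program
+    /// counters (see op_once); the assumption here, ours rather than the
+    /// patch's, is that a program holds few enough Once instructions for
+    /// that scan to stay cheap.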
+ once: SmallVec, regex_cache: RegexCache, pub(crate) mv_tx_id: Option, interrupted: bool, @@ -254,6 +257,7 @@ impl ProgramState { last_compare: None, deferred_seek: None, ended_coroutine: Bitfield::new(), + once: SmallVec::::new(), regex_cache: RegexCache::new(), mv_tx_id: None, interrupted: false, From 23f8fffe1283a3a47bab0b602f36371832366b31 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 10:39:32 +0300 Subject: [PATCH 229/425] Add Insn::OpenAutoindex, which is just an alias for OpenEphemeral --- COMPAT.md | 4 ++-- core/vdbe/execute.rs | 21 +++++++++++---------- core/vdbe/explain.rs | 9 +++++++++ core/vdbe/insn.rs | 6 +++++- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 14199f709..11024a582 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -502,8 +502,8 @@ Modifiers: | Null | Yes | | | NullRow | Yes | | | Once | Yes | | -| OpenAutoindex | No | | -| OpenEphemeral | No | | +| OpenAutoindex | Yes | | +| OpenEphemeral | Yes | | | OpenPseudo | Yes | | | OpenRead | Yes | | | OpenWrite | Yes | | diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 41ae9bbe6..ec91c24fb 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4350,12 +4350,13 @@ pub fn op_open_ephemeral( pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - let Insn::OpenEphemeral { - cursor_id, - is_table, - } = insn - else { - unreachable!("unexpected Insn {:?}", insn) + let (cursor_id, is_table) = match insn { + Insn::OpenEphemeral { + cursor_id, + is_table, + } => (*cursor_id, *is_table), + Insn::OpenAutoindex { cursor_id } => (*cursor_id, false), + _ => unreachable!("unexpected Insn {:?}", insn), }; let conn = program.connection.upgrade().unwrap(); @@ -4378,7 +4379,7 @@ pub fn op_open_ephemeral( buffer_pool, )?); - let flag = if *is_table { + let flag = if is_table { &CreateBTreeFlags::new_table() } else { &CreateBTreeFlags::new_index() @@ -4386,7 +4387,7 @@ pub fn op_open_ephemeral( let root_page = pager.btree_create(flag); - let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); + let (_, cursor_type) = program.cursor_ref.get(cursor_id).unwrap(); let mv_cursor = match state.mv_tx_id { Some(tx_id) => { let table_id = root_page as u64; @@ -4407,13 +4408,13 @@ pub fn op_open_ephemeral( match cursor_type { CursorType::BTreeTable(_) => { cursors - .get_mut(*cursor_id) + .get_mut(cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } CursorType::BTreeIndex(_) => { cursors - .get_mut(*cursor_id) + .get_mut(cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 2d7050004..42400978f 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1321,6 +1321,15 @@ pub fn insn_to_str( if *is_table { "true" } else { "false" } ), ), + Insn::OpenAutoindex { cursor_id } => ( + "OpenAutoindex", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(""), + 0, + format!("cursor={}", cursor_id), + ), Insn::Once { target_pc_when_reentered, } => ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f9a8b5e76..8c94b02a4 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -780,6 +780,10 @@ pub enum Insn { cursor_id: usize, is_table: bool, }, + /// Works the same as OpenEphemeral, name just distinguishes its use; used for transient indexes in joins. 
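+    /// An illustrative trigger (assumed, not taken from this patch): in a
+    /// query such as SELECT * FROM a JOIN b ON a.x = b.y with no index on
+    /// b.y, a planner may build a one-off index on b.y rather than
+    /// rescanning b for every row of a.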
+ OpenAutoindex { + cursor_id: usize, + }, /// Fall through to the next instruction on the first invocation, otherwise jump to target_pc Once { target_pc_when_reentered: BranchOffset, @@ -892,7 +896,7 @@ impl Insn { Insn::Noop => execute::op_noop, Insn::PageCount { .. } => execute::op_page_count, Insn::ReadCookie { .. } => execute::op_read_cookie, - Insn::OpenEphemeral { .. } => execute::op_open_ephemeral, + Insn::OpenEphemeral { .. } | Insn::OpenAutoindex { .. } => execute::op_open_ephemeral, Insn::Once { .. } => execute::op_once, } } From 651c00b4a88376153cceb56d36ccc5281635e38e Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 14 Apr 2025 11:02:17 +0200 Subject: [PATCH 230/425] allow index entry delete --- core/storage/btree.rs | 116 +++++++++++++++++++++++++++--------------- core/storage/pager.rs | 7 +++ 2 files changed, 83 insertions(+), 40 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3af102854..f4dda7def 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -143,6 +143,12 @@ struct DestroyInfo { state: DestroyState, } +#[derive(Debug, Clone)] +enum DeleteSavepoint { + Rowid(u64), + Payload(ImmutableRecord), +} + #[derive(Debug, Clone)] enum DeleteState { Start, @@ -162,13 +168,13 @@ enum DeleteState { }, CheckNeedsBalancing, StartBalancing { - target_rowid: u64, + target_key: DeleteSavepoint, }, WaitForBalancingToComplete { - target_rowid: u64, + target_key: DeleteSavepoint, }, SeekAfterBalancing { - target_rowid: u64, + target_key: DeleteSavepoint, }, StackRetreat, Finish, @@ -1157,7 +1163,6 @@ impl BTreeCursor { pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp); - tracing::trace!("backtrace: {}", std::backtrace::Backtrace::force_capture()); // For a table with N rows, we can find any row by row id in O(log(N)) time by starting at the root page and following the B-tree pointers. // B-trees consist of interior pages and leaf pages. Interior pages contain pointers to other pages, while leaf pages contain the actual row data. 
//
@@ -3062,13 +3067,24 @@ impl BTreeCursor {
         match delete_state {
             DeleteState::Start => {
-                let _target_rowid = match self.rowid.get() {
-                    Some(rowid) => rowid,
-                    None => {
+                let page = self.stack.top();
+                if matches!(
+                    page.get_contents().page_type(),
+                    PageType::TableLeaf | PageType::TableInterior
+                ) {
+                    let _target_rowid = match self.rowid.get() {
+                        Some(rowid) => rowid,
+                        None => {
                             self.state = CursorState::None;
                             return Ok(CursorResult::Ok(()));
                         }
-                };
+                    };
+                } else {
+                    if self.reusable_immutable_record.borrow().is_none() {
                         self.state = CursorState::None;
                         return Ok(CursorResult::Ok(()));
                     }
+                }

                 let delete_info = self.state.mut_delete_info().unwrap();
                 delete_info.state = DeleteState::LoadPage;
@@ -3111,6 +3127,7 @@ impl BTreeCursor {
                 let original_child_pointer = match &cell {
                     BTreeCell::TableInteriorCell(interior) => Some(interior._left_child_page),
+                    BTreeCell::IndexInteriorCell(interior) => Some(interior.left_child_page),
                     _ => None,
                 };
@@ -3157,27 +3174,21 @@ impl BTreeCursor {
                 return_if_io!(self.prev());

                 let leaf_page = self.stack.top();
-                return_if_locked!(leaf_page);
+                return_if_locked_maybe_load!(self.pager, leaf_page);
+                assert!(
+                    matches!(
+                        leaf_page.get_contents().page_type(),
+                        PageType::TableLeaf | PageType::IndexLeaf
+                    ),
+                    "self.prev should have returned a leaf page"
+                );

-                if !leaf_page.is_loaded() {
-                    self.pager.load_page(leaf_page.clone())?;
-                    return Ok(CursorResult::IO);
-                }
-
-                let parent_page = {
-                    self.stack.pop();
-                    let parent = self.stack.top();
-                    self.stack.push(leaf_page.clone());
-                    parent
-                };
-
-                if !parent_page.is_loaded() {
-                    self.pager.load_page(parent_page.clone())?;
-                    return Ok(CursorResult::IO);
-                }
+                let parent_page = self.stack.parent_page().unwrap();
+                assert!(parent_page.is_loaded(), "parent page");

                 let leaf_contents = leaf_page.get().contents.as_ref().unwrap();
+                // The index of the cell to be removed must be the last one.
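+                // (Explanatory note: the self.prev() call above left the cursor
+                // on the left sibling's last cell, i.e. the in-order predecessor
+                // of the interior entry being replaced.)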
+ let leaf_cell_idx = leaf_contents.cell_count() - 1; let predecessor_cell = leaf_contents.cell_get( leaf_cell_idx, payload_overflow_threshold_max( @@ -3196,14 +3207,17 @@ impl BTreeCursor { let parent_contents = parent_page.get().contents.as_mut().unwrap(); - // Create an interior cell from the leaf cell + // Create an interior cell from a predecessor let mut cell_payload: Vec = Vec::new(); + let child_pointer = original_child_pointer.expect("there should be a pointer"); match predecessor_cell { BTreeCell::TableLeafCell(leaf_cell) => { - if let Some(child_pointer) = original_child_pointer { - cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); - write_varint_to_vec(leaf_cell._rowid, &mut cell_payload); - } + cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); + write_varint_to_vec(leaf_cell._rowid, &mut cell_payload); + } + BTreeCell::IndexLeafCell(leaf_cell) => { + cell_payload.extend_from_slice(&child_pointer.to_be_bytes()); + cell_payload.extend_from_slice(leaf_cell.payload); } _ => unreachable!("Expected table leaf cell"), } @@ -3252,17 +3266,21 @@ impl BTreeCursor { let free_space = compute_free_space(contents, self.usable_space() as u16); let needs_balancing = free_space as usize * 3 > self.usable_space() * 2; - let target_rowid = self.rowid.get().unwrap(); + let target_key = if page.is_index() { + DeleteSavepoint::Payload(self.record().as_ref().unwrap().clone()) + } else { + DeleteSavepoint::Rowid(self.rowid.get().unwrap()) + }; let delete_info = self.state.mut_delete_info().unwrap(); if needs_balancing { - delete_info.state = DeleteState::StartBalancing { target_rowid }; + delete_info.state = DeleteState::StartBalancing { target_key }; } else { delete_info.state = DeleteState::StackRetreat; } } - DeleteState::StartBalancing { target_rowid } => { + DeleteState::StartBalancing { target_key } => { let delete_info = self.state.mut_delete_info().unwrap(); if delete_info.balance_write_info.is_none() { @@ -3271,10 +3289,10 @@ impl BTreeCursor { delete_info.balance_write_info = Some(write_info); } - delete_info.state = DeleteState::WaitForBalancingToComplete { target_rowid } + delete_info.state = DeleteState::WaitForBalancingToComplete { target_key } } - DeleteState::WaitForBalancingToComplete { target_rowid } => { + DeleteState::WaitForBalancingToComplete { target_key } => { let delete_info = self.state.mut_delete_info().unwrap(); // Switch the CursorState to Write state for balancing @@ -3292,7 +3310,7 @@ impl BTreeCursor { // Move to seek state self.state = CursorState::Delete(DeleteInfo { - state: DeleteState::SeekAfterBalancing { target_rowid }, + state: DeleteState::SeekAfterBalancing { target_key }, balance_write_info: Some(write_info), }); } @@ -3305,7 +3323,7 @@ impl BTreeCursor { }; self.state = CursorState::Delete(DeleteInfo { - state: DeleteState::WaitForBalancingToComplete { target_rowid }, + state: DeleteState::WaitForBalancingToComplete { target_key }, balance_write_info: Some(write_info), }); return Ok(CursorResult::IO); @@ -3313,8 +3331,14 @@ impl BTreeCursor { } } - DeleteState::SeekAfterBalancing { target_rowid } => { - return_if_io!(self.move_to(SeekKey::TableRowId(target_rowid), SeekOp::EQ)); + DeleteState::SeekAfterBalancing { target_key } => { + let key = match &target_key { + DeleteSavepoint::Rowid(rowid) => SeekKey::TableRowId(*rowid), + DeleteSavepoint::Payload(immutable_record) => { + SeekKey::IndexKey(immutable_record) + } + }; + return_if_io!(self.move_to(key, SeekOp::EQ)); let delete_info = self.state.mut_delete_info().unwrap(); 
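                // Balancing can shuffle cells between pages, which is why the
                // saved key was re-sought above before finishing; a note for
                // readers, not behavior introduced by this patch.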
delete_info.state = DeleteState::Finish; @@ -3881,6 +3905,18 @@ impl PageStack { fn clear(&self) { self.current_page.set(-1); } + pub fn parent_page(&self) -> Option { + if self.current_page.get() > 0 { + Some( + self.stack.borrow()[self.current() - 1] + .as_ref() + .unwrap() + .clone(), + ) + } else { + None + } + } } impl CellArray { diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 9d7affa95..51cafbfac 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -123,6 +123,13 @@ impl Page { tracing::debug!("clear loaded {}", self.get().id); self.get().flags.fetch_and(!PAGE_LOADED, Ordering::SeqCst); } + + pub fn is_index(&self) -> bool { + match self.get_contents().page_type() { + PageType::IndexLeaf | PageType::IndexInterior => true, + PageType::TableLeaf | PageType::TableInterior => false, + } + } } #[derive(Clone, Copy, Debug)] From 18d779924bcf78b1fcc19e2374ead46e65313114 Mon Sep 17 00:00:00 2001 From: TcMits Date: Mon, 14 Apr 2025 18:13:47 +0700 Subject: [PATCH 231/425] post validation should be after balance-shallower --- core/storage/btree.rs | 132 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 19 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3e5b70b7f..b9c2d3bae 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2438,23 +2438,9 @@ impl BTreeCursor { } } - #[cfg(debug_assertions)] - self.post_balance_non_root_validation( - &parent_page, - balance_info, - parent_contents, - &pages_to_balance_new, - page_type, - leaf_data, - cells_debug, - sibling_count_new, - rightmost_pointer, - ); - + // TODO: vacuum support let first_child_page = &pages_to_balance_new[0]; let first_child_contents = first_child_page.get_contents(); - - // TODO: vacuum support if parent_is_root && parent_contents.cell_count() == 0 @@ -2504,10 +2490,23 @@ impl BTreeCursor { ); self.stack.set_cell_index(0); // reset cell index, top is already parent - self.pager - .free_page(Some(first_child_page.clone()), first_child_page.get().id)?; + sibling_count_new -= 1; // decrease sibling count for debugging and free at the end + assert!(sibling_count_new < balance_info.sibling_count); } + #[cfg(debug_assertions)] + self.post_balance_non_root_validation( + &parent_page, + balance_info, + parent_contents, + pages_to_balance_new, + page_type, + leaf_data, + cells_debug, + sibling_count_new, + rightmost_pointer, + ); + // We have to free pages that are not used anymore for i in sibling_count_new..balance_info.sibling_count { let page = &balance_info.pages_to_balance[i]; @@ -2579,7 +2578,7 @@ impl BTreeCursor { parent_page: &PageRef, balance_info: &mut BalanceInfo, parent_contents: &mut PageContent, - pages_to_balance_new: &Vec>, + pages_to_balance_new: Vec>, page_type: PageType, leaf_data: bool, mut cells_debug: Vec>, @@ -2717,7 +2716,102 @@ impl BTreeCursor { // Now check divider cells and their pointers. 
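            // (Background, not introduced here: a divider cell is the parent
            // entry separating two adjacent siblings; it stores a left-child
            // pointer plus a separator key, while the rightmost child is
            // reached via the page's rightmost pointer instead.)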
let parent_buf = parent_contents.as_ptr(); let cell_divider_idx = balance_info.first_divider_cell + page_idx; - if page_idx == sibling_count_new - 1 { + if sibling_count_new == 0 { + // Balance-shallower case + // We need to check data in parent page + let rightmost = read_u32(rightmost_pointer, 0); + debug_validate_cells!(parent_contents, self.usable_space() as u16); + + if pages_to_balance_new.len() != 1 { + tracing::error!("balance_non_root(balance_shallower_incorrect_pages_to_balance_new_len, pages_to_balance_new={})", + pages_to_balance_new.len() + ); + valid = false; + } + + if current_index_cell != cells_debug.len() + || cells_debug.len() != contents.cell_count() + || contents.cell_count() != parent_contents.cell_count() + { + tracing::error!("balance_non_root(balance_shallower_incorrect_cell_count, current_index_cell={}, cells_debug={}, cell_count={}, parent_cell_count={})", + current_index_cell, + cells_debug.len(), + contents.cell_count(), + parent_contents.cell_count() + ); + valid = false; + } + + if rightmost == page.get().id as u32 || rightmost == parent_page.get().id as u32 { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_id={}, parent_page_id={}, rightmost={})", + page.get().id, + parent_page.get().id, + rightmost, + ); + valid = false; + } + + if let Some(rm) = contents.rightmost_pointer() { + if rm != rightmost { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, page_rightmost={}, rightmost={})", + rm, + rightmost, + ); + valid = false; + } + } + + if parent_contents.page_type() != page_type { + tracing::error!("balance_non_root(balance_shallower_parent_page_type, page_type={:?}, parent_page_type={:?})", + page_type, + parent_contents.page_type() + ); + valid = false + } + + for parent_cell_idx in 0..contents.cell_count() { + let (parent_cell_start, parent_cell_len) = parent_contents.cell_get_raw_region( + parent_cell_idx, + payload_overflow_threshold_max( + parent_contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + parent_contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + + let (cell_start, cell_len) = contents.cell_get_raw_region( + parent_cell_idx, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + ); + + let buf = contents.as_ptr(); + let cell_buf = to_static_buf(&mut buf[cell_start..cell_start + cell_len]); + let parent_cell_buf = to_static_buf( + &mut parent_buf[parent_cell_start..parent_cell_start + parent_cell_len], + ); + let cell_buf_in_array = &cells_debug[parent_cell_idx]; + + if cell_buf != cell_buf_in_array || cell_buf != parent_cell_buf { + tracing::error!("balance_non_root(balance_shallower_cell_not_found_debug, page_id={}, cell_in_cell_array_idx={})", + page.get().id, + parent_cell_idx, + ); + valid = false; + } + } + } else if page_idx == sibling_count_new - 1 { // We will only validate rightmost pointer of parent page, we will not validate rightmost if it's a cell and not the last pointer because, // insert cell could've defragmented the page and invalidated the pointer. // right pointer, we just check right pointer points to this page. 
From 96112f8e54c4d6df858fa91a48b1ca22f61de24b Mon Sep 17 00:00:00 2001 From: TcMits Date: Mon, 14 Apr 2025 18:30:32 +0700 Subject: [PATCH 232/425] missing check for balance-shallower --- core/storage/btree.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b9c2d3bae..bb3d659ba 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2761,6 +2761,16 @@ impl BTreeCursor { } } + if let Some(rm) = parent_contents.rightmost_pointer() { + if rm != rightmost { + tracing::error!("balance_non_root(balance_shallower_rightmost_pointer, parent_rightmost={}, rightmost={})", + rm, + rightmost, + ); + valid = false; + } + } + if parent_contents.page_type() != page_type { tracing::error!("balance_non_root(balance_shallower_parent_page_type, page_type={:?}, parent_page_type={:?})", page_type, From d961baf5ac77da6d4d9a116b15570dfe7f026ff8 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 15:18:21 +0300 Subject: [PATCH 233/425] btree: move PageStack struct declaration next to impl --- core/storage/btree.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3af102854..211f9ab68 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -366,24 +366,6 @@ pub struct BTreeCursor { empty_record: Cell, } -/// Stack of pages representing the tree traversal order. -/// current_page represents the current page being used in the tree and current_page - 1 would be -/// the parent. Using current_page + 1 or higher is undefined behaviour. -struct PageStack { - /// Pointer to the current page being consumed - current_page: Cell, - /// List of pages in the stack. Root page will be in index 0 - stack: RefCell<[Option; BTCURSOR_MAX_DEPTH + 1]>, - /// List of cell indices in the stack. - /// cell_indices[current_page] is the current cell index being consumed. Similarly - /// cell_indices[current_page-1] is the cell index of the parent of the current page - /// that we save in case of going back up. - /// There are two points that need special attention: - /// If cell_indices[current_page] = -1, it indicates that the current iteration has reached the start of the current_page - /// If cell_indices[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page - cell_indices: RefCell<[i32; BTCURSOR_MAX_DEPTH + 1]>, -} - struct CellArray { cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references @@ -3762,6 +3744,24 @@ fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) { } } +/// Stack of pages representing the tree traversal order. +/// current_page represents the current page being used in the tree and current_page - 1 would be +/// the parent. Using current_page + 1 or higher is undefined behaviour. +struct PageStack { + /// Pointer to the current page being consumed + current_page: Cell, + /// List of pages in the stack. Root page will be in index 0 + stack: RefCell<[Option; BTCURSOR_MAX_DEPTH + 1]>, + /// List of cell indices in the stack. + /// cell_indices[current_page] is the current cell index being consumed. Similarly + /// cell_indices[current_page-1] is the cell index of the parent of the current page + /// that we save in case of going back up. 
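+    /// For example (an illustrative walk, not from the patch): after descending
+    /// root -> interior -> leaf, cell_indices[1] remembers which interior cell
+    /// was taken, so retreating from the leaf can resume from that position.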
+ /// There are two points that need special attention: + /// If cell_indices[current_page] = -1, it indicates that the current iteration has reached the start of the current_page + /// If cell_indices[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page + cell_indices: RefCell<[i32; BTCURSOR_MAX_DEPTH + 1]>, +} + impl PageStack { fn increment_current(&self) { self.current_page.set(self.current_page.get() + 1); From 930f1d79b46069a60a74e6c8e0aae8f2b5b83b4f Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 15:19:56 +0300 Subject: [PATCH 234/425] btree: move CellArray struct declaration next to impl --- core/storage/btree.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 211f9ab68..043e52e57 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -366,12 +366,6 @@ pub struct BTreeCursor { empty_record: Cell, } -struct CellArray { - cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references - - number_of_cells_per_page: Vec, // number of cells in each page -} - impl BTreeCursor { pub fn new( mv_cursor: Option>>, @@ -3883,6 +3877,12 @@ impl PageStack { } } +struct CellArray { + cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references + + number_of_cells_per_page: Vec, // number of cells in each page +} + impl CellArray { pub fn cell_size(&self, cell_idx: usize) -> u16 { self.cells[cell_idx].len() as u16 From bf26e6246577e09c1fcc8806d17428aeeed517c4 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 15:21:07 +0300 Subject: [PATCH 235/425] btree: add doc comment about CellArray struct --- core/storage/btree.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 043e52e57..7e2b2b566 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3877,6 +3877,7 @@ impl PageStack { } } +/// Used for redistributing cells during a balance operation. struct CellArray { cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references From 5628cc27a6b3d2a07aada507d62819f6c443ba3a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 15:25:15 +0300 Subject: [PATCH 236/425] btree: move allocate_overflow_page to Pager impl --- core/storage/btree.rs | 16 +--------------- core/storage/pager.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 7e2b2b566..6869cf298 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -4567,7 +4567,7 @@ fn fill_cell_payload( } // we still have bytes to add, we will need to allocate new overflow page - let overflow_page = allocate_overflow_page(pager.clone()); + let overflow_page = pager.allocate_overflow_page(); overflow_pages.push(overflow_page.clone()); { let id = overflow_page.get().id as u32; @@ -4590,20 +4590,6 @@ fn fill_cell_payload( assert_eq!(cell_size, cell_payload.len()); } -/// Allocate a new overflow page. -/// This is done when a cell overflows and new space is needed. -fn allocate_overflow_page(pager: Rc) -> PageRef { - let page = pager.allocate_page().unwrap(); - tracing::debug!("allocate_overflow_page(id={})", page.get().id); - - // setup overflow page - let contents = page.get().contents.as_mut().unwrap(); - let buf = contents.as_ptr(); - buf.fill(0); - - page -} - /// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages. 
/// /// For table leaf pages: X = usable_size - 35 diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 9d7affa95..47dc5451c 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -217,6 +217,20 @@ impl Pager { id as u32 } + /// Allocate a new overflow page. + /// This is done when a cell overflows and new space is needed. + pub fn allocate_overflow_page(&self) -> PageRef { + let page = self.allocate_page().unwrap(); + tracing::debug!("Pager::allocate_overflow_page(id={})", page.get().id); + + // setup overflow page + let contents = page.get().contents.as_mut().unwrap(); + let buf = contents.as_ptr(); + buf.fill(0); + + page + } + /// Allocate a new page to the btree via the pager. /// This marks the page as dirty and writes the page header. pub fn do_allocate_page(&self, page_type: PageType, offset: usize) -> PageRef { From 04cb09be2a059cfe609b0ea4d49abf58d59d1c01 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Mon, 14 Apr 2025 20:57:54 +0800 Subject: [PATCH 237/425] Bump julian_day_converter to 0.4.5 --- Cargo.lock | 4 ++-- core/Cargo.toml | 2 +- testing/scalar-functions-datetime.test | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410bf6e34..bbd4f15ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1540,9 +1540,9 @@ dependencies = [ [[package]] name = "julian_day_converter" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3" +checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16" dependencies = [ "chrono", ] diff --git a/core/Cargo.toml b/core/Cargo.toml index a790a0ca3..eb5d092b0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -53,7 +53,7 @@ regex-syntax = { version = "0.8.5", default-features = false, features = [ "unicode", ] } chrono = { version = "0.4.38", default-features = false, features = ["clock"] } -julian_day_converter = "0.4.4" +julian_day_converter = "0.4.5" rand = "0.8.5" libm = "0.2" limbo_macros = { workspace = true } diff --git a/testing/scalar-functions-datetime.test b/testing/scalar-functions-datetime.test index 3c2f7b771..33caf52c2 100755 --- a/testing/scalar-functions-datetime.test +++ b/testing/scalar-functions-datetime.test @@ -597,6 +597,10 @@ foreach i $FMT { do_execsql_test strftime-invalid-$i "SELECT strftime('$i','2025-01-23T13:14:30.567');" {} } +do_execsql_test strftime-julianday { + SELECT strftime('%Y-%m-%d %H:%M:%fZ', 2459717.08070103); +} {"2022-05-17 13:56:12.569Z"} + # Tests for the TIMEDIFF function From e1ddf5ffcc83e2af28b33f543312a75af43aa94e Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 19:17:34 -0300 Subject: [PATCH 238/425] Fix Unary Negate Operation on Blobs --- core/translate/expr.rs | 4 ++-- core/vdbe/execute.rs | 9 ++++++++- testing/math.test | 12 ++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index e5404a644..909fb484b 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1964,7 +1964,7 @@ pub fn translate_expr( Ok(target_register) } (UnaryOperator::Negative, _) => { - let value = -1; + let value = 0; let reg = program.alloc_register(); translate_expr(program, referenced_tables, expr, reg, resolver)?; @@ -1974,7 +1974,7 @@ pub fn translate_expr( dest: zero_reg, }); program.mark_last_insn_constant(); - program.emit_insn(Insn::Multiply { + program.emit_insn(Insn::Subtract { lhs: zero_reg, rhs: reg, dest: 
target_register, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index bf4915159..a58ddec7a 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -5379,7 +5379,14 @@ pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { (other, OwnedValue::Text(text)) => { exec_subtract(other, &cast_text_to_numeric(text.as_str())) } - _ => todo!(), + (other, OwnedValue::Blob(blob)) => { + let text = String::from_utf8_lossy(&blob); + exec_subtract(other, &cast_text_to_numeric(&text)) + } + (OwnedValue::Blob(blob), other) => { + let text = String::from_utf8_lossy(&blob); + exec_subtract(&cast_text_to_numeric(&text), other) + } }; match result { OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, diff --git a/testing/math.test b/testing/math.test index 9ebe33762..f81be8079 100755 --- a/testing/math.test +++ b/testing/math.test @@ -95,6 +95,18 @@ do_execsql_test subtract-agg-float-agg-int { SELECT sum(3.5) - sum(1) } {2.5} +do_execsql_test subtract-blob { + SELECT -x'11' +} {0} + +do_execsql_test subtract-blob-empty { + SELECT -x'' +} {0} + +do_execsql_test subtract-blob-charcter { + SELECT -'hi'; +} {0} + foreach {testnum lhs rhs ans} { 1 'a' 'a' 0 2 'a' 10 -10 From 53eb2204ceee24e738a3dae74992b9465cb97435 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 14 Apr 2025 13:35:49 -0300 Subject: [PATCH 239/425] Fix truncation of error output in tests --- testing/cli_tests/test_limbo_cli.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 8b6a61375..55c3e548f 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -50,7 +50,8 @@ class LimboShell: return "" self._write_to_pipe(f"SELECT '{end_marker}';") output = "" - while True: + done = False + while not done: ready, _, errors = select.select( [self.pipe.stdout, self.pipe.stderr], [], @@ -58,7 +59,7 @@ class LimboShell: ) ready_or_errors = set(ready + errors) if self.pipe.stderr in ready_or_errors: - self._handle_error() + done = self._handle_error() if self.pipe.stdout in ready_or_errors: fragment = self.pipe.stdout.read(PIPE_BUF).decode() output += fragment @@ -71,17 +72,14 @@ class LimboShell: self.pipe.stdin.write((command + "\n").encode()) self.pipe.stdin.flush() - def _handle_error(self) -> None: + def _handle_error(self) -> bool: while True: - ready, _, errors = select.select( - [self.pipe.stderr], [], [self.pipe.stderr], 0 - ) - if not (ready + errors): - break - error_output = self.pipe.stderr.read(PIPE_BUF).decode() - print(error_output, end="") - raise RuntimeError("Error encountered in Limbo shell.") - + error_output = self.pipe.stderr.read(PIPE_BUF) + if error_output == b"": + return True + print(error_output.decode(), end="") + return False + @staticmethod def _clean_output(output: str, marker: str) -> str: output = output.rstrip().removesuffix(marker) From 3c06ddadde9a8d5508b8704d0d31cba503a4a18b Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Mon, 14 Apr 2025 21:04:56 +0300 Subject: [PATCH 240/425] Parse hex integers in unary operators Unary operators ~ and - should work with hex integers --- core/translate/expr.rs | 24 +++++++++++++++++++----- testing/math.test | 6 +++++- testing/select.test | 4 ++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index d49254f03..3f82940bb 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1941,14 +1941,23 @@ pub fn 
translate_expr( // Special case: if we're negating "9223372036854775808", this is exactly MIN_INT64 // If we don't do this -1 * 9223372036854775808 will overflow and parse will fail // and trigger conversion to Real. - if numeric_value == "9223372036854775808" { + if numeric_value == "9223372036854775808" + || numeric_value == "0x7fffffffffffffff" + || numeric_value == "0x7FFFFFFFFFFFFFFF" + { program.emit_insn(Insn::Integer { value: i64::MIN, dest: target_register, }); } else { - let maybe_int = numeric_value.parse::(); - if let Ok(value) = maybe_int { + if numeric_value.starts_with("0x") { + // must be a hex decimal + let int_value = i64::from_str_radix(&numeric_value[2..], 16)?; + program.emit_insn(Insn::Integer { + value: -int_value, + dest: target_register, + }); + } else if let Ok(value) = numeric_value.parse::() { program.emit_insn(Insn::Integer { value: value * -1, dest: target_register, @@ -1982,8 +1991,13 @@ pub fn translate_expr( Ok(target_register) } (UnaryOperator::BitwiseNot, ast::Expr::Literal(ast::Literal::Numeric(num_val))) => { - let maybe_int = num_val.parse::(); - if let Ok(val) = maybe_int { + if num_val.starts_with("0x") { + let int_value = i64::from_str_radix(&num_val[2..], 16)?; + program.emit_insn(Insn::Integer { + value: !int_value, + dest: target_register, + }); + } else if let Ok(val) = num_val.parse::() { program.emit_insn(Insn::Integer { value: !val, dest: target_register, diff --git a/testing/math.test b/testing/math.test index c5e85186a..18183006e 100755 --- a/testing/math.test +++ b/testing/math.test @@ -612,10 +612,14 @@ do_execsql_test bitwise-not-text-float { SELECT ~'823.34' } {-824} -do_execsql_test bitwise-not-text-int { +do_execsql_test bitwise-not-text-int-1 { SELECT ~'1234' } {-1235} +do_execsql_test bitwise-not-text-int-2 { + SELECT ~0xA +} {-11} + do_execsql_test bitwise-not-scalar-float { SELECT ~abs(693.9) } {-694} diff --git a/testing/select.test b/testing/select.test index e9d119f51..6f0c6997d 100755 --- a/testing/select.test +++ b/testing/select.test @@ -15,6 +15,10 @@ do_execsql_test select-const-3 { SELECT 0xDEAF } {57007} +do_execsql_test select-const-4 { + SELECT -0xA +} {-10} + do_execsql_test select-true { SELECT true } {1} From 0cebeef2ff921c09c3ec8c77d2aecb313bd32b78 Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Mon, 14 Apr 2025 21:23:04 +0300 Subject: [PATCH 241/425] Support hex integers beginning with uppercase notation like 0Xfff or 0XFFF --- core/translate/expr.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 3f82940bb..6520a8b14 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1850,7 +1850,7 @@ pub fn translate_expr( } ast::Expr::Literal(lit) => match lit { ast::Literal::Numeric(val) => { - if val.starts_with("0x") { + if val.starts_with("0x") || val.starts_with("0X") { // must be a hex decimal let int_value = i64::from_str_radix(&val[2..], 16)?; program.emit_insn(Insn::Integer { @@ -1942,15 +1942,14 @@ pub fn translate_expr( // If we don't do this -1 * 9223372036854775808 will overflow and parse will fail // and trigger conversion to Real. 
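                // (Explanatory note: 2^63 == 9223372036854775808 has no positive
                // i64 representation, but its negation is exactly i64::MIN, hence
                // this hand-rolled special case.)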
if numeric_value == "9223372036854775808" - || numeric_value == "0x7fffffffffffffff" - || numeric_value == "0x7FFFFFFFFFFFFFFF" + || numeric_value.to_lowercase() == "0x7fffffffffffffff" { program.emit_insn(Insn::Integer { value: i64::MIN, dest: target_register, }); } else { - if numeric_value.starts_with("0x") { + if numeric_value.starts_with("0x") || numeric_value.starts_with("0X") { // must be a hex decimal let int_value = i64::from_str_radix(&numeric_value[2..], 16)?; program.emit_insn(Insn::Integer { @@ -1991,7 +1990,7 @@ pub fn translate_expr( Ok(target_register) } (UnaryOperator::BitwiseNot, ast::Expr::Literal(ast::Literal::Numeric(num_val))) => { - if num_val.starts_with("0x") { + if num_val.starts_with("0x") || num_val.starts_with("0X") { let int_value = i64::from_str_radix(&num_val[2..], 16)?; program.emit_insn(Insn::Integer { value: !int_value, From fab2ddc8cff5028c2deb4bb4c27de7087f0eb828 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 13 Apr 2025 19:29:59 -0300 Subject: [PATCH 242/425] Fix: incorrect assert in fuzz --- fuzz/fuzz_targets/expression.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuzz/fuzz_targets/expression.rs b/fuzz/fuzz_targets/expression.rs index 44338c634..9426f0683 100644 --- a/fuzz/fuzz_targets/expression.rs +++ b/fuzz/fuzz_targets/expression.rs @@ -195,7 +195,7 @@ fn do_fuzz(expr: Expr) -> Result> { StepResult::IO => io.run_once()?, StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(row.count(), 1, "expr: {:?}", expr); + assert_eq!(row.len(), 1, "expr: {:?}", expr); break 'value row.get_value(0).clone(); } _ => unreachable!(), From aece4e5442fe218f400a5952e64f933954f044e0 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 15 Apr 2025 10:59:49 +0200 Subject: [PATCH 243/425] use `seek` instead of `move_to` in post balance delete --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index f4dda7def..dcebe0c6d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3338,7 +3338,7 @@ impl BTreeCursor { SeekKey::IndexKey(immutable_record) } }; - return_if_io!(self.move_to(key, SeekOp::EQ)); + return_if_io!(self.seek(key, SeekOp::EQ)); let delete_info = self.state.mut_delete_info().unwrap(); delete_info.state = DeleteState::Finish; From 198aedb04298c2beb21698510a1e0262eed0f865 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 09:54:18 +0300 Subject: [PATCH 244/425] Refactor: add 'pos_in_table' to IndexColumn for easier lookup --- core/schema.rs | 41 +++++++++++++++++------- core/translate/index.rs | 7 +++-- core/util.rs | 69 +++++++++++++++++++++++++++++++---------- 3 files changed, 87 insertions(+), 30 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 70cc726c6..364ad6f99 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -179,6 +179,10 @@ impl BTreeTable { col.is_rowid_alias } + /// Returns the column position and column for a given column name. + /// Returns None if the column name is not found. + /// E.g. if table is CREATE TABLE t(a, b, c) + /// then get_column("b") returns (1, &Column { .. }) pub fn get_column(&self, name: &str) -> Option<(usize, &Column)> { let name = normalize_ident(name); for (i, column) in self.columns.iter().enumerate() { @@ -669,10 +673,16 @@ pub struct Index { pub struct IndexColumn { pub name: String, pub order: SortOrder, + /// the position of the column in the source table. 
+ /// for example: + /// CREATE TABLE t(a,b,c) + /// CREATE INDEX idx ON t(b) + /// b.pos_in_table == 1 + pub pos_in_table: usize, } impl Index { - pub fn from_sql(sql: &str, root_page: usize) -> Result { + pub fn from_sql(sql: &str, root_page: usize, table: &BTreeTable) -> Result { let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; match cmd { @@ -684,13 +694,21 @@ impl Index { .. })) => { let index_name = normalize_ident(&idx_name.name.0); - let index_columns = columns - .into_iter() - .map(|col| IndexColumn { - name: normalize_ident(&col.expr.to_string()), + let mut index_columns = Vec::with_capacity(columns.len()); + for col in columns.into_iter() { + let name = normalize_ident(&col.expr.to_string()); + let Some((pos_in_table, _)) = table.get_column(&name) else { + return Err(crate::LimboError::InternalError(format!( + "Column {} is in index {} but not found in table {}", + name, index_name, table.name + ))); + }; + index_columns.push(IndexColumn { + name, order: col.order.unwrap_or(SortOrder::Asc), - }) - .collect(); + pos_in_table, + }); + } Ok(Index { name: index_name, table_name: normalize_ident(&tbl_name.0), @@ -719,15 +737,16 @@ impl Index { .iter() .map(|col_name| { // Verify that each primary key column exists in the table - if table.get_column(col_name).is_none() { + let Some((pos_in_table, _)) = table.get_column(col_name) else { return Err(crate::LimboError::InternalError(format!( - "Primary key column {} not found in table {}", - col_name, table.name + "Column {} is in index {} but not found in table {}", + col_name, index_name, table.name ))); - } + }; Ok(IndexColumn { name: normalize_ident(col_name), order: SortOrder::Asc, // Primary key indexes are always ascending + pos_in_table, }) }) .collect::>>()?; diff --git a/core/translate/index.rs b/core/translate/index.rs index 366b986e7..de79aed23 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -55,9 +55,10 @@ pub fn translate_create_index( root_page: 0, // we dont have access till its created, after we parse the schema table columns: columns .iter() - .map(|c| IndexColumn { - name: c.0 .1.name.as_ref().unwrap().clone(), - order: c.1, + .map(|((pos_in_table, col), order)| IndexColumn { + name: col.name.as_ref().unwrap().clone(), + order: *order, + pos_in_table: *pos_in_table, }) .collect(), unique: unique_if_not_exists.0, diff --git a/core/util.rs b/core/util.rs index 80a52f387..b3ce8ecd0 100644 --- a/core/util.rs +++ b/core/util.rs @@ -36,6 +36,21 @@ pub fn normalize_ident(identifier: &str) -> String { pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_"; +enum UnparsedIndex { + /// CREATE INDEX idx ON table_name(sql) + FromSql { + table_name: String, + root_page: usize, + sql: String, + }, + /// Implicitly created index due to primary key constraints (or UNIQUE, but not implemented) + FromConstraint { + name: String, + table_name: String, + root_page: usize, + }, +} + pub fn parse_schema_rows( rows: Option, schema: &mut Schema, @@ -45,7 +60,7 @@ pub fn parse_schema_rows( ) -> Result<()> { if let Some(mut rows) = rows { rows.set_mv_tx_id(mv_tx_id); - let mut automatic_indexes = Vec::new(); + let mut unparsed_indexes = Vec::with_capacity(10); loop { match rows.step()? 
{ StepResult::Row => { @@ -99,21 +114,24 @@ pub fn parse_schema_rows( let root_page: i64 = row.get::(3)?; match row.get::<&str>(4) { Ok(sql) => { - let index = schema::Index::from_sql(sql, root_page as usize)?; - schema.add_index(Arc::new(index)); + unparsed_indexes.push(UnparsedIndex::FromSql { + table_name: row.get::<&str>(2)?.to_string(), + root_page: root_page as usize, + sql: sql.to_string(), + }); } _ => { // Automatic index on primary key, e.g. // table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b) // index|sqlite_autoindex_foo_1|foo|3| - let index_name = row.get::<&str>(1)?; - let table_name = row.get::<&str>(2)?; + let index_name = row.get::<&str>(1)?.to_string(); + let table_name = row.get::<&str>(2)?.to_string(); let root_page = row.get::(3)?; - automatic_indexes.push(( - index_name.to_string(), - table_name.to_string(), - root_page, - )); + unparsed_indexes.push(UnparsedIndex::FromConstraint { + name: index_name, + table_name, + root_page: root_page as usize, + }); } } } @@ -130,12 +148,31 @@ pub fn parse_schema_rows( StepResult::Busy => break, } } - for (index_name, table_name, root_page) in automatic_indexes { - // We need to process these after all tables are loaded into memory due to the schema.get_table() call - let table = schema.get_btree_table(&table_name).unwrap(); - let index = - schema::Index::automatic_from_primary_key(&table, &index_name, root_page as usize)?; - schema.add_index(Arc::new(index)); + for unparsed_index in unparsed_indexes { + match unparsed_index { + UnparsedIndex::FromSql { + table_name, + root_page, + sql, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::from_sql(&sql, root_page as usize, table.as_ref())?; + schema.add_index(Arc::new(index)); + } + UnparsedIndex::FromConstraint { + name, + table_name, + root_page, + } => { + let table = schema.get_btree_table(&table_name).unwrap(); + let index = schema::Index::automatic_from_primary_key( + table.as_ref(), + &name, + root_page as usize, + )?; + schema.add_index(Arc::new(index)); + } + } } } Ok(()) From a467060e1c509fefeb010564b8848738cb7075b6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 09:57:33 +0300 Subject: [PATCH 245/425] Index: add method column_table_pos_to_index_pos() --- core/schema.rs | 12 ++++++++++++ core/translate/optimizer.rs | 26 ++++---------------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 364ad6f99..fbd3a86ec 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -759,6 +759,18 @@ impl Index { unique: true, // Primary key indexes are always unique }) } + + /// Given a column position in the table, return the position in the index. + /// Returns None if the column is not found in the index. 
+ /// For example, given: + /// CREATE TABLE t(a, b, c) + /// CREATE INDEX idx ON t(b) + /// then column_table_pos_to_index_pos(1) returns Some(0) + pub fn column_table_pos_to_index_pos(&self, table_pos: usize) -> Option { + self.columns + .iter() + .position(|c| c.pos_in_table == table_pos) + } } #[cfg(test)] diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 7c29b8834..c4bf12810 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -732,13 +732,7 @@ pub fn try_extract_index_search_from_where_clause( for index in table_indexes { // Check how many terms in the where clause constrain the index in column order - find_index_constraints( - where_clause, - table_index, - table_reference, - index, - &mut constraints_cur, - )?; + find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?; // naive scoring since we don't have statistics: prefer the index where we can use the most columns // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c. let score = constraints_cur.len(); @@ -843,7 +837,6 @@ impl UnwrapParens for ast::Expr { fn get_column_position_in_index( expr: &ast::Expr, table_index: usize, - table_reference: &TableReference, index: &Arc, ) -> Result> { let ast::Expr::Column { table, column, .. } = unwrap_parens(expr)? else { @@ -852,13 +845,7 @@ fn get_column_position_in_index( if *table != table_index { return Ok(None); } - let Some(column) = table_reference.table.get_column_at(*column) else { - return Ok(None); - }; - Ok(index - .columns - .iter() - .position(|col| Some(&col.name) == column.name.as_ref())) + Ok(index.column_table_pos_to_index_pos(*column)) } /// Find all [IndexConstraint]s for a given WHERE clause @@ -868,7 +855,6 @@ fn get_column_position_in_index( fn find_index_constraints( where_clause: &mut Vec, table_index: usize, - table_reference: &TableReference, index: &Arc, out_constraints: &mut Vec, ) -> Result<()> { @@ -908,9 +894,7 @@ fn find_index_constraints( } // Check if lhs is a column that is in the i'th position of the index - if Some(position_in_index) - == get_column_position_in_index(lhs, table_index, table_reference, index)? - { + if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? { out_constraints.push(IndexConstraint { operator: *operator, position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs), @@ -919,9 +903,7 @@ fn find_index_constraints( break; } // Check if rhs is a column that is in the i'th position of the index - if Some(position_in_index) - == get_column_position_in_index(rhs, table_index, table_reference, index)? - { + if Some(position_in_index) == get_column_position_in_index(rhs, table_index, index)? { out_constraints.push(IndexConstraint { operator: opposite_cmp_op(*operator), // swap the operator since e.g. 
if condition is 5 >= x, we want to use x <= 5 position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs), From e299a0e77e554ecac033fec4b42ecaa162917898 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 11:19:50 +0300 Subject: [PATCH 246/425] vdbe: add Insn::IdxRowId --- core/vdbe/execute.rs | 22 ++++++++++++++++++++++ core/vdbe/explain.rs | 16 ++++++++++++++++ core/vdbe/insn.rs | 6 ++++++ 3 files changed, 44 insertions(+) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 2eafed217..0325ffae8 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1836,6 +1836,28 @@ pub fn op_row_id( Ok(InsnFunctionStepResult::Step) } +pub fn op_idx_row_id( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::IdxRowId { cursor_id, dest } = insn else { + unreachable!("unexpected Insn {:?}", insn) + }; + let mut cursors = state.cursors.borrow_mut(); + let cursor = cursors.get_mut(*cursor_id).unwrap().as_mut().unwrap(); + let cursor = cursor.as_btree_mut(); + let rowid = cursor.rowid()?; + state.registers[*dest] = match rowid { + Some(rowid) => Register::OwnedValue(OwnedValue::Integer(rowid as i64)), + None => Register::OwnedValue(OwnedValue::Null), + }; + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_seek_rowid( program: &Program, state: &mut ProgramState, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index f2230d2eb..f6e1073c3 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -684,6 +684,22 @@ pub fn insn_to_str( .unwrap_or(&format!("cursor {}", cursor_id)) ), ), + Insn::IdxRowId { cursor_id, dest } => ( + "IdxRowId", + *cursor_id as i32, + *dest as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "r[{}]={}.rowid", + dest, + &program.cursor_ref[*cursor_id] + .0 + .as_ref() + .unwrap_or(&format!("cursor {}", cursor_id)) + ), + ), Insn::SeekRowid { cursor_id, src_reg, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 7b30396c0..87a615aee 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -438,6 +438,11 @@ pub enum Insn { cursor_id: CursorID, dest: usize, }, + /// Read the rowid of the current row from an index cursor. + IdxRowId { + cursor_id: CursorID, + dest: usize, + }, /// Seek to a rowid in the cursor. If not found, jump to the given PC. Otherwise, continue to the next instruction. SeekRowid { @@ -856,6 +861,7 @@ impl Insn { Insn::String8 { .. } => execute::op_string8, Insn::Blob { .. } => execute::op_blob, Insn::RowId { .. } => execute::op_row_id, + Insn::IdxRowId { .. } => execute::op_idx_row_id, Insn::SeekRowid { .. } => execute::op_seek_rowid, Insn::DeferredSeek { .. } => execute::op_deferred_seek, Insn::SeekGE { .. } => execute::op_seek, From 72dac5981395e572b0dd910b8ea03eaeb357a188 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 11:18:09 +0300 Subject: [PATCH 247/425] Operation: add method index() to retrieve used index, if any --- core/translate/plan.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index ab7bc893c..4bf86ab8d 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -281,6 +281,17 @@ pub enum Operation { }, } +impl Operation { + pub fn index(&self) -> Option<&Arc> { + match self { + Operation::Scan { index, .. } => index.as_ref(), + Operation::Search(Search::RowidEq { .. }) => None, + Operation::Search(Search::Seek { index, .. }) => index.as_ref(), + Operation::Subquery { .. 
} => None, + } + } +} + impl TableReference { /// Returns the btree table for this table reference, if it is a BTreeTable. pub fn btree(&self) -> Option> { From 5a1cfb7d1550d5d9f147f873a910e8f3ca390cf5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 14 Apr 2025 17:25:13 +0300 Subject: [PATCH 248/425] Add ColumnUsedMask struct to TableReference to track columns referenced in query --- core/translate/delete.rs | 7 ++++--- core/translate/plan.rs | 44 +++++++++++++++++++++++++++++++++++++++ core/translate/planner.rs | 19 ++++++++++++----- core/translate/select.rs | 27 +++++++++++++++++------- core/translate/update.rs | 12 ++++++----- 5 files changed, 88 insertions(+), 21 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index b8b92349d..fb580b8e8 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -7,7 +7,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode}; use crate::{schema::Schema, Result, SymbolTable}; use limbo_sqlite3_parser::ast::{Expr, Limit, QualifiedName}; -use super::plan::{IterationDirection, TableReference}; +use super::plan::{ColumnUsedMask, IterationDirection, TableReference}; pub fn translate_delete( query_mode: QueryMode, @@ -50,7 +50,7 @@ pub fn prepare_delete_plan( crate::bail_corrupt_error!("Table is neither a virtual table nor a btree table"); }; let name = tbl_name.name.0.as_str().to_string(); - let table_references = vec![TableReference { + let mut table_references = vec![TableReference { table, identifier: name, op: Operation::Scan { @@ -58,6 +58,7 @@ pub fn prepare_delete_plan( index: None, }, join_info: None, + col_used_mask: ColumnUsedMask::new(), }]; let mut where_predicates = vec![]; @@ -65,7 +66,7 @@ pub fn prepare_delete_plan( // Parse the WHERE clause parse_where( where_clause.map(|e| *e), - &table_references, + &mut table_references, None, &mut where_predicates, )?; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 4bf86ab8d..038dd90ee 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -256,6 +256,43 @@ pub struct TableReference { pub identifier: String, /// The join info for this table reference, if it is the right side of a join (which all except the first table reference have) pub join_info: Option, + /// Bitmask of columns that are referenced in the query. + /// Used to decide whether a covering index can be used. + pub col_used_mask: ColumnUsedMask, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +#[repr(transparent)] +pub struct ColumnUsedMask(u128); + +impl ColumnUsedMask { + pub fn new() -> Self { + Self(0) + } + + pub fn set(&mut self, index: usize) { + assert!( + index < 128, + "ColumnUsedMask only supports up to 128 columns" + ); + self.0 |= 1 << index; + } + + pub fn get(&self, index: usize) -> bool { + assert!( + index < 128, + "ColumnUsedMask only supports up to 128 columns" + ); + self.0 & (1 << index) != 0 + } + + pub fn contains_all_set_bits_of(&self, other: &Self) -> bool { + self.0 & other.0 == other.0 + } + + pub fn is_empty(&self) -> bool { + self.0 == 0 + } } #[derive(Clone, Debug)] @@ -331,12 +368,19 @@ impl TableReference { table, identifier: identifier.clone(), join_info, + col_used_mask: ColumnUsedMask::new(), } } pub fn columns(&self) -> &[Column] { self.table.columns() } + + /// Mark a column as used in the query. + /// This is used to determine whether a covering index can be used. + pub fn mark_column_used(&mut self, index: usize) { + self.col_used_mask.set(index); + } } /// A definition of a rowid/index search. 
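(Aside: a minimal sketch, not part of any patch above, of the covering-index check this mask enables. index_is_covering is a hypothetical helper; it assumes the table has at most 128 columns, that pos_in_table is populated as in PATCH 244, and that the index does not also have to supply the rowid:

    // Collect the columns the index provides into a mask, then check that the
    // index mask includes every column the query actually reads from the table.
    fn index_is_covering(index: &Index, used_cols: &ColumnUsedMask) -> bool {
        let mut provided = ColumnUsedMask::new();
        for col in &index.columns {
            provided.set(col.pos_in_table);
        }
        // Covering: every bit set in the query's usage mask is also set
        // in the index's mask.
        provided.contains_all_set_bits_of(used_cols)
    }

For example, with CREATE TABLE t(a, b, c) and CREATE INDEX idx ON t(b, c), a query reading only b and c sets bits 1 and 2, so idx covers it; a query that also reads column a sets bit 0 as well and idx does not.)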
diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 2d9246666..f1d7aaeea 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,7 +1,7 @@ use super::{ plan::{ - Aggregate, EvalAt, IterationDirection, JoinInfo, Operation, Plan, ResultSetColumn, - SelectPlan, SelectQueryType, TableReference, WhereTerm, + Aggregate, ColumnUsedMask, EvalAt, IterationDirection, JoinInfo, Operation, Plan, + ResultSetColumn, SelectPlan, SelectQueryType, TableReference, WhereTerm, }, select::prepare_select_plan, SymbolTable, @@ -85,7 +85,7 @@ pub fn resolve_aggregates(expr: &Expr, aggs: &mut Vec) -> bool { pub fn bind_column_references( expr: &mut Expr, - referenced_tables: &[TableReference], + referenced_tables: &mut [TableReference], result_columns: Option<&[ResultSetColumn]>, ) -> Result<()> { match expr { @@ -128,6 +128,7 @@ pub fn bind_column_references( column: col_idx, is_rowid_alias, }; + referenced_tables[tbl_idx].mark_column_used(col_idx); return Ok(()); } @@ -178,6 +179,7 @@ pub fn bind_column_references( column: col_idx.unwrap(), is_rowid_alias: col.is_rowid_alias, }; + referenced_tables[tbl_idx].mark_column_used(col_idx.unwrap()); Ok(()) } Expr::Between { @@ -327,6 +329,7 @@ fn parse_from_clause_table<'a>( table: tbl_ref, identifier: alias.unwrap_or(normalized_qualified_name), join_info: None, + col_used_mask: ColumnUsedMask::new(), }); return Ok(()); }; @@ -409,6 +412,7 @@ fn parse_from_clause_table<'a>( join_info: None, table: Table::Virtual(vtab), identifier: alias, + col_used_mask: ColumnUsedMask::new(), }); Ok(()) @@ -539,7 +543,7 @@ pub fn parse_from<'a>( pub fn parse_where( where_clause: Option, - table_references: &[TableReference], + table_references: &mut [TableReference], result_columns: Option<&[ResultSetColumn]>, out_where_clause: &mut Vec, ) -> Result<()> { @@ -758,7 +762,7 @@ fn parse_join<'a>( let mut preds = vec![]; break_predicate_at_and_boundaries(expr, &mut preds); for predicate in preds.iter_mut() { - bind_column_references(predicate, &scope.tables, None)?; + bind_column_references(predicate, &mut scope.tables, None)?; } for pred in preds { let cur_table_idx = scope.tables.len() - 1; @@ -832,6 +836,11 @@ fn parse_join<'a>( is_rowid_alias: right_col.is_rowid_alias, }), ); + + let left_table = scope.tables.get_mut(left_table_idx).unwrap(); + left_table.mark_column_used(left_col_idx); + let right_table = scope.tables.get_mut(cur_table_idx).unwrap(); + right_table.mark_column_used(right_col_idx); let eval_at = if outer { EvalAt::Loop(cur_table_idx) } else { diff --git a/core/translate/select.rs b/core/translate/select.rs index 24a6331e5..3972bdc85 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -104,12 +104,17 @@ pub fn prepare_select_plan<'a>( match column { ResultColumn::Star => { select_star(&plan.table_references, &mut plan.result_columns); + for table in plan.table_references.iter_mut() { + for idx in 0..table.columns().len() { + table.mark_column_used(idx); + } + } } ResultColumn::TableStar(name) => { let name_normalized = normalize_ident(name.0.as_str()); let referenced_table = plan .table_references - .iter() + .iter_mut() .enumerate() .find(|(_, t)| t.identifier == name_normalized); @@ -117,23 +122,29 @@ pub fn prepare_select_plan<'a>( crate::bail_parse_error!("Table {} not found", name.0); } let (table_index, table) = referenced_table.unwrap(); - for (idx, col) in table.columns().iter().enumerate() { + let num_columns = table.columns().len(); + for idx in 0..num_columns { + let is_rowid_alias = { + let 
columns = table.columns(); + columns[idx].is_rowid_alias + }; plan.result_columns.push(ResultSetColumn { expr: ast::Expr::Column { database: None, // TODO: support different databases table: table_index, column: idx, - is_rowid_alias: col.is_rowid_alias, + is_rowid_alias, }, alias: None, contains_aggregates: false, }); + table.mark_column_used(idx); } } ResultColumn::Expr(ref mut expr, maybe_alias) => { bind_column_references( expr, - &plan.table_references, + &mut plan.table_references, Some(&plan.result_columns), )?; match expr { @@ -293,7 +304,7 @@ pub fn prepare_select_plan<'a>( // Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions. parse_where( where_clause, - &plan.table_references, + &mut plan.table_references, Some(&plan.result_columns), &mut plan.where_clause, )?; @@ -303,7 +314,7 @@ pub fn prepare_select_plan<'a>( replace_column_number_with_copy_of_column_expr(expr, &plan.result_columns)?; bind_column_references( expr, - &plan.table_references, + &mut plan.table_references, Some(&plan.result_columns), )?; } @@ -316,7 +327,7 @@ pub fn prepare_select_plan<'a>( for expr in predicates.iter_mut() { bind_column_references( expr, - &plan.table_references, + &mut plan.table_references, Some(&plan.result_columns), )?; let contains_aggregates = @@ -352,7 +363,7 @@ pub fn prepare_select_plan<'a>( bind_column_references( &mut o.expr, - &plan.table_references, + &mut plan.table_references, Some(&plan.result_columns), )?; resolve_aggregates(&o.expr, &mut plan.aggregates); diff --git a/core/translate/update.rs b/core/translate/update.rs index 62c6c6f9f..a0e32e640 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -11,7 +11,8 @@ use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; use super::emitter::emit_program; use super::optimizer::optimize_plan; use super::plan::{ - Direction, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, + ColumnUsedMask, Direction, IterationDirection, Plan, ResultSetColumn, TableReference, + UpdatePlan, }; use super::planner::bind_column_references; use super::planner::{parse_limit, parse_where}; @@ -88,7 +89,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< }) }) .unwrap_or(IterationDirection::Forwards); - let table_references = vec![TableReference { + let mut table_references = vec![TableReference { table: match table.as_ref() { Table::Virtual(vtab) => Table::Virtual(vtab.clone()), Table::BTree(btree_table) => Table::BTree(btree_table.clone()), @@ -100,6 +101,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< index: None, }, join_info: None, + col_used_mask: ColumnUsedMask::new(), }]; let set_clauses = body .sets @@ -123,7 +125,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< )) })?; - let _ = bind_column_references(&mut set.expr, &table_references, None); + let _ = bind_column_references(&mut set.expr, &mut table_references, None); Ok((col_index, set.expr.clone())) }) .collect::, crate::LimboError>>()?; @@ -133,7 +135,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< if let Some(returning) = &mut body.returning { for rc in returning.iter_mut() { if let ResultColumn::Expr(expr, alias) = rc { - bind_column_references(expr, &table_references, None)?; + bind_column_references(expr, &mut table_references, None)?; result_columns.push(ResultSetColumn { expr: expr.clone(), alias: 
alias.as_ref().and_then(|a| {
@@ -169,7 +171,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result<
     // Parse the WHERE clause
     parse_where(
         body.where_clause.as_ref().map(|w| *w.clone()),
-        &table_references,
+        &mut table_references,
         Some(&result_columns),
         &mut where_clause,
     )?;

From 1fe1f0ebba0fcd794b6400c8e7c3df35c6036037 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Tue, 15 Apr 2025 13:16:28 +0300
Subject: [PATCH 249/425] ProgramBuilder: add resolve_cursor_id_safe() which doesn't unwrap

---
 core/vdbe/builder.rs | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index 03c634d07..648044d1d 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -441,15 +441,17 @@ impl ProgramBuilder {
     }

     // translate table to cursor id
+    pub fn resolve_cursor_id_safe(&self, table_identifier: &str) -> Option<CursorID> {
+        self.cursor_ref.iter().position(|(t_ident, _)| {
+            t_ident
+                .as_ref()
+                .is_some_and(|ident| ident == table_identifier)
+        })
+    }
+
     pub fn resolve_cursor_id(&self, table_identifier: &str) -> CursorID {
-        self.cursor_ref
-            .iter()
-            .position(|(t_ident, _)| {
-                t_ident
-                    .as_ref()
-                    .is_some_and(|ident| ident == table_identifier)
-            })
-            .unwrap()
+        self.resolve_cursor_id_safe(table_identifier)
+            .unwrap_or_else(|| panic!("Cursor not found: {}", table_identifier))
     }

     pub fn build(

From 825aeb3f83d2cedc29fdce821a33409aa81c51a2 Mon Sep 17 00:00:00 2001
From: Diego Reis
Date: Wed, 9 Apr 2025 10:56:37 -0300
Subject: [PATCH 250/425] core/vdbe: Add BeginSubrtn bytecode

Basically it does the very same thing as Null, but has a different name to differentiate its usage.
---
 COMPAT.md | 1 +
 core/vdbe/explain.rs | 11 +++++++++++
 core/vdbe/insn.rs | 7 +++++++
 3 files changed, 19 insertions(+)

diff --git a/COMPAT.md b/COMPAT.md
index 4ac83e6bb..ced9fbb6d 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -427,6 +427,7 @@ Modifiers:
 | BitNot | Yes | |
 | BitOr | Yes | |
 | Blob | Yes | |
+| BeginSubrtn | Yes | |
 | Checkpoint | No | |
 | Clear | No | |
 | Close | No | |
diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs
index f2230d2eb..a0ddd6701 100644
--- a/core/vdbe/explain.rs
+++ b/core/vdbe/explain.rs
@@ -1350,6 +1350,17 @@ pub fn insn_to_str(
             0,
             format!("goto {}", target_pc_when_reentered.to_debug_int()),
         ),
+        Insn::BeginSubrtn { dest, dest_end } => (
+            "BeginSubrtn",
+            *dest as i32,
+            dest_end.map_or(0, |end| end as i32),
+            0,
+            OwnedValue::build_text(""),
+            0,
+            dest_end.map_or(format!("r[{}]=NULL", dest), |end| {
+                format!("r[{}..{}]=NULL", dest, end)
+            }),
+        ),
     };
     format!(
         "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",
diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 7b30396c0..b748706e7 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -100,6 +100,12 @@ pub enum Insn {
         dest: usize,
         dest_end: Option<usize>,
     },
+    /// Mark the beginning of a subroutine that can be entered in-line. This opcode is identical to Null;
+    /// it has a different name only to make the byte code easier to read and verify
+    BeginSubrtn {
+        dest: usize,
+        dest_end: Option<usize>,
+    },
     /// Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL.
     NullRow {
         cursor_id: CursorID,
@@ -803,6 +809,7 @@ impl Insn {
         match self {
            Insn::Init { .. } => execute::op_init,
            Insn::Null { .. } => execute::op_null,
+           Insn::BeginSubrtn { .. } => execute::op_null,
            Insn::NullRow { .. } => execute::op_null_row,
            Insn::Add { .. } => execute::op_add,
            Insn::Subtract { ..
} => execute::op_subtract, From c5161311fcaa5b18fa1f8b550c467eaef7a82a37 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 14 Apr 2025 11:50:28 -0300 Subject: [PATCH 251/425] core/vdbe: Add NotFound bytecode If P4==0 then register P3 holds a blob constructed by MakeRecord. If P4>0 then register P3 is the first of P4 registers that form an unpacked record. Cursor P1 is on an index btree. If the record identified by P3 and P4 is not the prefix of any entry in P1 then a jump is made to P2. If P1 does contain an entry whose prefix matches the P3/P4 record then control falls through to the next instruction and P1 is left pointing at the matching entry. This operation leaves the cursor in a state where it cannot be advanced in either direction. In other words, the Next and Prev opcodes do not work after this operation. --- core/vdbe/execute.rs | 47 ++++++++++++++++++++++++++++++++++++++++++++ core/vdbe/explain.rs | 18 +++++++++++++++++ core/vdbe/insn.rs | 8 ++++++++ 3 files changed, 73 insertions(+) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 2eafed217..7b151c093 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4472,6 +4472,53 @@ pub fn op_once( Ok(InsnFunctionStepResult::Step) } +pub fn op_not_found( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::NotFound { + cursor_id, + target_pc, + record_reg, + num_regs, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + + let found = { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + + if *num_regs == 0 { + let record = match &state.registers[*record_reg] { + Register::Record(r) => r, + _ => { + return Err(LimboError::InternalError( + "NotFound: exepected a record in the register".into(), + )); + } + }; + + return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)) + } else { + let record = make_record(&state.registers, record_reg, num_regs); + return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)) + } + }; + + if found { + state.pc += 1; + } else { + state.pc = target_pc.to_offset_int(); + } + + Ok(InsnFunctionStepResult::Step) +} + fn exec_lower(reg: &OwnedValue) -> Option { match reg { OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index a0ddd6701..8149b4ba6 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1361,6 +1361,24 @@ pub fn insn_to_str( format!("r[{}..{}]=NULL", dest, end) }), ), + Insn::NotFound { + cursor_id, + target_pc, + record_reg, + .. + } => ( + "NotFound", + *cursor_id as i32, + target_pc.to_debug_int(), + *record_reg as i32, + OwnedValue::build_text(""), + 0, + format!( + "if (r[{}] != NULL) goto {}", + record_reg, + target_pc.to_debug_int() + ), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index b748706e7..8f1fbe580 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -802,6 +802,13 @@ pub enum Insn { Once { target_pc_when_reentered: BranchOffset, }, + /// Search for record in the index cusor, if exists is a no-op otherwise go to target_pc + NotFound { + cursor_id: CursorID, + target_pc: BranchOffset, + record_reg: usize, + num_regs: usize, + }, } impl Insn { @@ -916,6 +923,7 @@ impl Insn { Insn::ReadCookie { .. } => execute::op_read_cookie, Insn::OpenEphemeral { .. } | Insn::OpenAutoindex { .. } => execute::op_open_ephemeral, Insn::Once { .. 
} => execute::op_once, + Insn::NotFound { .. } => execute::op_not_found, } } } From 58efb9046790b4dd897aef59f44abef8f7efd36a Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Mon, 14 Apr 2025 12:09:56 -0300 Subject: [PATCH 252/425] core: Add Affinity bytecode Apply affinities to a range of P2 registers starting with P1. P4 is a string that is P2 characters long. The N-th character of the string indicates the column affinity that should be used for the N-th memory cell in the range. --- core/schema.rs | 16 +++++++++++++++- core/vdbe/execute.rs | 36 +++++++++++++++++++++++++++++++++++- core/vdbe/explain.rs | 22 ++++++++++++++++++++++ core/vdbe/insn.rs | 12 +++++++++++- 4 files changed, 83 insertions(+), 3 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index fbd3a86ec..ea6a26279 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,5 +1,5 @@ -use crate::VirtualTable; use crate::{util::normalize_ident, Result}; +use crate::{LimboError, VirtualTable}; use core::fmt; use fallible_iterator::FallibleIterator; use limbo_sqlite3_parser::ast::{Expr, Literal, SortOrder, TableOptions}; @@ -585,6 +585,20 @@ impl Affinity { Affinity::Numeric => SQLITE_AFF_NUMERIC, } } + + pub fn from_char(char: char) -> Result { + match char { + SQLITE_AFF_INTEGER => Ok(Affinity::Integer), + SQLITE_AFF_TEXT => Ok(Affinity::Text), + SQLITE_AFF_NONE => Ok(Affinity::Blob), + SQLITE_AFF_REAL => Ok(Affinity::Real), + SQLITE_AFF_NUMERIC => Ok(Affinity::Numeric), + _ => Err(LimboError::InternalError(format!( + "Invalid affinity character: {}", + char + ))), + } + } } impl fmt::Display for Type { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 7b151c093..66ced7233 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4509,7 +4509,7 @@ pub fn op_not_found( return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)) } }; - + if found { state.pc += 1; } else { @@ -4519,6 +4519,40 @@ pub fn op_not_found( Ok(InsnFunctionStepResult::Step) } +pub fn op_affinity( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::Affinity { + start_reg, + count, + affinities, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + + if affinities.len() != count.get() { + return Err(LimboError::InternalError( + "Affinity: the length of affinities does not match the count".into(), + )); + } + + for (i, affinity_char) in affinities.chars().enumerate().take(count.get()) { + let reg_index = *start_reg + i; + + let affinity = Affinity::from_char(affinity_char)?; + + apply_affinity_char(&mut state.registers[reg_index], affinity); + } + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + fn exec_lower(reg: &OwnedValue) -> Option { match reg { OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 8149b4ba6..cbb546a11 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1379,6 +1379,28 @@ pub fn insn_to_str( target_pc.to_debug_int() ), ), + Insn::Affinity { + start_reg, + count, + affinities, + } => ( + "Affinity", + *start_reg as i32, + count.get() as i32, + 0, + OwnedValue::build_text(""), + 0, + format!( + "r[{}..{}] = {}", + start_reg, + start_reg + count.get(), + affinities + .chars() + .map(|a| a.to_string()) + .collect::>() + .join(", ") + ), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 8f1fbe580..32c3f9550 100644 --- a/core/vdbe/insn.rs 
+++ b/core/vdbe/insn.rs
@@ -1,4 +1,7 @@
-use std::{num::NonZero, rc::Rc};
+use std::{
+    num::{NonZero, NonZeroUsize},
+    rc::Rc,
+};

 use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx};
 use crate::{
@@ -809,6 +812,12 @@ pub enum Insn {
         record_reg: usize,
         num_regs: usize,
     },
+    /// Apply affinities to a range of registers. The affinities string must be exactly `count` characters long
+    Affinity {
+        start_reg: usize,
+        count: NonZeroUsize,
+        affinities: String,
+    },
 }

 impl Insn {
@@ -924,6 +933,7 @@ impl Insn {
             Insn::OpenEphemeral { .. } | Insn::OpenAutoindex { .. } => execute::op_open_ephemeral,
             Insn::Once { .. } => execute::op_once,
             Insn::NotFound { .. } => execute::op_not_found,
+            Insn::Affinity { .. } => execute::op_affinity,
         }
     }
 }

From 46eaa52400c1e9a7ccef5921f68033b20e53f21f Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Wed, 2 Apr 2025 01:41:53 -0300
Subject: [PATCH 253/425] write tests for file backed db

---
 Makefile | 4 +
 testing/cli_tests/test_limbo_cli.py | 11 +-
 testing/cli_tests/writes.py | 175 ++++++++++++++++++++++++++++
 3 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100755 testing/cli_tests/writes.py

diff --git a/Makefile b/Makefile
index 46ef06c98..f3dff7090 100644
--- a/Makefile
+++ b/Makefile
@@ -98,6 +98,10 @@ test-memory:
 	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py
 .PHONY: test-memory

+test-writes: limbo
+	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/writes.py
+.PHONY: test-writes
+
 clickbench:
 	./perf/clickbench/benchmark.sh
 .PHONY: clickbench
diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py
index 55c3e548f..93f704a07 100755
--- a/testing/cli_tests/test_limbo_cli.py
+++ b/testing/cli_tests/test_limbo_cli.py
@@ -145,10 +145,19 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3)
     def run_test_fn(
         self, sql: str, validate: Callable[[str], bool], desc: str = ""
     ) -> None:
-        actual = self.shell.execute(sql)
+        # Print which test is executing before running the SQL command;
+        # printing it afterwards makes it unclear which test actually failed
         if desc:
             print(f"Testing: {desc}")
+        actual = self.shell.execute(sql)
         assert validate(actual), f"Test failed\nSQL: {sql}\nActual:\n{repr(actual)}"

     def execute_dot(self, dot_command: str) -> None:
         self.shell._write_to_pipe(dot_command)
+
+    # Enables the use of `with` syntax
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exception_type, exception_value, exception_traceback):
+        self.quit()
diff --git a/testing/cli_tests/writes.py b/testing/cli_tests/writes.py
new file mode 100755
index 000000000..5c2c8013d
--- /dev/null
+++ b/testing/cli_tests/writes.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+import os
+from test_limbo_cli import TestLimboShell
+
+
+sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
+
+
+def validate_with_expected(result: str, expected: str):
+    return (expected in result, expected)
+
+
+def stub_write_blob_test(
+    limbo: TestLimboShell,
+    name: str,
+    blob_size: int = 1024**2,
+    vals: int = 100,
+    blobs: bool = True,
+    schema: str = "CREATE TABLE test (t1 BLOB, t2 INTEGER);",
+):
+    zero_blob = "0" * blob_size * 2
+    big_stmt = [schema]
+    big_stmt = big_stmt + [
+        f"INSERT INTO test (t1) VALUES (zeroblob({blob_size}));"
+        if i % 2 == 0 and blobs
+        else f"INSERT INTO test (t2) VALUES ({i});"
+        for i in range(vals * 2)
+    ]
+    expected = []
+    for i in range(vals * 2):
+        if i % 2 == 0 and blobs:
+            big_stmt.append(f"SELECT hex(t1) FROM test LIMIT 1 OFFSET {i};")
+
expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM test LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM test;") + expected.append(str(vals * 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn(big_stmt, lambda res: validate_with_expected(res, expected), name) + + +# TODO no delete tests for now +def blob_tests() -> list[dict]: + tests: list[dict] = [] + + for vals in range(0, 1000, 100): + tests.append( + { + "name": f"small-insert-integer-vals-{vals}", + "vals": vals, + "blobs": False, + } + ) + + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{1024}", + "vals": 10, + "blob_size": 1024, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{1024}", + "vals": 100, + "blob_size": 1024, + } + ) + + for blob_size in range(0, (1024 * 1024) + 1, 1024 * 4**4): + if blob_size == 0: + continue + tests.append( + { + "name": f"small-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 10, + "blob_size": blob_size, + } + ) + tests.append( + { + "name": f"big-insert-blob-interleaved-blob-size-{blob_size}", + "vals": 100, + "blob_size": blob_size, + } + ) + return tests + + +def test_sqlite_compat(db_fullpath: str, schema: str): + sqlite = TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{db_fullpath}", + ) + sqlite.run_test_fn( + ".show", + lambda res: f"filename: {db_fullpath}" in res, + "Opened db file created with Limbo in sqlite3", + ) + sqlite.run_test_fn( + ".schema", + lambda res: schema in res, + "Tables created by previous Limbo test exist in db file", + ) + # TODO when we can import external dependencies + # Have some pydantic object be passed to this function with common fields + # To extract the information necessary to query the db in sqlite + # The object should contain Schema information and queries that should be run to + # test in sqlite for compatibility sakes + + # sqlite.run_test_fn( + # "SELECT count(*) FROM test;", + # lambda res: res == "50", + # "Tested large write to testfs", + # ) + # sqlite.run_test_fn( + # "SELECT count(*) FROM vfs;", + # lambda res: res == "50", + # "Tested large write to testfs", + # ) + sqlite.quit() + + +def touch_db_file(db_fullpath: str): + os.O_RDWR + descriptor = os.open( + path=db_fullpath, + flags=( + os.O_RDWR # access mode: read and write + | os.O_CREAT # create if not exists + | os.O_TRUNC # truncate the file to zero + ), + mode=0o777, + ) + f = open(descriptor) + f.close() + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +if __name__ == "__main__": + tests = blob_tests() + db_path = "testing/writes.db" + schema = "CREATE TABLE test (t1 BLOB, t2 INTEGER);" + # TODO see how to parallelize this loop with different subprocesses + for test in tests: + try: + # Use with syntax to automatically close shell on error + with TestLimboShell() as limbo: + limbo.execute_dot(f".open {db_path}") + stub_write_blob_test(limbo, **test) + print("Testing in SQLite\n") + test_sqlite_compat(db_path, schema) + print() + + except Exception as e: + print(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + print("All tests passed successfully.") From 862783aec7c3c0a913338b91164f7299e5a95703 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: 
Wed, 2 Apr 2025 01:45:14 -0300
Subject: [PATCH 254/425] forgot to add to test command in makefile

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f3dff7090..681214955 100644
--- a/Makefile
+++ b/Makefile
@@ -62,7 +62,7 @@ limbo-wasm:
 	cargo build --package limbo-wasm --target wasm32-wasi
 .PHONY: limbo-wasm

-test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory
+test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-writes
 .PHONY: test

 test-extensions: limbo

From 58efb9046790b4dd897aef59f44abef8f7efd36a Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Thu, 3 Apr 2025 00:50:54 -0300
Subject: [PATCH 255/425] setup uv for limbo

---
 .python-version | 1 +
 Makefile | 18 +++++++++++-------
 pyproject.toml | 15 +++
 testing/README.md | 1 +
 testing/cli_tests/cli_test_cases.py | 8 +-
 testing/cli_tests/extensions.py | 8 +-
 testing/cli_tests/writes.py | 8 +-
 testing/pyproject.toml | 25 +++++
 uv.lock | 168 ++++++++++++++++++++++++++++
 9 files changed, 239 insertions(+), 13 deletions(-)
 create mode 100644 .python-version
 create mode 100644 pyproject.toml
 create mode 100644 testing/README.md
 create mode 100644 testing/pyproject.toml
 create mode 100644 uv.lock

diff --git a/.python-version b/.python-version
new file mode 100644
index 000000000..24ee5b1be
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.13
diff --git a/Makefile b/Makefile
index 681214955..3357ff76d 100644
--- a/Makefile
+++ b/Makefile
@@ -62,16 +62,20 @@ limbo-wasm:
 	cargo build --package limbo-wasm --target wasm32-wasi
 .PHONY: limbo-wasm

-test: limbo test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-writes
+uv-sync:
+	uv sync --all-packages
+.PHONY: uv-sync
+
+test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-writes
 .PHONY: test

-test-extensions: limbo
+test-extensions: limbo uv-sync
 	cargo build --package limbo_regexp
-	./testing/cli_tests/extensions.py
+	uv run --project limbo_test test-extensions
 .PHONY: test-extensions

-test-shell: limbo
-	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/cli_test_cases.py
+test-shell: limbo uv-sync
+	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-shell
 .PHONY: test-shell

 test-compat:
@@ -98,8 +102,8 @@ test-memory:
 	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py
 .PHONY: test-memory

-test-writes: limbo
-	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/writes.py
+test-writes: limbo uv-sync
+	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-writes
 .PHONY: test-writes

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..c86f106a1
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+[project]
+dependencies = []
+name = "limbo"
+readme = "README.md"
+requires-python = ">=3.13"
+version = "0.1.0"
+
+[tool.uv]
+package = false
+
+[tool.uv.sources]
+limbo_test = { workspace = true }
+
+[tool.uv.workspace]
+members = ["testing"]
diff --git a/testing/README.md b/testing/README.md
new file mode 100644
index 000000000..ef4d07cde
--- /dev/null
+++ b/testing/README.md
@@ -0,0 +1 @@
+# Limbo Testing
\ No newline at end of file
diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py
index 17035b0bf..ed16a9775 100755
--- a/testing/cli_tests/cli_test_cases.py
+++ b/testing/cli_tests/cli_test_cases.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-from test_limbo_cli import TestLimboShell
+from cli_tests.test_limbo_cli import
TestLimboShell from pathlib import Path import time import os @@ -300,7 +300,7 @@ def test_insert_default_values(): limbo.quit() -if __name__ == "__main__": +def main(): print("Running all Limbo CLI tests...") test_basic_queries() test_schema_operations() @@ -320,3 +320,7 @@ if __name__ == "__main__": test_update_with_limit() test_update_with_limit_and_offset() print("All tests have passed") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 6d252c543..3d3c04927 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 import os -from test_limbo_cli import TestLimboShell +from cli_tests.test_limbo_cli import TestLimboShell sqlite_exec = "./scripts/limbo-sqlite3" sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -588,7 +588,7 @@ def cleanup(): os.remove("testing/vfs.db-wal") -if __name__ == "__main__": +def main(): try: test_regexp() test_uuid() @@ -606,3 +606,7 @@ if __name__ == "__main__": exit(1) cleanup() print("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/cli_tests/writes.py b/testing/cli_tests/writes.py index 5c2c8013d..b9e1a2fa3 100755 --- a/testing/cli_tests/writes.py +++ b/testing/cli_tests/writes.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 import os -from test_limbo_cli import TestLimboShell +from cli_tests.test_limbo_cli import TestLimboShell sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -151,7 +151,7 @@ def cleanup(db_fullpath: str): os.remove(path) -if __name__ == "__main__": +def main(): tests = blob_tests() db_path = "testing/writes.db" schema = "CREATE TABLE test (t1 BLOB, t2 INTEGER);" @@ -173,3 +173,7 @@ if __name__ == "__main__": # delete db after every compat test so we we have fresh db for next test cleanup(db_path) print("All tests passed successfully.") + + +if __name__ == "__main__": + main() diff --git a/testing/pyproject.toml b/testing/pyproject.toml new file mode 100644 index 000000000..854ed9f20 --- /dev/null +++ b/testing/pyproject.toml @@ -0,0 +1,25 @@ +[project] +description = "Limbo Python Testing Project" +name = "limbo_test" +readme = "README.md" +requires-python = ">=3.13" +version = "0.1.0" +dependencies = ["faker>=37.1.0", "pydantic>=2.11.1", "rich>=14.0.0"] + +[project.scripts] +test-writes = "cli_tests.writes:main" +test-shell = "cli_tests.cli_test_cases:main" +test-extensions = "cli_tests.extensions:main" + +[tool.uv] +package = true + +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling", "hatch-vcs"] + +[tool.hatch.build.targets.wheel] +packages = ["cli_tests"] + +[tool.hatch.metadata] +allow-direct-references = true diff --git a/uv.lock b/uv.lock new file mode 100644 index 000000000..d7afc32d6 --- /dev/null +++ b/uv.lock @@ -0,0 +1,168 @@ +version = 1 +revision = 1 +requires-python = ">=3.13" + +[manifest] +members = [ + "limbo", + "limbo-test", +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = 
"sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "faker" +version = "37.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/a6/b77f42021308ec8b134502343da882c0905d725a4d661c7adeaf7acaf515/faker-37.1.0.tar.gz", hash = "sha256:ad9dc66a3b84888b837ca729e85299a96b58fdaef0323ed0baace93c9614af06", size = 1875707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/a1/8936bc8e79af80ca38288dd93ed44ed1f9d63beb25447a4c59e746e01f8d/faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c", size = 1918783 }, +] + +[[package]] +name = "limbo" +version = "0.1.0" +source = { virtual = "." } + +[[package]] +name = "limbo-test" +version = "0.1.0" +source = { editable = "testing" } +dependencies = [ + { name = "faker" }, + { name = "pydantic" }, + { name = "rich" }, +] + +[package.metadata] +requires-dist = [ + { name = "faker", specifier = ">=37.1.0" }, + { name = "pydantic", specifier = ">=2.11.1" }, + { name = "rich", specifier = ">=14.0.0" }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "pydantic" +version = "2.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/a3/698b87a4d4d303d7c5f62ea5fbf7a79cab236ccfbd0a17847b7f77f8163e/pydantic-2.11.1.tar.gz", hash = "sha256:442557d2910e75c991c39f4b4ab18963d57b9b55122c8b2a9cd176d8c29ce968", size = 782817 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/12/f9221a949f2419e2e23847303c002476c26fbcfd62dc7f3d25d0bec5ca99/pydantic-2.11.1-py3-none-any.whl", hash = "sha256:5b6c415eee9f8123a14d859be0c84363fec6b1feb6b688d6435801230b56e0b8", size = 442648 }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b9/05/91ce14dfd5a3a99555fce436318cc0fd1f08c4daa32b3248ad63669ea8b4/pydantic_core-2.33.0.tar.gz", hash = "sha256:40eb8af662ba409c3cbf4a8150ad32ae73514cd7cb1f1a2113af39763dd616b3", size = 434080 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/20/de2ad03ce8f5b3accf2196ea9b44f31b0cd16ac6e8cfc6b21976ed45ec35/pydantic_core-2.33.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f00e8b59e1fc8f09d05594aa7d2b726f1b277ca6155fc84c0396db1b373c4555", size = 2032214 }, + { url = "https://files.pythonhosted.org/packages/f9/af/6817dfda9aac4958d8b516cbb94af507eb171c997ea66453d4d162ae8948/pydantic_core-2.33.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a73be93ecef45786d7d95b0c5e9b294faf35629d03d5b145b09b81258c7cd6d", size = 1852338 }, + { url = "https://files.pythonhosted.org/packages/44/f3/49193a312d9c49314f2b953fb55740b7c530710977cabe7183b8ef111b7f/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff48a55be9da6930254565ff5238d71d5e9cd8c5487a191cb85df3bdb8c77365", size = 1896913 }, + { url = "https://files.pythonhosted.org/packages/06/e0/c746677825b2e29a2fa02122a8991c83cdd5b4c5f638f0664d4e35edd4b2/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a4ea04195638dcd8c53dadb545d70badba51735b1594810e9768c2c0b4a5da", size = 1986046 }, + { url = "https://files.pythonhosted.org/packages/11/ec/44914e7ff78cef16afb5e5273d480c136725acd73d894affdbe2a1bbaad5/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41d698dcbe12b60661f0632b543dbb119e6ba088103b364ff65e951610cb7ce0", size = 2128097 }, + { url = "https://files.pythonhosted.org/packages/fe/f5/c6247d424d01f605ed2e3802f338691cae17137cee6484dce9f1ac0b872b/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ae62032ef513fe6281ef0009e30838a01057b832dc265da32c10469622613885", size = 2681062 }, + { url = "https://files.pythonhosted.org/packages/f0/85/114a2113b126fdd7cf9a9443b1b1fe1b572e5bd259d50ba9d5d3e1927fa9/pydantic_core-2.33.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f225f3a3995dbbc26affc191d0443c6c4aa71b83358fd4c2b7d63e2f6f0336f9", size = 2007487 }, + { url = "https://files.pythonhosted.org/packages/e6/40/3c05ed28d225c7a9acd2b34c5c8010c279683a870219b97e9f164a5a8af0/pydantic_core-2.33.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5bdd36b362f419c78d09630cbaebc64913f66f62bda6d42d5fbb08da8cc4f181", size = 2121382 }, + { url = "https://files.pythonhosted.org/packages/8a/22/e70c086f41eebd323e6baa92cc906c3f38ddce7486007eb2bdb3b11c8f64/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2a0147c0bef783fd9abc9f016d66edb6cac466dc54a17ec5f5ada08ff65caf5d", size = 2072473 }, + { url = "https://files.pythonhosted.org/packages/3e/84/d1614dedd8fe5114f6a0e348bcd1535f97d76c038d6102f271433cd1361d/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:c860773a0f205926172c6644c394e02c25421dc9a456deff16f64c0e299487d3", size = 2249468 }, + { url = "https://files.pythonhosted.org/packages/b0/c0/787061eef44135e00fddb4b56b387a06c303bfd3884a6df9bea5cb730230/pydantic_core-2.33.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:138d31e3f90087f42aa6286fb640f3c7a8eb7bdae829418265e7e7474bd2574b", size = 2254716 }, + { url = 
"https://files.pythonhosted.org/packages/ae/e2/27262eb04963201e89f9c280f1e10c493a7a37bc877e023f31aa72d2f911/pydantic_core-2.33.0-cp313-cp313-win32.whl", hash = "sha256:d20cbb9d3e95114325780f3cfe990f3ecae24de7a2d75f978783878cce2ad585", size = 1916450 }, + { url = "https://files.pythonhosted.org/packages/13/8d/25ff96f1e89b19e0b70b3cd607c9ea7ca27e1dcb810a9cd4255ed6abf869/pydantic_core-2.33.0-cp313-cp313-win_amd64.whl", hash = "sha256:ca1103d70306489e3d006b0f79db8ca5dd3c977f6f13b2c59ff745249431a606", size = 1956092 }, + { url = "https://files.pythonhosted.org/packages/1b/64/66a2efeff657b04323ffcd7b898cb0354d36dae3a561049e092134a83e9c/pydantic_core-2.33.0-cp313-cp313-win_arm64.whl", hash = "sha256:6291797cad239285275558e0a27872da735b05c75d5237bbade8736f80e4c225", size = 1908367 }, + { url = "https://files.pythonhosted.org/packages/52/54/295e38769133363d7ec4a5863a4d579f331728c71a6644ff1024ee529315/pydantic_core-2.33.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7b79af799630af263eca9ec87db519426d8c9b3be35016eddad1832bac812d87", size = 1813331 }, + { url = "https://files.pythonhosted.org/packages/4c/9c/0c8ea02db8d682aa1ef48938abae833c1d69bdfa6e5ec13b21734b01ae70/pydantic_core-2.33.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eabf946a4739b5237f4f56d77fa6668263bc466d06a8036c055587c130a46f7b", size = 1986653 }, + { url = "https://files.pythonhosted.org/packages/8e/4f/3fb47d6cbc08c7e00f92300e64ba655428c05c56b8ab6723bd290bae6458/pydantic_core-2.33.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8a1d581e8cdbb857b0e0e81df98603376c1a5c34dc5e54039dcc00f043df81e7", size = 1931234 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "rich" +version = "14.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0e/3e/b00a62db91a83fff600de219b6ea9908e6918664899a2d85db222f4fbf19/typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b", size = 106520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/86/39b65d676ec5732de17b7e3c476e45bb80ec64eb50737a8dce1a4178aba1/typing_extensions-4.13.0-py3-none-any.whl", hash = 
"sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5", size = 45683 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125 }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, +] From 0c137d6dffb70d7e59e960ccbdcdf4c2ccdf578a Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 01:48:33 -0300 Subject: [PATCH 256/425] Cleaner and less error prone Write Tests --- testing/cli_tests/writes.py | 214 ++++++++++++++++++------------------ testing/pyproject.toml | 5 +- uv.lock | 45 -------- 3 files changed, 110 insertions(+), 154 deletions(-) diff --git a/testing/cli_tests/writes.py b/testing/cli_tests/writes.py index b9e1a2fa3..a79b44448 100755 --- a/testing/cli_tests/writes.py +++ b/testing/cli_tests/writes.py @@ -1,146 +1,147 @@ #!/usr/bin/env python3 import os from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") +class InsertTest(BaseModel): + name: str + db_schema: str = "CREATE TABLE test (t1 BLOB, t2 INTEGER);" + blob_size: int = 1024**2 + vals: int = 100 + has_blob: bool = True + db_path: str = "testing/writes.db" + + def run(self, limbo: TestLimboShell): + zero_blob = "0" * self.blob_size * 2 + big_stmt = [self.db_schema] + big_stmt = big_stmt + [ + f"INSERT INTO test (t1) VALUES (zeroblob({self.blob_size}));" + if i % 2 == 0 and self.has_blob + else f"INSERT INTO test (t2) VALUES ({i});" + for i in range(self.vals * 2) + ] + expected = [] + for i in range(self.vals * 2): + if i % 2 == 0 and self.has_blob: + big_stmt.append(f"SELECT hex(t1) FROM test LIMIT 1 OFFSET {i};") + expected.append(zero_blob) + else: + big_stmt.append(f"SELECT t2 FROM test LIMIT 1 OFFSET {i};") + expected.append(f"{i}") + + big_stmt.append("SELECT count(*) FROM test;") + expected.append(str(self.vals * 2)) + + big_stmt = "".join(big_stmt) + expected = "\n".join(expected) + + limbo.run_test_fn( + big_stmt, lambda res: validate_with_expected(res, expected), self.name + ) + + def test_compat(self): + print("Testing in SQLite\n") + + with TestLimboShell( + init_commands="", + exec_name="sqlite3", + flags=f"{self.db_path}", + ) as sqlite: + sqlite.run_test_fn( + ".show", + lambda res: f"filename: {self.db_path}" in res, + "Opened db file created with Limbo in sqlite3", + ) + sqlite.run_test_fn( + ".schema", + lambda 
res: self.db_schema in res, + "Tables created by previous Limbo test exist in db file", + ) + # TODO Have some pydantic object be passed to this function with common fields + # To extract the information necessary to query the db in sqlite + # The object should contain Schema information and queries that should be run to + # test in sqlite for compatibility sakes + print() + pass + + def validate_with_expected(result: str, expected: str): return (expected in result, expected) -def stub_write_blob_test( - limbo: TestLimboShell, - name: str, - blob_size: int = 1024**2, - vals: int = 100, - blobs: bool = True, - schema: str = "CREATE TABLE test (t1 BLOB, t2 INTEGER);", -): - zero_blob = "0" * blob_size * 2 - big_stmt = [schema] - big_stmt = big_stmt + [ - f"INSERT INTO test (t1) VALUES (zeroblob({blob_size}));" - if i % 2 == 0 and blobs - else f"INSERT INTO test (t2) VALUES ({i});" - for i in range(vals * 2) - ] - expected = [] - for i in range(vals * 2): - if i % 2 == 0 and blobs: - big_stmt.append(f"SELECT hex(t1) FROM test LIMIT 1 OFFSET {i};") - expected.append(zero_blob) - else: - big_stmt.append(f"SELECT t2 FROM test LIMIT 1 OFFSET {i};") - expected.append(f"{i}") - - big_stmt.append("SELECT count(*) FROM test;") - expected.append(str(vals * 2)) - - big_stmt = "".join(big_stmt) - expected = "\n".join(expected) - - limbo.run_test_fn(big_stmt, lambda res: validate_with_expected(res, expected), name) - - # TODO no delete tests for now -def blob_tests() -> list[dict]: +def blob_tests() -> list[InsertTest]: tests: list[dict] = [] for vals in range(0, 1000, 100): tests.append( - { - "name": f"small-insert-integer-vals-{vals}", - "vals": vals, - "blobs": False, - } + InsertTest( + name=f"small-insert-integer-vals-{vals}", + vals=vals, + has_blob=False, + ) ) tests.append( - { - "name": f"small-insert-blob-interleaved-blob-size-{1024}", - "vals": 10, - "blob_size": 1024, - } + InsertTest( + name=f"small-insert-blob-interleaved-blob-size-{1024}", + vals=10, + blob_size=1024, + ) ) tests.append( - { - "name": f"big-insert-blob-interleaved-blob-size-{1024}", - "vals": 100, - "blob_size": 1024, - } + InsertTest( + name=f"big-insert-blob-interleaved-blob-size-{1024}", + vals=100, + blob_size=1024, + ) ) for blob_size in range(0, (1024 * 1024) + 1, 1024 * 4**4): if blob_size == 0: continue tests.append( - { - "name": f"small-insert-blob-interleaved-blob-size-{blob_size}", - "vals": 10, - "blob_size": blob_size, - } + InsertTest( + name=f"small-insert-blob-interleaved-blob-size-{blob_size}", + vals=10, + blob_size=blob_size, + ) ) tests.append( - { - "name": f"big-insert-blob-interleaved-blob-size-{blob_size}", - "vals": 100, - "blob_size": blob_size, - } + InsertTest( + name=f"big-insert-blob-interleaved-blob-size-{blob_size}", + vals=100, + blob_size=blob_size, + ) ) return tests def test_sqlite_compat(db_fullpath: str, schema: str): - sqlite = TestLimboShell( + with TestLimboShell( init_commands="", exec_name="sqlite3", flags=f"{db_fullpath}", - ) - sqlite.run_test_fn( - ".show", - lambda res: f"filename: {db_fullpath}" in res, - "Opened db file created with Limbo in sqlite3", - ) - sqlite.run_test_fn( - ".schema", - lambda res: schema in res, - "Tables created by previous Limbo test exist in db file", - ) - # TODO when we can import external dependencies - # Have some pydantic object be passed to this function with common fields + ) as sqlite: + sqlite.run_test_fn( + ".show", + lambda res: f"filename: {db_fullpath}" in res, + "Opened db file created with Limbo in sqlite3", + ) + 
sqlite.run_test_fn( + ".schema", + lambda res: schema in res, + "Tables created by previous Limbo test exist in db file", + ) + # TODO Have some pydantic object be passed to this function with common fields # To extract the information necessary to query the db in sqlite # The object should contain Schema information and queries that should be run to # test in sqlite for compatibility sakes - # sqlite.run_test_fn( - # "SELECT count(*) FROM test;", - # lambda res: res == "50", - # "Tested large write to testfs", - # ) - # sqlite.run_test_fn( - # "SELECT count(*) FROM vfs;", - # lambda res: res == "50", - # "Tested large write to testfs", - # ) - sqlite.quit() - - -def touch_db_file(db_fullpath: str): - os.O_RDWR - descriptor = os.open( - path=db_fullpath, - flags=( - os.O_RDWR # access mode: read and write - | os.O_CREAT # create if not exists - | os.O_TRUNC # truncate the file to zero - ), - mode=0o777, - ) - f = open(descriptor) - f.close() - def cleanup(db_fullpath: str): wal_path = f"{db_fullpath}-wal" @@ -153,18 +154,15 @@ def cleanup(db_fullpath: str): def main(): tests = blob_tests() - db_path = "testing/writes.db" - schema = "CREATE TABLE test (t1 BLOB, t2 INTEGER);" - # TODO see how to parallelize this loop with different subprocesses for test in tests: + db_path = test.db_path try: # Use with syntax to automatically close shell on error with TestLimboShell() as limbo: limbo.execute_dot(f".open {db_path}") - stub_write_blob_test(limbo, **test) - print("Testing in SQLite\n") - test_sqlite_compat(db_path, schema) - print() + test.run(limbo) + + test.test_compat() except Exception as e: print(f"Test FAILED: {e}") diff --git a/testing/pyproject.toml b/testing/pyproject.toml index 854ed9f20..548c4ab81 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -4,7 +4,10 @@ name = "limbo_test" readme = "README.md" requires-python = ">=3.13" version = "0.1.0" -dependencies = ["faker>=37.1.0", "pydantic>=2.11.1", "rich>=14.0.0"] +dependencies = [ + "faker>=37.1.0", + "pydantic>=2.11.1", +] [project.scripts] test-writes = "cli_tests.writes:main" diff --git a/uv.lock b/uv.lock index d7afc32d6..718149d38 100644 --- a/uv.lock +++ b/uv.lock @@ -41,35 +41,12 @@ source = { editable = "testing" } dependencies = [ { name = "faker" }, { name = "pydantic" }, - { name = "rich" }, ] [package.metadata] requires-dist = [ { name = "faker", specifier = ">=37.1.0" }, { name = "pydantic", specifier = ">=2.11.1" }, - { name = "rich", specifier = ">=14.0.0" }, -] - -[[package]] -name = "markdown-it-py" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, ] [[package]] @@ -115,28 +92,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/4f/3fb47d6cbc08c7e00f92300e64ba655428c05c56b8ab6723bd290bae6458/pydantic_core-2.33.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8a1d581e8cdbb857b0e0e81df98603376c1a5c34dc5e54039dcc00f043df81e7", size = 1931234 }, ] -[[package]] -name = "pygments" -version = "2.19.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, -] - -[[package]] -name = "rich" -version = "14.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, -] - [[package]] name = "typing-extensions" version = "4.13.0" From bdef83dc1cee58b255beda0f1b1627602e44c305 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 18:43:19 -0300 Subject: [PATCH 257/425] update test --- Makefile | 12 +- testing/cli_tests/update.py | 135 ++++++++++++++++++++++ testing/cli_tests/{writes.py => write.py} | 36 ++---- testing/pyproject.toml | 3 +- 4 files changed, 152 insertions(+), 34 deletions(-) create mode 100644 testing/cli_tests/update.py rename testing/cli_tests/{writes.py => write.py} (77%) diff --git a/Makefile b/Makefile index 3357ff76d..2d0924840 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ uv-sync: uv sync --all-packages .PHONE: uv-sync -test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-writes +test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update .PHONY: test test-extensions: limbo uv-sync @@ -102,9 +102,13 @@ test-memory: SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py .PHONY: test-memory -test-writes: limbo uv-sync - SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-writes -.PHONY: test-writes +test-write: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-write +.PHONY: test-write + +test-update: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update +.PHONY: test-update clickbench: ./perf/clickbench/benchmark.sh diff --git a/testing/cli_tests/update.py b/testing/cli_tests/update.py new file mode 100644 index 000000000..e0473c877 --- /dev/null +++ b/testing/cli_tests/update.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +import os 
+from cli_tests.test_limbo_cli import TestLimboShell
+from pydantic import BaseModel
+
+
+sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
+
+
+class UpdateTest(BaseModel):
+    name: str
+    db_schema: str = "CREATE TABLE test (key INTEGER, t1 BLOB, t2 INTEGER, t3 TEXT);"
+    blob_size: int = 1024
+    vals: int = 1000
+    updates: int = 1
+    db_path: str = "testing/update.db"
+
+    def init_db(self):
+        with TestLimboShell(
+            init_commands="",
+            exec_name="sqlite3",
+            flags=f"{self.db_path}",
+        ) as sqlite:
+            sqlite.execute_dot(f".open {self.db_path}")
+            zero_blob = "0" * self.blob_size * 2
+            t2_val = "1"
+            t3_val = "2"
+            stmt = [self.db_schema]
+            stmt = stmt + [
+                f"INSERT INTO test (key, t1, t2, t3) VALUES ({i} ,zeroblob({self.blob_size}), {t2_val}, {t3_val});"
+                for i in range(self.vals)
+            ]
+            stmt.append("SELECT count(*) FROM test;")
+
+            sqlite.run_test(
+                "Init Update Db in Sqlite",
+                "".join(stmt),
+                f"{self.vals}",
+            )
+
+            stmt = [
+                f"SELECT hex(t1), t2, t3 FROM test LIMIT 1 OFFSET {i};"
+                for i in range(self.vals)
+            ]
+
+            expected = [f"{zero_blob}|{t2_val}|{t3_val}" for _ in range(self.vals)]
+            sqlite.run_test(
+                "Check Values correctly inserted in Sqlite",
+                "".join(stmt),
+                "\n".join(expected),
+            )
+
+    def run(self, limbo: TestLimboShell):
+        limbo.execute_dot(f".open {self.db_path}")
+        # TODO blobs are hard. Forget about blob updates for now
+        # one_blob = ("0" * ((self.blob_size * 2) - 1)) + "1"
+        # TODO For now update just on one row. To expand the tests in the future,
+        # use self.updates and do more than 1 update
+        t2_update_val = "123"
+        stmt = f"UPDATE test SET t2 = {t2_update_val} WHERE key = {0};"
+        limbo.run_test(self.name, stmt, "")
+
+    def test_compat(self):
+        print("Testing in SQLite\n")
+
+        with TestLimboShell(
+            init_commands="",
+            exec_name="sqlite3",
+            flags=f"{self.db_path}",
+        ) as sqlite:
+            sqlite.execute_dot(f".open {self.db_path}")
+            zero_blob = "0" * self.blob_size * 2
+
+            t2_val = "1"
+            t2_update_val = "123"
+            t3_val = "2"
+            stmt = []
+            stmt.append("SELECT count(*) FROM test;")
+
+            sqlite.run_test(
+                "Check all rows present in Sqlite",
+                "".join(stmt),
+                f"{self.vals}",
+            )
+
+            stmt = [
+                f"SELECT hex(t1), t2, t3 FROM test LIMIT 1 OFFSET {i};"
+                for i in range(self.vals)
+            ]
+
+            expected = [
+                f"{zero_blob}|{t2_val}|{t3_val}"
+                if i != 0
+                else f"{zero_blob}|{t2_update_val}|{t3_val}"
+                for i in range(self.vals)
+            ]
+            sqlite.run_test(
+                "Check Values correctly updated in Sqlite",
+                "".join(stmt),
+                "\n".join(expected),
+            )
+        print()
+
+
+def cleanup(db_fullpath: str):
+    wal_path = f"{db_fullpath}-wal"
+    shm_path = f"{db_fullpath}-shm"
+    paths = [db_fullpath, wal_path, shm_path]
+    for path in paths:
+        if os.path.exists(path):
+            os.remove(path)
+
+
+def main():
+    test = UpdateTest(name="Update 1 column", vals=1)
+    db_path = test.db_path
+    try:
+        test.init_db()
+        # Use with syntax to automatically close shell on error
+        with TestLimboShell("") as limbo:
+            test.run(limbo)
+
+        test.test_compat()
+
+    except Exception as e:
+        print(f"Test FAILED: {e}")
+        cleanup(db_path)
+        exit(1)
+    # delete db after every compat test so we have a fresh db for the next test
+    cleanup(db_path)
+    print("All tests passed successfully.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/testing/cli_tests/writes.py b/testing/cli_tests/write.py
similarity index 77%
rename from testing/cli_tests/writes.py
rename to testing/cli_tests/write.py
index b9e1a2fa3..4ccf1bc0c 100755
--- a/testing/cli_tests/writes.py
+++ b/testing/cli_tests/write.py
@@ -61,12 +61,12 @@ class InsertTest(BaseModel):
lambda res: self.db_schema in res, "Tables created by previous Limbo test exist in db file", ) - # TODO Have some pydantic object be passed to this function with common fields - # To extract the information necessary to query the db in sqlite - # The object should contain Schema information and queries that should be run to - # test in sqlite for compatibility sakes + sqlite.run_test_fn( + "SELECT count(*) FROM test;", + lambda res: res == str(self.vals * 2), + "Counting total rows inserted", + ) print() - pass def validate_with_expected(result: str, expected: str): @@ -75,7 +75,7 @@ def validate_with_expected(result: str, expected: str): # TODO no delete tests for now def blob_tests() -> list[InsertTest]: - tests: list[dict] = [] + tests: list[InsertTest] = [] for vals in range(0, 1000, 100): tests.append( @@ -121,28 +121,6 @@ def blob_tests() -> list[InsertTest]: return tests -def test_sqlite_compat(db_fullpath: str, schema: str): - with TestLimboShell( - init_commands="", - exec_name="sqlite3", - flags=f"{db_fullpath}", - ) as sqlite: - sqlite.run_test_fn( - ".show", - lambda res: f"filename: {db_fullpath}" in res, - "Opened db file created with Limbo in sqlite3", - ) - sqlite.run_test_fn( - ".schema", - lambda res: schema in res, - "Tables created by previous Limbo test exist in db file", - ) - # TODO Have some pydantic object be passed to this function with common fields - # To extract the information necessary to query the db in sqlite - # The object should contain Schema information and queries that should be run to - # test in sqlite for compatibility sakes - - def cleanup(db_fullpath: str): wal_path = f"{db_fullpath}-wal" shm_path = f"{db_fullpath}-shm" @@ -158,7 +136,7 @@ def main(): db_path = test.db_path try: # Use with syntax to automatically close shell on error - with TestLimboShell() as limbo: + with TestLimboShell("") as limbo: limbo.execute_dot(f".open {db_path}") test.run(limbo) diff --git a/testing/pyproject.toml b/testing/pyproject.toml index 548c4ab81..d4e257361 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -10,9 +10,10 @@ dependencies = [ ] [project.scripts] -test-writes = "cli_tests.writes:main" +test-write = "cli_tests.write:main" test-shell = "cli_tests.cli_test_cases:main" test-extensions = "cli_tests.extensions:main" +test-update = "cli_tests.update:main" [tool.uv] package = true From dd5310a85e1a9d287077d8a7572dc1f3b64bc79b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 21:01:06 -0300 Subject: [PATCH 258/425] adjust workflow to install uv --- .github/workflows/rust.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 67d6d7e23..fcc054d81 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -75,6 +75,18 @@ jobs: curl -L $LINK/$CARGO_C_FILE | tar xz -C ~/.cargo/bin - uses: actions/checkout@v3 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + run: uv python install + + - name: Install the project + run: uv sync --all-extras --dev --all-packages + - uses: "./.github/shared/install_sqlite" - name: Test run: make test From b34e7e011e11cc13850cd9381a4595c5db94c55b Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 22:30:02 -0300 Subject: [PATCH 259/425] Prettier console --- pyproject.toml | 4 ++- testing/cli_tests/console.py | 62 ++++++++++++++++++++++++++++++++++++ uv.lock | 49 ++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) 
create mode 100644 testing/cli_tests/console.py diff --git a/pyproject.toml b/pyproject.toml index c86f106a1..b8d5018cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,7 @@ [project] -dependencies = [] +dependencies = [ + "rich>=14.0.0", +] name = "limbo" readme = "README.md" requires-python = ">=3.13" diff --git a/testing/cli_tests/console.py b/testing/cli_tests/console.py new file mode 100644 index 000000000..36b3a29a4 --- /dev/null +++ b/testing/cli_tests/console.py @@ -0,0 +1,62 @@ +from typing import Any, Optional, Union +from rich.console import Console, JustifyMethod +from rich.theme import Theme +from rich.style import Style + + +custom_theme = Theme({"info": "blue", "error": "bold red"}) +console = Console(theme=custom_theme) + + +def info( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[info]INFO[/info]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset, + ) + + +def error( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[error]ERROR[/error]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset, + ) diff --git a/uv.lock b/uv.lock index 718149d38..eaf3e5bff 100644 --- a/uv.lock +++ b/uv.lock @@ -33,6 +33,12 @@ wheels = [ name = "limbo" version = "0.1.0" source = { virtual = "." 
} +dependencies = [ + { name = "rich" }, +] + +[package.metadata] +requires-dist = [{ name = "rich", specifier = ">=14.0.0" }] [[package]] name = "limbo-test" @@ -49,6 +55,27 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.11.1" }, ] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + [[package]] name = "pydantic" version = "2.11.1" @@ -92,6 +119,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/4f/3fb47d6cbc08c7e00f92300e64ba655428c05c56b8ab6723bd290bae6458/pydantic_core-2.33.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8a1d581e8cdbb857b0e0e81df98603376c1a5c34dc5e54039dcc00f043df81e7", size = 1931234 }, ] +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "rich" +version = "14.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, +] + [[package]] name = "typing-extensions" version = "4.13.0" From d71029cda7989e3d8a6cc4b8f20ea650e7f4124e Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 23:07:07 -0300 Subject: [PATCH 260/425] Overhaul in printing using rich --- testing/cli_tests/cli_test_cases.py | 5 +++-- testing/cli_tests/console.py | 33 ++++++++++++++++++++++++++--- 
testing/cli_tests/extensions.py | 9 ++++---- testing/cli_tests/test_limbo_cli.py | 13 ++++++------ testing/cli_tests/update.py | 11 ++++++---- testing/cli_tests/write.py | 10 +++++---- 6 files changed, 58 insertions(+), 23 deletions(-) diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py index ed16a9775..ba5e9a38f 100755 --- a/testing/cli_tests/cli_test_cases.py +++ b/testing/cli_tests/cli_test_cases.py @@ -3,6 +3,7 @@ from cli_tests.test_limbo_cli import TestLimboShell from pathlib import Path import time import os +from cli_tests import console def test_basic_queries(): @@ -301,7 +302,7 @@ def test_insert_default_values(): def main(): - print("Running all Limbo CLI tests...") + console.info("Running all Limbo CLI tests...") test_basic_queries() test_schema_operations() test_file_operations() @@ -319,7 +320,7 @@ def main(): test_table_patterns() test_update_with_limit() test_update_with_limit_and_offset() - print("All tests have passed") + console.info("All tests have passed") if __name__ == "__main__": diff --git a/testing/cli_tests/console.py b/testing/cli_tests/console.py index 36b3a29a4..44fc6fe4f 100644 --- a/testing/cli_tests/console.py +++ b/testing/cli_tests/console.py @@ -4,7 +4,7 @@ from rich.theme import Theme from rich.style import Style -custom_theme = Theme({"info": "blue", "error": "bold red"}) +custom_theme = Theme({"info": "bold blue", "error": "bold red", "debug": "bold blue"}) console = Console(theme=custom_theme) @@ -31,7 +31,7 @@ def info( markup=markup, highlight=highlight, log_locals=log_locals, - _stack_offset=_stack_offset, + _stack_offset=_stack_offset + 1, ) @@ -58,5 +58,32 @@ def error( markup=markup, highlight=highlight, log_locals=log_locals, - _stack_offset=_stack_offset, + _stack_offset=_stack_offset + 1, + ) + + +def debug( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[debug]DEBUG[/debug]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, ) diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index 3d3c04927..ab57e4178 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os from cli_tests.test_limbo_cli import TestLimboShell +from cli_tests import console sqlite_exec = "./scripts/limbo-sqlite3" sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -81,7 +82,7 @@ def test_regexp(): lambda res: "Parse error: no such function" in res, ) limbo.run_test_fn(f".load {extension_path}", null) - print(f"Extension {extension_path} loaded successfully.") + console.info(f"Extension {extension_path} loaded successfully.") limbo.run_test_fn("SELECT regexp('a.c', 'abc');", true) limbo.run_test_fn("SELECT regexp('a.c', 'ac');", false) limbo.run_test_fn("SELECT regexp('[0-9]+', 'the year is 2021');", true) @@ -522,7 +523,7 @@ def test_vfs(): lambda res: res == "50", "Tested large write to testfs", ) - print("Tested large write to testfs") + console.info("Tested large write to testfs") limbo.quit() @@ -601,11 +602,11 @@ def main(): test_kv() test_drop_virtual_table() except Exception as e: - print(f"Test FAILED: {e}") + 
console.error(f"Test FAILED: {e}") cleanup() exit(1) cleanup() - print("All tests passed successfully.") + console.info("All tests passed successfully.") if __name__ == "__main__": diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 93f704a07..8436fb6df 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -5,6 +5,7 @@ from time import sleep import subprocess from pathlib import Path from typing import Callable, List, Optional +from cli_tests import console PIPE_BUF = 4096 @@ -77,9 +78,9 @@ class LimboShell: error_output = self.pipe.stderr.read(PIPE_BUF) if error_output == b"": return True - print(error_output.decode(), end="") + console.error(error_output.decode(), end="") return False - + @staticmethod def _clean_output(output: str, marker: str) -> str: output = output.rstrip().removesuffix(marker) @@ -128,7 +129,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) self.shell.quit() def run_test(self, name: str, sql: str, expected: str) -> None: - print(f"Running test: {name}") + console.info(f"Running test: {name}") actual = self.shell.execute(sql) assert actual == expected, ( f"Test failed: {name}\n" @@ -138,9 +139,9 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) ) def debug_print(self, sql: str): - print(f"debugging: {sql}") + console.debug(f"debugging: {sql}") actual = self.shell.execute(sql) - print(f"OUTPUT:\n{repr(actual)}") + console.debug(f"OUTPUT:\n{repr(actual)}") def run_test_fn( self, sql: str, validate: Callable[[str], bool], desc: str = "" @@ -148,7 +149,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) # Print the test that is executing before executing the sql command # Printing later confuses the user of the code what test has actually failed if desc: - print(f"Testing: {desc}") + console.info(f"Testing: {desc}") actual = self.shell.execute(sql) assert validate(actual), f"Test failed\nSQL: {sql}\nActual:\n{repr(actual)}" diff --git a/testing/cli_tests/update.py b/testing/cli_tests/update.py index e0473c877..1d0d23b63 100644 --- a/testing/cli_tests/update.py +++ b/testing/cli_tests/update.py @@ -2,6 +2,7 @@ import os from cli_tests.test_limbo_cli import TestLimboShell from pydantic import BaseModel +from cli_tests import console sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -61,7 +62,7 @@ class UpdateTest(BaseModel): limbo.run_test(self.name, stmt, "") def test_compat(self): - print("Testing in SQLite\n") + console.info("Testing in SQLite\n") with TestLimboShell( init_commands="", @@ -99,7 +100,7 @@ class UpdateTest(BaseModel): "".join(stmt), "\n".join(expected), ) - print() + console.info() def cleanup(db_fullpath: str): @@ -113,6 +114,8 @@ def cleanup(db_fullpath: str): def main(): test = UpdateTest(name="Update 1 column", vals=1) + console.info(test) + db_path = test.db_path try: test.init_db() @@ -123,12 +126,12 @@ def main(): test.test_compat() except Exception as e: - print(f"Test FAILED: {e}") + console.error(f"Test FAILED: {e}") cleanup(db_path) exit(1) # delete db after every compat test so we we have fresh db for next test cleanup(db_path) - print("All tests passed successfully.") + console.info("All tests passed successfully.") if __name__ == "__main__": diff --git a/testing/cli_tests/write.py b/testing/cli_tests/write.py index 4ccf1bc0c..e3f7fd04c 100755 --- a/testing/cli_tests/write.py +++ b/testing/cli_tests/write.py @@ -2,6 +2,7 @@ import os from 
cli_tests.test_limbo_cli import TestLimboShell from pydantic import BaseModel +from cli_tests import console sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") @@ -44,7 +45,7 @@ class InsertTest(BaseModel): ) def test_compat(self): - print("Testing in SQLite\n") + console.info("Testing in SQLite\n") with TestLimboShell( init_commands="", @@ -66,7 +67,7 @@ class InsertTest(BaseModel): lambda res: res == str(self.vals * 2), "Counting total rows inserted", ) - print() + console.info() def validate_with_expected(result: str, expected: str): @@ -133,6 +134,7 @@ def cleanup(db_fullpath: str): def main(): tests = blob_tests() for test in tests: + console.info(test) db_path = test.db_path try: # Use with syntax to automatically close shell on error @@ -143,12 +145,12 @@ def main(): test.test_compat() except Exception as e: - print(f"Test FAILED: {e}") + console.error(f"Test FAILED: {e}") cleanup(db_path) exit(1) # delete db after every compat test so we we have fresh db for next test cleanup(db_path) - print("All tests passed successfully.") + console.info("All tests passed successfully.") if __name__ == "__main__": From 4c0bd50ac9bb7627882750ea5b2456bb85dd2ef0 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 3 Apr 2025 23:14:48 -0300 Subject: [PATCH 261/425] force terminal colors --- testing/cli_tests/console.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/cli_tests/console.py b/testing/cli_tests/console.py index 44fc6fe4f..14a6a7f7f 100644 --- a/testing/cli_tests/console.py +++ b/testing/cli_tests/console.py @@ -5,7 +5,7 @@ from rich.style import Style custom_theme = Theme({"info": "bold blue", "error": "bold red", "debug": "bold blue"}) -console = Console(theme=custom_theme) +console = Console(theme=custom_theme, force_terminal=True) def info( From 321def3c305ce631ca50c8aa8187bb58c14ef669 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 4 Apr 2025 00:54:48 -0300 Subject: [PATCH 262/425] adjust stack_offset for test_limbo_cli --- testing/cli_tests/test_limbo_cli.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index 8436fb6df..ddd59695e 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -75,11 +75,14 @@ class LimboShell: def _handle_error(self) -> bool: while True: - error_output = self.pipe.stderr.read(PIPE_BUF) - if error_output == b"": - return True - console.error(error_output.decode(), end="") - return False + ready, _, errors = select.select( + [self.pipe.stderr], [], [self.pipe.stderr], 0 + ) + if not (ready + errors): + break + error_output = self.pipe.stderr.read(PIPE_BUF).decode() + console.error(error_output, end="", _stack_offset=2) + raise RuntimeError("Error encountered in Limbo shell.") @staticmethod def _clean_output(output: str, marker: str) -> str: @@ -129,7 +132,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) self.shell.quit() def run_test(self, name: str, sql: str, expected: str) -> None: - console.info(f"Running test: {name}") + console.info(f"Running test: {name}", _stack_offset=2) actual = self.shell.execute(sql) assert actual == expected, ( f"Test failed: {name}\n" @@ -139,9 +142,9 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) ) def debug_print(self, sql: str): - console.debug(f"debugging: {sql}") + console.debug(f"debugging: {sql}", _stack_offset=2) actual = self.shell.execute(sql) - 
console.debug(f"OUTPUT:\n{repr(actual)}") + console.debug(f"OUTPUT:\n{repr(actual)}", _stack_offset=2) def run_test_fn( self, sql: str, validate: Callable[[str], bool], desc: str = "" @@ -149,7 +152,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) # Print the test that is executing before executing the sql command # Printing later confuses the user of the code what test has actually failed if desc: - console.info(f"Testing: {desc}") + console.info(f"Testing: {desc}", _stack_offset=2) actual = self.shell.execute(sql) assert validate(actual), f"Test failed\nSQL: {sql}\nActual:\n{repr(actual)}" From 3cd2017df421a32e9c1a7a51e149825e1fc76e08 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 4 Apr 2025 00:56:50 -0300 Subject: [PATCH 263/425] introduce test theme --- testing/cli_tests/console.py | 35 ++++++++++++++++++++++++++++- testing/cli_tests/test_limbo_cli.py | 4 ++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/testing/cli_tests/console.py b/testing/cli_tests/console.py index 14a6a7f7f..2f295a90d 100644 --- a/testing/cli_tests/console.py +++ b/testing/cli_tests/console.py @@ -4,7 +4,14 @@ from rich.theme import Theme from rich.style import Style -custom_theme = Theme({"info": "bold blue", "error": "bold red", "debug": "bold blue"}) +custom_theme = Theme( + { + "info": "bold blue", + "error": "bold red", + "debug": "bold blue", + "test": "bold green", + } +) console = Console(theme=custom_theme, force_terminal=True) @@ -87,3 +94,29 @@ def debug( log_locals=log_locals, _stack_offset=_stack_offset + 1, ) + +def test( + *objects: Any, + sep: str = " ", + end: str = "\n", + style: Optional[Union[str, Style]] = None, + justify: Optional[JustifyMethod] = None, + emoji: Optional[bool] = None, + markup: Optional[bool] = None, + highlight: Optional[bool] = None, + log_locals: bool = False, + _stack_offset: int = 1, +): + console.log( + "[test]TEST[/test]", + *objects, + sep=sep, + end=end, + style=style, + justify=justify, + emoji=emoji, + markup=markup, + highlight=highlight, + log_locals=log_locals, + _stack_offset=_stack_offset + 1, + ) \ No newline at end of file diff --git a/testing/cli_tests/test_limbo_cli.py b/testing/cli_tests/test_limbo_cli.py index ddd59695e..626d7defe 100755 --- a/testing/cli_tests/test_limbo_cli.py +++ b/testing/cli_tests/test_limbo_cli.py @@ -132,7 +132,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) self.shell.quit() def run_test(self, name: str, sql: str, expected: str) -> None: - console.info(f"Running test: {name}", _stack_offset=2) + console.test(f"Running test: {name}", _stack_offset=2) actual = self.shell.execute(sql) assert actual == expected, ( f"Test failed: {name}\n" @@ -152,7 +152,7 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) # Print the test that is executing before executing the sql command # Printing later confuses the user of the code what test has actually failed if desc: - console.info(f"Testing: {desc}", _stack_offset=2) + console.test(f"Testing: {desc}", _stack_offset=2) actual = self.shell.execute(sql) assert validate(actual), f"Test failed\nSQL: {sql}\nActual:\n{repr(actual)}" From bd5531987ea4d87a72a4d7a561bb9b7bc51871fd Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 9 Apr 2025 12:15:25 -0300 Subject: [PATCH 264/425] adjusting memory test to use UV --- Makefile | 4 ++-- testing/cli_tests/memory.py | 13 ++++++------- testing/pyproject.toml | 1 + 3 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/Makefile b/Makefile
index 2d0924840..623fbb6ce 100644
--- a/Makefile
+++ b/Makefile
@@ -98,8 +98,8 @@ test-json:
 	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/json.test
 .PHONY: test-json
 
-test-memory:
-	SQLITE_EXEC=$(SQLITE_EXEC) ./testing/cli_tests/memory.py
+test-memory: limbo uv-sync
+	SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-memory
 .PHONY: test-memory
 
 test-write: limbo uv-sync
diff --git a/testing/cli_tests/memory.py b/testing/cli_tests/memory.py
index da98bcc1d..a329ba027 100755
--- a/testing/cli_tests/memory.py
+++ b/testing/cli_tests/memory.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import os
-from test_limbo_cli import TestLimboShell
+from cli_tests.test_limbo_cli import TestLimboShell
+from cli_tests import console
 
 
 sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ")
@@ -96,15 +97,13 @@ def main():
     tests = memory_tests()
     # TODO see how to parallelize this loop with different subprocesses
     for test in tests:
-        limbo = TestLimboShell()
         try:
-            stub_memory_test(limbo, **test)
+            with TestLimboShell("") as limbo:
+                stub_memory_test(limbo, **test)
         except Exception as e:
-            print(f"Test FAILED: {e}")
-            limbo.quit()
+            console.error(f"Test FAILED: {e}")
             exit(1)
-    limbo.quit()  # remove this line when `with` statement is supported for TestLimboShell
-    print("All tests passed successfully.")
+    console.info("All tests passed successfully.")
 
 
 if __name__ == "__main__":
diff --git a/testing/pyproject.toml b/testing/pyproject.toml
index d4e257361..58292dd91 100644
--- a/testing/pyproject.toml
+++ b/testing/pyproject.toml
@@ -14,6 +14,7 @@ test-write = "cli_tests.write:main"
 test-shell = "cli_tests.cli_test_cases:main"
 test-extensions = "cli_tests.extensions:main"
 test-update = "cli_tests.update:main"
+test-memory = "cli_tests.memory:main"
 
 [tool.uv]
 package = true

From 2cc492844ee328598ecda933bc3816307ade7a02 Mon Sep 17 00:00:00 2001
From: Diego Reis
Date: Tue, 15 Apr 2025 10:38:29 -0300
Subject: [PATCH 265/425] Improve NotFound's docs clarity

---
 core/vdbe/insn.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 32c3f9550..7f9432fb0 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -805,7 +805,13 @@ pub enum Insn {
     Once {
         target_pc_when_reentered: BranchOffset,
     },
-    /// Search for record in the index cusor, if exists is a no-op otherwise go to target_pc
+    /// Search for a record in the index cursor. If any entry exists for which the key is a
+    /// prefix, this is a no-op; otherwise jump to target_pc.
+    /// Example =>
+    /// For an index key (1,2,3):
+    /// NotFound((1,2,3)) => No-op
+    /// NotFound((1,2)) => No-op
+    /// NotFound((2,2,1)) => Jump
     NotFound {
         cursor_id: CursorID,
         target_pc: BranchOffset,

From 8c797a9bd1969e18495b7cd9674adef3e169a445 Mon Sep 17 00:00:00 2001
From: Anton Harniakou
Date: Tue, 15 Apr 2025 21:48:53 +0300
Subject: [PATCH 266/425] Use eq_ignore_ascii_case to compare strings

---
 core/translate/expr.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index 6520a8b14..77758cc8f 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -1942,7 +1942,7 @@ pub fn translate_expr(
             // If we don't do this -1 * 9223372036854775808 will overflow and parse will fail
             // and trigger conversion to Real.
if numeric_value == "9223372036854775808" - || numeric_value.to_lowercase() == "0x7fffffffffffffff" + || numeric_value.eq_ignore_ascii_case("0x7fffffffffffffff") { program.emit_insn(Insn::Integer { value: i64::MIN, From 8f5a39cc2b68ad913610f01448d1c797e5fc7d31 Mon Sep 17 00:00:00 2001 From: TcMits Date: Wed, 16 Apr 2025 14:27:24 +0700 Subject: [PATCH 267/425] replace vec with array in btree balancing --- core/storage/btree.rs | 225 +++++++++++++++++++++++++----------------- 1 file changed, 136 insertions(+), 89 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d304963ed..92f409299 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -253,12 +253,12 @@ impl BTreeKey<'_> { #[derive(Clone)] struct BalanceInfo { - /// Old pages being balanced. - pages_to_balance: Vec, + /// Old pages being balanced. We can have maximum 3 pages being balanced at the same time. + pages_to_balance: [Option; 3], /// Bookkeeping of the rightmost pointer so the offset::BTREE_RIGHTMOST_PTR can be updated. rightmost_pointer: *mut u8, - /// Divider cells of old pages - divider_cells: Vec>, + /// Divider cells of old pages. We can have maximum 2 divider cells because of 3 pages. + divider_cells: [Option>; 2], /// Number of siblings being used to balance sibling_count: usize, /// First divider cell to remove that marks the first sibling @@ -387,7 +387,7 @@ struct PageStack { struct CellArray { cells: Vec<&'static mut [u8]>, // TODO(pere): make this with references - number_of_cells_per_page: Vec, // number of cells in each page + number_of_cells_per_page: [u16; 5], // number of cells in each page, max 5 pages } impl BTreeCursor { @@ -1622,7 +1622,7 @@ impl BTreeCursor { PageType::IndexInterior | PageType::TableInterior )); // Part 1: Find the sibling pages to balance - let mut pages_to_balance = vec![]; + let mut pages_to_balance: [Option; 3] = [None, None, None]; let number_of_cells_in_parent = parent_contents.cell_count() + parent_contents.overflow_cells.len(); @@ -1691,7 +1691,7 @@ impl BTreeCursor { for i in (0..=current_sibling).rev() { let page = self.pager.read_page(pgno as usize)?; debug_validate_cells!(&page.get_contents(), self.usable_space() as u16); - pages_to_balance.push(page); + pages_to_balance[i].replace(page); assert_eq!( parent_contents.overflow_cells.len(), 0, @@ -1724,14 +1724,16 @@ impl BTreeCursor { } }; } - // Reverse in order to keep the right order - pages_to_balance.reverse(); #[cfg(debug_assertions)] { - let page_type_of_siblings = pages_to_balance[0].get_contents().page_type(); - for page in &pages_to_balance { - let contents = page.get_contents(); + let page_type_of_siblings = pages_to_balance[0] + .as_ref() + .unwrap() + .get_contents() + .page_type(); + for page in pages_to_balance.iter().take(sibling_count) { + let contents = page.as_ref().unwrap().get_contents(); debug_validate_cells!(&contents, self.usable_space() as u16); assert_eq!(contents.page_type(), page_type_of_siblings); } @@ -1743,7 +1745,7 @@ impl BTreeCursor { .replace(Some(BalanceInfo { pages_to_balance, rightmost_pointer: right_pointer, - divider_cells: Vec::new(), + divider_cells: [None, None], sibling_count, first_divider_cell: first_cell_divider, })); @@ -1759,7 +1761,8 @@ impl BTreeCursor { let all_loaded = balance_info .pages_to_balance .iter() - .all(|page| !page.is_locked()); + .take(balance_info.sibling_count) + .all(|page| !page.as_ref().unwrap().is_locked()); if !all_loaded { return Ok(CursorResult::IO); } @@ -1775,9 +1778,10 @@ impl BTreeCursor { /* 1. 
Get divider cells and max_cells */ let mut max_cells = 0; - let mut pages_to_balance_new = Vec::new(); + // we only need maximum 5 pages to balance 3 pages + let mut pages_to_balance_new: [Option; 5] = [None, None, None, None, None]; for i in (0..balance_info.sibling_count).rev() { - let sibling_page = &balance_info.pages_to_balance[i]; + let sibling_page = balance_info.pages_to_balance[i].as_ref().unwrap(); let sibling_contents = sibling_page.get_contents(); sibling_page.set_dirty(); self.pager.add_dirty(sibling_page.get().id); @@ -1815,7 +1819,7 @@ impl BTreeCursor { ); // TODO(pere): make this reference and not copy - balance_info.divider_cells.push(cell_buf.to_vec()); + balance_info.divider_cells[i].replace(cell_buf.to_vec()); tracing::trace!( "dropping divider cell from parent cell_idx={} count={}", cell_idx, @@ -1823,31 +1827,33 @@ impl BTreeCursor { ); drop_cell(parent_contents, cell_idx, self.usable_space() as u16)?; } - assert_eq!( - balance_info.divider_cells.len(), - balance_info.sibling_count - 1, - "the number of pages balancing must be divided by one less divider" - ); - // Reverse divider cells to be in order - balance_info.divider_cells.reverse(); /* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */ let mut cell_array = CellArray { cells: Vec::with_capacity(max_cells), - number_of_cells_per_page: Vec::new(), + number_of_cells_per_page: [0; 5], }; let cells_capacity_start = cell_array.cells.capacity(); let mut total_cells_inserted = 0; // count_cells_in_old_pages is the prefix sum of cells of each page - let mut count_cells_in_old_pages = Vec::new(); + let mut count_cells_in_old_pages: [u16; 5] = [0; 5]; - let page_type = balance_info.pages_to_balance[0].get_contents().page_type(); + let page_type = balance_info.pages_to_balance[0] + .as_ref() + .unwrap() + .get_contents() + .page_type(); tracing::debug!("balance_non_root(page_type={:?})", page_type); let leaf_data = matches!(page_type, PageType::TableLeaf); let leaf = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); - for (i, old_page) in balance_info.pages_to_balance.iter().enumerate() { - let old_page_contents = old_page.get_contents(); + for (i, old_page) in balance_info + .pages_to_balance + .iter() + .take(balance_info.sibling_count) + .enumerate() + { + let old_page_contents = old_page.as_ref().unwrap().get_contents(); debug_validate_cells!(&old_page_contents, self.usable_space() as u16); for cell_idx in 0..old_page_contents.cell_count() { let (cell_start, cell_len) = old_page_contents.cell_get_raw_region( @@ -1876,15 +1882,18 @@ impl BTreeCursor { ); } - count_cells_in_old_pages.push(cell_array.cells.len() as u16); + count_cells_in_old_pages[i] = cell_array.cells.len() as u16; let mut cells_inserted = old_page_contents.cell_count() + old_page_contents.overflow_cells.len(); - if i < balance_info.pages_to_balance.len() - 1 && !leaf_data { + if i < balance_info.sibling_count - 1 && !leaf_data { // If we are a index page or a interior table page we need to take the divider cell too. // But we don't need the last divider as it will remain the same. - let mut divider_cell = balance_info.divider_cells[i].as_mut_slice(); + let mut divider_cell = balance_info.divider_cells[i] + .as_mut() + .unwrap() + .as_mut_slice(); // TODO(pere): in case of old pages are leaf pages, so index leaf page, we need to strip page pointers // from divider cells in index interior pages (parent) because those should not be included. 
cells_inserted += 1; @@ -1926,29 +1935,26 @@ impl BTreeCursor { validate_cells_after_insertion(&cell_array, leaf_data); /* 3. Initiliaze current size of every page including overflow cells and divider cells that might be included. */ - let mut new_page_sizes: Vec = Vec::new(); + let mut new_page_sizes: [i64; 5] = [0; 5]; let leaf_correction = if leaf { 4 } else { 0 }; // number of bytes beyond header, different from global usableSapce which includes // header let usable_space = self.usable_space() - 12 + leaf_correction; for i in 0..balance_info.sibling_count { - cell_array - .number_of_cells_per_page - .push(count_cells_in_old_pages[i]); - let page = &balance_info.pages_to_balance[i]; + cell_array.number_of_cells_per_page[i] = count_cells_in_old_pages[i]; + let page = &balance_info.pages_to_balance[i].as_ref().unwrap(); let page_contents = page.get_contents(); let free_space = compute_free_space(page_contents, self.usable_space() as u16); - new_page_sizes.push(usable_space as i64 - free_space as i64); + new_page_sizes[i] = usable_space as i64 - free_space as i64; for overflow in &page_contents.overflow_cells { - let size = new_page_sizes.last_mut().unwrap(); // 2 to account of pointer - *size += 2 + overflow.payload.len() as i64; + new_page_sizes[i] += 2 + overflow.payload.len() as i64; } if !leaf && i < balance_info.sibling_count - 1 { // Account for divider cell which is included in this page. - let size = new_page_sizes.last_mut().unwrap(); - *size += cell_array.cells[cell_array.cell_count(i)].len() as i64; + new_page_sizes[i] += + cell_array.cells[cell_array.cell_count(i)].len() as i64; } } @@ -1969,15 +1975,15 @@ impl BTreeCursor { while new_page_sizes[i] > usable_space as i64 { let needs_new_page = i + 1 >= sibling_count_new; if needs_new_page { - sibling_count_new += 1; - new_page_sizes.push(0); - cell_array - .number_of_cells_per_page - .push(cell_array.cells.len() as u16); + sibling_count_new = i + 2; assert!( sibling_count_new <= 5, "it is corrupt to require more than 5 pages to balance 3 siblings" ); + + new_page_sizes[sibling_count_new - 1] = 0; + cell_array.number_of_cells_per_page[sibling_count_new - 1] = + cell_array.cells.len() as u16; } let size_of_cell_to_remove_from_left = 2 + cell_array.cells[cell_array.cell_count(i) - 1].len() as i64; @@ -2039,10 +2045,6 @@ impl BTreeCursor { break; } } - new_page_sizes.truncate(sibling_count_new); - cell_array - .number_of_cells_per_page - .truncate(sibling_count_new); tracing::debug!( "balance_non_root(sibling_count={}, sibling_count_new={}, cells={})", @@ -2110,35 +2112,54 @@ impl BTreeCursor { // Allocate pages or set dirty if not needed for i in 0..sibling_count_new { if i < balance_info.sibling_count { - balance_info.pages_to_balance[i].set_dirty(); - pages_to_balance_new.push(balance_info.pages_to_balance[i].clone()); + let page = balance_info.pages_to_balance[i].as_ref().unwrap(); + page.set_dirty(); + pages_to_balance_new[i].replace(page.clone()); } else { let page = self.pager.do_allocate_page(page_type, 0); - pages_to_balance_new.push(page); + pages_to_balance_new[i].replace(page); // Since this page didn't exist before, we can set it to cells length as it // marks them as empty since it is a prefix sum of cells. 
- count_cells_in_old_pages.push(cell_array.cells.len() as u16); + count_cells_in_old_pages[i] = cell_array.cells.len() as u16; } } // Reassign page numbers in increasing order - let mut page_numbers = Vec::new(); - for page in pages_to_balance_new.iter() { - page_numbers.push(page.get().id); - } - page_numbers.sort(); - for (page, new_id) in pages_to_balance_new.iter().zip(page_numbers) { - if new_id != page.get().id { - page.get().id = new_id; - self.pager.put_loaded_page(new_id, page.clone()); - } - } - - #[cfg(debug_assertions)] { - tracing::debug!("balance_non_root(parent page_id={})", parent_page.get().id); - for page in &pages_to_balance_new { - tracing::debug!("balance_non_root(new_sibling page_id={})", page.get().id); + let mut page_numbers: [usize; 5] = [0; 5]; + for (i, page) in pages_to_balance_new + .iter() + .take(sibling_count_new) + .enumerate() + { + page_numbers[i] = page.as_ref().unwrap().get().id; + } + page_numbers.sort(); + for (page, new_id) in pages_to_balance_new + .iter() + .take(sibling_count_new) + .rev() + .zip(page_numbers.iter().rev().take(sibling_count_new)) + { + let page = page.as_ref().unwrap(); + if *new_id != page.get().id { + page.get().id = *new_id; + self.pager.put_loaded_page(*new_id, page.clone()); + } + } + + #[cfg(debug_assertions)] + { + tracing::debug!( + "balance_non_root(parent page_id={})", + parent_page.get().id + ); + for page in pages_to_balance_new.iter().take(sibling_count_new) { + tracing::debug!( + "balance_non_root(new_sibling page_id={})", + page.as_ref().unwrap().get().id + ); + } } } @@ -2150,7 +2171,11 @@ impl BTreeCursor { // Write right pointer in parent page to point to new rightmost page. keep in mind // we update rightmost pointer first because inserting cells could defragment parent page, // therfore invalidating the pointer. - let right_page_id = pages_to_balance_new.last().unwrap().get().id as u32; + let right_page_id = pages_to_balance_new[sibling_count_new - 1] + .as_ref() + .unwrap() + .get() + .id as u32; let rightmost_pointer = balance_info.rightmost_pointer; let rightmost_pointer = unsafe { std::slice::from_raw_parts_mut(rightmost_pointer, 4) }; @@ -2168,9 +2193,13 @@ impl BTreeCursor { // that was originally on that place. let is_leaf_page = matches!(page_type, PageType::TableLeaf | PageType::IndexLeaf); if !is_leaf_page { - let last_page = balance_info.pages_to_balance.last().unwrap(); + let last_page = balance_info.pages_to_balance[balance_info.sibling_count - 1] + .as_ref() + .unwrap(); let right_pointer = last_page.get_contents().rightmost_pointer().unwrap(); - let new_last_page = pages_to_balance_new.last().unwrap(); + let new_last_page = pages_to_balance_new[sibling_count_new - 1] + .as_ref() + .unwrap(); new_last_page .get_contents() .write_u32(offset::BTREE_RIGHTMOST_PTR, right_pointer); @@ -2183,6 +2212,7 @@ impl BTreeCursor { .take(sibling_count_new - 1) /* do not take last page */ { + let page = page.as_ref().unwrap(); let divider_cell_idx = cell_array.cell_count(i); let mut divider_cell = &mut cell_array.cells[divider_cell_idx]; // FIXME: dont use auxiliary space, could be done without allocations @@ -2261,7 +2291,8 @@ impl BTreeCursor { #[cfg(debug_assertions)] { // Let's ensure every page is pointed to by the divider cell or the rightmost pointer. 
- for page in &pages_to_balance_new { + for page in pages_to_balance_new.iter().take(sibling_count_new) { + let page = page.as_ref().unwrap(); assert!( pages_pointed_to.contains(&(page.get().id as u32)), "page {} not pointed to by divider cell or rightmost pointer", @@ -2322,31 +2353,31 @@ impl BTreeCursor { cell_array.cell_count(page_idx) - start_new_cells, ) }; - let page = &pages_to_balance_new[page_idx]; + let page = pages_to_balance_new[page_idx].as_ref().unwrap(); tracing::debug!("pre_edit_page(page={})", page.get().id); - let page = page.get_contents(); + let page_contents = page.get_contents(); edit_page( - page, + page_contents, start_old_cells, start_new_cells, number_new_cells, &cell_array, self.usable_space() as u16, )?; - debug_validate_cells!(page, self.usable_space() as u16); + debug_validate_cells!(page_contents, self.usable_space() as u16); tracing::trace!( "edit_page page={} cells={}", - pages_to_balance_new[page_idx].get().id, - page.cell_count() + page.get().id, + page_contents.cell_count() ); - page.overflow_cells.clear(); + page_contents.overflow_cells.clear(); done[page_idx] = true; } } // TODO: vacuum support - let first_child_page = &pages_to_balance_new[0]; + let first_child_page = pages_to_balance_new[0].as_ref().unwrap(); let first_child_contents = first_child_page.get_contents(); if parent_is_root && parent_contents.cell_count() == 0 @@ -2416,7 +2447,7 @@ impl BTreeCursor { // We have to free pages that are not used anymore for i in sibling_count_new..balance_info.sibling_count { - let page = &balance_info.pages_to_balance[i]; + let page = balance_info.pages_to_balance[i].as_ref().unwrap(); self.pager.free_page(Some(page.clone()), page.get().id)?; } (WriteState::BalanceStart, Ok(CursorResult::Ok(()))) @@ -2485,7 +2516,7 @@ impl BTreeCursor { parent_page: &PageRef, balance_info: &mut BalanceInfo, parent_contents: &mut PageContent, - pages_to_balance_new: Vec>, + pages_to_balance_new: [Option>; 5], page_type: PageType, leaf_data: bool, mut cells_debug: Vec>, @@ -2534,7 +2565,12 @@ impl BTreeCursor { } } // Let's now make a in depth check that we in fact added all possible cells somewhere and they are not lost - for (page_idx, page) in pages_to_balance_new.iter().enumerate() { + for (page_idx, page) in pages_to_balance_new + .iter() + .take(sibling_count_new) + .enumerate() + { + let page = page.as_ref().unwrap(); let contents = page.get_contents(); debug_validate_cells!(contents, self.usable_space() as u16); // Cells are distributed in order @@ -2629,13 +2665,24 @@ impl BTreeCursor { let rightmost = read_u32(rightmost_pointer, 0); debug_validate_cells!(parent_contents, self.usable_space() as u16); - if pages_to_balance_new.len() != 1 { - tracing::error!("balance_non_root(balance_shallower_incorrect_pages_to_balance_new_len, pages_to_balance_new={})", - pages_to_balance_new.len() + if !pages_to_balance_new[0].is_some() { + tracing::error!( + "balance_non_root(balance_shallower_incorrect_page, page_idx={})", + 0 ); valid = false; } + for i in 1..sibling_count_new { + if pages_to_balance_new[i].is_some() { + tracing::error!( + "balance_non_root(balance_shallower_incorrect_page, page_idx={})", + i + ); + valid = false; + } + } + if current_index_cell != cells_debug.len() || cells_debug.len() != contents.cell_count() || contents.cell_count() != parent_contents.cell_count() From 87575106069ee1c29802a8675fada583828f1057 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 13 Apr 2025 13:44:00 +0300 Subject: [PATCH 268/425] test/fuzz: revamp compound key seek fuzz test 
to include desc indexes and be more efficient --- tests/integration/fuzz/mod.rs | 275 ++++++++++++++++++++-------------- 1 file changed, 165 insertions(+), 110 deletions(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 3814be97b..878021f91 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -202,6 +202,8 @@ mod tests { } #[test] + /// A test for verifying that index seek+scan works correctly for compound keys + /// on indexes with various column orderings. pub fn index_scan_compound_key_fuzz() { let (mut rng, seed) = if std::env::var("SEED").is_ok() { let seed = std::env::var("SEED").unwrap().parse::().unwrap(); @@ -209,8 +211,25 @@ mod tests { } else { rng_from_time() }; - let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y))"); - let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); + // Create all different 3-column primary key permutations + let dbs = [ + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z))"), + TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z desc))"), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z desc))", + ), + ]; let mut pk_tuples = HashSet::new(); while pk_tuples.len() < 100000 { pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); @@ -225,125 +244,161 @@ mod tests { )); } let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); - sqlite_conn.execute(&insert, params![]).unwrap(); - sqlite_conn.close().unwrap(); - let sqlite_conn = rusqlite::Connection::open(db.path.clone()).unwrap(); - let limbo_conn = db.connect_limbo(); + + // Insert all tuples into all databases + let sqlite_conns = dbs + .iter() + .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap()) + .collect::>(); + for sqlite_conn in sqlite_conns.into_iter() { + sqlite_conn.execute(&insert, params![]).unwrap(); + sqlite_conn.close().unwrap(); + } + let sqlite_conns = dbs + .iter() + .map(|db| rusqlite::Connection::open(db.path.clone()).unwrap()) + .collect::>(); + let limbo_conns = dbs.iter().map(|db| db.connect_limbo()).collect::>(); const COMPARISONS: [&str; 5] = ["=", "<", "<=", ">", ">="]; - const ORDER_BY: [Option<&str>; 3] = [None, Some("ORDER BY x DESC"), Some("ORDER BY x ASC")]; - const SECONDARY_ORDER_BY: [Option<&str>; 3] = [None, Some(", y DESC"), Some(", y ASC")]; + // For verifying index scans, we only care about cases where all but potentially the last column are constrained by an equality (=), + // because this is the only way to utilize an index efficiently for seeking. This is called the "left-prefix rule" of indexes. + // Hence we generate constraint combinations in this manner; as soon as a comparison is not an equality, we stop generating more constraints for the where clause. 
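To make the left-prefix rule concrete, here is a minimal, self-contained Rust sketch (names are illustrative, not part of this patch): a seek key can consume a run of leading equality constraints plus at most one trailing inequality, and nothing after that.

    #[derive(Clone, Copy)]
    enum Cmp {
        Eq,
        Lt,
        Le,
        Gt,
        Ge,
    }

    /// How many leading index columns a seek can use, given one optional
    /// constraint per index column, in index order (None = unconstrained).
    fn usable_seek_prefix(constraints: &[Option<Cmp>]) -> usize {
        let mut len = 0;
        for c in constraints {
            match c {
                // An equality keeps extending the usable prefix.
                Some(Cmp::Eq) => len += 1,
                // The first inequality is still usable, but ends the seek key.
                Some(_) => return len + 1,
                // A gap in the prefix makes later constraints unusable.
                None => return len,
            }
        }
        len
    }

    fn main() {
        // x = 1 AND y = 2 AND z > 3: all three columns participate.
        assert_eq!(
            usable_seek_prefix(&[Some(Cmp::Eq), Some(Cmp::Eq), Some(Cmp::Gt)]),
            3
        );
        // x > 1 AND y = 2: only x is usable for the seek.
        assert_eq!(usable_seek_prefix(&[Some(Cmp::Gt), Some(Cmp::Eq)]), 1);
        // y = 2 with no constraint on x: no usable prefix at all.
        assert_eq!(usable_seek_prefix(&[None, Some(Cmp::Eq)]), 0);
    }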
+ // Examples: + // x = 1 AND y = 2 AND z > 3 + // x = 1 AND y > 2 + // x > 1 + let col_comp_first = COMPARISONS + .iter() + .cloned() + .map(|x| (Some(x), None, None)) + .collect::>(); + let col_comp_second = COMPARISONS + .iter() + .cloned() + .map(|x| (Some("="), Some(x), None)) + .collect::>(); + let col_comp_third = COMPARISONS + .iter() + .cloned() + .map(|x| (Some("="), Some("="), Some(x))) + .collect::>(); - let print_dump_on_fail = |insert: &str, seed: u64| { - let comment = format!("-- seed: {}; dump for manual debugging:", seed); - let pragma_journal_mode = "PRAGMA journal_mode = wal;"; - let create_table = "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y));"; - let dump = format!( - "{}\n{}\n{}\n{}\n{}", - comment, pragma_journal_mode, create_table, comment, insert - ); - println!("{}", dump); - }; + let all_comps = [col_comp_first, col_comp_second, col_comp_third].concat(); - for comp in COMPARISONS.iter() { - for order_by in ORDER_BY.iter() { - // make it more likely that the full 2-column index is utilized for seeking - let iter_count_per_permutation = if *comp == "=" { 2000 } else { 500 }; + const ORDER_BY: [Option<&str>; 3] = [None, Some("DESC"), Some("ASC")]; + + const ITERATIONS: usize = 10000; + for i in 0..ITERATIONS { + if i % (ITERATIONS / 100) == 0 { println!( - "fuzzing {} iterations with comp: {:?}, order_by: {:?}", - iter_count_per_permutation, comp, order_by + "index_scan_compound_key_fuzz: iteration {}/{}", + i + 1, + ITERATIONS ); - for _ in 0..iter_count_per_permutation { - let first_col_val = rng.random_range(0..=3000); - let mut limit = "LIMIT 5"; - let mut second_idx_col_cond = "".to_string(); - let mut second_idx_col_comp = "".to_string(); + } + let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())]; + // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY. + let (order_by1, order_by2, order_by3) = { + if comp1.is_some() && comp2.is_some() && comp3.is_some() { + ( + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ) + } else if comp1.is_some() && comp2.is_some() { + ( + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + ORDER_BY[rng.random_range(0..ORDER_BY.len())], + None, + ) + } else { + (ORDER_BY[rng.random_range(0..ORDER_BY.len())], None, None) + } + }; - // somtetimes include the second index column in the where clause. 
- // make it more probable when first column has '=' constraint since those queries are usually faster to run - let second_col_prob = if *comp == "=" { 0.7 } else { 0.02 }; - if rng.random_bool(second_col_prob) { - let second_idx_col = rng.random_range(0..3000); + // Generate random values for the WHERE clause constraints + let (col_val_first, col_val_second, col_val_third) = { + if comp1.is_some() && comp2.is_some() && comp3.is_some() { + ( + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + ) + } else if comp1.is_some() && comp2.is_some() { + ( + Some(rng.random_range(0..=3000)), + Some(rng.random_range(0..=3000)), + None, + ) + } else { + (Some(rng.random_range(0..=3000)), None, None) + } + }; - second_idx_col_comp = - COMPARISONS[rng.random_range(0..COMPARISONS.len())].to_string(); - second_idx_col_cond = - format!(" AND y {} {}", second_idx_col_comp, second_idx_col); + // Use a small limit to make the test complete faster + let limit = 5; + + // Generate WHERE clause string + let where_clause_components = vec![ + comp1.map(|x| format!("x {} {}", x, col_val_first.unwrap())), + comp2.map(|x| format!("y {} {}", x, col_val_second.unwrap())), + comp3.map(|x| format!("z {} {}", x, col_val_third.unwrap())), + ] + .into_iter() + .filter_map(|x| x) + .collect::>(); + let where_clause = if where_clause_components.is_empty() { + "".to_string() + } else { + format!("WHERE {}", where_clause_components.join(" AND ")) + }; + + // Generate ORDER BY string + let order_by_components = vec![ + order_by1.map(|x| format!("x {}", x)), + order_by2.map(|x| format!("y {}", x)), + order_by3.map(|x| format!("z {}", x)), + ] + .into_iter() + .filter_map(|x| x) + .collect::>(); + let order_by = if order_by_components.is_empty() { + "".to_string() + } else { + format!("ORDER BY {}", order_by_components.join(", ")) + }; + + // Generate final query string + let query = format!( + "SELECT * FROM t {} {} LIMIT {}", + where_clause, order_by, limit + ); + log::debug!("query: {}", query); + + // Execute the query on all databases and compare the results + for (i, sqlite_conn) in sqlite_conns.iter().enumerate() { + let limbo = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + if limbo != sqlite { + // if the order by contains exclusively components that are constrained by an equality (=), + // sqlite sometimes doesn't bother with ASC/DESC because it doesn't semantically matter + // so we need to check that limbo and sqlite return the same results when the ordering is reversed. + // because we are generally using LIMIT (to make the test complete faster), we need to rerun the query + // without limit and then check that the results are the same if reversed. 
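The acceptance rule described in this comment boils down to a small equivalence check; an illustrative restatement follows (a hypothetical helper, not the test's actual code). Note it is only sound on full result sets, which is exactly why the test re-runs the query without LIMIT below.

    fn equivalent_modulo_reversal<T: PartialEq + Clone>(a: &[T], b: &[T]) -> bool {
        if a == b {
            return true;
        }
        // Accept the mirror image: when every ORDER BY column is pinned by an
        // equality, ASC vs DESC is semantically a no-op, so a reversed result
        // set is still correct.
        let reversed: Vec<T> = a.iter().rev().cloned().collect();
        reversed == b
    }

    fn main() {
        // Same rows, opposite iteration order: accepted.
        assert!(equivalent_modulo_reversal(&[1, 2, 3], &[3, 2, 1]));
        // Genuinely different contents: rejected.
        assert!(!equivalent_modulo_reversal(&[1, 2, 3], &[1, 3, 2]));
    }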
+ let query_no_limit = + format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); + let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); + let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); + let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); + if limbo_rev == sqlite_no_limit { + continue; } - - // if the first constraint is =, then half the time, use the second index column in the ORDER BY too - let mut secondary_order_by = None; - let use_secondary_order_by = order_by.is_some() - && *comp == "=" - && second_idx_col_comp != "" - && rng.random_bool(0.5); - let full_order_by = if use_secondary_order_by { - secondary_order_by = - SECONDARY_ORDER_BY[rng.random_range(0..SECONDARY_ORDER_BY.len())]; - if let Some(secondary) = secondary_order_by { - format!("{}{}", order_by.unwrap_or(""), secondary,) - } else { - order_by.unwrap_or("").to_string() - } - } else { - order_by.unwrap_or("").to_string() - }; - - // There are certain cases where SQLite does not bother iterating in reverse order despite the ORDER BY. - // These cases include e.g. - // SELECT * FROM t WHERE x = 3 ORDER BY x DESC - // SELECT * FROM t WHERE x = 3 and y < 100 ORDER BY x DESC - // - // The common thread being that the ORDER BY column is also constrained by an equality predicate, meaning - // that it doesn't semantically matter what the ordering is. - // - // We do not currently replicate this "lazy" behavior, so in these cases we want the full result set and ensure - // that if the result is not exactly equal, then the ordering must be the exact reverse. - let allow_reverse_ordering = { - if *comp != "=" { - false - } else if secondary_order_by.is_some() { - second_idx_col_comp == "=" - } else { - true - } - }; - if allow_reverse_ordering { - // see comment above about ordering and the '=' comparison operator; omitting LIMIT for that reason - // we mainly have LIMIT here for performance reasons but for = we want to get all the rows to ensure - // correctness in the = case - limit = ""; - } - let query = format!( - // e.g. 
SELECT * FROM t WHERE x = 1 AND y > 2 ORDER BY x DESC LIMIT 5 - "SELECT * FROM t WHERE x {} {} {} {} {}", - comp, first_col_val, second_idx_col_cond, full_order_by, limit, + panic!( + "limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", + limbo, sqlite, seed, query ); - log::debug!("query: {}", query); - let limbo = limbo_exec_rows(&db, &limbo_conn, &query); - let sqlite = sqlite_exec_rows(&sqlite_conn, &query); - let is_equal = limbo == sqlite; - if !is_equal { - if allow_reverse_ordering { - let limbo_row_count = limbo.len(); - let sqlite_row_count = sqlite.len(); - if limbo_row_count == sqlite_row_count { - let limbo_rev = limbo.iter().cloned().rev().collect::>(); - assert_eq!(limbo_rev, sqlite, "query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", query, limbo, sqlite, seed, allow_reverse_ordering); - } else { - print_dump_on_fail(&insert, seed); - let error_msg = format!("row count mismatch (limbo row count: {}, sqlite row count: {}): query: {}, limbo: {:?}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", limbo_row_count, sqlite_row_count, query, limbo, sqlite, seed, allow_reverse_ordering); - panic!("{}", error_msg); - } - } else { - print_dump_on_fail(&insert, seed); - panic!( - "query: {}, limbo row count: {}, limbo: {:?}, sqlite row count: {}, sqlite: {:?}, seed: {}, allow_reverse_ordering: {}", - query, limbo.len(), limbo, sqlite.len(), sqlite, seed, allow_reverse_ordering - ); - } - } } } } From af09025088356efd9bbb5743269c226758dbb9fb Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 13 Apr 2025 15:15:12 +0300 Subject: [PATCH 269/425] schema: keep track of primary key column sort order --- core/schema.rs | 80 +++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index ea6a26279..0a5a8d80f 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -158,7 +158,7 @@ impl PartialEq for Table { pub struct BTreeTable { pub root_page: usize, pub name: String, - pub primary_key_column_names: Vec, + pub primary_key_columns: Vec<(String, SortOrder)>, pub columns: Vec, pub has_rowid: bool, pub is_strict: bool, @@ -166,8 +166,8 @@ pub struct BTreeTable { impl BTreeTable { pub fn get_rowid_alias_column(&self) -> Option<(usize, &Column)> { - if self.primary_key_column_names.len() == 1 { - let (idx, col) = self.get_column(&self.primary_key_column_names[0]).unwrap(); + if self.primary_key_columns.len() == 1 { + let (idx, col) = self.get_column(&self.primary_key_columns[0].0).unwrap(); if self.column_is_rowid_alias(col) { return Some((idx, col)); } @@ -265,7 +265,7 @@ fn create_table( let table_name = normalize_ident(&tbl_name.name.0); trace!("Creating table {}", table_name); let mut has_rowid = true; - let mut primary_key_column_names = vec![]; + let mut primary_key_columns = vec![]; let mut cols = vec![]; let is_strict: bool; match body { @@ -282,7 +282,7 @@ fn create_table( } = c.constraint { for column in columns { - primary_key_column_names.push(match column.expr { + let col_name = match column.expr { Expr::Id(id) => normalize_ident(&id.0), Expr::Literal(Literal::String(value)) => { value.trim_matches('\'').to_owned() @@ -290,7 +290,9 @@ fn create_table( _ => { todo!("Unsupported primary key expression"); } - }); + }; + primary_key_columns + .push((col_name, column.order.unwrap_or(SortOrder::Asc))); } } } @@ -347,10 +349,17 @@ fn create_table( let mut default = None; let mut primary_key = false; let mut notnull = false; + let mut order = SortOrder::Asc; for c_def in 
&col_def.constraints { match &c_def.constraint { - limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { .. } => { + limbo_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { + order: o, + .. + } => { primary_key = true; + if let Some(o) = o { + order = o.clone(); + } } limbo_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => { notnull = true; @@ -363,8 +372,11 @@ fn create_table( } if primary_key { - primary_key_column_names.push(name.clone()); - } else if primary_key_column_names.contains(&name) { + primary_key_columns.push((name.clone(), order)); + } else if primary_key_columns + .iter() + .any(|(col_name, _)| col_name == &name) + { primary_key = true; } @@ -386,7 +398,7 @@ fn create_table( }; // flip is_rowid_alias back to false if the table has multiple primary keys // or if the table has no rowid - if !has_rowid || primary_key_column_names.len() > 1 { + if !has_rowid || primary_key_columns.len() > 1 { for col in cols.iter_mut() { col.is_rowid_alias = false; } @@ -395,7 +407,7 @@ fn create_table( root_page, name: table_name, has_rowid, - primary_key_column_names, + primary_key_columns, columns: cols, is_strict, }) @@ -621,7 +633,7 @@ pub fn sqlite_schema_table() -> BTreeTable { name: "sqlite_schema".to_string(), has_rowid: true, is_strict: false, - primary_key_column_names: vec![], + primary_key_columns: vec![], columns: vec![ Column { name: Some("type".to_string()), @@ -740,16 +752,16 @@ impl Index { index_name: &str, root_page: usize, ) -> Result { - if table.primary_key_column_names.is_empty() { + if table.primary_key_columns.is_empty() { return Err(crate::LimboError::InternalError( "Cannot create automatic index for table without primary key".to_string(), )); } let index_columns = table - .primary_key_column_names + .primary_key_columns .iter() - .map(|col_name| { + .map(|(col_name, order)| { // Verify that each primary key column exists in the table let Some((pos_in_table, _)) = table.get_column(col_name) else { return Err(crate::LimboError::InternalError(format!( @@ -759,7 +771,7 @@ impl Index { }; Ok(IndexColumn { name: normalize_ident(col_name), - order: SortOrder::Asc, // Primary key indexes are always ascending + order: order.clone(), pos_in_table, }) }) @@ -905,8 +917,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) @@ -923,8 +935,11 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Asc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) @@ -932,7 +947,7 @@ mod tests { #[test] pub fn test_primary_key_separate_single() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); @@ -941,8 +956,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - 
table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Desc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) } #[test] pub fn test_primary_key_separate_multiple() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b));"#; + let sql = r#"CREATE TABLE t1 (a INTEGER, b TEXT, c REAL, PRIMARY KEY(a, b desc));"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; assert!(column.primary_key, "column 'a' should be a primary key"); let column = table.get_column("b").unwrap().1; assert!(column.primary_key, "column 'b' should be a primary key"); let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a", "b"], - table.primary_key_column_names, + vec![ + ("a".to_string(), SortOrder::Asc), + ("b".to_string(), SortOrder::Desc) + ], + table.primary_key_columns, "primary key column names should be ['a', 'b']" ); Ok(()) } @@ -977,8 +995,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) } @@ -994,8 +1012,8 @@ mod tests { let column = table.get_column("c").unwrap().1; assert!(!column.primary_key, "column 'c' shouldn't be a primary key"); assert_eq!( - vec!["a"], - table.primary_key_column_names, + vec![("a".to_string(), SortOrder::Asc)], + table.primary_key_columns, "primary key column names should be ['a']" ); Ok(()) } @@ -1143,7 +1161,7 @@ mod tests { name: "t1".to_string(), has_rowid: true, is_strict: false, - primary_key_column_names: vec!["nonexistent".to_string()], + primary_key_columns: vec![("nonexistent".to_string(), SortOrder::Asc)], columns: vec![Column { name: Some("a".to_string()), ty: Type::Integer, From b1073da4a5e02b56aa1a70fa6b1762acfd0905a9 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 13 Apr 2025 15:17:55 +0300 Subject: [PATCH 270/425] btree: add support for descending indexes --- core/storage/btree.rs | 73 ++++++++++++++++++++++++++++++++++--------- core/types.rs | 66 ++++++++++++++++++++++++++++++++++++-- core/vdbe/execute.rs | 17 +++++++--- 3 files changed, 135 insertions(+), 21 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3fd3e3ec9..df3e2ae3f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1,4 +1,5 @@ use crate::{ + schema::Index, storage::{ pager::Pager, sqlite3_ondisk::{ @@ -7,6 +8,7 @@ use crate::{ }, }, translate::plan::IterationDirection, + types::IndexKeySortOrder, MvCursor, }; @@ -364,6 +366,7 @@ pub struct BTreeCursor { /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell>, empty_record: Cell, + pub index_key_sort_order: IndexKeySortOrder, } impl BTreeCursor { @@ -388,9 +391,22 @@ impl BTreeCursor { }, reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), + index_key_sort_order: IndexKeySortOrder::default(), } } + pub fn new_index( + mv_cursor: Option>>, + pager: Rc, + root_page: usize, + index: &Index, + ) -> Self { + let index_key_sort_order = IndexKeySortOrder::from_index(index); + let mut cursor = Self::new(mv_cursor, pager, root_page); + cursor.index_key_sort_order = index_key_sort_order; + cursor + } + /// Check if the table is empty. /// This is done by checking if the root page has no cells.
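The IndexKeySortOrder carried on the cursor by new_index above is defined later in this patch (in core/types.rs) as a per-column sort-order bitfield, with key comparison flipping the per-column result for DESC columns. A self-contained sketch of the idea, with illustrative names rather than the crate's types:

    use std::cmp::Ordering;

    // Bit i set means key column i is DESC; a u64 caps an index key at 64 columns.
    #[derive(Clone, Copy)]
    struct SortOrderBits(u64);

    impl SortOrderBits {
        fn from_desc_flags(desc: &[bool]) -> Self {
            let mut bits = 0u64;
            for (i, &is_desc) in desc.iter().enumerate() {
                assert!(i < 64, "at most 64 key columns supported");
                bits |= (is_desc as u64) << i;
            }
            SortOrderBits(bits)
        }

        // Compare keys column by column, reversing the result for DESC
        // columns, which are stored in reverse on disk.
        fn compare_keys<T: Ord>(&self, l: &[T], r: &[T]) -> Ordering {
            for (i, (a, b)) in l.iter().zip(r).enumerate() {
                let cmp = a.cmp(b);
                if cmp != Ordering::Equal {
                    return if self.0 & (1 << i) != 0 { cmp.reverse() } else { cmp };
                }
            }
            Ordering::Equal
        }
    }

    fn main() {
        // CREATE INDEX i ON t (x ASC, y DESC): (1, 30) sorts before (1, 20),
        // because larger y values come first within equal x.
        let bits = SortOrderBits::from_desc_flags(&[false, true]);
        assert_eq!(bits.compare_keys(&[1, 30], &[1, 20]), Ordering::Less);
        assert_eq!(bits.compare_keys(&[2, 0], &[1, 99]), Ordering::Greater);
    }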
fn is_empty_table(&self) -> Result> { @@ -547,8 +563,11 @@ impl BTreeCursor { let record_values = record.get_values(); let record_slice_same_num_cols = &record_values[..index_key.get_values().len()]; - let order = - compare_immutable(record_slice_same_num_cols, index_key.get_values()); + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); order }; @@ -602,8 +621,11 @@ impl BTreeCursor { let record_values = record.get_values(); let record_slice_same_num_cols = &record_values[..index_key.get_values().len()]; - let order = - compare_immutable(record_slice_same_num_cols, index_key.get_values()); + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); order }; let found = match op { @@ -849,10 +871,18 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -901,10 +931,18 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable( - &self.get_immutable_record().as_ref().unwrap().get_values(), - index_key.get_values(), - ); + let order = { + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_same_num_cols = + &record.get_values()[..index_key.get_values().len()]; + let order = compare_immutable( + record_slice_same_num_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + order + }; let found = match op { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), @@ -1031,7 +1069,11 @@ impl BTreeCursor { let record = record.as_ref().unwrap(); let record_slice_equal_number_of_cols = &record.get_values().as_slice()[..index_key.get_values().len()]; - let order = record_slice_equal_number_of_cols.cmp(index_key.get_values()); + let order = compare_immutable( + record_slice_equal_number_of_cols, + index_key.get_values(), + self.index_key_sort_order, + ); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -1278,6 +1320,7 @@ impl BTreeCursor { let interior_cell_vs_index_key = compare_immutable( record_slice_equal_number_of_cols, index_key.get_values(), + self.index_key_sort_order, ); // in sqlite btrees left child pages have <= keys. // in general, in forwards iteration we want to find the first key that matches the seek condition. 
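The seek-and-terminate pattern these comparisons serve can be modelled on a plain sorted vector. A toy model (not the b-tree code), assuming an ascending (x, y) index and the predicate x = 10 AND y > 20: the seek lands on the first key GT (10, 20), and the scan terminates at the first key whose x exceeds 10.

    fn main() {
        // A sorted stand-in for the (x, y) index.
        let index: Vec<(i64, i64)> =
            vec![(9, 99), (10, 5), (10, 20), (10, 21), (10, 40), (11, 0)];
        // Seek: first entry strictly greater than the full key (10, 20).
        let start = index.partition_point(|&k| k <= (10, 20));
        // Termination: first entry whose x component exceeds 10.
        let end = index.partition_point(|&k| k.0 <= 10);
        assert_eq!(index[start..end].to_vec(), vec![(10, 21), (10, 40)]);
    }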
@@ -1430,7 +1473,8 @@ impl BTreeCursor { self.get_immutable_record() .as_ref() .unwrap() - .get_values() + .get_values(), + self.index_key_sort_order, ) == Ordering::Equal { tracing::debug!("insert_into_page: found exact match with cell_idx={cell_idx}, overwriting"); @@ -3017,6 +3061,7 @@ impl BTreeCursor { let order = compare_immutable( key.to_index_key_values(), self.get_immutable_record().as_ref().unwrap().get_values(), + self.index_key_sort_order, ); match order { Ordering::Less | Ordering::Equal => { diff --git a/core/types.rs b/core/types.rs index 3d531adfe..045f13393 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1,8 +1,10 @@ use limbo_ext::{AggCtx, FinalizeFunction, StepFunction}; +use limbo_sqlite3_parser::ast::SortOrder; use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::pseudo::PseudoCursor; +use crate::schema::Index; use crate::storage::btree::BTreeCursor; use crate::storage::sqlite3_ondisk::write_varint; use crate::translate::plan::IterationDirection; @@ -1043,8 +1045,58 @@ impl PartialOrd for RefValue { } } -pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering { - l.partial_cmp(r).unwrap() +/// A bitfield that represents the comparison spec for index keys. +/// Since indexed columns can individually specify ASC/DESC, each key must +/// be compared differently. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct IndexKeySortOrder(u64); + +impl IndexKeySortOrder { + pub fn get_sort_order_for_col(&self, column_idx: usize) -> SortOrder { + assert!(column_idx < 64, "column index out of range: {}", column_idx); + match self.0 & (1 << column_idx) { + 0 => SortOrder::Asc, + _ => SortOrder::Desc, + } + } + + pub fn from_index(index: &Index) -> Self { + let mut spec = 0; + for (i, column) in index.columns.iter().enumerate() { + spec |= ((column.order == SortOrder::Desc) as u64) << i; + } + IndexKeySortOrder(spec) + } + + pub fn default() -> Self { + Self(0) + } +} + +impl Default for IndexKeySortOrder { + fn default() -> Self { + Self::default() + } +} + +pub fn compare_immutable( + l: &[RefValue], + r: &[RefValue], + index_key_sort_order: IndexKeySortOrder, +) -> std::cmp::Ordering { + assert_eq!(l.len(), r.len()); + for (i, (l, r)) in l.iter().zip(r).enumerate() { + let column_order = index_key_sort_order.get_sort_order_for_col(i); + let cmp = l.partial_cmp(r).unwrap(); + if !cmp.is_eq() { + return match column_order { + SortOrder::Asc => cmp, + SortOrder::Desc => cmp.reverse(), + }; + } + } + std::cmp::Ordering::Equal } const I8_LOW: i64 = -128; @@ -1253,6 +1305,16 @@ impl SeekOp { SeekOp::LE | SeekOp::LT => IterationDirection::Backwards, } } + + pub fn reverse(&self) -> Self { + match self { + SeekOp::EQ => SeekOp::EQ, + SeekOp::GE => SeekOp::LE, + SeekOp::GT => SeekOp::LT, + SeekOp::LE => SeekOp::GE, + SeekOp::LT => SeekOp::GT, + } + } } #[derive(Clone, PartialEq, Debug)] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 41268a342..263ce491a 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -839,16 +839,18 @@ pub fn op_open_read( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); let mut cursors = state.cursors.borrow_mut(); match cursor_type { CursorType::BTreeTable(_) => { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), *root_page); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } - CursorType::BTreeIndex(_) => { + CursorType::BTreeIndex(index) => { + let cursor = + 
BTreeCursor::new_index(mv_cursor, pager.clone(), *root_page, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); @@ -3979,7 +3981,10 @@ pub fn op_open_write( }; let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); let mut cursors = state.cursors.borrow_mut(); - let is_index = cursor_type.is_index(); + let maybe_index = match cursor_type { + CursorType::BTreeIndex(index) => Some(index), + _ => None, + }; let mv_cursor = match state.mv_tx_id { Some(tx_id) => { let table_id = root_page; @@ -3991,13 +3996,15 @@ pub fn op_open_write( } None => None, }; - let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); - if is_index { + if let Some(index) = maybe_index { + let cursor = + BTreeCursor::new_index(mv_cursor, pager.clone(), root_page as usize, index.as_ref()); cursors .get_mut(*cursor_id) .unwrap() .replace(Cursor::new_btree(cursor)); } else { + let cursor = BTreeCursor::new(mv_cursor, pager.clone(), root_page as usize); cursors .get_mut(*cursor_id) .unwrap() From 1189b7a288a4afae6969ac2487f058cb494e54c6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 13 Apr 2025 15:18:15 +0300 Subject: [PATCH 271/425] codegen: add support for descending indexes --- core/translate/main_loop.rs | 79 ++++++-- core/translate/optimizer.rs | 375 +++++++++++++++++++++++++++--------- core/translate/plan.rs | 46 ++--- core/vdbe/execute.rs | 30 +-- 4 files changed, 377 insertions(+), 153 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 1b709e0d3..76057c53b 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -845,27 +845,37 @@ fn emit_seek( is_index: bool, ) -> Result<()> { let Some(seek) = seek_def.seek.as_ref() else { - assert!(seek_def.iter_dir == IterationDirection::Backwards, "A SeekDef without a seek operation should only be used in backwards iteration direction"); - program.emit_insn(Insn::Last { - cursor_id: seek_cursor_id, - pc_if_empty: loop_end, - }); + // If there is no seek key, we start from the first or last row of the index, + // depending on the iteration direction. + match seek_def.iter_dir { + IterationDirection::Forwards => { + program.emit_insn(Insn::Rewind { + cursor_id: seek_cursor_id, + pc_if_empty: loop_end, + }); + } + IterationDirection::Backwards => { + program.emit_insn(Insn::Last { + cursor_id: seek_cursor_id, + pc_if_empty: loop_end, + }); + } + } return Ok(()); }; // We allocated registers for the full index key, but our seek key might not use the full index key. - // Later on for the termination condition we will overwrite the NULL registers. // See [crate::translate::optimizer::build_seek_def] for more details about in which cases we do and don't use the full index key. for i in 0..seek_def.key.len() { let reg = start_reg + i; if i >= seek.len { - if seek_def.null_pad_unset_cols() { + if seek.null_pad { program.emit_insn(Insn::Null { dest: reg, dest_end: None, }); } } else { - let expr = &seek_def.key[i]; + let expr = &seek_def.key[i].0; translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?; // If the seek key column is not verifiably non-NULL, we need to check whether it is NULL, // and if so, jump to the loop end.
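The null_pad flag used here leans on the fact that NULL sorts below every value in index key comparisons. A toy model of why that padding finds the right starting row — SQL NULL is modelled as Option::None, whose None < Some(_) ordering mirrors the index rule:

    fn main() {
        // Seek key GT(x: 10, y: NULL) — the NULL pad makes this the lowest
        // possible key with x = 10, so "strictly greater" lands on the first
        // real row with x = 10, whatever its y.
        let padded_seek_key: (i64, Option<i64>) = (10, None);
        let index: [(i64, Option<i64>); 4] =
            [(9, Some(100)), (10, Some(0)), (10, Some(20)), (11, Some(0))];
        let first = index.iter().find(|&&row| row > padded_seek_key);
        assert_eq!(first, Some(&(10, Some(0))));
    }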
@@ -879,7 +889,7 @@ fn emit_seek( } } } - let num_regs = if seek_def.null_pad_unset_cols() { + let num_regs = if seek.null_pad { seek_def.key.len() } else { seek.len @@ -943,19 +953,46 @@ fn emit_seek_termination( program.resolve_label(loop_start, program.offset()); return Ok(()); }; - let num_regs = termination.len; - // If the seek termination was preceded by a seek (which happens in most cases), - // we can re-use the registers that were allocated for the full index key. - let start_idx = seek_def.seek.as_ref().map_or(0, |seek| seek.len); - for i in start_idx..termination.len { + + // How many non-NULL values were used for seeking. + let seek_len = seek_def.seek.as_ref().map_or(0, |seek| seek.len); + + // How many values will be used for the termination condition. + let num_regs = if termination.null_pad { + seek_def.key.len() + } else { + termination.len + }; + for i in 0..seek_def.key.len() { let reg = start_reg + i; - translate_expr( - program, - Some(tables), - &seek_def.key[i], - reg, - &t_ctx.resolver, - )?; + let is_last = i == seek_def.key.len() - 1; + + // For all index key values apart from the last one, we are guaranteed to use the same values + // as were used for the seek, so we don't need to emit them again. + if i < seek_len && !is_last { + continue; + } + // For the last index key value, we need to emit a NULL if the termination condition is NULL-padded. + // See [SeekKey::null_pad] and [crate::translate::optimizer::build_seek_def] for why this is the case. + if i >= termination.len && !termination.null_pad { + continue; + } + if is_last && termination.null_pad { + program.emit_insn(Insn::Null { + dest: reg, + dest_end: None, + }); + // if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key. + // if not, we just reuse what was emitted for the seek. + } else if seek_len < termination.len { + translate_expr( + program, + Some(tables), + &seek_def.key[i].0, + reg, + &t_ctx.resolver, + )?; + } } program.resolve_label(loop_start, program.offset()); let mut rowid_reg = None; diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index c4bf12810..80875da91 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -779,6 +779,7 @@ pub fn try_extract_index_search_from_where_clause( pub struct IndexConstraint { position_in_where_clause: (usize, BinaryExprSide), operator: ast::Operator, + index_column_sort_order: SortOrder, } /// Helper enum for [IndexConstraint] to indicate which side of a binary comparison expression is being compared to the index column. @@ -898,6 +899,7 @@ fn find_index_constraints( out_constraints.push(IndexConstraint { operator: *operator, position_in_where_clause: (position_in_where_clause, BinaryExprSide::Rhs), + index_column_sort_order: index.columns[position_in_index].order, }); found = true; break; @@ -907,6 +909,7 @@ fn find_index_constraints( out_constraints.push(IndexConstraint { operator: opposite_cmp_op(*operator), // swap the operator since e.g. 
if condition is 5 >= x, we want to use x <= 5 position_in_where_clause: (position_in_where_clause, BinaryExprSide::Lhs), + index_column_sort_order: index.columns[position_in_index].order, }); found = true; break; @@ -963,7 +966,7 @@ pub fn build_seek_def_from_index_constraints( } else { *rhs }; - key.push(cmp_expr); + key.push((cmp_expr, constraint.index_column_sort_order)); } // We know all but potentially the last term is an equality, so we can use the operator of the last term @@ -995,46 +998,80 @@ pub fn build_seek_def_from_index_constraints( /// 2. In contrast, having (x=10 AND y>20) forms a valid index key GT(x:10, y:20) because after the seek, we can simply terminate as soon as x > 10, /// i.e. use GT(x:10, y:20) as the [SeekKey] and GT(x:10) as the [TerminationKey]. /// +/// The preceding examples are for an ascending index. The logic is similar for descending indexes, but an important distinction is that +/// since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc. +/// So when you see e.g. a SeekOp::GT below for a descending index, it actually means that we are seeking the first row where the index key is LESS than the seek key. +/// fn build_seek_def( op: ast::Operator, iter_dir: IterationDirection, - key: Vec, + key: Vec<(ast::Expr, SortOrder)>, ) -> Result { let key_len = key.len(); + let sort_order_of_last_key = key.last().unwrap().1; + + // For the commented examples below, keep in mind that since a descending index is laid out in reverse order, the comparison operators are reversed, e.g. LT becomes GT, LE becomes GE, etc. + // Also keep in mind that index keys are compared based on the number of columns given, so for example: + // - if key is GT(x:10), then (x=10, y=usize::MAX) is not GT because only X is compared. (x=11, y=) is GT. + // - if key is GT(x:10, y:20), then (x=10, y=21) is GT because both X and Y are compared. + // - if key is GT(x:10, y:NULL), then (x=10, y=0) is GT because NULL is always LT in index key comparisons. Ok(match (iter_dir, op) { // Forwards, EQ: // Example: (x=10 AND y=20) - // Seek key: GE(x:10, y:20) - // Termination key: GT(x:10, y:20) + // Seek key: start from the first GE(x:10, y:20) + // Termination key: end at the first GT(x:10, y:20) + // Ascending vs descending doesn't matter because all the comparisons are equalities. (IterationDirection::Forwards, ast::Operator::Equals) => SeekDef { key, iter_dir, seek: Some(SeekKey { len: key_len, + null_pad: false, op: SeekOp::GE, }), termination: Some(TerminationKey { len: key_len, + null_pad: false, op: SeekOp::GT, }), }, // Forwards, GT: - // Example: (x=10 AND y>20) - // Seek key: GT(x:10, y:20) - // Termination key: GT(x:10) + // Ascending index example: (x=10 AND y>20) + // Seek key: start from the first GT(x:10, y:20), e.g. (x=10, y=21) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LE(x:10, y:20), e.g. 
(x=10, y=20) so reversed -> GE(x:10, y:20) (IterationDirection::Forwards, ast::Operator::Greater) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GT, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LE.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::GT, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::GT, + op: termination_op, + null_pad: false, }) } else { None @@ -1042,22 +1079,42 @@ fn build_seek_def( } } // Forwards, GE: - // Example: (x=10 AND y>=20) - // Seek key: GE(x:10, y:20) - // Termination key: GT(x:10) + // Ascending index example: (x=10 AND y>=20) + // Seek key: start from the first GE(x:10, y:20), e.g. (x=10, y=20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) + // + // Descending index example: (x=10 AND y>=20) + // Seek key: start from the first LE(x:10), e.g. (x=10, y=usize::MAX), so reversed -> GE(x:10) + // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) (IterationDirection::Forwards, ast::Operator::GreaterEquals) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::GE, SeekOp::GT) + } else { + ( + key_len - 1, + key_len, + SeekOp::LE.reverse(), + SeekOp::LT.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::GE, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: false, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::GT, + op: termination_op, + null_pad: false, }) } else { None @@ -1065,70 +1122,142 @@ fn build_seek_def( } } // Forwards, LT: - // Example: (x=10 AND y<20) - // Seek key: GT(x:10, y: NULL) // NULL is always LT, indicating we only care about x - // Termination key: GE(x:10, y:20) - (IterationDirection::Forwards, ast::Operator::Less) => SeekDef { - key, - iter_dir, - seek: Some(SeekKey { - len: key_len - 1, - op: SeekOp::GT, - }), - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::GE, - }), - }, + // Ascending index example: (x=10 AND y<20) + // Seek key: start from the first GT(x:10, y: NULL), e.g. (x=10, y=0) + // Termination key: end at the first GE(x:10, y:20), e.g. (x=10, y=20) + // + // Descending index example: (x=10 AND y<20) + // Seek key: start from the first LT(x:10, y:20), e.g. (x=10, y=19), so reversed -> GT(x:10, y:20) + // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. 
GE the smallest possible (x=10, y) combination (NULL is always LT) + (IterationDirection::Forwards, ast::Operator::Less) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::GT, SeekOp::GE) + } else { + (key_len, key_len - 1, SeekOp::GT, SeekOp::GE) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, + } + } // Forwards, LE: - // Example: (x=10 AND y<=20) - // Seek key: GE(x:10, y:NULL) // NULL is always LT, indicating we only care about x - // Termination key: GT(x:10, y:20) - (IterationDirection::Forwards, ast::Operator::LessEquals) => SeekDef { - key, - iter_dir, - seek: Some(SeekKey { - len: key_len - 1, - op: SeekOp::GE, - }), - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::GT, - }), - }, + // Ascending index example: (x=10 AND y<=20) + // Seek key: start from the first GE(x:10, y:NULL), e.g. (x=10, y=0) + // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) + // + // Descending index example: (x=10 AND y<=20) + // Seek key: start from the first LE(x:10, y:20), e.g. (x=10, y=20) so reversed -> GE(x:10, y:20) + // Termination key: end at the first LT(x:10), e.g. (x=9, y=usize::MAX), so reversed -> GE(x:10, NULL); i.e. GE the smallest possible (x=10, y) combination (NULL is always LT) + (IterationDirection::Forwards, ast::Operator::LessEquals) => { + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::GT, SeekOp::GT) + } else { + ( + key_len, + key_len - 1, + SeekOp::LE.reverse(), + SeekOp::LE.reverse(), + ) + }; + SeekDef { + key, + iter_dir, + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, + } + } // Backwards, EQ: // Example: (x=10 AND y=20) - // Seek key: LE(x:10, y:20) - // Termination key: LT(x:10, y:20) + // Seek key: start from the last LE(x:10, y:20) + // Termination key: end at the first LT(x:10, y:20) + // Ascending vs descending doesn't matter because all the comparisons are equalities. (IterationDirection::Backwards, ast::Operator::Equals) => SeekDef { key, iter_dir, seek: Some(SeekKey { len: key_len, op: SeekOp::LE, + null_pad: false, }), termination: Some(TerminationKey { len: key_len, op: SeekOp::LT, + null_pad: false, }), }, // Backwards, LT: - // Example: (x=10 AND y<20) - // Seek key: LT(x:10, y:20) - // Termination key: LT(x:10) + // Ascending index example: (x=10 AND y<20) + // Seek key: start from the last LT(x:10, y:20), e.g. (x=10, y=19) + // Termination key: end at the first LE(x:10, NULL), e.g. (x=9, y=usize::MAX) + // + // Descending index example: (x=10 AND y<20) + // Seek key: start from the last GT(x:10, y:NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL) + // Termination key: end at the first GE(x:10, y:20), e.g. 
(x=10, y=20) so reversed -> LE(x:10, y:20) (IterationDirection::Backwards, ast::Operator::Less) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::LT, SeekOp::LE) + } else { + ( + key_len - 1, + key_len, + SeekOp::GT.reverse(), + SeekOp::GE.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::LT, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::LT, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, }) } else { None @@ -1136,22 +1265,42 @@ fn build_seek_def( } } // Backwards, LE: - // Example: (x=10 AND y<=20) - // Seek key: LE(x:10, y:20) - // Termination key: LT(x:10) + // Ascending index example: (x=10 AND y<=20) + // Seek key: start from the last LE(x:10, y:20), e.g. (x=10, y=20) + // Termination key: end at the first LT(x:10, NULL), e.g. (x=9, y=usize::MAX) + // + // Descending index example: (x=10 AND y<=20) + // Seek key: start from the last GT(x:10, NULL), e.g. (x=10, y=0) so reversed -> LT(x:10, NULL) + // Termination key: end at the first GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20) (IterationDirection::Backwards, ast::Operator::LessEquals) => { - let termination_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len, key_len - 1, SeekOp::LE, SeekOp::LE) + } else { + ( + key_len - 1, + key_len, + SeekOp::GT.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, - seek: Some(SeekKey { - len: key_len, - op: SeekOp::LE, - }), + seek: if seek_key_len > 0 { + Some(SeekKey { + len: seek_key_len, + op: seek_op, + null_pad: sort_order_of_last_key == SortOrder::Desc, + }) + } else { + None + }, termination: if termination_key_len > 0 { Some(TerminationKey { len: termination_key_len, - op: SeekOp::LT, + op: termination_op, + null_pad: sort_order_of_last_key == SortOrder::Asc, }) } else { None @@ -1159,49 +1308,89 @@ fn build_seek_def( } } // Backwards, GT: - // Example: (x=10 AND y>20) - // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. - // Termination key: LE(x:10, y:20) + // Ascending index example: (x=10 AND y>20) + // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX) + // Termination key: end at the first LE(x:10, y:20), e.g. (x=10, y=20) + // + // Descending index example: (x=10 AND y>20) + // Seek key: start from the last GT(x:10, y:20), e.g. (x=10, y=21) so reversed -> LT(x:10, y:20) + // Termination key: end at the first GT(x:10), e.g. 
(x=11, y=0) so reversed -> LT(x:10) (IterationDirection::Backwards, ast::Operator::Greater) => { - let seek_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::LE, SeekOp::LE) + } else { + ( + key_len, + key_len - 1, + SeekOp::GT.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, seek: if seek_key_len > 0 { Some(SeekKey { len: seek_key_len, - op: SeekOp::LE, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, }) } else { None }, - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::LE, - }), } } // Backwards, GE: - // Example: (x=10 AND y>=20) - // Seek key: LE(x:10) // try to find the last row where x = 10, not considering y at all. - // Termination key: LT(x:10, y:20) + // Ascending index example: (x=10 AND y>=20) + // Seek key: start from the last LE(x:10), e.g. (x=10, y=usize::MAX) + // Termination key: end at the first LT(x:10, y:20), e.g. (x=10, y=19) + // + // Descending index example: (x=10 AND y>=20) + // Seek key: start from the last GE(x:10, y:20), e.g. (x=10, y=20) so reversed -> LE(x:10, y:20) + // Termination key: end at the first GT(x:10), e.g. (x=11, y=0) so reversed -> LT(x:10) (IterationDirection::Backwards, ast::Operator::GreaterEquals) => { - let seek_key_len = key_len - 1; + let (seek_key_len, termination_key_len, seek_op, termination_op) = + if sort_order_of_last_key == SortOrder::Asc { + (key_len - 1, key_len, SeekOp::LE, SeekOp::LT) + } else { + ( + key_len, + key_len - 1, + SeekOp::GE.reverse(), + SeekOp::GT.reverse(), + ) + }; SeekDef { key, iter_dir, seek: if seek_key_len > 0 { Some(SeekKey { len: seek_key_len, - op: SeekOp::LE, + op: seek_op, + null_pad: false, + }) + } else { + None + }, + termination: if termination_key_len > 0 { + Some(TerminationKey { + len: termination_key_len, + op: termination_op, + null_pad: false, }) } else { None }, - termination: Some(TerminationKey { - len: key_len, - op: SeekOp::LT, - }), } } (_, op) => { @@ -1252,7 +1441,8 @@ pub fn try_extract_rowid_search_expression( | ast::Operator::Less | ast::Operator::LessEquals => { let rhs_owned = rhs.take_ownership(); - let seek_def = build_seek_def(*operator, iter_dir, vec![rhs_owned])?; + let seek_def = + build_seek_def(*operator, iter_dir, vec![(rhs_owned, SortOrder::Asc)])?; return Ok(Some(Search::Seek { index: None, seek_def, @@ -1280,7 +1470,8 @@ pub fn try_extract_rowid_search_expression( | ast::Operator::LessEquals => { let lhs_owned = lhs.take_ownership(); let op = opposite_cmp_op(*operator); - let seek_def = build_seek_def(op, iter_dir, vec![lhs_owned])?; + let seek_def = + build_seek_def(op, iter_dir, vec![(lhs_owned, SortOrder::Asc)])?; return Ok(Some(Search::Seek { index: None, seek_def, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 038dd90ee..bb581ab13 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,5 @@ use core::fmt; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, fmt::{Display, Formatter}, @@ -391,10 +391,10 @@ impl TableReference { pub struct SeekDef { /// The key to use when seeking and when terminating the scan that follows the seek. 
/// For example, given: - /// - CREATE INDEX i ON t (x, y) + /// - CREATE INDEX i ON t (x, y desc) /// - SELECT * FROM t WHERE x = 1 AND y >= 30 - /// The key is [1, 30] - pub key: Vec, + /// The key is [(1, ASC), (30, DESC)] + pub key: Vec<(ast::Expr, SortOrder)>, /// The condition to use when seeking. See [SeekKey] for more details. pub seek: Option, /// The condition to use when terminating the scan that follows the seek. See [TerminationKey] for more details. @@ -403,35 +403,22 @@ pub struct SeekDef { pub iter_dir: IterationDirection, } -impl SeekDef { - /// Whether we should null pad unset columns when seeking. - /// This is only done for forward seeks. - /// The reason it is done is that sometimes our full index key is not used in seeking. - /// See [SeekKey] for more details. - /// - /// For example, given: - /// - CREATE INDEX i ON t (x, y) - /// - SELECT * FROM t WHERE x = 1 AND y < 30 - /// We want to seek to the first row where x = 1, and then iterate forwards. - /// In this case, the seek key is GT(1, NULL) since '30' cannot be used to seek (since we want y < 30), - /// and any value of y will be greater than NULL. - /// - /// In backwards iteration direction, we do not null pad because we want to seek to the last row that matches the seek key. - /// For example, given: - /// - CREATE INDEX i ON t (x, y) - /// - SELECT * FROM t WHERE x = 1 AND y > 30 ORDER BY y - /// We want to seek to the last row where x = 1, and then iterate backwards. - /// In this case, the seek key is just LE(1) so any row with x = 1 will be a match. - pub fn null_pad_unset_cols(&self) -> bool { - self.iter_dir == IterationDirection::Forwards - } -} - /// A condition to use when seeking. #[derive(Debug, Clone)] pub struct SeekKey { /// How many columns from [SeekDef::key] are used in seeking. pub len: usize, + /// Whether to NULL pad the last column of the seek key to match the length of [SeekDef::key]. + /// The reason it is done is that sometimes our full index key is not used in seeking, + /// but we want to find the lowest value that matches the non-null prefix of the key. + /// For example, given: + /// - CREATE INDEX i ON t (x, y) + /// - SELECT * FROM t WHERE x = 1 AND y < 30 + /// We want to seek to the first row where x = 1, and then iterate forwards. + /// In this case, the seek key is GT(1, NULL) since NULL is always LT in index key comparisons. + /// We can't use just GT(1) because in index key comparisons, only the given number of columns are compared, + /// so this means any index keys with (x=1) will compare equal, e.g. (x=1, y=usize::MAX) will compare equal to the seek key (x:1) + pub null_pad: bool, /// The comparison operator to use when seeking. pub op: SeekOp, } @@ -441,6 +428,9 @@ pub struct SeekKey { pub struct TerminationKey { /// How many columns from [SeekDef::key] are used in terminating the scan that follows the seek. pub len: usize, + /// Whether to NULL pad the last column of the termination key to match the length of [SeekDef::key]. + /// See [SeekKey::null_pad]. + pub null_pad: bool, /// The comparison operator to use when terminating the scan that follows the seek. 
pub op: SeekOp, } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 263ce491a..d00ee6129 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -12,6 +12,7 @@ use crate::{ }, printf::exec_printf, }, + types::compare_immutable, }; use std::{borrow::BorrowMut, rc::Rc, sync::Arc}; @@ -2053,9 +2054,11 @@ pub fn op_idx_ge( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let record_values = &record_values[..idx_values.len()]; + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_ge() { target_pc.to_offset_int() } else { @@ -2111,9 +2114,10 @@ pub fn op_idx_le( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_le() { target_pc.to_offset_int() } else { @@ -2151,9 +2155,10 @@ pub fn op_idx_gt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_gt() { target_pc.to_offset_int() } else { @@ -2191,9 +2196,10 @@ pub fn op_idx_lt( let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - let ord = idx_record.get_values()[..record_from_regs.len()] - .partial_cmp(&record_from_regs.get_values()[..]) - .unwrap(); + let idx_values = idx_record.get_values(); + let idx_values = &idx_values[..record_from_regs.len()]; + let record_values = record_from_regs.get_values(); + let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_lt() { target_pc.to_offset_int() } else { From 38dab4c1840dd9fadf48e3f81e758d123bea7e8e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 16 Apr 2025 14:00:17 +0300 Subject: [PATCH 272/425] Limbo 0.0.19-pre.5 --- Cargo.lock | 48 +++++++++---------- Cargo.toml | 28 +++++------ .../npm/darwin-universal/package.json | 2 +- .../javascript/npm/linux-x64-gnu/package.json | 2 +- .../npm/win32-x64-msvc/package.json | 2 +- bindings/javascript/package.json | 2 +- bindings/wasm/package-lock.json | 4 +- bindings/wasm/package.json | 2 +- 8 files changed, 45 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410bf6e34..cf145695a 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -512,7 +512,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "anyhow", "assert_cmd", @@ -1653,7 +1653,7 @@ dependencies = [ [[package]] name = "limbo" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_core", "thiserror 2.0.12", @@ -1662,14 +1662,14 @@ dependencies = [ [[package]] name = "limbo-go" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_core", ] [[package]] name = "limbo-java" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "jni", "limbo_core", @@ -1678,7 +1678,7 @@ dependencies = [ [[package]] name = "limbo-wasm" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "console_error_panic_hook", "getrandom 0.2.15", @@ -1691,7 +1691,7 @@ dependencies = [ [[package]] name = "limbo_cli" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "anyhow", "cfg-if", @@ -1715,7 +1715,7 @@ dependencies = [ [[package]] name = "limbo_completion" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_ext", "mimalloc", @@ -1723,7 +1723,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "built", "cfg_block", @@ -1776,7 +1776,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "blake3", "data-encoding", @@ -1789,7 +1789,7 @@ dependencies = [ [[package]] name = "limbo_ext" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "chrono", "getrandom 0.3.2", @@ -1798,7 +1798,7 @@ dependencies = [ [[package]] name = "limbo_ext_tests" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -1809,7 +1809,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "ipnetwork", "limbo_ext", @@ -1818,7 +1818,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "proc-macro2", "quote", @@ -1827,7 +1827,7 @@ dependencies = [ [[package]] name = "limbo_node" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_core", "napi", @@ -1837,7 +1837,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_ext", "mimalloc", @@ -1845,7 +1845,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_ext", "mimalloc", @@ -1854,7 +1854,7 @@ dependencies = [ [[package]] name = "limbo_series" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_ext", "mimalloc", @@ -1864,7 +1864,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "anarchist-readable-name-generator-lib", "chrono", @@ -1885,7 +1885,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "env_logger 0.11.7", "libc", @@ -1895,7 +1895,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "bitflags 2.9.0", "cc", @@ -1915,7 +1915,7 @@ dependencies = [ [[package]] name = "limbo_stress" -version = "0.0.19-pre.4" +version = 
"0.0.19-pre.5" dependencies = [ "anarchist-readable-name-generator-lib", "antithesis_sdk", @@ -1931,7 +1931,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "chrono", "limbo_ext", @@ -1943,7 +1943,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "limbo_ext", "mimalloc", @@ -2582,7 +2582,7 @@ dependencies = [ [[package]] name = "py-limbo" -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" dependencies = [ "anyhow", "limbo_core", diff --git a/Cargo.toml b/Cargo.toml index 44c621be1..ebd1c7e24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,26 +30,26 @@ members = [ exclude = ["perf/latency/limbo"] [workspace.package] -version = "0.0.19-pre.4" +version = "0.0.19-pre.5" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/limbo" [workspace.dependencies] -limbo_completion = { path = "extensions/completion", version = "0.0.19-pre.4" } -limbo_core = { path = "core", version = "0.0.19-pre.4" } -limbo_crypto = { path = "extensions/crypto", version = "0.0.19-pre.4" } -limbo_ext = { path = "extensions/core", version = "0.0.19-pre.4" } -limbo_ext_tests = { path = "extensions/tests", version = "0.0.19-pre.4" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19-pre.4" } -limbo_macros = { path = "macros", version = "0.0.19-pre.4" } -limbo_percentile = { path = "extensions/percentile", version = "0.0.19-pre.4" } -limbo_regexp = { path = "extensions/regexp", version = "0.0.19-pre.4" } -limbo_series = { path = "extensions/series", version = "0.0.19-pre.4" } -limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre.4" } -limbo_time = { path = "extensions/time", version = "0.0.19-pre.4" } -limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.4" } +limbo_completion = { path = "extensions/completion", version = "0.0.19-pre.5" } +limbo_core = { path = "core", version = "0.0.19-pre.5" } +limbo_crypto = { path = "extensions/crypto", version = "0.0.19-pre.5" } +limbo_ext = { path = "extensions/core", version = "0.0.19-pre.5" } +limbo_ext_tests = { path = "extensions/tests", version = "0.0.19-pre.5" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19-pre.5" } +limbo_macros = { path = "macros", version = "0.0.19-pre.5" } +limbo_percentile = { path = "extensions/percentile", version = "0.0.19-pre.5" } +limbo_regexp = { path = "extensions/regexp", version = "0.0.19-pre.5" } +limbo_series = { path = "extensions/series", version = "0.0.19-pre.5" } +limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre.5" } +limbo_time = { path = "extensions/time", version = "0.0.19-pre.5" } +limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.5" } [profile.release] debug = "line-tables-only" diff --git a/bindings/javascript/npm/darwin-universal/package.json b/bindings/javascript/npm/darwin-universal/package.json index a404de79b..6f3ca47be 100644 --- a/bindings/javascript/npm/darwin-universal/package.json +++ b/bindings/javascript/npm/darwin-universal/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-darwin-universal", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/npm/linux-x64-gnu/package.json b/bindings/javascript/npm/linux-x64-gnu/package.json index 3030e3ac3..12640c9f0 100644 --- 
a/bindings/javascript/npm/linux-x64-gnu/package.json +++ b/bindings/javascript/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-linux-x64-gnu", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/npm/win32-x64-msvc/package.json b/bindings/javascript/npm/win32-x64-msvc/package.json index 3461f0719..aff0bfaf7 100644 --- a/bindings/javascript/npm/win32-x64-msvc/package.json +++ b/bindings/javascript/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-win32-x64-msvc", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json index 574884692..9266042aa 100644 --- a/bindings/javascript/package.json +++ b/bindings/javascript/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/wasm/package-lock.json b/bindings/wasm/package-lock.json index 663a8217d..05b646ae5 100644 --- a/bindings/wasm/package-lock.json +++ b/bindings/wasm/package-lock.json @@ -1,12 +1,12 @@ { "name": "limbo-wasm", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "limbo-wasm", - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "license": "MIT", "devDependencies": { "@playwright/test": "^1.49.1", diff --git a/bindings/wasm/package.json b/bindings/wasm/package.json index 77d9f1af8..c3e23d018 100644 --- a/bindings/wasm/package.json +++ b/bindings/wasm/package.json @@ -3,7 +3,7 @@ "collaborators": [ "the Limbo authors" ], - "version": "0.0.19-pre.4", + "version": "0.0.19-pre.5", "license": "MIT", "repository": { "type": "git", From 95bc6442442457ac41448f02aed0e8969cba7cac Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 14:10:25 +0300 Subject: [PATCH 273/425] tests/fuzz: make compound key fuzz test a bit stricter with ordering --- tests/integration/fuzz/mod.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 878021f91..f76a005ba 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -387,13 +387,27 @@ mod tests { // so we need to check that limbo and sqlite return the same results when the ordering is reversed. // because we are generally using LIMIT (to make the test complete faster), we need to rerun the query // without limit and then check that the results are the same if reversed. 
- let query_no_limit = - format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); - let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); - let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); - let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::<Vec<_>>(); - if limbo_rev == sqlite_no_limit { - continue; + let order_by_only_equalities = !order_by_components.is_empty() + && order_by_components.iter().all(|o: &String| { + if o.starts_with("x ") { + comp1.map_or(false, |c| c == "=") + } else if o.starts_with("y ") { + comp2.map_or(false, |c| c == "=") + } else { + comp3.map_or(false, |c| c == "=") + } + }); + + if order_by_only_equalities { + let query_no_limit = + format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); + let limbo_no_limit = + limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); + let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); + let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::<Vec<_>>(); + if limbo_rev == sqlite_no_limit { + continue; + } } panic!( "limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", From c7935f4fb7b31416ed505e97bbd4f5b9a17d8b29 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 16 Apr 2025 15:21:48 +0300 Subject: [PATCH 274/425] Update CHANGELOG.md --- CHANGELOG.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eaf9c0ed5..42a9c5bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,91 @@ # Changelog +## 0.0.19 - 2025-04-16 + +### Added + +* Add `BeginSubrtn`, `NotFound` and `Affinity` bytecodes (Diego Reis) +* Add Ansi Colors to tcl test runner (Pedro Muniz) +* support modifiers for julianday() (meteorgan) +* Implement Once and OpenAutoindex opcodes (Jussi Saurio) +* Add support for OpenEphemeral bytecode (Diego Reis) +* simulator: Add Bug Database(BugBase) (Alperen Keleş) +* feat: Add timediff date and time function (Sachin Kumar Singh) +* core/btree: Add PageContent::new() helper (Pekka Enberg) +* Add support to load log file with stress test (Pere Diaz Bou) +* Support UPDATE for virtual tables (Preston Thorpe) +* Add `.timer` command to print SQL execution statistics (Pere Diaz Bou) +* Strict table support (Ihor Andrianov) +* Support backwards index scan and seeks + utilize indexes in removing ORDER BY (Jussi Saurio) +* Add deterministic Clock (Avinash Sajjanshetty) +* Support offset clause in Update queries (Preston Thorpe) +* Support Create Index (Preston Thorpe) +* Support insert default values syntax (Preston Thorpe) +* Add support for default values in INSERT statements (Diego Reis) + +### Updated + +* Test: write tests for file backed db (Pedro Muniz) +* btree: move some blocks of code to more reasonable places (Jussi Saurio) +* Parse hex integers 2 (Anton Harniakou) +* More index utils (Jussi Saurio) +* Index utils (Jussi Saurio) +* Feature: VDestroy for Dropping Virtual Tables (Pedro Muniz) +* Feat balance shallower (Lâm Hoàng Phúc) +* Parse hexadecimal integers (Anton Harniakou) +* Code clean-ups (Diego Reis) +* Return null when parameter is unbound (Levy A.)
+* Enhance robustness of optimization for Binary expressions (Diego Reis) +* Check that index seek key members are not null (Jussi Saurio) +* Better diagnostics (Pedro Muniz) +* simulator: provide high level commands on top of a single runner (Alperen Keleş) +* build(deps-dev): bump vite from 6.0.7 to 6.2.6 in /bindings/wasm/test-limbo-pkg (dependabot[bot]) +* btree: remove IterationState (Jussi Saurio) +* build(deps): bump pyo3 from 0.24.0 to 0.24.1 (dependabot[bot]) +* Multi column indexes + index seek refactor (Jussi Saurio) +* Emit ANSI codes only when tracing is outputting to terminal (Preston Thorpe) +* B-Tree code cleanups (Pekka Enberg) +* btree index selection on rightmost pointer in `balance_non_root` (Pere Diaz Bou) +* io/linux: make syscallio the default (io_uring is really slow) (Jussi Saurio) +* Stress improvements (Pekka Enberg) +* VDBE code cleanups (Pekka Enberg) +* Memory tests to track large blob insertions (Pedro Muniz) +* Setup tracing to allow output during test runs (Preston Thorpe) +* allow insertion of multiple overflow cells (Pere Diaz Bou) +* Properly handle insertion of indexed columns (Preston Thorpe) +* VTabs: Proper handling of re-opened db files without the relevant extensions loaded (Preston Thorpe) +* Account divider cell in size while distributing cells (Pere Diaz Bou) +* Format infinite float as "Inf"/"-Inf" (jachewz) +* update sqlite download version to 2025 + remove www. (Pere Diaz Bou) +* Improve validation of btree balancing (Pere Diaz Bou) +* Aggregation without group by produces incorrect results for scalars (Ihor Andrianov) +* Dot command completion (Pedro Muniz) +* Allow reading altered tables by defaulting to null in Column insn (Preston Thorpe) +* docs(readme): update discord link (Jamie Barton) +* More VDBE cleanups (Pekka Enberg) +* Request load page on `insert_into_page` (Pere Diaz Bou) +* core/vdbe: Rename execute_insn_* to op_* (Pekka Enberg) +* Remove RWLock from Shared wal state (Pere Diaz Bou) +* VDBE with indirect function dispatch (Pere Diaz Bou) + +### Fixed + +* Fix truncation of error output in tests (Pedro Muniz) +* Fix Unary Negate Operation on Blobs (Pedro Muniz) +* Fix incompatibility `AND` Operation (Pedro Muniz) +* Fix: comment out incorrect assert in fuzz (Pedro Muniz) +* Fix two issues with indexes (Jussi Saurio) +* Fuzz fix some operations (Pedro Muniz) +* simulator: updates to bug base, refactors (Alperen Keleş) +* Fix overwrite cell with size less than cell size (Pere Diaz Bou) +* Fix `EXPLAIN` to be case insensitive (Pedro Muniz) +* core: Fix syscall VFS on Linux (Pekka Enberg) +* Index insert fixes (Pere Diaz Bou) +* Decrease page count on balancing fixes (Pere Diaz Bou) +* Remainder fixes (jachewz) +* Fix virtual table translation issues (Preston Thorpe) +* Fix overflow position in write_page() (Lâm Hoàng Phúc) ## 0.0.18 - 2025-04-02 ### Added From 7a3fc335923d2e6372a701ee387d76bd44d0844d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 16 Apr 2025 15:23:02 +0300 Subject: [PATCH 275/425] Limbo 0.0.19 --- Cargo.lock | 48 +++++++++---------- Cargo.toml | 28 +++++------ .../npm/darwin-universal/package.json | 2 +- .../javascript/npm/linux-x64-gnu/package.json | 2 +- .../npm/win32-x64-msvc/package.json | 2 +- bindings/javascript/package.json | 2 +- bindings/wasm/package-lock.json | 4 +- bindings/wasm/package.json | 2 +- 8 files changed, 45 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf145695a..810b9983e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -512,7 +512,7 @@ checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "core_tester" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "anyhow", "assert_cmd", @@ -1653,7 +1653,7 @@ dependencies = [ [[package]] name = "limbo" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_core", "thiserror 2.0.12", @@ -1662,14 +1662,14 @@ dependencies = [ [[package]] name = "limbo-go" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_core", ] [[package]] name = "limbo-java" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "jni", "limbo_core", @@ -1678,7 +1678,7 @@ dependencies = [ [[package]] name = "limbo-wasm" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "console_error_panic_hook", "getrandom 0.2.15", @@ -1691,7 +1691,7 @@ dependencies = [ [[package]] name = "limbo_cli" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "anyhow", "cfg-if", @@ -1715,7 +1715,7 @@ dependencies = [ [[package]] name = "limbo_completion" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1723,7 +1723,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "built", "cfg_block", @@ -1776,7 +1776,7 @@ dependencies = [ [[package]] name = "limbo_crypto" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "blake3", "data-encoding", @@ -1789,7 +1789,7 @@ dependencies = [ [[package]] name = "limbo_ext" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "chrono", "getrandom 0.3.2", @@ -1798,7 +1798,7 @@ dependencies = [ [[package]] name = "limbo_ext_tests" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "env_logger 0.11.7", "lazy_static", @@ -1809,7 +1809,7 @@ dependencies = [ [[package]] name = "limbo_ipaddr" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "ipnetwork", "limbo_ext", @@ -1818,7 +1818,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "proc-macro2", "quote", @@ -1827,7 +1827,7 @@ dependencies = [ [[package]] name = "limbo_node" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_core", "napi", @@ -1837,7 +1837,7 @@ dependencies = [ [[package]] name = "limbo_percentile" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1845,7 +1845,7 @@ dependencies = [ [[package]] name = "limbo_regexp" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1854,7 +1854,7 @@ dependencies = [ [[package]] name = "limbo_series" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -1864,7 +1864,7 @@ dependencies = [ [[package]] name = "limbo_sim" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "anarchist-readable-name-generator-lib", "chrono", @@ -1885,7 +1885,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "env_logger 0.11.7", "libc", @@ -1895,7 +1895,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "bitflags 2.9.0", "cc", @@ -1915,7 +1915,7 @@ dependencies = [ [[package]] name = "limbo_stress" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "anarchist-readable-name-generator-lib", "antithesis_sdk", @@ -1931,7 +1931,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.19-pre.5" 
+version = "0.0.19" dependencies = [ "chrono", "limbo_ext", @@ -1943,7 +1943,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", @@ -2582,7 +2582,7 @@ dependencies = [ [[package]] name = "py-limbo" -version = "0.0.19-pre.5" +version = "0.0.19" dependencies = [ "anyhow", "limbo_core", diff --git a/Cargo.toml b/Cargo.toml index ebd1c7e24..a2dfb3e3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,26 +30,26 @@ members = [ exclude = ["perf/latency/limbo"] [workspace.package] -version = "0.0.19-pre.5" +version = "0.0.19" authors = ["the Limbo authors"] edition = "2021" license = "MIT" repository = "https://github.com/tursodatabase/limbo" [workspace.dependencies] -limbo_completion = { path = "extensions/completion", version = "0.0.19-pre.5" } -limbo_core = { path = "core", version = "0.0.19-pre.5" } -limbo_crypto = { path = "extensions/crypto", version = "0.0.19-pre.5" } -limbo_ext = { path = "extensions/core", version = "0.0.19-pre.5" } -limbo_ext_tests = { path = "extensions/tests", version = "0.0.19-pre.5" } -limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19-pre.5" } -limbo_macros = { path = "macros", version = "0.0.19-pre.5" } -limbo_percentile = { path = "extensions/percentile", version = "0.0.19-pre.5" } -limbo_regexp = { path = "extensions/regexp", version = "0.0.19-pre.5" } -limbo_series = { path = "extensions/series", version = "0.0.19-pre.5" } -limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19-pre.5" } -limbo_time = { path = "extensions/time", version = "0.0.19-pre.5" } -limbo_uuid = { path = "extensions/uuid", version = "0.0.19-pre.5" } +limbo_completion = { path = "extensions/completion", version = "0.0.19" } +limbo_core = { path = "core", version = "0.0.19" } +limbo_crypto = { path = "extensions/crypto", version = "0.0.19" } +limbo_ext = { path = "extensions/core", version = "0.0.19" } +limbo_ext_tests = { path = "extensions/tests", version = "0.0.19" } +limbo_ipaddr = { path = "extensions/ipaddr", version = "0.0.19" } +limbo_macros = { path = "macros", version = "0.0.19" } +limbo_percentile = { path = "extensions/percentile", version = "0.0.19" } +limbo_regexp = { path = "extensions/regexp", version = "0.0.19" } +limbo_series = { path = "extensions/series", version = "0.0.19" } +limbo_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.0.19" } +limbo_time = { path = "extensions/time", version = "0.0.19" } +limbo_uuid = { path = "extensions/uuid", version = "0.0.19" } [profile.release] debug = "line-tables-only" diff --git a/bindings/javascript/npm/darwin-universal/package.json b/bindings/javascript/npm/darwin-universal/package.json index 6f3ca47be..bee9cf13b 100644 --- a/bindings/javascript/npm/darwin-universal/package.json +++ b/bindings/javascript/npm/darwin-universal/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-darwin-universal", - "version": "0.0.19-pre.5", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/npm/linux-x64-gnu/package.json b/bindings/javascript/npm/linux-x64-gnu/package.json index 12640c9f0..a34ccda73 100644 --- a/bindings/javascript/npm/linux-x64-gnu/package.json +++ b/bindings/javascript/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-linux-x64-gnu", - "version": "0.0.19-pre.5", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git 
a/bindings/javascript/npm/win32-x64-msvc/package.json b/bindings/javascript/npm/win32-x64-msvc/package.json index aff0bfaf7..c4bc40bb6 100644 --- a/bindings/javascript/npm/win32-x64-msvc/package.json +++ b/bindings/javascript/npm/win32-x64-msvc/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo-win32-x64-msvc", - "version": "0.0.19-pre.5", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/javascript/package.json b/bindings/javascript/package.json index 9266042aa..62c5c27e3 100644 --- a/bindings/javascript/package.json +++ b/bindings/javascript/package.json @@ -1,6 +1,6 @@ { "name": "@tursodatabase/limbo", - "version": "0.0.19-pre.5", + "version": "0.0.19", "repository": { "type": "git", "url": "https://github.com/tursodatabase/limbo" diff --git a/bindings/wasm/package-lock.json b/bindings/wasm/package-lock.json index 05b646ae5..b357a1ef1 100644 --- a/bindings/wasm/package-lock.json +++ b/bindings/wasm/package-lock.json @@ -1,12 +1,12 @@ { "name": "limbo-wasm", - "version": "0.0.19-pre.5", + "version": "0.0.19", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "limbo-wasm", - "version": "0.0.19-pre.5", + "version": "0.0.19", "license": "MIT", "devDependencies": { "@playwright/test": "^1.49.1", diff --git a/bindings/wasm/package.json b/bindings/wasm/package.json index c3e23d018..9f519b652 100644 --- a/bindings/wasm/package.json +++ b/bindings/wasm/package.json @@ -3,7 +3,7 @@ "collaborators": [ "the Limbo authors" ], - "version": "0.0.19-pre.5", + "version": "0.0.19", "license": "MIT", "repository": { "type": "git", From 30f2a977823b791ccc0f22cbb0d39882cc771d15 Mon Sep 17 00:00:00 2001 From: TcMits Date: Wed, 16 Apr 2025 19:55:52 +0700 Subject: [PATCH 276/425] shorter syntax --- core/storage/btree.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 252c0e9fc..35e05a69f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1598,7 +1598,7 @@ impl BTreeCursor { PageType::IndexInterior | PageType::TableInterior )); // Part 1: Find the sibling pages to balance - let mut pages_to_balance: [Option; 3] = [None, None, None]; + let mut pages_to_balance: [Option; 3] = [const { None }; 3]; let number_of_cells_in_parent = parent_contents.cell_count() + parent_contents.overflow_cells.len(); @@ -1721,7 +1721,7 @@ impl BTreeCursor { .replace(Some(BalanceInfo { pages_to_balance, rightmost_pointer: right_pointer, - divider_cells: [None, None], + divider_cells: [const { None }; 2], sibling_count, first_divider_cell: first_cell_divider, })); @@ -1755,7 +1755,7 @@ impl BTreeCursor { /* 1. 
Get divider cells and max_cells */ let mut max_cells = 0; // we only need maximum 5 pages to balance 3 pages - let mut pages_to_balance_new: [Option; 5] = [None, None, None, None, None]; + let mut pages_to_balance_new: [Option; 5] = [const { None }; 5]; for i in (0..balance_info.sibling_count).rev() { let sibling_page = balance_info.pages_to_balance[i].as_ref().unwrap(); let sibling_contents = sibling_page.get_contents(); From a73f4db38b2c14cc968d4cffd8aefe05ed933ad7 Mon Sep 17 00:00:00 2001 From: TcMits Date: Thu, 17 Apr 2025 15:02:59 +0700 Subject: [PATCH 277/425] missing done var --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 35e05a69f..7ce096586 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2299,7 +2299,7 @@ impl BTreeCursor { ** upwards pass simply processes pages that were missed on the downward ** pass. */ - let mut done = vec![false; sibling_count_new]; + let mut done = [false; 5]; for i in (1 - sibling_count_new as i64)..sibling_count_new as i64 { let page_idx = i.unsigned_abs() as usize; if done[page_idx] { From 262c630c166b82e07042cd9ed0a194d1871d04a6 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 17 Apr 2025 18:22:10 +0200 Subject: [PATCH 278/425] fix validation with overflow cells --- core/storage/btree.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index df3e2ae3f..b1a8a8d26 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2782,6 +2782,10 @@ impl BTreeCursor { } } if was_overflow { + if !leaf_data { + // remember to increase cell if this cell was moved to parent + current_index_cell += 1; + } continue; } // check if overflow @@ -2885,6 +2889,10 @@ impl BTreeCursor { } } if was_overflow { + if !leaf_data { + // remember to increase cell if this cell was moved to parent + current_index_cell += 1; + } continue; } let (parent_cell_start, parent_cell_len) = parent_contents.cell_get_raw_region( From 853af16946a7f9850e98b60ab56c3db5d49279d3 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 26 Mar 2025 09:16:03 -0400 Subject: [PATCH 279/425] Implement xBestIndex for virtual table api to improve query planning --- core/lib.rs | 31 +++++- core/translate/main_loop.rs | 43 ++++++-- core/translate/plan.rs | 89 ++++++++++++++++ core/vdbe/execute.rs | 9 +- core/vdbe/insn.rs | 2 + extensions/completion/src/lib.rs | 4 +- extensions/core/src/lib.rs | 5 +- extensions/core/src/vtabs.rs | 172 ++++++++++++++++++++++++++++++- extensions/series/src/lib.rs | 2 +- extensions/tests/src/lib.rs | 39 ++++++- macros/src/lib.rs | 23 ++++- 11 files changed, 395 insertions(+), 24 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 353789839..68384e77d 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -35,7 +35,7 @@ pub use io::UringIO; pub use io::{ Buffer, Completion, File, MemoryIO, OpenFlags, PlatformIO, SyscallIO, WriteCompletion, IO, }; -use limbo_ext::{ResultCode, VTabKind, VTabModuleImpl}; +use limbo_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl}; use limbo_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser}; use parking_lot::RwLock; use schema::{Column, Schema}; @@ -641,6 +641,21 @@ impl VirtualTable { pub(crate) fn rowid(&self, cursor: &VTabOpaqueCursor) -> i64 { unsafe { (self.implementation.rowid)(cursor.as_ptr()) } } + + pub(crate) fn best_index( + &self, + constraints: &[ConstraintInfo], + order_by: &[OrderByInfo], + ) -> IndexInfo { + unsafe { + 
IndexInfo::from_ffi((self.implementation.best_idx)( constraints.as_ptr(), constraints.len() as i32, order_by.as_ptr(), order_by.len() as i32, )) } } /// takes ownership of the provided Args pub(crate) fn from_args( tbl_name: Option<&str>, @@ -693,6 +708,8 @@ impl VirtualTable { pub fn filter( &self, cursor: &VTabOpaqueCursor, + idx_num: i32, + idx_str: Option<String>, arg_count: usize, args: Vec<OwnedValue>, ) -> Result<bool> { @@ -701,8 +718,18 @@ impl VirtualTable { let ownedvalue_arg = args.get(i).unwrap(); filter_args.push(ownedvalue_arg.to_ffi()); } + let c_idx_str = idx_str + .map(|s| std::ffi::CString::new(s).unwrap()) + .map(|cstr| cstr.into_raw()) + .unwrap_or(std::ptr::null_mut()); let rc = unsafe { - (self.implementation.filter)(cursor.as_ptr(), arg_count as i32, filter_args.as_ptr()) + (self.implementation.filter)( + cursor.as_ptr(), + arg_count as i32, + filter_args.as_ptr(), + c_idx_str, + idx_num, + ) }; for arg in filter_args { unsafe { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 76057c53b..877f55f75 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -9,6 +9,8 @@ use crate::{ }, Result, }; +use limbo_ext::{ConstraintInfo, OrderByInfo}; +use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -18,8 +20,8 @@ use super::{ optimizer::Optimizable, order_by::{order_by_sorter_insert, sorter_insert}, plan::{ - IterationDirection, Operation, Search, SeekDef, SelectPlan, SelectQueryType, - TableReference, WhereTerm, + try_convert_to_constraint_info, IterationDirection, Operation, Search, SeekDef, SelectPlan, + SelectQueryType, TableReference, WhereTerm, }, }; @@ -251,9 +253,6 @@ pub fn open_loop( end_offset: loop_end, }); - // These are predicates evaluated outside of the subquery, - // so they are translated here. - // E.g.
SELECT foo FROM (SELECT bar as foo FROM t1) sub WHERE sub.foo > 10 for cond in predicates .iter() .filter(|cond| cond.should_eval_at_loop(table_index)) @@ -290,12 +289,25 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } - if let Table::Virtual(ref table) = table.table { + } else if let Some(vtab) = table.table.virtual_table() { + let constraints: Vec<ConstraintInfo> = predicates + .iter() + .filter(|p| p.applies_to_table(&table.table, tables)) + .filter_map(|p| try_convert_to_constraint_info(p, table_index)) + .collect(); + + let order_by = vec![OrderByInfo { + column_index: *t_ctx + .result_column_indexes_in_orderby_sorter + .first() + .unwrap_or(&0) as u32, + desc: matches!(iter_dir, IterationDirection::Backwards), + }]; + let index_info = vtab.best_index(&constraints, &order_by); let start_reg = - program.alloc_registers(table.args.as_ref().map(|a| a.len()).unwrap_or(0)); + program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0)); let mut cur_reg = start_reg; - let args = match table.args.as_ref() { + let args = match vtab.args.as_ref() { Some(args) => args, None => &vec![], }; @@ -304,11 +316,22 @@ pub fn open_loop( cur_reg += 1; let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; } + let mut maybe_idx_str_reg = None; + if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + maybe_idx_str_reg = Some(reg); + } program.emit_insn(Insn::VFilter { cursor_id, pc_if_empty: loop_end, - arg_count: table.args.as_ref().map_or(0, |args| args.len()), + arg_count: vtab.args.as_ref().map_or(0, |args| args.len()), args_reg: start_reg, + idx_str: maybe_idx_str_reg, + idx_num: index_info.idx_num as usize, }); } program.resolve_label(loop_start, program.offset()); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index bb581ab13..9c535c57a 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,4 +1,5 @@ use core::fmt; +use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -73,8 +74,96 @@ impl WhereTerm { pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { self.eval_at == EvalAt::Loop(loop_idx) } + + pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool { + match &self.expr { + ast::Expr::Column { + table: table_idx, .. + } => { + let table_ref = &tables[*table_idx]; + table_ref.table == *table + } + _ => false, + } + } } +use crate::ast::{Expr, Operator}; + +use super::optimizer::{ConstantPredicate, Optimizable}; + +fn reverse_operator(op: &Operator) -> Option<Operator> { + match op { + Operator::Equals => Some(Operator::Equals), + Operator::Less => Some(Operator::Greater), + Operator::LessEquals => Some(Operator::GreaterEquals), + Operator::Greater => Some(Operator::Less), + Operator::GreaterEquals => Some(Operator::LessEquals), + Operator::NotEquals => Some(Operator::NotEquals), + Operator::Is => Some(Operator::Is), + Operator::IsNot => Some(Operator::IsNot), + _ => None, + } +} + +pub fn try_convert_to_constraint_info( + term: &WhereTerm, + table_index: usize, +) -> Option<ConstraintInfo> { + if term.from_outer_join { + return None; + } + + let Expr::Binary(lhs, op, rhs) = &term.expr else { + return None; + }; + + let (col_expr, _, op) = match (&**lhs, &**rhs) { + (Expr::Column { .. }, rhs) + if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => + { + (lhs, rhs, op) + } + (lhs, Expr::Column { .. }) + if lhs.check_constant().ok()?
== Some(ConstantPredicate::AlwaysTrue) => + { + (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) + } + _ => return None, + }; + + let Expr::Column { + table: tbl_idx, + column, + .. + } = **col_expr + else { + return None; + }; + + if tbl_idx != table_index { + return None; + } + + let column_index = column as u32; + let constraint_op = match op { + Operator::Equals => ConstraintOp::Eq, + Operator::Less => ConstraintOp::Lt, + Operator::LessEquals => ConstraintOp::Le, + Operator::Greater => ConstraintOp::Gt, + Operator::GreaterEquals => ConstraintOp::Ge, + Operator::NotEquals => ConstraintOp::Ne, + Operator::Is => ConstraintOp::Is, + Operator::IsNot => ConstraintOp::IsNot, + _ => return None, + }; + + Some(ConstraintInfo { + column_index, + op: constraint_op, + usable: true, + }) +} /// The loop index where to evaluate the condition. /// For example, in `SELECT * FROM u JOIN p WHERE u.id = 5`, the condition can already be evaluated at the first loop (idx 0), /// because that is the rightmost table that it references. diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index d00ee6129..9abc7c6c3 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -966,6 +966,8 @@ pub fn op_vfilter( pc_if_empty, arg_count, args_reg, + idx_str, + idx_num, } = insn else { unreachable!("unexpected Insn {:?}", insn) @@ -981,7 +983,12 @@ pub fn op_vfilter( for i in 0..*arg_count { args.push(state.registers[args_reg + i].get_owned_value().clone()); } - virtual_table.filter(cursor, *arg_count, args)? + let idx_str = if let Some(idx_str) = idx_str { + Some(state.registers[*idx_str].get_owned_value().to_string()) + } else { + None + }; + virtual_table.filter(cursor, *idx_num as i32, idx_str, *arg_count, args)? }; if !has_rows { state.pc = pc_if_empty.to_offset_int(); diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index f1276798f..56f44bd2b 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -289,6 +289,8 @@ pub enum Insn { pc_if_empty: BranchOffset, arg_count: usize, args_reg: usize, + idx_str: Option<usize>, + idx_num: usize, }, /// Read a column from the current row of the virtual table cursor.
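For orientation before the per-extension diffs, a minimal sketch of the round trip these opcodes enable, written against the VTabModule trait added in the extensions/core diff that follows; the idx_num of 1 and the "key_eq" tag are illustrative values, not part of the patch. A module that can serve a `key = ?` lookup advertises it in best_index, and the core hands the bound value back through VFilter:

    fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo {
        // Advertise a point lookup when column 0 carries a usable equality constraint.
        if let Some((i, _)) = constraints
            .iter()
            .enumerate()
            .find(|(_, c)| c.usable && c.op == ConstraintOp::Eq && c.column_index == 0)
        {
            let mut usages =
                vec![ConstraintUsage { argv_index: None, omit: false }; constraints.len()];
            // Request this constraint's value as VFilter arg 1; omit asks the core to
            // skip re-checking the predicate in the bytecode (honored later in the series).
            usages[i] = ConstraintUsage { argv_index: Some(1), omit: true };
            return IndexInfo {
                idx_num: 1,
                idx_str: Some("key_eq".to_string()),
                constraint_usages: usages,
                ..Default::default()
            };
        }
        IndexInfo::default() // full scan: every predicate stays in the bytecode
    }

The cursor's filter implementation then receives Some(("key_eq", 1)) plus the key in args[0], as the kv_store test module further down demonstrates.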
diff --git a/extensions/completion/src/lib.rs b/extensions/completion/src/lib.rs index 09b09c479..53358c23c 100644 --- a/extensions/completion/src/lib.rs +++ b/extensions/completion/src/lib.rs @@ -91,8 +91,8 @@ impl VTabModule for CompletionVTab { cursor.eof() } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { - if args.len() == 0 || args.len() > 2 { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { + if args.is_empty() || args.len() > 2 { return ResultCode::InvalidArgs; } cursor.reset(); diff --git a/extensions/core/src/lib.rs b/extensions/core/src/lib.rs index e73b2b894..99729de6c 100644 --- a/extensions/core/src/lib.rs +++ b/extensions/core/src/lib.rs @@ -15,7 +15,10 @@ pub use types::{ResultCode, Value, ValueType}; #[cfg(feature = "vfs")] pub use vfs_modules::{RegisterVfsFn, VfsExtension, VfsFile, VfsFileImpl, VfsImpl, VfsInterface}; use vtabs::RegisterModuleFn; -pub use vtabs::{VTabCursor, VTabKind, VTabModule, VTabModuleImpl}; +pub use vtabs::{ + ConstraintInfo, ConstraintOp, ConstraintUsage, ExtIndexInfo, IndexInfo, OrderByInfo, + VTabCursor, VTabKind, VTabModule, VTabModuleImpl, +}; pub type ExtResult<T> = std::result::Result<T, ResultCode>; diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index 83b3dae78..cf89b1fd1 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -22,6 +22,7 @@ pub struct VTabModuleImpl { pub update: VtabFnUpdate, pub rowid: VtabRowIDFn, pub destroy: VtabFnDestroy, + pub best_idx: BestIdxFn, } #[cfg(feature = "core_only")] @@ -43,8 +44,13 @@ pub type VtabFnCreateSchema = unsafe extern "C" fn(args: *const Value, argc: i32 pub type VtabFnOpen = unsafe extern "C" fn(*const c_void) -> *const c_void; -pub type VtabFnFilter = - unsafe extern "C" fn(cursor: *const c_void, argc: i32, argv: *const Value) -> ResultCode; +pub type VtabFnFilter = unsafe extern "C" fn( + cursor: *const c_void, + argc: i32, + argv: *const Value, + idx_str: *const c_char, + idx_num: i32, +) -> ResultCode; pub type VtabFnColumn = unsafe extern "C" fn(cursor: *const c_void, idx: u32) -> Value; @@ -62,6 +68,12 @@ pub type VtabFnUpdate = unsafe extern "C" fn( ) -> ResultCode; pub type VtabFnDestroy = unsafe extern "C" fn(vtab: *const c_void) -> ResultCode; +pub type BestIdxFn = unsafe extern "C" fn( + constraints: *const ConstraintInfo, + constraint_len: i32, + order_by: *const OrderByInfo, + order_by_len: i32, +) -> ExtIndexInfo; #[repr(C)] #[derive(Clone, Copy, Debug, PartialEq)] @@ -78,7 +90,11 @@ pub trait VTabModule: 'static { fn create_schema(args: &[Value]) -> String; fn open(&self) -> Result<Self::VCursor, Self::Error>; - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode; + fn filter( + cursor: &mut Self::VCursor, + args: &[Value], + idx_info: Option<(&str, i32)>, + ) -> ResultCode; fn column(cursor: &Self::VCursor, idx: u32) -> Result<Value, Self::Error>; fn next(cursor: &mut Self::VCursor) -> ResultCode; fn eof(cursor: &Self::VCursor) -> bool; @@ -94,6 +110,22 @@ pub trait VTabModule: 'static { fn destroy(&mut self) -> Result<(), Self::Error> { Ok(()) } + fn best_index(_constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + IndexInfo { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: _constraints + .iter() + .map(|_| ConstraintUsage { + argv_index: Some(0), + omit: false, + }) + .collect(), + } + } } pub trait VTabCursor: Sized { @@ -103,3 +135,137 @@ pub trait VTabCursor: Sized { fn rowid(&self) -> i64; fn column(&self, idx: u32) -> Result<Value, Self::Error>; fn eof(&self) -> bool; fn
next(&mut self) -> ResultCode; } + +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ConstraintOp { + Eq = 2, + Lt = 4, + Le = 8, + Gt = 16, + Ge = 32, + Match = 64, + Like = 65, + Glob = 66, + Regexp = 67, + Ne = 68, + IsNot = 69, + IsNotNull = 70, + IsNull = 71, + Is = 72, + In = 73, +} + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct OrderByInfo { + pub column_index: u32, + pub desc: bool, +} + +#[derive(Debug, Clone)] +pub struct IndexInfo { + pub idx_num: i32, + pub idx_str: Option<String>, + pub order_by_consumed: bool, + /// TODO: for eventual cost based query planning + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages: Vec<ConstraintUsage>, +} +impl Default for IndexInfo { + fn default() -> Self { + Self { + idx_num: 0, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1_000_000.0, + estimated_rows: u32::MAX, + constraint_usages: Vec::new(), + } + } +} + +impl IndexInfo { + /// + /// Converts IndexInfo to an FFI-safe `ExtIndexInfo`. + /// This method transfers ownership of `constraint_usages` and `idx_str`, + /// which must later be reclaimed using `from_ffi` to prevent leaks. + pub fn to_ffi(self) -> ExtIndexInfo { + let len = self.constraint_usages.len(); + let ptr = Box::into_raw(self.constraint_usages.into_boxed_slice()) as *mut ConstraintUsage; + let idx_str_len = self.idx_str.as_ref().map(|s| s.len()).unwrap_or(0); + let c_idx_str = self + .idx_str + .map(|s| std::ffi::CString::new(s).unwrap().into_raw()) + .unwrap_or(std::ptr::null_mut()); + ExtIndexInfo { + idx_num: self.idx_num, + estimated_cost: self.estimated_cost, + estimated_rows: self.estimated_rows, + order_by_consumed: self.order_by_consumed, + constraint_usages_ptr: ptr, + constraint_usage_len: len, + idx_str: c_idx_str as *mut _, + idx_str_len, + } + } + + /// Reclaims ownership of `constraint_usages` and `idx_str` from an FFI-safe `ExtIndexInfo`. + /// # Safety + /// This method is unsafe because it can cause memory leaks if not used correctly. + /// to_ffi and from_ffi are meant to send index info across ffi bounds then immediately reclaim it.
+ pub unsafe fn from_ffi(ffi: ExtIndexInfo) -> Self { + let constraint_usages = unsafe { + Box::from_raw(std::slice::from_raw_parts_mut( + ffi.constraint_usages_ptr, + ffi.constraint_usage_len, + )) + .to_vec() + }; + let idx_str = if ffi.idx_str.is_null() { + None + } else { + Some(unsafe { + std::ffi::CString::from_raw(ffi.idx_str as *mut _) + .to_string_lossy() + .into_owned() + }) + }; + Self { + idx_num: ffi.idx_num, + idx_str, + order_by_consumed: ffi.order_by_consumed, + estimated_cost: ffi.estimated_cost, + estimated_rows: ffi.estimated_rows, + constraint_usages, + } + } +} + +#[repr(C)] +#[derive(Clone, Debug)] +pub struct ExtIndexInfo { + pub idx_num: i32, + pub idx_str: *const u8, + pub idx_str_len: usize, + pub order_by_consumed: bool, + pub estimated_cost: f64, + pub estimated_rows: u32, + pub constraint_usages_ptr: *mut ConstraintUsage, + pub constraint_usage_len: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct ConstraintUsage { + pub argv_index: Option<u32>, // 1-based index into VFilter args + pub omit: bool, // if true, core skips checking it again +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct ConstraintInfo { + pub column_index: u32, + pub op: ConstraintOp, + pub usable: bool, +} diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 43028eed5..5f833a607 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -45,7 +45,7 @@ impl VTabModule for GenerateSeriesVTab { }) } - fn filter(cursor: &mut Self::VCursor, args: &[Value]) -> ResultCode { + fn filter(cursor: &mut Self::VCursor, args: &[Value], _: Option<(&str, i32)>) -> ResultCode { // args are the start, stop, and step if args.is_empty() || args.len() > 3 { return ResultCode::InvalidArgs; diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index beff17004..c10a40f58 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ExtResult, ResultCode, VTabCursor, VTabKind, VTabModule, - VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo, + ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; #[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -46,7 +46,40 @@ impl VTabModule for KVStoreVTab { }) } - fn filter(cursor: &mut Self::VCursor, _args: &[Value]) -> ResultCode { + fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { + // not exactly the ideal kind of table to demonstrate this on... + for constraint in constraints { + println!("constraint: {:?}", constraint); + if constraint.usable + && constraint.op == ConstraintOp::Eq + && constraint.column_index == 0 + { + // key = ?
is supported + return IndexInfo { + idx_num: 1, // arbitrary non-zero code to signify optimization + idx_str: Some("key_eq".to_string()), + order_by_consumed: false, + estimated_cost: 10.0, + ..Default::default() + }; + } + } + + // fallback: full scan + IndexInfo { + idx_num: -1, + idx_str: None, + order_by_consumed: false, + estimated_cost: 1000.0, + ..Default::default() + } + } + + fn filter( + cursor: &mut Self::VCursor, + _args: &[Value], + _idx_str: Option<(&str, i32)>, + ) -> ResultCode { let store = GLOBAL_STORE.lock().unwrap(); cursor.rows = store .iter() diff --git a/macros/src/lib.rs b/macros/src/lib.rs index c03788c7c..acb969876 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -455,6 +455,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { let update_fn_name = format_ident!("update_{}", struct_name); let rowid_fn_name = format_ident!("rowid_{}", struct_name); let destroy_fn_name = format_ident!("destroy_{}", struct_name); + let best_idx_fn_name = format_ident!("best_idx_{}", struct_name); let expanded = quote! { impl #struct_name { @@ -490,13 +491,20 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { cursor: *const ::std::ffi::c_void, argc: i32, argv: *const ::limbo_ext::Value, + idx_str: *const ::std::ffi::c_char, + idx_num: i32, ) -> ::limbo_ext::ResultCode { if cursor.is_null() { return ::limbo_ext::ResultCode::Error; } let cursor = unsafe { &mut *(cursor as *mut <#struct_name as ::limbo_ext::VTabModule>::VCursor) }; let args = ::std::slice::from_raw_parts(argv, argc as usize); - <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args) + let idx_str = if idx_str.is_null() { + None + } else { + Some((unsafe { ::std::ffi::CStr::from_ptr(idx_str).to_str().unwrap() }, idx_num)) + }; + <#struct_name as ::limbo_ext::VTabModule>::filter(cursor, args, idx_str) } #[no_mangle] @@ -613,6 +621,18 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { return ::limbo_ext::ResultCode::OK; } + #[no_mangle] + pub unsafe extern "C" fn #best_idx_fn_name( + constraints: *const ::limbo_ext::ConstraintInfo, + n_constraints: i32, + order_by: *const ::limbo_ext::OrderByInfo, + n_order_by: i32, + ) -> ::limbo_ext::ExtIndexInfo { + let constraints = std::slice::from_raw_parts(constraints, n_constraints as usize); + let order_by = std::slice::from_raw_parts(order_by, n_order_by as usize); + <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi() + } + #[no_mangle] pub unsafe extern "C" fn #register_fn_name( api: *const ::limbo_ext::ExtensionApi @@ -636,6 +656,7 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { update: Self::#update_fn_name, rowid: Self::#rowid_fn_name, destroy: Self::#destroy_fn_name, + best_idx: Self::#best_idx_fn_name, }; (api.register_vtab_module)(api.ctx, name_c, module, <#struct_name as ::limbo_ext::VTabModule>::VTAB_KIND) } From 0f34a813ffd8154b1ba96b839e5769ac68d89769 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 14:20:51 -0400 Subject: [PATCH 280/425] Add can_pushdown_predicate fn to evaluate ast expressions for constness --- core/util.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/core/util.rs b/core/util.rs index b3ce8ecd0..406d7059f 100644 --- a/core/util.rs +++ b/core/util.rs @@ -2,6 +2,7 @@ use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Liter use std::{rc::Rc, sync::Arc}; use crate::{ + function::Func, schema::{self, Column, Schema, Type}, types::{OwnedValue, OwnedValueType}, 
LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO, @@ -565,6 +566,41 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res .collect::>()) } +pub fn can_pushdown_predicate(expr: &Expr) -> bool { + match expr { + Expr::Literal(_) => true, + Expr::Column { .. } => true, + Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()), + Expr::Unary(_, expr) => can_pushdown_predicate(expr), + Expr::FunctionCall { args, name, .. } => { + let function = crate::function::Func::resolve_function( + &name.0, + args.as_ref().map_or(0, |a| a.len()), + ); + matches!(function, Ok(Func::Scalar(_))) + } + Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Between { + lhs, start, end, .. + } => { + can_pushdown_predicate(lhs) + && can_pushdown_predicate(start) + && can_pushdown_predicate(end) + } + Expr::Id(_) => true, + Expr::Name(_) => true, + Expr::Qualified(_, _) => true, + Expr::DoublyQualified(_, _, _) => true, + Expr::InTable { lhs, .. } => can_pushdown_predicate(lhs), + _ => false, + } +} + +fn is_deterministic(func: &Func) -> bool { + matches!(func, Func::Scalar(_)) +} + #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty From de27c2fe4c73839c882e557efbe0ffd801cfd3f3 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 17:31:15 -0400 Subject: [PATCH 281/425] Properly handle pushing predicates for query optimization from xBestIndex --- core/translate/emitter.rs | 2 + core/translate/main_loop.rs | 89 ++++++++++++++++++++++++++++-------- core/translate/plan.rs | 57 ++++++++++------------- core/translate/subquery.rs | 1 + extensions/core/src/vtabs.rs | 1 + extensions/series/src/lib.rs | 8 ++-- extensions/tests/src/lib.rs | 75 ++++++++++++++++++++---------- 7 files changed, 155 insertions(+), 78 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index e2914bbd0..0bc54bb9a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -90,6 +90,7 @@ pub struct TranslateCtx<'a> { // This vector holds the indexes of the result columns that we need to skip. 
pub result_columns_to_skip_in_orderby_sorter: Option<Vec<usize>>, pub resolver: Resolver<'a>, + pub omit_predicates: Vec<usize>, } /// Used to distinguish database operations @@ -132,6 +133,7 @@ fn prologue<'a>( result_column_indexes_in_orderby_sorter: (0..result_column_count).collect(), result_columns_to_skip_in_orderby_sorter: None, resolver: Resolver::new(syms), + omit_predicates: Vec::new(), }; Ok((t_ctx, init_label, start_offset)) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 877f55f75..b3c5d6457 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -9,8 +9,10 @@ use crate::{ }, Result, }; +use limbo_ext::OrderByInfo; use limbo_ext::{ConstraintInfo, OrderByInfo}; use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -289,13 +289,18 @@ pub fn open_loop( pc_if_empty: loop_end, }); } - } else if let Some(vtab) = table.table.virtual_table() { - let constraints: Vec<ConstraintInfo> = predicates + } else if let Some(vtab) = table.virtual_table() { + // Collect usable constraints and track which predicate each came from + let converted_constraints = predicates .iter() - .filter(|p| p.applies_to_table(&table.table, tables)) - .filter_map(|p| try_convert_to_constraint_info(p, table_index)) - .collect(); - + .enumerate() + .filter_map(|(i, pred)| { + try_convert_to_constraint_info(pred, table_index, i) + .map(|c| (c, &predicates[i])) + }) + .collect::<Vec<_>>(); + let constraints: Vec<_> = + converted_constraints.iter().map(|(c, _)| *c).collect(); let order_by = vec![OrderByInfo { column_index: *t_ctx .result_column_indexes_in_orderby_sorter @@ -304,18 +311,43 @@ pub fn open_loop( desc: matches!(iter_dir, IterationDirection::Backwards), }]; let index_info = vtab.best_index(&constraints, &order_by); - let start_reg = - program.alloc_registers(vtab.args.as_ref().map(|a| a.len()).unwrap_or(0)); - let mut cur_reg = start_reg; - let args = match vtab.args.as_ref() { - Some(args) => args, - None => &vec![], - }; - for arg in args { - let reg = cur_reg; - cur_reg += 1; - let _ = translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + } + + // Translate arguments to pass into VFilter + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + let mut arg_regs = vec![]; + + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + let (_, pred) = &converted_constraints[i]; + // this is the literal side of the expression (col = 'literal') + let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else { + continue; + }; + + let literal_expr = match (&**lhs, &**rhs) { + (ast::Expr::Column { .. }, rhs) => rhs, + (lhs, ast::Expr::Column { ..
}) => lhs, + _ => continue, + }; + + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + literal_expr, + target_reg, + &t_ctx.resolver, + )?; + arg_regs.push(target_reg); + } } + + // Encode idx_str to pass to VFilter let mut maybe_idx_str_reg = None; if let Some(idx_str) = index_info.idx_str { let reg = program.alloc_register(); @@ -328,11 +360,32 @@ pub fn open_loop( program.emit_insn(Insn::VFilter { cursor_id, pc_if_empty: loop_end, - arg_count: vtab.args.as_ref().map_or(0, |args| args.len()), + arg_count: args_needed, args_reg: start_reg, idx_str: maybe_idx_str_reg, idx_num: index_info.idx_num as usize, }); + + // Remove predicates omitted by best_index + let omit_predicates: Vec<usize> = predicates + .iter() + .enumerate() + .filter(|(i, _)| { + !index_info + .constraint_usages + .iter() + .enumerate() + .any(|(j, usage)| { + usage.argv_index.is_some() + && !usage.omit + && constraints.get(j).map_or(false, |c| c.pred_idx == *i) + }) + }) + .map(|(i, _)| i) + .collect(); + t_ctx + .omit_predicates + .extend_from_slice(&omit_predicates[..]); } program.resolve_label(loop_start, program.offset()); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9c535c57a..faec051f1 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,5 +1,5 @@ use core::fmt; -use limbo_ext::{ConstraintInfo, ConstraintOp, IndexInfo}; +use limbo_ext::{ConstraintInfo, ConstraintOp}; use limbo_sqlite3_parser::ast::{self, SortOrder}; use std::{ cmp::Ordering, @@ -17,6 +17,7 @@ use crate::{ use crate::{ schema::{PseudoTable, Type}, types::SeekOp, + util::can_pushdown_predicate, }; #[derive(Debug, Clone)] @@ -74,24 +75,10 @@ impl WhereTerm { pub fn should_eval_at_loop(&self, loop_idx: usize) -> bool { self.eval_at == EvalAt::Loop(loop_idx) } - - pub fn applies_to_table(&self, table: &Table, tables: &[TableReference]) -> bool { - match &self.expr { - ast::Expr::Column { - table: table_idx, .. - } => { - let table_ref = &tables[*table_idx]; - table_ref.table == *table - } - _ => false, - } - } } use crate::ast::{Expr, Operator}; -use super::optimizer::{ConstantPredicate, Optimizable}; - fn reverse_operator(op: &Operator) -> Option<Operator> { match op { Operator::Equals => Some(Operator::Equals), @@ -106,9 +93,19 @@ fn reverse_operator(op: &Operator) -> Option<Operator> { } } +/// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. +/// It determines whether the term involves the given table and whether it can +/// be converted into a ConstraintInfo that can be passed to the vtab module's xBestIndex +/// method, which may compute information to improve the query plan, which we can send +/// back to the module as arguments for the VFilter operation. Perhaps we should save the exact Expr +/// against which a relevant column is going to be filtered, e.g.: +/// SELECT key, value FROM vtab WHERE key = 'some_key'; +/// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from +/// the filtering in the vdbe layer. pub fn try_convert_to_constraint_info( term: &WhereTerm, table_index: usize, + pred_idx: usize, ) -> Option<ConstraintInfo> { if term.from_outer_join { return None; @@ -119,32 +116,27 @@ pub fn try_convert_to_constraint_info( }; let (col_expr, _, op) = match (&**lhs, &**rhs) { - (Expr::Column { .. }, rhs) - if rhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (Expr::Column { table, ..
}, rhs) if can_pushdown_predicate(rhs) => { + if table != &table_index { + return None; + } (lhs, rhs, op) } - (lhs, Expr::Column { .. }) - if lhs.check_constant().ok()? == Some(ConstantPredicate::AlwaysTrue) => - { + (lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => { + if table != &table_index { + return None; + } (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } - _ => return None, + _ => { + return None; + } }; - let Expr::Column { - table: tbl_idx, - column, - .. - } = **col_expr - else { + let Expr::Column { column, .. } = **col_expr else { return None; }; - if tbl_idx != table_index { - return None; - } - let column_index = column as u32; let constraint_op = match op { Operator::Equals => ConstraintOp::Eq, @@ -162,6 +154,7 @@ pub fn try_convert_to_constraint_info( column_index, op: constraint_op, usable: true, + pred_idx, }) } /// The loop index where to evaluate the condition. diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs index 87ddddd63..71cb72348 100644 --- a/core/translate/subquery.rs +++ b/core/translate/subquery.rs @@ -83,6 +83,7 @@ pub fn emit_subquery<'a>( reg_offset: plan.offset.map(|_| program.alloc_register()), reg_limit_offset_sum: plan.offset.map(|_| program.alloc_register()), resolver: Resolver::new(t_ctx.resolver.symbol_table), + omit_predicates: Vec::new(), }; let subquery_body_end_label = program.allocate_label(); program.emit_insn(Insn::InitCoroutine { diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs index cf89b1fd1..ae3fc7429 100644 --- a/extensions/core/src/vtabs.rs +++ b/extensions/core/src/vtabs.rs @@ -268,4 +268,5 @@ pub struct ConstraintInfo { pub column_index: u32, pub op: ConstraintOp, pub usable: bool, + pub pred_idx: usize, } diff --git a/extensions/series/src/lib.rs b/extensions/series/src/lib.rs index 5f833a607..21d3a89fa 100644 --- a/extensions/series/src/lib.rs +++ b/extensions/series/src/lib.rs @@ -240,7 +240,7 @@ mod tests { ]; // Initialize cursor through filter - match GenerateSeriesVTab::filter(&mut cursor, &args) { + match GenerateSeriesVTab::filter(&mut cursor, &args, None) { ResultCode::OK => (), ResultCode::EOF => return Ok(vec![]), err => return Err(err), @@ -293,7 +293,7 @@ mod tests { let expected_len = series_expected_length(&series); assert_eq!( values.len(), - expected_len as usize, + expected_len, "Series length mismatch for start={}, stop={}, step={}: expected {}, got {}, values: {:?}", start, stop, @@ -546,7 +546,7 @@ mod tests { let start = series.start; let stop = series.stop; let step = series.step; - let tbl = GenerateSeriesVTab::default(); + let tbl = GenerateSeriesVTab {}; let mut cursor = tbl.open().unwrap(); let args = vec![ @@ -556,7 +556,7 @@ mod tests { ]; // Initialize cursor through filter - GenerateSeriesVTab::filter(&mut cursor, &args); + GenerateSeriesVTab::filter(&mut cursor, &args, None); let mut rowids = vec![]; while !GenerateSeriesVTab::eof(&cursor) { diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index c10a40f58..7592355ed 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -1,7 +1,7 @@ use lazy_static::lazy_static; use limbo_ext::{ - register_extension, scalar, ConstraintInfo, ConstraintOp, ExtResult, IndexInfo, OrderByInfo, - ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, + register_extension, scalar, ConstraintInfo, ConstraintOp, ConstraintUsage, ExtResult, + IndexInfo, OrderByInfo, ResultCode, VTabCursor, VTabKind, VTabModule, VTabModuleDerive, Value, }; 
#[cfg(not(target_family = "wasm"))] use limbo_ext::{VfsDerive, VfsExtension, VfsFile}; @@ -40,6 +40,7 @@ impl VTabModule for KVStoreVTab { } fn open(&self) -> Result { + let _ = env_logger::try_init(); Ok(KVStoreCursor { rows: Vec::new(), index: None, @@ -47,25 +48,29 @@ impl VTabModule for KVStoreVTab { } fn best_index(constraints: &[ConstraintInfo], _order_by: &[OrderByInfo]) -> IndexInfo { - // not exactly the ideal kind of table to demonstrate this on... - for constraint in constraints { - println!("constraint: {:?}", constraint); + // Look for: key = ? + for constraint in constraints.iter() { if constraint.usable && constraint.op == ConstraintOp::Eq && constraint.column_index == 0 { - // key = ? is supported + log::debug!("xBestIndex: constraint found for 'key = ?'"); return IndexInfo { - idx_num: 1, // arbitrary non-zero code to signify optimization + idx_num: 1, idx_str: Some("key_eq".to_string()), order_by_consumed: false, estimated_cost: 10.0, - ..Default::default() + estimated_rows: 4, + constraint_usages: vec![ConstraintUsage { + omit: true, + argv_index: Some(1), + }], }; } } // fallback: full scan + log::debug!("No usable constraints found, using full scan"); IndexInfo { idx_num: -1, idx_str: None, @@ -77,23 +82,45 @@ impl VTabModule for KVStoreVTab { fn filter( cursor: &mut Self::VCursor, - _args: &[Value], - _idx_str: Option<(&str, i32)>, + args: &[Value], + idx_str: Option<(&str, i32)>, ) -> ResultCode { - let store = GLOBAL_STORE.lock().unwrap(); - cursor.rows = store - .iter() - .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) - .collect(); - cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - - if cursor.rows.is_empty() { - cursor.index = None; - return ResultCode::EOF; - } else { - cursor.index = Some(0); + match idx_str { + Some(("key_eq", 1)) => { + let key = args + .first() + .and_then(|v| v.to_text()) + .map(|s| s.to_string()); + log::debug!("idx_str found: key_eq\n value: {:?}", key); + if let Some(key) = key { + let rowid = hash_key(&key); + let store = GLOBAL_STORE.lock().unwrap(); + if let Some((k, v)) = store.get(&rowid) { + cursor.rows.push((rowid, k.clone(), v.clone())); + cursor.index = Some(0); + } else { + cursor.index = None; + } + return ResultCode::OK; + } + cursor.index = None; + ResultCode::OK + } + _ => { + let store = GLOBAL_STORE.lock().unwrap(); + cursor.rows = store + .iter() + .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) + .collect(); + cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); + cursor.index = if cursor.rows.is_empty() { + None + } else { + Some(0) + }; + ResultCode::OK + } } - ResultCode::OK } fn insert(&mut self, values: &[Value]) -> Result { @@ -152,7 +179,7 @@ impl VTabModule for KVStoreVTab { _ => Err("Invalid column".into()), } } else { - Err("cursor out of range".into()) + Err("Invalid Column".into()) } } From 6f2c6c6a61138d38f4686d341cf01bdd798f91da Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 19:30:00 -0400 Subject: [PATCH 282/425] Actually skip omitted predicates in open loop --- core/translate/main_loop.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index b3c5d6457..b9b857eee 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -396,10 +396,14 @@ pub fn open_loop( }); } - for cond in predicates + for (i, cond) in predicates .iter() - .filter(|cond| cond.should_eval_at_loop(table_index)) + .enumerate() + .filter(|(_, cond)| cond.should_eval_at_loop(table_index)) { + if 
t_ctx.omit_predicates.contains(&i) { + continue; + } let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, From 7d271edf8a83dfec6866f8fadd1af2c8771dfb9a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 19:33:15 -0400 Subject: [PATCH 283/425] Remove unused function in core/util.rs --- core/util.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/core/util.rs b/core/util.rs index 406d7059f..8ab86c70a 100644 --- a/core/util.rs +++ b/core/util.rs @@ -578,6 +578,7 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { &name.0, args.as_ref().map_or(0, |a| a.len()), ); + // is deterministic matches!(function, Ok(Func::Scalar(_))) } Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), @@ -597,10 +598,6 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { } } -fn is_deterministic(func: &Func) -> bool { - matches!(func, Func::Scalar(_)) -} - #[derive(Debug, Default, PartialEq)] pub struct OpenOptions<'a> { /// The authority component of the URI. may be 'localhost' or empty From 528a9b6c7e343e122371d0d73b9218a3d2b18601 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 6 Apr 2025 23:44:19 -0400 Subject: [PATCH 284/425] Clean up allocations in main loop and fix ext tests --- core/translate/main_loop.rs | 208 +++++++++++++++++++----------------- extensions/tests/src/lib.rs | 15 ++- 2 files changed, 119 insertions(+), 104 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index b9b857eee..00c943b9b 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,3 +1,6 @@ +use limbo_ext::{OrderByInfo, VTabKind}; +use limbo_sqlite3_parser::ast; + use crate::{ schema::Table, translate::result_row::emit_select_result, @@ -9,10 +12,6 @@ use crate::{ }, Result, }; -use limbo_ext::OrderByInfo; -use limbo_ext::{ConstraintInfo, OrderByInfo}; -use limbo_sqlite3_parser::ast; -use limbo_sqlite3_parser::ast; use super::{ aggregation::translate_aggregation_step, @@ -293,99 +292,115 @@ pub fn open_loop( } } else if let Some(vtab) = table.virtual_table() { // Collect usable constraints and track which predicate each came from - let converted_constraints = predicates - .iter() - .enumerate() - .filter_map(|(i, pred)| { - try_convert_to_constraint_info(pred, table_index, i) - .map(|c| (c, &predicates[i])) - }) - .collect::>(); - let constraints: Vec<_> = - converted_constraints.iter().map(|(c, _)| *c).collect(); - let order_by = vec![OrderByInfo { - column_index: *t_ctx - .result_column_indexes_in_orderby_sorter - .first() - .unwrap_or(&0) as u32, - desc: matches!(iter_dir, IterationDirection::Backwards), - }]; - let index_info = vtab.best_index(&constraints, &order_by); - - // Translate arguments to pass into VFilter - let args_needed = index_info - .constraint_usages - .iter() - .filter(|u| u.argv_index.is_some()) - .count(); - let start_reg = program.alloc_registers(args_needed); - let mut arg_regs = vec![]; - - for (i, usage) in index_info.constraint_usages.iter().enumerate() { - if let Some(argv_index) = usage.argv_index { - let (_, pred) = &converted_constraints[i]; - // this is the literal side of the expression (col = 'literal') - let ast::Expr::Binary(lhs, _, rhs) = &pred.expr else { - continue; - }; - - let literal_expr = match (&**lhs, &**rhs) { - (ast::Expr::Column { .. }, rhs) => rhs, - (lhs, ast::Expr::Column { .. 
}) => lhs, - _ => continue, - }; - - let target_reg = start_reg + (argv_index - 1) as usize; - translate_expr( - program, - Some(tables), - literal_expr, - target_reg, - &t_ctx.resolver, - )?; - arg_regs.push(target_reg); + // Virtual tables may be used either as VTab or TVF, distinguished by vtab.name. + let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab + .kind + .eq(&VTabKind::VirtualTable) + { + // Build converted constraints from the predicates. + let mut converted_constraints = Vec::with_capacity(predicates.len()); + for (i, pred) in predicates.iter().enumerate() { + if let Some(cinfo) = + try_convert_to_constraint_info(pred, table_index, i) + { + converted_constraints.push((cinfo, pred)); + } } - } + let constraints: Vec<_> = + converted_constraints.iter().map(|(c, _)| *c).collect(); + let order_by = vec![OrderByInfo { + column_index: *t_ctx + .result_column_indexes_in_orderby_sorter + .first() + .unwrap_or(&0) as u32, + desc: matches!(iter_dir, IterationDirection::Backwards), + }]; + let index_info = vtab.best_index(&constraints, &order_by); - // Encode idx_str to pass to VFilter - let mut maybe_idx_str_reg = None; - if let Some(idx_str) = index_info.idx_str { - let reg = program.alloc_register(); - program.emit_insn(Insn::String8 { - dest: reg, - value: idx_str, - }); - maybe_idx_str_reg = Some(reg); - } + // Determine the number of VFilter arguments (constraints with an argv_index). + let args_needed = index_info + .constraint_usages + .iter() + .filter(|u| u.argv_index.is_some()) + .count(); + let start_reg = program.alloc_registers(args_needed); + + // For each constraint used by best_index, translate the opposite side. + for (i, usage) in index_info.constraint_usages.iter().enumerate() { + if let Some(argv_index) = usage.argv_index { + if let Some((_, pred)) = converted_constraints.get(i) { + if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr { + let literal_expr = match (&**lhs, &**rhs) { + (ast::Expr::Column { .. }, lit) => lit, + (lit, ast::Expr::Column { .. }) => lit, + _ => continue, + }; + // argv_index is 1-based; adjust to get the proper register offset. + let target_reg = start_reg + (argv_index - 1) as usize; + translate_expr( + program, + Some(tables), + literal_expr, + target_reg, + &t_ctx.resolver, + )?; + } + } + } + } + // If best_index provided an idx_str, translate it. + let maybe_idx_str = if let Some(idx_str) = index_info.idx_str { + let reg = program.alloc_register(); + program.emit_insn(Insn::String8 { + dest: reg, + value: idx_str, + }); + Some(reg) + } else { + None + }; + + // Record (in t_ctx) the indices of predicates that best_index tells us to omit. + // Here we insert directly into t_ctx.omit_predicates + for (j, usage) in index_info.constraint_usages.iter().enumerate() { + if usage.argv_index.is_some() && usage.omit { + if let Some(constraint) = constraints.get(j) { + t_ctx.omit_predicates.push(constraint.pred_idx); + } + } + } + ( + start_reg, + args_needed, + maybe_idx_str, + Some(index_info.idx_num), + ) + } else { + // For table-valued functions: translate the table args. + let args = match vtab.args.as_ref() { + Some(args) => args, + None => &vec![], + }; + let start_reg = program.alloc_registers(args.len()); + let mut cur_reg = start_reg; + for arg in args { + let reg = cur_reg; + cur_reg += 1; + let _ = + translate_expr(program, Some(tables), arg, reg, &t_ctx.resolver)?; + } + (start_reg, args.len(), None, None) + }; + + // Emit VFilter with the computed arguments. 
program.emit_insn(Insn::VFilter { cursor_id, - pc_if_empty: loop_end, - arg_count: args_needed, + arg_count: count, args_reg: start_reg, - idx_str: maybe_idx_str_reg, - idx_num: index_info.idx_num as usize, + idx_str: maybe_idx_str, + idx_num: maybe_idx_int.unwrap_or(0) as usize, + pc_if_empty: loop_end, }); - - // Remove predicates omitted by best_index - let omit_predicates: Vec = predicates - .iter() - .enumerate() - .filter(|(i, _)| { - !index_info - .constraint_usages - .iter() - .enumerate() - .any(|(j, usage)| { - usage.argv_index.is_some() - && !usage.omit - && constraints.get(j).map_or(false, |c| c.pred_idx == *i) - }) - }) - .map(|(i, _)| i) - .collect(); - t_ctx - .omit_predicates - .extend_from_slice(&omit_predicates[..]); } program.resolve_label(loop_start, program.offset()); @@ -396,14 +411,9 @@ pub fn open_loop( }); } - for (i, cond) in predicates - .iter() - .enumerate() - .filter(|(_, cond)| cond.should_eval_at_loop(table_index)) - { - if t_ctx.omit_predicates.contains(&i) { - continue; - } + for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| { + cond.should_eval_at_loop(table_index) && !t_ctx.omit_predicates.contains(i) + }) { let jump_target_when_true = program.allocate_label(); let condition_metadata = ConditionMetadata { jump_if_condition_is_true: false, diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index 7592355ed..a574febfb 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -99,10 +99,13 @@ impl VTabModule for KVStoreVTab { cursor.rows.push((rowid, k.clone(), v.clone())); cursor.index = Some(0); } else { + cursor.rows.clear(); cursor.index = None; + return ResultCode::EOF; } return ResultCode::OK; } + cursor.rows.clear(); cursor.index = None; ResultCode::OK } @@ -113,12 +116,13 @@ impl VTabModule for KVStoreVTab { .map(|(&rowid, (k, v))| (rowid, k.clone(), v.clone())) .collect(); cursor.rows.sort_by_key(|(rowid, _, _)| *rowid); - cursor.index = if cursor.rows.is_empty() { - None + if cursor.rows.is_empty() { + cursor.index = None; + ResultCode::EOF } else { - Some(0) - }; - ResultCode::OK + cursor.index = Some(0); + ResultCode::OK + } } } } @@ -156,6 +160,7 @@ impl VTabModule for KVStoreVTab { let _ = self.insert(values)?; Ok(()) } + fn eof(cursor: &Self::VCursor) -> bool { cursor.index.is_some_and(|s| s >= cursor.rows.len()) || cursor.index.is_none() } From e17fd7edc40cf2faa0bf618da28e9a088093410d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 9 Apr 2025 11:06:26 -0400 Subject: [PATCH 285/425] Add comments and address PR review --- core/translate/main_loop.rs | 11 +++++------ core/translate/plan.rs | 8 +++++++- core/util.rs | 8 ++------ extensions/core/src/vtabs.rs | 26 ++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 00c943b9b..86ce7415f 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -291,8 +291,7 @@ pub fn open_loop( }); } } else if let Some(vtab) = table.virtual_table() { - // Collect usable constraints and track which predicate each came from - // Virtual tables may be used either as VTab or TVF, distinguished by vtab.name. 
+                        // Virtual tables may be used either as VTab or TVF
                         let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab
                             .kind
                             .eq(&VTabKind::VirtualTable)
                         {
@@ -308,13 +307,14 @@ pub fn open_loop(
                             }
                             let constraints: Vec<_> =
                                 converted_constraints.iter().map(|(c, _)| *c).collect();
-                            let order_by = vec![OrderByInfo {
+                            let order_by = [OrderByInfo {
                                 column_index: *t_ctx
                                     .result_column_indexes_in_orderby_sorter
                                     .first()
                                     .unwrap_or(&0) as u32,
                                 desc: matches!(iter_dir, IterationDirection::Backwards),
                             }];
+                            // Call xBestIndex method on the underlying vtable.
                             let index_info = vtab.best_index(&constraints, &order_by);

                             // Determine the number of VFilter arguments (constraints with an argv_index).
@@ -330,7 +330,7 @@ pub fn open_loop(
                                 if let Some(argv_index) = usage.argv_index {
                                     if let Some((_, pred)) = converted_constraints.get(i) {
                                         if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr {
-                                            let literal_expr = match (&**lhs, &**rhs) {
+                                            let expr = match (&**lhs, &**rhs) {
                                                 (ast::Expr::Column { .. }, lit) => lit,
                                                 (lit, ast::Expr::Column { .. }) => lit,
                                                 _ => continue,
@@ -340,7 +340,7 @@ pub fn open_loop(
                                             translate_expr(
                                                 program,
                                                 Some(tables),
-                                                literal_expr,
+                                                expr,
                                                 target_reg,
                                                 &t_ctx.resolver,
                                             )?;
@@ -359,7 +359,6 @@ pub fn open_loop(
                             } else {
                                 None
                             };
-                            // Record (in t_ctx) the indices of predicates that best_index tells us to omit.
                             // Here we insert directly into t_ctx.omit_predicates
                             for (j, usage) in index_info.constraint_usages.iter().enumerate() {
diff --git a/core/translate/plan.rs b/core/translate/plan.rs
index faec051f1..3b60775a7 100644
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -79,6 +79,10 @@ impl WhereTerm {

 use crate::ast::{Expr, Operator};

+// This function takes an operator and returns the operator you would obtain if the operands were swapped.
+// e.g. "literal < column"
+// which is not the canonical order for constraint pushdown.
+// This function will return > so that the expression can be treated as if it were written "column > literal"
 fn reverse_operator(op: &Operator) -> Option<Operator> {
     match op {
         Operator::Equals => Some(Operator::Equals),
@@ -97,7 +101,7 @@ fn reverse_operator(op: &Operator) -> Option<Operator> {
 /// It determines whether or not it involves the given table and whether or not it can
 /// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex
 /// method, which will possibly calculate some information to improve the query plan, that we can send
-/// back to it as arguments for the VFilter operation. Perhaps we should save the exact Expr for which a relevant column
+/// back to it as arguments for the VFilter operation. We also track the exact Expr that a relevant column
 /// is going to be filtered against: e.g:
 /// 'SELECT key, value FROM vtab WHERE key = 'some_key';
 /// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from
@@ -126,6 +130,8 @@ pub fn try_convert_to_constraint_info(
             if table != &table_index {
                 return None;
             }
+            // if the column is on the rhs, swap the operands and possibly
+            // the operator if it's a logical comparison.
             (rhs, lhs, &reverse_operator(op).unwrap_or(*op))
         }
         _ => {
diff --git a/core/util.rs b/core/util.rs
index 8ab86c70a..5d0010423 100644
--- a/core/util.rs
+++ b/core/util.rs
@@ -566,10 +566,11 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res
         .collect::<Vec<_>>())
 }

+/// This function checks if a given expression is a constant value that can be pushed down to the database engine.
+/// It is expected to be called with the other half of a binary expression with an Expr::Column
 pub fn can_pushdown_predicate(expr: &Expr) -> bool {
     match expr {
         Expr::Literal(_) => true,
-        Expr::Column { .. } => true,
         Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs),
         Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()),
         Expr::Unary(_, expr) => can_pushdown_predicate(expr),
@@ -589,11 +590,6 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool {
             && can_pushdown_predicate(start)
             && can_pushdown_predicate(end)
         }
-        Expr::Id(_) => true,
-        Expr::Name(_) => true,
-        Expr::Qualified(_, _) => true,
-        Expr::DoublyQualified(_, _, _) => true,
-        Expr::InTable { lhs, .. } => can_pushdown_predicate(lhs),
         _ => false,
     }
 }
diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs
index ae3fc7429..c2f6620d1 100644
--- a/extensions/core/src/vtabs.rs
+++ b/extensions/core/src/vtabs.rs
@@ -158,19 +158,30 @@ pub enum ConstraintOp {

 #[repr(C)]
 #[derive(Copy, Clone)]
+/// Describes an ORDER BY clause in a query involving a virtual table.
+/// Passed along with the constraints to xBestIndex.
 pub struct OrderByInfo {
+    /// The index of the column referenced in the ORDER BY clause.
     pub column_index: u32,
+    /// Whether or not the clause is in descending order.
     pub desc: bool,
 }

+/// The internal (core) representation of an 'index' on a virtual table.
+/// Returned from xBestIndex and then processed and passed to VFilter.
 #[derive(Debug, Clone)]
 pub struct IndexInfo {
+    /// The index number, used to identify the index internally by the VTab
     pub idx_num: i32,
+    /// Optional index name. These are passed to VFilter as a tuple (idx_num, idx_str)
     pub idx_str: Option<String>,
+    /// Whether the index consumes the ORDER BY clause
     pub order_by_consumed: bool,
     /// TODO: for eventual cost-based query planning
     pub estimated_cost: f64,
+    /// Estimated number of rows that the query will return
     pub estimated_rows: u32,
+    /// List of constraints that can be used to optimize the query.
     pub constraint_usages: Vec<ConstraintUsage>,
 }
 impl Default for IndexInfo {
@@ -245,6 +256,7 @@ impl IndexInfo {

 #[repr(C)]
 #[derive(Clone, Debug)]
+/// FFI representation of IndexInfo.
 pub struct ExtIndexInfo {
     pub idx_num: i32,
     pub idx_str: *const u8,
@@ -256,17 +268,27 @@ pub struct ExtIndexInfo {
     pub constraint_usage_len: usize,
 }

+/// Returned from xBestIndex to describe how the virtual table
+/// can use the constraints in the WHERE clause of a query.
 #[derive(Debug, Clone, Copy)]
 pub struct ConstraintUsage {
-    pub argv_index: Option<u32>, // 1-based index into VFilter args
-    pub omit: bool,              // if true, core skips checking it again
+    /// 1-based index of the argument in the WHERE clause.
+    pub argv_index: Option<u32>,
+    /// If true, core can omit this constraint in the vdbe layer.
+    pub omit: bool,
 }

 #[derive(Clone, Copy, Debug)]
 #[repr(C)]
+/// The primary argument to xBestIndex, which describes a constraint
+/// in a query involving a virtual table.
 pub struct ConstraintInfo {
+    /// The index of the column referenced in the WHERE clause.
     pub column_index: u32,
+    /// The operator used in the clause.
     pub op: ConstraintOp,
+    /// Whether or not the constraint is guaranteed to be enforced.
pub usable: bool, + /// pub pred_idx: usize, } From d53c60e0719775f07b025eee5e52caab7efa7332 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:11:21 -0400 Subject: [PATCH 286/425] Prevent double allocations for VFilter args in vdbe --- core/lib.rs | 13 +++++-------- core/vdbe/execute.rs | 9 +++++++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 68384e77d..e130306f7 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -705,19 +705,16 @@ impl VirtualTable { VTabOpaqueCursor::new(cursor) } + #[tracing::instrument(skip(cursor))] pub fn filter( &self, cursor: &VTabOpaqueCursor, idx_num: i32, idx_str: Option, arg_count: usize, - args: Vec, + args: Vec, ) -> Result { - let mut filter_args = Vec::with_capacity(arg_count); - for i in 0..arg_count { - let ownedvalue_arg = args.get(i).unwrap(); - filter_args.push(ownedvalue_arg.to_ffi()); - } + tracing::trace!("xFilter"); let c_idx_str = idx_str .map(|s| std::ffi::CString::new(s).unwrap()) .map(|cstr| cstr.into_raw()) @@ -726,12 +723,12 @@ impl VirtualTable { (self.implementation.filter)( cursor.as_ptr(), arg_count as i32, - filter_args.as_ptr(), + args.as_ptr(), c_idx_str, idx_num, ) }; - for arg in filter_args { + for arg in args { unsafe { arg.__free_internal_type(); } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 9abc7c6c3..de871f54c 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -979,9 +979,14 @@ pub fn op_vfilter( let has_rows = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_virtual_mut(); - let mut args = Vec::new(); + let mut args = Vec::with_capacity(*arg_count); for i in 0..*arg_count { - args.push(state.registers[args_reg + i].get_owned_value().clone()); + args.push( + state.registers[args_reg + i] + .get_owned_value() + .clone() + .to_ffi(), + ); } let idx_str = if let Some(idx_str) = idx_str { Some(state.registers[*idx_str].get_owned_value().to_string()) From 95a2fdc096c0e9d7c9722b4fff13e7e0b7c9e509 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:12:07 -0400 Subject: [PATCH 287/425] Fix array from ptr in bestindex ffi method in proc macro --- extensions/tests/src/lib.rs | 10 +++++++++- macros/src/lib.rs | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/extensions/tests/src/lib.rs b/extensions/tests/src/lib.rs index a574febfb..5c6495595 100644 --- a/extensions/tests/src/lib.rs +++ b/extensions/tests/src/lib.rs @@ -54,11 +54,19 @@ impl VTabModule for KVStoreVTab { && constraint.op == ConstraintOp::Eq && constraint.column_index == 0 { + // this extension wouldn't support order by but for testing purposes, + // we will consume it if we find an ASC order by clause on the value column + let mut consumed = false; + if let Some(order) = _order_by.first() { + if order.column_index == 1 && !order.desc { + consumed = true; + } + } log::debug!("xBestIndex: constraint found for 'key = ?'"); return IndexInfo { idx_num: 1, idx_str: Some("key_eq".to_string()), - order_by_consumed: false, + order_by_consumed: consumed, estimated_cost: 10.0, estimated_rows: 4, constraint_usages: vec![ConstraintUsage { diff --git a/macros/src/lib.rs b/macros/src/lib.rs index acb969876..d47101589 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -628,8 +628,8 @@ pub fn derive_vtab_module(input: TokenStream) -> TokenStream { order_by: *const ::limbo_ext::OrderByInfo, n_order_by: i32, ) -> ::limbo_ext::ExtIndexInfo { - let constraints = std::slice::from_raw_parts(constraints, n_constraints as usize); - let 
order_by = std::slice::from_raw_parts(order_by, n_order_by as usize);
+        let constraints = if n_constraints > 0 { std::slice::from_raw_parts(constraints, n_constraints as usize) } else { &[] };
+        let order_by = if n_order_by > 0 { std::slice::from_raw_parts(order_by, n_order_by as usize) } else { &[] };
         <#struct_name as ::limbo_ext::VTabModule>::best_index(constraints, order_by).to_ffi()
     }

From 245e7f94f6bd3238de4df9a14549bd09e2a0a26d Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Thu, 17 Apr 2025 13:14:04 -0400
Subject: [PATCH 288/425] Store packed field on ConstraintInfo to optimize planning for vfilter

---
 extensions/core/src/vtabs.rs | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/extensions/core/src/vtabs.rs b/extensions/core/src/vtabs.rs
index c2f6620d1..5d86457f7 100644
--- a/extensions/core/src/vtabs.rs
+++ b/extensions/core/src/vtabs.rs
@@ -272,7 +272,7 @@ pub struct ExtIndexInfo {
 /// can use the constraints in the WHERE clause of a query.
 #[derive(Debug, Clone, Copy)]
 pub struct ConstraintUsage {
-    /// 1-based index of the argument in the WHERE clause.
+    /// 1-based index of the argument passed
     pub argv_index: Option<u32>,
     /// If true, core can omit this constraint in the vdbe layer.
     pub omit: bool,
 }
@@ -289,6 +289,18 @@ pub struct ConstraintInfo {
     pub op: ConstraintOp,
     /// Whether or not the constraint is guaranteed to be enforced.
     pub usable: bool,
-    ///
-    pub pred_idx: usize,
+    /// Packed integer with the index of the constraint in the planner,
+    /// and the side of the binary expr that the relevant column is on.
+    pub plan_info: u32,
+}
+
+impl ConstraintInfo {
+    #[inline(always)]
+    pub fn pack_plan_info(pred_idx: u32, is_right_side: bool) -> u32 {
+        ((pred_idx) << 1) | (is_right_side as u32)
+    }
+    #[inline(always)]
+    pub fn unpack_plan_info(&self) -> (usize, bool) {
+        ((self.plan_info >> 1) as usize, (self.plan_info & 1) != 0)
+    }
 }

From a25a02efe1a62916b0de618ac55774c885156bc2 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Thu, 17 Apr 2025 13:15:20 -0400
Subject: [PATCH 289/425] Improve xBestIndex call site and allow for proper handling of join and where constraints

---
 core/translate/main_loop.rs | 73 +++++++++++++---------------
 core/translate/plan.rs      | 96 +++++++++++++++++++++++--------------
 core/util.rs                | 21 ++++----
 3 files changed, 106 insertions(+), 84 deletions(-)

diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index 86ce7415f..f9ba9ca97 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -291,31 +291,33 @@ pub fn open_loop(
                 });
             }
         } else if let Some(vtab) = table.virtual_table() {
-                        // Virtual tables may be used either as VTab or TVF
                         let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab
                             .kind
                             .eq(&VTabKind::VirtualTable)
                         {
-                            // Build converted constraints from the predicates.
-                            let mut converted_constraints = Vec::with_capacity(predicates.len());
-                            for (i, pred) in predicates.iter().enumerate() {
-                                if let Some(cinfo) =
-                                    try_convert_to_constraint_info(pred, table_index, i)
-                                {
-                                    converted_constraints.push((cinfo, pred));
-                                }
-                            }
-                            let constraints: Vec<_> =
-                                converted_constraints.iter().map(|(c, _)| *c).collect();
-                            let order_by = [OrderByInfo {
-                                column_index: *t_ctx
-                                    .result_column_indexes_in_orderby_sorter
-                                    .first()
-                                    .unwrap_or(&0) as u32,
-                                desc: matches!(iter_dir, IterationDirection::Backwards),
-                            }];
-                            // Call xBestIndex method on the underlying vtable.
-                            let index_info = vtab.best_index(&constraints, &order_by);
+                        // Virtual-table (non-TVF) modules can receive constraints via xBestIndex.
+                        // They return information that we then pass to the VFilter operation.
+                        // We forward every predicate that touches vtab columns.
+                        //
+                        // vtab.col = literal (always usable)
+                        // vtab.col = outer_table.col (usable, because outer_table is already positioned)
+                        // vtab.col = later_table.col (forwarded with usable = false)
+                        //
+                        // xBestIndex decides which ones it wants by setting argvIndex and whether the
+                        // core layer may omit them (omit = true).
+                        // We then materialise the RHS/LHS into registers before issuing VFilter.
+                        let converted_constraints = predicates
+                            .iter()
+                            .filter(|p| p.should_eval_at_loop(table_index))
+                            .enumerate()
+                            .filter_map(|(i, p)| {
+                                // Build ConstraintInfo from the predicates
+                                convert_where_to_vtab_constraint(p, table_index, i)
+                            })
+                            .collect::<Vec<_>>();
+                        // TODO: get proper order_by information to pass to the vtab.
+                        // maybe encode more info on t_ctx? we need: [col_idx, is_descending]
+                        let index_info = vtab.best_index(&converted_constraints, &[]);

                             // Determine the number of VFilter arguments (constraints with an argv_index).
@@ -330,13 +330,12 @@ pub fn open_loop(
                             // For each constraint used by best_index, translate the opposite side.
                             for (i, usage) in index_info.constraint_usages.iter().enumerate() {
                                 if let Some(argv_index) = usage.argv_index {
-                                    if let Some((_, pred)) = converted_constraints.get(i) {
-                                        if let ast::Expr::Binary(lhs, _, rhs) = &pred.expr {
-                                            let expr = match (&**lhs, &**rhs) {
-                                                (ast::Expr::Column { .. }, lit) => lit,
-                                                (lit, ast::Expr::Column { .. }) => lit,
-                                                _ => continue,
-                                            };
+                                    if let Some(cinfo) = converted_constraints.get(i) {
+                                        let (pred_idx, is_rhs) = cinfo.unpack_plan_info();
+                                        if let ast::Expr::Binary(lhs, _, rhs) =
+                                            &predicates[pred_idx].expr
+                                        {
+                                            let expr = if is_rhs { rhs } else { lhs };
                                             // argv_index is 1-based; adjust to get the proper register offset.
                                             let target_reg = start_reg + (argv_index - 1) as usize;
                                             translate_expr(
@@ -344,6 +345,9 @@ pub fn open_loop(
                                                 target_reg,
                                                 &t_ctx.resolver,
                                             )?;
+                                            if cinfo.usable && usage.omit {
+                                                t_ctx.omit_predicates.push(pred_idx)
+                                            }
                                         }
                                     }
                                 }
@@ -359,7 +363,6 @@ pub fn open_loop(
                             } else {
                                 None
                             };
-                            // Record (in t_ctx) the indices of predicates that best_index tells us to omit.
- // Here we insert directly into t_ctx.omit_predicates - for (j, usage) in index_info.constraint_usages.iter().enumerate() { - if usage.argv_index.is_some() && usage.omit { - if let Some(constraint) = constraints.get(j) { - t_ctx.omit_predicates.push(constraint.pred_idx); - } - } - } ( start_reg, args_needed, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3b60775a7..25a4fd7ef 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -97,6 +97,18 @@ fn reverse_operator(op: &Operator) -> Option { } } +fn to_ext_constraint_op(op: &Operator) -> Option { + match op { + Operator::Equals => Some(ConstraintOp::Eq), + Operator::Less => Some(ConstraintOp::Lt), + Operator::LessEquals => Some(ConstraintOp::Le), + Operator::Greater => Some(ConstraintOp::Gt), + Operator::GreaterEquals => Some(ConstraintOp::Ge), + Operator::NotEquals => Some(ConstraintOp::Ne), + _ => None, + } +} + /// This function takes a WhereTerm for a select involving a VTab at index 'table_index'. /// It determines whether or not it involves the given table and whether or not it can /// be converted into a ConstraintInfo which can be passed to the vtab module's xBestIndex @@ -106,7 +118,7 @@ fn reverse_operator(op: &Operator) -> Option { /// 'SELECT key, value FROM vtab WHERE key = 'some_key'; /// we need to send the OwnedValue('some_key') as an argument to VFilter, and possibly omit it from /// the filtration in the vdbe layer. -pub fn try_convert_to_constraint_info( +pub fn convert_where_to_vtab_constraint( term: &WhereTerm, table_index: usize, pred_idx: usize, @@ -114,53 +126,63 @@ pub fn try_convert_to_constraint_info( if term.from_outer_join { return None; } - let Expr::Binary(lhs, op, rhs) = &term.expr else { return None; }; - - let (col_expr, _, op) = match (&**lhs, &**rhs) { - (Expr::Column { table, .. }, rhs) if can_pushdown_predicate(rhs) => { - if table != &table_index { - return None; + let expr_is_ready = |e: &Expr| -> bool { can_pushdown_predicate(e, table_index) }; + let (vcol_idx, op_for_vtab, usable, is_rhs) = match (&**lhs, &**rhs) { + ( + Expr::Column { + table: tbl_l, + column: col_l, + .. + }, + Expr::Column { + table: tbl_r, + column: col_r, + .. + }, + ) => { + // one side must be the virtual table + let vtab_on_l = *tbl_l == table_index; + let vtab_on_r = *tbl_r == table_index; + if vtab_on_l == vtab_on_r { + return None; // either both or none -> not convertible } - (lhs, rhs, op) - } - (lhs, Expr::Column { table, .. }) if can_pushdown_predicate(lhs) => { - if table != &table_index { - return None; + + if vtab_on_l { + // vtab on left side: operator unchanged + let usable = *tbl_r < table_index; // usable if the other table is already positioned + (col_l, op, usable, false) + } else { + // vtab on right side of the expr: reverse operator + let usable = *tbl_l < table_index; + (col_r, &reverse_operator(op).unwrap_or(*op), usable, true) } - // if the column is on the rhs, swap the operands and possibly - // the operator if it's a logical comparison. - (rhs, lhs, &reverse_operator(op).unwrap_or(*op)) } - _ => { - return None; + (Expr::Column { table, column, .. }, other) if *table == table_index => { + ( + column, + op, + expr_is_ready(other), // literal / earlier‑table / deterministic func ? + false, + ) } - }; + (other, Expr::Column { table, column, .. }) if *table == table_index => ( + column, + &reverse_operator(op).unwrap_or(*op), + expr_is_ready(other), + true, + ), - let Expr::Column { column, .. 
} = **col_expr else { - return None; - }; - - let column_index = column as u32; - let constraint_op = match op { - Operator::Equals => ConstraintOp::Eq, - Operator::Less => ConstraintOp::Lt, - Operator::LessEquals => ConstraintOp::Le, - Operator::Greater => ConstraintOp::Gt, - Operator::GreaterEquals => ConstraintOp::Ge, - Operator::NotEquals => ConstraintOp::Ne, - Operator::Is => ConstraintOp::Is, - Operator::IsNot => ConstraintOp::IsNot, - _ => return None, + _ => return None, // does not involve the virtual table at all }; Some(ConstraintInfo { - column_index, - op: constraint_op, - usable: true, - pred_idx, + column_index: *vcol_idx as u32, + op: to_ext_constraint_op(op_for_vtab)?, + usable, + plan_info: ConstraintInfo::pack_plan_info(pred_idx as u32, is_rhs), }) } /// The loop index where to evaluate the condition. diff --git a/core/util.rs b/core/util.rs index 5d0010423..f518df6f4 100644 --- a/core/util.rs +++ b/core/util.rs @@ -568,12 +568,15 @@ pub fn columns_from_create_table_body(body: &ast::CreateTableBody) -> crate::Res /// This function checks if a given expression is a constant value that can be pushed down to the database engine. /// It is expected to be called with the other half of a binary expression with an Expr::Column -pub fn can_pushdown_predicate(expr: &Expr) -> bool { +pub fn can_pushdown_predicate(expr: &Expr, table_idx: usize) -> bool { match expr { Expr::Literal(_) => true, - Expr::Binary(lhs, _, rhs) => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), - Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap()), - Expr::Unary(_, expr) => can_pushdown_predicate(expr), + Expr::Column { table, .. } => *table <= table_idx, + Expr::Binary(lhs, _, rhs) => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } + Expr::Parenthesized(exprs) => can_pushdown_predicate(exprs.first().unwrap(), table_idx), + Expr::Unary(_, expr) => can_pushdown_predicate(expr, table_idx), Expr::FunctionCall { args, name, .. } => { let function = crate::function::Func::resolve_function( &name.0, @@ -582,13 +585,15 @@ pub fn can_pushdown_predicate(expr: &Expr) -> bool { // is deterministic matches!(function, Ok(Func::Scalar(_))) } - Expr::Like { lhs, rhs, .. } => can_pushdown_predicate(lhs) && can_pushdown_predicate(rhs), + Expr::Like { lhs, rhs, .. } => { + can_pushdown_predicate(lhs, table_idx) && can_pushdown_predicate(rhs, table_idx) + } Expr::Between { lhs, start, end, .. 
} => { - can_pushdown_predicate(lhs) - && can_pushdown_predicate(start) - && can_pushdown_predicate(end) + can_pushdown_predicate(lhs, table_idx) + && can_pushdown_predicate(start, table_idx) + && can_pushdown_predicate(end, table_idx) } _ => false, } From d02900294eb47a76bc0ec61f59fbe499cab5218a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 17 Apr 2025 13:47:46 -0400 Subject: [PATCH 290/425] Remove 2nd shell in vtab tests, fix expr translation in main loop --- core/translate/main_loop.rs | 5 +++-- testing/cli_tests/extensions.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index f9ba9ca97..7a2e1b9ef 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1,4 +1,4 @@ -use limbo_ext::{OrderByInfo, VTabKind}; +use limbo_ext::VTabKind; use limbo_sqlite3_parser::ast; use crate::{ @@ -335,7 +335,8 @@ pub fn open_loop( if let ast::Expr::Binary(lhs, _, rhs) = &predicates[pred_idx].expr { - let expr = if is_rhs { rhs } else { lhs }; + // translate the opposite side of the referenced vtab column + let expr = if is_rhs { lhs } else { rhs }; // argv_index is 1-based; adjust to get the proper register offset. let target_reg = start_reg + (argv_index - 1) as usize; translate_expr( diff --git a/testing/cli_tests/extensions.py b/testing/cli_tests/extensions.py index ab57e4178..d4d55fca1 100755 --- a/testing/cli_tests/extensions.py +++ b/testing/cli_tests/extensions.py @@ -343,7 +343,6 @@ def test_kv(): # first, create a normal table to ensure no issues limbo.execute_dot("CREATE TABLE other (a,b,c);") limbo.execute_dot("INSERT INTO other values (23,32,23);") - limbo = TestLimboShell() limbo.run_test_fn( "create virtual table t using kv_store;", lambda res: "Module kv_store not found" in res, From 32d59b8c78c55c9681bc850a062dae87b29dd90f Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Thu, 17 Apr 2025 20:08:05 -0300 Subject: [PATCH 291/425] refactor+fix: using a more robust pattern matching approach --- core/util.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/util.rs b/core/util.rs index b3ce8ecd0..295989cee 100644 --- a/core/util.rs +++ b/core/util.rs @@ -878,11 +878,7 @@ fn parse_numeric_str(text: &str) -> Result<(OwnedValueType, &str), ()> { let text = text.trim(); let bytes = text.as_bytes(); - if bytes.is_empty() - || bytes[0] == b'e' - || bytes[0] == b'E' - || (bytes[0] == b'.' && (bytes[1] == b'e' || bytes[1] == b'E')) - { + if matches!(bytes, [b'e', ..] | [b'E', ..] | [b'.', b'e' | b'E', ..]) { return Err(()); } From 5fd2ed0bae42036128ee34e348ce3c7f2af810e9 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Thu, 17 Apr 2025 20:20:57 -0300 Subject: [PATCH 292/425] fix: handle empty case --- core/util.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/util.rs b/core/util.rs index 295989cee..82c7cb3c2 100644 --- a/core/util.rs +++ b/core/util.rs @@ -878,7 +878,10 @@ fn parse_numeric_str(text: &str) -> Result<(OwnedValueType, &str), ()> { let text = text.trim(); let bytes = text.as_bytes(); - if matches!(bytes, [b'e', ..] | [b'E', ..] | [b'.', b'e' | b'E', ..]) { + if matches!( + bytes, + [] | [b'e', ..] | [b'E', ..] | [b'.', b'e' | b'E', ..] 
+ ) { return Err(()); } From 4ab4a3f6c3aadee480c98702417bdd1cbe1616d9 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 11:27:11 +0300 Subject: [PATCH 293/425] TableReference: add index_is_covering() and utilizes_covering_index() --- core/translate/emitter.rs | 2 +- core/translate/plan.rs | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 0bc54bb9a..5b12e4375 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -95,7 +95,7 @@ pub struct TranslateCtx<'a> { /// Used to distinguish database operations #[allow(clippy::upper_case_acronyms, dead_code)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum OperationMode { SELECT, INSERT, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 25a4fd7ef..3d43da75d 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -491,6 +491,46 @@ impl TableReference { pub fn mark_column_used(&mut self, index: usize) { self.col_used_mask.set(index); } + /// Returns true if a given index is a covering index for this [TableReference]. + pub fn index_is_covering(&self, index: &Index) -> bool { + let Table::BTree(btree) = &self.table else { + return false; + }; + if self.col_used_mask.is_empty() { + return false; + } + let mut index_cols_mask = ColumnUsedMask::new(); + for col in index.columns.iter() { + index_cols_mask.set(col.pos_in_table); + } + + // If a table has a rowid (i.e. is not a WITHOUT ROWID table), the index is guaranteed to contain the rowid as well. + if btree.has_rowid { + if let Some(pos_of_rowid_alias_col) = btree.get_rowid_alias_column().map(|(pos, _)| pos) + { + let mut empty_mask = ColumnUsedMask::new(); + empty_mask.set(pos_of_rowid_alias_col); + if self.col_used_mask == empty_mask { + // However if the index would be ONLY used for the rowid, then let's not bother using it to cover the query. + // Example: if the query is SELECT id FROM t, and id is a rowid alias, then let's rather just scan the table + // instead of an index. + return false; + } + index_cols_mask.set(pos_of_rowid_alias_col); + } + } + + index_cols_mask.contains_all_set_bits_of(&self.col_used_mask) + } + + /// Returns true if the index selected for use with this [TableReference] is a covering index, + /// meaning that it contains all the columns that are referenced in the query. + pub fn utilizes_covering_index(&self) -> bool { + let Some(index) = self.op.index() else { + return false; + }; + self.index_is_covering(index.as_ref()) + } } /// A definition of a rowid/index search. 
From d5a6553e6388a2f2d9b15faff263341840f2e67b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 11:27:34 +0300 Subject: [PATCH 294/425] TableReference: add open_cursors() --- core/translate/plan.rs | 52 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 3d43da75d..6dd0738b6 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -11,8 +11,11 @@ use std::{ use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index, Table}, - vdbe::BranchOffset, - VirtualTable, + vdbe::{ + builder::{CursorType, ProgramBuilder}, + BranchOffset, CursorID, + }, + Result, VirtualTable, }; use crate::{ schema::{PseudoTable, Type}, @@ -20,6 +23,8 @@ use crate::{ util::can_pushdown_predicate, }; +use super::emitter::OperationMode; + #[derive(Debug, Clone)] pub struct ResultSetColumn { pub expr: ast::Expr, @@ -491,6 +496,49 @@ impl TableReference { pub fn mark_column_used(&mut self, index: usize) { self.col_used_mask.set(index); } + + /// Open the necessary cursors for this table reference. + /// Generally a table cursor is always opened unless a SELECT query can use a covering index. + /// An index cursor is opened if an index is used in any way for reading data from the table. + pub fn open_cursors( + &self, + program: &mut ProgramBuilder, + mode: OperationMode, + ) -> Result<(Option, Option)> { + let index = self.op.index(); + match &self.table { + Table::BTree(btree) => { + let use_covering_index = self.utilizes_covering_index(); + let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT { + None + } else { + Some(program.alloc_cursor_id( + Some(self.identifier.clone()), + CursorType::BTreeTable(btree.clone()), + )) + }; + let index_cursor_id = if let Some(index) = index { + Some(program.alloc_cursor_id( + Some(index.name.clone()), + CursorType::BTreeIndex(index.clone()), + )) + } else { + None + }; + Ok((table_cursor_id, index_cursor_id)) + } + Table::Virtual(virtual_table) => { + let table_cursor_id = Some(program.alloc_cursor_id( + Some(self.identifier.clone()), + CursorType::VirtualTable(virtual_table.clone()), + )); + let index_cursor_id = None; + Ok((table_cursor_id, index_cursor_id)) + } + Table::Pseudo(_) => Ok((None, None)), + } + } + /// Returns true if a given index is a covering index for this [TableReference]. pub fn index_is_covering(&self, index: &Index) -> bool { let Table::BTree(btree) = &self.table else { From 40d880c3b089117ba82a8ee6391c594902f2145d Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 13:17:14 +0300 Subject: [PATCH 295/425] TableReference: add resolve_cursors() method --- core/translate/plan.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 6dd0738b6..07a8de392 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -539,6 +539,17 @@ impl TableReference { } } + /// Resolve the already opened cursors for this table reference. + pub fn resolve_cursors( + &self, + program: &mut ProgramBuilder, + ) -> Result<(Option, Option)> { + let index = self.op.index(); + let table_cursor_id = program.resolve_cursor_id_safe(&self.identifier); + let index_cursor_id = index.map(|index| program.resolve_cursor_id(&index.name)); + Ok((table_cursor_id, index_cursor_id)) + } + /// Returns true if a given index is a covering index for this [TableReference]. 
pub fn index_is_covering(&self, index: &Index) -> bool { let Table::BTree(btree) = &self.table else { From 5b71d3a3dad4a671f8521ef477dcb533ad63e80b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 13:20:07 +0300 Subject: [PATCH 296/425] eliminate_unnecessary_orderby: add edge case handling --- core/translate/optimizer.rs | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 80875da91..4cb55661e 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -167,32 +167,34 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { Ok(()) } +/// Eliminate unnecessary ORDER BY clauses. +/// Returns true if the ORDER BY clause was eliminated. fn eliminate_unnecessary_orderby( table_references: &mut [TableReference], available_indexes: &HashMap>>, order_by: &mut Option>, group_by: &Option, -) -> Result<()> { +) -> Result { let Some(order) = order_by else { - return Ok(()); + return Ok(false); }; let Some(first_table_reference) = table_references.first_mut() else { - return Ok(()); + return Ok(false); }; let Some(btree_table) = first_table_reference.btree() else { - return Ok(()); + return Ok(false); }; // If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data // This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping // And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby if group_by.is_some() { - return Ok(()); + return Ok(false); } let Operation::Scan { index, iter_dir, .. } = &mut first_table_reference.op else { - return Ok(()); + return Ok(false); }; assert!( @@ -207,7 +209,7 @@ fn eliminate_unnecessary_orderby( Direction::Descending => IterationDirection::Backwards, }; *order_by = None; - return Ok(()); + return Ok(true); } // Find the best matching index for the ORDER BY columns @@ -235,7 +237,7 @@ fn eliminate_unnecessary_orderby( } let Some(matching_index) = best_index.0 else { - return Ok(()); + return Ok(false); }; let match_count = best_index.1; @@ -280,7 +282,7 @@ fn eliminate_unnecessary_orderby( } } - Ok(()) + Ok(order_by.is_none()) } /** @@ -300,7 +302,8 @@ fn use_indexes( group_by: &Option, ) -> Result<()> { // Try to use indexes for eliminating ORDER BY clauses - eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; + let did_eliminate_orderby = + eliminate_unnecessary_orderby(table_references, available_indexes, order_by, group_by)?; // Try to use indexes for WHERE conditions for (table_index, table_reference) in table_references.iter_mut().enumerate() { @@ -346,6 +349,12 @@ fn use_indexes( i += 1; } } + if did_eliminate_orderby && table_index == 0 { + // If we already made the decision to remove ORDER BY based on the Rowid (e.g. ORDER BY id), then skip this. + // It would be possible to analyze the index and see if the covering index would retain the ordering guarantee, + // but we just don't do that yet. 
+ continue; + } if let Some(indexes) = available_indexes.get(table_name) { if let Some(search) = try_extract_index_search_from_where_clause( where_clause, From 6c73db6fd34350f01c16ed18f226e1bbda919924 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 13:34:52 +0300 Subject: [PATCH 297/425] feat: use covering indexes whenever possible --- core/translate/expr.rs | 112 +++++++++++++------ core/translate/main_loop.rs | 213 ++++++++++++++++-------------------- core/translate/optimizer.rs | 16 +++ 3 files changed, 193 insertions(+), 148 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6a3b1fc8b..6c9072ab9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1770,41 +1770,79 @@ pub fn translate_expr( is_rowid_alias, } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); match table_reference.op { // If we are reading a column from a table, we find the cursor that corresponds to // the table and read the column from the cursor. - Operation::Scan { .. } | Operation::Search(_) => match &table_reference.table { - Table::BTree(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - if *is_rowid_alias { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { + // If we have a covering index, we don't have an open table cursor so we read from the index cursor. + Operation::Scan { .. } | Operation::Search(_) => { + match &table_reference.table { + Table::BTree(_) => { + let table_cursor_id = if use_covering_index { + None + } else { + Some(program.resolve_cursor_id(&table_reference.identifier)) + }; + let index_cursor_id = if let Some(index) = index { + Some(program.resolve_cursor_id(&index.name)) + } else { + None + }; + if *is_rowid_alias { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::IdxRowId { + cursor_id: index_cursor_id, + dest: target_register, + }); + } else if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: target_register, + }); + } else { + unreachable!("Either index or table cursor must be opened"); + } + } else { + let read_cursor = if use_covering_index { + index_cursor_id + .expect("index cursor should be opened when use_covering_index=true") + } else { + table_cursor_id + .expect("table cursor should be opened when use_covering_index=false") + }; + let column = if use_covering_index { + let index = index.expect("index cursor should be opened when use_covering_index=true"); + index.column_table_pos_to_index_pos(*column).unwrap_or_else(|| { + panic!("covering index {} does not contain column number {} of table {}", index.name, column, table_reference.identifier) + }) + } else { + *column + }; + program.emit_insn(Insn::Column { + cursor_id: read_cursor, + column, + dest: target_register, + }); + } + let Some(column) = table_reference.table.get_column_at(*column) else { + crate::bail_parse_error!("column index out of bounds"); + }; + maybe_apply_affinity(column.ty, target_register, program); + Ok(target_register) + } + Table::Virtual(_) => { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::VColumn { cursor_id, column: *column, dest: target_register, }); + Ok(target_register) } - let Some(column) = table_reference.table.get_column_at(*column) 
else { - crate::bail_parse_error!("column index out of bounds"); - }; - maybe_apply_affinity(column.ty, target_register, program); - Ok(target_register) + _ => unreachable!(), } - Table::Virtual(_) => { - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::VColumn { - cursor_id, - column: *column, - dest: target_register, - }); - Ok(target_register) - } - _ => unreachable!(), - }, + } // If we are reading a column from a subquery, we instead copy the column from the // subquery's result registers. Operation::Subquery { @@ -1822,11 +1860,23 @@ pub fn translate_expr( } ast::Expr::RowId { database: _, table } => { let table_reference = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&table_reference.identifier); - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); + let index = table_reference.op.index(); + let use_covering_index = table_reference.utilizes_covering_index(); + if use_covering_index { + let index = + index.expect("index cursor should be opened when use_covering_index=true"); + let cursor_id = program.resolve_cursor_id(&index.name); + program.emit_insn(Insn::IdxRowId { + cursor_id, + dest: target_register, + }); + } else { + let cursor_id = program.resolve_cursor_id(&table_reference.identifier); + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } Ok(target_register) } ast::Expr::InList { .. } => todo!(), diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 7a2e1b9ef..74da26438 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -6,7 +6,7 @@ use crate::{ translate::result_row::emit_select_result, types::SeekOp, vdbe::{ - builder::{CursorType, ProgramBuilder}, + builder::ProgramBuilder, insn::{CmpInsFlags, Insn}, BranchOffset, }, @@ -81,77 +81,67 @@ pub fn init_loop( t_ctx.meta_left_joins[table_index] = Some(lj_metadata); } } + let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?; match &table.op { - Operation::Scan { index, .. } => { - let cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - match &table.table { - Table::BTree(_) => CursorType::BTreeTable(table.btree().unwrap().clone()), - Table::Virtual(_) => { - CursorType::VirtualTable(table.virtual_table().unwrap().clone()) - } - other => panic!("Invalid table reference type in Scan: {:?}", other), - }, - ); - let index_cursor_id = index.as_ref().map(|i| { - program.alloc_cursor_id(Some(i.name.clone()), CursorType::BTreeIndex(i.clone())) - }); - match (mode, &table.table) { - (OperationMode::SELECT, Table::BTree(btree)) => { - let root_page = btree.root_page; + Operation::Scan { index, .. 
} => match (mode, &table.table) { + (OperationMode::SELECT, Table::BTree(btree)) => { + let root_page = btree.root_page; + if let Some(cursor_id) = table_cursor_id { program.emit_insn(Insn::OpenRead { cursor_id, root_page, }); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenRead { - cursor_id: index_cursor_id, - root_page: index.as_ref().unwrap().root_page, - }); - } } - (OperationMode::DELETE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWrite { - cursor_id, - root_page: root_page.into(), + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page, }); } - (OperationMode::UPDATE, Table::BTree(btree)) => { - let root_page = btree.root_page; - program.emit_insn(Insn::OpenWrite { - cursor_id, - root_page: root_page.into(), - }); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::OpenWrite { - cursor_id: index_cursor_id, - root_page: index.as_ref().unwrap().root_page.into(), - }); - } - } - (_, Table::Virtual(_)) => { - program.emit_insn(Insn::VOpen { cursor_id }); - } - _ => { - unimplemented!() - } } - } - Operation::Search(search) => { - let table_cursor_id = program.alloc_cursor_id( - Some(table.identifier.clone()), - CursorType::BTreeTable(table.btree().unwrap().clone()), - ); - - match mode { - OperationMode::SELECT => { - program.emit_insn(Insn::OpenRead { - cursor_id: table_cursor_id, - root_page: table.table.get_root_page(), + (OperationMode::DELETE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::DELETE"), + root_page: root_page.into(), + }); + } + (OperationMode::UPDATE, Table::BTree(btree)) => { + let root_page = btree.root_page; + program.emit_insn(Insn::OpenWrite { + cursor_id: table_cursor_id + .expect("table cursor is always opened in OperationMode::UPDATE"), + root_page: root_page.into(), + }); + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: index.as_ref().unwrap().root_page.into(), }); } + } + (_, Table::Virtual(_)) => { + if let Some(cursor_id) = table_cursor_id { + program.emit_insn(Insn::VOpen { cursor_id }); + } + } + _ => { + unimplemented!() + } + }, + Operation::Search(search) => { + match mode { + OperationMode::SELECT => { + if let Some(table_cursor_id) = table_cursor_id { + program.emit_insn(Insn::OpenRead { + cursor_id: table_cursor_id, + root_page: table.table.get_root_page(), + }); + } + } OperationMode::DELETE | OperationMode::UPDATE => { + let table_cursor_id = table_cursor_id.expect("table cursor is always opened in OperationMode::DELETE or OperationMode::UPDATE"); program.emit_insn(Insn::OpenWrite { cursor_id: table_cursor_id, root_page: table.table.get_root_page().into(), @@ -166,21 +156,18 @@ pub fn init_loop( index: Some(index), .. 
} = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - CursorType::BTreeIndex(index.clone()), - ); - match mode { OperationMode::SELECT => { program.emit_insn(Insn::OpenRead { - cursor_id: index_cursor_id, + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), root_page: index.root_page, }); } OperationMode::UPDATE | OperationMode::DELETE => { program.emit_insn(Insn::OpenWrite { - cursor_id: index_cursor_id, + cursor_id: index_cursor_id + .expect("index cursor is always opened in Seek with index"), root_page: index.root_page.into(), }); } @@ -229,6 +216,8 @@ pub fn open_loop( } } + let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; + match &table.op { Operation::Subquery { plan, .. } => { let (yield_reg, coroutine_implementation_start) = match &plan.query_type { @@ -274,10 +263,10 @@ pub fn open_loop( program.resolve_label(jump_target_when_true, program.offset()); } } - Operation::Scan { iter_dir, index } => { - let cursor_id = program.resolve_cursor_id(&table.identifier); - let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); - let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); + Operation::Scan { iter_dir, .. } => { + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); if !matches!(&table.table, Table::Virtual(_)) { if *iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::Last { @@ -389,7 +378,8 @@ pub fn open_loop( // Emit VFilter with the computed arguments. program.emit_insn(Insn::VFilter { - cursor_id, + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), arg_count: count, args_reg: start_reg, idx_str: maybe_idx_str, @@ -399,11 +389,13 @@ pub fn open_loop( } program.resolve_label(loop_start, program.offset()); - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id: cursor_id, - }); + if let Some(table_cursor_id) = table_cursor_id { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } for (_, cond) in predicates.iter().enumerate().filter(|(i, cond)| { @@ -426,7 +418,6 @@ pub fn open_loop( } } Operation::Search(search) => { - let table_cursor_id = program.resolve_cursor_id(&table.identifier); // Open the loop for the index search. // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, since it is a single row lookup. if let Search::RowidEq { cmp_expr } = search { @@ -439,22 +430,17 @@ pub fn open_loop( &t_ctx.resolver, )?; program.emit_insn(Insn::SeekRowid { - cursor_id: table_cursor_id, + cursor_id: table_cursor_id + .expect("Search::RowidEq requires a table cursor"), src_reg, target_pc: next, }); } else { // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore. - let index_cursor_id = if let Search::Seek { - index: Some(index), .. - } = search - { - Some(program.resolve_cursor_id(&index.name)) - } else { - None - }; let is_index = index_cursor_id.is_some(); - let seek_cursor_id = index_cursor_id.unwrap_or(table_cursor_id); + let seek_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); let Search::Seek { seek_def, .. 
} = search else { unreachable!("Rowid equality point lookup should have been handled above"); }; @@ -483,11 +469,13 @@ pub fn open_loop( )?; if let Some(index_cursor_id) = index_cursor_id { - // Don't do a btree table seek until it's actually necessary to read from the table. - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); + if let Some(table_cursor_id) = table_cursor_id { + // Don't do a btree table seek until it's actually necessary to read from the table. + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } } } @@ -785,6 +773,8 @@ pub fn close_loop( .get(table_index) .expect("source has no loop labels"); + let (table_cursor_id, index_cursor_id) = table.resolve_cursors(program)?; + match &table.op { Operation::Subquery { .. } => { program.resolve_label(loop_labels.next, program.offset()); @@ -795,14 +785,11 @@ pub fn close_loop( target_pc: loop_labels.loop_start, }); } - Operation::Scan { - index, iter_dir, .. - } => { + Operation::Scan { iter_dir, .. } => { program.resolve_label(loop_labels.next, program.offset()); - - let cursor_id = program.resolve_cursor_id(&table.identifier); - let index_cursor_id = index.as_ref().map(|i| program.resolve_cursor_id(&i.name)); - let iteration_cursor_id = index_cursor_id.unwrap_or(cursor_id); + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); match &table.table { Table::BTree(_) => { if *iter_dir == IterationDirection::Backwards { @@ -819,7 +806,8 @@ pub fn close_loop( } Table::Virtual(_) => { program.emit_insn(Insn::VNext { - cursor_id, + cursor_id: table_cursor_id + .expect("Virtual tables do not support covering indexes"), pc_if_next: loop_labels.loop_start, }); } @@ -828,33 +816,24 @@ pub fn close_loop( } Operation::Search(search) => { program.resolve_label(loop_labels.next, program.offset()); + let iteration_cursor_id = index_cursor_id.unwrap_or_else(|| { + table_cursor_id.expect("Either index or table cursor must be opened") + }); // Rowid equality point lookups are handled with a SeekRowid instruction which does not loop, so there is no need to emit a Next instruction. if !matches!(search, Search::RowidEq { .. }) { - let (cursor_id, iter_dir) = match search { - Search::Seek { - index: Some(index), - seek_def, - .. - } => (program.resolve_cursor_id(&index.name), seek_def.iter_dir), - Search::Seek { - index: None, - seek_def, - .. - } => ( - program.resolve_cursor_id(&table.identifier), - seek_def.iter_dir, - ), + let iter_dir = match search { + Search::Seek { seek_def, .. } => seek_def.iter_dir, Search::RowidEq { .. } => unreachable!(), }; if iter_dir == IterationDirection::Backwards { program.emit_insn(Insn::Prev { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_prev: loop_labels.loop_start, }); } else { program.emit_insn(Insn::Next { - cursor_id, + cursor_id: iteration_cursor_id, pc_if_next: loop_labels.loop_start, }); } diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4cb55661e..fe764ee50 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -368,6 +368,22 @@ fn use_indexes( } } } + + // Finally, if there's no other reason to use an index, if an index covers the columns used in the query, let's use it. 
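+        // Illustrative example (hypothetical schema, not from this patch): given
+        //   CREATE INDEX t_xy ON t(x, y);
+        //   SELECT x, y FROM t;
+        // every column referenced by the query is present in t_xy, so scanning
+        // the index alone can answer the query without touching the table btree.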
+ if let Some(indexes) = available_indexes.get(table_reference.table.get_name()) { + for index_candidate in indexes.iter() { + let is_covering = table_reference.index_is_covering(index_candidate); + if let Operation::Scan { index, .. } = &mut table_reference.op { + if index.is_some() { + continue; + } + if is_covering { + *index = Some(index_candidate.clone()); + break; + } + } + } + } } Ok(()) From 8477ff0d3d66fdaf1eac8f7b3d8d02856bb8f851 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 16:48:53 +0300 Subject: [PATCH 298/425] tests/fuzz: amend compound index key fuzz to include nonindexed columns some of the time --- tests/integration/fuzz/mod.rs | 113 ++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index f76a005ba..929b33d8d 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -4,7 +4,7 @@ pub mod grammar_generator; mod tests { use std::{collections::HashSet, rc::Rc}; - use rand::{Rng, SeedableRng}; + use rand::{seq::IndexedRandom, Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; use rusqlite::params; @@ -213,34 +213,47 @@ mod tests { }; // Create all different 3-column primary key permutations let dbs = [ - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z))"), - TempDatabase::new_with_rusqlite("CREATE TABLE t(x, y, z, PRIMARY KEY (x, y, z desc))"), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x, y desc, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))", ), TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, PRIMARY KEY (x desc, y desc, z desc))", + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))", + ), + TempDatabase::new_with_rusqlite( + "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))", ), ]; let mut pk_tuples = HashSet::new(); while pk_tuples.len() < 100000 { - pk_tuples.insert((rng.random_range(0..3000), rng.random_range(0..3000))); + pk_tuples.insert(( + rng.random_range(0..3000), + rng.random_range(0..3000), + rng.random_range(0..3000), + )); } let mut tuples = Vec::new(); for pk_tuple in pk_tuples { tuples.push(format!( - "({}, {}, {})", + "({}, {}, {}, {})", pk_tuple.0, pk_tuple.1, - rng.random_range(0..2000) + pk_tuple.2, + rng.random_range(0..3000) )); } let insert = format!("INSERT INTO t VALUES {}", tuples.join(", ")); @@ -298,6 +311,21 @@ mod tests { ITERATIONS ); } + // let's choose random columns from the table + let col_choices = ["x", "y", "z", "nonindexed_col"]; + let col_choices_weights = [10.0, 10.0, 10.0, 3.0]; 
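+                    // nonindexed_col is deliberately down-weighted so it shows up in
+                    // only some generated SELECT lists (per the commit subject),
+                    // exercising the non-covering path without drowning out the
+                    // covering-index cases.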
+ let num_cols_in_select = rng.random_range(1..=4); + let select_cols = col_choices + .choose_multiple_weighted(&mut rng, num_cols_in_select, |s| { + let idx = col_choices.iter().position(|c| c == s).unwrap(); + col_choices_weights[idx] + }) + .unwrap() + .collect::>() + .iter() + .map(|x| x.to_string()) + .collect::>(); + let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())]; // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY. let (order_by1, order_by2, order_by3) = { @@ -318,7 +346,7 @@ mod tests { } }; - // Generate random values for the WHERE clause constraints + // Generate random values for the WHERE clause constraints. Only involve primary key columns. let (col_val_first, col_val_second, col_val_third) = { if comp1.is_some() && comp2.is_some() && comp3.is_some() { ( @@ -372,8 +400,11 @@ mod tests { // Generate final query string let query = format!( - "SELECT * FROM t {} {} LIMIT {}", - where_clause, order_by, limit + "SELECT {} FROM t {} {} LIMIT {}", + select_cols.join(", "), + where_clause, + order_by, + limit ); log::debug!("query: {}", query); @@ -398,19 +429,53 @@ mod tests { } }); - if order_by_only_equalities { - let query_no_limit = - format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); - let limbo_no_limit = - limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); - let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); - let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); - if limbo_rev == sqlite_no_limit { + let query_no_limit = + format!("SELECT * FROM t {} {} {}", where_clause, order_by, ""); + let limbo_no_limit = limbo_exec_rows(&dbs[i], &limbo_conns[i], &query_no_limit); + let sqlite_no_limit = sqlite_exec_rows(&sqlite_conn, &query_no_limit); + let limbo_rev = limbo_no_limit.iter().cloned().rev().collect::>(); + if limbo_rev == sqlite_no_limit && order_by_only_equalities { + continue; + } + + // finally, if the order by columns specified contain duplicates, sqlite might've returned the rows in an arbitrary different order. + // e.g. SELECT x,y,z FROM t ORDER BY x,y -- if there are duplicates on (x,y), the ordering returned might be different for limbo and sqlite. + // let's check this case and forgive ourselves if the ordering is different for this reason (but no other reason!) + let order_by_cols = select_cols + .iter() + .enumerate() + .filter(|(i, _)| { + order_by_components + .iter() + .any(|o| o.starts_with(col_choices[*i])) + }) + .map(|(i, _)| i) + .collect::>(); + let duplicate_on_order_by_exists = { + let mut exists = false; + 'outer: for (i, row) in limbo_no_limit.iter().enumerate() { + for (j, other_row) in limbo_no_limit.iter().enumerate() { + if i != j + && order_by_cols.iter().all(|&col| row[col] == other_row[col]) + { + exists = true; + break 'outer; + } + } + } + exists + }; + if duplicate_on_order_by_exists { + let len_equal = limbo_no_limit.len() == sqlite_no_limit.len(); + let all_contained = + len_equal && limbo_no_limit.iter().all(|x| sqlite_no_limit.contains(x)); + if all_contained { continue; } } + panic!( - "limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", + "DIFFERENT RESULTS! 
limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", limbo, sqlite, seed, query ); } From 3f9bdbdf14227d0285d6bc36e37afc48bdcc1450 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 20:22:43 +0300 Subject: [PATCH 299/425] btree: use binary search in move_to() for table btrees --- core/storage/btree.rs | 106 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 5fd9ec2ec..0777bc951 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1178,6 +1178,108 @@ impl BTreeCursor { } } + /// Specialized version of move_to() for table btrees that uses binary search instead + /// of iterating cells in order. + /// The only reason this is specialized for rowids is that Jussi didn't have the energy to implement + /// it for index btrees yet lol. + fn tablebtree_move_to_binsearch( + &mut self, + rowid: u64, + seek_op: SeekOp, + iter_dir: IterationDirection, + ) -> Result> { + 'outer: loop { + let page = self.stack.top(); + return_if_locked!(page); + let contents = page.get().contents.as_ref().unwrap(); + if contents.is_leaf() { + return Ok(CursorResult::Ok(())); + } + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + let mut leftmost_matching_cell = None; + loop { + if min > max { + if let Some(leftmost_matching_cell) = leftmost_matching_cell { + self.stack.set_cell_index(leftmost_matching_cell as i32); + let matching_cell = contents.cell_get( + leftmost_matching_cell, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + self.stack.next_cell_in_direction(iter_dir); + let BTreeCell::TableInteriorCell(TableInteriorCell { + _left_child_page, + .. + }) = matching_cell + else { + unreachable!("unexpected cell type: {:?}", matching_cell); + }; + let mem_page = self.pager.read_page(_left_child_page as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + self.stack.set_cell_index(contents.cell_count() as i32 + 1); + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + None => { + unreachable!("we shall not go back up! 
The only way is down the slope"); + } + } + } + let cur_cell_idx = (min + max) / 2; + self.stack.set_cell_index(cur_cell_idx as i32); + let cur_cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + + match &cur_cell { + BTreeCell::TableInteriorCell(TableInteriorCell { + _left_child_page, + _rowid: cell_rowid, + }) => { + let is_on_left = match seek_op { + SeekOp::GT => *cell_rowid > rowid, + SeekOp::GE => *cell_rowid >= rowid, + SeekOp::LE => *cell_rowid >= rowid, + SeekOp::LT => *cell_rowid + 1 >= rowid, + SeekOp::EQ => *cell_rowid >= rowid, + }; + if is_on_left { + leftmost_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } else { + min = cur_cell_idx + 1; + } + } + _ => unreachable!("unexpected cell type: {:?}", cur_cell), + } + } + } + } + pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp); @@ -1208,6 +1310,10 @@ impl BTreeCursor { let iter_dir = cmp.iteration_direction(); + if let SeekKey::TableRowId(rowid) = key { + return self.tablebtree_move_to_binsearch(rowid, cmp, iter_dir); + } + loop { let page = self.stack.top(); return_if_locked!(page); From 12e689b9fc1096279a63d3317aa2abbd564361df Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Apr 2025 09:45:18 +0300 Subject: [PATCH 300/425] btree: use binary search on table leaf pages too --- core/storage/btree.rs | 160 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 156 insertions(+), 4 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0777bc951..92e7ddd7d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -976,6 +976,9 @@ impl BTreeCursor { /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { let cell_iter_dir = op.iteration_direction(); + if let SeekKey::TableRowId(rowid) = key { + return self.tablebtree_seek(rowid, op, cell_iter_dir); + } return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -1280,6 +1283,159 @@ impl BTreeCursor { } } + /// Specialized version of do_seek() for table btrees that uses binary search instead + /// of iterating cells in order. + fn tablebtree_seek( + &mut self, + rowid: u64, + seek_op: SeekOp, + iter_dir: IterationDirection, + ) -> Result>> { + assert!(self.mv_cursor.is_none()); + self.move_to_root(); + return_if_io!(self.tablebtree_move_to_binsearch(rowid, seek_op, iter_dir)); + let page = self.stack.top(); + return_if_locked!(page); + let contents = page.get().contents.as_ref().unwrap(); + assert!( + contents.is_leaf(), + "tablebtree_seek_binsearch() called on non-leaf page" + ); + + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + + // If iter dir is forwards, we want the first cell that matches; + // If iter dir is backwards, we want the last cell that matches. 
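+        // e.g. with leaf rowids [3, 5, 7, 9]: a GE 6 seek iterating forwards must
+        // land on 7 (the first match), while an LE 6 seek iterating backwards must
+        // land on 5 (the last match).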
+ let mut nearest_matching_cell = None; + loop { + if min > max { + let Some(nearest_matching_cell) = nearest_matching_cell else { + return Ok(CursorResult::Ok(None)); + }; + self.stack.set_cell_index(nearest_matching_cell as i32); + let matching_cell = contents.cell_get( + nearest_matching_cell, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::TableLeafCell(TableLeafCell { + _rowid: cell_rowid, + _payload, + first_overflow_page, + payload_size, + .. + }) = matching_cell + else { + unreachable!("unexpected cell type: {:?}", matching_cell); + }; + + return_if_io!(self.read_record_w_possible_overflow( + _payload, + first_overflow_page, + payload_size + )); + self.stack.next_cell_in_direction(iter_dir); + + return Ok(CursorResult::Ok(Some(cell_rowid))); + } + + let cur_cell_idx = (min + max) / 2; + self.stack.set_cell_index(cur_cell_idx as i32); + let cur_cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), + payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16), + self.usable_space(), + )?; + + let BTreeCell::TableLeafCell(TableLeafCell { + _rowid: cell_rowid, + _payload, + first_overflow_page, + payload_size, + .. + }) = cur_cell + else { + unreachable!("unexpected cell type: {:?}", cur_cell); + }; + + let cmp = cell_rowid.cmp(&rowid); + + let found = match seek_op { + SeekOp::GT => cmp.is_gt(), + SeekOp::GE => cmp.is_ge(), + SeekOp::EQ => cmp.is_eq(), + SeekOp::LE => cmp.is_le(), + SeekOp::LT => cmp.is_lt(), + }; + + // rowids are unique, so we can return the rowid immediately + if found && SeekOp::EQ == seek_op { + return_if_io!(self.read_record_w_possible_overflow( + _payload, + first_overflow_page, + payload_size + )); + self.stack.next_cell_in_direction(iter_dir); + return Ok(CursorResult::Ok(Some(cell_rowid))); + } + + if found { + match iter_dir { + IterationDirection::Forwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } + IterationDirection::Backwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + min = cur_cell_idx + 1; + } + } + } else { + if cmp.is_gt() { + max = cur_cell_idx - 1; + } else if cmp.is_lt() { + min = cur_cell_idx + 1; + } else { + match iter_dir { + IterationDirection::Forwards => { + min = cur_cell_idx + 1; + } + IterationDirection::Backwards => { + max = cur_cell_idx - 1; + } + } + } + } + } + } + + fn read_record_w_possible_overflow( + &mut self, + payload: &'static [u8], + next_page: Option, + payload_size: u64, + ) -> Result> { + if let Some(next_page) = next_page { + self.process_overflow_read(payload, next_page, payload_size) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )?; + Ok(CursorResult::Ok(())) + } + } + pub fn move_to(&mut self, key: SeekKey<'_>, cmp: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); tracing::trace!("move_to(key={:?} cmp={:?})", key, cmp); @@ -1310,10 +1466,6 @@ impl BTreeCursor { let iter_dir = cmp.iteration_direction(); - if let SeekKey::TableRowId(rowid) = key { - return self.tablebtree_move_to_binsearch(rowid, cmp, iter_dir); - } - loop { let page = self.stack.top(); return_if_locked!(page); From 0974ba6e711667ee6aaf2ab2e9c34c7340146a14 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 
Apr 2025 14:10:31 +0300 Subject: [PATCH 301/425] default to using tablebtree_move_to in all calls to move_to with rowids --- core/storage/btree.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 92e7ddd7d..c2c850f09 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1463,8 +1463,10 @@ impl BTreeCursor { // This cell contains the actual data we are looking for. // 6. If we find the cell, we return the record. Otherwise, we return an empty result. self.move_to_root(); - let iter_dir = cmp.iteration_direction(); + if let SeekKey::TableRowId(rowid_key) = key { + return self.tablebtree_move_to_binsearch(rowid_key, cmp, iter_dir); + } loop { let page = self.stack.top(); From 3dab59201d87777a2f5b381d67c002cdd1b48f07 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Apr 2025 14:23:25 +0300 Subject: [PATCH 302/425] Separate both table&index move_to impls into different funcs --- core/storage/btree.rs | 369 +++++++++++++++++++----------------------- 1 file changed, 168 insertions(+), 201 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index c2c850f09..a454da948 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1181,10 +1181,7 @@ impl BTreeCursor { } } - /// Specialized version of move_to() for table btrees that uses binary search instead - /// of iterating cells in order. - /// The only reason this is specialized for rowids is that Jussi didn't have the energy to implement - /// it for index btrees yet lol. + /// Specialized version of move_to() for table btrees. fn tablebtree_move_to_binsearch( &mut self, rowid: u64, @@ -1219,6 +1216,14 @@ impl BTreeCursor { ), self.usable_space(), )?; + // If we found our target rowid in the left subtree, + // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. + // For example: since the internal node contains the max rowid of the left subtree, we need to move the + // parent pointer backwards in backwards iteration so that we don't come back to the parent again. + // E.g. + // this parent: rowid 666 + // left child has: 664,665,666 + // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. self.stack.next_cell_in_direction(iter_dir); let BTreeCell::TableInteriorCell(TableInteriorCell { _left_child_page, @@ -1263,6 +1268,29 @@ impl BTreeCursor { _left_child_page, _rowid: cell_rowid, }) => { + // in sqlite btrees left child pages have <= keys. + // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, + // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) + // the left page may have a rowid=10. + // Logic table for determining if target leaf page is in left subtree + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key is in left subtree + // GT | = or < | go right | First > key is in right subtree + // GE | > or = | go left | First >= key is in left subtree + // GE | < | go right | First >= key is in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? 
| Explanation + // LE | > or = | go left | Last <= key is in left subtree + // LE | < | go right | Last <= key is in right subtree + // LT | > or = | go left | Last < key is in left subtree + // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less + // + // No iteration (point query): + // EQ | > or = | go left | Last = key is in left subtree + // EQ | < | go right | Last = key is in right subtree let is_on_left = match seek_op { SeekOp::GT => *cell_rowid > rowid, SeekOp::GE => *cell_rowid >= rowid, @@ -1283,6 +1311,136 @@ impl BTreeCursor { } } + /// Specialized version of move_to() for index btrees. + /// TODO: refactor this to use binary search instead of iterating cells in order. + fn indexbtree_move_to<'a>( + &mut self, + index_key: &'a ImmutableRecord, + cmp: SeekOp, + iter_dir: IterationDirection, + ) -> Result> { + loop { + let page = self.stack.top(); + return_if_locked!(page); + + let contents = page.get().contents.as_ref().unwrap(); + if contents.is_leaf() { + return Ok(CursorResult::Ok(())); + } + + let mut found_cell = false; + for cell_idx in 0..contents.cell_count() { + let cell = contents.cell_get( + cell_idx, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::IndexInteriorCell(IndexInteriorCell { + left_child_page, + payload, + first_overflow_page, + payload_size, + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..index_key.get_values().len()]; + let interior_cell_vs_index_key = compare_immutable( + record_slice_equal_number_of_cols, + index_key.get_values(), + self.index_key_sort_order, + ); + // in sqlite btrees left child pages have <= keys. + // in general, in forwards iteration we want to find the first key that matches the seek condition. + // in backwards iteration we want to find the last key that matches the seek condition. + // + // Logic table for determining if target leaf page is in left subtree. + // For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload). + // and for non-unique indexes there might be several cells with the same key. + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // GT | > | go left | First > key could be exactly this one, or in left subtree + // GT | = or < | go right | First > key must be in right subtree + // GE | > | go left | First >= key could be exactly this one, or in left subtree + // GE | = | go left | First >= key could be exactly this one, or in left subtree + // GE | < | go right | First >= key must be in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > | go left | Last <= key must be in left subtree + // LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. 
So we need to go right to make sure + // LE | < | go right | Last <= key must be in right subtree + // LT | > | go left | Last < key must be in left subtree + // LT | = | go left | Last < key must be in left subtree since we want strictly less than + // LT | < | go right | Last < key could be exactly this one, or in right subtree + // + // No iteration (point query): + // EQ | > | go left | First = key must be in left subtree + // EQ | = | go left | First = key could be exactly this one, or in left subtree + // EQ | < | go right | First = key must be in right subtree + + let target_leaf_page_is_in_left_subtree = match cmp { + SeekOp::GT => interior_cell_vs_index_key.is_gt(), + SeekOp::GE => interior_cell_vs_index_key.is_ge(), + SeekOp::EQ => interior_cell_vs_index_key.is_ge(), + SeekOp::LE => interior_cell_vs_index_key.is_gt(), + SeekOp::LT => interior_cell_vs_index_key.is_ge(), + }; + if target_leaf_page_is_in_left_subtree { + // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. + // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. + // Example: + // this parent: key 666, and we found the target key in the left child. + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. + if iter_dir == IterationDirection::Backwards { + self.stack.retreat(); + } + let mem_page = self.pager.read_page(*left_child_page as usize)?; + self.stack.push(mem_page); + found_cell = true; + break; + } else { + self.stack.advance(); + } + } + + if !found_cell { + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + self.stack.advance(); + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(mem_page); + continue; + } + None => { + unreachable!("we shall not go back up! The only way is down the slope"); + } + } + } + } + } + /// Specialized version of do_seek() for table btrees that uses binary search instead /// of iterating cells in order. fn tablebtree_seek( @@ -1463,205 +1621,14 @@ impl BTreeCursor { // This cell contains the actual data we are looking for. // 6. If we find the cell, we return the record. Otherwise, we return an empty result. self.move_to_root(); + let iter_dir = cmp.iteration_direction(); - if let SeekKey::TableRowId(rowid_key) = key { - return self.tablebtree_move_to_binsearch(rowid_key, cmp, iter_dir); - } - - loop { - let page = self.stack.top(); - return_if_locked!(page); - - let contents = page.get().contents.as_ref().unwrap(); - if contents.is_leaf() { - return Ok(CursorResult::Ok(())); + match key { + SeekKey::TableRowId(rowid_key) => { + return self.tablebtree_move_to_binsearch(rowid_key, cmp, iter_dir); } - - let mut found_cell = false; - for cell_idx in 0..contents.cell_count() { - let cell = contents.cell_get( - cell_idx, - payload_overflow_threshold_max( - contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - )?; - match &cell { - BTreeCell::TableInteriorCell(TableInteriorCell { - _left_child_page, - _rowid: cell_rowid, - }) => { - let SeekKey::TableRowId(rowid_key) = key else { - unreachable!("table seek key should be a rowid"); - }; - // in sqlite btrees left child pages have <= keys. 
- // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, - // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) - // the left page may have a rowid=10. - // Logic table for determining if target leaf page is in left subtree - // - // Forwards iteration (looking for first match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // GT | > | go left | First > key is in left subtree - // GT | = or < | go right | First > key is in right subtree - // GE | > or = | go left | First >= key is in left subtree - // GE | < | go right | First >= key is in right subtree - // - // Backwards iteration (looking for last match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // LE | > or = | go left | Last <= key is in left subtree - // LE | < | go right | Last <= key is in right subtree - // LT | > or = | go left | Last < key is in left subtree - // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less - // - // No iteration (point query): - // EQ | > or = | go left | Last = key is in left subtree - // EQ | < | go right | Last = key is in right subtree - let target_leaf_page_is_in_left_subtree = match cmp { - SeekOp::GT => *cell_rowid > rowid_key, - SeekOp::GE => *cell_rowid >= rowid_key, - SeekOp::LE => *cell_rowid >= rowid_key, - SeekOp::LT => *cell_rowid + 1 >= rowid_key, - SeekOp::EQ => *cell_rowid >= rowid_key, - }; - if target_leaf_page_is_in_left_subtree { - // If we found our target rowid in the left subtree, - // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. - // For example: since the internal node contains the max rowid of the left subtree, we need to move the - // parent pointer backwards in backwards iteration so that we don't come back to the parent again. - // E.g. - // this parent: rowid 666 - // left child has: 664,665,666 - // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. - self.stack.next_cell_in_direction(iter_dir); - let mem_page = self.pager.read_page(*_left_child_page as usize)?; - self.stack.push(mem_page); - found_cell = true; - break; - } else { - self.stack.advance(); - } - } - BTreeCell::TableLeafCell(TableLeafCell { - _rowid: _, - _payload: _, - first_overflow_page: _, - .. - }) => { - unreachable!( - "we don't iterate leaf cells while trying to move to a leaf cell" - ); - } - BTreeCell::IndexInteriorCell(IndexInteriorCell { - left_child_page, - payload, - first_overflow_page, - payload_size, - }) => { - let SeekKey::IndexKey(index_key) = key else { - unreachable!("index seek key should be a record"); - }; - if let Some(next_page) = first_overflow_page { - return_if_io!(self.process_overflow_read( - payload, - *next_page, - *payload_size - )) - } else { - crate::storage::sqlite3_ondisk::read_record( - payload, - self.get_immutable_record_or_create().as_mut().unwrap(), - )? - }; - let record = self.get_immutable_record(); - let record = record.as_ref().unwrap(); - let record_slice_equal_number_of_cols = - &record.get_values().as_slice()[..index_key.get_values().len()]; - let interior_cell_vs_index_key = compare_immutable( - record_slice_equal_number_of_cols, - index_key.get_values(), - self.index_key_sort_order, - ); - // in sqlite btrees left child pages have <= keys. - // in general, in forwards iteration we want to find the first key that matches the seek condition. 
- // in backwards iteration we want to find the last key that matches the seek condition. - // - // Logic table for determining if target leaf page is in left subtree. - // For index b-trees this is a bit more complicated since the interior cells contain payloads (the key is the payload). - // and for non-unique indexes there might be several cells with the same key. - // - // Forwards iteration (looking for first match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // GT | > | go left | First > key could be exactly this one, or in left subtree - // GT | = or < | go right | First > key must be in right subtree - // GE | > | go left | First >= key could be exactly this one, or in left subtree - // GE | = | go left | First >= key could be exactly this one, or in left subtree - // GE | < | go right | First >= key must be in right subtree - // - // Backwards iteration (looking for last match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // LE | > | go left | Last <= key must be in left subtree - // LE | = | go right | Last <= key is either this one, or somewhere to the right of this one. So we need to go right to make sure - // LE | < | go right | Last <= key must be in right subtree - // LT | > | go left | Last < key must be in left subtree - // LT | = | go left | Last < key must be in left subtree since we want strictly less than - // LT | < | go right | Last < key could be exactly this one, or in right subtree - // - // No iteration (point query): - // EQ | > | go left | First = key must be in left subtree - // EQ | = | go left | First = key could be exactly this one, or in left subtree - // EQ | < | go right | First = key must be in right subtree - - let target_leaf_page_is_in_left_subtree = match cmp { - SeekOp::GT => interior_cell_vs_index_key.is_gt(), - SeekOp::GE => interior_cell_vs_index_key.is_ge(), - SeekOp::EQ => interior_cell_vs_index_key.is_ge(), - SeekOp::LE => interior_cell_vs_index_key.is_gt(), - SeekOp::LT => interior_cell_vs_index_key.is_ge(), - }; - if target_leaf_page_is_in_left_subtree { - // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. - // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. - // Example: - // this parent: key 666, and we found the target key in the left child. - // left child has: key 663, key 664, key 665 - // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. - if iter_dir == IterationDirection::Backwards { - self.stack.retreat(); - } - let mem_page = self.pager.read_page(*left_child_page as usize)?; - self.stack.push(mem_page); - found_cell = true; - break; - } else { - self.stack.advance(); - } - } - BTreeCell::IndexLeafCell(_) => { - unreachable!( - "we don't iterate leaf cells while trying to move to a leaf cell" - ); - } - } - } - - if !found_cell { - match contents.rightmost_pointer() { - Some(right_most_pointer) => { - self.stack.advance(); - let mem_page = self.pager.read_page(right_most_pointer as usize)?; - self.stack.push(mem_page); - continue; - } - None => { - unreachable!("we shall not go back up! 
The only way is down the slope"); - } - } + SeekKey::IndexKey(index_key) => { + return self.indexbtree_move_to(index_key, cmp, iter_dir); } } } From 017cdb9568b8f136059b6bc110e99b234a409441 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Apr 2025 10:43:02 +0300 Subject: [PATCH 303/425] btree: avoid reading entire cell when only rowid needed --- core/storage/btree.rs | 132 +++++++++++++++------------------ core/storage/sqlite3_ondisk.rs | 31 ++++++++ 2 files changed, 90 insertions(+), 73 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a454da948..6ecf6ec97 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1250,62 +1250,42 @@ impl BTreeCursor { } let cur_cell_idx = (min + max) / 2; self.stack.set_cell_index(cur_cell_idx as i32); - let cur_cell = contents.cell_get( - cur_cell_idx as usize, - payload_overflow_threshold_max( - contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - )?; - - match &cur_cell { - BTreeCell::TableInteriorCell(TableInteriorCell { - _left_child_page, - _rowid: cell_rowid, - }) => { - // in sqlite btrees left child pages have <= keys. - // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, - // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) - // the left page may have a rowid=10. - // Logic table for determining if target leaf page is in left subtree - // - // Forwards iteration (looking for first match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // GT | > | go left | First > key is in left subtree - // GT | = or < | go right | First > key is in right subtree - // GE | > or = | go left | First >= key is in left subtree - // GE | < | go right | First >= key is in right subtree - // - // Backwards iteration (looking for last match in tree): - // OP | Current Cell vs Seek Key | Action? | Explanation - // LE | > or = | go left | Last <= key is in left subtree - // LE | < | go right | Last <= key is in right subtree - // LT | > or = | go left | Last < key is in left subtree - // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less - // - // No iteration (point query): - // EQ | > or = | go left | Last = key is in left subtree - // EQ | < | go right | Last = key is in right subtree - let is_on_left = match seek_op { - SeekOp::GT => *cell_rowid > rowid, - SeekOp::GE => *cell_rowid >= rowid, - SeekOp::LE => *cell_rowid >= rowid, - SeekOp::LT => *cell_rowid + 1 >= rowid, - SeekOp::EQ => *cell_rowid >= rowid, - }; - if is_on_left { - leftmost_matching_cell = Some(cur_cell_idx as usize); - max = cur_cell_idx - 1; - } else { - min = cur_cell_idx + 1; - } - } - _ => unreachable!("unexpected cell type: {:?}", cur_cell), + let cell_rowid = contents.cell_table_interior_read_rowid(cur_cell_idx as usize)?; + // in sqlite btrees left child pages have <= keys. + // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, + // and we find an interior cell with rowid=10, we need to move to the left page since (due to the <= rule of sqlite btrees) + // the left page may have a rowid=10. + // Logic table for determining if target leaf page is in left subtree + // + // Forwards iteration (looking for first match in tree): + // OP | Current Cell vs Seek Key | Action? 
| Explanation + // GT | > | go left | First > key is in left subtree + // GT | = or < | go right | First > key is in right subtree + // GE | > or = | go left | First >= key is in left subtree + // GE | < | go right | First >= key is in right subtree + // + // Backwards iteration (looking for last match in tree): + // OP | Current Cell vs Seek Key | Action? | Explanation + // LE | > or = | go left | Last <= key is in left subtree + // LE | < | go right | Last <= key is in right subtree + // LT | > or = | go left | Last < key is in left subtree + // LT | < | go right?| Last < key is in right subtree, except if cell rowid is exactly 1 less + // + // No iteration (point query): + // EQ | > or = | go left | Last = key is in left subtree + // EQ | < | go right | Last = key is in right subtree + let is_on_left = match seek_op { + SeekOp::GT => cell_rowid > rowid, + SeekOp::GE => cell_rowid >= rowid, + SeekOp::LE => cell_rowid >= rowid, + SeekOp::LT => cell_rowid + 1 >= rowid, + SeekOp::EQ => cell_rowid >= rowid, + }; + if is_on_left { + leftmost_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } else { + min = cur_cell_idx + 1; } } } @@ -1508,23 +1488,7 @@ impl BTreeCursor { let cur_cell_idx = (min + max) / 2; self.stack.set_cell_index(cur_cell_idx as i32); - let cur_cell = contents.cell_get( - cur_cell_idx as usize, - payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), - payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16), - self.usable_space(), - )?; - - let BTreeCell::TableLeafCell(TableLeafCell { - _rowid: cell_rowid, - _payload, - first_overflow_page, - payload_size, - .. - }) = cur_cell - else { - unreachable!("unexpected cell type: {:?}", cur_cell); - }; + let cell_rowid = contents.cell_table_leaf_read_rowid(cur_cell_idx as usize)?; let cmp = cell_rowid.cmp(&rowid); @@ -1538,6 +1502,28 @@ impl BTreeCursor { // rowids are unique, so we can return the rowid immediately if found && SeekOp::EQ == seek_op { + let cur_cell = contents.cell_get( + cur_cell_idx as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + let BTreeCell::TableLeafCell(TableLeafCell { + _rowid: _, + _payload, + first_overflow_page, + payload_size, + .. + }) = cur_cell + else { + unreachable!("unexpected cell type: {:?}", cur_cell); + }; return_if_io!(self.read_record_w_possible_overflow( _payload, first_overflow_page, diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 5f742887e..fccf233b5 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -598,6 +598,37 @@ impl PageContent { usable_size, ) } + + /// Read the rowid of a table interior cell. + #[inline(always)] + pub fn cell_table_interior_read_rowid(&self, idx: usize) -> Result { + assert!(self.page_type() == PageType::TableInterior); + let buf = self.as_ptr(); + const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; + let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + const LEFT_CHILD_PAGE_SIZE_BYTES: usize = 4; + let (rowid, _) = read_varint(&buf[cell_pointer + LEFT_CHILD_PAGE_SIZE_BYTES..])?; + Ok(rowid) + } + + /// Read the rowid of a table leaf cell. 
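+    /// Per the SQLite file format, a table leaf cell begins with a payload-size
+    /// varint followed by the rowid varint, so the first varint is skipped
+    /// before the rowid is read.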
+ #[inline(always)] + pub fn cell_table_leaf_read_rowid(&self, idx: usize) -> Result { + assert!(self.page_type() == PageType::TableLeaf); + let buf = self.as_ptr(); + const LEAF_PAGE_HEADER_SIZE_BYTES: usize = 8; + let cell_pointer_array_start = LEAF_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + let mut pos = cell_pointer; + let (_, nr) = read_varint(&buf[pos..])?; + pos += nr; + let (rowid, _) = read_varint(&buf[pos..])?; + Ok(rowid) + } + /// The cell pointer array of a b-tree page immediately follows the b-tree page header. /// Let K be the number of cells on the btree. /// The cell pointer array consists of K 2-byte integer offsets to the cell contents. From 83c509a61311a97a5244898ac173e0c37f5b98bf Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 13:47:17 +0300 Subject: [PATCH 304/425] Fix bug: left join null flag not being cleared In left joins, even if the join condition is not matched, the system must emit a row for every row of the outer table: -- this must return t1.count() rows, with NULLs for all columns of t2 SELECT * FROM t1 LEFT JOIN t2 ON FALSE; Our logic for clearing the null flag was to do it in Next/Prev. However, this is problematic for a few reasons: - If the inner table of the left join is using SeekRowid, then Next/Prev is never called on its cursor, so the null flag doesn't get cleared. - If the inner table of the left join is using a non-covering index seek, i.e. it iterates its rows using an index, but seeks to the main table to fetch data, then Next/Prev is never called on the main table, and the main table's null flag doesn't get cleared. What this results in is NULL values incorrectly being emitted for the inner table after the first correct NULL row, since the null flag is correctly set to true, but never cleared. This PR fixes the issue by clearing the null flag whenever seek() is invoked on the cursor. Hence, the null flag is now cleared on: - next() - prev() - seek() --- core/storage/btree.rs | 9 +++++++++ core/translate/main_loop.rs | 27 ++++++++++++++------------- testing/join.test | 12 +++++++++++- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a454da948..6d28625ad 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3416,6 +3416,10 @@ impl BTreeCursor { pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); + // We need to clear the null flag for the table cursor before seeking, + // because it might have been set to false by an unmatched left-join row during the previous iteration + // on the outer loop. + self.set_null_flag(false); let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); self.empty_record.replace(rowid.is_none()); @@ -3789,10 +3793,15 @@ impl BTreeCursor { } } + /// In outer joins, whenever the right-side table has no matching row, the query must still return a row + /// for each left-side row. In order to achieve this, we set the null flag on the right-side table cursor + /// so that it returns NULL for all columns until cleared. 
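+    /// e.g. for SELECT * FROM t1 LEFT JOIN t2 ON FALSE, every t1 row is still
+    /// emitted, with NULLs in all of t2's columns, by setting this flag on t2's
+    /// cursor.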
+ #[inline(always)] pub fn set_null_flag(&mut self, flag: bool) { self.null_flag = flag; } + #[inline(always)] pub fn get_null_flag(&self) -> bool { self.null_flag } diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 74da26438..7354eb4a1 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -853,24 +853,26 @@ pub fn close_loop( // If the left join match flag has been set to 1, we jump to the next row on the outer table, // i.e. continue to the next row of t1 in our example. program.resolve_label(lj_meta.label_match_flag_check_value, program.offset()); - let jump_offset = program.offset().add(3u32); + let label_when_right_table_notnull = program.allocate_label(); program.emit_insn(Insn::IfPos { reg: lj_meta.reg_match_flag, - target_pc: jump_offset, + target_pc: label_when_right_table_notnull, decrement_by: 0, }); // If the left join match flag is still 0, it means there was no match on the right table, // but since it's a LEFT JOIN, we still need to emit a row with NULLs for the right table. // In that case, we now enter the routine that does exactly that. - // First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match &table.op { - Operation::Scan { .. } => program.resolve_cursor_id(&table.identifier), - Operation::Search { .. } => program.resolve_cursor_id(&table.identifier), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); + // First we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL. + // This needs to be set for both the table and the index cursor, if present, + // since even if the iteration cursor is the index cursor, it might fetch values from the table cursor. + [table_cursor_id, index_cursor_id] + .iter() + .filter_map(|maybe_cursor_id| maybe_cursor_id.as_ref()) + .for_each(|cursor_id| { + program.emit_insn(Insn::NullRow { + cursor_id: *cursor_id, + }); + }); // Then we jump to setting the left join match flag to 1 again, // but this time the right table cursor will set everything to null. // This leads to emitting a row with cols from the left + nulls from the right, @@ -880,8 +882,7 @@ pub fn close_loop( program.emit_insn(Insn::Goto { target_pc: lj_meta.label_match_flag_set_true, }); - - assert_eq!(program.offset(), jump_offset); + program.resolve_label(label_when_right_table_notnull, program.offset()); } } } diff --git a/testing/join.test b/testing/join.test index 64b3dcbd3..1f5eb0f1f 100755 --- a/testing/join.test +++ b/testing/join.test @@ -272,4 +272,14 @@ do_execsql_test natural-join-and-using-join { select u.id, u2.id, p.id from users u natural join products p join users u2 using (first_name) limit 3; } {"1|1|1 1|1204|1 -1|1261|1"} \ No newline at end of file +1|1261|1"} + +# regression test for a backwards iteration left join case, +# where the null flag of the right table was not cleared after a previous unmatched row. 
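+# With the data above, user id 12 has no matching product, so that row must come
+# back with a NULL product_name exactly once, and rows 11 and 10 must match again.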
+do_execsql_test left-join-backwards-iteration { + select users.id, users.first_name as user_name, products.name as product_name + from users left join products on users.id = products.id + where users.id < 13 order by users.id desc limit 3; +} {12|Alan| +11|Travis|accessories +10|Daniel|coat} \ No newline at end of file From b550fbb3e4794be681655c7cb1acb6df47efe882 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 17:03:43 -0300 Subject: [PATCH 305/425] Only initialize Rustyline if we are in a tty --- Cargo.lock | 6 +++-- cli/Cargo.toml | 2 ++ cli/app.rs | 59 ++++++++++++++++++++++++++++++++++++++++---------- cli/main.rs | 37 +++++++++++++++++++++++-------- 4 files changed, 82 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 810b9983e..a463c03b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1575,9 +1575,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.171" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libgit2-sys" @@ -1702,8 +1702,10 @@ dependencies = [ "ctrlc", "dirs 5.0.1", "env_logger 0.10.2", + "libc", "limbo_core", "miette", + "nix 0.29.0", "nu-ansi-term 0.50.1", "rustyline", "shlex", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2f1625420..65f39ad15 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -28,10 +28,12 @@ csv = "1.3.1" ctrlc = "3.4.4" dirs = "5.0.1" env_logger = "0.10.1" +libc = "0.2.172" limbo_core = { path = "../core", default-features = true, features = [ "completion", ] } miette = { version = "7.4.0", features = ["fancy"] } +nix = "0.29.0" nu-ansi-term = "0.50.1" rustyline = { version = "15.0.0", default-features = true, features = [ "derive", diff --git a/cli/app.rs b/cli/app.rs index bb9515660..16c464b41 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -7,6 +7,7 @@ use crate::{ helper::LimboHelper, input::{get_io, get_writer, DbLocation, OutputMode, Settings}, opcodes_dictionary::OPCODE_DESCRIPTIONS, + HISTORY_FILE, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult}; @@ -14,10 +15,10 @@ use tracing_appender::non_blocking::WorkerGuard; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use clap::Parser; -use rustyline::{history::DefaultHistory, Editor}; +use rustyline::{error::ReadlineError, history::DefaultHistory, Editor}; use std::{ fmt, - io::{self, Write}, + io::{self, BufRead as _, Write}, path::PathBuf, rc::Rc, sync::{ @@ -62,7 +63,7 @@ pub struct Opts { const PROMPT: &str = "limbo> "; -pub struct Limbo<'a> { +pub struct Limbo { pub prompt: String, io: Arc, writer: Box, @@ -70,7 +71,7 @@ pub struct Limbo<'a> { pub interrupt_count: Arc, input_buff: String, opts: Settings, - pub rl: &'a mut Editor, + pub rl: Option>, } struct QueryStatistics { @@ -105,8 +106,8 @@ macro_rules! 
query_internal { static COLORS: &[Color] = &[Color::Green, Color::Black, Color::Grey]; -impl<'a> Limbo<'a> { - pub fn new(rl: &'a mut rustyline::Editor) -> anyhow::Result { +impl Limbo { + pub fn new() -> anyhow::Result { let opts = Opts::parse(); let db_file = opts .database @@ -133,8 +134,6 @@ impl<'a> Limbo<'a> { ) }; let conn = db.connect()?; - let h = LimboHelper::new(conn.clone(), io.clone()); - rl.set_helper(Some(h)); let interrupt_count = Arc::new(AtomicUsize::new(0)); { let interrupt_count: Arc = Arc::clone(&interrupt_count); @@ -154,12 +153,19 @@ impl<'a> Limbo<'a> { interrupt_count, input_buff: String::new(), opts: Settings::from(opts), - rl, + rl: None, }; app.first_run(sql, quiet)?; Ok(app) } + pub fn with_readline(mut self, mut rl: Editor) -> Self { + let h = LimboHelper::new(self.conn.clone(), self.io.clone()); + rl.set_helper(Some(h)); + self.rl = Some(rl); + self + } + fn first_run(&mut self, sql: Option, quiet: bool) -> io::Result<()> { if let Some(sql) = sql { self.handle_first_input(&sql); @@ -470,8 +476,9 @@ impl<'a> Limbo<'a> { } } - fn reset_line(&mut self, line: &str) -> rustyline::Result<()> { - self.rl.add_history_entry(line.to_owned())?; + fn reset_line(&mut self, _line: &str) -> rustyline::Result<()> { + // Entry is auto added to history + // self.rl.add_history_entry(line.to_owned())?; self.interrupt_count.store(0, Ordering::SeqCst); Ok(()) } @@ -973,4 +980,34 @@ impl<'a> Limbo<'a> { self.run_query(buff.as_str()); self.reset_input(); } + + pub fn readline(&mut self) -> Result { + if let Some(rl) = &mut self.rl { + Ok(rl.readline(&self.prompt)?) + } else { + let mut input = String::new(); + println!(""); + let mut reader = std::io::stdin().lock(); + if reader.read_line(&mut input)? == 0 { + return Err(ReadlineError::Eof.into()); + } + // Remove trailing newline + if input.ends_with('\n') { + input.pop(); + if input.ends_with('\r') { + input.pop(); + } + } + + Ok(input) + } + } +} + +impl Drop for Limbo { + fn drop(&mut self) { + if let Some(rl) = &mut self.rl { + let _ = rl.save_history(HISTORY_FILE.as_path()); + } + } } diff --git a/cli/main.rs b/cli/main.rs index ec81b64af..f0fe4c934 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -5,26 +5,41 @@ mod helper; mod input; mod opcodes_dictionary; +use nix::unistd::isatty; use rustyline::{error::ReadlineError, Config, Editor}; -use std::sync::atomic::Ordering; +use std::{ + path::PathBuf, + sync::{atomic::Ordering, LazyLock}, +}; fn rustyline_config() -> Config { Config::builder() .completion_type(rustyline::CompletionType::List) + .auto_add_history(true) .build() } +pub static HOME_DIR: LazyLock = + LazyLock::new(|| dirs::home_dir().expect("Could not determine home directory")); + +pub static HISTORY_FILE: LazyLock = LazyLock::new(|| HOME_DIR.join(".limbo_history")); + fn main() -> anyhow::Result<()> { - let mut rl = Editor::with_config(rustyline_config())?; - let mut app = app::Limbo::new(&mut rl)?; + let mut app = app::Limbo::new()?; let _guard = app.init_tracing()?; - let home = dirs::home_dir().expect("Could not determine home directory"); - let history_file = home.join(".limbo_history"); - if history_file.exists() { - app.rl.load_history(history_file.as_path())?; + + if is_a_tty() { + let mut rl = Editor::with_config(rustyline_config())?; + if HISTORY_FILE.exists() { + rl.load_history(HISTORY_FILE.as_path())?; + } + app = app.with_readline(rl); + } else { + tracing::debug!("not in tty"); } + loop { - let readline = app.rl.readline(&app.prompt); + let readline = app.readline(); match readline { Ok(line) => match 
app.handle_input_line(line.trim()) { Ok(_) => {} @@ -54,6 +69,10 @@ fn main() -> anyhow::Result<()> { } } } - rl.save_history(history_file.as_path())?; Ok(()) } + +/// Return whether or not STDIN is a TTY +fn is_a_tty() -> bool { + isatty(libc::STDIN_FILENO).unwrap_or(false) +} From 7aaffff45fd0c847865982737664c85a83bf60c7 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Sun, 20 Apr 2025 18:11:45 -0300 Subject: [PATCH 306/425] Correct for Windows --- Cargo.lock | 1 + cli/Cargo.toml | 9 +++++++- cli/main.rs | 56 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a463c03b3..c5563a1ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1713,6 +1713,7 @@ dependencies = [ "tracing", "tracing-appender", "tracing-subscriber", + "windows-sys 0.59.0", ] [[package]] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 65f39ad15..2dbcb5f79 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -33,7 +33,6 @@ limbo_core = { path = "../core", default-features = true, features = [ "completion", ] } miette = { version = "7.4.0", features = ["fancy"] } -nix = "0.29.0" nu-ansi-term = "0.50.1" rustyline = { version = "15.0.0", default-features = true, features = [ "derive", @@ -44,6 +43,14 @@ tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } +[target.'cfg(target_family = "unix")'.dependencies] +nix = "0.29.0" + +[target.'cfg(windows)'.dependencies] +windows-sys = { version = "0.59.0", features = [ + "Win32_Foundation", + "Win32_System_Console", +] } [features] default = ["io_uring"] diff --git a/cli/main.rs b/cli/main.rs index f0fe4c934..de77f2d6d 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -5,6 +5,7 @@ mod helper; mod input; mod opcodes_dictionary; +#[cfg(unix)] use nix::unistd::isatty; use rustyline::{error::ReadlineError, Config, Editor}; use std::{ @@ -74,5 +75,58 @@ fn main() -> anyhow::Result<()> { /// Return whether or not STDIN is a TTY fn is_a_tty() -> bool { - isatty(libc::STDIN_FILENO).unwrap_or(false) + #[cfg(unix)] + { + isatty(libc::STDIN_FILENO).unwrap_or(false) + } + #[cfg(windows)] + { + let handle = windows::get_std_handle(windows::console::STD_INPUT_HANDLE); + match handle { + Ok(handle) => { + // If this function doesn't fail then fd is a TTY + windows::get_console_mode(handle).is_ok() + } + Err(_) => false, + } + } +} + +// Code acquired from Rustyline +#[cfg(windows)] +mod windows { + use std::io; + use windows_sys::Win32::Foundation::{self as foundation, BOOL, FALSE, HANDLE}; + pub use windows_sys::Win32::System::Console as console; + + pub fn get_console_mode(handle: HANDLE) -> rustyline::Result { + let mut original_mode = 0; + check(unsafe { console::GetConsoleMode(handle, &mut original_mode) })?; + Ok(original_mode) + } + + pub fn get_std_handle(fd: console::STD_HANDLE) -> rustyline::Result { + let handle = unsafe { console::GetStdHandle(fd) }; + check_handle(handle) + } + + fn check_handle(handle: HANDLE) -> rustyline::Result { + if handle == foundation::INVALID_HANDLE_VALUE { + Err(io::Error::last_os_error())?; + } else if handle.is_null() { + Err(io::Error::new( + io::ErrorKind::Other, + "no stdio handle available for this process", + ))?; + } + Ok(handle) + } + + fn check(rc: BOOL) -> io::Result<()> { + if rc == FALSE { + Err(io::Error::last_os_error()) + } else { + Ok(()) + } + } } From 277f6f1083fc4a1aeab55e11cea3824ca0bf32cc Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Mon, 21 Apr 2025 01:03:31 -0300 Subject: [PATCH 307/425] 
Switch to using `std::io::IsTerminal::is_terminal` --- Cargo.lock | 2 -- cli/Cargo.toml | 9 -------- cli/main.rs | 62 +------------------------------------------------- 3 files changed, 1 insertion(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5563a1ff..ca334c084 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1705,7 +1705,6 @@ dependencies = [ "libc", "limbo_core", "miette", - "nix 0.29.0", "nu-ansi-term 0.50.1", "rustyline", "shlex", @@ -1713,7 +1712,6 @@ dependencies = [ "tracing", "tracing-appender", "tracing-subscriber", - "windows-sys 0.59.0", ] [[package]] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2dbcb5f79..253c08b45 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -43,15 +43,6 @@ tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -[target.'cfg(target_family = "unix")'.dependencies] -nix = "0.29.0" - -[target.'cfg(windows)'.dependencies] -windows-sys = { version = "0.59.0", features = [ - "Win32_Foundation", - "Win32_System_Console", -] } - [features] default = ["io_uring"] io_uring = ["limbo_core/io_uring"] diff --git a/cli/main.rs b/cli/main.rs index de77f2d6d..82eb64953 100644 --- a/cli/main.rs +++ b/cli/main.rs @@ -5,8 +5,6 @@ mod helper; mod input; mod opcodes_dictionary; -#[cfg(unix)] -use nix::unistd::isatty; use rustyline::{error::ReadlineError, Config, Editor}; use std::{ path::PathBuf, @@ -29,7 +27,7 @@ fn main() -> anyhow::Result<()> { let mut app = app::Limbo::new()?; let _guard = app.init_tracing()?; - if is_a_tty() { + if std::io::IsTerminal::is_terminal(&std::io::stdin()) { let mut rl = Editor::with_config(rustyline_config())?; if HISTORY_FILE.exists() { rl.load_history(HISTORY_FILE.as_path())?; @@ -72,61 +70,3 @@ fn main() -> anyhow::Result<()> { } Ok(()) } - -/// Return whether or not STDIN is a TTY -fn is_a_tty() -> bool { - #[cfg(unix)] - { - isatty(libc::STDIN_FILENO).unwrap_or(false) - } - #[cfg(windows)] - { - let handle = windows::get_std_handle(windows::console::STD_INPUT_HANDLE); - match handle { - Ok(handle) => { - // If this function doesn't fail then fd is a TTY - windows::get_console_mode(handle).is_ok() - } - Err(_) => false, - } - } -} - -// Code acquired from Rustyline -#[cfg(windows)] -mod windows { - use std::io; - use windows_sys::Win32::Foundation::{self as foundation, BOOL, FALSE, HANDLE}; - pub use windows_sys::Win32::System::Console as console; - - pub fn get_console_mode(handle: HANDLE) -> rustyline::Result { - let mut original_mode = 0; - check(unsafe { console::GetConsoleMode(handle, &mut original_mode) })?; - Ok(original_mode) - } - - pub fn get_std_handle(fd: console::STD_HANDLE) -> rustyline::Result { - let handle = unsafe { console::GetStdHandle(fd) }; - check_handle(handle) - } - - fn check_handle(handle: HANDLE) -> rustyline::Result { - if handle == foundation::INVALID_HANDLE_VALUE { - Err(io::Error::last_os_error())?; - } else if handle.is_null() { - Err(io::Error::new( - io::ErrorKind::Other, - "no stdio handle available for this process", - ))?; - } - Ok(handle) - } - - fn check(rc: BOOL) -> io::Result<()> { - if rc == FALSE { - Err(io::Error::last_os_error()) - } else { - Ok(()) - } - } -} From b945e9b2a06ef67c47d13ed93cd1b75bcb2324d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20K=C3=B6sters?= Date: Mon, 21 Apr 2025 10:13:46 +0200 Subject: [PATCH 308/425] docs: add Rust to "Getting Started" section --- README.md | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 
d7619dab9..cc72d1133 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ In the future, we will be also working on:
You can install the latest `limbo` release with: -```shell +```shell curl --proto '=https' --tlsv1.2 -LsSf \ https://github.com/tursodatabase/limbo/releases/latest/download/limbo_cli-installer.sh | sh ``` @@ -72,6 +72,24 @@ cargo run ``` +
 </details>
 
+<details>
+<summary>🦀 Rust</summary>
+
+```console
+cargo add limbo
+```
+
+Example usage:
+
+```rust
+let db = Builder::new_local("sqlite.db").build().await?;
+let conn = db.connect()?;
+
+let res = conn.query("SELECT * FROM users", ()).await?;
+```
+</details>
+
 <details>
 <summary>✨ JavaScript</summary>
 
@@ -144,7 +162,7 @@ defer stmt.Close()
 rows, _ = stmt.Query()
 for rows.Next() {
-    var id int 
+    var id int
     var username string
     _ := rows.Scan(&id, &username)
     fmt.Printf("User: ID: %d, Username: %s\n", id, username)
@@ -153,7 +171,7 @@ for rows.Next() {
 </details>
 
-<details> 
+<details>
 <summary>☕️ Java</summary>
 
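The Rust example added in this patch is deliberately minimal. A fuller sketch of the same flow is below; the tokio runtime, the `anyhow` error type, and the `rows.next()` iteration style are assumptions (mirroring libsql-style bindings), not something this patch confirms:

```rust
use limbo::Builder;

// Hypothetical expansion of the README example. The async runtime,
// error type, and row-iteration API are assumed here; only Builder,
// build(), connect() and query() appear in the patch itself.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let db = Builder::new_local("sqlite.db").build().await?;
    let conn = db.connect()?;

    let mut rows = conn.query("SELECT * FROM users", ()).await?;
    while let Some(row) = rows.next().await? {
        // Printing assumes the row type implements Debug.
        println!("{:?}", row);
    }
    Ok(())
}
```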
From 1c827524735d53da95318707574f6a359652c63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20K=C3=B6sters?= Date: Mon, 21 Apr 2025 13:14:12 +0200 Subject: [PATCH 309/425] feat: Statement::columns function for Rust bindings --- bindings/rust/src/lib.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 61e6271c9..8c57e7909 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -190,6 +190,39 @@ impl Statement { } } } + + pub fn columns(&self) -> Vec { + let stmt = self.inner.lock().unwrap(); + + let n = stmt.num_columns(); + + let mut cols = Vec::with_capacity(n); + + for i in 0..n { + let name = stmt.get_column_name(i).into_owned(); + cols.push(Column { + name, + decl_type: None, // TODO + }); + } + + cols + } +} + +pub struct Column { + name: String, + decl_type: Option, +} + +impl Column { + pub fn name(&self) -> &str { + &self.name + } + + pub fn decl_type(&self) -> Option<&str> { + self.decl_type.as_deref() + } } pub trait IntoValue { From d0da7307be40e0ff915cb5e9abea3dfc088c78d6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Tue, 15 Apr 2025 20:33:57 +0300 Subject: [PATCH 310/425] Index: add new field ephemeral: bool --- core/schema.rs | 3 +++ core/translate/index.rs | 1 + 2 files changed, 4 insertions(+) diff --git a/core/schema.rs b/core/schema.rs index 0a5a8d80f..dd09671ab 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -692,6 +692,7 @@ pub struct Index { pub root_page: usize, pub columns: Vec, pub unique: bool, + pub ephemeral: bool, } #[allow(dead_code)] @@ -741,6 +742,7 @@ impl Index { root_page, columns: index_columns, unique, + ephemeral: false, }) } _ => todo!("Expected create index statement"), @@ -783,6 +785,7 @@ impl Index { root_page, columns: index_columns, unique: true, // Primary key indexes are always unique + ephemeral: false, }) } diff --git a/core/translate/index.rs b/core/translate/index.rs index de79aed23..55222e40f 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -62,6 +62,7 @@ pub fn translate_create_index( }) .collect(), unique: unique_if_not_exists.0, + ephemeral: false, }); // Allocate the necessary cursors: From 09ad6d8f0149020fe1a5c81832e021ad095782a5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 14:23:13 +0300 Subject: [PATCH 311/425] vdbe: resolve labels for Insn::Once --- core/vdbe/builder.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 648044d1d..05fdc4938 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -363,6 +363,12 @@ impl ProgramBuilder { Insn::Next { pc_if_next, .. } => { resolve(pc_if_next, "Next"); } + Insn::Once { + target_pc_when_reentered, + .. + } => { + resolve(target_pc_when_reentered, "Once"); + } Insn::Prev { pc_if_prev, .. } => { resolve(pc_if_prev, "Prev"); } From c1b2dfc32b17fcd75f756b16a909dd4581b3c213 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:20:25 +0300 Subject: [PATCH 312/425] TableReference: add method column_is_used() --- core/translate/plan.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 07a8de392..46d4d38da 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -590,6 +590,10 @@ impl TableReference { }; self.index_is_covering(index.as_ref()) } + + pub fn column_is_used(&self, index: usize) -> bool { + self.col_used_mask.get(index) + } } /// A definition of a rowid/index search. 
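Patches 309 through 312 above are small, self-contained API additions. As an illustration, the new `Statement::columns` from patch 309 could be exercised as follows; this is a sketch only, with the `prepare` call and `?` error handling assumed from the surrounding bindings code:

```rust
// Hypothetical caller of the new Statement::columns() API.
let stmt = conn.prepare("SELECT id, username FROM users")?;
for col in stmt.columns() {
    // decl_type() currently always returns None; the patch leaves it as a TODO.
    println!("{}: {:?}", col.name(), col.decl_type());
}
```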
From af21f60887bc4fce20930c7c152def5c0b619450 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Wed, 16 Apr 2025 14:22:54 +0300
Subject: [PATCH 313/425] translate/main_loop: create autoindex when
 index.ephemeral=true

---
 core/translate/main_loop.rs | 135 ++++++++++++++++++++++++++++++------
 core/translate/plan.rs      |   5 +-
 2 files changed, 119 insertions(+), 21 deletions(-)

diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index 7354eb4a1..c56680446 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -1,14 +1,16 @@
 use limbo_ext::VTabKind;
 use limbo_sqlite3_parser::ast;
 
+use std::sync::Arc;
+
 use crate::{
-    schema::Table,
+    schema::{Index, Table},
     translate::result_row::emit_select_result,
     types::SeekOp,
     vdbe::{
         builder::ProgramBuilder,
-        insn::{CmpInsFlags, Insn},
-        BranchOffset,
+        insn::{CmpInsFlags, IdxInsertFlags, Insn},
+        BranchOffset, CursorID,
     },
     Result,
 };
@@ -156,23 +158,26 @@ pub fn init_loop(
         index: Some(index), ..
     } = search
     {
-        match mode {
-            OperationMode::SELECT => {
-                program.emit_insn(Insn::OpenRead {
-                    cursor_id: index_cursor_id
-                        .expect("index cursor is always opened in Seek with index"),
-                    root_page: index.root_page,
-                });
-            }
-            OperationMode::UPDATE | OperationMode::DELETE => {
-                program.emit_insn(Insn::OpenWrite {
-                    cursor_id: index_cursor_id
-                        .expect("index cursor is always opened in Seek with index"),
-                    root_page: index.root_page.into(),
-                });
-            }
-            _ => {
-                unimplemented!()
+        // Ephemeral index cursors are opened ad-hoc when needed.
+        if !index.ephemeral {
+            match mode {
+                OperationMode::SELECT => {
+                    program.emit_insn(Insn::OpenRead {
+                        cursor_id: index_cursor_id
+                            .expect("index cursor is always opened in Seek with index"),
+                        root_page: index.root_page,
+                    });
+                }
+                OperationMode::UPDATE | OperationMode::DELETE => {
+                    program.emit_insn(Insn::OpenWrite {
+                        cursor_id: index_cursor_id
+                            .expect("index cursor is always opened in Seek with index"),
+                        root_page: index.root_page.into(),
+                    });
+                }
+                _ => {
+                    unimplemented!()
+                }
             }
         }
     }
@@ -437,6 +442,32 @@ pub fn open_loop(
             });
         } else {
            // Otherwise, it's an index/rowid scan, i.e. first a seek is performed and then a scan until the comparison expression is not satisfied anymore.
+            let index_cursor_id = if let Search::Seek {
+                index: Some(index), ..
+            } = search
+            {
+                if index.ephemeral {
+                    let table_has_rowid = if let Table::BTree(btree) = &table.table {
+                        btree.has_rowid
+                    } else {
+                        false
+                    };
+                    Some(emit_autoindex(
+                        program,
+                        &index,
+                        table_cursor_id
+                            .expect("an ephemeral index must have a source table cursor"),
+                        index_cursor_id
+                            .expect("an ephemeral index must have an index cursor"),
+                        table_has_rowid,
+                    )?)
+                } else {
+                    index_cursor_id
+                }
+            } else {
+                index_cursor_id
+            };
+
             let is_index = index_cursor_id.is_some();
             let seek_cursor_id = index_cursor_id.unwrap_or_else(|| {
                 table_cursor_id.expect("Either index or table cursor must be opened")
@@ -1125,3 +1156,67 @@
 
     Ok(())
 }
+
+/// Open an ephemeral index cursor and build an automatic index on a table.
+/// This is used as a last resort to avoid a nested full table scan.
+/// Returns the cursor id of the ephemeral index cursor.
+fn emit_autoindex(
+    program: &mut ProgramBuilder,
+    index: &Arc<Index>,
+    table_cursor_id: CursorID,
+    index_cursor_id: CursorID,
+    table_has_rowid: bool,
+) -> Result<CursorID> {
+    assert!(index.ephemeral, "Index {} is not ephemeral", index.name);
+    let label_ephemeral_build_end = program.allocate_label();
+    // Since this typically happens in an inner loop, we only build it once.
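+    // (Insn::Once falls through on its first execution and jumps to
+    // target_pc_when_reentered on every later pass, so the build loop
+    // below runs a single time even when this code sits inside a
+    // nested loop.)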
+ program.emit_insn(Insn::Once { + target_pc_when_reentered: label_ephemeral_build_end, + }); + program.emit_insn(Insn::OpenAutoindex { + cursor_id: index_cursor_id, + }); + // Rewind source table + program.emit_insn(Insn::Rewind { + cursor_id: table_cursor_id, + pc_if_empty: label_ephemeral_build_end, + }); + let offset_ephemeral_build_loop_start = program.offset(); + // Emit all columns from source table that are needed in the ephemeral index. + // Also reserve a register for the rowid if the source table has rowids. + let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize; + let ephemeral_cols_start_reg = program.alloc_registers(num_regs_to_reserve); + for (i, col) in index.columns.iter().enumerate() { + let reg = ephemeral_cols_start_reg + i; + program.emit_insn(Insn::Column { + cursor_id: table_cursor_id, + column: col.pos_in_table, + dest: reg, + }); + } + if table_has_rowid { + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: ephemeral_cols_start_reg + index.columns.len(), + }); + } + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: ephemeral_cols_start_reg, + count: num_regs_to_reserve, + dest_reg: record_reg, + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor_id, + record_reg, + unpacked_start: Some(ephemeral_cols_start_reg), + unpacked_count: Some(num_regs_to_reserve as u16), + flags: IdxInsertFlags::new().use_seek(false), + }); + program.emit_insn(Insn::Next { + cursor_id: table_cursor_id, + pc_if_next: offset_ephemeral_build_loop_start, + }); + program.resolve_label(label_ephemeral_build_end, program.offset()); + Ok(index_cursor_id) +} diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 46d4d38da..51bc3f7c6 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -509,7 +509,10 @@ impl TableReference { match &self.table { Table::BTree(btree) => { let use_covering_index = self.utilizes_covering_index(); - let table_cursor_id = if use_covering_index && mode == OperationMode::SELECT { + let index_is_ephemeral = index.map_or(false, |index| index.ephemeral); + let table_not_required = + OperationMode::SELECT == mode && use_covering_index && !index_is_ephemeral; + let table_cursor_id = if table_not_required { None } else { Some(program.alloc_cursor_id( From a50fa03d247af77c6f19d885f585939f126b8412 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:21:34 +0300 Subject: [PATCH 314/425] optimizer: allow calling try_extract_index... without any persistent indexes --- core/translate/optimizer.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index fe764ee50..8e796c175 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -355,15 +355,18 @@ fn use_indexes( // but we just don't do that yet. continue; } + let placeholder = vec![]; + let mut usable_indexes_ref = &placeholder; if let Some(indexes) = available_indexes.get(table_name) { - if let Some(search) = try_extract_index_search_from_where_clause( - where_clause, - table_index, - table_reference, - indexes, - )? { - table_reference.op = Operation::Search(search); - } + usable_indexes_ref = indexes; + } + if let Some(search) = try_extract_index_search_from_where_clause( + where_clause, + table_index, + table_reference, + usable_indexes_ref, + )? 
{ + table_reference.op = Operation::Search(search); } } } @@ -730,10 +733,6 @@ pub fn try_extract_index_search_from_where_clause( if where_clause.is_empty() { return Ok(None); } - // If there are no indexes, we can't extract a search - if table_indexes.is_empty() { - return Ok(None); - } let iter_dir = if let Operation::Scan { iter_dir, .. } = &table_reference.op { *iter_dir From 6924424f11bc5c5f02c3da816e49c96d5cd32e66 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:22:40 +0300 Subject: [PATCH 315/425] optimizer: add highly unintelligent heuristics-based cost estimation --- core/translate/optimizer.rs | 160 ++++++++++++++++++++++++++++-------- 1 file changed, 125 insertions(+), 35 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 8e796c175..872758257 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -713,14 +713,80 @@ fn opposite_cmp_op(op: ast::Operator) -> ast::Operator { } /// Struct used for scoring index scans -/// Currently we just score by the number of index columns that can be utilized -/// in the scan, i.e. no statistics are used. +/// Currently we just estimate cost in a really dumb way, +/// i.e. no statistics are used. struct IndexScore { index: Option>, - score: usize, + cost: f64, constraints: Vec, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct IndexInfo { + unique: bool, + column_count: usize, +} + +const ESTIMATED_HARDCODED_ROWS_PER_TABLE: f64 = 1000.0; + +/// Unbelievably dumb cost estimate for rows scanned by an index scan. +fn dumb_cost_estimator( + index_info: Option, + constraints: &[IndexConstraint], + is_inner_loop: bool, + is_ephemeral: bool, +) -> f64 { + // assume that the outer table always does a full table scan :) + // this discourages building ephemeral indexes on the outer table + // (since a scan reads TABLE_ROWS rows, so an ephemeral index on the outer table would both read TABLE_ROWS rows to build the index and then seek the index) + // but encourages building it on the inner table because it's only built once but the inner loop is run as many times as the outer loop has iterations. + let loop_multiplier = if is_inner_loop { + ESTIMATED_HARDCODED_ROWS_PER_TABLE + } else { + 1.0 + }; + + // If we are building an ephemeral index, we assume we will scan the entire source table to build it. + // Non-ephemeral indexes don't need to be built. + let cost_to_build_index = is_ephemeral as usize as f64 * ESTIMATED_HARDCODED_ROWS_PER_TABLE; + + let Some(index_info) = index_info else { + return cost_to_build_index + ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier; + }; + + let final_constraint_is_range = constraints + .last() + .map_or(false, |c| c.operator != ast::Operator::Equals); + let equalities_count = constraints + .iter() + .take(if final_constraint_is_range { + constraints.len() - 1 + } else { + constraints.len() + }) + .count() as f64; + + let selectivity = match ( + index_info.unique, + index_info.column_count as f64, + equalities_count, + ) { + // no equalities: let's assume range query selectivity is 0.4. 
if final constraint is not range and there are no equalities, it means full table scan incoming + (_, _, 0.0) => { + if final_constraint_is_range { + 0.4 + } else { + 1.0 + } + } + // on an unique index if we have equalities across all index columns, assume very high selectivity + (true, index_cols, eq_count) if eq_count == index_cols => 0.01 * eq_count, + // some equalities: let's assume each equality has a selectivity of 0.1 and range query selectivity is 0.4 + (_, _, eq_count) => (eq_count * 0.1) * if final_constraint_is_range { 0.4 } else { 1.0 }, + }; + cost_to_build_index + selectivity * ESTIMATED_HARDCODED_ROWS_PER_TABLE * loop_multiplier +} + /// Try to extract an index search from the WHERE clause /// Returns an optional [Search] struct if an index search can be extracted, otherwise returns None. pub fn try_extract_index_search_from_where_clause( @@ -747,10 +813,11 @@ pub fn try_extract_index_search_from_where_clause( // 3. constrain the index columns in the order that they appear in the index // - e.g. if the index is on (a,b,c) then we can use all of "a = 1 AND b = 2 AND c = 3" to constrain the index scan, // - but if the where clause is "a = 1 and c = 3" then we can only use "a = 1". + let cost_of_full_table_scan = dumb_cost_estimator(None, &[], table_index != 0, false); let mut constraints_cur = vec![]; let mut best_index = IndexScore { index: None, - score: 0, + cost: cost_of_full_table_scan, constraints: vec![], }; @@ -759,10 +826,18 @@ pub fn try_extract_index_search_from_where_clause( find_index_constraints(where_clause, table_index, index, &mut constraints_cur)?; // naive scoring since we don't have statistics: prefer the index where we can use the most columns // e.g. if we can use all columns of an index on (a,b), it's better than an index of (c,d,e) where we can only use c. - let score = constraints_cur.len(); - if score > best_index.score { + let cost = dumb_cost_estimator( + Some(IndexInfo { + unique: index.unique, + column_count: index.columns.len(), + }), + &constraints_cur, + table_index != 0, + false, + ); + if cost < best_index.cost { best_index.index = Some(Arc::clone(index)); - best_index.score = score; + best_index.cost = cost; best_index.constraints.clear(); best_index.constraints.append(&mut constraints_cur); } @@ -873,6 +948,45 @@ fn get_column_position_in_index( Ok(index.column_table_pos_to_index_pos(*column)) } +fn is_potential_index_constraint(term: &WhereTerm, table_index: usize) -> bool { + // Skip terms that cannot be evaluated at this table's loop level + if !term.should_eval_at_loop(table_index) { + return false; + } + // Skip terms that are not binary comparisons + let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else { + return false; + }; + // Only consider index scans for binary ops that are comparisons + if !matches!( + *operator, + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals + | ast::Operator::Less + | ast::Operator::LessEquals + ) { + return false; + } + + // If both lhs and rhs refer to columns from this table, we can't use this constraint + // because we can't use the index to satisfy the condition. 
+ // Examples: + // - WHERE t.x > t.y + // - WHERE t.x + 1 > t.y - 5 + // - WHERE t.x = (t.x) + let Ok(eval_at_left) = determine_where_to_eval_expr(&lhs) else { + return false; + }; + let Ok(eval_at_right) = determine_where_to_eval_expr(&rhs) else { + return false; + }; + if eval_at_left == EvalAt::Loop(table_index) && eval_at_right == EvalAt::Loop(table_index) { + return false; + } + true +} + /// Find all [IndexConstraint]s for a given WHERE clause /// Constraints are appended as long as they constrain the index in column order. /// E.g. for index (a,b,c) to be fully used, there must be a [WhereTerm] for each of a, b, and c. @@ -886,37 +1000,13 @@ fn find_index_constraints( for position_in_index in 0..index.columns.len() { let mut found = false; for (position_in_where_clause, term) in where_clause.iter().enumerate() { - // Skip terms that cannot be evaluated at this table's loop level - if !term.should_eval_at_loop(table_index) { - continue; - } - // Skip terms that are not binary comparisons - let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else { - continue; - }; - // Only consider index scans for binary ops that are comparisons - if !matches!( - *operator, - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals - | ast::Operator::Less - | ast::Operator::LessEquals - ) { + if !is_potential_index_constraint(term, table_index) { continue; } - // If both lhs and rhs refer to columns from this table, we can't use this constraint - // because we can't use the index to satisfy the condition. - // Examples: - // - WHERE t.x > t.y - // - WHERE t.x + 1 > t.y - 5 - // - WHERE t.x = (t.x) - if determine_where_to_eval_expr(&lhs)? == EvalAt::Loop(table_index) - && determine_where_to_eval_expr(&rhs)? == EvalAt::Loop(table_index) - { - continue; - } + let ast::Expr::Binary(lhs, operator, rhs) = unwrap_parens(&term.expr)? else { + panic!("expected binary expression"); + }; // Check if lhs is a column that is in the i'th position of the index if Some(position_in_index) == get_column_position_in_index(lhs, table_index, index)? { From 3b44b269a31eb00be79a9fe74fd9daf2a5bf58ae Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 12:22:57 +0300 Subject: [PATCH 316/425] optimizer: try to build ephemeral index to avoid nested table scan --- core/translate/optimizer.rs | 157 +++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 872758257..41e34418e 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,9 +1,9 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{cmp::Ordering, collections::HashMap, sync::Arc}; use limbo_sqlite3_parser::ast::{self, Expr, SortOrder}; use crate::{ - schema::{Index, Schema}, + schema::{Index, IndexColumn, Schema}, translate::plan::TerminationKey, types::SeekOp, util::exprs_are_equivalent, @@ -843,6 +843,25 @@ pub fn try_extract_index_search_from_where_clause( } } + // We haven't found a persistent btree index that is any better than a full table scan; + // let's see if building an ephemeral index would be better. + if best_index.index.is_none() { + let (ephemeral_cost, constraints_with_col_idx, mut constraints_without_col_idx) = + ephemeral_index_estimate_cost(where_clause, table_reference, table_index); + if ephemeral_cost < best_index.cost { + // ephemeral index makes sense, so let's build it now. 
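+            // (Worked example with the hardcoded 1000-row estimate: a nested
+            // full scan of the inner table costs 1000 * 1000 = 1,000,000 row
+            // visits, while an ephemeral index costs ~1000 to build and a
+            // single-equality probe reads about 0.1 * 1000 = 100 rows per
+            // outer row, i.e. 1000 + 0.1 * 1000 * 1000 = 101,000 in total,
+            // so the index wins.)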
+            // ephemeral columns are: columns from the table_reference, constraints first, then the rest
+            let ephemeral_index =
+                ephemeral_index_build(table_reference, table_index, &constraints_with_col_idx);
+            best_index.index = Some(Arc::new(ephemeral_index));
+            best_index.cost = ephemeral_cost;
+            best_index.constraints.clear();
+            best_index
+                .constraints
+                .append(&mut constraints_without_col_idx);
+        }
+    }
+
     if best_index.index.is_none() {
         return Ok(None);
     }
@@ -869,6 +888,140 @@
     }));
 }
 
+fn ephemeral_index_estimate_cost(
+    where_clause: &mut Vec<WhereTerm>,
+    table_reference: &TableReference,
+    table_index: usize,
+) -> (f64, Vec<(usize, IndexConstraint)>, Vec<IndexConstraint>) {
+    let mut constraints_with_col_idx: Vec<(usize, IndexConstraint)> = where_clause
+        .iter()
+        .enumerate()
+        .filter(|(_, term)| is_potential_index_constraint(term, table_index))
+        .filter_map(|(i, term)| {
+            let Ok(ast::Expr::Binary(lhs, operator, rhs)) = unwrap_parens(&term.expr) else {
+                panic!("expected binary expression");
+            };
+            if let ast::Expr::Column { table, column, .. } = lhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Rhs),
+                            operator: *operator,
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            if let ast::Expr::Column { table, column, .. } = rhs.as_ref() {
+                if *table == table_index {
+                    return Some((
+                        *column,
+                        IndexConstraint {
+                            position_in_where_clause: (i, BinaryExprSide::Lhs),
+                            operator: opposite_cmp_op(*operator),
+                            index_column_sort_order: SortOrder::Asc,
+                        },
+                    ));
+                }
+            }
+            None
+        })
+        .collect();
+    // sort equalities first
+    constraints_with_col_idx.sort_by(|a, _| {
+        if a.1.operator == ast::Operator::Equals {
+            Ordering::Less
+        } else {
+            Ordering::Equal
+        }
+    });
+    // drop everything after the first inequality
+    constraints_with_col_idx.truncate(
+        constraints_with_col_idx
+            .iter()
+            .position(|c| c.1.operator != ast::Operator::Equals)
+            .unwrap_or(constraints_with_col_idx.len()),
+    );
+
+    let ephemeral_column_count = table_reference
+        .columns()
+        .iter()
+        .enumerate()
+        .filter(|(i, _)| table_reference.column_is_used(*i))
+        .count();
+
+    let constraints_without_col_idx = constraints_with_col_idx
+        .iter()
+        .cloned()
+        .map(|(_, c)| c)
+        .collect::<Vec<_>>();
+    let ephemeral_cost = dumb_cost_estimator(
+        Some(IndexInfo {
+            unique: false,
+            column_count: ephemeral_column_count,
+        }),
+        &constraints_without_col_idx,
+        table_index != 0,
+        true,
+    );
+    (
+        ephemeral_cost,
+        constraints_with_col_idx,
+        constraints_without_col_idx,
+    )
+}
+
+fn ephemeral_index_build(
+    table_reference: &TableReference,
+    table_index: usize,
+    index_constraints: &[(usize, IndexConstraint)],
+) -> Index {
+    let mut ephemeral_columns: Vec<IndexColumn> = table_reference
+        .columns()
+        .iter()
+        .enumerate()
+        .map(|(i, c)| IndexColumn {
+            name: c.name.clone().unwrap(),
+            order: SortOrder::Asc,
+            pos_in_table: i,
+        })
+        // only include columns that are used in the query
+        .filter(|c| table_reference.column_is_used(c.pos_in_table))
+        .collect();
+    // sort so that constraints come first, then the rest in the order they appear in the table
+    ephemeral_columns.sort_by(|a, b| {
+        let a_constraint = index_constraints
+            .iter()
+            .enumerate()
+            .find(|(_, c)| c.0 == a.pos_in_table);
+        let b_constraint = index_constraints
+            .iter()
+            .enumerate()
+            .find(|(_, c)| c.0 == b.pos_in_table);
+        match (a_constraint, b_constraint) {
+            (Some(_), None) => Ordering::Less,
+            (None, Some(_)) => Ordering::Greater,
+            (Some((a_idx, _)), Some((b_idx, _))) => a_idx.cmp(&b_idx),
+            (None, None) => Ordering::Equal,
+        }
+    });
+    let ephemeral_index = Index {
+        name: format!(
+            "ephemeral_{}_{}",
+            table_reference.table.get_name(),
+            table_index
+        ),
+        columns: ephemeral_columns,
+        unique: false,
+        ephemeral: true,
+        table_name: table_reference.table.get_name().to_string(),
+        root_page: 0,
+    };
+
+    ephemeral_index
+}
+
 #[derive(Debug, Clone)]
 /// A representation of an expression in a [WhereTerm] that can potentially be used as part of an index seek key.
 /// For example, if there is an index on table T(x,y) and another index on table U(z), and the where clause is "WHERE x > 10 AND 20 = z",

From f256fb46fd5eeda8459f91d40ec3cde10e757458 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Sat, 19 Apr 2025 12:46:11 +0300
Subject: [PATCH 317/425] remove print spam from index insert

---
 core/vdbe/execute.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index de871f54c..0869491d6 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -3766,7 +3766,6 @@ pub fn op_idx_insert(
     pager: &Rc,
     mv_store: Option<&Rc>,
 ) -> Result {
-    dbg!("op_idx_insert_");
     if let Insn::IdxInsert {
         cursor_id,
         record_reg,
@@ -3807,7 +3806,6 @@ pub fn op_idx_insert(
         }
     };
 
-    dbg!(moved_before);
     // Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages,
     // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode
     // because it could trigger a movement to child page after a balance root which will leave the current page as the root page.

From 7f170756ae347d9d61b40ef41333cea7e653b4eb Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 21 Apr 2025 12:22:20 -0400
Subject: [PATCH 318/425] Add python script to benchmark vfs against each
 other

---
 testing/cli_tests/vfs_bench.py | 116 +++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 testing/cli_tests/vfs_bench.py

diff --git a/testing/cli_tests/vfs_bench.py b/testing/cli_tests/vfs_bench.py
new file mode 100644
index 000000000..ae5a969d0
--- /dev/null
+++ b/testing/cli_tests/vfs_bench.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+
+# vfs benchmarking/comparison
+import os
+from pathlib import Path
+import subprocess
+import statistics
+import argparse
+from time import perf_counter, sleep
+from typing import Dict
+
+from cli_tests.test_limbo_cli import TestLimboShell
+from cli_tests.console import info, error, test
+
+LIMBO_BIN = Path("./target/release/limbo")
+DB_FILE = Path("testing/temp.db")
+vfs_list = ["syscall", "io_uring"]
+
+
+def append_time(times, start, perf_counter):
+    times.append(perf_counter() - start)
+    return True
+
+
+def bench_one(vfs: str, sql: str, iterations: int) -> list[float]:
+    """
+    Launch a single Limbo process with the requested VFS, run `sql`
+    `iterations` times, return a list of elapsed wall‑clock times.
+ """ + shell = TestLimboShell( + exec_name=str(LIMBO_BIN), + flags=f"-q -m list --vfs {vfs} {DB_FILE}", + init_commands="", + ) + + times: list[float] = [] + + for i in range(1, iterations + 1): + start = perf_counter() + _ = shell.run_test_fn( + sql, lambda x: x is not None and append_time(times, start, perf_counter) + ) + test(f" {vfs} | run {i:>3}: {times[-1]:.6f}s") + + shell.quit() + return times + + +def setup_temp_db() -> None: + cmd = ["sqlite3", "testing/testing.db", ".clone testing/temp.db"] + proc = subprocess.run(cmd, check=True) + proc.check_returncode() + sleep(0.3) # make sure it's finished + + +def cleanup_temp_db() -> None: + if DB_FILE.exists(): + DB_FILE.unlink() + os.remove("testing/temp.db-wal") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Benchmark a SQL statement against all Limbo VFS back‑ends." + ) + parser.add_argument("sql", help="SQL statement to execute (quote it)") + parser.add_argument("iterations", type=int, help="number of repetitions") + args = parser.parse_args() + setup_temp_db() + + sql, iterations = args.sql, args.iterations + if iterations <= 0: + error("iterations must be a positive integer") + parser.error("Invalid Arguments") + + info(f"SQL : {sql}") + info(f"Iterations : {iterations}") + info(f"Database : {DB_FILE.resolve()}") + info("-" * 60) + averages: Dict[str, float] = {} + + for vfs in vfs_list: + test(f"\n### VFS: {vfs} ###") + times = bench_one(vfs, sql, iterations) + info(f"All times ({vfs}):", " ".join(f"{t:.6f}" for t in times)) + avg = statistics.mean(times) + averages[vfs] = avg + + info("\n" + "-" * 60) + info("Average runtime per VFS") + info("-" * 60) + + for vfs in vfs_list: + info(f"vfs: {vfs} : {averages[vfs]:.6f} s") + info("-" * 60) + + baseline = "syscall" + baseline_avg = averages[baseline] + + name_pad = max(len(v) for v in vfs_list) + for vfs in vfs_list: + avg = averages[vfs] + if vfs == baseline: + info(f"{vfs:<{name_pad}} : {avg:.6f} (baseline)") + else: + pct = (avg - baseline_avg) / baseline_avg * 100.0 + faster_slower = "slower" if pct > 0 else "faster" + info( + f"{vfs:<{name_pad}} : {avg:.6f} ({abs(pct):.1f}% {faster_slower} than {baseline})" + ) + info("-" * 60) + cleanup_temp_db() + + +if __name__ == "__main__": + main() From 2037fbeba540a4330a438c33c31c2b60c233ebc5 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 21 Apr 2025 12:22:40 -0400 Subject: [PATCH 319/425] Add bench-vfs command to makefile --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 623fbb6ce..9c5364aef 100644 --- a/Makefile +++ b/Makefile @@ -70,7 +70,6 @@ test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensi .PHONY: test test-extensions: limbo uv-sync - cargo build --package limbo_regexp uv run --project limbo_test test-extensions .PHONY: test-extensions @@ -110,6 +109,9 @@ test-update: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update .PHONY: test-update +bench-vfs: uv-sync + uv run --project limbo_test bench-vfs "$(SQL)" "$(N)" + clickbench: ./perf/clickbench/benchmark.sh .PHONY: clickbench From 9bbd6a3a7f3380b08e8a8b8000dcfb19f80955aa Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 21 Apr 2025 12:23:06 -0400 Subject: [PATCH 320/425] Add vfs bench to testing pyproject.toml --- testing/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/testing/pyproject.toml b/testing/pyproject.toml index 58292dd91..cdd30ec54 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ 
-15,6 +15,7 @@ test-shell = "cli_tests.cli_test_cases:main"
 test-extensions = "cli_tests.extensions:main"
 test-update = "cli_tests.update:main"
 test-memory = "cli_tests.memory:main"
+bench-vfs = "cli_tests.vfs_bench:main"
 
 [tool.uv]
 package = true

From f180de4d950aa072f9043a3a30feedbb7434106d Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 21 Apr 2025 12:24:18 -0400
Subject: [PATCH 321/425] Write quick note about vfs benchmark script in
 PERF.md

---
 PERF.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/PERF.md b/PERF.md
index 40edcf7ea..fb25045fb 100644
--- a/PERF.md
+++ b/PERF.md
@@ -32,3 +32,13 @@ make clickbench
 
 This will build Limbo in release mode, create a database, and run the benchmarks with a
 small subset of the Clickbench dataset. It will run the queries for both Limbo and SQLite,
 and print the results.
+
+
+## Comparing VFS's/IO Back-ends (io_uring | syscall)
+
+```shell
+make bench-vfs SQL="select * from users;" N=500
+```
+
+The naive script will build and run limbo in release mode and execute the given SQL (against a copy of the `testing/testing.db` file)
+`N` times with each `vfs`. This is not meant to be a definitive or thorough performance benchmark but serves to compare the two.

From 2e33ce6896cb6e8796083ca9611b0ae995971467 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 21 Apr 2025 12:31:38 -0400
Subject: [PATCH 322/425] Add release build to bench vfs in makefile to ensure
 there is an exec target

---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 9c5364aef..06afa0e5d 100644
--- a/Makefile
+++ b/Makefile
@@ -110,6 +110,7 @@ test-update: limbo uv-sync
 .PHONY: test-update
 
 bench-vfs: uv-sync
+	cargo build --release
 	uv run --project limbo_test bench-vfs "$(SQL)" "$(N)"
 
 clickbench:

From 1928dcfa1073619c3844f9bb3207cf7300c8148c Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Mon, 21 Apr 2025 23:05:01 -0300
Subject: [PATCH 323/425] Correct docs regarding between

---
 COMPAT.md              | 2 +-
 core/translate/expr.rs | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/COMPAT.md b/COMPAT.md
index ced9fbb6d..799411193 100644
--- a/COMPAT.md
+++ b/COMPAT.md
@@ -200,7 +200,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
 | (NOT) MATCH | No | |
 | IS (NOT) | Yes | |
 | IS (NOT) DISTINCT FROM | Yes | |
-| (NOT) BETWEEN ... AND ... | No | |
+| (NOT) BETWEEN ... AND ... | Yes | Expression is rewritten in the optimizer |
 | (NOT) IN (subquery) | No | |
 | (NOT) EXISTS (subquery) | No | |
 | CASE WHEN THEN ELSE END | Yes | |
diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index 6c9072ab9..53deb7e0f 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -186,7 +186,9 @@ pub fn translate_condition_expr(
     resolver: &Resolver,
 ) -> Result<()> {
     match expr {
-        ast::Expr::Between { .. } => todo!(),
+        ast::Expr::Between { .. } => {
+            unreachable!("expression should have been rewritten in optimizer")
+        }
         ast::Expr::Binary(lhs, ast::Operator::And, rhs) => {
             // In a binary AND, never jump to the parent 'jump_target_when_true' label on the first condition, because
             // the second condition MUST also be true. Instead we instruct the child expression to jump to a local
@@ -492,7 +494,9 @@ pub fn translate_expr(
         return Ok(target_register);
     }
     match expr {
-        ast::Expr::Between { .. } => todo!(),
+        ast::Expr::Between { .. } => {
+            unreachable!("expression should have been rewritten in optimizer")
+        }
         ast::Expr::Binary(e1, op, e2) => {
             // Check if both sides of the expression are equivalent and reuse the same register if so
             if exprs_are_equivalent(e1, e2) {

From 094fd0e21143d9cb50a49fe844f021407394e462 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Tue, 22 Apr 2025 09:46:16 +0300
Subject: [PATCH 324/425] Add TPC-H instructions to PERF.md

---
 PERF.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/PERF.md b/PERF.md
index 40edcf7ea..54a55f2ac 100644
--- a/PERF.md
+++ b/PERF.md
@@ -32,3 +32,28 @@ make clickbench
 
 This will build Limbo in release mode, create a database, and run the benchmarks with a
 small subset of the Clickbench dataset. It will run the queries for both Limbo and SQLite,
 and print the results.
+
+## TPC-H
+
+1. Clone the Tarantool TPC-H benchmarking tool:
+
+```shell
+git clone git@github.com:tarantool/tpch.git
+```
+
+2. Patch the benchmark runner script:
+
+```patch
+diff --git a/bench_queries.sh b/bench_queries.sh
+index 6b894f9..c808e9a 100755
+--- a/bench_queries.sh
++++ b/bench_queries.sh
+@@ -4,7 +4,7 @@ function check_q {
+     local query=queries/$*.sql
+     (
+         echo $query
+-        time ( sqlite3 TPC-H.db < $query > /dev/null )
++        time ( ../../limbo/target/release/limbo -m list TPC-H.db < $query > /dev/null )
+     )
+ }
+```

From 68d8b86bb7d2365237c04c4310a3157af73d43fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20K=C3=B6sters?=
Date: Mon, 21 Apr 2025 13:15:04 +0200
Subject: [PATCH 325/425] fix: get name of rowid column

---
 bindings/java/rs_src/limbo_statement.rs |  2 +-
 core/lib.rs                             |  2 +-
 core/translate/plan.rs                  | 19 ++++++++++++++++---
 tests/integration/common.rs             | 10 +++++-----
 4 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs
index b28ff55b1..c49469cd6 100644
--- a/bindings/java/rs_src/limbo_statement.rs
+++ b/bindings/java/rs_src/limbo_statement.rs
@@ -138,7 +138,7 @@
 
     for i in 0..num_columns {
         let column_name = stmt.stmt.get_column_name(i);
-        let str = env.new_string(column_name.as_str()).unwrap();
+        let str = env.new_string(column_name.into_owned()).unwrap();
         env.set_object_array_element(&obj_arr, i as i32, str)
             .unwrap();
     }
diff --git a/core/lib.rs b/core/lib.rs
index e130306f7..67d168640 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -591,7 +591,7 @@ impl Statement {
         self.program.result_columns.len()
     }
 
-    pub fn get_column_name(&self, idx: usize) -> Cow<'_, String> {
+    pub fn get_column_name(&self, idx: usize) -> Cow<'_, str> {
         let column = &self.program.result_columns[idx];
         match column.name(&self.program.table_references) {
             Some(name) => Cow::Borrowed(name),
diff --git a/core/translate/plan.rs b/core/translate/plan.rs
index 07a8de392..48ce4c854 100644
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -34,13 +34,26 @@ pub struct ResultSetColumn {
 }
 
 impl ResultSetColumn {
-    pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a String> {
+    pub fn name<'a>(&'a self, tables: &'a [TableReference]) -> Option<&'a str> {
         if let Some(alias) = &self.alias {
             return Some(alias);
         }
         match &self.expr {
             ast::Expr::Column { table, column, .. } => {
-                tables[*table].columns()[*column].name.as_ref()
+                tables[*table].columns()[*column].name.as_deref()
             }
+            ast::Expr::RowId { table, ..
} => { + // If there is a rowid alias column, use its name + if let Table::BTree(table) = &tables[*table].table { + if let Some(rowid_alias_column) = table.get_rowid_alias_column() { + if let Some(name) = &rowid_alias_column.1.name { + return Some(name); + } + } + } + + // If there is no rowid alias, use "rowid". + Some("rowid") } _ => None, } @@ -465,7 +478,7 @@ impl TableReference { plan.result_columns .iter() .map(|rc| Column { - name: rc.name(&plan.table_references).map(String::clone), + name: rc.name(&plan.table_references).map(String::from), ty: Type::Text, // FIXME: infer proper type ty_str: "TEXT".to_string(), is_rowid_alias: false, diff --git a/tests/integration/common.rs b/tests/integration/common.rs index a034b36ae..2c668a12f 100644 --- a/tests/integration/common.rs +++ b/tests/integration/common.rs @@ -120,16 +120,16 @@ mod tests { let columns = stmt.num_columns(); assert_eq!(columns, 3); - assert_eq!(stmt.get_column_name(0), "foo".into()); - assert_eq!(stmt.get_column_name(1), "bar".into()); - assert_eq!(stmt.get_column_name(2), "baz".into()); + assert_eq!(stmt.get_column_name(0), "foo"); + assert_eq!(stmt.get_column_name(1), "bar"); + assert_eq!(stmt.get_column_name(2), "baz"); let stmt = conn.prepare("select foo, bar from test;")?; let columns = stmt.num_columns(); assert_eq!(columns, 2); - assert_eq!(stmt.get_column_name(0), "foo".into()); - assert_eq!(stmt.get_column_name(1), "bar".into()); + assert_eq!(stmt.get_column_name(0), "foo"); + assert_eq!(stmt.get_column_name(1), "bar"); let stmt = conn.prepare("delete from test;")?; let columns = stmt.num_columns(); From c2cf4756ef8601de234170fc44f620ca0861640e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:10:02 +0300 Subject: [PATCH 326/425] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index cc72d1133..1bf0cd1e7 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@

+ PyPI PyPI
From 936365a44e15a7658a13761d1b42c0ae798752a3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:11:23 +0300 Subject: [PATCH 327/425] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1bf0cd1e7..255843d80 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@

- PyPI - PyPI + PyPI + PyPI + PyPI
From 2495d15b96495562213ae14e8d09cfe0b7b77443 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 16 Apr 2025 13:11:49 +0300 Subject: [PATCH 328/425] Index insert fuzz --- core/storage/btree.rs | 86 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a8167fd17..0b8226017 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -5159,8 +5159,10 @@ mod tests { fast_lock::SpinLock, io::{Buffer, Completion, MemoryIO, OpenFlags, IO}, storage::{ - database::DatabaseFile, page_cache::DumbLruPageCache, sqlite3_ondisk, - sqlite3_ondisk::DatabaseHeader, + database::DatabaseFile, + page_cache::DumbLruPageCache, + pager::CreateBTreeFlags, + sqlite3_ondisk::{self, DatabaseHeader}, }, types::Text, vdbe::Register, @@ -5678,6 +5680,81 @@ mod tests { } } } + + fn btree_index_insert_fuzz_run(attempts: usize, inserts: usize) { + let (mut rng, seed) = if std::env::var("SEED").is_ok() { + let seed = std::env::var("SEED").unwrap(); + let seed = seed.parse::().unwrap(); + let rng = ChaCha8Rng::seed_from_u64(seed); + (rng, seed) + } else { + rng_from_time() + }; + let mut seen = HashSet::new(); + tracing::info!("super seed: {}", seed); + for _ in 0..attempts { + let (pager, _) = empty_btree(); + let index_root_page = pager.btree_create(&CreateBTreeFlags::new_index()); + let index_root_page = index_root_page as usize; + let mut cursor = BTreeCursor::new(None, pager.clone(), index_root_page); + let mut keys = Vec::new(); + tracing::info!("seed: {}", seed); + for _ in 0..inserts { + let key = { + let result; + loop { + let cols = (0..10) + .map(|_| (rng.next_u64() % (1 << 30)) as i64) + .collect::>(); + if seen.contains(&cols) { + continue; + } else { + seen.insert(cols.clone()); + } + result = cols; + break; + } + result + }; + keys.push(key.clone()); + let value = ImmutableRecord::from_registers( + &key.iter() + .map(|col| Register::OwnedValue(OwnedValue::Integer(*col))) + .collect::>(), + ); + run_until_done( + || { + cursor.insert( + &BTreeKey::new_index_key(&value), + cursor.is_write_in_progress(), + ) + }, + pager.deref(), + ) + .unwrap(); + keys.sort(); + cursor.move_to_root(); + } + keys.sort(); + cursor.move_to_root(); + for key in keys.iter() { + tracing::trace!("seeking key: {:?}", key); + run_until_done(|| cursor.next(), pager.deref()).unwrap(); + let record = cursor.record(); + let record = record.as_ref().unwrap(); + let cursor_key = record.get_values(); + assert_eq!( + cursor_key, + &key.iter() + .map(|col| RefValue::Integer(*col)) + .collect::>(), + "key {:?} is not found", + key + ); + } + } + } + #[test] pub fn test_drop_odd() { let db = get_database(); @@ -5731,6 +5808,11 @@ mod tests { } } + #[test] + pub fn btree_index_insert_fuzz_run_equal_size() { + btree_index_insert_fuzz_run(2, 1024 * 32); + } + #[test] pub fn btree_insert_fuzz_run_random() { btree_insert_fuzz_run(128, 16, |rng| (rng.next_u32() % 4096) as usize); From fc5099e2efd7fd4537d12394078e9364a6b6f5db Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 22 Apr 2025 12:04:47 +0300 Subject: [PATCH 329/425] antithesis: Enable RUST_BACKTRACE for workload --- Dockerfile.antithesis | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 1f4f3ba10..6305c12f0 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -71,4 +71,5 @@ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress COPY stress/docker-entrypoint.sh /bin RUN chmod +x 
/bin/docker-entrypoint.sh ENTRYPOINT ["/bin/docker-entrypoint.sh"] +ENV RUST_BACKTRACE=1 CMD ["/bin/limbo_stress"] From 3bbd4432867cd65a6b8922507c7fcbb6a617a439 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 10:32:38 +0300 Subject: [PATCH 330/425] python: add UV project for 'scripts' mainly so i don't have to install pygithub every time i want to `uv run scripts/merge-pr.py` --- pyproject.toml | 2 +- scripts/pyproject.toml | 9 ++ uv.lock | 237 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 scripts/pyproject.toml diff --git a/pyproject.toml b/pyproject.toml index b8d5018cc..b652f9f25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,4 +14,4 @@ package = false limbo_test = { workspace = true } [tool.uv.workspace] -members = ["testing"] +members = ["testing", "scripts"] diff --git a/scripts/pyproject.toml b/scripts/pyproject.toml new file mode 100644 index 000000000..a9d988d2e --- /dev/null +++ b/scripts/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "scripts" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "pygithub>=2.6.1", +] diff --git a/uv.lock b/uv.lock index eaf3e5bff..7c6a5bc43 100644 --- a/uv.lock +++ b/uv.lock @@ -1,11 +1,11 @@ version = 1 -revision = 1 requires-python = ">=3.13" [manifest] members = [ "limbo", "limbo-test", + "scripts", ] [[package]] @@ -17,6 +17,106 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "certifi" +version = "2025.1.31" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, +] + +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989 }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802 }, + { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792 }, + { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893 }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810 }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200 }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447 }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358 }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 }, + { url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 }, + { url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 }, + { url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 }, + { url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 }, + { url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 }, + { url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 }, + { url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 }, + { url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 }, + { url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 }, + { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, + { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, + { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, + { url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 }, +] + +[[package]] +name = "cryptography" +version = "44.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/cd/25/4ce80c78963834b8a9fd1cc1266be5ed8d1840785c0f2e1b73b8d128d505/cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0", size = 710807 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/ef/83e632cfa801b221570c5f58c0369db6fa6cef7d9ff859feab1aae1a8a0f/cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7", size = 6676361 }, + { url = "https://files.pythonhosted.org/packages/30/ec/7ea7c1e4c8fc8329506b46c6c4a52e2f20318425d48e0fe597977c71dbce/cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1", size = 3952350 }, + { url = "https://files.pythonhosted.org/packages/27/61/72e3afdb3c5ac510330feba4fc1faa0fe62e070592d6ad00c40bb69165e5/cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb", size = 4166572 }, + { url = "https://files.pythonhosted.org/packages/26/e4/ba680f0b35ed4a07d87f9e98f3ebccb05091f3bf6b5a478b943253b3bbd5/cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843", size = 3958124 }, + { url = "https://files.pythonhosted.org/packages/9c/e8/44ae3e68c8b6d1cbc59040288056df2ad7f7f03bbcaca6b503c737ab8e73/cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5", size = 3678122 }, + { url = "https://files.pythonhosted.org/packages/27/7b/664ea5e0d1eab511a10e480baf1c5d3e681c7d91718f60e149cec09edf01/cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c", size = 4191831 }, + { url = "https://files.pythonhosted.org/packages/2a/07/79554a9c40eb11345e1861f46f845fa71c9e25bf66d132e123d9feb8e7f9/cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a", size = 3960583 }, + { url = "https://files.pythonhosted.org/packages/bb/6d/858e356a49a4f0b591bd6789d821427de18432212e137290b6d8a817e9bf/cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308", size = 4191753 }, + { url = "https://files.pythonhosted.org/packages/b2/80/62df41ba4916067fa6b125aa8c14d7e9181773f0d5d0bd4dcef580d8b7c6/cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688", size = 4079550 }, + { url = "https://files.pythonhosted.org/packages/f3/cd/2558cc08f7b1bb40683f99ff4327f8dcfc7de3affc669e9065e14824511b/cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7", size = 4298367 }, + { url = "https://files.pythonhosted.org/packages/71/59/94ccc74788945bc3bd4cf355d19867e8057ff5fdbcac781b1ff95b700fb1/cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79", size = 2772843 }, + { url = "https://files.pythonhosted.org/packages/ca/2c/0d0bbaf61ba05acb32f0841853cfa33ebb7a9ab3d9ed8bb004bd39f2da6a/cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = 
"sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa", size = 3209057 }, + { url = "https://files.pythonhosted.org/packages/9e/be/7a26142e6d0f7683d8a382dd963745e65db895a79a280a30525ec92be890/cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3", size = 6677789 }, + { url = "https://files.pythonhosted.org/packages/06/88/638865be7198a84a7713950b1db7343391c6066a20e614f8fa286eb178ed/cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639", size = 3951919 }, + { url = "https://files.pythonhosted.org/packages/d7/fc/99fe639bcdf58561dfad1faa8a7369d1dc13f20acd78371bb97a01613585/cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd", size = 4167812 }, + { url = "https://files.pythonhosted.org/packages/53/7b/aafe60210ec93d5d7f552592a28192e51d3c6b6be449e7fd0a91399b5d07/cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181", size = 3958571 }, + { url = "https://files.pythonhosted.org/packages/16/32/051f7ce79ad5a6ef5e26a92b37f172ee2d6e1cce09931646eef8de1e9827/cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea", size = 3679832 }, + { url = "https://files.pythonhosted.org/packages/78/2b/999b2a1e1ba2206f2d3bca267d68f350beb2b048a41ea827e08ce7260098/cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699", size = 4193719 }, + { url = "https://files.pythonhosted.org/packages/72/97/430e56e39a1356e8e8f10f723211a0e256e11895ef1a135f30d7d40f2540/cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9", size = 3960852 }, + { url = "https://files.pythonhosted.org/packages/89/33/c1cf182c152e1d262cac56850939530c05ca6c8d149aa0dcee490b417e99/cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23", size = 4193906 }, + { url = "https://files.pythonhosted.org/packages/e1/99/87cf26d4f125380dc674233971069bc28d19b07f7755b29861570e513650/cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922", size = 4081572 }, + { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, + { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, + { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, +] + +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, +] + [[package]] name = "faker" version = "37.1.0" @@ -29,6 +129,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/a1/8936bc8e79af80ca38288dd93ed44ed1f9d63beb25447a4c59e746e01f8d/faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c", size = 1918783 }, ] +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, +] + [[package]] name = "limbo" version = "0.1.0" @@ -76,6 +185,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, ] +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + [[package]] name = "pydantic" version = "2.11.1" @@ -119,6 +237,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/4f/3fb47d6cbc08c7e00f92300e64ba655428c05c56b8ab6723bd290bae6458/pydantic_core-2.33.0-cp313-cp313t-win_amd64.whl", hash = "sha256:8a1d581e8cdbb857b0e0e81df98603376c1a5c34dc5e54039dcc00f043df81e7", size = 1931234 }, ] +[[package]] +name = "pygithub" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deprecated" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "pynacl" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/88/e08ab18dc74b2916f48703ed1a797d57cb64eca0e23b0a9254e13cfe3911/pygithub-2.6.1.tar.gz", hash = "sha256:b5c035392991cca63959e9453286b41b54d83bf2de2daa7d7ff7e4312cebf3bf", size = 3659473 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/fc/a444cd19ccc8c4946a512f3827ed0b3565c88488719d800d54a75d541c0b/PyGithub-2.6.1-py3-none-any.whl", hash = 
"sha256:6f2fa6d076ccae475f9fc392cc6cdbd54db985d4f69b8833a28397de75ed6ca3", size = 410451 }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -128,6 +263,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = 
"https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + [[package]] name = "rich" version = "14.0.0" @@ -141,6 +325,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, ] +[[package]] +name = "scripts" +version = "0.1.0" +source = { virtual = "scripts" } +dependencies = [ + { name = "pygithub" }, +] + +[package.metadata] +requires-dist = [{ name = "pygithub", specifier = ">=2.6.1" }] + [[package]] name = "typing-extensions" version = "4.13.0" @@ -170,3 +365,43 @@ sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be76 wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, ] + +[[package]] +name = "urllib3" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, +] + +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800 }, + { url = 
"https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824 }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920 }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690 }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861 }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174 }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721 }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763 }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585 }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676 }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871 }, + { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312 }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062 }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155 }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471 }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208 }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339 }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232 }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476 }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, + { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, +] From f1ee92bf2d90c0eeaf9a707a2e9a174c027a23a2 Mon Sep 17 00:00:00 2001 From: "Levy A." 
Date: Tue, 22 Apr 2025 18:23:07 -0300
Subject: [PATCH 331/425] numeric types overhaul

---
 core/Cargo.toml                 |   1 +
 core/lib.rs                     |   6 +
 core/numeric.rs                 | 564 ++++++++++++++++++++++++++++++++
 core/numeric/nonnan.rs          | 105 ++++++
 core/vdbe/execute.rs            | 387 ++--------------------
 fuzz/Cargo.lock                 |  12 +-
 fuzz/Cargo.toml                 |   6 +-
 fuzz/fuzz_targets/cast_real.rs  |  22 ++
 fuzz/fuzz_targets/expression.rs |  20 +-
 9 files changed, 753 insertions(+), 370 deletions(-)
 create mode 100644 core/numeric.rs
 create mode 100644 core/numeric/nonnan.rs
 create mode 100644 fuzz/fuzz_targets/cast_real.rs

diff --git a/core/Cargo.toml b/core/Cargo.toml
index eb5d092b0..f23aeeeb0 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -27,6 +27,7 @@ series = ["limbo_series/static"]
 ipaddr = ["limbo_ipaddr/static"]
 completion = ["limbo_completion/static"]
 testvfs = ["limbo_ext_tests/static"]
+fuzz = []
 
 [target.'cfg(target_os = "linux")'.dependencies]
 io-uring = { version = "0.6.1", optional = true }
diff --git a/core/lib.rs b/core/lib.rs
index 67d168640..9d5508e2d 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -20,6 +20,12 @@ mod util;
 mod vdbe;
 mod vector;
 
+#[cfg(feature = "fuzz")]
+pub mod numeric;
+
+#[cfg(not(feature = "fuzz"))]
+mod numeric;
+
 #[cfg(not(target_family = "wasm"))]
 #[global_allocator]
 static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
diff --git a/core/numeric.rs b/core/numeric.rs
new file mode 100644
index 000000000..c0499b78d
--- /dev/null
+++ b/core/numeric.rs
@@ -0,0 +1,564 @@
+use crate::OwnedValue;
+
+mod nonnan;
+
+use nonnan::NonNan;
+
+// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available
+trait SaturatingShl {
+    fn saturating_shl(self, rhs: u32) -> Self;
+}
+
+impl SaturatingShl for i64 {
+    fn saturating_shl(self, rhs: u32) -> Self {
+        if rhs >= Self::BITS {
+            0
+        } else {
+            self << rhs
+        }
+    }
+}
+
+// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available
+trait SaturatingShr {
+    fn saturating_shr(self, rhs: u32) -> Self;
+}
+
+impl SaturatingShr for i64 {
+    fn saturating_shr(self, rhs: u32) -> Self {
+        if rhs >= Self::BITS {
+            if self >= 0 {
+                0
+            } else {
+                -1
+            }
+        } else {
+            self >> rhs
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum Numeric {
+    Null,
+    Integer(i64),
+    Float(NonNan),
+}
+
+impl Numeric {
+    pub fn try_into_bool(&self) -> Option<bool> {
+        match self {
+            Numeric::Null => None,
+            Numeric::Integer(0) => Some(false),
+            Numeric::Float(non_nan) if *non_nan == 0.0 => Some(false),
+            _ => Some(true),
+        }
+    }
+}
+
+impl From<Numeric> for NullableInteger {
+    fn from(value: Numeric) -> Self {
+        match value {
+            Numeric::Null => NullableInteger::Null,
+            Numeric::Integer(v) => NullableInteger::Integer(v),
+            Numeric::Float(v) => NullableInteger::Integer(f64::from(v) as i64),
+        }
+    }
+}
+
+impl From<Numeric> for OwnedValue {
+    fn from(value: Numeric) -> Self {
+        match value {
+            Numeric::Null => OwnedValue::Null,
+            Numeric::Integer(v) => OwnedValue::Integer(v),
+            Numeric::Float(v) => OwnedValue::Float(v.into()),
+        }
+    }
+}
+
+impl<T: AsRef<str>> From<T> for Numeric {
+    fn from(value: T) -> Self {
+        let text = value.as_ref();
+
+        let Some((real, is_fractional)) = atof(text) else {
+            return Self::Integer(0);
+        };
+
+        if is_fractional {
+            return Self::Float(real);
+        }
+
+        let integer = atoi(text);
+
+        if real == integer as f64 {
+            return Self::Integer(integer);
+        }
+
+        Self::Float(real)
+    }
+}
+
+impl From<OwnedValue> for Numeric {
+    fn from(value: OwnedValue) -> Self {
+        Self::from(&value)
+    }
+}
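
These From impls are what give Numeric SQLite's text-to-numeric coercion
rules. As a rough illustration, a hypothetical test (not part of the patch)
would read:

    assert!(matches!(Numeric::from("42"), Numeric::Integer(42)));   // pure integer text
    assert!(matches!(Numeric::from("12.0"), Numeric::Float(_)));    // a '.' or exponent forces a float
    assert!(matches!(Numeric::from("abc"), Numeric::Integer(0)));   // non-numeric text coerces to 0
    assert!(matches!(Numeric::from(" -7 "), Numeric::Integer(-7))); // surrounding whitespace is trimmed
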
+impl From<&OwnedValue> for Numeric {
+    fn from(value: &OwnedValue) -> Self {
+        match value {
+            OwnedValue::Null => Self::Null,
+            OwnedValue::Integer(v) => Self::Integer(*v),
+            OwnedValue::Float(v) => match NonNan::new(*v) {
+                Some(v) => Self::Float(v),
+                None => Self::Null,
+            },
+            OwnedValue::Text(text) => Numeric::from(text.as_str()),
+            OwnedValue::Blob(blob) => {
+                let text = String::from_utf8_lossy(blob.as_slice());
+                Numeric::from(&text)
+            }
+        }
+    }
+}
+
+impl std::ops::Add for Numeric {
+    type Output = Self;
+
+    fn add(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
+            (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_add(rhs) {
+                None => Numeric::Float(lhs.into()) + Numeric::Float(rhs.into()),
+                Some(i) => Numeric::Integer(i),
+            },
+            (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs + rhs {
+                Some(v) => Numeric::Float(v),
+                None => Numeric::Null,
+            },
+            (f @ Numeric::Float(_), Numeric::Integer(i))
+            | (Numeric::Integer(i), f @ Numeric::Float(_)) => f + Numeric::Float(i.into()),
+        }
+    }
+}
+
+impl std::ops::Sub for Numeric {
+    type Output = Self;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
+            (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs - rhs {
+                Some(v) => Numeric::Float(v),
+                None => Numeric::Null,
+            },
+            (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_sub(rhs) {
+                None => Numeric::Float(lhs.into()) - Numeric::Float(rhs.into()),
+                Some(i) => Numeric::Integer(i),
+            },
+            (f @ Numeric::Float(_), Numeric::Integer(i)) => f - Numeric::Float(i.into()),
+            (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) - f,
+        }
+    }
+}
+
+impl std::ops::Mul for Numeric {
+    type Output = Self;
+
+    fn mul(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
+            (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs * rhs {
+                Some(v) => Numeric::Float(v),
+                None => Numeric::Null,
+            },
+            (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_mul(rhs) {
+                None => Numeric::Float(lhs.into()) * Numeric::Float(rhs.into()),
+                Some(i) => Numeric::Integer(i),
+            },
+            (f @ Numeric::Float(_), Numeric::Integer(i))
+            | (Numeric::Integer(i), f @ Numeric::Float(_)) => f * Numeric::Float(i.into()),
+        }
+    }
+}
+
+impl std::ops::Div for Numeric {
+    type Output = Self;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
+            (Numeric::Float(lhs), Numeric::Float(rhs)) => match lhs / rhs {
+                Some(v) if rhs != 0.0 => Numeric::Float(v),
+                _ => Numeric::Null,
+            },
+            (Numeric::Integer(lhs), Numeric::Integer(rhs)) => match lhs.checked_div(rhs) {
+                None => Numeric::Float(lhs.into()) / Numeric::Float(rhs.into()),
+                Some(v) => Numeric::Integer(v),
+            },
+            (f @ Numeric::Float(_), Numeric::Integer(i)) => f / Numeric::Float(i.into()),
+            (Numeric::Integer(i), f @ Numeric::Float(_)) => Numeric::Float(i.into()) / f,
+        }
+    }
+}
+
+impl std::ops::Neg for Numeric {
+    type Output = Self;
+
+    fn neg(self) -> Self::Output {
+        match self {
+            Numeric::Null => Numeric::Null,
+            Numeric::Integer(v) => match v.checked_neg() {
+                None => -Numeric::Float(v.into()),
+                Some(i) => Numeric::Integer(i),
+            },
+            Numeric::Float(v) => Numeric::Float(-v),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum NullableInteger {
+    Null,
+    Integer(i64),
+}
+
+impl From<NullableInteger> for OwnedValue {
+    fn from(value: NullableInteger) -> Self {
+        match value {
+            NullableInteger::Null => OwnedValue::Null,
+            NullableInteger::Integer(v) => OwnedValue::Integer(v),
+        }
+    }
+}
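
A property worth noting in the operator impls above: integer overflow is
handled by redoing the operation in floating point instead of wrapping or
panicking, which is what SQLite does. Hypothetical checks (not in the patch):

    // i64::MAX + 1 fails checked_add, so Add falls back to f64 arithmetic.
    let sum = Numeric::Integer(i64::MAX) + Numeric::Integer(1);
    assert!(matches!(sum, Numeric::Float(f) if f == i64::MAX as f64 + 1.0));
    // NULL propagates through every operator.
    assert!(matches!(Numeric::Null * Numeric::Integer(7), Numeric::Null));
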
+
+impl<T: AsRef<str>> From<T> for NullableInteger {
+    fn from(value: T) -> Self {
+        Self::Integer(atoi(value.as_ref()))
+    }
+}
+
+impl From<OwnedValue> for NullableInteger {
+    fn from(value: OwnedValue) -> Self {
+        Self::from(&value)
+    }
+}
+
+impl From<&OwnedValue> for NullableInteger {
+    fn from(value: &OwnedValue) -> Self {
+        match value {
+            OwnedValue::Null => Self::Null,
+            OwnedValue::Integer(v) => Self::Integer(*v),
+            OwnedValue::Float(v) => Self::Integer(*v as i64),
+            OwnedValue::Text(text) => Self::from(text.as_str()),
+            OwnedValue::Blob(blob) => {
+                let text = String::from_utf8_lossy(blob.as_slice());
+                Self::from(text)
+            }
+        }
+    }
+}
+
+impl std::ops::Not for NullableInteger {
+    type Output = Self;
+
+    fn not(self) -> Self::Output {
+        match self {
+            NullableInteger::Null => NullableInteger::Null,
+            NullableInteger::Integer(lhs) => NullableInteger::Integer(!lhs),
+        }
+    }
+}
+
+impl std::ops::BitAnd for NullableInteger {
+    type Output = Self;
+
+    fn bitand(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null,
+            (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => {
+                NullableInteger::Integer(lhs & rhs)
+            }
+        }
+    }
+}
+
+impl std::ops::BitOr for NullableInteger {
+    type Output = Self;
+
+    fn bitor(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null,
+            (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => {
+                NullableInteger::Integer(lhs | rhs)
+            }
+        }
+    }
+}
+
+impl std::ops::Shl for NullableInteger {
+    type Output = Self;
+
+    fn shl(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null,
+            (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => {
+                NullableInteger::Integer(if rhs.is_positive() {
+                    lhs.saturating_shl(rhs.try_into().unwrap_or(u32::MAX))
+                } else {
+                    lhs.saturating_shr(rhs.saturating_abs().try_into().unwrap_or(u32::MAX))
+                })
+            }
+        }
+    }
+}
+
+impl std::ops::Shr for NullableInteger {
+    type Output = Self;
+
+    fn shr(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null,
+            (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => {
+                NullableInteger::Integer(if rhs.is_positive() {
+                    lhs.saturating_shr(rhs.try_into().unwrap_or(u32::MAX))
+                } else {
+                    lhs.saturating_shl(rhs.saturating_abs().try_into().unwrap_or(u32::MAX))
+                })
+            }
+        }
+    }
+}
+
+impl std::ops::Rem for NullableInteger {
+    type Output = Self;
+
+    fn rem(self, rhs: Self) -> Self::Output {
+        match (self, rhs) {
+            (NullableInteger::Null, _) | (_, NullableInteger::Null) => NullableInteger::Null,
+            (_, NullableInteger::Integer(0)) => NullableInteger::Null,
+            (lhs, NullableInteger::Integer(-1)) => lhs % NullableInteger::Integer(1),
+            (NullableInteger::Integer(lhs), NullableInteger::Integer(rhs)) => {
+                NullableInteger::Integer(lhs % rhs)
+            }
+        }
+    }
+}
+
+// Maximum u64 that can survive a f64 round trip
+const MAX_EXACT: u64 = u64::MAX << 11;
+
+const VERTICAL_TAB: char = '\u{b}';
+
+#[derive(Debug, Clone, Copy)]
+struct DoubleDouble(f64, f64);
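
The bitwise operators mirror SQLite as well: shift counts at or beyond 64
saturate instead of being undefined behaviour, negative counts shift the
other way, and `x % -1` is rewritten as `x % 1` so that `i64::MIN % -1`
cannot overflow. Illustrative assertions (hypothetical, not in the patch):

    assert!(matches!(
        NullableInteger::Integer(1) << NullableInteger::Integer(100),
        NullableInteger::Integer(0) // oversized left shift saturates to 0
    ));
    assert!(matches!(
        NullableInteger::Integer(-1) >> NullableInteger::Integer(100),
        NullableInteger::Integer(-1) // right shift is sign-extended
    ));
    assert!(matches!(
        NullableInteger::Integer(8) << NullableInteger::Integer(-2),
        NullableInteger::Integer(2) // a negative count shifts the other way
    ));
    assert!(matches!(
        NullableInteger::Integer(i64::MIN) % NullableInteger::Integer(-1),
        NullableInteger::Integer(0)
    ));
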
+
+impl From<u64> for DoubleDouble {
+    fn from(value: u64) -> Self {
+        let r = value as f64;
+
+        // If the value is smaller than MAX_EXACT, the error isn't significant
+        let rr = if r <= MAX_EXACT as f64 {
+            let round_tripped = value as f64 as u64;
+            let sign = if value >= round_tripped { 1.0 } else { -1.0 };
+
+            // Error term is the signed distance of the round tripped value and itself
+            sign * value.abs_diff(round_tripped) as f64
+        } else {
+            0.0
+        };
+
+        DoubleDouble(r, rr)
+    }
+}
+
+impl From<DoubleDouble> for f64 {
+    fn from(DoubleDouble(a, aa): DoubleDouble) -> Self {
+        a + aa
+    }
+}
+
+impl std::ops::Mul for DoubleDouble {
+    type Output = Self;
+
+    /// Double-Double multiplication. (self.0, self.1) *= (rhs.0, rhs.1)
+    ///
+    /// Reference:
+    /// T. J. Dekker, "A Floating-Point Technique for Extending the Available Precision".
+    /// 1971-07-26.
+    ///
+    fn mul(self, rhs: Self) -> Self::Output {
+        // TODO: Better variable naming
+
+        let mask = u64::MAX << 26;
+
+        let hx = f64::from_bits(self.0.to_bits() & mask);
+        let tx = self.0 - hx;
+
+        let hy = f64::from_bits(rhs.0.to_bits() & mask);
+        let ty = rhs.0 - hy;
+
+        let p = hx * hy;
+        let q = hx * ty + tx * hy;
+
+        let c = p + q;
+        let cc = p - c + q + tx * ty;
+        let cc = self.0 * rhs.1 + self.1 * rhs.0 + cc;
+
+        let r = c + cc;
+        let rr = (c - r) + cc;
+
+        DoubleDouble(r, rr)
+    }
+}
+
+impl std::ops::MulAssign for DoubleDouble {
+    fn mul_assign(&mut self, rhs: Self) {
+        *self = self.clone() * rhs;
+    }
+}
+
+pub fn atoi(input: impl AsRef<str>) -> i64 {
+    let input = input
+        .as_ref()
+        .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB);
+
+    let mut iter = input.chars().enumerate().peekable();
+
+    iter.next_if(|(_, ch)| matches!(ch, '+' | '-'));
+    let Some((end, _)) = iter.take_while(|(_, ch)| ch.is_ascii_digit()).last() else {
+        return 0;
+    };
+
+    input[0..=end]
+        .parse::<i64>()
+        .unwrap_or_else(|err| match err.kind() {
+            std::num::IntErrorKind::PosOverflow => i64::MAX,
+            std::num::IntErrorKind::NegOverflow => i64::MIN,
+            _ => 0,
+        })
+}
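
atoi is in the spirit of SQLite's sqlite3Atoi64: it parses the longest
leading integer prefix and saturates on overflow rather than failing. For
example (hypothetical assertions, not in the patch):

    assert_eq!(atoi("  42abc"), 42);                    // longest valid prefix wins
    assert_eq!(atoi("abc"), 0);                         // no digits at all
    assert_eq!(atoi("99999999999999999999"), i64::MAX); // positive overflow saturates
    assert_eq!(atoi("-99999999999999999999"), i64::MIN);
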
+
+pub fn atof(input: impl AsRef<str>) -> Option<(NonNan, bool)> {
+    let mut input = input
+        .as_ref()
+        .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB)
+        .chars()
+        .peekable();
+
+    let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) {
+        Some('-') => -1.0,
+        _ => 1.0,
+    };
+
+    let mut had_digits = false;
+    let mut is_fractional = false;
+
+    if matches!(input.peek(), Some('e' | 'E')) {
+        return None;
+    }
+
+    let mut significant: u64 = 0;
+
+    // Copy as many significant digits as we can
+    while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) {
+        had_digits = true;
+
+        match significant
+            .checked_mul(10)
+            .and_then(|v| v.checked_add(digit as u64))
+        {
+            Some(new) => significant = new,
+            None => break,
+        }
+
+        input.next();
+    }
+
+    let mut exponent = 0;
+
+    // Increment the exponent for every non significant digit we skipped
+    while input.next_if(char::is_ascii_digit).is_some() {
+        exponent += 1
+    }
+
+    if input.next_if(|ch| matches!(ch, '.')).is_some() {
+        if had_digits || input.peek().is_some_and(char::is_ascii_digit) {
+            is_fractional = true
+        }
+
+        while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) {
+            if significant < (u64::MAX - 9) / 10 {
+                significant = significant * 10 + digit as u64;
+                exponent -= 1;
+            }
+
+            input.next();
+        }
+    };
+
+    if input.next_if(|ch| matches!(ch, 'e' | 'E')).is_some() {
+        let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) {
+            Some('-') => -1,
+            _ => 1,
+        };
+
+        if input.peek().is_some_and(char::is_ascii_digit) {
+            is_fractional = true
+        }
+
+        let e = input.map_while(|ch| ch.to_digit(10)).fold(0, |acc, digit| {
+            if acc < 1000 {
+                acc * 10 + digit as i32
+            } else {
+                1000
+            }
+        });
+
+        exponent += sign * e;
+    };
+
+    while exponent.is_positive() && significant < MAX_EXACT / 10 {
+        significant *= 10;
+        exponent -= 1;
+    }
+
+    while exponent.is_negative() && significant % 10 == 0 {
+        significant /= 10;
+        exponent += 1;
+    }
+
+    let mut result = DoubleDouble::from(significant);
+
+    if exponent > 0 {
+        while exponent >= 100 {
+            exponent -= 100;
+            result *= DoubleDouble(1.0e+100, -1.5902891109759918046e+83);
+        }
+        while exponent >= 10 {
+            exponent -= 10;
+            result *= DoubleDouble(1.0e+10, 0.0);
+        }
+        while exponent >= 1 {
+            exponent -= 1;
+            result *= DoubleDouble(1.0e+01, 0.0);
+        }
+    } else {
+        while exponent <= -100 {
+            exponent += 100;
+            result *= DoubleDouble(1.0e-100, -1.99918998026028836196e-117);
+        }
+        while exponent <= -10 {
+            exponent += 10;
+            result *= DoubleDouble(1.0e-10, -3.6432197315497741579e-27);
+        }
+        while exponent <= -1 {
+            exponent += 1;
+            result *= DoubleDouble(1.0e-01, -5.5511151231257827021e-18);
+        }
+    }
+
+    let result = NonNan::new(f64::from(result) * sign)
+        .unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap());
+
+    Some((result, is_fractional))
+}
diff --git a/core/numeric/nonnan.rs b/core/numeric/nonnan.rs
new file mode 100644
index 000000000..5ae6a1f34
--- /dev/null
+++ b/core/numeric/nonnan.rs
@@ -0,0 +1,105 @@
+#[repr(transparent)]
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct NonNan(f64);
+
+impl NonNan {
+    pub fn new(value: f64) -> Option<Self> {
+        if value.is_nan() {
+            return None;
+        }
+
+        Some(NonNan(value))
+    }
+}
+
+impl PartialEq<NonNan> for f64 {
+    fn eq(&self, other: &NonNan) -> bool {
+        *self == other.0
+    }
+}
+
+impl PartialEq<f64> for NonNan {
+    fn eq(&self, other: &f64) -> bool {
+        self.0 == *other
+    }
+}
+
+impl PartialOrd<f64> for NonNan {
+    fn partial_cmp(&self, other: &f64) -> Option<std::cmp::Ordering> {
+        self.0.partial_cmp(other)
+    }
+}
+
+impl PartialOrd<NonNan> for f64 {
+    fn partial_cmp(&self, other: &NonNan) -> Option<std::cmp::Ordering> {
+        self.partial_cmp(&other.0)
+    }
+}
+
+impl From<i64> for NonNan {
+    fn from(value: i64) -> Self {
+        NonNan(value as f64)
+    }
+}
+
+impl From<NonNan> for f64 {
+    fn from(value: NonNan) -> Self {
+        value.0
+    }
+}
+
+impl std::ops::Deref for NonNan {
+    type Target = f64;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl std::ops::Add for NonNan {
+    type Output = Option<Self>;
+
+    fn add(self, rhs: Self) -> Self::Output {
+        Self::new(self.0 + rhs.0)
+    }
+}
+
+impl std::ops::Sub for NonNan {
+    type Output = Option<Self>;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        Self::new(self.0 - rhs.0)
+    }
+}
+
+impl std::ops::Mul for NonNan {
+    type Output = Option<Self>;
+
+    fn mul(self, rhs: Self) -> Self::Output {
+        Self::new(self.0 * rhs.0)
+    }
+}
+
+impl std::ops::Div for NonNan {
+    type Output = Option<Self>;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        Self::new(self.0 / rhs.0)
+    }
+}
+
+impl std::ops::Rem for NonNan {
+    type Output = Option<Self>;
+
+    fn rem(self, rhs: Self) -> Self::Output {
+        Self::new(self.0 % rhs.0)
+    }
+}
+
+impl std::ops::Neg for NonNan {
+    type Output = Self;
+
+    fn neg(self) -> Self::Output {
+        Self(-self.0)
+    }
+}
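
The point of NonNan is to make NaN unrepresentable: every arithmetic operator
that could produce NaN returns Option<NonNan>, and the callers in numeric.rs
map the None case to SQL NULL. Roughly (a hypothetical snippet, not in the
patch):

    let inf = NonNan::new(f64::INFINITY).unwrap();
    assert!((inf - inf).is_none());           // inf - inf would be NaN
    assert!(NonNan::new(f64::NAN).is_none()); // NaN cannot be constructed
    assert_eq!(f64::from(NonNan::new(1.5).unwrap()), 1.5);
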
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 0869491d6..1185a77b0 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -1,4 +1,5 @@
 #![allow(unused_variables)]
+use crate::numeric::{NullableInteger, Numeric};
 use crate::storage::database::FileMemoryStorage;
 use crate::storage::page_cache::DumbLruPageCache;
 use crate::storage::pager::CreateBTreeFlags;
@@ -5482,357 +5483,61 @@ fn exec_likelihood(reg: &OwnedValue, _probability: &OwnedValue) -> OwnedValue {
 }
 
 pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
-    let result = match (lhs, rhs) {
-        (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
-            let result = lhs.overflowing_add(*rhs);
-            if result.1 {
-                OwnedValue::Float(*lhs as f64 + *rhs as f64)
-            } else {
-                OwnedValue::Integer(result.0)
-            }
-        }
-        (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs + rhs),
-        (OwnedValue::Float(f), OwnedValue::Integer(i))
-        | (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64),
-        (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
-        (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add(
-            &cast_text_to_numeric(lhs.as_str()),
-            &cast_text_to_numeric(rhs.as_str()),
-        ),
-        (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
-            exec_add(&cast_text_to_numeric(text.as_str()), other)
-        }
-        _ => todo!(),
-    };
-    match result {
-        OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
-        _ => result,
-    }
+    (Numeric::from(lhs) + Numeric::from(rhs)).into()
 }
 
 pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
-    let result = match (lhs, rhs) {
-        (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
-            let result = lhs.overflowing_sub(*rhs);
-            if result.1 {
-                OwnedValue::Float(*lhs as f64 - *rhs as f64)
-            } else {
-                OwnedValue::Integer(result.0)
-            }
-        }
-        (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs - rhs),
-        (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs - *rhs as f64),
-        (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs),
-        (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
-        (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract(
-            &cast_text_to_numeric(lhs.as_str()),
-            &cast_text_to_numeric(rhs.as_str()),
-        ),
-        (OwnedValue::Text(text), other) => {
-            exec_subtract(&cast_text_to_numeric(text.as_str()), other)
-        }
-        (other, OwnedValue::Text(text)) => {
-            exec_subtract(other, &cast_text_to_numeric(text.as_str()))
-        }
-        (other, OwnedValue::Blob(blob)) => {
-            let text = String::from_utf8_lossy(&blob);
-            exec_subtract(other, &cast_text_to_numeric(&text))
-        }
-        (OwnedValue::Blob(blob), other) => {
-            let text = String::from_utf8_lossy(&blob);
-            exec_subtract(&cast_text_to_numeric(&text), other)
-        }
-    };
-    match result {
-        OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
-        _ => result,
-    }
+    (Numeric::from(lhs) - Numeric::from(rhs)).into()
 }
 
 pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
-    let result = match (lhs, rhs) {
-        (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
-            let result = lhs.overflowing_mul(*rhs);
-            if result.1 {
-                OwnedValue::Float(*lhs as f64 * *rhs as f64)
-            } else {
-                OwnedValue::Integer(result.0)
-            }
-        }
-        (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs * rhs),
-        (OwnedValue::Integer(i), OwnedValue::Float(f))
-        | (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }),
-        (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
-        (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply(
-            &cast_text_to_numeric(lhs.as_str()),
-            &cast_text_to_numeric(rhs.as_str()),
-        ),
-        (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
-            exec_multiply(&cast_text_to_numeric(text.as_str()), other)
-        }
-
-        _ => todo!(),
-    };
-    match result {
-        OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
-        _ => result,
-    }
+    (Numeric::from(lhs) * Numeric::from(rhs)).into()
 }
 
 pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
-    let result = match (lhs,
rhs) { - (_, OwnedValue::Integer(0)) | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - let result = lhs.overflowing_div(*rhs); - if result.1 { - OwnedValue::Float(*lhs as f64 / *rhs as f64) - } else { - OwnedValue::Integer(result.0) - } - } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs / rhs), - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs / *rhs as f64), - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs), - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other), - (other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())), - _ => todo!(), - }; - match result { - OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null, - _ => result, - } + (Numeric::from(lhs) / Numeric::from(rhs)).into() } pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh & rh), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 & *rh as i64) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_and(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } + (NullableInteger::from(lhs) & NullableInteger::from(rhs)).into() } pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh | rh), - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 | rh), - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh | *rh as i64), - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(*lh as i64 | *rh as i64) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => todo!(), - } + (NullableInteger::from(lhs) | NullableInteger::from(rhs)).into() } pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) - | (_, OwnedValue::Null) - | (_, OwnedValue::Integer(0)) - | (_, OwnedValue::Float(0.0)) => OwnedValue::Null, - (OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null + let convert_to_float = matches!(Numeric::from(lhs), Numeric::Float(_)) + || 
matches!(Numeric::from(rhs), Numeric::Float(_)); + + match NullableInteger::from(lhs) % NullableInteger::from(rhs) { + NullableInteger::Null => OwnedValue::Null, + NullableInteger::Integer(v) => { + if convert_to_float { + OwnedValue::Float(v as f64) } else { - OwnedValue::Integer(lhs % rhs.abs()) + OwnedValue::Integer(v) } } - (OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs_int.abs()) as f64) - } - } - (OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => { - if rhs == &0 { - OwnedValue::Null - } else { - OwnedValue::Float(((*lhs as i64) % rhs.abs()) as f64) - } - } - (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => { - let rhs_int = *rhs as i64; - if rhs_int == 0 { - OwnedValue::Null - } else { - OwnedValue::Float((lhs % rhs_int.abs()) as f64) - } - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_remainder(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_remainder(other, &cast_text_to_numeric(text.as_str())) - } - other => todo!("remainder not implemented for: {:?} {:?}", lhs, other), } } pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer(!i), - OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), - OwnedValue::Text(text) => exec_bit_not(&cast_text_to_integer(text.as_str())), - OwnedValue::Blob(blob) => { - let text = String::from_utf8_lossy(blob); - exec_bit_not(&cast_text_to_integer(&text)) - } - } + (!NullableInteger::from(reg)).into() } pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh)) - } - (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_left(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_left(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -fn compute_shl(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive shifts, if it's too large return 0 - if rhs >= 64 { - 0 - } else { - lhs << rhs - } - } else { - // for negative shifts, check if it's i64::MIN to avoid overflow on negation - if rhs == i64::MIN || rhs <= -64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> (-rhs) - } - } + (NullableInteger::from(lhs) << NullableInteger::from(rhs)).into() } pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh)) - } - 
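
exec_remainder keeps SQLite's quirk that % operates on integer-coerced
operands but reports a float whenever either input was a float. A
hypothetical check against the new implementation (not part of the patch):

    // 7.9 % 3: both sides coerce to integers (7 % 3 = 1), then the result is
    // converted back to a float because the left operand was a float.
    assert_eq!(
        exec_remainder(&OwnedValue::Float(7.9), &OwnedValue::Integer(3)),
        OwnedValue::Float(1.0)
    );
    // A zero divisor yields NULL rather than an error.
    assert_eq!(
        exec_remainder(&OwnedValue::Integer(5), &OwnedValue::Integer(0)),
        OwnedValue::Null
    );
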
(OwnedValue::Float(lh), OwnedValue::Integer(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh)) - } - (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh, *rh as i64)) - } - (OwnedValue::Float(lh), OwnedValue::Float(rh)) => { - OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64)) - } - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) => { - exec_shift_right(&cast_text_to_numeric(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_shift_right(other, &cast_text_to_numeric(text.as_str())) - } - _ => todo!(), - } -} - -// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0 -// note, that binary shift to the right is sign-extended -fn compute_shr(lhs: i64, rhs: i64) -> i64 { - if rhs == 0 { - lhs - } else if rhs > 0 { - // for positive right shifts - if rhs >= 64 { - if lhs < 0 { - -1 - } else { - 0 - } - } else { - lhs >> rhs - } - } else { - // for negative right shifts, check if it's i64::MIN to avoid overflow - if rhs == i64::MIN || -rhs >= 64 { - 0 - } else { - lhs << (-rhs) - } - } + (NullableInteger::from(lhs) >> NullableInteger::from(rhs)).into() } pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue { - match reg { - OwnedValue::Null => OwnedValue::Null, - OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), - OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), - OwnedValue::Text(text) => exec_boolean_not(&&cast_text_to_real(text.as_str())), - OwnedValue::Blob(blob) => { - let text = String::from_utf8_lossy(blob); - exec_boolean_not(&cast_text_to_real(&text)) - } + match Numeric::from(reg).try_into_bool() { + None => OwnedValue::Null, + Some(v) => OwnedValue::Integer(!v as i64), } } pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { @@ -5872,46 +5577,24 @@ pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { } pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (_, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), _) - | (_, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( - &cast_text_to_real(lhs.as_str()), - &cast_text_to_real(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_and(&cast_text_to_real(text.as_str()), other) - } - (OwnedValue::Blob(blob), other) | (other, OwnedValue::Blob(blob)) => { - let text = String::from_utf8_lossy(blob); - exec_and(&cast_text_to_real(&text), other) - } - (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, + match ( + Numeric::from(lhs).try_into_bool(), + Numeric::from(rhs).try_into_bool(), + ) { + (Some(false), _) | (_, Some(false)) => OwnedValue::Integer(0), + (None, _) | (_, None) => OwnedValue::Null, _ => OwnedValue::Integer(1), } } pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue { - match (lhs, rhs) { - (OwnedValue::Null, OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Null) - | (OwnedValue::Null, OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Null) => OwnedValue::Null, - (OwnedValue::Float(0.0), OwnedValue::Integer(0)) - | (OwnedValue::Integer(0), OwnedValue::Float(0.0)) - | (OwnedValue::Float(0.0), OwnedValue::Float(0.0)) - | 
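
exec_and and exec_or now spell out SQL's three-valued logic directly: false
dominates AND, true dominates OR, and NULL wins the remaining undecidable
cases. Hypothetical checks (not in the patch):

    assert_eq!(exec_and(&OwnedValue::Null, &OwnedValue::Integer(0)), OwnedValue::Integer(0)); // NULL AND 0 = 0
    assert_eq!(exec_and(&OwnedValue::Null, &OwnedValue::Integer(1)), OwnedValue::Null);       // NULL AND 1 = NULL
    assert_eq!(exec_or(&OwnedValue::Null, &OwnedValue::Integer(1)), OwnedValue::Integer(1));  // NULL OR 1 = 1
    assert_eq!(exec_or(&OwnedValue::Null, &OwnedValue::Integer(0)), OwnedValue::Null);        // NULL OR 0 = NULL
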
(OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0), - (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or( - &cast_text_to_numeric(lhs.as_str()), - &cast_text_to_numeric(rhs.as_str()), - ), - (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_or(&cast_text_to_numeric(text.as_str()), other) - } - _ => OwnedValue::Integer(1), + match ( + Numeric::from(lhs).try_into_bool(), + Numeric::from(rhs).try_into_bool(), + ) { + (Some(true), _) | (_, Some(true)) => OwnedValue::Integer(1), + (None, _) | (_, None) => OwnedValue::Null, + _ => OwnedValue::Integer(0), } } diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 2556485fb..9acebc2cc 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -566,7 +566,7 @@ dependencies = [ [[package]] name = "limbo_core" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "built", "cfg_block", @@ -599,7 +599,7 @@ dependencies = [ [[package]] name = "limbo_ext" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "chrono", "getrandom 0.3.1", @@ -608,7 +608,7 @@ dependencies = [ [[package]] name = "limbo_macros" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "proc-macro2", "quote", @@ -617,7 +617,7 @@ dependencies = [ [[package]] name = "limbo_sqlite3_parser" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "bitflags", "cc", @@ -636,7 +636,7 @@ dependencies = [ [[package]] name = "limbo_time" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "chrono", "limbo_ext", @@ -648,7 +648,7 @@ dependencies = [ [[package]] name = "limbo_uuid" -version = "0.0.19-pre.4" +version = "0.0.19" dependencies = [ "limbo_ext", "mimalloc", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index ac411077e..69d6f438f 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,7 +11,7 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4" arbitrary = { version = "1.4.1", features = ["derive"] } -limbo_core = { path = "../core" } +limbo_core = { path = "../core", features = ["fuzz"] } rusqlite = { version = "0.34.0", features = ["bundled"] } # Prevent this from interfering with workspaces @@ -21,3 +21,7 @@ members = ["."] [[bin]] name = "expression" path = "fuzz_targets/expression.rs" + +[[bin]] +name = "cast_real" +path = "fuzz_targets/cast_real.rs" diff --git a/fuzz/fuzz_targets/cast_real.rs b/fuzz/fuzz_targets/cast_real.rs new file mode 100644 index 000000000..65f550ec8 --- /dev/null +++ b/fuzz/fuzz_targets/cast_real.rs @@ -0,0 +1,22 @@ +#![no_main] +use libfuzzer_sys::{fuzz_target, Corpus}; +use std::error::Error; + +fn do_fuzz(text: String) -> Result> { + let expected = { + let conn = rusqlite::Connection::open_in_memory()?; + conn.query_row(&format!("SELECT cast(? as real)"), (&text,), |row| { + row.get::<_, f64>(0) + })? + }; + + let actual = limbo_core::numeric::atof(&text) + .map(|(non_nan, _)| f64::from(non_nan)) + .unwrap_or(0.0); + + assert_eq!(expected, actual); + + Ok(Corpus::Keep) +} + +fuzz_target!(|blob: String| -> Corpus { do_fuzz(blob).unwrap_or(Corpus::Keep) }); diff --git a/fuzz/fuzz_targets/expression.rs b/fuzz/fuzz_targets/expression.rs index 9426f0683..e49ec5dab 100644 --- a/fuzz/fuzz_targets/expression.rs +++ b/fuzz/fuzz_targets/expression.rs @@ -31,9 +31,11 @@ macro_rules! str_enum { str_enum! 
{ enum Binary { - Equal => "=", - Is => "IS", - NotEqual => "<>", + // TODO: Not compatible yet + // Equal => "=", + // Is => "IS", + // Concat => "||", + // NotEqual => "<>", GreaterThan => ">", GreaterThanOrEqual => ">=", LessThan => "<", @@ -49,13 +51,13 @@ str_enum! { Multiply => "*", Divide => "/", Mod => "%", - Concat => "||", } } str_enum! { enum Unary { - Not => "~", + Not => "NOT", + BitwiseNot => "~", Negative => "-", Positive => "+", } @@ -167,7 +169,7 @@ fn do_fuzz(expr: Expr) -> Result> { let sql = format!("SELECT {}", expr.query); // FIX: `limbo_core::translate::expr::translate_expr` causes a overflow if this is any higher. - if expr.depth > 153 { + if expr.depth > 150 { return Ok(Corpus::Reject); } @@ -206,12 +208,8 @@ fn do_fuzz(expr: Expr) -> Result> { assert_eq!( OwnedValue::from(expected.clone()), found.clone(), - "with expression {:?} {}", + "with expression {:?}", expr, - match (expected, found) { - (Value::Real(a), OwnedValue::Float(b)) => format!("float diff: {:?}", (a - b).abs()), - _ => "".to_string(), - } ); Ok(Corpus::Keep) From ed27f22e2f0cd0d10ecc07077ba4826e61f59eae Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Wed, 23 Apr 2025 08:33:25 -0300 Subject: [PATCH 332/425] comment out incompatible operations --- fuzz/fuzz_targets/expression.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fuzz/fuzz_targets/expression.rs b/fuzz/fuzz_targets/expression.rs index e49ec5dab..5aa2666e5 100644 --- a/fuzz/fuzz_targets/expression.rs +++ b/fuzz/fuzz_targets/expression.rs @@ -36,10 +36,10 @@ str_enum! { // Is => "IS", // Concat => "||", // NotEqual => "<>", - GreaterThan => ">", - GreaterThanOrEqual => ">=", - LessThan => "<", - LessThanOrEqual => "<=", + // GreaterThan => ">", + // GreaterThanOrEqual => ">=", + // LessThan => "<", + // LessThanOrEqual => "<=", RightShift => ">>", LeftShift => "<<", BitwiseAnd => "&", From 2cbb59e3f9b859f00b1b3c1e9f58cba1b82f395d Mon Sep 17 00:00:00 2001 From: "Levy A." 
Date: Wed, 23 Apr 2025 09:53:37 -0300
Subject: [PATCH 333/425] refactor: renaming and better types

---
 core/numeric.rs | 59 ++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/core/numeric.rs b/core/numeric.rs
index c0499b78d..9a40e63b8 100644
--- a/core/numeric.rs
+++ b/core/numeric.rs
@@ -80,21 +80,19 @@ impl<T: AsRef<str>> From<T> for Numeric {
     fn from(value: T) -> Self {
         let text = value.as_ref();

-        let Some((real, is_fractional)) = atof(text) else {
-            return Self::Integer(0);
-        };
+        match str_to_f64(text) {
+            None => Self::Integer(0),
+            Some(StrToF64::Fractional(value)) => Self::Float(value),
+            Some(StrToF64::Decimal(real)) => {
+                let integer = str_to_i64(text).unwrap_or(0);

-        if is_fractional {
-            return Self::Float(real);
+                if real == integer as f64 {
+                    Self::Integer(integer)
+                } else {
+                    Self::Float(real)
+                }
+            }
         }
-
-        let integer = atoi(text);
-
-        if real == integer as f64 {
-            return Self::Integer(integer);
-        }
-
-        Self::Float(real)
     }
 }

@@ -233,7 +231,7 @@ impl From<Numeric> for OwnedValue {

 impl<T: AsRef<str>> From<T> for NullableInteger {
     fn from(value: T) -> Self {
-        Self::Integer(atoi(value.as_ref()))
+        Self::Integer(str_to_i64(value.as_ref()).unwrap_or(0))
     }
 }

@@ -417,7 +415,7 @@ impl std::ops::MulAssign for DoubleDouble {
     }
 }

-pub fn atoi(input: impl AsRef<str>) -> i64 {
+pub fn str_to_i64(input: impl AsRef<str>) -> Option<i64> {
     let input = input
         .as_ref()
         .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB);
@@ -426,19 +424,26 @@ pub fn atoi(input: impl AsRef<str>) -> i64 {
     iter.next_if(|(_, ch)| matches!(ch, '+' | '-'));

     let Some((end, _)) = iter.take_while(|(_, ch)| ch.is_ascii_digit()).last() else {
-        return 0;
+        return Some(0);
     };

-    input[0..=end]
-        .parse::<i64>()
-        .unwrap_or_else(|err| match err.kind() {
-            std::num::IntErrorKind::PosOverflow => i64::MAX,
-            std::num::IntErrorKind::NegOverflow => i64::MIN,
-            _ => 0,
-        })
+    input[0..=end].parse::<i64>().map_or_else(
+        |err| match err.kind() {
+            std::num::IntErrorKind::PosOverflow => Some(i64::MAX),
+            std::num::IntErrorKind::NegOverflow => Some(i64::MIN),
+            std::num::IntErrorKind::Empty => unreachable!(),
+            _ => Some(0),
+        },
+        Some,
+    )
 }

-pub fn atof(input: impl AsRef<str>) -> Option<(NonNan, bool)> {
+pub enum StrToF64 {
+    Fractional(NonNan),
+    Decimal(NonNan),
+}
+
+pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
     let mut input = input
         .as_ref()
         .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB)
@@ -560,5 +565,9 @@ pub fn atof(input: impl AsRef<str>) -> Option<(NonNan, bool)> {
     let result = NonNan::new(f64::from(result) * sign)
         .unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap());

-    Some((result, is_fractional))
+    Some(if is_fractional {
+        StrToF64::Fractional(result)
+    } else {
+        StrToF64::Decimal(result)
+    })
 }

From 613a332e99975e1946a63ab73f659c2d2a5b081c Mon Sep 17 00:00:00 2001
From: "Levy A."
Date: Wed, 23 Apr 2025 10:13:32 -0300
Subject: [PATCH 334/425] doc: add doc for DoubleDouble

---
 core/numeric.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/core/numeric.rs b/core/numeric.rs
index 9a40e63b8..e736edbc4 100644
--- a/core/numeric.rs
+++ b/core/numeric.rs
@@ -347,6 +347,8 @@ const MAX_EXACT: u64 = u64::MAX << 11;

 const VERTICAL_TAB: char = '\u{b}';

+/// Encapsulates Dekker's arithmetic for higher precision. This is spiritually the same as using an
+/// f128 for arithmetic, but cross-platform and compatible with sqlite.
 #[derive(Debug, Clone, Copy)]
 struct DoubleDouble(f64, f64);

From 8ff906e353de1c6bcd06c1388154f9f1651f6709 Mon Sep 17 00:00:00 2001
From: "Levy A."
Date: Wed, 23 Apr 2025 10:15:49 -0300
Subject: [PATCH 335/425] fix: decrease even more nested operations

this is a worrying trend
---
 fuzz/Cargo.lock                 | 4 ++--
 fuzz/fuzz_targets/expression.rs | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock
index 9acebc2cc..091feceb7 100644
--- a/fuzz/Cargo.lock
+++ b/fuzz/Cargo.lock
@@ -470,9 +470,9 @@ dependencies = [

 [[package]]
 name = "julian_day_converter"
-version = "0.4.4"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3"
+checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16"
 dependencies = [
  "chrono",
 ]

diff --git a/fuzz/fuzz_targets/expression.rs b/fuzz/fuzz_targets/expression.rs
index 5aa2666e5..703d64263 100644
--- a/fuzz/fuzz_targets/expression.rs
+++ b/fuzz/fuzz_targets/expression.rs
@@ -169,7 +169,7 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> {
     let sql = format!("SELECT {}", expr.query);

     // FIX: `limbo_core::translate::expr::translate_expr` causes an overflow if this is any higher.
-    if expr.depth > 150 {
+    if expr.depth > 140 {
         return Ok(Corpus::Reject);
     }

From 5c18c1c57a8380795be5e7f1074be9d89a99ad70 Mon Sep 17 00:00:00 2001
From: Anton Harniakou
Date: Wed, 23 Apr 2025 16:36:43 +0300
Subject: [PATCH 336/425] Draw table if it contains any row

Some tables can be headerless, for example the results of PRAGMA calls
---
 cli/app.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/app.rs b/cli/app.rs
index bb9515660..9e296416e 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -797,7 +797,7 @@ impl<'a> Limbo<'a> {
             }
         }

-        if table.header().is_some() {
+        if !table.is_empty() {
             let _ = self.write_fmt(format_args!("{}", table));
         }
     }

From 517390a4eaed08e2ed881245964179221b356cb1 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Wed, 23 Apr 2025 16:57:17 +0300
Subject: [PATCH 337/425] tests/fuzz/compound_index_seek: show which table had
 the failed query

---
 tests/integration/fuzz/mod.rs | 46 +++++++++++++++--------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs
index 929b33d8d..5b9cbbaa5 100644
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@@ -211,32 +211,26 @@ mod tests {
         } else {
             rng_from_time()
         };
+        let table_defs: [&str; 8] = [
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))",
+            "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))",
+        ];
         // Create all different 3-column primary key permutations
         let dbs = [
-            TempDatabase::new_with_rusqlite(
-                "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z))",
-            ),
-            TempDatabase::new_with_rusqlite(
-                "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z))",
-            ),
-            TempDatabase::new_with_rusqlite(
-                "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z))",
-            ),
-            TempDatabase::new_with_rusqlite(
-                "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y, z desc))",
-            ),
-
TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z))", - ), - TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x, y desc, z desc))", - ), - TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y, z desc))", - ), - TempDatabase::new_with_rusqlite( - "CREATE TABLE t(x, y, z, nonindexed_col, PRIMARY KEY (x desc, y desc, z desc))", - ), + TempDatabase::new_with_rusqlite(table_defs[0]), + TempDatabase::new_with_rusqlite(table_defs[1]), + TempDatabase::new_with_rusqlite(table_defs[2]), + TempDatabase::new_with_rusqlite(table_defs[3]), + TempDatabase::new_with_rusqlite(table_defs[4]), + TempDatabase::new_with_rusqlite(table_defs[5]), + TempDatabase::new_with_rusqlite(table_defs[6]), + TempDatabase::new_with_rusqlite(table_defs[7]), ]; let mut pk_tuples = HashSet::new(); while pk_tuples.len() < 100000 { @@ -475,8 +469,8 @@ mod tests { } panic!( - "DIFFERENT RESULTS! limbo: {:?}, sqlite: {:?}, seed: {}, query: {}", - limbo, sqlite, seed, query + "DIFFERENT RESULTS! limbo: {:?}, sqlite: {:?}, seed: {}, query: {}, table def: {}", + limbo, sqlite, seed, query, table_defs[i] ); } } From 48071b7ad7b3d0e8231318ea6fd0c87b1864c71f Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 17:34:19 +0300 Subject: [PATCH 338/425] tests/fuzz/compound_index_seek: order select cols by definition order --- tests/integration/fuzz/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 5b9cbbaa5..fb38455ac 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -309,7 +309,7 @@ mod tests { let col_choices = ["x", "y", "z", "nonindexed_col"]; let col_choices_weights = [10.0, 10.0, 10.0, 3.0]; let num_cols_in_select = rng.random_range(1..=4); - let select_cols = col_choices + let mut select_cols = col_choices .choose_multiple_weighted(&mut rng, num_cols_in_select, |s| { let idx = col_choices.iter().position(|c| c == s).unwrap(); col_choices_weights[idx] @@ -320,6 +320,9 @@ mod tests { .map(|x| x.to_string()) .collect::>(); + // sort select cols by index of col_choices + select_cols.sort_by_cached_key(|x| col_choices.iter().position(|c| c == x).unwrap()); + let (comp1, comp2, comp3) = all_comps[rng.random_range(0..all_comps.len())]; // Similarly as for the constraints, generate order by permutations so that the only columns involved in the index seek are potentially part of the ORDER BY. let (order_by1, order_by2, order_by3) = { From 8743dcd0da3b026df9e5e7cd8e176c74ce79241f Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 15:17:22 +0300 Subject: [PATCH 339/425] btree: extract indexbtree_seek() into a function like tablebtree_seek() --- core/storage/btree.rs | 279 ++++++++++++++++++------------------------ 1 file changed, 118 insertions(+), 161 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a8167fd17..3359ac175 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -976,169 +976,14 @@ impl BTreeCursor { /// We don't include the rowid in the comparison and that's why the last value from the record is not included. 
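 /// Example (assuming an ascending index): for an index holding keys [1, 2, 4], seeking
 /// key 3 with SeekOp::GE positions the cursor on 4 and returns its rowid, SeekOp::EQ
 /// finds no match and returns None, and SeekOp::LT positions the cursor on 2.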
fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { let cell_iter_dir = op.iteration_direction(); - if let SeekKey::TableRowId(rowid) = key { - return self.tablebtree_seek(rowid, op, cell_iter_dir); - } - return_if_io!(self.move_to(key.clone(), op.clone())); - - { - let page = self.stack.top(); - return_if_locked!(page); - - let contents = page.get().contents.as_ref().unwrap(); - - let cell_count = contents.cell_count(); - let mut cell_idx: isize = if cell_iter_dir == IterationDirection::Forwards { - 0 - } else { - cell_count as isize - 1 - }; - let end = if cell_iter_dir == IterationDirection::Forwards { - cell_count as isize - 1 - } else { - 0 - }; - self.stack.set_cell_index(cell_idx as i32); - while cell_count > 0 - && (if cell_iter_dir == IterationDirection::Forwards { - cell_idx <= end - } else { - cell_idx >= end - }) - { - let cell = contents.cell_get( - cell_idx as usize, - payload_overflow_threshold_max( - contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), - )?; - match &cell { - BTreeCell::TableLeafCell(TableLeafCell { - _rowid: cell_rowid, - _payload: payload, - first_overflow_page, - payload_size, - }) => { - let SeekKey::TableRowId(rowid_key) = key else { - unreachable!("table seek key should be a rowid"); - }; - let found = match op { - SeekOp::GT => *cell_rowid > rowid_key, - SeekOp::GE => *cell_rowid >= rowid_key, - SeekOp::EQ => *cell_rowid == rowid_key, - SeekOp::LE => *cell_rowid <= rowid_key, - SeekOp::LT => *cell_rowid < rowid_key, - }; - if found { - if let Some(next_page) = first_overflow_page { - return_if_io!(self.process_overflow_read( - payload, - *next_page, - *payload_size - )) - } else { - crate::storage::sqlite3_ondisk::read_record( - payload, - self.get_immutable_record_or_create().as_mut().unwrap(), - )? - }; - self.stack.next_cell_in_direction(cell_iter_dir); - return Ok(CursorResult::Ok(Some(*cell_rowid))); - } else { - self.stack.next_cell_in_direction(cell_iter_dir); - } - } - BTreeCell::IndexLeafCell(IndexLeafCell { - payload, - first_overflow_page, - payload_size, - }) => { - let SeekKey::IndexKey(index_key) = key else { - unreachable!("index seek key should be a record"); - }; - if let Some(next_page) = first_overflow_page { - return_if_io!(self.process_overflow_read( - payload, - *next_page, - *payload_size - )) - } else { - crate::storage::sqlite3_ondisk::read_record( - payload, - self.get_immutable_record_or_create().as_mut().unwrap(), - )? 
- }; - let record = self.get_immutable_record(); - let record = record.as_ref().unwrap(); - let record_slice_equal_number_of_cols = - &record.get_values().as_slice()[..index_key.get_values().len()]; - let order = compare_immutable( - record_slice_equal_number_of_cols, - index_key.get_values(), - self.index_key_sort_order, - ); - let found = match op { - SeekOp::GT => order.is_gt(), - SeekOp::GE => order.is_ge(), - SeekOp::EQ => order.is_eq(), - SeekOp::LE => order.is_le(), - SeekOp::LT => order.is_lt(), - }; - self.stack.next_cell_in_direction(cell_iter_dir); - if found { - let rowid = match record.last_value() { - Some(RefValue::Integer(rowid)) => *rowid as u64, - _ => unreachable!("index cells should have an integer rowid"), - }; - return Ok(CursorResult::Ok(Some(rowid))); - } - } - cell_type => { - unreachable!("unexpected cell type: {:?}", cell_type); - } - } - if cell_iter_dir == IterationDirection::Forwards { - cell_idx += 1; - } else { - cell_idx -= 1; - } + match key { + SeekKey::TableRowId(rowid) => { + return self.tablebtree_seek(rowid, op, cell_iter_dir); + } + SeekKey::IndexKey(index_key) => { + return self.indexbtree_seek(index_key, op, cell_iter_dir); } } - - // We have now iterated over all cells in the leaf page and found no match. - let is_index = matches!(key, SeekKey::IndexKey(_)); - if is_index { - // Unlike tables, indexes store payloads in interior cells as well. self.move_to() always moves to a leaf page, so there are cases where we need to - // move back up to the parent interior cell and get the next record from there to perform a correct seek. - // an example of how this can occur: - // - // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. - // in self.move_to(), we encounter an interior cell with key K' = K+2, and move the left child page, which is a leaf page. - // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, - // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct - // next key is K+2, which is in the parent interior cell. - // - // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, - // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know - // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree - // and get the next matching record from there. - match op.iteration_direction() { - IterationDirection::Forwards => { - return self.get_next_record(Some((key, op))); - } - IterationDirection::Backwards => { - return self.get_prev_record(Some((key, op))); - } - } - } - - Ok(CursorResult::Ok(None)) } /// Move the cursor to the root page of the btree. 
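After this refactor, do_seek() is a thin dispatcher that callers drive through
BTreeCursor::seek(). A minimal caller sketch; the values are hypothetical, and the
CursorResult/SeekKey usage mirrors how op_no_conflict calls seek() later in this series:

    // Seek the first rowid >= 10 in a table btree.
    // CursorResult::IO means the caller must re-enter once pending I/O completes.
    match cursor.seek(SeekKey::TableRowId(10), SeekOp::GE)? {
        CursorResult::Ok(Some(rowid)) => println!("found rowid {rowid}"),
        CursorResult::Ok(None) => println!("no matching row"),
        CursorResult::IO => { /* retry the instruction later */ }
    }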
@@ -1563,6 +1408,118 @@ impl BTreeCursor { } } + fn indexbtree_seek( + &mut self, + key: &ImmutableRecord, + seek_op: SeekOp, + cell_iter_dir: IterationDirection, + ) -> Result>> { + self.move_to_root(); + return_if_io!(self.indexbtree_move_to(key, seek_op, cell_iter_dir)); + + let page = self.stack.top(); + return_if_locked!(page); + + let contents = page.get().contents.as_ref().unwrap(); + + let cell_count = contents.cell_count(); + let mut cell_idx: isize = if cell_iter_dir == IterationDirection::Forwards { + 0 + } else { + cell_count as isize - 1 + }; + let end = if cell_iter_dir == IterationDirection::Forwards { + cell_count as isize - 1 + } else { + 0 + }; + self.stack.set_cell_index(cell_idx as i32); + while cell_count > 0 + && (if cell_iter_dir == IterationDirection::Forwards { + cell_idx <= end + } else { + cell_idx >= end + }) + { + let cell = contents.cell_get( + cell_idx as usize, + payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), + payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16), + self.usable_space(), + )?; + let BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + }; + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let record_slice_equal_number_of_cols = + &record.get_values().as_slice()[..key.get_values().len()]; + let order = compare_immutable( + record_slice_equal_number_of_cols, + key.get_values(), + self.index_key_sort_order, + ); + let found = match seek_op { + SeekOp::GT => order.is_gt(), + SeekOp::GE => order.is_ge(), + SeekOp::EQ => order.is_eq(), + SeekOp::LE => order.is_le(), + SeekOp::LT => order.is_lt(), + }; + self.stack.next_cell_in_direction(cell_iter_dir); + if found { + let rowid = match record.last_value() { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + return Ok(CursorResult::Ok(Some(rowid))); + } + if cell_iter_dir == IterationDirection::Forwards { + cell_idx += 1; + } else { + cell_idx -= 1; + } + } + + // We have now iterated over all cells in the leaf page and found no match. + // Unlike tables, indexes store payloads in interior cells as well. self.move_to() always moves to a leaf page, so there are cases where we need to + // move back up to the parent interior cell and get the next record from there to perform a correct seek. + // an example of how this can occur: + // + // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. + // in self.move_to(), we encounter an interior cell with key K' = K+2, and move the left child page, which is a leaf page. + // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, + // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct + // next key is K+2, which is in the parent interior cell. 
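+        // Concretely: with K = 5 and an interior cell K' = 7, a GT 5 seek may descend into a
+        // left leaf holding only [3, 4]; the correct answer 7 then lives only in the parent
+        // interior cell, not in the leaf.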
+ // + // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, + // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know + // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree + // and get the next matching record from there. + match seek_op.iteration_direction() { + IterationDirection::Forwards => { + return self.get_next_record(Some((SeekKey::IndexKey(key), seek_op))); + } + IterationDirection::Backwards => { + return self.get_prev_record(Some((SeekKey::IndexKey(key), seek_op))); + } + } + } + fn read_record_w_possible_overflow( &mut self, payload: &'static [u8], From 7a133f422fb981fe984c23aebd07c35e11282625 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 16:56:45 +0300 Subject: [PATCH 340/425] btree: use binary search for index leaves --- core/storage/btree.rs | 169 +++++++++++++++++++++++++++--------------- 1 file changed, 110 insertions(+), 59 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 3359ac175..7672509b7 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1423,26 +1423,87 @@ impl BTreeCursor { let contents = page.get().contents.as_ref().unwrap(); let cell_count = contents.cell_count(); - let mut cell_idx: isize = if cell_iter_dir == IterationDirection::Forwards { - 0 - } else { - cell_count as isize - 1 - }; - let end = if cell_iter_dir == IterationDirection::Forwards { - cell_count as isize - 1 - } else { - 0 - }; - self.stack.set_cell_index(cell_idx as i32); - while cell_count > 0 - && (if cell_iter_dir == IterationDirection::Forwards { - cell_idx <= end - } else { - cell_idx >= end - }) - { + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + + // If iter dir is forwards, we want the first cell that matches; + // If iter dir is backwards, we want the last cell that matches. + let mut nearest_matching_cell = None; + loop { + if min > max { + let Some(nearest_matching_cell) = nearest_matching_cell else { + // We have now iterated over all cells in the leaf page and found no match. + // Unlike tables, indexes store payloads in interior cells as well. self.move_to() always moves to a leaf page, so there are cases where we need to + // move back up to the parent interior cell and get the next record from there to perform a correct seek. + // an example of how this can occur: + // + // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. + // in self.move_to(), we encounter an interior cell with key K' = K+2, and move the left child page, which is a leaf page. + // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, + // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct + // next key is K+2, which is in the parent interior cell. + // + // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, + // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know + // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. 
Hence, we need to move back up the tree + // and get the next matching record from there. + match seek_op.iteration_direction() { + IterationDirection::Forwards => { + self.stack.set_cell_index(cell_count as i32); + return self.get_next_record(Some((SeekKey::IndexKey(key), seek_op))); + } + IterationDirection::Backwards => { + self.stack.set_cell_index(-1); + return self.get_prev_record(Some((SeekKey::IndexKey(key), seek_op))); + } + } + }; + let cell = contents.cell_get( + nearest_matching_cell as usize, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + + let BTreeCell::IndexLeafCell(IndexLeafCell { + payload, + first_overflow_page, + payload_size, + }) = &cell + else { + unreachable!("unexpected cell type: {:?}", cell); + }; + + if let Some(next_page) = first_overflow_page { + return_if_io!(self.process_overflow_read(payload, *next_page, *payload_size)) + } else { + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_immutable_record_or_create().as_mut().unwrap(), + )? + } + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); + let rowid = match record.last_value() { + Some(RefValue::Integer(rowid)) => *rowid as u64, + _ => unreachable!("index cells should have an integer rowid"), + }; + self.stack.set_cell_index(nearest_matching_cell as i32); + self.stack.next_cell_in_direction(cell_iter_dir); + return Ok(CursorResult::Ok(Some(rowid))); + } + + let cur_cell_idx = (min + max) / 2; + self.stack.set_cell_index(cur_cell_idx as i32); + let cell = contents.cell_get( - cell_idx as usize, + cur_cell_idx as usize, payload_overflow_threshold_max(contents.page_type(), self.usable_space() as u16), payload_overflow_threshold_min(contents.page_type(), self.usable_space() as u16), self.usable_space(), @@ -1468,54 +1529,44 @@ impl BTreeCursor { let record = record.as_ref().unwrap(); let record_slice_equal_number_of_cols = &record.get_values().as_slice()[..key.get_values().len()]; - let order = compare_immutable( + let cmp = compare_immutable( record_slice_equal_number_of_cols, key.get_values(), self.index_key_sort_order, ); let found = match seek_op { - SeekOp::GT => order.is_gt(), - SeekOp::GE => order.is_ge(), - SeekOp::EQ => order.is_eq(), - SeekOp::LE => order.is_le(), - SeekOp::LT => order.is_lt(), + SeekOp::GT => cmp.is_gt(), + SeekOp::GE => cmp.is_ge(), + SeekOp::EQ => cmp.is_eq(), + SeekOp::LE => cmp.is_le(), + SeekOp::LT => cmp.is_lt(), }; - self.stack.next_cell_in_direction(cell_iter_dir); if found { - let rowid = match record.last_value() { - Some(RefValue::Integer(rowid)) => *rowid as u64, - _ => unreachable!("index cells should have an integer rowid"), - }; - return Ok(CursorResult::Ok(Some(rowid))); - } - if cell_iter_dir == IterationDirection::Forwards { - cell_idx += 1; + match cell_iter_dir { + IterationDirection::Forwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } + IterationDirection::Backwards => { + nearest_matching_cell = Some(cur_cell_idx as usize); + min = cur_cell_idx + 1; + } + } } else { - cell_idx -= 1; - } - } - - // We have now iterated over all cells in the leaf page and found no match. - // Unlike tables, indexes store payloads in interior cells as well. 
self.move_to() always moves to a leaf page, so there are cases where we need to - // move back up to the parent interior cell and get the next record from there to perform a correct seek. - // an example of how this can occur: - // - // we do an index seek for key K with cmp = SeekOp::GT, meaning we want to seek to the first key that is greater than K. - // in self.move_to(), we encounter an interior cell with key K' = K+2, and move the left child page, which is a leaf page. - // the reason we move to the left child page is that we know that in an index, all keys in the left child page are less than K' i.e. less than K+2, - // meaning that the left subtree may contain a key greater than K, e.g. K+1. however, it is possible that it doesn't, in which case the correct - // next key is K+2, which is in the parent interior cell. - // - // In the seek() method, once we have landed in the leaf page and find that there is no cell with a key greater than K, - // if we were to return Ok(CursorResult::Ok((None, None))), self.record would be None, which is incorrect, because we already know - // that there is a record with a key greater than K (K' = K+2) in the parent interior cell. Hence, we need to move back up the tree - // and get the next matching record from there. - match seek_op.iteration_direction() { - IterationDirection::Forwards => { - return self.get_next_record(Some((SeekKey::IndexKey(key), seek_op))); - } - IterationDirection::Backwards => { - return self.get_prev_record(Some((SeekKey::IndexKey(key), seek_op))); + if cmp.is_gt() { + max = cur_cell_idx - 1; + } else if cmp.is_lt() { + min = cur_cell_idx + 1; + } else { + match cell_iter_dir { + IterationDirection::Forwards => { + min = cur_cell_idx + 1; + } + IterationDirection::Backwards => { + max = cur_cell_idx - 1; + } + } + } } } } From 8c338438dd5598440082b9d6ad833d6d885c56b5 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 17:33:45 +0300 Subject: [PATCH 341/425] btree: use binary search for index interior cell seek --- core/storage/btree.rs | 107 +++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 39 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 7672509b7..e6e9c3dc7 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1137,26 +1137,79 @@ impl BTreeCursor { } /// Specialized version of move_to() for index btrees. - /// TODO: refactor this to use binary search instead of iterating cells in order. - fn indexbtree_move_to<'a>( + fn indexbtree_move_to( &mut self, - index_key: &'a ImmutableRecord, + index_key: &ImmutableRecord, cmp: SeekOp, iter_dir: IterationDirection, ) -> Result> { - loop { + 'outer: loop { let page = self.stack.top(); return_if_locked!(page); - let contents = page.get().contents.as_ref().unwrap(); if contents.is_leaf() { return Ok(CursorResult::Ok(())); } - let mut found_cell = false; - for cell_idx in 0..contents.cell_count() { + let cell_count = contents.cell_count(); + let mut min: isize = 0; + let mut max: isize = cell_count as isize - 1; + let mut leftmost_matching_cell = None; + loop { + if min > max { + let Some(leftmost_matching_cell) = leftmost_matching_cell else { + self.stack.set_cell_index(contents.cell_count() as i32 + 1); + match contents.rightmost_pointer() { + Some(right_most_pointer) => { + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + None => { + unreachable!( + "we shall not go back up! 
The only way is down the slope" + ); + } + } + }; + let matching_cell = contents.cell_get( + leftmost_matching_cell, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + self.stack.set_cell_index(leftmost_matching_cell as i32); + // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. + // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. + // Example: + // this parent: key 666, and we found the target key in the left child. + // left child has: key 663, key 664, key 665 + // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. + if iter_dir == IterationDirection::Backwards { + self.stack.retreat(); + } + let BTreeCell::IndexInteriorCell(IndexInteriorCell { + left_child_page, .. + }) = &matching_cell + else { + unreachable!("unexpected cell type: {:?}", matching_cell); + }; + + let mem_page = self.pager.read_page(*left_child_page as usize)?; + self.stack.push(mem_page); + continue 'outer; + } + + let cur_cell_idx = (min + max) / 2; + self.stack.set_cell_index(cur_cell_idx as i32); let cell = contents.cell_get( - cell_idx, + cur_cell_idx as usize, payload_overflow_threshold_max( contents.page_type(), self.usable_space() as u16, @@ -1168,10 +1221,10 @@ impl BTreeCursor { self.usable_space(), )?; let BTreeCell::IndexInteriorCell(IndexInteriorCell { - left_child_page, payload, - first_overflow_page, payload_size, + first_overflow_page, + .. }) = &cell else { unreachable!("unexpected cell type: {:?}", cell); @@ -1231,36 +1284,12 @@ impl BTreeCursor { SeekOp::LE => interior_cell_vs_index_key.is_gt(), SeekOp::LT => interior_cell_vs_index_key.is_ge(), }; - if target_leaf_page_is_in_left_subtree { - // we don't advance in case of forward iteration and index tree internal nodes because we will visit this node going up. - // in backwards iteration, we must retreat because otherwise we would unnecessarily visit this node again. - // Example: - // this parent: key 666, and we found the target key in the left child. - // left child has: key 663, key 664, key 665 - // we need to move to the previous parent (with e.g. key 662) when iterating backwards so that we don't end up back here again. - if iter_dir == IterationDirection::Backwards { - self.stack.retreat(); - } - let mem_page = self.pager.read_page(*left_child_page as usize)?; - self.stack.push(mem_page); - found_cell = true; - break; - } else { - self.stack.advance(); - } - } - if !found_cell { - match contents.rightmost_pointer() { - Some(right_most_pointer) => { - self.stack.advance(); - let mem_page = self.pager.read_page(right_most_pointer as usize)?; - self.stack.push(mem_page); - continue; - } - None => { - unreachable!("we shall not go back up! 
The only way is down the slope"); - } + if target_leaf_page_is_in_left_subtree { + leftmost_matching_cell = Some(cur_cell_idx as usize); + max = cur_cell_idx - 1; + } else { + min = cur_cell_idx + 1; } } } From 044339efc78a92c0d591bb8c862255d9e3346ab4 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 17:35:22 +0300 Subject: [PATCH 342/425] btree: rename tablebtree_move_to_binsearch -> tablebtree_move_to --- core/storage/btree.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e6e9c3dc7..bd10562a3 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1027,7 +1027,7 @@ impl BTreeCursor { } /// Specialized version of move_to() for table btrees. - fn tablebtree_move_to_binsearch( + fn tablebtree_move_to( &mut self, rowid: u64, seek_op: SeekOp, @@ -1305,13 +1305,13 @@ impl BTreeCursor { ) -> Result>> { assert!(self.mv_cursor.is_none()); self.move_to_root(); - return_if_io!(self.tablebtree_move_to_binsearch(rowid, seek_op, iter_dir)); + return_if_io!(self.tablebtree_move_to(rowid, seek_op, iter_dir)); let page = self.stack.top(); return_if_locked!(page); let contents = page.get().contents.as_ref().unwrap(); assert!( contents.is_leaf(), - "tablebtree_seek_binsearch() called on non-leaf page" + "tablebtree_seek() called on non-leaf page" ); let cell_count = contents.cell_count(); @@ -1648,7 +1648,7 @@ impl BTreeCursor { let iter_dir = cmp.iteration_direction(); match key { SeekKey::TableRowId(rowid_key) => { - return self.tablebtree_move_to_binsearch(rowid_key, cmp, iter_dir); + return self.tablebtree_move_to(rowid_key, cmp, iter_dir); } SeekKey::IndexKey(index_key) => { return self.indexbtree_move_to(index_key, cmp, iter_dir); From af703110f893381e54088e59f369dacda2cc5af0 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 17:38:48 +0300 Subject: [PATCH 343/425] btree: remove extra iter_dir argument that can be derived from seek_op --- core/storage/btree.rs | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bd10562a3..ad222ebf5 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -975,13 +975,12 @@ impl BTreeCursor { /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { - let cell_iter_dir = op.iteration_direction(); match key { SeekKey::TableRowId(rowid) => { - return self.tablebtree_seek(rowid, op, cell_iter_dir); + return self.tablebtree_seek(rowid, op); } SeekKey::IndexKey(index_key) => { - return self.indexbtree_seek(index_key, op, cell_iter_dir); + return self.indexbtree_seek(index_key, op); } } } @@ -1027,12 +1026,8 @@ impl BTreeCursor { } /// Specialized version of move_to() for table btrees. 
- fn tablebtree_move_to( - &mut self, - rowid: u64, - seek_op: SeekOp, - iter_dir: IterationDirection, - ) -> Result> { + fn tablebtree_move_to(&mut self, rowid: u64, seek_op: SeekOp) -> Result> { + let iter_dir = seek_op.iteration_direction(); 'outer: loop { let page = self.stack.top(); return_if_locked!(page); @@ -1141,8 +1136,8 @@ impl BTreeCursor { &mut self, index_key: &ImmutableRecord, cmp: SeekOp, - iter_dir: IterationDirection, ) -> Result> { + let iter_dir = cmp.iteration_direction(); 'outer: loop { let page = self.stack.top(); return_if_locked!(page); @@ -1301,11 +1296,10 @@ impl BTreeCursor { &mut self, rowid: u64, seek_op: SeekOp, - iter_dir: IterationDirection, ) -> Result>> { assert!(self.mv_cursor.is_none()); self.move_to_root(); - return_if_io!(self.tablebtree_move_to(rowid, seek_op, iter_dir)); + return_if_io!(self.tablebtree_move_to(rowid, seek_op)); let page = self.stack.top(); return_if_locked!(page); let contents = page.get().contents.as_ref().unwrap(); @@ -1313,6 +1307,7 @@ impl BTreeCursor { contents.is_leaf(), "tablebtree_seek() called on non-leaf page" ); + let iter_dir = seek_op.iteration_direction(); let cell_count = contents.cell_count(); let mut min: isize = 0; @@ -1441,10 +1436,9 @@ impl BTreeCursor { &mut self, key: &ImmutableRecord, seek_op: SeekOp, - cell_iter_dir: IterationDirection, ) -> Result>> { self.move_to_root(); - return_if_io!(self.indexbtree_move_to(key, seek_op, cell_iter_dir)); + return_if_io!(self.indexbtree_move_to(key, seek_op)); let page = self.stack.top(); return_if_locked!(page); @@ -1455,6 +1449,8 @@ impl BTreeCursor { let mut min: isize = 0; let mut max: isize = cell_count as isize - 1; + let iter_dir = seek_op.iteration_direction(); + // If iter dir is forwards, we want the first cell that matches; // If iter dir is backwards, we want the last cell that matches. let mut nearest_matching_cell = None; @@ -1524,7 +1520,7 @@ impl BTreeCursor { _ => unreachable!("index cells should have an integer rowid"), }; self.stack.set_cell_index(nearest_matching_cell as i32); - self.stack.next_cell_in_direction(cell_iter_dir); + self.stack.next_cell_in_direction(iter_dir); return Ok(CursorResult::Ok(Some(rowid))); } @@ -1571,7 +1567,7 @@ impl BTreeCursor { SeekOp::LT => cmp.is_lt(), }; if found { - match cell_iter_dir { + match iter_dir { IterationDirection::Forwards => { nearest_matching_cell = Some(cur_cell_idx as usize); max = cur_cell_idx - 1; @@ -1587,7 +1583,7 @@ impl BTreeCursor { } else if cmp.is_lt() { min = cur_cell_idx + 1; } else { - match cell_iter_dir { + match iter_dir { IterationDirection::Forwards => { min = cur_cell_idx + 1; } @@ -1645,13 +1641,12 @@ impl BTreeCursor { // 6. If we find the cell, we return the record. Otherwise, we return an empty result. 
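     // Example: seeking rowid 15 with SeekOp::EQ when the root holds keys [10, 20]:
     // the search lands on the cell keyed 20 and descends into its left child, since
     // a table interior cell with key K points at rowids <= K; the leaf under it is
     // then searched for 15.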
self.move_to_root(); - let iter_dir = cmp.iteration_direction(); match key { SeekKey::TableRowId(rowid_key) => { - return self.tablebtree_move_to(rowid_key, cmp, iter_dir); + return self.tablebtree_move_to(rowid_key, cmp); } SeekKey::IndexKey(index_key) => { - return self.indexbtree_move_to(index_key, cmp, iter_dir); + return self.indexbtree_move_to(index_key, cmp); } } } From a7488496d501e260b4dedc5fa653953ec8abe90a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 18:10:33 +0300 Subject: [PATCH 344/425] expr.is_nonnull(): return true if col.primary_key || col.notnull --- core/translate/main_loop.rs | 2 +- core/translate/optimizer.rs | 46 +++++++++++++++++++++++++------------ 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index c56680446..ff60bb305 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -977,7 +977,7 @@ fn emit_seek( // and if so, jump to the loop end. // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL, // which would erroneously return all rows from t, as NULL is lower than any non-NULL value in index key comparisons. - if !expr.is_nonnull() { + if !expr.is_nonnull(tables) { program.emit_insn(Insn::IsNull { reg, target_pc: loop_end, diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 41e34418e..dc9dccaa6 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -502,7 +502,7 @@ pub trait Optimizable { .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } fn is_rowid_alias_of(&self, table_index: usize) -> bool; - fn is_nonnull(&self) -> bool; + fn is_nonnull(&self, tables: &[TableReference]) -> bool; } impl Optimizable for ast::Expr { @@ -522,26 +522,28 @@ impl Optimizable for ast::Expr { /// This function is currently very conservative, and will return false /// for any expression where we aren't sure and didn't bother to find out /// by writing more complex code. - fn is_nonnull(&self) -> bool { + fn is_nonnull(&self, tables: &[TableReference]) -> bool { match self { Expr::Between { lhs, start, end, .. - } => lhs.is_nonnull() && start.is_nonnull() && end.is_nonnull(), - Expr::Binary(expr, _, expr1) => expr.is_nonnull() && expr1.is_nonnull(), + } => lhs.is_nonnull(tables) && start.is_nonnull(tables) && end.is_nonnull(tables), + Expr::Binary(expr, _, expr1) => expr.is_nonnull(tables) && expr1.is_nonnull(tables), Expr::Case { base, when_then_pairs, else_expr, .. } => { - base.as_ref().map_or(true, |base| base.is_nonnull()) - && when_then_pairs.iter().all(|(_, then)| then.is_nonnull()) + base.as_ref().map_or(true, |base| base.is_nonnull(tables)) + && when_then_pairs + .iter() + .all(|(_, then)| then.is_nonnull(tables)) && else_expr .as_ref() - .map_or(true, |else_expr| else_expr.is_nonnull()) + .map_or(true, |else_expr| else_expr.is_nonnull(tables)) } - Expr::Cast { expr, .. } => expr.is_nonnull(), - Expr::Collate(expr, _) => expr.is_nonnull(), + Expr::Cast { expr, .. } => expr.is_nonnull(tables), + Expr::Collate(expr, _) => expr.is_nonnull(tables), Expr::DoublyQualified(..) => { panic!("Do not call is_nonnull before DoublyQualified has been rewritten as Column") } @@ -549,18 +551,32 @@ impl Optimizable for ast::Expr { Expr::FunctionCall { .. } => false, Expr::FunctionCallStar { .. } => false, Expr::Id(..) => panic!("Do not call is_nonnull before Id has been rewritten as Column"), - Expr::Column { is_rowid_alias, .. 
} => *is_rowid_alias, + Expr::Column { + table, + column, + is_rowid_alias, + .. + } => { + if *is_rowid_alias { + return true; + } + + let table_ref = &tables[*table]; + let columns = table_ref.columns(); + let column = &columns[*column]; + return column.primary_key || column.notnull; + } Expr::RowId { .. } => true, Expr::InList { lhs, rhs, .. } => { - lhs.is_nonnull() + lhs.is_nonnull(tables) && rhs .as_ref() - .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull())) + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_nonnull(tables))) } Expr::InSelect { .. } => false, Expr::InTable { .. } => false, Expr::IsNull(..) => true, - Expr::Like { lhs, rhs, .. } => lhs.is_nonnull() && rhs.is_nonnull(), + Expr::Like { lhs, rhs, .. } => lhs.is_nonnull(tables) && rhs.is_nonnull(tables), Expr::Literal(literal) => match literal { ast::Literal::Numeric(_) => true, ast::Literal::String(_) => true, @@ -573,13 +589,13 @@ impl Optimizable for ast::Expr { }, Expr::Name(..) => false, Expr::NotNull(..) => true, - Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull()), + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_nonnull(tables)), Expr::Qualified(..) => { panic!("Do not call is_nonnull before Qualified has been rewritten as Column") } Expr::Raise(..) => false, Expr::Subquery(..) => false, - Expr::Unary(_, expr) => expr.is_nonnull(), + Expr::Unary(_, expr) => expr.is_nonnull(tables), Expr::Variable(..) => false, } } From b6036cc79d6a8699ceab9b08f98b27a1d31d4bd4 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 23 Apr 2025 16:44:13 -0300 Subject: [PATCH 345/425] Primary key constraint working --- core/translate/insert.rs | 66 ++++++++++++++++++++++++++++++++-------- core/vdbe/builder.rs | 3 ++ core/vdbe/execute.rs | 62 +++++++++++++++++++++++++++++++++++++ core/vdbe/explain.rs | 18 +++++++++-- core/vdbe/insn.rs | 8 +++++ 5 files changed, 142 insertions(+), 15 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 4ca7e6fca..5e8365383 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -283,19 +283,7 @@ pub fn translate_insert( } _ => (), } - // Create and insert the record - program.emit_insn(Insn::MakeRecord { - start_reg: column_registers_start, - count: num_cols, - dest_reg: record_register, - }); - program.emit_insn(Insn::Insert { - cursor: cursor_id, - key_reg: rowid_reg, - record_reg: record_register, - flag: 0, - }); for index_col_mapping in index_col_mappings.iter() { // find which cursor we opened earlier for this index let idx_cursor_id = idx_cursors @@ -332,6 +320,43 @@ pub fn translate_insert( dest_reg: record_reg, }); + let make_record_label = program.allocate_label(); + program.emit_insn(Insn::NoConflict { + cursor_id: idx_cursor_id, + target_pc: make_record_label, + record_reg: idx_start_reg, + num_regs: num_cols, + }); + let mut column_names = Vec::new(); + for (index, ..) 
in index_col_mapping.columns.iter() { + let name = btree_table + .columns + .get(*index) + .unwrap() + .name + .as_ref() + .expect("column name is None"); + column_names.push(format!("{}.{name}", btree_table.name)); + } + let column_names = + column_names + .into_iter() + .enumerate() + .fold(String::new(), |mut accum, (idx, name)| { + if idx % 2 == 1 { + accum.push(','); + } + accum.push_str(&name); + accum + }); + + program.emit_insn(Insn::Halt { + err_code: SQLITE_CONSTRAINT_PRIMARYKEY, + description: format!("{}.{}", table_name.0, column_names), + }); + + program.resolve_label(make_record_label, program.offset()); + // now do the actual index insertion using the unpacked registers program.emit_insn(Insn::IdxInsert { cursor_id: idx_cursor_id, @@ -342,6 +367,21 @@ pub fn translate_insert( flags: IdxInsertFlags::new(), }); } + + // Create and insert the record + program.emit_insn(Insn::MakeRecord { + start_reg: column_registers_start, + count: num_cols, + dest_reg: record_register, + }); + + program.emit_insn(Insn::Insert { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg: record_register, + flag: 0, + }); + if inserting_multiple_rows { // For multiple rows, loop back program.emit_insn(Insn::Goto { @@ -472,7 +512,7 @@ fn resolve_columns_for_insert<'a>( /// Represents how a column in an index should be populated during an INSERT. /// Similar to ColumnMapping above but includes the index name, as well as multiple /// possible value indices for each. -#[derive(Default)] +#[derive(Debug, Default)] struct IndexColMapping { idx_name: String, columns: Vec<(usize, IndexColumn)>, diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 05fdc4938..66b2143bb 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -440,6 +440,9 @@ impl ProgramBuilder { Insn::VFilter { pc_if_empty, .. } => { resolve(pc_if_empty, "VFilter"); } + Insn::NoConflict { target_pc, .. 
} => {
+                resolve(target_pc, "NoConflict");
+            }
             _ => {}
         }
     }

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 1185a77b0..31a81e9b9 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -3,6 +3,7 @@ use crate::numeric::{NullableInteger, Numeric};
 use crate::storage::database::FileMemoryStorage;
 use crate::storage::page_cache::DumbLruPageCache;
 use crate::storage::pager::CreateBTreeFlags;
+use crate::types::ImmutableRecord;
 use crate::{
     error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY},
     ext::ExtValue,
@@ -3895,6 +3896,67 @@ pub fn op_soft_null(
     Ok(InsnFunctionStepResult::Step)
 }

+pub fn op_no_conflict(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::NoConflict {
+        cursor_id,
+        target_pc,
+        record_reg,
+        num_regs,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+    let found = {
+        let mut cursor = state.get_cursor(*cursor_id);
+        let cursor = cursor.as_btree_mut();
+
+        let any_fn = |record: &ImmutableRecord| {
+            for val in record.values.iter() {
+                if matches!(val, RefValue::Null) {
+                    return false;
+                }
+            }
+            true
+        };
+
+        let record = if *num_regs == 0 {
+            let record = match &state.registers[*record_reg] {
+                Register::Record(r) => r,
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "NoConflict: expected a record in the register".into(),
+                    ));
+                }
+            };
+            record
+        } else {
+            &make_record(&state.registers, record_reg, num_regs)
+        };
+
+        // Should jump early if any of the values in the record are NULL
+        let found = any_fn(record);
+        if found {
+            return_if_io!(cursor.seek(SeekKey::IndexKey(record), SeekOp::EQ))
+        } else {
+            found
+        }
+    };
+
+    if found {
+        state.pc += 1;
+    } else {
+        state.pc = target_pc.to_offset_int();
+    }
+
+    Ok(InsnFunctionStepResult::Step)
+}
+
 pub fn op_not_exists(
     program: &Program,
     state: &mut ProgramState,

diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs
index 96afc5d17..eadb5a0d9 100644
--- a/core/vdbe/explain.rs
+++ b/core/vdbe/explain.rs
@@ -569,13 +569,13 @@ pub fn insn_to_str(
         ),
         Insn::Halt {
             err_code,
-            description: _,
+            description,
         } => (
             "Halt",
             *err_code as i32,
             0,
             0,
-            OwnedValue::build_text(""),
+            OwnedValue::build_text(&description),
             0,
             "".to_string(),
         ),
@@ -1068,6 +1068,20 @@ pub fn insn_to_str(
             0,
             "".to_string(),
         ),
+        Insn::NoConflict {
+            cursor_id,
+            target_pc,
+            record_reg,
+            num_regs,
+        } => (
+            "NoConflict",
+            *cursor_id as i32,
+            target_pc.to_debug_int(),
+            *record_reg as i32,
+            OwnedValue::build_text(&format!("{num_regs}")),
+            0,
+            format!("key=r[{}]", record_reg),
+        ),
         Insn::NotExists {
             cursor,
             rowid_reg,

diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 56f44bd2b..633647c36 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -664,6 +664,13 @@ pub enum Insn {
         reg: usize,
     },

+    NoConflict {
+        cursor_id: CursorID,     // P1 index cursor
+        target_pc: BranchOffset, // P2 jump target
+        record_reg: usize,
+        num_regs: usize,
+    },
+
     NotExists {
         cursor: CursorID,
         rowid_reg: usize,
@@ -922,6 +929,7 @@ impl Insn {
             Insn::NewRowid { .. } => execute::op_new_rowid,
             Insn::MustBeInt { .. } => execute::op_must_be_int,
             Insn::SoftNull { .. } => execute::op_soft_null,
+            Insn::NoConflict { .. } => execute::op_no_conflict,
             Insn::NotExists { .. } => execute::op_not_exists,
             Insn::OffsetLimit { .. } => execute::op_offset_limit,
             Insn::OpenWrite { ..
} => execute::op_open_write, From 9dd1ced5adeed8281d1d5639ddc4a15737a9c824 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Wed, 23 Apr 2025 20:38:08 -0300 Subject: [PATCH 346/425] added tests --- core/translate/insert.rs | 4 +- testing/cli_tests/constraint.py | 369 ++++++++++++++++++++++++++++++++ 2 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 testing/cli_tests/constraint.py diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 5e8365383..235cc09ac 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -344,7 +344,7 @@ pub fn translate_insert( .enumerate() .fold(String::new(), |mut accum, (idx, name)| { if idx % 2 == 1 { - accum.push(','); + accum.push_str(", "); } accum.push_str(&name); accum @@ -352,7 +352,7 @@ pub fn translate_insert( program.emit_insn(Insn::Halt { err_code: SQLITE_CONSTRAINT_PRIMARYKEY, - description: format!("{}.{}", table_name.0, column_names), + description: column_names, }); program.resolve_label(make_record_label, program.offset()); diff --git a/testing/cli_tests/constraint.py b/testing/cli_tests/constraint.py new file mode 100644 index 000000000..a37a5b020 --- /dev/null +++ b/testing/cli_tests/constraint.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +import os +from faker import Faker +from faker.providers.lorem.en_US import Provider as P +from cli_tests.test_limbo_cli import TestLimboShell +from pydantic import BaseModel +from cli_tests import console +from enum import Enum +import random +import sqlite3 + +sqlite_flags = os.getenv("SQLITE_FLAGS", "-q").split(" ") + + +keywords = [ + "ABORT", + "ACTION", + "ADD", + "AFTER", + "ALL", + "ALTER", + "ALWAYS", + "ANALYZE", + "AND", + "AS", + "ASC", + "ATTACH", + "AUTOINCREMENT", + "BEFORE", + "BEGIN", + "BETWEEN", + "BY", + "CASCADE", + "CASE", + "CAST", + "CHECK", + "COLLATE", + "COLUMN", + "COMMIT", + "CONFLICT", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "DATABASE", + "DEFAULT", + "DEFERRABLE", + "DEFERRED", + "DELETE", + "DESC", + "DETACH", + "DISTINCT", + "DO", + "DROP", + "EACH", + "ELSE", + "END", + "ESCAPE", + "EXCEPT", + "EXCLUDE", + "EXCLUSIVE", + "EXISTS", + "EXPLAIN", + "FAIL", + "FILTER", + "FIRST", + "FOLLOWING", + "FOR", + "FOREIGN", + "FROM", + "FULL", + "GENERATED", + "GLOB", + "GROUP", + "GROUPS", + "HAVING", + "IF", + "IGNORE", + "IMMEDIATE", + "IN", + "INDEX", + "INDEXED", + "INITIALLY", + "INNER", + "INSERT", + "INSTEAD", + "INTERSECT", + "INTO", + "IS", + "ISNULL", + "JOIN", + "KEY", + "LAST", + "LEFT", + "LIKE", + "LIMIT", + "MATCH", + "MATERIALIZED", + "NATURAL", + "NO", + "NOT", + "NOTHING", + "NOTNULL", + "NULL", + "NULLS", + "OF", + "OFFSET", + "ON", + "OR", + "ORDER", + "OTHERS", + "OUTER", + "OVER", + "PARTITION", + "PLAN", + "PRAGMA", + "PRECEDING", + "PRIMARY", + "QUERY", + "RAISE", + "RANGE", + "RECURSIVE", + "REFERENCES", + "REGEXP", + "REINDEX", + "RELEASE", + "RENAME", + "REPLACE", + "RESTRICT", + "RETURNING", + "RIGHT", + "ROLLBACK", + "ROW", + "ROWS", + "SAVEPOINT", + "SELECT", + "SET", + "TABLE", + "TEMP", + "TEMPORARY", + "THEN", + "TIES", + "TO", + "TRANSACTION", + "TRIGGER", + "UNBOUNDED", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VACUUM", + "VALUES", + "VIEW", + "VIRTUAL", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WITHOUT", +] +P.word_list = tuple(word for word in P.word_list if word.upper() not in keywords) +del P +fake: Faker = Faker(locale="en_US").unique +Faker.seed(0) + + +class ColumnType(Enum): + blob = "blob" + integer = "integer" + 
real = "real" + text = "text" + + def generate(self, faker: Faker) -> str: + match self.value: + case "blob": + blob = sqlite3.Binary(faker.binary(length=4)).hex() + return f"x'{blob}'" + case "integer": + return str(faker.pyint()) + case "real": + return str(faker.pyfloat()) + case "text": + return f"'{faker.text(max_nb_chars=20)}'" + + def __str__(self) -> str: + return self.value.upper() + + +class Column(BaseModel): + name: str + col_type: ColumnType + primary_key: bool + + def generate(faker: Faker) -> "Column": + name = faker.word().replace(" ", "_") + return Column( + name=name, + col_type=Faker().enum(ColumnType), + primary_key=False, + ) + + def __str__(self) -> str: + return f"{self.name} {str(self.col_type)}" + + +class Table(BaseModel): + columns: list[Column] + name: str + + def create_table(self) -> str: + accum = f"CREATE TABLE {self.name} " + col_strings = [str(col) for col in self.columns] + + pk_columns = [col.name for col in self.columns if col.primary_key] + primary_key_stmt = "PRIMARY KEY (" + ", ".join(pk_columns) + ")" + col_strings.append(primary_key_stmt) + + accum = accum + "(" + ", ".join(col_strings) + ");" + + return accum + + def generate_insert(self) -> str: + vals = [col.col_type.generate(fake) for col in self.columns] + vals = ", ".join(vals) + + return f"INSERT INTO {self.name} VALUES ({vals});" + + +class ConstraintTest(BaseModel): + table: Table + db_path: str = "testing/constraint.db" + insert_stmts: list[str] + insert_errors: list[str] + + def run( + self, + limbo: TestLimboShell, + ): + big_stmt = [self.table.create_table()] + for insert_stmt in self.insert_stmts: + big_stmt.append(insert_stmt) + + limbo.run_test("Inserting values into table", "\n".join(big_stmt), "") + + for insert_stmt in self.insert_errors: + limbo.run_test_fn( + insert_stmt, + lambda val: "Runtime error: UNIQUE constraint failed" in val, + ) + limbo.run_test( + "Nothing was inserted after error", + f"SELECT count(*) from {self.table.name};", + str(len(self.insert_stmts)), + ) + + +def validate_with_expected(result: str, expected: str): + return (expected in result, expected) + + +def generate_test(col_amount: int, primary_keys: int) -> ConstraintTest: + assert col_amount >= primary_keys, "Cannot have more primary keys than columns" + cols: list[Column] = [] + for _ in range(col_amount): + cols.append(Column.generate(fake)) + + pk_cols = random.sample( + population=cols, + k=primary_keys, + ) + + for col in pk_cols: + for c in cols: + if col.name == c.name: + c.primary_key = True + + table = Table(columns=cols, name=fake.word()) + insert_stmts = [table.generate_insert() for _ in range(col_amount)] + return ConstraintTest( + table=table, insert_stmts=insert_stmts, insert_errors=insert_stmts + ) + + +def custom_test_1() -> ConstraintTest: + cols = [ + Column(name="id", col_type="integer", primary_key=True), + Column(name="username", col_type="text", primary_key=True), + ] + table = Table(columns=cols, name="users") + insert_stmts = [ + "INSERT INTO users VALUES (1, 'alice');", + "INSERT INTO users VALUES (2, 'bob');", + ] + return ConstraintTest( + table=table, insert_stmts=insert_stmts, insert_errors=insert_stmts + ) + + +def custom_test_2(limbo: TestLimboShell): + create = "CREATE TABLE users (id INT PRIMARY KEY, username TEXT);" + first_insert = "INSERT INTO users VALUES (1, 'alice');" + limbo.run_test("Create unique INT index", create + first_insert, "") + fail_insert = "INSERT INTO users VALUES (1, 'bob');" + limbo.run_test_fn( + fail_insert, + lambda val: "Runtime error: UNIQUE 
constraint failed" in val, + ) + + +def all_tests() -> list[ConstraintTest]: + tests: list[ConstraintTest] = [] + max_cols = 10 + + curr_fake = Faker() + for _ in range(25): + num_cols = curr_fake.pyint(1, max_cols) + test = generate_test(num_cols, curr_fake.pyint(1, num_cols)) + tests.append(test) + + tests.append(custom_test_1()) + return tests + + +def cleanup(db_fullpath: str): + wal_path = f"{db_fullpath}-wal" + shm_path = f"{db_fullpath}-shm" + paths = [db_fullpath, wal_path, shm_path] + for path in paths: + if os.path.exists(path): + os.remove(path) + + +def main(): + tests = all_tests() + for test in tests: + console.info(test.table) + db_path = test.db_path + try: + # Use with syntax to automatically close shell on error + with TestLimboShell("") as limbo: + limbo.execute_dot(f".open {db_path}") + test.run(limbo) + + except Exception as e: + console.error(f"Test FAILED: {e}") + console.debug(test.table.create_table(), test.insert_stmts) + cleanup(db_path) + exit(1) + # delete db after every compat test so we we have fresh db for next test + cleanup(db_path) + + db_path = "testing/constraint.db" + try: + with TestLimboShell("") as limbo: + limbo.execute_dot(f".open {db_path}") + custom_test_2(limbo) + except Exception as e: + console.error(f"Test FAILED: {e}") + cleanup(db_path) + exit(1) + cleanup(db_path) + console.info("All tests passed successfully.") + + +if __name__ == "__main__": + main() From 0a69ea0138415a347d2c33a8ed50545464f5d9ff Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Wed, 23 Apr 2025 16:46:37 +0300 Subject: [PATCH 347/425] Support reading db page size using PRAGMA page_size --- core/translate/pragma.rs | 7 +++++++ core/vdbe/execute.rs | 1 - vendored/sqlite3-parser/src/parser/ast/mod.rs | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs index 668c1f214..bd120ecb8 100644 --- a/core/translate/pragma.rs +++ b/core/translate/pragma.rs @@ -160,6 +160,9 @@ fn update_pragma( // getting here unreachable!(); } + PragmaName::PageSize => { + todo!("updating page_size is not yet implemented") + } } } @@ -257,6 +260,10 @@ fn query_pragma( }); program.emit_result_row(register, 1); } + PragmaName::PageSize => { + program.emit_int(database_header.lock().page_size.into(), register); + program.emit_result_row(register, 1); + } } Ok(()) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index de871f54c..e6cba3769 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1447,7 +1447,6 @@ pub fn op_result_row( values: &state.registers[*start_reg] as *const Register, count: *count, }; - state.result_row = Some(row); state.pc += 1; return Ok(InsnFunctionStepResult::Row); diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 1aac9c2c4..d511ddaaf 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -1622,6 +1622,8 @@ pub enum PragmaName { LegacyFileFormat, /// Return the total number of pages in the database file. PageCount, + // `page_size` pragma + PageSize, /// returns information about the columns of a table TableInfo, /// Returns the user version of the database file. 
From f53448ae75517b817fe7ef0d5702841952a4b1ec Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 19 Apr 2025 20:30:54 +0300 Subject: [PATCH 348/425] Fix bug: we cant remove order by terms from the head of the list --- core/translate/optimizer.rs | 75 +++++++++++++------------------------ core/types.rs | 8 ++++ core/vdbe/sorter.rs | 32 +++++----------- testing/groupby.test | 7 ++++ 4 files changed, 51 insertions(+), 71 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dc9dccaa6..53bbcac1e 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -111,59 +111,36 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { } let order_by_clauses = plan.order_by.as_mut().unwrap(); + // TODO: let's make the group by sorter aware of the order by directions so we dont need to skip + // descending terms. + if order_by_clauses + .iter() + .any(|(_, dir)| matches!(dir, Direction::Descending)) + { + return Ok(()); + } let group_by_clauses = plan.group_by.as_mut().unwrap(); - - let mut group_by_insert_position = 0; - let mut order_index = 0; - - // This function optimizes query execution by eliminating duplicate expressions between ORDER BY and GROUP BY clauses - // When the same column appears in both clauses, we can avoid redundant sorting operations - // The function reorders GROUP BY expressions and removes redundant ORDER BY expressions to ensure consistent ordering - while order_index < order_by_clauses.len() { - let (order_expr, direction) = &order_by_clauses[order_index]; - - // Skip descending orders as they require separate sorting - if matches!(direction, Direction::Descending) { - order_index += 1; - continue; - } - - // Check if the current ORDER BY expression matches any expression in the GROUP BY clause - if let Some(group_expr_position) = group_by_clauses + // all order by terms must be in the group by clause for order by to be eliminated + if !order_by_clauses.iter().all(|(o_expr, _)| { + group_by_clauses .exprs .iter() - .position(|expr| exprs_are_equivalent(expr, order_expr)) - { - // If we found a matching expression in GROUP BY, we need to ensure it's in the correct position - // to preserve the ordering specified by ORDER BY clauses - - // Move the matching GROUP BY expression to the current insertion position - // This effectively "bubbles up" the expression to maintain proper ordering - if group_expr_position != group_by_insert_position { - let mut current_position = group_expr_position; - - // Swap expressions to move the matching one to the correct position - while current_position > group_by_insert_position { - group_by_clauses - .exprs - .swap(current_position, current_position - 1); - current_position -= 1; - } - } - - group_by_insert_position += 1; - - // Remove this expression from ORDER BY since it's now handled by GROUP BY - order_by_clauses.remove(order_index); - // Note: We don't increment order_index here because removal shifts all elements - } else { - // If not found in GROUP BY, move to next ORDER BY expression - order_index += 1; - } - } - if order_by_clauses.is_empty() { - plan.order_by = None + .any(|g_expr| exprs_are_equivalent(g_expr, o_expr)) + }) { + return Ok(()); } + + // reorder group by terms so that they match the order by terms + // this way the group by sorter will effectively do the order by sorter's job and + // we can remove the order by clause + group_by_clauses.exprs.sort_by_key(|g_expr| { + order_by_clauses + .iter() + .position(|(o_expr, _)| 
exprs_are_equivalent(o_expr, g_expr)) + .unwrap_or(usize::MAX) + }); + + plan.order_by = None; Ok(()) } diff --git a/core/types.rs b/core/types.rs index 045f13393..b73bc0e50 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1069,6 +1069,14 @@ impl IndexKeySortOrder { IndexKeySortOrder(spec) } + pub fn from_bool_vec(order: Vec) -> Self { + let mut spec = 0; + for (i, &is_asc) in order.iter().enumerate() { + spec |= (!is_asc as u64) << i; + } + IndexKeySortOrder(spec) + } + pub fn default() -> Self { Self(0) } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 584a29271..c79db02b9 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,10 +1,10 @@ -use crate::types::ImmutableRecord; -use std::cmp::Ordering; +use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; pub struct Sorter { records: Vec, current: Option, - order: Vec, + order: IndexKeySortOrder, + key_len: usize, } impl Sorter { @@ -12,7 +12,8 @@ impl Sorter { Self { records: Vec::new(), current: None, - order, + key_len: order.len(), + order: IndexKeySortOrder::from_bool_vec(order), } } pub fn is_empty(&self) -> bool { @@ -26,24 +27,11 @@ impl Sorter { // We do the sorting here since this is what is called by the SorterSort instruction pub fn sort(&mut self) { self.records.sort_by(|a, b| { - let cmp_by_idx = |idx: usize, ascending: bool| { - let a = &a.get_value(idx); - let b = &b.get_value(idx); - if ascending { - a.cmp(b) - } else { - b.cmp(a) - } - }; - - let mut cmp_ret = Ordering::Equal; - for (idx, &is_asc) in self.order.iter().enumerate() { - cmp_ret = cmp_by_idx(idx, is_asc); - if cmp_ret != Ordering::Equal { - break; - } - } - cmp_ret + compare_immutable( + &a.values[..self.key_len], + &b.values[..self.key_len], + self.order, + ) }); self.records.reverse(); self.next() diff --git a/testing/groupby.test b/testing/groupby.test index 9fd6e51bf..9fce2e83e 100644 --- a/testing/groupby.test +++ b/testing/groupby.test @@ -185,3 +185,10 @@ William|111} do_execsql_test group_by_column_number { select u.first_name, count(1) from users u group by 1 limit 1; } {Aaron|41} + +# There was a regression where we incorrectly removed SOME order by terms and left others in place, which is invalid and results in wrong rows being returned. 
+do_execsql_test groupby_orderby_removal_regression_test { + select id, last_name, count(1) from users GROUP BY 1,2 order by id, last_name desc limit 3; +} {1|Foster|1 +2|Salazar|1 +3|Perry|1} From 3798b4aa8bdc0dd2245e86ea531a27b22b47b53c Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Mon, 21 Apr 2025 14:58:50 +0300 Subject: [PATCH 349/425] use SortOrder in sorters always --- core/translate/group_by.rs | 10 ++-------- core/translate/index.rs | 15 ++------------- core/translate/optimizer.rs | 24 +++++++++++------------- core/translate/order_by.rs | 15 +++++---------- core/translate/plan.rs | 32 +++++++++++++------------------- core/translate/select.rs | 10 ++-------- core/translate/update.rs | 15 ++------------- core/types.rs | 6 +++--- core/vdbe/execute.rs | 8 -------- core/vdbe/explain.rs | 13 ++++--------- core/vdbe/insn.rs | 8 ++++---- core/vdbe/sorter.rs | 6 ++++-- 12 files changed, 52 insertions(+), 110 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 68f732cbb..9b09fca04 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -1,11 +1,10 @@ use std::rc::Rc; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ function::AggFunc, schema::{Column, PseudoTable}, - types::{OwnedValue, Record}, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -74,15 +73,10 @@ pub fn init_group_by( let label_subrtn_acc_clear = program.allocate_label(); - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.exprs.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: non_aggregate_count + plan.aggregates.len(), - order: Record::new(order), + order: (0..group_by.exprs.len()).map(|_| SortOrder::Asc).collect(), }); program.add_comment(program.offset(), "clear group by abort flag"); diff --git a/core/translate/index.rs b/core/translate/index.rs index 55222e40f..bbf9a6b3b 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -3,13 +3,11 @@ use std::sync::Arc; use crate::{ schema::{BTreeTable, Column, Index, IndexColumn, PseudoTable, Schema}, storage::pager::CreateBTreeFlags, - types::Record, util::normalize_ident, vdbe::{ builder::{CursorType, ProgramBuilder, QueryMode}, insn::{IdxInsertFlags, Insn, RegisterOrLiteral}, }, - OwnedValue, }; use limbo_sqlite3_parser::ast::{self, Expr, Id, SortOrder, SortedColumn}; @@ -114,21 +112,12 @@ pub fn translate_create_index( ); // determine the order of the columns in the index for the sorter - let order = idx - .columns - .iter() - .map(|c| { - OwnedValue::Integer(match c.order { - SortOrder::Asc => 0, - SortOrder::Desc => 1, - }) - }) - .collect(); + let order = idx.columns.iter().map(|c| c.order.clone()).collect(); // open the sorter and the pseudo table program.emit_insn(Insn::SorterOpen { cursor_id: sorter_cursor_id, columns: columns.len(), - order: Record::new(order), + order, }); let content_reg = program.alloc_register(); program.emit_insn(Insn::OpenPseudo { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 53bbcac1e..d2992803c 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -12,8 +12,8 @@ use crate::{ use super::{ plan::{ - DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, - SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm, + DeletePlan, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, 
SeekKey, + SelectPlan, TableReference, UpdatePlan, WhereTerm, }, planner::determine_where_to_eval_expr, }; @@ -115,7 +115,7 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { // descending terms. if order_by_clauses .iter() - .any(|(_, dir)| matches!(dir, Direction::Descending)) + .any(|(_, dir)| matches!(dir, SortOrder::Desc)) { return Ok(()); } @@ -149,7 +149,7 @@ fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> { fn eliminate_unnecessary_orderby( table_references: &mut [TableReference], available_indexes: &HashMap>>, - order_by: &mut Option>, + order_by: &mut Option>, group_by: &Option, ) -> Result { let Some(order) = order_by else { @@ -182,8 +182,8 @@ fn eliminate_unnecessary_orderby( // Special case: if ordering by just the rowid, we can remove the ORDER BY clause if order.len() == 1 && order[0].0.is_rowid_alias_of(0) { *iter_dir = match order[0].1 { - Direction::Ascending => IterationDirection::Forwards, - Direction::Descending => IterationDirection::Backwards, + SortOrder::Asc => IterationDirection::Forwards, + SortOrder::Desc => IterationDirection::Backwards, }; *order_by = None; return Ok(true); @@ -224,10 +224,10 @@ fn eliminate_unnecessary_orderby( // If they don't, we must iterate the index in backwards order. let index_direction = &matching_index.columns.first().as_ref().unwrap().order; *iter_dir = match (index_direction, order[0].1) { - (SortOrder::Asc, Direction::Ascending) | (SortOrder::Desc, Direction::Descending) => { + (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => { IterationDirection::Forwards } - (SortOrder::Asc, Direction::Descending) | (SortOrder::Desc, Direction::Ascending) => { + (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => { IterationDirection::Backwards } }; @@ -242,12 +242,10 @@ fn eliminate_unnecessary_orderby( let mut all_match_reverse = true; for (i, (_, direction)) in order.iter().enumerate() { match (&matching_index.columns[i].order, direction) { - (SortOrder::Asc, Direction::Ascending) - | (SortOrder::Desc, Direction::Descending) => { + (SortOrder::Asc, SortOrder::Asc) | (SortOrder::Desc, SortOrder::Desc) => { all_match_reverse = false; } - (SortOrder::Asc, Direction::Descending) - | (SortOrder::Desc, Direction::Ascending) => { + (SortOrder::Asc, SortOrder::Desc) | (SortOrder::Desc, SortOrder::Asc) => { all_match_forward = false; } } @@ -275,7 +273,7 @@ fn use_indexes( table_references: &mut [TableReference], available_indexes: &HashMap>>, where_clause: &mut Vec, - order_by: &mut Option>, + order_by: &mut Option>, group_by: &Option, ) -> Result<()> { // Try to use indexes for eliminating ORDER BY clauses diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 9793afdc9..7fb570a94 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -1,10 +1,9 @@ use std::rc::Rc; -use limbo_sqlite3_parser::ast; +use limbo_sqlite3_parser::ast::{self, SortOrder}; use crate::{ schema::{Column, PseudoTable}, - types::{OwnedValue, Record}, util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, @@ -16,7 +15,7 @@ use crate::{ use super::{ emitter::TranslateCtx, expr::translate_expr, - plan::{Direction, ResultSetColumn, SelectPlan}, + plan::{ResultSetColumn, SelectPlan}, result_row::{emit_offset, emit_result_row_and_limit}, }; @@ -33,21 +32,17 @@ pub struct SortMetadata { pub fn init_order_by( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, - order_by: &[(ast::Expr, Direction)], + order_by: &[(ast::Expr, 
SortOrder)], ) -> Result<()> { let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter); t_ctx.meta_sort = Some(SortMetadata { sort_cursor, reg_sorter_data: program.alloc_register(), }); - let mut order = Vec::new(); - for (_, direction) in order_by.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } program.emit_insn(Insn::SorterOpen { cursor_id: sort_cursor, columns: order_by.len(), - order: Record::new(order), + order: order_by.iter().map(|(_, direction)| *direction).collect(), }); Ok(()) } @@ -258,7 +253,7 @@ pub fn sorter_insert( /// /// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) pub fn order_by_deduplicate_result_columns( - order_by: &[(ast::Expr, Direction)], + order_by: &[(ast::Expr, SortOrder)], result_columns: &[ResultSetColumn], ) -> Option> { let mut result_column_remapping: Option> = None; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 44a43f73a..4eedd65be 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -267,7 +267,7 @@ pub struct SelectPlan { /// group by clause pub group_by: Option, /// order by clause - pub order_by: Option>, + pub order_by: Option>, /// all the aggregates collected from the result columns, order by, and (TODO) having clauses pub aggregates: Vec, /// limit clause @@ -290,7 +290,7 @@ pub struct DeletePlan { /// where clause split into a vec at 'AND' boundaries. pub where_clause: Vec, /// order by clause - pub order_by: Option>, + pub order_by: Option>, /// limit clause pub limit: Option, /// offset clause @@ -306,7 +306,7 @@ pub struct UpdatePlan { // (colum index, new value) pairs pub set_clauses: Vec<(usize, ast::Expr)>, pub where_clause: Vec, - pub order_by: Option>, + pub order_by: Option>, pub limit: Option, pub offset: Option, // TODO: optional RETURNING clause @@ -678,21 +678,6 @@ pub enum Search { }, } -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum Direction { - Ascending, - Descending, -} - -impl Display for Direction { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - Direction::Ascending => write!(f, "ASC"), - Direction::Descending => write!(f, "DESC"), - } - } -} - #[derive(Clone, Debug, PartialEq)] pub struct Aggregate { pub func: AggFunc, @@ -870,7 +855,16 @@ impl fmt::Display for UpdatePlan { if let Some(order_by) = &self.order_by { writeln!(f, "ORDER BY:")?; for (expr, dir) in order_by { - writeln!(f, " - {} {}", expr, dir)?; + writeln!( + f, + " - {} {}", + expr, + if *dir == SortOrder::Asc { + "ASC" + } else { + "DESC" + } + )?; } } if let Some(limit) = self.limit { diff --git a/core/translate/select.rs b/core/translate/select.rs index 3972bdc85..b1eb613bb 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -3,7 +3,7 @@ use super::plan::{select_star, Operation, Search, SelectQueryType}; use super::planner::Scope; use crate::function::{AggFunc, ExtFunc, Func}; use crate::translate::optimizer::optimize_plan; -use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan}; +use crate::translate::plan::{Aggregate, GroupBy, Plan, ResultSetColumn, SelectPlan}; use crate::translate::planner::{ bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit, parse_where, resolve_aggregates, @@ -368,13 +368,7 @@ pub fn prepare_select_plan<'a>( )?; resolve_aggregates(&o.expr, &mut plan.aggregates); - key.push(( - o.expr, - o.order.map_or(Direction::Ascending, |o| match o { - 
ast::SortOrder::Asc => Direction::Ascending, - ast::SortOrder::Desc => Direction::Descending, - }), - )); + key.push((o.expr, o.order.unwrap_or(ast::SortOrder::Asc))); } plan.order_by = Some(key); } diff --git a/core/translate/update.rs b/core/translate/update.rs index a0e32e640..74b27c600 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -11,8 +11,7 @@ use limbo_sqlite3_parser::ast::{self, Expr, ResultColumn, SortOrder, Update}; use super::emitter::emit_program; use super::optimizer::optimize_plan; use super::plan::{ - ColumnUsedMask, Direction, IterationDirection, Plan, ResultSetColumn, TableReference, - UpdatePlan, + ColumnUsedMask, IterationDirection, Plan, ResultSetColumn, TableReference, UpdatePlan, }; use super::planner::bind_column_references; use super::planner::{parse_limit, parse_where}; @@ -155,17 +154,7 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< let order_by = body.order_by.as_ref().map(|order| { order .iter() - .map(|o| { - ( - o.expr.clone(), - o.order - .map(|s| match s { - SortOrder::Asc => Direction::Ascending, - SortOrder::Desc => Direction::Descending, - }) - .unwrap_or(Direction::Ascending), - ) - }) + .map(|o| (o.expr.clone(), o.order.unwrap_or(SortOrder::Asc))) .collect() }); // Parse the WHERE clause diff --git a/core/types.rs b/core/types.rs index b73bc0e50..4ce40d2f7 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1069,10 +1069,10 @@ impl IndexKeySortOrder { IndexKeySortOrder(spec) } - pub fn from_bool_vec(order: Vec) -> Self { + pub fn from_list(order: &[SortOrder]) -> Self { let mut spec = 0; - for (i, &is_asc) in order.iter().enumerate() { - spec |= (!is_asc as u64) << i; + for (i, order) in order.iter().enumerate() { + spec |= ((*order == SortOrder::Desc) as u64) << i; } IndexKeySortOrder(spec) } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 1185a77b0..9d52cc992 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -2693,14 +2693,6 @@ pub fn op_sorter_open( else { unreachable!("unexpected Insn {:?}", insn) }; - let order = order - .get_values() - .iter() - .map(|v| match v { - OwnedValue::Integer(i) => *i == 0, - _ => unreachable!(), - }) - .collect(); let cursor = Sorter::new(order); let mut cursors = state.cursors.borrow_mut(); cursors diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 96afc5d17..3e968e93e 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,3 +1,5 @@ +use limbo_sqlite3_parser::ast::SortOrder; + use crate::vdbe::{builder::CursorType, insn::RegisterOrLiteral}; use super::{Insn, InsnReference, OwnedValue, Program}; @@ -876,17 +878,10 @@ pub fn insn_to_str( } => { let _p4 = String::new(); let to_print: Vec = order - .get_values() .iter() .map(|v| match v { - OwnedValue::Integer(i) => { - if *i == 0 { - "B".to_string() - } else { - "-B".to_string() - } - } - _ => unreachable!(), + SortOrder::Asc => "B".to_string(), + SortOrder::Desc => "-B".to_string(), }) .collect(); ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 56f44bd2b..c95adcc10 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -7,9 +7,9 @@ use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, Pag use crate::{ schema::BTreeTable, storage::{pager::CreateBTreeFlags, wal::CheckpointMode}, - types::Record, }; use limbo_macros::Description; +use limbo_sqlite3_parser::ast::SortOrder; /// Flags provided to comparison instructions (e.g. Eq, Ne) which determine behavior related to NULL values. 
#[derive(Clone, Copy, Debug, Default)] @@ -586,9 +586,9 @@ pub enum Insn { /// Open a sorter. SorterOpen { - cursor_id: CursorID, // P1 - columns: usize, // P2 - order: Record, // P4. 0 if ASC and 1 if DESC + cursor_id: CursorID, // P1 + columns: usize, // P2 + order: Vec, // P4. }, /// Insert a row into the sorter. diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index c79db02b9..d758f91f5 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,3 +1,5 @@ +use limbo_sqlite3_parser::ast::SortOrder; + use crate::types::{compare_immutable, ImmutableRecord, IndexKeySortOrder}; pub struct Sorter { @@ -8,12 +10,12 @@ pub struct Sorter { } impl Sorter { - pub fn new(order: Vec) -> Self { + pub fn new(order: &[SortOrder]) -> Self { Self { records: Vec::new(), current: None, key_len: order.len(), - order: IndexKeySortOrder::from_bool_vec(order), + order: IndexKeySortOrder::from_list(order), } } pub fn is_empty(&self) -> bool { From 51fc1773ea82351c4392c6d98f3aee6fee0d6e64 Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Thu, 24 Apr 2025 10:36:23 +0300 Subject: [PATCH 350/425] Fix missing documentation warning; improve the documentation message --- vendored/sqlite3-parser/src/parser/ast/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index d511ddaaf..74da5b647 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -1622,7 +1622,7 @@ pub enum PragmaName { LegacyFileFormat, /// Return the total number of pages in the database file. PageCount, - // `page_size` pragma + /// Return the page size of the database in bytes. PageSize, /// returns information about the columns of a table TableInfo, From b36c898842bbf71013b7a4685dbe0c951f7dddff Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Apr 2025 15:00:16 +0300 Subject: [PATCH 351/425] rename check_constant() to less confusing name --- core/translate/optimizer.rs | 80 +++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index dc9dccaa6..814958392 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -479,7 +479,7 @@ fn rewrite_exprs_update(plan: &mut UpdatePlan) -> Result<()> { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ConstantPredicate { +pub enum AlwaysTrueOrFalse { AlwaysTrue, AlwaysFalse, } @@ -489,17 +489,18 @@ pub enum ConstantPredicate { Implemented for ast::Expr */ pub trait Optimizable { - // if the expression is a constant expression e.g. '1', returns the constant condition - fn check_constant(&self) -> Result>; + // if the expression is a constant expression that, when evaluated as a condition, is always true or false + // return a [ConstantPredicate]. + fn check_always_true_or_false(&self) -> Result>; fn is_always_true(&self) -> Result { Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantPredicate::AlwaysTrue)) + .check_always_true_or_false()? + .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysTrue)) } fn is_always_false(&self) -> Result { Ok(self - .check_constant()? - .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) + .check_always_true_or_false()? 
+ .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysFalse)) } fn is_rowid_alias_of(&self, table_index: usize) -> bool; fn is_nonnull(&self, tables: &[TableReference]) -> bool; @@ -599,22 +600,23 @@ impl Optimizable for ast::Expr { Expr::Variable(..) => false, } } - fn check_constant(&self) -> Result> { + /// Returns true if the expression is a constant expression that, when evaluated as a condition, is always true or false + fn check_always_true_or_false(&self) -> Result> { match self { Self::Literal(lit) => match lit { ast::Literal::Numeric(b) => { if let Ok(int_value) = b.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = b.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } @@ -624,35 +626,35 @@ impl Optimizable for ast::Expr { let without_quotes = s.trim_matches('\''); if let Ok(int_value) = without_quotes.parse::() { return Ok(Some(if int_value == 0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } if let Ok(float_value) = without_quotes.parse::() { return Ok(Some(if float_value == 0.0 { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse } else { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue })); } - Ok(Some(ConstantPredicate::AlwaysFalse)) + Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)) } _ => Ok(None), }, Self::Unary(op, expr) => { if *op == ast::UnaryOperator::Not { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial.map(|t| match t { - ConstantPredicate::AlwaysTrue => ConstantPredicate::AlwaysFalse, - ConstantPredicate::AlwaysFalse => ConstantPredicate::AlwaysTrue, + AlwaysTrueOrFalse::AlwaysTrue => AlwaysTrueOrFalse::AlwaysFalse, + AlwaysTrueOrFalse::AlwaysFalse => AlwaysTrueOrFalse::AlwaysTrue, })); } if *op == ast::UnaryOperator::Negative { - let trivial = expr.check_constant()?; + let trivial = expr.check_always_true_or_false()?; return Ok(trivial); } @@ -661,50 +663,50 @@ impl Optimizable for ast::Expr { Self::InList { lhs: _, not, rhs } => { if rhs.is_none() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } let rhs = rhs.as_ref().unwrap(); if rhs.is_empty() { return Ok(Some(if *not { - ConstantPredicate::AlwaysTrue + AlwaysTrueOrFalse::AlwaysTrue } else { - ConstantPredicate::AlwaysFalse + AlwaysTrueOrFalse::AlwaysFalse })); } Ok(None) } Self::Binary(lhs, op, rhs) => { - let lhs_trivial = lhs.check_constant()?; - let rhs_trivial = rhs.check_constant()?; + let lhs_trivial = lhs.check_always_true_or_false()?; + let rhs_trivial = rhs.check_always_true_or_false()?; match op { ast::Operator::And => { - if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) - || rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) + || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) { - return Ok(Some(ConstantPredicate::AlwaysFalse)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)); } - if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) - && rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + if lhs_trivial == 
Some(AlwaysTrueOrFalse::AlwaysTrue) + && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) { - return Ok(Some(ConstantPredicate::AlwaysTrue)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue)); } Ok(None) } ast::Operator::Or => { - if lhs_trivial == Some(ConstantPredicate::AlwaysTrue) - || rhs_trivial == Some(ConstantPredicate::AlwaysTrue) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) + || rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysTrue) { - return Ok(Some(ConstantPredicate::AlwaysTrue)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysTrue)); } - if lhs_trivial == Some(ConstantPredicate::AlwaysFalse) - && rhs_trivial == Some(ConstantPredicate::AlwaysFalse) + if lhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) + && rhs_trivial == Some(AlwaysTrueOrFalse::AlwaysFalse) { - return Ok(Some(ConstantPredicate::AlwaysFalse)); + return Ok(Some(AlwaysTrueOrFalse::AlwaysFalse)); } Ok(None) From 5bed33150553988a80c08694991998f3ca83185e Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Fri, 18 Apr 2025 15:15:43 +0300 Subject: [PATCH 352/425] add Func::is_deterministic() --- core/function.rs | 96 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/core/function.rs b/core/function.rs index da246e719..5a436465e 100644 --- a/core/function.rs +++ b/core/function.rs @@ -10,6 +10,12 @@ pub struct ExternalFunc { pub func: ExtFunc, } +impl ExternalFunc { + pub fn is_deterministic(&self) -> bool { + false // external functions can be whatever so let's just default to false + } +} + #[derive(Debug, Clone)] pub enum ExtFunc { Scalar(ScalarFunction), @@ -98,6 +104,13 @@ pub enum JsonFunc { JsonQuote, } +#[cfg(feature = "json")] +impl JsonFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + #[cfg(feature = "json")] impl Display for JsonFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -145,6 +158,12 @@ pub enum VectorFunc { VectorDistanceCos, } +impl VectorFunc { + pub fn is_deterministic(&self) -> bool { + true + } +} + impl Display for VectorFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { @@ -198,6 +217,10 @@ impl PartialEq for AggFunc { } impl AggFunc { + pub fn is_deterministic(&self) -> bool { + false // consider aggregate functions nondeterministic since they depend on the number of rows, not only the input arguments + } + pub fn num_args(&self) -> usize { match self { Self::Avg => 1, @@ -297,6 +320,65 @@ pub enum ScalarFunc { Likelihood, } +impl ScalarFunc { + pub fn is_deterministic(&self) -> bool { + match self { + ScalarFunc::Cast => true, + ScalarFunc::Changes => false, // depends on DB state + ScalarFunc::Char => true, + ScalarFunc::Coalesce => true, + ScalarFunc::Concat => true, + ScalarFunc::ConcatWs => true, + ScalarFunc::Glob => true, + ScalarFunc::IfNull => true, + ScalarFunc::Iif => true, + ScalarFunc::Instr => true, + ScalarFunc::Like => true, + ScalarFunc::Abs => true, + ScalarFunc::Upper => true, + ScalarFunc::Lower => true, + ScalarFunc::Random => false, // duh + ScalarFunc::RandomBlob => false, // duh + ScalarFunc::Trim => true, + ScalarFunc::LTrim => true, + ScalarFunc::RTrim => true, + ScalarFunc::Round => true, + ScalarFunc::Length => true, + ScalarFunc::OctetLength => true, + ScalarFunc::Min => true, + ScalarFunc::Max => true, + ScalarFunc::Nullif => true, + ScalarFunc::Sign => true, + ScalarFunc::Substr => true, + ScalarFunc::Substring => true, + ScalarFunc::Soundex => true, + ScalarFunc::Date => false, + ScalarFunc::Time => false, + ScalarFunc::TotalChanges => 
false, + ScalarFunc::DateTime => false, + ScalarFunc::Typeof => true, + ScalarFunc::Unicode => true, + ScalarFunc::Quote => true, + ScalarFunc::SqliteVersion => true, + ScalarFunc::SqliteSourceId => true, + ScalarFunc::UnixEpoch => false, + ScalarFunc::JulianDay => false, + ScalarFunc::Hex => true, + ScalarFunc::Unhex => true, + ScalarFunc::ZeroBlob => true, + ScalarFunc::LastInsertRowid => false, + ScalarFunc::Replace => true, + #[cfg(feature = "fs")] + ScalarFunc::LoadExtension => true, + ScalarFunc::StrfTime => false, + ScalarFunc::Printf => false, + ScalarFunc::Likely => true, + ScalarFunc::TimeDiff => false, + ScalarFunc::Likelihood => true, + } + } +} + impl Display for ScalarFunc { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let str = match self { @@ -398,6 +480,9 @@ pub enum MathFuncArity { } impl MathFunc { + pub fn is_deterministic(&self) -> bool { + true + } pub fn arity(&self) -> MathFuncArity { match self { Self::Pi => MathFuncArity::Nullary, @@ -501,6 +586,17 @@ pub struct FuncCtx { } impl Func { + pub fn is_deterministic(&self) -> bool { + match self { + Self::Agg(agg_func) => agg_func.is_deterministic(), + Self::Scalar(scalar_func) => scalar_func.is_deterministic(), + Self::Math(math_func) => math_func.is_deterministic(), + Self::Vector(vector_func) => vector_func.is_deterministic(), + #[cfg(feature = "json")] + Self::Json(json_func) => json_func.is_deterministic(), + Self::External(external_func) => external_func.is_deterministic(), + } + } pub fn resolve_function(name: &str, arg_count: usize) -> Result { match name { "avg" => { From e5bab63522f55e26388ba4ca35fc4ac7f4df7d84 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Apr 2025 15:00:25 +0300 Subject: [PATCH 353/425] add expr.is_constant() --- core/translate/optimizer.rs | 85 +++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 814958392..efb2015f0 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -11,6 +11,7 @@ use crate::{ }; use super::{ + emitter::Resolver, plan::{ DeletePlan, Direction, EvalAt, GroupBy, IterationDirection, Operation, Plan, Search, SeekDef, SeekKey, SelectPlan, TableReference, UpdatePlan, WhereTerm, @@ -502,6 +503,7 @@ pub trait Optimizable { .check_always_true_or_false()? .map_or(false, |c| c == AlwaysTrueOrFalse::AlwaysFalse)) } + fn is_constant(&self, resolver: &Resolver<'_>) -> bool; fn is_rowid_alias_of(&self, table_index: usize) -> bool; fn is_nonnull(&self, tables: &[TableReference]) -> bool; } @@ -600,6 +602,89 @@ impl Optimizable for ast::Expr { Expr::Variable(..) => false, } } + /// Returns true if the expression is a constant i.e. does not depend on variables or columns etc. + fn is_constant(&self, resolver: &Resolver<'_>) -> bool { + match self { + Expr::Between { + lhs, start, end, .. + } => { + lhs.is_constant(resolver) + && start.is_constant(resolver) + && end.is_constant(resolver) + } + Expr::Binary(expr, _, expr1) => { + expr.is_constant(resolver) && expr1.is_constant(resolver) + } + Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + base.as_ref() + .map_or(true, |base| base.is_constant(resolver)) + && when_then_pairs.iter().all(|(when, then)| { + when.is_constant(resolver) && then.is_constant(resolver) + }) + && else_expr + .as_ref() + .map_or(true, |else_expr| else_expr.is_constant(resolver)) + } + Expr::Cast { expr, .. 
} => expr.is_constant(resolver), + Expr::Collate(expr, _) => expr.is_constant(resolver), + Expr::DoublyQualified(_, _, _) => { + panic!("DoublyQualified should have been rewritten as Column") + } + Expr::Exists(_) => false, + Expr::FunctionCall { args, name, .. } => { + let Some(func) = + resolver.resolve_function(&name.0, args.as_ref().map_or(0, |args| args.len())) + else { + return false; + }; + func.is_deterministic() + && args.as_ref().map_or(true, |args| { + args.iter().all(|arg| arg.is_constant(resolver)) + }) + } + Expr::FunctionCallStar { .. } => false, + Expr::Id(_) => panic!("Id should have been rewritten as Column"), + Expr::Column { .. } => false, + Expr::RowId { .. } => false, + Expr::InList { lhs, rhs, .. } => { + lhs.is_constant(resolver) + && rhs + .as_ref() + .map_or(true, |rhs| rhs.iter().all(|rhs| rhs.is_constant(resolver))) + } + Expr::InSelect { .. } => { + false // might be constant, too annoying to check subqueries etc. implement later + } + Expr::InTable { .. } => false, + Expr::IsNull(expr) => expr.is_constant(resolver), + Expr::Like { + lhs, rhs, escape, .. + } => { + lhs.is_constant(resolver) + && rhs.is_constant(resolver) + && escape + .as_ref() + .map_or(true, |escape| escape.is_constant(resolver)) + } + Expr::Literal(_) => true, + Expr::Name(_) => false, + Expr::NotNull(expr) => expr.is_constant(resolver), + Expr::Parenthesized(exprs) => exprs.iter().all(|expr| expr.is_constant(resolver)), + Expr::Qualified(_, _) => { + panic!("Qualified should have been rewritten as Column") + } + Expr::Raise(_, expr) => expr + .as_ref() + .map_or(true, |expr| expr.is_constant(resolver)), + Expr::Subquery(_) => false, + Expr::Unary(_, expr) => expr.is_constant(resolver), + Expr::Variable(_) => false, + } + } /// Returns true if the expression is a constant expression that, when evaluated as a condition, is always true or false fn check_always_true_or_false(&self) -> Result> { match self { From 47f3f3bda386099758296116b3958b86970f5427 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 12:53:09 +0300 Subject: [PATCH 354/425] vdbe: replace constant_insns with constant_spans --- core/vdbe/builder.rs | 129 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 21 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 05fdc4938..dae59e81b 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -1,5 +1,6 @@ use std::{ cell::Cell, + cmp::Ordering, collections::HashMap, rc::{Rc, Weak}, sync::Arc, @@ -21,9 +22,12 @@ use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program}; pub struct ProgramBuilder { next_free_register: usize, next_free_cursor_id: usize, - insns: Vec<(Insn, InsnFunction)>, - // for temporarily storing instructions that will be put after Transaction opcode - constant_insns: Vec<(Insn, InsnFunction)>, + /// Instruction, the function to execute it with, and its original index in the vector. + insns: Vec<(Insn, InsnFunction, usize)>, + /// A span of instructions from (offset_start_inclusive, offset_end_exclusive), + /// that are deemed to be compile-time constant and can be hoisted out of loops + /// so that they get evaluated only once at the start of the program. + pub constant_spans: Vec<(usize, usize)>, // Vector of labels which must be assigned to next emitted instruction next_insn_labels: Vec, // Cursors that are referenced by the program. Indexed by CursorID. 
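For intuition on what these spans buy: a run of instructions that computes only compile-time constants can be lifted out of the per-row loop and executed once at the start of the program. A hedged SQL illustration — the table, column, and literal are taken from the example in the comment this patch removes just below, not invented here:

    -- 'John' depends on no column, so the instructions that load it into a
    -- register form a constant span that can run once before the scan,
    -- instead of once per visited row
    SELECT * FROM users WHERE name = 'John';
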
@@ -84,7 +88,7 @@ impl ProgramBuilder { insns: Vec::with_capacity(opts.approx_num_insns), next_insn_labels: Vec::with_capacity(2), cursor_ref: Vec::with_capacity(opts.num_cursors), - constant_insns: Vec::new(), + constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), seekrowid_emitted_bitmask: 0, comments: if opts.query_mode == QueryMode::Explain { @@ -98,6 +102,56 @@ impl ProgramBuilder { } } + /// Start a new constant span. The next instruction to be emitted will be the first + /// instruction in the span. + pub fn constant_span_start(&mut self) -> usize { + let span = self.constant_spans.len(); + let start = self.insns.len(); + self.constant_spans.push((start, usize::MAX)); + span + } + + /// End the current constant span. The last instruction that was emitted is the last + /// instruction in the span. + pub fn constant_span_end(&mut self, span_idx: usize) { + let span = &mut self.constant_spans[span_idx]; + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + + /// End all constant spans that are currently open. This is used to handle edge cases + /// where we think a parent expression is constant, but we decide during the evaluation + /// of one of its children that it is not. + pub fn constant_span_end_all(&mut self) { + for span in self.constant_spans.iter_mut() { + if span.1 == usize::MAX { + span.1 = self.insns.len().saturating_sub(1); + } + } + } + + /// Check if there is a constant span that is currently open. + pub fn constant_span_is_open(&self) -> bool { + self.constant_spans + .last() + .map_or(false, |(_, end)| *end == usize::MAX) + } + + /// Get the index of the next constant span. + /// Used in [crate::translate::expr::translate_expr_no_constant_opt()] to invalidate + /// all constant spans after the given index. + pub fn constant_spans_next_idx(&self) -> usize { + self.constant_spans.len() + } + + /// Invalidate all constant spans after the given index. This is used when we want to + /// be sure that constant optimization is never used for translating a given expression. + /// See [crate::translate::expr::translate_expr_no_constant_opt()] for more details. + pub fn constant_spans_invalidate_after(&mut self, idx: usize) { + self.constant_spans.truncate(idx); + } + pub fn alloc_register(&mut self) -> usize { let reg = self.next_free_register; self.next_free_register += 1; @@ -123,12 +177,8 @@ impl ProgramBuilder { } pub fn emit_insn(&mut self, insn: Insn) { - for label in self.next_insn_labels.drain(..) { - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(self.insns.len() as InsnReference); - } let function = insn.to_function(); - self.insns.push((insn, function)); + self.insns.push((insn, function, self.insns.len())); } pub fn close_cursors(&mut self, cursors: &[CursorID]) { @@ -204,16 +254,53 @@ impl ProgramBuilder { } } - // Emit an instruction that will be put at the end of the program (after Transaction statement). - // This is useful for instructions that otherwise will be unnecessarily repeated in a loop. - // Example: In `SELECT * from users where name='John'`, it is unnecessary to set r[1]='John' as we SCAN users table. - // We could simply set it once before the SCAN started. 
pub fn mark_last_insn_constant(&mut self) { - self.constant_insns.push(self.insns.pop().unwrap()); + if self.constant_span_is_open() { + // no need to mark this insn as constant as the surrounding parent expression is already constant + return; + } + + let prev = self.insns.len().saturating_sub(1); + self.constant_spans.push((prev, prev)); } pub fn emit_constant_insns(&mut self) { - self.insns.append(&mut self.constant_insns); + // move compile-time constant instructions to the end of the program, where they are executed once after Init jumps to it. + // any label_to_resolved_offset that points to an instruction within any moved constant span should be updated to point to the new location. + + // the instruction reordering can be done by sorting the insns, so that the ordering is: + // 1. if insn not in any constant span, it stays where it is + // 2. if insn is in a constant span, it is after other insns, except those that are in a later constant span + // 3. within a single constant span the order is preserver + self.insns.sort_by(|(_, _, index_a), (_, _, index_b)| { + let a_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_a && span.1 >= *index_a); + let b_span = self + .constant_spans + .iter() + .find(|span| span.0 <= *index_b && span.1 >= *index_b); + if a_span.is_some() && b_span.is_some() { + a_span.unwrap().0.cmp(&b_span.unwrap().0) + } else if a_span.is_some() { + Ordering::Greater + } else if b_span.is_some() { + Ordering::Less + } else { + Ordering::Equal + } + }); + for resolved_offset in self.label_to_resolved_offset.iter_mut() { + if let Some((old_offset, target)) = resolved_offset { + let new_offset = self + .insns + .iter() + .position(|(_, _, index)| *old_offset == *index as u32) + .unwrap() as u32; + *resolved_offset = Some((new_offset, *target)); + } + } } pub fn offset(&self) -> BranchOffset { @@ -260,7 +347,7 @@ impl ProgramBuilder { ); } }; - for (insn, _) in self.insns.iter_mut() { + for (insn, _, _) in self.insns.iter_mut() { match insn { Insn::Init { target_pc } => { resolve(target_pc, "Init"); @@ -467,15 +554,15 @@ impl ProgramBuilder { change_cnt_on: bool, ) -> Program { self.resolve_labels(); - assert!( - self.constant_insns.is_empty(), - "constant_insns is not empty when build() is called, did you forget to call emit_constant_insns()?" 
- ); self.parameters.list.dedup(); Program { max_registers: self.next_free_register, - insns: self.insns, + insns: self + .insns + .into_iter() + .map(|(insn, function, _)| (insn, function)) + .collect(), cursor_ref: self.cursor_ref, database_header, comments: self.comments, From b4b38bdb3c532242cdfca3d32e225a07b4589d3a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 12:53:28 +0300 Subject: [PATCH 355/425] vdbe: resolve labels for InitCoroutine::start_offset --- core/vdbe/builder.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index dae59e81b..fa56ba7ff 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -462,9 +462,10 @@ impl ProgramBuilder { Insn::InitCoroutine { yield_reg: _, jump_on_definition, - start_offset: _, + start_offset, } => { resolve(jump_on_definition, "InitCoroutine"); + resolve(start_offset, "InitCoroutine"); } Insn::NotExists { cursor: _, From 0f5c7917845ee99d4e47ed73c32d83bfb7f7c51a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 12:58:58 +0300 Subject: [PATCH 356/425] vdbe: refactor label resolution to account for insn offsets changing --- core/vdbe/builder.rs | 63 +++++++++++++++++++++++++++++++------------- core/vdbe/mod.rs | 33 ++++++++++++++++------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index fa56ba7ff..a5d9e8b8a 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -17,7 +17,7 @@ use crate::{ Connection, VirtualTable, }; -use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program}; +use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, @@ -28,12 +28,10 @@ pub struct ProgramBuilder { /// that are deemed to be compile-time constant and can be hoisted out of loops /// so that they get evaluated only once at the start of the program. pub constant_spans: Vec<(usize, usize)>, - // Vector of labels which must be assigned to next emitted instruction - next_insn_labels: Vec, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, /// A vector where index=label number, value=resolved offset. Resolved in build(). - label_to_resolved_offset: Vec>, + label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) @@ -86,7 +84,6 @@ impl ProgramBuilder { next_free_register: 1, next_free_cursor_id: 0, insns: Vec::with_capacity(opts.approx_num_insns), - next_insn_labels: Vec::with_capacity(2), cursor_ref: Vec::with_capacity(opts.num_cursors), constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), @@ -313,18 +310,42 @@ impl ProgramBuilder { BranchOffset::Label(label_n as u32) } - // Effectively a GOTO without the need to emit an explicit GOTO instruction. - // Useful when you know you need to jump to "the next part", but the exact offset is unknowable - // at the time of emitting the instruction. + /// Resolve a label to whatever instruction follows the one that was + /// last emitted. + /// + /// Use this when your use case is: "the program should jump to whatever instruction + /// follows the one that was previously emitted", and you don't care exactly + /// which instruction that is. 
Examples include "the start of a loop", or + /// "after the loop ends". + /// + /// It is important to handle those cases this way, because the precise + /// instruction that follows any given instruction might change due to + /// reordering the emitted instructions. + #[inline] pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { - self.next_insn_labels.push(label); + assert!(label.is_label(), "BranchOffset {:?} is not a label", label); + self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn); } + /// Resolve a label to exactly the instruction that was last emitted. + /// + /// Use this when your use case is: "the program should jump to the exact instruction + /// that was last emitted", and you don't care WHERE exactly that ends up being + /// once the order of the bytecode of the program is finalized. Examples include + /// "jump to the Halt instruction", or "jump to the Next instruction of a loop". + #[inline] pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { + self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn); + } + + fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(to_offset.to_offset_int()); + let BranchOffset::Label(label_number) = label else { + unreachable!("Label is not a label"); + }; + self.label_to_resolved_offset[label_number as usize] = + Some((to_offset.to_offset_int(), target)); } /// Resolve unresolved labels to a specific offset in the instruction list. @@ -335,15 +356,21 @@ impl ProgramBuilder { pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { - let to_offset = self - .label_to_resolved_offset - .get(*label as usize) - .unwrap_or_else(|| { - panic!("Reference to undefined label in {}: {}", insn_name, label) - }); + let Some(Some((to_offset, target))) = + self.label_to_resolved_offset.get(*label as usize) + else { + panic!( + "Reference to undefined or unresolved label in {}: {}", + insn_name, label + ); + }; *pc = BranchOffset::Offset( to_offset - .unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)), + + if *target == JumpTarget::ExactlyThisInsn { + 0 + } else { + 1 + }, ); } }; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 1d1ad0b77..86fd53303 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -60,6 +60,26 @@ use std::{ sync::Arc, }; +/// We use labels to indicate that we want to jump to whatever the instruction offset +/// will be at runtime, because the offset cannot always be determined when the jump +/// instruction is created. +/// +/// In some cases, we want to jump to EXACTLY a specific instruction. +/// - Example: a condition is not met, so we want to jump to wherever Halt is. +/// In other cases, we don't care what the exact instruction is, but we know that we +/// want to jump to whatever comes AFTER a certain instruction. +/// - Example: a Next instruction will want to jump to "whatever the start of the loop is", +/// but it doesn't care what instruction that is. 
+/// +/// The reason this distinction is important is that we might reorder instructions that are +/// constant at compile time, and when we do that, we need to change the offsets of any impacted +/// jump instructions, so the instruction that comes immediately after "next Insn" might have changed during the reordering. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum JumpTarget { + ExactlyThisInsn, + AfterThisInsn, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// Represents a target for a jump instruction. /// Stores 32-bit ints to keep the enum word-sized. @@ -95,15 +115,6 @@ impl BranchOffset { } } - /// Returns the label value. Panics if the branch offset is an offset or placeholder. - pub fn to_label_value(&self) -> u32 { - match self { - BranchOffset::Label(v) => *v, - BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"), - BranchOffset::Placeholder => unreachable!("Unresolved placeholder"), - } - } - /// Returns the branch offset as a signed integer. /// Used in explain output, where we don't want to panic in case we have an unresolved /// label or placeholder. @@ -121,6 +132,10 @@ impl BranchOffset { pub fn add>(self, n: N) -> BranchOffset { BranchOffset::Offset(self.to_offset_int() + n.into()) } + + pub fn sub>(self, n: N) -> BranchOffset { + BranchOffset::Offset(self.to_offset_int() - n.into()) + } } pub type CursorID = usize; From e557503091c4f965301f2cedb3985b167a1a71b2 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 12:59:24 +0300 Subject: [PATCH 357/425] expr.rs: use constant spans to optimize constant expressions --- core/translate/expr.rs | 185 ++++++++++++++++++++++++++--------------- 1 file changed, 117 insertions(+), 68 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 53deb7e0f..79ccb1fe9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -13,6 +13,7 @@ use crate::vdbe::{ use crate::Result; use super::emitter::Resolver; +use super::optimizer::Optimizable; use super::plan::{Operation, TableReference}; #[derive(Debug, Clone, Copy)] @@ -205,7 +206,7 @@ pub fn translate_condition_expr( }, resolver, )?; - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); translate_condition_expr( program, referenced_tables, @@ -230,7 +231,7 @@ pub fn translate_condition_expr( }, resolver, )?; - program.resolve_label(jump_target_when_false, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_false); translate_condition_expr( program, referenced_tables, @@ -254,8 +255,8 @@ pub fn translate_condition_expr( { let lhs_reg = program.alloc_register(); let rhs_reg = program.alloc_register(); - translate_and_mark(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; - translate_and_mark(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?; + translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?; match op { ast::Operator::Greater => { emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg) @@ -410,7 +411,7 @@ pub fn translate_condition_expr( } if !condition_metadata.jump_if_condition_is_true { - program.resolve_label(jump_target_when_true, program.offset()); + program.preassign_label_to_next_insn(jump_target_when_true); } } ast::Expr::Like { not, .. 
From e557503091c4f965301f2cedb3985b167a1a71b2 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Wed, 23 Apr 2025 12:59:24 +0300
Subject: [PATCH 357/425] expr.rs: use constant spans to optimize constant
 expressions

---
 core/translate/expr.rs | 185 ++++++++++++++++++++++++++---------------
 1 file changed, 117 insertions(+), 68 deletions(-)

diff --git a/core/translate/expr.rs b/core/translate/expr.rs
index 53deb7e0f..79ccb1fe9 100644
--- a/core/translate/expr.rs
+++ b/core/translate/expr.rs
@@ -13,6 +13,7 @@ use crate::vdbe::{
 use crate::Result;
 
 use super::emitter::Resolver;
+use super::optimizer::Optimizable;
 use super::plan::{Operation, TableReference};
 
 #[derive(Debug, Clone, Copy)]
@@ -205,7 +206,7 @@ pub fn translate_condition_expr(
                 },
                 resolver,
             )?;
-            program.resolve_label(jump_target_when_true, program.offset());
+            program.preassign_label_to_next_insn(jump_target_when_true);
             translate_condition_expr(
                 program,
                 referenced_tables,
@@ -230,7 +231,7 @@ pub fn translate_condition_expr(
                 },
                 resolver,
             )?;
-            program.resolve_label(jump_target_when_false, program.offset());
+            program.preassign_label_to_next_insn(jump_target_when_false);
             translate_condition_expr(
                 program,
                 referenced_tables,
@@ -254,8 +255,8 @@ pub fn translate_condition_expr(
         {
             let lhs_reg = program.alloc_register();
             let rhs_reg = program.alloc_register();
-            translate_and_mark(program, Some(referenced_tables), lhs, lhs_reg, resolver)?;
-            translate_and_mark(program, Some(referenced_tables), rhs, rhs_reg, resolver)?;
+            translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?;
+            translate_expr(program, Some(referenced_tables), rhs, rhs_reg, resolver)?;
             match op {
                 ast::Operator::Greater => {
                     emit_cmp_insn!(program, condition_metadata, Gt, Le, lhs_reg, rhs_reg)
@@ -410,7 +411,7 @@ pub fn translate_condition_expr(
             }
 
             if !condition_metadata.jump_if_condition_is_true {
-                program.resolve_label(jump_target_when_true, program.offset());
+                program.preassign_label_to_next_insn(jump_target_when_true);
             }
         }
         ast::Expr::Like { not, .. } => {
@@ -478,6 +479,38 @@ pub fn translate_condition_expr(
     Ok(())
 }
 
+/// Reason why [translate_expr_no_constant_opt()] was called.
+#[derive(Debug)]
+pub enum NoConstantOptReason {
+    /// The expression translation involves reusing register(s),
+    /// so hoisting those register assignments is not safe.
+    /// e.g. SELECT COALESCE(1, t.x, NULL) would overwrite 1 with NULL, which is invalid.
+    RegisterReuse,
+}
+
+/// Translate an expression into bytecode via [translate_expr()], and forbid any constant values from being hoisted
+/// into the beginning of the program. This is a good idea in most cases where
+/// a register will end up being reused e.g. in a coroutine.
+pub fn translate_expr_no_constant_opt(
+    program: &mut ProgramBuilder,
+    referenced_tables: Option<&[TableReference]>,
+    expr: &ast::Expr,
+    target_register: usize,
+    resolver: &Resolver,
+    deopt_reason: NoConstantOptReason,
+) -> Result<usize> {
+    tracing::debug!(
+        "translate_expr_no_constant_opt: expr={:?}, deopt_reason={:?}",
+        expr,
+        deopt_reason
+    );
+    let next_span_idx = program.constant_spans_next_idx();
+    let translated = translate_expr(program, referenced_tables, expr, target_register, resolver)?;
+    program.constant_spans_invalidate_after(next_span_idx);
+    Ok(translated)
+}
+
+/// Translate an expression into bytecode.
 pub fn translate_expr(
     program: &mut ProgramBuilder,
     referenced_tables: Option<&[TableReference]>,
@@ -485,14 +518,29 @@ pub fn translate_expr(
     target_register: usize,
     resolver: &Resolver,
 ) -> Result<usize> {
+    let constant_span = if expr.is_constant(resolver) {
+        if !program.constant_span_is_open() {
+            Some(program.constant_span_start())
+        } else {
+            None
+        }
+    } else {
+        program.constant_span_end_all();
+        None
+    };
+
     if let Some(reg) = resolver.resolve_cached_expr_reg(expr) {
         program.emit_insn(Insn::Copy {
             src_reg: reg,
             dst_reg: target_register,
             amount: 0,
         });
+        if let Some(span) = constant_span {
+            program.constant_span_end(span);
+        }
         return Ok(target_register);
     }
+
     match expr {
         ast::Expr::Between { .. } => {
             unreachable!("expression should have been rewritten in optmizer")
@@ -504,17 +552,17 @@ pub fn translate_expr(
                 translate_expr(program, referenced_tables, e1, shared_reg, resolver)?;
                 emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?;
-                return Ok(target_register);
+                Ok(target_register)
+            } else {
+                let e1_reg = program.alloc_registers(2);
+                let e2_reg = e1_reg + 1;
+
+                translate_expr(program, referenced_tables, e1, e1_reg, resolver)?;
+                translate_expr(program, referenced_tables, e2, e2_reg, resolver)?;
+
+                emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?;
+                Ok(target_register)
             }
-
-            let e1_reg = program.alloc_registers(2);
-            let e2_reg = e1_reg + 1;
-
-            translate_expr(program, referenced_tables, e1, e1_reg, resolver)?;
-            translate_expr(program, referenced_tables, e2, e2_reg, resolver)?;
-
-            emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?;
-            Ok(target_register)
         }
         ast::Expr::Case {
             base,
@@ -545,7 +593,14 @@ pub fn translate_expr(
             )?;
         };
         for (when_expr, then_expr) in when_then_pairs {
-            translate_expr(program, referenced_tables, when_expr, expr_reg, resolver)?;
+            translate_expr_no_constant_opt(
+                program,
+                referenced_tables,
+                when_expr,
+                expr_reg,
+                resolver,
+                NoConstantOptReason::RegisterReuse,
+            )?;
             match base_reg {
                 // CASE 1 WHEN 0 THEN 0 ELSE 1 becomes 1==0, Ne branch to next clause
                 Some(base_reg) => program.emit_insn(Insn::Ne {
@@ -563,12 +618,13 @@ pub fn translate_expr(
                 }),
             };
             // THEN...
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 then_expr,
                 target_register,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
             program.emit_insn(Insn::Goto {
                 target_pc: return_label,
@@ -580,7 +636,14 @@ pub fn translate_expr(
         }
         match else_expr {
             Some(expr) => {
-                translate_expr(program, referenced_tables, expr, target_register, resolver)?;
+                translate_expr_no_constant_opt(
+                    program,
+                    referenced_tables,
+                    expr,
+                    target_register,
+                    resolver,
+                    NoConstantOptReason::RegisterReuse,
+                )?;
             }
             // If ELSE isn't specified, it means ELSE null.
             None => {
@@ -590,7 +653,7 @@ pub fn translate_expr(
                 });
             }
         };
-        program.resolve_label(return_label, program.offset());
+        program.preassign_label_to_next_insn(return_label);
         Ok(target_register)
     }
     ast::Expr::Cast { expr, type_name } => {
@@ -776,7 +839,7 @@ pub fn translate_expr(
             if let Some(args) = args {
                 for (i, arg) in args.iter().enumerate() {
                     // register containing result of each argument expression
-                    translate_and_mark(
+                    translate_expr(
                         program,
                         referenced_tables,
                         arg,
@@ -904,12 +967,13 @@ pub fn translate_expr(
             // whenever a not null check succeeds, we jump to the end of the series
             let label_coalesce_end = program.allocate_label();
             for (index, arg) in args.iter().enumerate() {
-                let reg = translate_expr(
+                let reg = translate_expr_no_constant_opt(
                     program,
                     referenced_tables,
                     arg,
                     target_register,
                     resolver,
+                    NoConstantOptReason::RegisterReuse,
                 )?;
                 if index < args.len() - 1 {
                     program.emit_insn(Insn::NotNull {
@@ -991,12 +1055,13 @@ pub fn translate_expr(
             };
 
             let temp_reg = program.alloc_register();
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 &args[0],
                 temp_reg,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
             let before_copy_label = program.allocate_label();
             program.emit_insn(Insn::NotNull {
                 reg: temp_reg,
                 target_pc: before_copy_label,
             });
 
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 &args[1],
                 temp_reg,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
             program.resolve_label(before_copy_label, program.offset());
             program.emit_insn(Insn::Copy {
@@ -1029,12 +1095,13 @@ pub fn translate_expr(
                 ),
             };
             let temp_reg = program.alloc_register();
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 &args[0],
                 temp_reg,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
@@ -1042,26 +1109,28 @@ pub fn translate_expr(
             let jump_target_when_false = program.allocate_label();
             program.emit_insn(Insn::IfNot {
                 reg: temp_reg,
                 target_pc: jump_target_when_false,
                 jump_if_null: true,
             });
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 &args[1],
                 target_register,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
             let jump_target_result = program.allocate_label();
             program.emit_insn(Insn::Goto {
                 target_pc: jump_target_result,
             });
-            program.resolve_label(jump_target_when_false, program.offset());
-            translate_expr(
+            program.preassign_label_to_next_insn(jump_target_when_false);
+            translate_expr_no_constant_opt(
                 program,
                 referenced_tables,
                 &args[2],
                 target_register,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
-            program.resolve_label(jump_target_result, program.offset());
+            program.preassign_label_to_next_insn(jump_target_result);
             Ok(target_register)
         }
         ScalarFunc::Glob | ScalarFunc::Like => {
@@ -1113,7 +1182,7 @@ pub fn translate_expr(
                 | ScalarFunc::ZeroBlob => {
                     let args = expect_arguments_exact!(args, 1, srf);
                     let start_reg = program.alloc_register();
-                    translate_and_mark(
+                    translate_expr(
                         program,
                         referenced_tables,
                         &args[0],
@@ -1132,7 +1201,7 @@ pub fn translate_expr(
                 ScalarFunc::LoadExtension => {
                     let args = expect_arguments_exact!(args, 1, srf);
                     let start_reg = program.alloc_register();
-                    translate_and_mark(
+                    translate_expr(
                         program,
                         referenced_tables,
                         &args[0],
@@ -1169,7 +1238,7 @@ pub fn translate_expr(
                     if let Some(args) = args {
                         for (i, arg) in args.iter().enumerate() {
                             // register containing result of each argument expression
-                            translate_and_mark(
+                            translate_expr(
                                 program,
                                 referenced_tables,
                                 arg,
@@ -1248,7 +1317,7 @@ pub fn translate_expr(
                         crate::bail_parse_error!("hex function with no arguments",);
                     };
                     let start_reg = program.alloc_register();
-                    translate_and_mark(
+                    translate_expr(
                         program,
                         referenced_tables,
                         &args[0],
@@ -1296,7 +1365,7 @@ pub fn translate_expr(
                     if let Some(args) = args {
                         for (i, arg) in args.iter().enumerate() {
                             // register containing result of each argument expression
-                            translate_and_mark(
+                            translate_expr(
                                 program,
                                 referenced_tables,
                                 arg,
@@ -1365,7 +1434,7 @@ pub fn translate_expr(
 
                     let start_reg = program.alloc_registers(args.len());
                     for (i, arg) in args.iter().enumerate() {
-                        translate_and_mark(
+                        translate_expr(
                             program,
                             referenced_tables,
                             arg,
@@ -1394,7 +1463,7 @@ pub fn translate_expr(
                     };
                     let start_reg = program.alloc_registers(args.len());
                     for (i, arg) in args.iter().enumerate() {
-                        translate_and_mark(
+                        translate_expr(
                             program,
                             referenced_tables,
                             arg,
@@ -1424,7 +1493,7 @@ pub fn translate_expr(
                     };
                     let start_reg = program.alloc_registers(args.len());
                     for (i, arg) in args.iter().enumerate() {
-                        translate_and_mark(
+                        translate_expr(
                             program,
                             referenced_tables,
                             arg,
@@ -1577,7 +1646,7 @@ pub fn translate_expr(
             if let Some(args) = args {
                 for (i, arg) in args.iter().enumerate() {
                     // register containing result of each argument expression
-                    translate_and_mark(
+                    translate_expr(
                         program,
                         referenced_tables,
                         arg,
@@ -1614,7 +1683,7 @@ pub fn translate_expr(
                 crate::bail_parse_error!("likely function with no arguments",);
             };
             let start_reg = program.alloc_register();
-            translate_and_mark(
+            translate_expr(
                 program,
                 referenced_tables,
                 &args[0],
@@ -1665,7 +1734,7 @@ pub fn translate_expr(
             }
 
             let start_reg = program.alloc_register();
-            translate_and_mark(
+            translate_expr(
                 program,
                 referenced_tables,
                 &args[0],
@@ -1701,13 +1770,7 @@ pub fn translate_expr(
                 MathFuncArity::Unary => {
                     let args = expect_arguments_exact!(args, 1, math_func);
                     let start_reg = program.alloc_register();
-                    translate_and_mark(
-                        program,
-                        referenced_tables,
-                        &args[0],
-                        start_reg,
-                        resolver,
-                    )?;
+                    translate_expr(program, referenced_tables, &args[0], start_reg, resolver)?;
                     program.emit_insn(Insn::Function {
                         constant_mask: 0,
                         start_reg,
@@ -2098,7 +2161,13 @@ pub fn translate_expr(
             });
             Ok(target_register)
         }
+    }?;
+
+    if let Some(span) = constant_span {
+        program.constant_span_end(span);
     }
+
+    Ok(target_register)
 }
 
 fn emit_binary_insn(
@@ -2367,17 +2436,11 @@ fn translate_like_base(
     let arg_count = if matches!(escape, Some(_)) { 3 } else { 2 };
     let start_reg = program.alloc_registers(arg_count);
     let mut constant_mask = 0;
-    translate_and_mark(program, referenced_tables, lhs, start_reg + 1, resolver)?;
+    translate_expr(program, referenced_tables, lhs, start_reg + 1, resolver)?;
     let _ = translate_expr(program, referenced_tables, rhs, start_reg, resolver)?;
     if arg_count == 3 {
         if let Some(escape) = escape {
-            translate_and_mark(
-                program,
-                referenced_tables,
-                escape,
-                start_reg + 2,
-                resolver,
-            )?;
+            translate_expr(program, referenced_tables, escape, start_reg + 2, resolver)?;
         }
     }
     if matches!(rhs.as_ref(), ast::Expr::Literal(_)) {
@@ -2482,20 +2545,6 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu
     }
 }
 
-pub fn translate_and_mark(
-    program: &mut ProgramBuilder,
-    referenced_tables: Option<&[TableReference]>,
-    expr: &ast::Expr,
-    target_register: usize,
-    resolver: &Resolver,
-) -> Result<()> {
-    translate_expr(program, referenced_tables, expr, target_register, resolver)?;
-    if matches!(expr, ast::Expr::Literal(_)) {
-        program.mark_last_insn_constant();
-    }
-    Ok(())
-}
-
 /// Sanitaizes a string literal by removing single quote at front and back
 /// and escaping double single quotes
 pub fn sanitize_string(input: &str) -> String {
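Before the follow-up call-site patch, a sketch of the span bookkeeping this change relies on. Only the method names (constant_span_start/end/end_all, constant_spans_next_idx, constant_spans_invalidate_after) come from the diff; the bodies below are illustrative guesses, not Limbo's actual ProgramBuilder implementation:

    /// Toy model: spans of emitted instructions that are compile-time
    /// constant and therefore safe to hoist into the program prologue.
    #[derive(Default)]
    struct ConstantSpans {
        /// Closed (start, end) instruction ranges, inclusive.
        spans: Vec<(usize, usize)>,
        /// Start offset of a currently open span, if any.
        open: Option<usize>,
    }

    impl ConstantSpans {
        fn next_idx(&self) -> usize {
            self.spans.len()
        }
        fn start(&mut self, at: usize) {
            self.open = Some(at);
        }
        fn end(&mut self, at: usize) {
            if let Some(start) = self.open.take() {
                self.spans.push((start, at));
            }
        }
        /// What translate_expr_no_constant_opt() needs: drop spans opened
        /// while translating a register-reusing expression. In
        /// SELECT COALESCE(1, t.x, NULL), both `1` and `NULL` write the same
        /// target register, so hoisting either write would leave the register
        /// holding the wrong value at runtime.
        fn invalidate_after(&mut self, idx: usize) {
            self.spans.truncate(idx);
            self.open = None;
        }
    }

    fn main() {
        let mut spans = ConstantSpans::default();
        let checkpoint = spans.next_idx();
        spans.start(3);
        spans.end(5);
        spans.invalidate_after(checkpoint);
        assert!(spans.spans.is_empty()); // nothing left to hoist
    }
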
From 029e5edddedefd2e6730660e10edcd3f8a2ee619 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Wed, 23 Apr 2025 13:00:01 +0300
Subject: [PATCH 358/425] Fix existing resolve_label() calls to work with new
 system

---
 core/translate/emitter.rs     | 16 +++++-------
 core/translate/group_by.rs    | 12 ++++-----
 core/translate/index.rs       | 11 ++++----
 core/translate/insert.rs      | 40 ++++++++++++++++++++---------
 core/translate/main_loop.rs   | 48 ++++++++++++++++++++-------------
 core/translate/order_by.rs    |  5 ++--
 core/translate/pragma.rs      |  4 +--
 core/translate/schema.rs      | 18 ++++++-------
 core/translate/subquery.rs    |  5 ++--
 core/translate/transaction.rs |  4 +--
 10 files changed, 94 insertions(+), 69 deletions(-)

diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs
index 5b12e4375..86283fa64 100644
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -159,8 +159,7 @@ fn epilogue(
         err_code: 0,
         description: String::new(),
     });
-
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
 
     match txn_mode {
         TransactionMode::Read => program.emit_insn(Insn::Transaction { write: false }),
@@ -297,7 +296,7 @@ pub fn emit_query<'a>(
             condition_metadata,
             &t_ctx.resolver,
         )?;
-        program.resolve_label(jump_target_when_true, program.offset());
+        program.preassign_label_to_next_insn(jump_target_when_true);
     }
 
     // Set up main query execution loop
@@ -308,8 +307,7 @@ pub fn emit_query<'a>(
 
     // Clean up and close the main execution loop
     close_loop(program, t_ctx, &plan.table_references)?;
-
-    program.resolve_label(after_main_loop_label, program.offset());
+    program.preassign_label_to_next_insn(after_main_loop_label);
 
     let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition;
     let order_by = plan.order_by.as_ref();
@@ -379,8 +377,7 @@ fn emit_program_for_delete(
 
     // Clean up and close the main execution loop
     close_loop(program, &mut t_ctx, &plan.table_references)?;
-
-    program.resolve_label(after_main_loop_label, program.offset());
+    program.preassign_label_to_next_insn(after_main_loop_label);
 
     // Finalize program
     epilogue(program, init_label, start_offset, TransactionMode::Write)?;
@@ -516,8 +513,7 @@ fn emit_program_for_update(
     )?;
     emit_update_insns(&plan, &t_ctx, program)?;
     close_loop(program, &mut t_ctx, &plan.table_references)?;
-
-    program.resolve_label(after_main_loop_label, program.offset());
+    program.preassign_label_to_next_insn(after_main_loop_label);
 
     // Finalize program
     epilogue(program, init_label, start_offset, TransactionMode::Write)?;
@@ -570,7 +566,7 @@ fn emit_update_insns(
             meta,
             &t_ctx.resolver,
         )?;
-        program.resolve_label(jump_target, program.offset());
+        program.preassign_label_to_next_insn(jump_target);
     }
     let beg = program.alloc_registers(
         table_ref.table.columns().len()
diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs
index 68f732cbb..6eeee3fa3 100644
--- a/core/translate/group_by.rs
+++ b/core/translate/group_by.rs
@@ -275,16 +275,18 @@ pub fn emit_group_by<'a>(
         "start new group if comparison is not equal",
     );
     // If we are at a new group, continue. If we are at the same group, jump to the aggregation step (i.e. accumulate more values into the aggregations)
+    let label_jump_after_comparison = program.allocate_label();
     program.emit_insn(Insn::Jump {
-        target_pc_lt: program.offset().add(1u32),
+        target_pc_lt: label_jump_after_comparison,
         target_pc_eq: agg_step_label,
-        target_pc_gt: program.offset().add(1u32),
+        target_pc_gt: label_jump_after_comparison,
     });
 
     program.add_comment(
         program.offset(),
         "check if ended group had data, and output if so",
     );
+    program.resolve_label(label_jump_after_comparison, program.offset());
     program.emit_insn(Insn::Gosub {
         target_pc: label_subrtn_acc_output,
         return_reg: reg_subrtn_acc_output_return_offset,
@@ -364,8 +366,7 @@ pub fn emit_group_by<'a>(
         cursor_id: sort_cursor,
         pc_if_next: label_grouping_loop_start,
     });
-
-    program.resolve_label(label_grouping_loop_end, program.offset());
+    program.preassign_label_to_next_insn(label_grouping_loop_end);
 
     program.add_comment(program.offset(), "emit row for final group");
     program.emit_insn(Insn::Gosub {
@@ -505,8 +506,7 @@ pub fn emit_group_by<'a>(
     program.emit_insn(Insn::Return {
         return_reg: reg_subrtn_acc_clear_return_offset,
     });
-
-    program.resolve_label(label_group_by_end, program.offset());
+    program.preassign_label_to_next_insn(label_group_by_end);
 
     Ok(())
 }
diff --git a/core/translate/index.rs b/core/translate/index.rs
index 55222e40f..b01fb2921 100644
--- a/core/translate/index.rs
+++ b/core/translate/index.rs
@@ -149,8 +149,7 @@ pub fn translate_create_index(
         cursor_id: table_cursor_id,
         pc_if_empty: loop_end_label,
     });
-
-    program.resolve_label(loop_start_label, program.offset());
+    program.preassign_label_to_next_insn(loop_start_label);
 
     // Loop start:
     // Collect index values into start_reg..rowid_reg
@@ -185,7 +184,7 @@ pub fn translate_create_index(
         cursor_id: table_cursor_id,
         pc_if_next: loop_start_label,
     });
-    program.resolve_label(loop_end_label, program.offset());
+    program.preassign_label_to_next_insn(loop_end_label);
 
     // Open the index btree we created for writing to insert the
     // newly sorted index records.
@@ -202,7 +201,7 @@ pub fn translate_create_index(
         cursor_id: sorter_cursor_id,
         pc_if_empty: sorted_loop_end,
     });
-    program.resolve_label(sorted_loop_start, program.offset());
+    program.preassign_label_to_next_insn(sorted_loop_start);
     let sorted_record_reg = program.alloc_register();
     program.emit_insn(Insn::SorterData {
         pseudo_cursor: pseudo_cursor_id,
@@ -226,7 +225,7 @@ pub fn translate_create_index(
         cursor_id: sorter_cursor_id,
         pc_if_next: sorted_loop_start,
     });
-    program.resolve_label(sorted_loop_end, program.offset());
+    program.preassign_label_to_next_insn(sorted_loop_end);
 
     // End of the outer loop
     //
@@ -248,7 +247,7 @@ pub fn translate_create_index(
 
     // Epilogue:
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index 4ca7e6fca..78b338160 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -13,16 +13,15 @@ use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral};
 use crate::vdbe::BranchOffset;
 use crate::{
     schema::{Column, Schema},
-    translate::expr::translate_expr,
     vdbe::{
         builder::{CursorType, ProgramBuilder},
         insn::Insn,
     },
-    SymbolTable,
 };
-use crate::{Result, VirtualTable};
+use crate::{Result, SymbolTable, VirtualTable};
 
 use super::emitter::Resolver;
+use super::expr::{translate_expr_no_constant_opt, NoConstantOptReason};
 
 #[allow(clippy::too_many_arguments)]
 pub fn translate_insert(
@@ -144,12 +143,15 @@ pub fn translate_insert(
     if inserting_multiple_rows {
         let yield_reg = program.alloc_register();
         let jump_on_definition_label = program.allocate_label();
+        let start_offset_label = program.allocate_label();
         program.emit_insn(Insn::InitCoroutine {
             yield_reg,
             jump_on_definition: jump_on_definition_label,
-            start_offset: program.offset().add(1u32),
+            start_offset: start_offset_label,
         });
 
+        program.resolve_label(start_offset_label, program.offset());
+
         for value in values {
             populate_column_registers(
                 &mut program,
@@ -166,7 +168,7 @@ pub fn translate_insert(
             });
         }
         program.emit_insn(Insn::EndCoroutine { yield_reg });
-        program.resolve_label(jump_on_definition_label, program.offset());
+        program.preassign_label_to_next_insn(jump_on_definition_label);
 
         program.emit_insn(Insn::OpenWrite {
             cursor_id,
@@ -268,8 +270,7 @@ pub fn translate_insert(
             err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
             description: format!("{}.{}", table_name.0, rowid_column_name),
         });
-
-        program.resolve_label(make_record_label, program.offset());
+        program.preassign_label_to_next_insn(make_record_label);
     }
 
     match table.btree() {
@@ -354,8 +355,8 @@ pub fn translate_insert(
         err_code: 0,
         description: String::new(),
     });
+    program.preassign_label_to_next_insn(init_label);
 
-    program.resolve_label(init_label, program.offset());
     program.emit_insn(Insn::Transaction { write: true });
     program.emit_constant_insns();
     program.emit_insn(Insn::Goto {
@@ -557,18 +558,26 @@ fn populate_column_registers(
             } else {
                 target_reg
             };
-            translate_expr(
+            translate_expr_no_constant_opt(
                 program,
                 None,
                 value.get(value_index).expect("value index out of bounds"),
                 reg,
                 resolver,
+                NoConstantOptReason::RegisterReuse,
             )?;
             if write_directly_to_rowid_reg {
                 program.emit_insn(Insn::SoftNull { reg: target_reg });
             }
         } else if let Some(default_expr) = mapping.default_value {
-            translate_expr(program, None, default_expr, target_reg, resolver)?;
+            translate_expr_no_constant_opt(
+                program,
+                None,
+                default_expr,
+                target_reg,
+                resolver,
+                NoConstantOptReason::RegisterReuse,
+            )?;
         } else {
             // Column was not specified as has no DEFAULT - use NULL if it is nullable, otherwise error
             // Rowid alias columns can be NULL because we will autogenerate a rowid in that case.
@@ -618,7 +627,14 @@ fn translate_virtual_table_insert(
 
     let value_registers_start = program.alloc_registers(values[0].len());
     for (i, expr) in values[0].iter().enumerate() {
-        translate_expr(program, None, expr, value_registers_start + i, resolver)?;
+        translate_expr_no_constant_opt(
+            program,
+            None,
+            expr,
+            value_registers_start + i,
+            resolver,
+            NoConstantOptReason::RegisterReuse,
+        )?;
     }
     /*
      * * Inserts for virtual tables are done in a single step.
@@ -672,12 +688,12 @@ fn translate_virtual_table_insert(
     });
 
     let halt_label = program.allocate_label();
+    program.resolve_label(halt_label, program.offset());
     program.emit_insn(Insn::Halt {
         err_code: 0,
         description: String::new(),
     });
 
-    program.resolve_label(halt_label, program.offset());
     program.resolve_label(init_label, program.offset());
 
     program.emit_insn(Insn::Goto {
diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index ff60bb305..a1cabc511 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -18,7 +18,10 @@ use crate::{
 use super::{
     aggregation::translate_aggregation_step,
     emitter::{OperationMode, TranslateCtx},
-    expr::{translate_condition_expr, translate_expr, ConditionMetadata},
+    expr::{
+        translate_condition_expr, translate_expr, translate_expr_no_constant_opt,
+        ConditionMetadata, NoConstantOptReason,
+    },
     group_by::is_column_in_group_by,
     optimizer::Optimizable,
     order_by::{order_by_sorter_insert, sorter_insert},
@@ -238,7 +241,7 @@ pub fn open_loop(
                     jump_on_definition: BranchOffset::Offset(0),
                     start_offset: coroutine_implementation_start,
                 });
-                program.resolve_label(loop_start, program.offset());
+                program.preassign_label_to_next_insn(loop_start);
                 // A subquery within the main loop of a parent query has no cursor, so instead of advancing the cursor,
                 // it emits a Yield which jumps back to the main loop of the subquery itself to retrieve the next row.
                 // When the subquery coroutine completes, this instruction jumps to the label at the top of the termination_label_stack,
@@ -265,7 +268,7 @@ pub fn open_loop(
                         condition_metadata,
                         &t_ctx.resolver,
                     )?;
-                    program.resolve_label(jump_target_when_true, program.offset());
+                    program.preassign_label_to_next_insn(jump_target_when_true);
                 }
             }
             Operation::Scan { iter_dir, .. } => {
@@ -284,6 +287,7 @@ pub fn open_loop(
                             pc_if_empty: loop_end,
                         });
                     }
+                    program.preassign_label_to_next_insn(loop_start);
                 } else if let Some(vtab) = table.virtual_table() {
                     let (start_reg, count, maybe_idx_str, maybe_idx_int) = if vtab
                         .kind
@@ -391,8 +395,8 @@ pub fn open_loop(
                         idx_num: maybe_idx_int.unwrap_or(0) as usize,
                         pc_if_empty: loop_end,
                     });
+                    program.preassign_label_to_next_insn(loop_start);
                 }
-                program.resolve_label(loop_start, program.offset());
 
                 if let Some(table_cursor_id) = table_cursor_id {
                     if let Some(index_cursor_id) = index_cursor_id {
@@ -419,7 +423,7 @@ pub fn open_loop(
                         condition_metadata,
                         &t_ctx.resolver,
                    )?;
-                    program.resolve_label(jump_target_when_true, program.offset());
+                    program.preassign_label_to_next_insn(jump_target_when_true);
                 }
             }
             Operation::Search(search) => {
@@ -527,7 +531,7 @@ pub fn open_loop(
                         condition_metadata,
                         &t_ctx.resolver,
                     )?;
-                    program.resolve_label(jump_target_when_true, program.offset());
+                    program.preassign_label_to_next_insn(jump_target_when_true);
                 }
             }
         }
@@ -815,6 +819,7 @@ pub fn close_loop(
                 program.emit_insn(Insn::Goto {
                     target_pc: loop_labels.loop_start,
                 });
+                program.preassign_label_to_next_insn(loop_labels.loop_end);
             }
             Operation::Scan { iter_dir, .. } => {
                 program.resolve_label(loop_labels.next, program.offset());
@@ -844,6 +849,7 @@ pub fn close_loop(
                     }
                     other => unreachable!("Unsupported table reference type: {:?}", other),
                 }
+                program.preassign_label_to_next_insn(loop_labels.loop_end);
             }
             Operation::Search(search) => {
                 program.resolve_label(loop_labels.next, program.offset());
@@ -869,11 +875,10 @@ pub fn close_loop(
                     });
                 }
+                program.preassign_label_to_next_insn(loop_labels.loop_end);
             }
         }
     }
 
-    program.resolve_label(loop_labels.loop_end, program.offset());
-
     // Handle OUTER JOIN logic. The reason this comes after the "loop end" mark is that we may need to still jump back
     // and emit a row with NULLs for the right table, and then jump back to the next row of the left table.
     if let Some(join_info) = table.join_info.as_ref() {
@@ -913,7 +918,7 @@ pub fn close_loop(
             program.emit_insn(Insn::Goto {
                 target_pc: lj_meta.label_match_flag_set_true,
             });
-            program.resolve_label(label_when_right_table_notnull, program.offset());
+            program.preassign_label_to_next_insn(label_when_right_table_notnull);
         }
     }
 }
@@ -972,7 +977,14 @@ fn emit_seek(
         }
     } else {
         let expr = &seek_def.key[i].0;
-        translate_expr(program, Some(tables), &expr, reg, &t_ctx.resolver)?;
+        translate_expr_no_constant_opt(
+            program,
+            Some(tables),
+            &expr,
+            reg,
+            &t_ctx.resolver,
+            NoConstantOptReason::RegisterReuse,
+        )?;
         // If the seek key column is not verifiably non-NULL, we need check whether it is NULL,
         // and if so, jump to the loop end.
         // This is to avoid returning rows for e.g. SELECT * FROM t WHERE t.x > NULL,
@@ -1046,7 +1058,7 @@ fn emit_seek_termination(
     is_index: bool,
 ) -> Result<()> {
     let Some(termination) = seek_def.termination.as_ref() else {
-        program.resolve_label(loop_start, program.offset());
+        program.preassign_label_to_next_insn(loop_start);
         return Ok(());
     };
 
@@ -1081,16 +1093,17 @@ fn emit_seek_termination(
     // if the seek key is shorter than the termination key, we need to translate the remaining suffix of the termination key.
     // if not, we just reuse what was emitted for the seek.
     } else if seek_len < termination.len {
-        translate_expr(
+        translate_expr_no_constant_opt(
             program,
             Some(tables),
             &seek_def.key[i].0,
             reg,
             &t_ctx.resolver,
+            NoConstantOptReason::RegisterReuse,
         )?;
     }
     }
-    program.resolve_label(loop_start, program.offset());
+    program.preassign_label_to_next_insn(loop_start);
     let mut rowid_reg = None;
     if !is_index {
         rowid_reg = Some(program.alloc_register());
@@ -1177,11 +1190,12 @@ fn emit_autoindex(
         cursor_id: index_cursor_id,
     });
     // Rewind source table
+    let label_ephemeral_build_loop_start = program.allocate_label();
     program.emit_insn(Insn::Rewind {
         cursor_id: table_cursor_id,
         pc_if_empty: label_ephemeral_build_end,
     });
-    let offset_ephemeral_build_loop_start = program.offset();
+    program.preassign_label_to_next_insn(label_ephemeral_build_loop_start);
     // Emit all columns from source table that are needed in the ephemeral index.
     // Also reserve a register for the rowid if the source table has rowids.
     let num_regs_to_reserve = index.columns.len() + table_has_rowid as usize;
@@ -1215,8 +1229,8 @@ fn emit_autoindex(
     });
     program.emit_insn(Insn::Next {
         cursor_id: table_cursor_id,
-        pc_if_next: offset_ephemeral_build_loop_start,
+        pc_if_next: label_ephemeral_build_loop_start,
     });
-    program.resolve_label(label_ephemeral_build_end, program.offset());
+    program.preassign_label_to_next_insn(label_ephemeral_build_end);
     Ok(index_cursor_id)
 }
diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs
index 9793afdc9..ce9cbc4ac 100644
--- a/core/translate/order_by.rs
+++ b/core/translate/order_by.rs
@@ -124,8 +124,8 @@ pub fn emit_order_by(
         cursor_id: sort_cursor,
         pc_if_empty: sort_loop_end_label,
     });
+    program.preassign_label_to_next_insn(sort_loop_start_label);
 
-    program.resolve_label(sort_loop_start_label, program.offset());
     emit_offset(program, t_ctx, plan, sort_loop_next_label)?;
 
     program.emit_insn(Insn::SorterData {
@@ -154,8 +154,7 @@ pub fn emit_order_by(
         cursor_id: sort_cursor,
         pc_if_next: sort_loop_start_label,
     });
-
-    program.resolve_label(sort_loop_end_label, program.offset());
+    program.preassign_label_to_next_insn(sort_loop_end_label);
 
     Ok(())
 }
diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs
index 668c1f214..98d21c834 100644
--- a/core/translate/pragma.rs
+++ b/core/translate/pragma.rs
@@ -29,7 +29,7 @@ fn list_pragmas(
     }
 
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
 }
@@ -104,7 +104,7 @@ pub fn translate_pragma(
         },
     };
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(write);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
diff --git a/core/translate/schema.rs b/core/translate/schema.rs
index 49e73cf1b..760de4e41 100644
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -37,7 +37,7 @@ pub fn translate_create_table(
         let init_label = program.emit_init();
         let start_offset = program.offset();
         program.emit_halt();
-        program.resolve_label(init_label, program.offset());
+        program.preassign_label_to_next_insn(init_label);
         program.emit_transaction(true);
         program.emit_constant_insns();
         program.emit_goto(start_offset);
@@ -148,7 +148,7 @@ pub fn translate_create_table(
 
     // TODO: SqlExec
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -448,7 +448,7 @@ pub fn translate_create_virtual_table(
         });
         let init_label = program.emit_init();
         program.emit_halt();
-        program.resolve_label(init_label, program.offset());
+        program.preassign_label_to_next_insn(init_label);
         program.emit_transaction(true);
         program.emit_constant_insns();
         return Ok(program);
@@ -519,7 +519,7 @@ pub fn translate_create_virtual_table(
     });
 
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -545,7 +545,7 @@ pub fn translate_drop_table(
     let init_label = program.emit_init();
     let start_offset = program.offset();
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
     program.emit_goto(start_offset);
@@ -583,14 +583,14 @@ pub fn translate_drop_table(
     // 1. Remove all entries from the schema table related to the table we are dropping, except for triggers
     // loop to beginning of schema table
     let end_metadata_label = program.allocate_label();
+    let metadata_loop = program.allocate_label();
     program.emit_insn(Insn::Rewind {
         cursor_id: sqlite_schema_cursor_id,
         pc_if_empty: end_metadata_label,
     });
+    program.preassign_label_to_next_insn(metadata_loop);
 
     // start loop on schema table
-    let metadata_loop = program.allocate_label();
-    program.resolve_label(metadata_loop, program.offset());
     program.emit_insn(Insn::Column {
         cursor_id: sqlite_schema_cursor_id,
         column: 2,
@@ -627,7 +627,7 @@ pub fn translate_drop_table(
         cursor_id: sqlite_schema_cursor_id,
         pc_if_next: metadata_loop,
     });
-    program.resolve_label(end_metadata_label, program.offset());
+    program.preassign_label_to_next_insn(end_metadata_label);
     // end of loop on schema table
 
     // 2. Destroy the indices within a loop
@@ -696,7 +696,7 @@ pub fn translate_drop_table(
 
     // end of the program
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_transaction(true);
     program.emit_constant_insns();
diff --git a/core/translate/subquery.rs b/core/translate/subquery.rs
index 71cb72348..17434edc2 100644
--- a/core/translate/subquery.rs
+++ b/core/translate/subquery.rs
@@ -52,7 +52,7 @@ pub fn emit_subquery<'a>(
     t_ctx: &mut TranslateCtx<'a>,
 ) -> Result<usize> {
     let yield_reg = program.alloc_register();
-    let coroutine_implementation_start_offset = program.offset().add(1u32);
+    let coroutine_implementation_start_offset = program.allocate_label();
     match &mut plan.query_type {
         SelectQueryType::Subquery {
             yield_reg: y,
@@ -91,6 +91,7 @@ pub fn emit_subquery<'a>(
         jump_on_definition: subquery_body_end_label,
         start_offset: coroutine_implementation_start_offset,
     });
+    program.preassign_label_to_next_insn(coroutine_implementation_start_offset);
     // Normally we mark each LIMIT value as a constant insn that is emitted only once, but in the case of a subquery,
     // we need to initialize it every time the subquery is run; otherwise subsequent runs of the subquery will already
     // have the LIMIT counter at 0, and will never return rows.
@@ -103,6 +104,6 @@ pub fn emit_subquery<'a>(
     let result_column_start_reg = emit_query(program, plan, &mut metadata)?;
     program.resolve_label(end_coroutine_label, program.offset());
     program.emit_insn(Insn::EndCoroutine { yield_reg });
-    program.resolve_label(subquery_body_end_label, program.offset());
+    program.preassign_label_to_next_insn(subquery_body_end_label);
 
     Ok(result_column_start_reg)
 }
diff --git a/core/translate/transaction.rs b/core/translate/transaction.rs
index 60e00e73b..11c0a8a10 100644
--- a/core/translate/transaction.rs
+++ b/core/translate/transaction.rs
@@ -33,7 +33,7 @@ pub fn translate_tx_begin(
         }
     }
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_goto(start_offset);
     Ok(program)
 }
@@ -52,7 +52,7 @@ pub fn translate_tx_commit(_tx_name: Option) -> Result {
         rollback: false,
     });
     program.emit_halt();
-    program.resolve_label(init_label, program.offset());
+    program.preassign_label_to_next_insn(init_label);
     program.emit_goto(start_offset);
     Ok(program)
 }
From c3441f96854d541b078e0e3452f3c0adf11eea5e Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Wed, 23 Apr 2025 13:11:39 +0300
Subject: [PATCH 359/425] vdbe: move comments if instructions were moved
 around in emit_constant_insns()

---
 core/vdbe/builder.rs | 19 +++++++++++++++----
 core/vdbe/mod.rs     | 22 +++++++++++++---------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index a5d9e8b8a..2f09cf3ee 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -1,7 +1,6 @@
 use std::{
     cell::Cell,
     cmp::Ordering,
-    collections::HashMap,
     rc::{Rc, Weak},
     sync::Arc,
 };
@@ -35,7 +34,7 @@ pub struct ProgramBuilder {
     // Bitmask of cursors that have emitted a SeekRowid instruction.
     seekrowid_emitted_bitmask: u64,
     // map of instruction index to manual comment (used in EXPLAIN only)
-    comments: Option<HashMap<u32, &'static str>>,
+    comments: Option<Vec<(u32, &'static str)>>,
     pub parameters: Parameters,
     pub result_columns: Vec<ResultSetColumn>,
     pub table_references: Vec<TableReference>,
@@ -89,7 +88,7 @@ impl ProgramBuilder {
             label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels),
             seekrowid_emitted_bitmask: 0,
             comments: if opts.query_mode == QueryMode::Explain {
-                Some(HashMap::new())
+                Some(Vec::new())
             } else {
                 None
             },
@@ -247,7 +246,7 @@ impl ProgramBuilder {
 
     pub fn add_comment(&mut self, insn_index: BranchOffset, comment: &'static str) {
         if let Some(comments) = &mut self.comments {
-            comments.insert(insn_index.to_offset_int(), comment);
+            comments.push((insn_index.to_offset_int(), comment));
         }
     }
 
@@ -298,6 +297,18 @@ impl ProgramBuilder {
             *resolved_offset = Some((new_offset, *target));
         }
     }
+
+        // Fix comments to refer to new locations
+        if let Some(comments) = &mut self.comments {
+            for (old_offset, _) in comments.iter_mut() {
+                let new_offset = self
+                    .insns
+                    .iter()
+                    .position(|(_, _, index)| *old_offset == *index as u32)
+                    .expect("comment must exist") as u32;
+                *old_offset = new_offset;
+            }
+        }
     }
 
     pub fn offset(&self) -> BranchOffset {
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 86fd53303..45b23c538 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -367,7 +367,7 @@ pub struct Program {
     pub insns: Vec<(Insn, InsnFunction)>,
     pub cursor_ref: Vec<(Option<String>, CursorType)>,
     pub database_header: Arc<SpinLock<DatabaseHeader>>,
-    pub comments: Option<HashMap<InsnReference, &'static str>>,
+    pub comments: Option<Vec<(InsnReference, &'static str)>>,
     pub parameters: crate::parameters::Parameters,
     pub connection: Weak<Connection>,
     pub n_change: Cell<i64>,
@@ -557,10 +557,11 @@ fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) {
             addr,
             insn,
             String::new(),
-            program
-                .comments
-                .as_ref()
-                .and_then(|comments| comments.get(&{ addr }).copied())
+            program.comments.as_ref().and_then(|comments| comments
+                .iter()
+                .find(|(offset, _)| *offset == addr)
+                .map(|(_, comment)| comment)
+                .copied())
         )
     );
 }
@@ -571,10 +572,13 @@ fn print_insn(program: &Program, addr: InsnReference, insn: &Insn, indent: Strin
         addr,
         insn,
         indent,
-        program
-            .comments
-            .as_ref()
-            .and_then(|comments| comments.get(&{ addr }).copied()),
+        program.comments.as_ref().and_then(|comments| {
+            comments
+                .iter()
+                .find(|(offset, _)| *offset == addr)
+                .map(|(_, comment)| comment)
+                .copied()
+        }),
     );
     w.push_str(&s);
 }
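The core of the patch above is the remapping step: once emit_constant_insns() has moved instructions, a comment keyed by an absolute instruction index is stale, so each stored offset is rewritten by looking up where its instruction ended up. A small sketch of the same idea in isolation (stand-in types; in the real builder the original index travels as a third element of each insns tuple):

    // For each comment, find the new position of the instruction it was
    // attached to, using a table of original indices in the new order.
    fn relocate_comments(
        comments: &mut Vec<(u32, &'static str)>,
        original_index_at: &[u32], // original_index_at[new_pos] = old index
    ) {
        for (old_offset, _) in comments.iter_mut() {
            let new_offset = original_index_at
                .iter()
                .position(|orig| *orig == *old_offset)
                .expect("comment must exist") as u32;
            *old_offset = new_offset;
        }
    }

    fn main() {
        // The instruction originally at index 2 was hoisted to position 0.
        let order = [2u32, 0, 1];
        let mut comments = vec![(2u32, "check if ended group had data")];
        relocate_comments(&mut comments, &order);
        assert_eq!(comments[0].0, 0);
    }

A Vec of pairs also keeps EXPLAIN output stable: unlike a HashMap, iteration order matches insertion order, and the linear find is cheap for the handful of comments a program carries.
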
From fdf3dd97963efff9d3c5d32d707e719c8b2135f2 Mon Sep 17 00:00:00 2001
From: Anton Harniakou
Date: Thu, 24 Apr 2025 13:25:00 +0300
Subject: [PATCH 360/425] Bugfix: Explain command should display syntax errors
 in CLI

Closes #1392
---
 cli/app.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cli/app.rs b/cli/app.rs
index 9e296416e..8212ce8c7 100644
--- a/cli/app.rs
+++ b/cli/app.rs
@@ -410,8 +410,14 @@ impl<'a> Limbo<'a> {
         // Uncased or Unicase.
         let temp = input.to_lowercase();
         if temp.trim_start().starts_with("explain") {
-            if let Ok(Some(stmt)) = self.conn.query(input) {
-                let _ = self.writeln(stmt.explain().as_bytes());
+            match self.conn.query(input) {
+                Ok(Some(stmt)) => {
+                    let _ = self.writeln(stmt.explain().as_bytes());
+                }
+                Err(e) => {
+                    let _ = self.writeln(e.to_string());
+                }
+                _ => {}
             }
         } else {
            let conn = self.conn.clone();
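The bug was the classic `if let Ok(Some(_))` pattern: it matches only the happy path, so both parse errors and empty statements were silently dropped. The shape of the fix, in isolation (toy types; the stand-in `result` plays the role of the connection's three possible outcomes):

    fn report(result: Result<Option<&'static str>, String>) -> String {
        match result {
            Ok(Some(plan)) => plan.to_string(), // EXPLAIN output
            Err(e) => e,                        // now surfaced instead of swallowed
            Ok(None) => String::new(),          // statement produced no program
        }
    }

    fn main() {
        assert_eq!(report(Err("Parse error: ...".into())), "Parse error: ...");
    }
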
From af6a783f4d557d99dbb35e66b3ed5b046e9bf7c4 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Thu, 24 Apr 2025 15:36:54 +0300
Subject: [PATCH 361/425] core/types: remove duplicate serialtype
 implementation

---
 core/storage/sqlite3_ondisk.rs | 470 +++++++++++++--------------------
 core/types.rs                  |  45 +++-
 2 files changed, 221 insertions(+), 294 deletions(-)

diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs
index fccf233b5..2e86bbdff 100644
--- a/core/storage/sqlite3_ondisk.rs
+++ b/core/storage/sqlite3_ondisk.rs
@@ -47,7 +47,7 @@ use crate::io::{Buffer, Completion, ReadCompletion, SyncCompletion, WriteComplet
 use crate::storage::buffer_pool::BufferPool;
 use crate::storage::database::DatabaseStorage;
 use crate::storage::pager::Pager;
-use crate::types::{ImmutableRecord, RawSlice, RefValue, TextRef, TextSubtype};
+use crate::types::{ImmutableRecord, RawSlice, RefValue, SerialType, TextRef, TextSubtype};
 use crate::{File, Result};
 use std::cell::RefCell;
 use std::mem::MaybeUninit;
@@ -985,107 +985,8 @@ fn read_payload(unread: &'static [u8], payload_size: usize) -> (&'static [u8], O
     }
 }
 
-pub type SerialType = u64;
-
-pub const SERIAL_TYPE_NULL: SerialType = 0;
-pub const SERIAL_TYPE_INT8: SerialType = 1;
-pub const SERIAL_TYPE_BEINT16: SerialType = 2;
-pub const SERIAL_TYPE_BEINT24: SerialType = 3;
-pub const SERIAL_TYPE_BEINT32: SerialType = 4;
-pub const SERIAL_TYPE_BEINT48: SerialType = 5;
-pub const SERIAL_TYPE_BEINT64: SerialType = 6;
-pub const SERIAL_TYPE_BEFLOAT64: SerialType = 7;
-pub const SERIAL_TYPE_CONSTINT0: SerialType = 8;
-pub const SERIAL_TYPE_CONSTINT1: SerialType = 9;
-
-pub trait SerialTypeExt {
-    fn is_null(self) -> bool;
-    fn is_int8(self) -> bool;
-    fn is_beint16(self) -> bool;
-    fn is_beint24(self) -> bool;
-    fn is_beint32(self) -> bool;
-    fn is_beint48(self) -> bool;
-    fn is_beint64(self) -> bool;
-    fn is_befloat64(self) -> bool;
-    fn is_constint0(self) -> bool;
-    fn is_constint1(self) -> bool;
-    fn is_blob(self) -> bool;
-    fn is_string(self) -> bool;
-    fn blob_size(self) -> usize;
-    fn string_size(self) -> usize;
-    fn is_valid(self) -> bool;
-}
-
-impl SerialTypeExt for u64 {
-    fn is_null(self) -> bool {
-        self == SERIAL_TYPE_NULL
-    }
-
-    fn is_int8(self) -> bool {
-        self == SERIAL_TYPE_INT8
-    }
-
-    fn is_beint16(self) -> bool {
-        self == SERIAL_TYPE_BEINT16
-    }
-
-    fn is_beint24(self) -> bool {
-        self == SERIAL_TYPE_BEINT24
-    }
-
-    fn is_beint32(self) -> bool {
-        self == SERIAL_TYPE_BEINT32
-    }
-
-    fn is_beint48(self) -> bool {
-        self == SERIAL_TYPE_BEINT48
-    }
-
-    fn is_beint64(self) -> bool {
-        self == SERIAL_TYPE_BEINT64
-    }
-
-    fn is_befloat64(self) -> bool {
-        self == SERIAL_TYPE_BEFLOAT64
-    }
-
-    fn is_constint0(self) -> bool {
-        self == SERIAL_TYPE_CONSTINT0
-    }
-
-    fn is_constint1(self) -> bool {
-        self == SERIAL_TYPE_CONSTINT1
-    }
-
-    fn is_blob(self) -> bool {
-        self >= 12 && self % 2 == 0
-    }
-
-    fn is_string(self) -> bool {
-        self >= 13 && self % 2 == 1
-    }
-
-    fn blob_size(self) -> usize {
-        debug_assert!(self.is_blob());
-        ((self - 12) / 2) as usize
-    }
-
-    fn string_size(self) -> usize {
-        debug_assert!(self.is_string());
-        ((self - 13) / 2) as usize
-    }
-
-    fn is_valid(self) -> bool {
-        self <= 9 || self.is_blob() || self.is_string()
-    }
-}
-
 pub fn validate_serial_type(value: u64) -> Result<SerialType> {
-    if value.is_valid() {
-        Ok(value)
-    } else {
-        crate::bail_corrupt_error!("Invalid serial type: {}", value)
-    }
+    value.try_into()
 }
 
 pub struct SmallVec {
@@ -1180,7 +1081,7 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res
     let mut serial_types = SmallVec::<u64>::new();
     while header_size > 0 {
         let (serial_type, nr) = read_varint(&reuse_immutable.get_payload()[pos..])?;
-        let serial_type = validate_serial_type(serial_type)?;
+        let _ = validate_serial_type(serial_type)?;
         serial_types.push(serial_type);
         pos += nr;
         assert!(header_size >= nr);
@@ -1189,14 +1090,17 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res
 
     for &serial_type in &serial_types.data[..serial_types.len.min(serial_types.data.len())] {
         let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], unsafe {
-            *serial_type.as_ptr()
+            serial_type.assume_init().try_into()?
         })?;
         pos += n;
         reuse_immutable.add_value(value);
     }
     if let Some(extra) = serial_types.extra_data.as_ref() {
         for serial_type in extra {
-            let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], *serial_type)?;
+            let (value, n) = read_value(
+                &reuse_immutable.get_payload()[pos..],
+                (*serial_type).try_into()?,
+            )?;
             pos += n;
             reuse_immutable.add_value(value);
         }
 
 /// always.
 #[inline(always)]
 pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usize)> {
-    if serial_type.is_null() {
-        return Ok((RefValue::Null, 0));
-    }
-
-    if serial_type.is_int8() {
-        if buf.is_empty() {
-            crate::bail_corrupt_error!("Invalid UInt8 value");
+    match serial_type {
+        SerialType::Null => Ok((RefValue::Null, 0)),
+        SerialType::I8 => {
+            if buf.is_empty() {
+                crate::bail_corrupt_error!("Invalid UInt8 value");
+            }
+            let val = buf[0] as i8;
+            Ok((RefValue::Integer(val as i64), 1))
         }
-        let val = buf[0] as i8;
-        return Ok((RefValue::Integer(val as i64), 1));
-    }
-
-    if serial_type.is_beint16() {
-        if buf.len() < 2 {
-            crate::bail_corrupt_error!("Invalid BEInt16 value");
+        SerialType::I16 => {
+            if buf.len() < 2 {
+                crate::bail_corrupt_error!("Invalid BEInt16 value");
+            }
+            Ok((
+                RefValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64),
+                2,
+            ))
         }
-        return Ok((
-            RefValue::Integer(i16::from_be_bytes([buf[0], buf[1]]) as i64),
-            2,
-        ));
-    }
-
-    if serial_type.is_beint24() {
-        if buf.len() < 3 {
-            crate::bail_corrupt_error!("Invalid BEInt24 value");
+        SerialType::I24 => {
+            if buf.len() < 3 {
+                crate::bail_corrupt_error!("Invalid BEInt24 value");
+            }
+            let sign_extension = if buf[0] <= 127 { 0 } else { 255 };
+            Ok((
+                RefValue::Integer(
+                    i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64
+                ),
+                3,
+            ))
         }
-        let sign_extension = if buf[0] <= 127 { 0 } else { 255 };
-        return Ok((
-            RefValue::Integer(i32::from_be_bytes([sign_extension, buf[0], buf[1], buf[2]]) as i64),
-            3,
-        ));
-    }
-
-    if serial_type.is_beint32() {
-        if buf.len() < 4 {
-            crate::bail_corrupt_error!("Invalid BEInt32 value");
+        SerialType::I32 => {
+            if buf.len() < 4 {
+                crate::bail_corrupt_error!("Invalid BEInt32 value");
+            }
+            Ok((
+                RefValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64),
+                4,
+            ))
         }
-        return Ok((
-            RefValue::Integer(i32::from_be_bytes([buf[0], buf[1], buf[2], buf[3]]) as i64),
-            4,
-        ));
-    }
-
-    if serial_type.is_beint48() {
-        if buf.len() < 6 {
-            crate::bail_corrupt_error!("Invalid BEInt48 value");
+        SerialType::I48 => {
+            if buf.len() < 6 {
+                crate::bail_corrupt_error!("Invalid BEInt48 value");
+            }
+            let sign_extension = if buf[0] <= 127 { 0 } else { 255 };
+            Ok((
+                RefValue::Integer(i64::from_be_bytes([
+                    sign_extension,
+                    sign_extension,
+                    buf[0],
+                    buf[1],
+                    buf[2],
+                    buf[3],
+                    buf[4],
+                    buf[5],
+                ])),
+                6,
+            ))
         }
-        let sign_extension = if buf[0] <= 127 { 0 } else { 255 };
-        return Ok((
-            RefValue::Integer(i64::from_be_bytes([
-                sign_extension,
-                sign_extension,
-                buf[0],
-                buf[1],
-                buf[2],
-                buf[3],
-                buf[4],
-                buf[5],
-            ])),
-            6,
-        ));
-    }
-
-    if serial_type.is_beint64() {
-        if buf.len() < 8 {
-            crate::bail_corrupt_error!("Invalid BEInt64 value");
+        SerialType::I64 => {
+            if buf.len() < 8 {
+                crate::bail_corrupt_error!("Invalid BEInt64 value");
+            }
+            Ok((
+                RefValue::Integer(i64::from_be_bytes([
+                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
+                ])),
+                8,
+            ))
         }
-        return Ok((
-            RefValue::Integer(i64::from_be_bytes([
-                buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
-            ])),
-            8,
-        ));
-    }
-
-    if serial_type.is_befloat64() {
-        if buf.len() < 8 {
-            crate::bail_corrupt_error!("Invalid BEFloat64 value");
+        SerialType::F64 => {
+            if buf.len() < 8 {
+                crate::bail_corrupt_error!("Invalid BEFloat64 value");
+            }
+            Ok((
+                RefValue::Float(f64::from_be_bytes([
+                    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
+                ])),
+                8,
+            ))
         }
-        return Ok((
-            RefValue::Float(f64::from_be_bytes([
-                buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
-            ])),
-            8,
-        ));
-    }
-
-    if serial_type.is_constint0() {
-        return Ok((RefValue::Integer(0), 0));
-    }
-
-    if serial_type.is_constint1() {
-        return Ok((RefValue::Integer(1), 0));
-    }
-
-    if serial_type.is_blob() {
-        let n = serial_type.blob_size();
-        if buf.len() < n {
-            crate::bail_corrupt_error!("Invalid Blob value");
+        SerialType::ConstInt0 => Ok((RefValue::Integer(0), 0)),
+        SerialType::ConstInt1 => Ok((RefValue::Integer(1), 0)),
+        SerialType::Blob { content_size } => {
+            if buf.len() < content_size {
+                crate::bail_corrupt_error!("Invalid Blob value");
+            }
+            if content_size == 0 {
+                Ok((RefValue::Blob(RawSlice::new(std::ptr::null(), 0)), 0))
+            } else {
+                let ptr = &buf[0] as *const u8;
+                let slice = RawSlice::new(ptr, content_size);
+                Ok((RefValue::Blob(slice), content_size))
+            }
         }
-        if n == 0 {
-            return Ok((RefValue::Blob(RawSlice::new(std::ptr::null(), 0)), 0));
+        SerialType::Text { content_size } => {
+            if buf.len() < content_size {
+                crate::bail_corrupt_error!(
+                    "Invalid String value, length {} < expected length {}",
+                    buf.len(),
+                    content_size
+                );
+            }
+            let slice = if content_size == 0 {
+                RawSlice::new(std::ptr::null(), 0)
+            } else {
+                let ptr = &buf[0] as *const u8;
+                RawSlice::new(ptr, content_size)
+            };
+            Ok((
+                RefValue::Text(TextRef {
+                    value: slice,
+                    subtype: TextSubtype::Text,
+                }),
+                content_size,
+            ))
         }
-        let ptr = &buf[0] as *const u8;
-        let slice = RawSlice::new(ptr, n);
-        return Ok((RefValue::Blob(slice), n));
     }
-
-    if serial_type.is_string() {
-        let n = serial_type.string_size();
-        if buf.len() < n {
-            crate::bail_corrupt_error!(
-                "Invalid String value, length {} < expected length {}",
-                buf.len(),
-                n
-            );
-        }
-        let slice = if n == 0 {
-            RawSlice::new(std::ptr::null(), 0)
-        } else {
-            let ptr = &buf[0] as *const u8;
-            RawSlice::new(ptr, n)
-        };
-        return Ok((
-            RefValue::Text(TextRef {
-                value: slice,
-                subtype: TextSubtype::Text,
-            }),
-            n,
-        ));
-    }
-
-    // This should never happen if validate_serial_type is used correctly
-    crate::bail_corrupt_error!("Invalid serial type: {}", serial_type)
 }
 
 #[inline(always)]
@@ -1676,32 +1563,32 @@ mod tests {
     use rstest::rstest;
 
     #[rstest]
-    #[case(&[], SERIAL_TYPE_NULL, OwnedValue::Null)]
-    #[case(&[255], SERIAL_TYPE_INT8, OwnedValue::Integer(-1))]
-    #[case(&[0x12, 0x34], SERIAL_TYPE_BEINT16, OwnedValue::Integer(0x1234))]
-    #[case(&[0xFE], SERIAL_TYPE_INT8, OwnedValue::Integer(-2))]
-    #[case(&[0x12, 0x34, 0x56], SERIAL_TYPE_BEINT24, OwnedValue::Integer(0x123456))]
-    #[case(&[0x12, 0x34, 0x56, 0x78], SERIAL_TYPE_BEINT32, OwnedValue::Integer(0x12345678))]
-    #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC], SERIAL_TYPE_BEINT48, OwnedValue::Integer(0x123456789ABC))]
-    #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SERIAL_TYPE_BEINT64, OwnedValue::Integer(0x123456789ABCDEFF))]
-    #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SERIAL_TYPE_BEFLOAT64, OwnedValue::Float(std::f64::consts::PI))]
-    #[case(&[1, 2], SERIAL_TYPE_CONSTINT0, OwnedValue::Integer(0))]
-    #[case(&[65, 66], SERIAL_TYPE_CONSTINT1, OwnedValue::Integer(1))]
-    #[case(&[1, 2, 3], 18, OwnedValue::Blob(vec![1, 2, 3].into()))]
-    #[case(&[], 12, OwnedValue::Blob(vec![].into()))] // empty blob
-    #[case(&[65, 66, 67], 19, OwnedValue::build_text("ABC"))]
-    #[case(&[0x80], SERIAL_TYPE_INT8, OwnedValue::Integer(-128))]
-    #[case(&[0x80, 0], SERIAL_TYPE_BEINT16, OwnedValue::Integer(-32768))]
-    #[case(&[0x80, 0, 0], SERIAL_TYPE_BEINT24, OwnedValue::Integer(-8388608))]
-    #[case(&[0x80, 0, 0, 0], SERIAL_TYPE_BEINT32, OwnedValue::Integer(-2147483648))]
-    #[case(&[0x80, 0, 0, 0, 0, 0], SERIAL_TYPE_BEINT48, OwnedValue::Integer(-140737488355328))]
-    #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SERIAL_TYPE_BEINT64, OwnedValue::Integer(-9223372036854775808))]
-    #[case(&[0x7f], SERIAL_TYPE_INT8, OwnedValue::Integer(127))]
-    #[case(&[0x7f, 0xff], SERIAL_TYPE_BEINT16, OwnedValue::Integer(32767))]
-    #[case(&[0x7f, 0xff, 0xff], SERIAL_TYPE_BEINT24, OwnedValue::Integer(8388607))]
-    #[case(&[0x7f, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT32, OwnedValue::Integer(2147483647))]
-    #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT48, OwnedValue::Integer(140737488355327))]
-    #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SERIAL_TYPE_BEINT64, OwnedValue::Integer(9223372036854775807))]
+    #[case(&[], SerialType::Null, OwnedValue::Null)]
+    #[case(&[255], SerialType::I8, OwnedValue::Integer(-1))]
+    #[case(&[0x12, 0x34], SerialType::I16, OwnedValue::Integer(0x1234))]
+    #[case(&[0xFE], SerialType::I8, OwnedValue::Integer(-2))]
+    #[case(&[0x12, 0x34, 0x56], SerialType::I24, OwnedValue::Integer(0x123456))]
+    #[case(&[0x12, 0x34, 0x56, 0x78], SerialType::I32, OwnedValue::Integer(0x12345678))]
+    #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC], SerialType::I48, OwnedValue::Integer(0x123456789ABC))]
+    #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SerialType::I64, OwnedValue::Integer(0x123456789ABCDEFF))]
+    #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SerialType::F64, OwnedValue::Float(std::f64::consts::PI))]
+    #[case(&[1, 2], SerialType::ConstInt0, OwnedValue::Integer(0))]
+    #[case(&[65, 66], SerialType::ConstInt1, OwnedValue::Integer(1))]
+    #[case(&[1, 2, 3], SerialType::Blob { content_size: 3 }, OwnedValue::Blob(vec![1, 2, 3].into()))]
+    #[case(&[], SerialType::Blob { content_size: 0 }, OwnedValue::Blob(vec![].into()))] // empty blob
+    #[case(&[65, 66, 67], SerialType::Text { content_size: 3 }, OwnedValue::build_text("ABC"))]
+    #[case(&[0x80], SerialType::I8, OwnedValue::Integer(-128))]
+    #[case(&[0x80, 0], SerialType::I16, OwnedValue::Integer(-32768))]
+    #[case(&[0x80, 0, 0], SerialType::I24, OwnedValue::Integer(-8388608))]
+    #[case(&[0x80, 0, 0, 0], SerialType::I32, OwnedValue::Integer(-2147483648))]
+    #[case(&[0x80, 0, 0, 0, 0, 0], SerialType::I48, OwnedValue::Integer(-140737488355328))]
+    #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SerialType::I64, OwnedValue::Integer(-9223372036854775808))]
+    #[case(&[0x7f], SerialType::I8, OwnedValue::Integer(127))]
+    #[case(&[0x7f, 0xff], SerialType::I16, OwnedValue::Integer(32767))]
+    #[case(&[0x7f, 0xff, 0xff], SerialType::I24, OwnedValue::Integer(8388607))]
+    #[case(&[0x7f, 0xff, 0xff, 0xff], SerialType::I32, OwnedValue::Integer(2147483647))]
+    #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::I48, OwnedValue::Integer(140737488355327))]
+    #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::I64, OwnedValue::Integer(9223372036854775807))]
     fn test_read_value(
         #[case] buf: &[u8],
         #[case] serial_type: SerialType,
@@ -1713,46 +1600,47 @@ mod tests {
 
     #[test]
     fn test_serial_type_helpers() {
-        assert!(SERIAL_TYPE_NULL.is_null());
-        assert!(SERIAL_TYPE_INT8.is_int8());
-        assert!(SERIAL_TYPE_BEINT16.is_beint16());
-        assert!(SERIAL_TYPE_BEINT24.is_beint24());
-        assert!(SERIAL_TYPE_BEINT32.is_beint32());
-        assert!(SERIAL_TYPE_BEINT48.is_beint48());
-        assert!(SERIAL_TYPE_BEINT64.is_beint64());
-        assert!(SERIAL_TYPE_BEFLOAT64.is_befloat64());
-        assert!(SERIAL_TYPE_CONSTINT0.is_constint0());
-        assert!(SERIAL_TYPE_CONSTINT1.is_constint1());
-
-        assert!(12u64.is_blob());
-        assert!(14u64.is_blob());
-        assert!(13u64.is_string());
-        assert!(15u64.is_string());
-
-        assert_eq!(12u64.blob_size(), 0);
-        assert_eq!(14u64.blob_size(), 1);
-        assert_eq!(16u64.blob_size(), 2);
-
-        assert_eq!(13u64.string_size(), 0);
-        assert_eq!(15u64.string_size(), 1);
-        assert_eq!(17u64.string_size(), 2);
+        assert_eq!(
+            TryInto::<SerialType>::try_into(12u64).unwrap(),
+            SerialType::Blob { content_size: 0 }
+        );
+        assert_eq!(
+            TryInto::<SerialType>::try_into(14u64).unwrap(),
+            SerialType::Blob { content_size: 1 }
+        );
+        assert_eq!(
+            TryInto::<SerialType>::try_into(13u64).unwrap(),
+            SerialType::Text { content_size: 0 }
+        );
+        assert_eq!(
+            TryInto::<SerialType>::try_into(15u64).unwrap(),
+            SerialType::Text { content_size: 1 }
+        );
+        assert_eq!(
+            TryInto::<SerialType>::try_into(16u64).unwrap(),
+            SerialType::Blob { content_size: 2 }
+        );
+        assert_eq!(
+            TryInto::<SerialType>::try_into(17u64).unwrap(),
+            SerialType::Text { content_size: 2 }
+        );
     }
 
     #[rstest]
-    #[case(0, SERIAL_TYPE_NULL)]
-    #[case(1, SERIAL_TYPE_INT8)]
-    #[case(2, SERIAL_TYPE_BEINT16)]
-    #[case(3, SERIAL_TYPE_BEINT24)]
-    #[case(4, SERIAL_TYPE_BEINT32)]
-    #[case(5, SERIAL_TYPE_BEINT48)]
-    #[case(6, SERIAL_TYPE_BEINT64)]
-    #[case(7, SERIAL_TYPE_BEFLOAT64)]
-    #[case(8, SERIAL_TYPE_CONSTINT0)]
-    #[case(9, SERIAL_TYPE_CONSTINT1)]
-    #[case(12, 12)] // Blob(0)
-    #[case(13, 13)] // String(0)
-    #[case(14, 14)] // Blob(1)
-    #[case(15, 15)] // String(1)
+    #[case(0, SerialType::Null)]
+    #[case(1, SerialType::I8)]
+    #[case(2, SerialType::I16)]
+    #[case(3, SerialType::I24)]
+    #[case(4, SerialType::I32)]
+    #[case(5, SerialType::I48)]
+    #[case(6, SerialType::I64)]
+    #[case(7, SerialType::F64)]
+    #[case(8, SerialType::ConstInt0)]
+    #[case(9, SerialType::ConstInt1)]
+    #[case(12, SerialType::Blob { content_size: 0 })]
+    #[case(13, SerialType::Text { content_size: 0 })]
+    #[case(14, SerialType::Blob { content_size: 1 })]
+    #[case(15, SerialType::Text { content_size: 1 })]
    fn test_validate_serial_type(#[case] input: u64, #[case] expected: SerialType) {
         let result = validate_serial_type(input).unwrap();
         assert_eq!(result, expected);
diff --git a/core/types.rs b/core/types.rs
index 045f13393..01b902d9f 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -787,7 +787,7 @@ impl ImmutableRecord {
             serials.push((serial_type_buf, n));
 
             let value_size = match serial_type {
-                SerialType::Null => 0,
+                SerialType::Null | SerialType::ConstInt0 | SerialType::ConstInt1 => 0,
                 SerialType::I8 => 1,
                 SerialType::I16 => 2,
                 SerialType::I24 => 3,
@@ -845,6 +845,7 @@ impl ImmutableRecord {
                     values.push(RefValue::Integer(*i));
                     let serial_type = SerialType::from(value);
                     match serial_type {
+                        SerialType::ConstInt0 | SerialType::ConstInt1 => {}
                         SerialType::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()),
                         SerialType::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()),
                         SerialType::I24 => {
@@ -853,7 +854,7 @@ impl ImmutableRecord {
                         SerialType::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()),
                         SerialType::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
                         SerialType::I64 => writer.extend_from_slice(&i.to_be_bytes()),
-                        _ => unreachable!(),
+                        other => panic!("Serial type is not an integer: {:?}", other),
                     }
                 }
                 OwnedValue::Float(f) => {
@@ -1113,7 +1114,7 @@ const I48_HIGH: i64 = 140737488355327;
 /// Sqlite Serial Types
 /// https://www.sqlite.org/fileformat.html#record_format
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
-enum SerialType {
+pub enum SerialType {
     Null,
     I8,
     I16,
@@ -1122,6 +1123,8 @@ enum SerialType {
     I48,
     I64,
     F64,
+    ConstInt0,
+    ConstInt1,
     Text { content_size: usize },
     Blob { content_size: usize },
 }
@@ -1131,6 +1134,8 @@ impl From<&OwnedValue> for SerialType {
         match value {
             OwnedValue::Null => SerialType::Null,
             OwnedValue::Integer(i) => match i {
+                0 => SerialType::ConstInt0,
+                1 => SerialType::ConstInt1,
                 i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8,
                 i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16,
                 i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24,
@@ -1160,12 +1165,46 @@ impl From<SerialType> for u64 {
             SerialType::I48 => 5,
             SerialType::I64 => 6,
             SerialType::F64 => 7,
+            SerialType::ConstInt0 => 8,
+            SerialType::ConstInt1 => 9,
             SerialType::Text { content_size } => (content_size * 2 + 13) as u64,
             SerialType::Blob { content_size } => (content_size * 2 + 12) as u64,
         }
     }
 }
 
+impl TryFrom<u64> for SerialType {
+    type Error = LimboError;
+
+    fn try_from(serial_type: u64) -> Result<Self, Self::Error> {
+        match serial_type {
+            0 => Ok(SerialType::Null),
+            1 => Ok(SerialType::I8),
+            2 => Ok(SerialType::I16),
+            3 => Ok(SerialType::I24),
+            4 => Ok(SerialType::I32),
+            5 => Ok(SerialType::I48),
+            6 => Ok(SerialType::I64),
+            7 => Ok(SerialType::F64),
+            8 => Ok(SerialType::ConstInt0),
+            9 => Ok(SerialType::ConstInt1),
+            n if n >= 12 => match n % 2 {
+                0 => Ok(SerialType::Blob {
+                    content_size: (n as usize - 12) / 2,
+                }),
+                1 => Ok(SerialType::Text {
+                    content_size: (n as usize - 13) / 2,
+                }),
+                _ => unreachable!(),
+            },
+            _ => Err(LimboError::Corrupt(format!(
+                "Invalid serial type: {}",
+                serial_type
+            ))),
+        }
+    }
+}
+
 impl Record {
     pub fn new(values: Vec<OwnedValue>) -> Self {
         Self { values }
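For reference, the encoding that both the removed trait and the new enum implement is SQLite's record-format table, where a serial type's numeric code also determines its payload size. A small cross-check, mirroring the table at sqlite.org/fileformat.html rather than any Limbo API:

    // Payload size in bytes for each serial type code.
    fn content_size(serial_type: u64) -> usize {
        match serial_type {
            0 | 8 | 9 => 0,          // NULL, constant 0, constant 1
            1 => 1,                  // i8
            2 => 2,                  // big-endian i16
            3 => 3,                  // big-endian i24
            4 => 4,                  // big-endian i32
            5 => 6,                  // big-endian i48
            6 | 7 => 8,              // big-endian i64 / f64
            n if n >= 12 && n % 2 == 0 => ((n - 12) / 2) as usize, // blob
            n if n >= 13 => ((n - 13) / 2) as usize,               // text
            _ => panic!("10 and 11 are reserved"),
        }
    }

    fn main() {
        assert_eq!(content_size(19), 3);         // 'ABC' -> (19 - 13) / 2
        assert_eq!(content_size(3 * 2 + 12), 3); // 3-byte blob round trip
    }
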
100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -985,8 +985,12 @@ fn read_payload(unread: &'static [u8], payload_size: usize) -> (&'static [u8], O } } -pub fn validate_serial_type(value: u64) -> Result { - value.try_into() +#[inline(always)] +pub fn validate_serial_type(value: u64) -> Result<()> { + if !SerialType::u64_is_valid_serial_type(value) { + crate::bail_corrupt_error!("Invalid serial type: {}", value); + } + Ok(()) } pub struct SmallVec { @@ -1081,7 +1085,7 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res let mut serial_types = SmallVec::::new(); while header_size > 0 { let (serial_type, nr) = read_varint(&reuse_immutable.get_payload()[pos..])?; - let _ = validate_serial_type(serial_type)?; + validate_serial_type(serial_type)?; serial_types.push(serial_type); pos += nr; assert!(header_size >= nr); @@ -1641,15 +1645,25 @@ mod tests { #[case(13, SerialType::Text { content_size: 0 })] #[case(14, SerialType::Blob { content_size: 1 })] #[case(15, SerialType::Text { content_size: 1 })] - fn test_validate_serial_type(#[case] input: u64, #[case] expected: SerialType) { - let result = validate_serial_type(input).unwrap(); + fn test_parse_serial_type(#[case] input: u64, #[case] expected: SerialType) { + let result = SerialType::try_from(input).unwrap(); assert_eq!(result, expected); } #[test] - fn test_invalid_serial_type() { - let result = validate_serial_type(10); - assert!(result.is_err()); + fn test_validate_serial_type() { + for i in 0..=9 { + let result = validate_serial_type(i); + assert!(result.is_ok()); + } + for i in 10..=11 { + let result = validate_serial_type(i); + assert!(result.is_err()); + } + for i in 12..=1000 { + let result = validate_serial_type(i); + assert!(result.is_ok()); + } } #[test] diff --git a/core/types.rs b/core/types.rs index 01b902d9f..4173324f8 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1129,6 +1129,13 @@ pub enum SerialType { Blob { content_size: usize }, } +impl SerialType { + #[inline(always)] + pub fn u64_is_valid_serial_type(n: u64) -> bool { + n != 10 && n != 11 + } +} + impl From<&OwnedValue> for SerialType { fn from(value: &OwnedValue) -> Self { match value { From b7970a286d77808bd01ce1c4e40ab4d103b82990 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Tue, 22 Apr 2025 11:50:51 +0200 Subject: [PATCH 363/425] implement IdxDelete clippy revert op_idx_ge changes fmt fmt again revert op_idx_gt changes --- core/translate/delete.rs | 6 + core/translate/emitter.rs | 53 +++++++- core/translate/plan.rs | 2 + core/vdbe/execute.rs | 41 +++++- core/vdbe/explain.rs | 13 ++ core/vdbe/insn.rs | 7 + .../query_processing/test_write_path.rs | 125 ++++++++++++++++++ 7 files changed, 239 insertions(+), 8 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index fb580b8e8..5cb38cf42 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -50,6 +50,11 @@ pub fn prepare_delete_plan( crate::bail_corrupt_error!("Table is neither a virtual table nor a btree table"); }; let name = tbl_name.name.0.as_str().to_string(); + let indexes = schema + .get_indices(table.get_name()) + .iter() + .cloned() + .collect(); let mut table_references = vec![TableReference { table, identifier: name, @@ -82,6 +87,7 @@ pub fn prepare_delete_plan( limit: resolved_limit, offset: resolved_offset, contains_constant_false_condition: false, + indexes, }; Ok(Plan::Delete(plan)) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5b12e4375..d1e2fdce9 100644 ---
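The fast path introduced by this patch leans on the fact that 10 and 11 are the only invalid codes in the entire u64 range, so validation no longer needs to materialize a SerialType at all and reduces to two comparisons. Assuming the decode_serial_type sketch above, the equivalence is easy to spot-check:

    #[test]
    fn fast_validity_agrees_with_decoding() {
        // Validity and decodability must agree everywhere; check a prefix of the range.
        for code in 0u64..4096 {
            let fast_ok = code != 10 && code != 11;
            assert_eq!(fast_ok, decode_serial_type(code).is_some());
        }
    }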
a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -2,13 +2,16 @@ // It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. use std::rc::Rc; +use std::sync::Arc; use limbo_sqlite3_parser::ast::{self}; use crate::function::Func; +use crate::schema::Index; use crate::translate::plan::{DeletePlan, Plan, Search}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; +use crate::vdbe::insn::RegisterOrLiteral; use crate::vdbe::{insn::Insn, BranchOffset}; use crate::{Result, SymbolTable}; @@ -375,7 +378,13 @@ fn emit_program_for_delete( &plan.table_references, &plan.where_clause, )?; - emit_delete_insns(program, &mut t_ctx, &plan.table_references, &plan.limit)?; + emit_delete_insns( + program, + &mut t_ctx, + &plan.table_references, + &plan.indexes, + &plan.limit, + )?; // Clean up and close the main execution loop close_loop(program, &mut t_ctx, &plan.table_references)?; @@ -393,6 +402,7 @@ fn emit_delete_insns( program: &mut ProgramBuilder, t_ctx: &mut TranslateCtx, table_references: &[TableReference], + index_references: &[Arc], limit: &Option, ) -> Result<()> { let table_reference = table_references.first().unwrap(); @@ -408,11 +418,12 @@ fn emit_delete_insns( }, _ => return Ok(()), }; + let main_table_cursor_id = program.resolve_cursor_id(table_reference.table.get_name()); // Emit the instructions to delete the row let key_reg = program.alloc_register(); program.emit_insn(Insn::RowId { - cursor_id, + cursor_id: main_table_cursor_id, dest: key_reg, }); @@ -433,7 +444,43 @@ fn emit_delete_insns( conflict_action, }); } else { - program.emit_insn(Insn::Delete { cursor_id }); + for index in index_references { + let index_cursor_id = program.alloc_cursor_id( + Some(index.name.clone()), + crate::vdbe::builder::CursorType::BTreeIndex(index.clone()), + ); + + program.emit_insn(Insn::OpenWrite { + cursor_id: index_cursor_id, + root_page: RegisterOrLiteral::Literal(index.root_page), + }); + let num_regs = index.columns.len() + 1; + let start_reg = program.alloc_registers(num_regs); + // Emit columns that are part of the index + index + .columns + .iter() + .enumerate() + .for_each(|(reg_offset, column_index)| { + program.emit_insn(Insn::Column { + cursor_id: main_table_cursor_id, + column: column_index.pos_in_table, + dest: start_reg + reg_offset, + }); + }); + program.emit_insn(Insn::RowId { + cursor_id: main_table_cursor_id, + dest: start_reg + num_regs - 1, + }); + program.emit_insn(Insn::IdxDelete { + start_reg, + num_regs, + cursor_id: index_cursor_id, + }); + } + program.emit_insn(Insn::Delete { + cursor_id: main_table_cursor_id, + }); } if let Some(limit) = limit { let limit_reg = program.alloc_register(); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 48ce4c854..d25e1837c 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -297,6 +297,8 @@ pub struct DeletePlan { pub offset: Option, /// query contains a constant condition that is always false pub contains_constant_false_condition: bool, + /// Indexes that must be updated by the delete operation. + pub indexes: Vec>, } #[derive(Debug, Clone)] diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index de871f54c..1177af3b7 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1810,11 +1810,17 @@ pub fn op_row_id( let rowid = { let mut index_cursor = state.get_cursor(index_cursor_id); let index_cursor = index_cursor.as_btree_mut(); - index_cursor.rowid()? 
+ let record = index_cursor.record(); + let record = record.as_ref().unwrap(); + let rowid = record.get_values().last().unwrap(); + match rowid { + RefValue::Integer(rowid) => *rowid as u64, + _ => todo!(), + } }; let mut table_cursor = state.get_cursor(table_cursor_id); let table_cursor = table_cursor.as_btree_mut(); - match table_cursor.seek(SeekKey::TableRowId(rowid.unwrap()), SeekOp::EQ)? { + match table_cursor.seek(SeekKey::TableRowId(rowid), SeekOp::EQ)? { CursorResult::Ok(_) => None, CursorResult::IO => Some((index_cursor_id, table_cursor_id)), } @@ -2069,7 +2075,6 @@ pub fn op_idx_ge( let idx_values = idx_record.get_values(); let idx_values = &idx_values[..record_from_regs.len()]; let record_values = record_from_regs.get_values(); - let record_values = &record_values[..idx_values.len()]; let ord = compare_immutable(&idx_values, &record_values, cursor.index_key_sort_order); if ord.is_ge() { target_pc.to_offset_int() @@ -3759,6 +3764,34 @@ pub fn op_delete( Ok(InsnFunctionStepResult::Step) } +pub fn op_idx_delete( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::IdxDelete { + cursor_id, + start_reg, + num_regs, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + let record = make_record(&state.registers, start_reg, num_regs); + { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)); + return_if_io!(cursor.delete()); + } + let prev_changes = program.n_change.get(); + program.n_change.set(prev_changes + 1); + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + pub fn op_idx_insert( program: &Program, state: &mut ProgramState, @@ -3766,7 +3799,6 @@ pub fn op_idx_insert( pager: &Rc, mv_store: Option<&Rc>, ) -> Result { - dbg!("op_idx_insert_"); if let Insn::IdxInsert { cursor_id, record_reg, @@ -3807,7 +3839,6 @@ pub fn op_idx_insert( } }; - dbg!(moved_before); // Start insertion of row. This might trigger a balance procedure which will take care of moving to different pages, // therefore, we don't want to seek again if that happens, meaning we don't want to return on io without moving to the following opcode // because it could trigger a movement to child page after a balance root which will leave the current page as the root page. diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 96afc5d17..a6db7be05 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1037,6 +1037,19 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::IdxDelete { + cursor_id, + start_reg, + num_regs, + } => ( + "IdxDelete", + *cursor_id as i32, + *start_reg as i32, + *num_regs as i32, + OwnedValue::build_text(""), + 0, + "".to_string(), + ), Insn::NewRowid { cursor, rowid_reg, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 56f44bd2b..295b41a2d 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -650,6 +650,12 @@ pub enum Insn { cursor_id: CursorID, }, + IdxDelete { + start_reg: usize, + num_regs: usize, + cursor_id: CursorID, + }, + NewRowid { cursor: CursorID, // P1 rowid_reg: usize, // P2 Destination register to store the new rowid @@ -948,6 +954,7 @@ impl Insn { Insn::Once { .. } => execute::op_once, Insn::NotFound { .. } => execute::op_not_found, Insn::Affinity { .. } => execute::op_affinity, + Insn::IdxDelete { .. 
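The key IdxDelete operates on follows the index-entry layout this series relies on throughout (and that the op_row_id change above reads back): the indexed column values in index order, with the owning row's rowid appended as the final field, which is why emit_delete_insns allocates index.columns.len() + 1 registers. A schematic sketch of that assembly, with a stand-in Value type rather than the crate's register representation:

    #[derive(Debug, Clone, PartialEq)]
    enum Value {
        Integer(i64),
        Text(String),
    }

    /// Build the unpacked key used to locate an index entry:
    /// one slot per indexed column, plus the rowid in the last slot.
    fn index_entry_key(indexed_cols: &[Value], rowid: i64) -> Vec<Value> {
        let mut key = Vec::with_capacity(indexed_cols.len() + 1);
        key.extend_from_slice(indexed_cols);
        key.push(Value::Integer(rowid));
        key
    }

    #[test]
    fn rowid_is_the_last_field() {
        let key = index_entry_key(&[Value::Text("alice".into())], 42);
        assert_eq!(key.last(), Some(&Value::Integer(42)));
    }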
} => execute::op_idx_delete, } } } diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index e948ed5d1..9c6107d58 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -461,3 +461,128 @@ fn test_insert_after_big_blob() -> anyhow::Result<()> { Ok(()) } + +#[test_log::test] +#[ignore = "this takes too long :)"] +fn test_write_delete_with_index() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + maybe_setup_tracing(); + + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE test (x PRIMARY KEY);"); + let conn = tmp_db.connect_limbo(); + + let list_query = "SELECT * FROM test"; + let max_iterations = 1000; + for i in 0..max_iterations { + println!("inserting {} ", i); + if (i % 100) == 0 { + let progress = (i as f64 / max_iterations as f64) * 100.0; + println!("progress {:.1}%", progress); + } + let insert_query = format!("INSERT INTO test VALUES ({})", i); + match conn.query(insert_query) { + Ok(Some(ref mut rows)) => loop { + match rows.step()? { + StepResult::IO => { + tmp_db.io.run_once()?; + } + StepResult::Done => break, + _ => unreachable!(), + } + }, + Ok(None) => {} + Err(err) => { + eprintln!("{}", err); + } + }; + } + for i in 0..max_iterations { + println!("deleting {} ", i); + if (i % 100) == 0 { + let progress = (i as f64 / max_iterations as f64) * 100.0; + println!("progress {:.1}%", progress); + } + let delete_query = format!("delete from test where x={}", i); + match conn.query(delete_query) { + Ok(Some(ref mut rows)) => loop { + match rows.step()? { + StepResult::IO => { + tmp_db.io.run_once()?; + } + StepResult::Done => break, + _ => unreachable!(), + } + }, + Ok(None) => {} + Err(err) => { + eprintln!("{}", err); + } + }; + println!("listing after deleting {} ", i); + let mut current_read_index = i + 1; + match conn.query(list_query) { + Ok(Some(ref mut rows)) => loop { + match rows.step()? { + StepResult::Row => { + let row = rows.row().unwrap(); + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(current_read_index, id); + current_read_index += 1; + } + StepResult::IO => { + tmp_db.io.run_once()?; + } + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + panic!("Database is busy"); + } + } + }, + Ok(None) => {} + Err(err) => { + eprintln!("{}", err); + } + } + for i in i + 1..max_iterations { + // now test with seek + match conn.query(format!("select * from test where x = {}", i)) { + Ok(Some(ref mut rows)) => loop { + match rows.step()? 
{ + StepResult::Row => { + let row = rows.row().unwrap(); + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(i, id); + break; + } + StepResult::IO => { + tmp_db.io.run_once()?; + } + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + panic!("Database is busy"); + } + } + }, + Ok(None) => {} + Err(err) => { + eprintln!("{}", err); + } + } + } + } + + Ok(()) +} From 7921d7c2e00a69dce10c49ab8a8d152f38a288f1 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 24 Apr 2025 17:28:31 +0300 Subject: [PATCH 364/425] types: refactor serialtype again to make it faster --- core/storage/sqlite3_ondisk.rs | 124 ++++++++-------- core/types.rs | 261 ++++++++++++++++++++------------- 2 files changed, 223 insertions(+), 162 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 833c928fe..a2d9d6ece 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -47,7 +47,9 @@ use crate::io::{Buffer, Completion, ReadCompletion, SyncCompletion, WriteComplet use crate::storage::buffer_pool::BufferPool; use crate::storage::database::DatabaseStorage; use crate::storage::pager::Pager; -use crate::types::{ImmutableRecord, RawSlice, RefValue, SerialType, TextRef, TextSubtype}; +use crate::types::{ + ImmutableRecord, RawSlice, RefValue, SerialType, SerialTypeKind, TextRef, TextSubtype, +}; use crate::{File, Result}; use std::cell::RefCell; use std::mem::MaybeUninit; @@ -1117,16 +1119,16 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res /// always. #[inline(always)] pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usize)> { - match serial_type { - SerialType::Null => Ok((RefValue::Null, 0)), - SerialType::I8 => { + match serial_type.kind() { + SerialTypeKind::Null => Ok((RefValue::Null, 0)), + SerialTypeKind::I8 => { if buf.is_empty() { crate::bail_corrupt_error!("Invalid UInt8 value"); } let val = buf[0] as i8; Ok((RefValue::Integer(val as i64), 1)) } - SerialType::I16 => { + SerialTypeKind::I16 => { if buf.len() < 2 { crate::bail_corrupt_error!("Invalid BEInt16 value"); } @@ -1135,7 +1137,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz 2, )) } - SerialType::I24 => { + SerialTypeKind::I24 => { if buf.len() < 3 { crate::bail_corrupt_error!("Invalid BEInt24 value"); } @@ -1147,7 +1149,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz 3, )) } - SerialType::I32 => { + SerialTypeKind::I32 => { if buf.len() < 4 { crate::bail_corrupt_error!("Invalid BEInt32 value"); } @@ -1156,7 +1158,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz 4, )) } - SerialType::I48 => { + SerialTypeKind::I48 => { if buf.len() < 6 { crate::bail_corrupt_error!("Invalid BEInt48 value"); } @@ -1175,7 +1177,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz 6, )) } - SerialType::I64 => { + SerialTypeKind::I64 => { if buf.len() < 8 { crate::bail_corrupt_error!("Invalid BEInt64 value"); } @@ -1186,7 +1188,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz 8, )) } - SerialType::F64 => { + SerialTypeKind::F64 => { if buf.len() < 8 { crate::bail_corrupt_error!("Invalid BEFloat64 value"); } @@ -1197,9 +1199,10 @@ pub fn read_value(buf: 
&[u8], serial_type: SerialType) -> Result<(RefValue, usiz 8, )) } - SerialType::ConstInt0 => Ok((RefValue::Integer(0), 0)), - SerialType::ConstInt1 => Ok((RefValue::Integer(1), 0)), - SerialType::Blob { content_size } => { + SerialTypeKind::ConstInt0 => Ok((RefValue::Integer(0), 0)), + SerialTypeKind::ConstInt1 => Ok((RefValue::Integer(1), 0)), + SerialTypeKind::Blob => { + let content_size = serial_type.size(); if buf.len() < content_size { crate::bail_corrupt_error!("Invalid Blob value"); } @@ -1211,7 +1214,8 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz Ok((RefValue::Blob(slice), content_size)) } } - SerialType::Text { content_size } => { + SerialTypeKind::Text => { + let content_size = serial_type.size(); if buf.len() < content_size { crate::bail_corrupt_error!( "Invalid String value, length {} < expected length {}", @@ -1567,32 +1571,32 @@ mod tests { use rstest::rstest; #[rstest] - #[case(&[], SerialType::Null, OwnedValue::Null)] - #[case(&[255], SerialType::I8, OwnedValue::Integer(-1))] - #[case(&[0x12, 0x34], SerialType::I16, OwnedValue::Integer(0x1234))] - #[case(&[0xFE], SerialType::I8, OwnedValue::Integer(-2))] - #[case(&[0x12, 0x34, 0x56], SerialType::I24, OwnedValue::Integer(0x123456))] - #[case(&[0x12, 0x34, 0x56, 0x78], SerialType::I32, OwnedValue::Integer(0x12345678))] - #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC], SerialType::I48, OwnedValue::Integer(0x123456789ABC))] - #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SerialType::I64, OwnedValue::Integer(0x123456789ABCDEFF))] - #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SerialType::F64, OwnedValue::Float(std::f64::consts::PI))] - #[case(&[1, 2], SerialType::ConstInt0, OwnedValue::Integer(0))] - #[case(&[65, 66], SerialType::ConstInt1, OwnedValue::Integer(1))] - #[case(&[1, 2, 3], SerialType::Blob { content_size: 3 }, OwnedValue::Blob(vec![1, 2, 3].into()))] - #[case(&[], SerialType::Blob { content_size: 0 }, OwnedValue::Blob(vec![].into()))] // empty blob - #[case(&[65, 66, 67], SerialType::Text { content_size: 3 }, OwnedValue::build_text("ABC"))] - #[case(&[0x80], SerialType::I8, OwnedValue::Integer(-128))] - #[case(&[0x80, 0], SerialType::I16, OwnedValue::Integer(-32768))] - #[case(&[0x80, 0, 0], SerialType::I24, OwnedValue::Integer(-8388608))] - #[case(&[0x80, 0, 0, 0], SerialType::I32, OwnedValue::Integer(-2147483648))] - #[case(&[0x80, 0, 0, 0, 0, 0], SerialType::I48, OwnedValue::Integer(-140737488355328))] - #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SerialType::I64, OwnedValue::Integer(-9223372036854775808))] - #[case(&[0x7f], SerialType::I8, OwnedValue::Integer(127))] - #[case(&[0x7f, 0xff], SerialType::I16, OwnedValue::Integer(32767))] - #[case(&[0x7f, 0xff, 0xff], SerialType::I24, OwnedValue::Integer(8388607))] - #[case(&[0x7f, 0xff, 0xff, 0xff], SerialType::I32, OwnedValue::Integer(2147483647))] - #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::I48, OwnedValue::Integer(140737488355327))] - #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::I64, OwnedValue::Integer(9223372036854775807))] + #[case(&[], SerialType::null(), OwnedValue::Null)] + #[case(&[255], SerialType::i8(), OwnedValue::Integer(-1))] + #[case(&[0x12, 0x34], SerialType::i16(), OwnedValue::Integer(0x1234))] + #[case(&[0xFE], SerialType::i8(), OwnedValue::Integer(-2))] + #[case(&[0x12, 0x34, 0x56], SerialType::i24(), OwnedValue::Integer(0x123456))] + #[case(&[0x12, 0x34, 0x56, 0x78], SerialType::i32(), OwnedValue::Integer(0x12345678))] + #[case(&[0x12, 0x34, 0x56, 
0x78, 0x9A, 0xBC], SerialType::i48(), OwnedValue::Integer(0x123456789ABC))] + #[case(&[0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xFF], SerialType::i64(), OwnedValue::Integer(0x123456789ABCDEFF))] + #[case(&[0x40, 0x09, 0x21, 0xFB, 0x54, 0x44, 0x2D, 0x18], SerialType::f64(), OwnedValue::Float(std::f64::consts::PI))] + #[case(&[1, 2], SerialType::const_int0(), OwnedValue::Integer(0))] + #[case(&[65, 66], SerialType::const_int1(), OwnedValue::Integer(1))] + #[case(&[1, 2, 3], SerialType::blob(3), OwnedValue::Blob(vec![1, 2, 3].into()))] + #[case(&[], SerialType::blob(0), OwnedValue::Blob(vec![].into()))] // empty blob + #[case(&[65, 66, 67], SerialType::text(3), OwnedValue::build_text("ABC"))] + #[case(&[0x80], SerialType::i8(), OwnedValue::Integer(-128))] + #[case(&[0x80, 0], SerialType::i16(), OwnedValue::Integer(-32768))] + #[case(&[0x80, 0, 0], SerialType::i24(), OwnedValue::Integer(-8388608))] + #[case(&[0x80, 0, 0, 0], SerialType::i32(), OwnedValue::Integer(-2147483648))] + #[case(&[0x80, 0, 0, 0, 0, 0], SerialType::i48(), OwnedValue::Integer(-140737488355328))] + #[case(&[0x80, 0, 0, 0, 0, 0, 0, 0], SerialType::i64(), OwnedValue::Integer(-9223372036854775808))] + #[case(&[0x7f], SerialType::i8(), OwnedValue::Integer(127))] + #[case(&[0x7f, 0xff], SerialType::i16(), OwnedValue::Integer(32767))] + #[case(&[0x7f, 0xff, 0xff], SerialType::i24(), OwnedValue::Integer(8388607))] + #[case(&[0x7f, 0xff, 0xff, 0xff], SerialType::i32(), OwnedValue::Integer(2147483647))] + #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::i48(), OwnedValue::Integer(140737488355327))] + #[case(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], SerialType::i64(), OwnedValue::Integer(9223372036854775807))] fn test_read_value( #[case] buf: &[u8], #[case] serial_type: SerialType, @@ -1606,45 +1610,45 @@ mod tests { fn test_serial_type_helpers() { assert_eq!( TryInto::::try_into(12u64).unwrap(), - SerialType::Blob { content_size: 0 } + SerialType::blob(0) ); assert_eq!( TryInto::::try_into(14u64).unwrap(), - SerialType::Blob { content_size: 1 } + SerialType::blob(1) ); assert_eq!( TryInto::::try_into(13u64).unwrap(), - SerialType::Text { content_size: 0 } + SerialType::text(0) ); assert_eq!( TryInto::::try_into(15u64).unwrap(), - SerialType::Text { content_size: 1 } + SerialType::text(1) ); assert_eq!( TryInto::::try_into(16u64).unwrap(), - SerialType::Blob { content_size: 2 } + SerialType::blob(2) ); assert_eq!( TryInto::::try_into(17u64).unwrap(), - SerialType::Text { content_size: 2 } + SerialType::text(2) ); } #[rstest] - #[case(0, SerialType::Null)] - #[case(1, SerialType::I8)] - #[case(2, SerialType::I16)] - #[case(3, SerialType::I24)] - #[case(4, SerialType::I32)] - #[case(5, SerialType::I48)] - #[case(6, SerialType::I64)] - #[case(7, SerialType::F64)] - #[case(8, SerialType::ConstInt0)] - #[case(9, SerialType::ConstInt1)] - #[case(12, SerialType::Blob { content_size: 0 })] - #[case(13, SerialType::Text { content_size: 0 })] - #[case(14, SerialType::Blob { content_size: 1 })] - #[case(15, SerialType::Text { content_size: 1 })] + #[case(0, SerialType::null())] + #[case(1, SerialType::i8())] + #[case(2, SerialType::i16())] + #[case(3, SerialType::i24())] + #[case(4, SerialType::i32())] + #[case(5, SerialType::i48())] + #[case(6, SerialType::i64())] + #[case(7, SerialType::f64())] + #[case(8, SerialType::const_int0())] + #[case(9, SerialType::const_int1())] + #[case(12, SerialType::blob(0))] + #[case(13, SerialType::text(0))] + #[case(14, SerialType::blob(1))] + #[case(15, SerialType::text(1))] fn 
test_parse_serial_type(#[case] input: u64, #[case] expected: SerialType) { let result = SerialType::try_from(input).unwrap(); assert_eq!(result, expected); diff --git a/core/types.rs b/core/types.rs index 4173324f8..b2a7a053b 100644 --- a/core/types.rs +++ b/core/types.rs @@ -171,13 +171,13 @@ impl OwnedValue { OwnedValue::Null => {} OwnedValue::Integer(i) => { let serial_type = SerialType::from(self); - match serial_type { - SerialType::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialType::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialType::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialType::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialType::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialType::I64 => out.extend_from_slice(&i.to_be_bytes()), + match serial_type.kind() { + SerialTypeKind::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialTypeKind::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialTypeKind::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialTypeKind::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialTypeKind::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialTypeKind::I64 => out.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), } } @@ -786,18 +786,7 @@ impl ImmutableRecord { let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); serials.push((serial_type_buf, n)); - let value_size = match serial_type { - SerialType::Null | SerialType::ConstInt0 | SerialType::ConstInt1 => 0, - SerialType::I8 => 1, - SerialType::I16 => 2, - SerialType::I24 => 3, - SerialType::I32 => 4, - SerialType::I48 => 6, - SerialType::I64 => 8, - SerialType::F64 => 8, - SerialType::Text { content_size } => content_size, - SerialType::Blob { content_size } => content_size, - }; + let value_size = serial_type.size(); size_header += n; size_values += value_size; @@ -844,16 +833,16 @@ impl ImmutableRecord { OwnedValue::Integer(i) => { values.push(RefValue::Integer(*i)); let serial_type = SerialType::from(value); - match serial_type { - SerialType::ConstInt0 | SerialType::ConstInt1 => {} - SerialType::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialType::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialType::I24 => { + match serial_type.kind() { + SerialTypeKind::ConstInt0 | SerialTypeKind::ConstInt1 => {} + SerialTypeKind::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialTypeKind::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialTypeKind::I24 => { writer.extend_from_slice(&(*i as i32).to_be_bytes()[1..]) } // remove most significant byte - SerialType::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialType::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialType::I64 => writer.extend_from_slice(&i.to_be_bytes()), + SerialTypeKind::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialTypeKind::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialTypeKind::I64 => writer.extend_from_slice(&i.to_be_bytes()), other => panic!("Serial type is not an integer: {:?}", other), } } @@ -1114,7 +1103,11 @@ const I48_HIGH: i64 = 140737488355327; /// Sqlite Serial Types /// 
https://www.sqlite.org/fileformat.html#record_format #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum SerialType { +#[repr(transparent)] +pub struct SerialType(u64); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum SerialTypeKind { Null, I8, I16, @@ -1125,8 +1118,8 @@ pub enum SerialType { F64, ConstInt0, ConstInt1, - Text { content_size: usize }, - Blob { content_size: usize }, + Text, + Blob, } impl SerialType { @@ -1134,81 +1127,143 @@ impl SerialType { pub fn u64_is_valid_serial_type(n: u64) -> bool { n != 10 && n != 11 } + + const NULL: Self = Self(0); + const I8: Self = Self(1); + const I16: Self = Self(2); + const I24: Self = Self(3); + const I32: Self = Self(4); + const I48: Self = Self(5); + const I64: Self = Self(6); + const F64: Self = Self(7); + const CONST_INT0: Self = Self(8); + const CONST_INT1: Self = Self(9); + + pub fn null() -> Self { + Self::NULL + } + + pub fn i8() -> Self { + Self::I8 + } + + pub fn i16() -> Self { + Self::I16 + } + + pub fn i24() -> Self { + Self::I24 + } + + pub fn i32() -> Self { + Self::I32 + } + + pub fn i48() -> Self { + Self::I48 + } + + pub fn i64() -> Self { + Self::I64 + } + + pub fn f64() -> Self { + Self::F64 + } + + pub fn const_int0() -> Self { + Self::CONST_INT0 + } + + pub fn const_int1() -> Self { + Self::CONST_INT1 + } + + pub fn blob(size: u64) -> Self { + Self(12 + size * 2) + } + + pub fn text(size: u64) -> Self { + Self(13 + size * 2) + } + + pub fn kind(&self) -> SerialTypeKind { + match self.0 { + 0 => SerialTypeKind::Null, + 1 => SerialTypeKind::I8, + 2 => SerialTypeKind::I16, + 3 => SerialTypeKind::I24, + 4 => SerialTypeKind::I32, + 5 => SerialTypeKind::I48, + 6 => SerialTypeKind::I64, + 7 => SerialTypeKind::F64, + 8 => SerialTypeKind::ConstInt0, + 9 => SerialTypeKind::ConstInt1, + n if n >= 12 => match n % 2 { + 0 => SerialTypeKind::Blob, + 1 => SerialTypeKind::Text, + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + + pub fn size(&self) -> usize { + match self.kind() { + SerialTypeKind::Null => 0, + SerialTypeKind::I8 => 1, + SerialTypeKind::I16 => 2, + SerialTypeKind::I24 => 3, + SerialTypeKind::I32 => 4, + SerialTypeKind::I48 => 6, + SerialTypeKind::I64 => 8, + SerialTypeKind::F64 => 8, + SerialTypeKind::ConstInt0 => 0, + SerialTypeKind::ConstInt1 => 0, + SerialTypeKind::Text => (self.0 as usize - 13) / 2, + SerialTypeKind::Blob => (self.0 as usize - 12) / 2, + } + } } impl From<&OwnedValue> for SerialType { fn from(value: &OwnedValue) -> Self { match value { - OwnedValue::Null => SerialType::Null, + OwnedValue::Null => SerialType::null(), OwnedValue::Integer(i) => match i { - 0 => SerialType::ConstInt0, - 1 => SerialType::ConstInt1, - i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8, - i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16, - i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24, - i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::I32, - i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::I48, - _ => SerialType::I64, - }, - OwnedValue::Float(_) => SerialType::F64, - OwnedValue::Text(t) => SerialType::Text { - content_size: t.value.len(), - }, - OwnedValue::Blob(b) => SerialType::Blob { - content_size: b.len(), + 0 => SerialType::const_int0(), + 1 => SerialType::const_int1(), + i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::i8(), + i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::i16(), + i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::i24(), + i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::i32(), + i if *i >= I48_LOW && *i 
<= I48_HIGH => SerialType::i48(), + _ => SerialType::i64(), }, + OwnedValue::Float(_) => SerialType::f64(), + OwnedValue::Text(t) => SerialType::text(t.value.len() as u64), + OwnedValue::Blob(b) => SerialType::blob(b.len() as u64), } } } impl From for u64 { fn from(serial_type: SerialType) -> Self { - match serial_type { - SerialType::Null => 0, - SerialType::I8 => 1, - SerialType::I16 => 2, - SerialType::I24 => 3, - SerialType::I32 => 4, - SerialType::I48 => 5, - SerialType::I64 => 6, - SerialType::F64 => 7, - SerialType::ConstInt0 => 8, - SerialType::ConstInt1 => 9, - SerialType::Text { content_size } => (content_size * 2 + 13) as u64, - SerialType::Blob { content_size } => (content_size * 2 + 12) as u64, - } + serial_type.0 } } impl TryFrom for SerialType { type Error = LimboError; - fn try_from(serial_type: u64) -> Result { - match serial_type { - 0 => Ok(SerialType::Null), - 1 => Ok(SerialType::I8), - 2 => Ok(SerialType::I16), - 3 => Ok(SerialType::I24), - 4 => Ok(SerialType::I32), - 5 => Ok(SerialType::I48), - 6 => Ok(SerialType::I64), - 7 => Ok(SerialType::F64), - 8 => Ok(SerialType::ConstInt0), - 9 => Ok(SerialType::ConstInt1), - n if n >= 12 => match n % 2 { - 0 => Ok(SerialType::Blob { - content_size: (n as usize - 12) / 2, - }), - 1 => Ok(SerialType::Text { - content_size: (n as usize - 13) / 2, - }), - _ => unreachable!(), - }, - _ => Err(LimboError::Corrupt(format!( + fn try_from(uint: u64) -> Result { + if uint == 10 || uint == 11 { + return Err(LimboError::Corrupt(format!( "Invalid serial type: {}", - serial_type - ))), + uint + ))); } + Ok(SerialType(uint)) } } @@ -1236,13 +1291,15 @@ impl Record { OwnedValue::Null => {} OwnedValue::Integer(i) => { let serial_type = SerialType::from(value); - match serial_type { - SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + match serial_type.kind() { + SerialTypeKind::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialTypeKind::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialTypeKind::I24 => { + buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]) + } // remove most significant byte + SerialTypeKind::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialTypeKind::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialTypeKind::I64 => buf.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), } } @@ -1397,7 +1454,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + assert_eq!(header[1] as u64, u64::from(SerialType::null())); // Check that the buffer is empty after the header assert_eq!(buf.len(), header_length); } @@ -1421,12 +1478,12 @@ mod tests { assert_eq!(header[0], header_length as u8); // Header should be larger than number of values // Check that correct serial types were chosen - assert_eq!(header[1] as u64, u64::from(SerialType::I8)); - assert_eq!(header[2] as u64, u64::from(SerialType::I16)); - assert_eq!(header[3] as u64, u64::from(SerialType::I24)); - 
assert_eq!(header[4] as u64, u64::from(SerialType::I32)); - assert_eq!(header[5] as u64, u64::from(SerialType::I48)); - assert_eq!(header[6] as u64, u64::from(SerialType::I64)); + assert_eq!(header[1] as u64, u64::from(SerialType::i8())); + assert_eq!(header[2] as u64, u64::from(SerialType::i16())); + assert_eq!(header[3] as u64, u64::from(SerialType::i24())); + assert_eq!(header[4] as u64, u64::from(SerialType::i32())); + assert_eq!(header[5] as u64, u64::from(SerialType::i48())); + assert_eq!(header[6] as u64, u64::from(SerialType::i64())); // test that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; @@ -1489,7 +1546,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for FLOAT - assert_eq!(header[1] as u64, u64::from(SerialType::F64)); + assert_eq!(header[1] as u64, u64::from(SerialType::f64())); // Check that the bytes after the header can be interpreted as the float let float_bytes = &buf[header_length..header_length + size_of::()]; let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); @@ -1553,11 +1610,11 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + assert_eq!(header[1] as u64, u64::from(SerialType::null())); // Third byte should be serial type for I8 - assert_eq!(header[2] as u64, u64::from(SerialType::I8)); + assert_eq!(header[2] as u64, u64::from(SerialType::i8())); // Fourth byte should be serial type for F64 - assert_eq!(header[3] as u64, u64::from(SerialType::F64)); + assert_eq!(header[3] as u64, u64::from(SerialType::f64())); // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) assert_eq!(header[4] as u64, (4 * 2 + 13) as u64); From 3ba5c2349fb1f48a047090f05fa57273d2656368 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 24 Apr 2025 16:23:20 +0200 Subject: [PATCH 365/425] add corrupt error if no matching record found for idxdelete a --- core/vdbe/execute.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 1177af3b7..bc91c9841 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1815,7 +1815,7 @@ pub fn op_row_id( let rowid = record.get_values().last().unwrap(); match rowid { RefValue::Integer(rowid) => *rowid as u64, - _ => todo!(), + _ => unreachable!(), } }; let mut table_cursor = state.get_cursor(table_cursor_id); @@ -3784,10 +3784,22 @@ pub fn op_idx_delete( let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)); + + if cursor.rowid()?.is_none() { + // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching + // index entry is found. This happens when running an UPDATE or DELETE statement and the + // index entry to be updated or deleted is not found. For some uses of IdxDelete + // (example: the EXCEPT operator) it does not matter that no matching entry is found. + // For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. 
+ return Err(LimboError::Corrupt(format!( + "IdxDelete: no matching index entry found for record {:?}", + record + ))); + } return_if_io!(cursor.delete()); } - let prev_changes = program.n_change.get(); - program.n_change.set(prev_changes + 1); + let n_change = program.n_change.get(); + program.n_change.set(n_change + 1); state.pc += 1; Ok(InsnFunctionStepResult::Step) } From 2e147b20a816cb29d2e05d9a184daaecf76c7160 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 24 Apr 2025 12:11:45 -0300 Subject: [PATCH 366/425] Adjustments and explicitely just emitting NoConflict on unique indexes --- Makefile | 6 ++- core/schema.rs | 8 ++++ core/translate/insert.rs | 68 ++++++++++++++++++--------------- core/vdbe/execute.rs | 64 ++++++++++++++----------------- core/vdbe/insn.rs | 5 +++ testing/cli_tests/constraint.py | 2 + testing/pyproject.toml | 1 + 7 files changed, 86 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index 06afa0e5d..db3c3acdb 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ uv-sync: uv sync --all-packages .PHONE: uv-sync -test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update +test: limbo uv-sync test-compat test-vector test-sqlite3 test-shell test-extensions test-memory test-write test-update test-constraint .PHONY: test test-extensions: limbo uv-sync @@ -109,6 +109,10 @@ test-update: limbo uv-sync SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-update .PHONY: test-update +test-constraint: limbo uv-sync + SQLITE_EXEC=$(SQLITE_EXEC) uv run --project limbo_test test-constraint +.PHONY: test-constraint + bench-vfs: uv-sync cargo build --release uv run --project limbo_test bench-vfs "$(SQL)" "$(N)" diff --git a/core/schema.rs b/core/schema.rs index dd09671ab..42c619693 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -81,6 +81,14 @@ impl Schema { .map_or_else(|| &[] as &[Arc], |v| v.as_slice()) } + pub fn get_index(&self, table_name: &str, index_name: &str) -> Option<&Arc> { + let name = normalize_ident(table_name); + self.indexes + .get(&name)? + .iter() + .find(|index| index.name == index_name) + } + pub fn remove_indices_for_table(&mut self, table_name: &str) { let name = normalize_ident(table_name); self.indexes.remove(&name); diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 235cc09ac..b17d19110 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -320,42 +320,48 @@ pub fn translate_insert( dest_reg: record_reg, }); - let make_record_label = program.allocate_label(); - program.emit_insn(Insn::NoConflict { - cursor_id: idx_cursor_id, - target_pc: make_record_label, - record_reg: idx_start_reg, - num_regs: num_cols, - }); - let mut column_names = Vec::new(); - for (index, ..) 
in index_col_mapping.columns.iter() { - let name = btree_table - .columns - .get(*index) - .unwrap() - .name - .as_ref() - .expect("column name is None"); - column_names.push(format!("{}.{name}", btree_table.name)); - } - let column_names = - column_names - .into_iter() - .enumerate() - .fold(String::new(), |mut accum, (idx, name)| { - if idx % 2 == 1 { + let index = schema + .get_index(&table_name.0, &index_col_mapping.idx_name) + .expect("index should be present"); + + if index.unique { + let label_idx_insert = program.allocate_label(); + program.emit_insn(Insn::NoConflict { + cursor_id: idx_cursor_id, + target_pc: label_idx_insert, + record_reg: idx_start_reg, + num_regs: num_cols, + }); + let column_names = index_col_mapping.columns.iter().enumerate().fold( + String::with_capacity(50), + |mut accum, (idx, (index, _))| { + if idx > 0 { accum.push_str(", "); } - accum.push_str(&name); + + accum.push_str(&btree_table.name); + accum.push('.'); + + let name = btree_table + .columns + .get(*index) + .unwrap() + .name + .as_ref() + .expect("column name is None"); + accum.push_str(name); + accum - }); + }, + ); - program.emit_insn(Insn::Halt { - err_code: SQLITE_CONSTRAINT_PRIMARYKEY, - description: column_names, - }); + program.emit_insn(Insn::Halt { + err_code: SQLITE_CONSTRAINT_PRIMARYKEY, + description: column_names, + }); - program.resolve_label(make_record_label, program.offset()); + program.resolve_label(label_idx_insert, program.offset()); + } // now do the actual index insertion using the unpacked registers program.emit_insn(Insn::IdxInsert { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 31a81e9b9..7b6f23f5e 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3,7 +3,6 @@ use crate::numeric::{NullableInteger, Numeric}; use crate::storage::database::FileMemoryStorage; use crate::storage::page_cache::DumbLruPageCache; use crate::storage::pager::CreateBTreeFlags; -use crate::types::ImmutableRecord; use crate::{ error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}, ext::ExtValue, @@ -3912,48 +3911,41 @@ pub fn op_no_conflict( else { unreachable!("unexpected Insn {:?}", insn) }; - let found = { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); + let mut cursor_ref = state.get_cursor(*cursor_id); + let cursor = cursor_ref.as_btree_mut(); - let any_fn = |record: &ImmutableRecord| { - for val in record.values.iter() { - if matches!(val, RefValue::Null) { - return false; - } + let record = if *num_regs == 0 { + let record = match &state.registers[*record_reg] { + Register::Record(r) => r, + _ => { + return Err(LimboError::InternalError( + "NoConflict: exepected a record in the register".into(), + )); } - true }; - - let record = if *num_regs == 0 { - let record = match &state.registers[*record_reg] { - Register::Record(r) => r, - _ => { - return Err(LimboError::InternalError( - "NoConflict: exepected a record in the register".into(), - )); - } - }; - record - } else { - &make_record(&state.registers, record_reg, num_regs) - }; - - // Should early return and jump if any of the values in the record is NULL - let found = any_fn(record); - if found { - return_if_io!(cursor.seek(SeekKey::IndexKey(record), SeekOp::EQ)) - } else { - found - } - }; - - if found { - state.pc += 1; + record } else { + &make_record(&state.registers, record_reg, num_regs) + }; + // If there is at least one NULL in the index record, there cannot be a conflict so we can immediately jump. 
+ let contains_nulls = record + .get_values() + .iter() + .any(|val| matches!(val, RefValue::Null)); + + if contains_nulls { + drop(cursor_ref); state.pc = target_pc.to_offset_int(); + return Ok(InsnFunctionStepResult::Step); } + let conflict = return_if_io!(cursor.seek(SeekKey::IndexKey(record), SeekOp::EQ)); + drop(cursor_ref); + if !conflict { + state.pc = target_pc.to_offset_int(); + } else { + state.pc += 1; + } Ok(InsnFunctionStepResult::Step) } diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 633647c36..6f310f746 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -664,6 +664,11 @@ pub enum Insn { reg: usize, }, + /// If P4==0 then register P3 holds a blob constructed by [MakeRecord](https://sqlite.org/opcode.html#MakeRecord). If P4>0 then register P3 is the first of P4 registers that form an unpacked record.\ + /// + /// Cursor P1 is on an index btree. If the record identified by P3 and P4 contains any NULL value, jump immediately to P2. If all terms of the record are not-NULL then a check is done to determine if any row in the P1 index btree has a matching key prefix. If there are no matches, jump immediately to P2. If there is a match, fall through and leave the P1 cursor pointing to the matching row.\ + /// + /// This opcode is similar to [NotFound](https://sqlite.org/opcode.html#NotFound) with the exceptions that the branch is always taken if any part of the search key input is NULL. NoConflict { cursor_id: CursorID, // P1 index cursor target_pc: BranchOffset, // P2 jump target diff --git a/testing/cli_tests/constraint.py b/testing/cli_tests/constraint.py index a37a5b020..65758745b 100644 --- a/testing/cli_tests/constraint.py +++ b/testing/cli_tests/constraint.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 + +# Eventually extract these tests to be in the fuzzing integration tests import os from faker import Faker from faker.providers.lorem.en_US import Provider as P diff --git a/testing/pyproject.toml b/testing/pyproject.toml index cdd30ec54..0aed7b99b 100644 --- a/testing/pyproject.toml +++ b/testing/pyproject.toml @@ -16,6 +16,7 @@ test-extensions = "cli_tests.extensions:main" test-update = "cli_tests.update:main" test-memory = "cli_tests.memory:main" bench-vfs = "cli_tests.vfs_bench:main" +test-constraint = "cli_tests.constraint:main" [tool.uv] package = true From f993a22023d8f81a75dc7cafb02e48f2e2557095 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:20:41 -0400 Subject: [PATCH 367/425] antithesis-tests: add all tests --- .../bank-test/anytime_validate.py | 26 +++++++ .../bank-test/eventually_validate.py | 26 +++++++ .../bank-test/finally_validate.py | 26 +++++++ antithesis-tests/bank-test/first_setup.py | 47 ++++++++++++ .../parallel_driver_generate_transaction.py | 54 +++++++++++++ .../stress-composer/first_setup.py | 75 +++++++++++++++++++ .../stress-composer/parallel_driver_delete.py | 33 ++++++++ .../stress-composer/parallel_driver_insert.py | 31 ++++++++ .../stress-composer/parallel_driver_update.py | 45 +++++++++++ antithesis-tests/stress-composer/utils.py | 20 +++++ .../stress/singleton_driver_stress.sh | 3 + 11 files changed, 386 insertions(+) create mode 100755 antithesis-tests/bank-test/anytime_validate.py create mode 100755 antithesis-tests/bank-test/eventually_validate.py create mode 100755 antithesis-tests/bank-test/finally_validate.py create mode 100755 antithesis-tests/bank-test/first_setup.py create mode 100755 antithesis-tests/bank-test/parallel_driver_generate_transaction.py create mode 100755 
antithesis-tests/stress-composer/first_setup.py create mode 100755 antithesis-tests/stress-composer/parallel_driver_delete.py create mode 100755 antithesis-tests/stress-composer/parallel_driver_insert.py create mode 100755 antithesis-tests/stress-composer/parallel_driver_update.py create mode 100755 antithesis-tests/stress-composer/utils.py create mode 100755 antithesis-tests/stress/singleton_driver_stress.sh diff --git a/antithesis-tests/bank-test/anytime_validate.py b/antithesis-tests/bank-test/anytime_validate.py new file mode 100755 index 000000000..8bfb11304 --- /dev/null +++ b/antithesis-tests/bank-test/anytime_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Anytime] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/eventually_validate.py b/antithesis-tests/bank-test/eventually_validate.py new file mode 100755 index 000000000..413d04aae --- /dev/null +++ b/antithesis-tests/bank-test/eventually_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Eventually] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/finally_validate.py b/antithesis-tests/bank-test/finally_validate.py new file mode 100755 index 000000000..fa90b15f8 --- /dev/null +++ b/antithesis-tests/bank-test/finally_validate.py @@ -0,0 +1,26 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random +from antithesis.assertions import always + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +initial_state = cur.execute(f''' + SELECT * FROM initial_state +''').fetchone() + +curr_total = cur.execute(f''' + SELECT SUM(balance) AS total FROM accounts; +''').fetchone() + +always( + initial_state[1] == curr_total[0], + '[Finally] Initial balance always equals current balance', + { + 'init_bal': initial_state[1], + 'curr_bal': curr_total[0] + } +) + diff --git a/antithesis-tests/bank-test/first_setup.py b/antithesis-tests/bank-test/first_setup.py new file mode 100755 index 000000000..df833b96e --- /dev/null +++ b/antithesis-tests/bank-test/first_setup.py @@ -0,0 +1,47 @@ +#!/usr/bin/env -S python3 -u + +import limbo +from antithesis.random import get_random + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +# drop accounts table if it exists and create a new table +cur.execute(f''' + DROP TABLE IF EXISTS accounts; +''') + +cur.execute(f''' + CREATE TABLE accounts ( + account_id INTEGER PRIMARY KEY AUTOINCREMENT, + balance REAL NOT NULL DEFAULT 0.0 + ); +''') + +# randomly create up to 100 accounts with a balance up to 1e9 +total = 0 +num_accts = get_random() % 100 +for i in 
range(num_accts): + bal = get_random() % 1e9 + total += bal + cur.execute(f''' + INSERT INTO accounts (balance) + VALUES ({bal}) + ''') + +# drop initial_state table if it exists and create a new table +cur.execute(f''' + DROP TABLE IF EXISTS initial_state; +''') +cur.execute(f''' + CREATE TABLE initial_state ( + num_accts INTEGER, + total REAL + ); +''') + +# store initial state in the table +cur.execute(f''' + INSERT INTO initial_state (num_accts, total) + VALUES ({num_accts}, {total}) +''') \ No newline at end of file diff --git a/antithesis-tests/bank-test/parallel_driver_generate_transaction.py b/antithesis-tests/bank-test/parallel_driver_generate_transaction.py new file mode 100755 index 000000000..9e96260ba --- /dev/null +++ b/antithesis-tests/bank-test/parallel_driver_generate_transaction.py @@ -0,0 +1,54 @@ +#!/usr/bin/env -S python3 -u + +import limbo +import logging +from logging.handlers import RotatingFileHandler +from antithesis.random import get_random + +handler = RotatingFileHandler(filename='bank_test.log', mode='a', maxBytes=1*1024*1024, backupCount=5, encoding=None, delay=0) +handler.setLevel(logging.INFO) + +logger = logging.getLogger('root') +logger.setLevel(logging.INFO) + +logger.addHandler(handler) + +con = limbo.connect("bank_test.db") +cur = con.cursor() + +length = cur.execute("SELECT num_accts FROM initial_state").fetchone()[0] + +def transaction(): + # check that sender and recipient are different + sender = get_random() % length + 1 + recipient = get_random() % length + 1 + if sender != recipient: + # get a random value to transfer between accounts + value = get_random() % 1e9 + + logger.info(f"Sender ID: {sender} | Recipient ID: {recipient} | Txn Val: {value}") + + cur.execute("BEGIN TRANSACTION;") + + # subtract value from balance of the sender account + cur.execute(f''' + UPDATE accounts + SET balance = balance - {value} + WHERE account_id = {sender}; + ''') + + # add value to balance of the recipient account + cur.execute(f''' + UPDATE accounts + SET balance = balance + {value} + WHERE account_id = {recipient}; + ''') + + cur.execute("COMMIT;") + +# run up to 100 transactions +iterations = get_random() % 100 +# logger.info(f"Starting {iterations} iterations") +for i in range(iterations): + transaction() +# logger.info(f"Finished {iterations} iterations") diff --git a/antithesis-tests/stress-composer/first_setup.py b/antithesis-tests/stress-composer/first_setup.py new file mode 100755 index 000000000..cccf5e015 --- /dev/null +++ b/antithesis-tests/stress-composer/first_setup.py @@ -0,0 +1,75 @@ +#!/usr/bin/env -S python3 -u + +import json +import glob +import os +import limbo +from antithesis.random import get_random, random_choice + +constraints = ['NOT NULL', 'UNIQUE', ''] +data_type = ['INTEGER', 'REAL', 'TEXT', 'BLOB', 'NUMERIC'] + +# remove any existing db files +for f in glob.glob('*.db'): + try: + os.remove(f) + except OSError: + pass + +for f in glob.glob('*.db-wal'): + try: + os.remove(f) + except OSError: + pass + +# store initial states in a separate db +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() +cur_init.execute('CREATE TABLE schemas (schema TEXT, tbl INT PRIMARY KEY)') +cur_init.execute('CREATE TABLE tables (count INT)') + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +tbl_count = max(1, get_random() % 10) + +cur_init.execute(f'INSERT INTO tables (count) VALUES ({tbl_count})') + +schemas = [] +for i in range(tbl_count): + col_count = max(1, get_random() % 10) + pk = get_random() % col_count + + 
schema = { + 'table': i, + 'colCount': col_count, + 'pk': pk + } + + cols = [] + cols_str = '' + for j in range(col_count): + col_data_type = random_choice(data_type) + col_constraint_1 = random_choice(constraints) + col_constraint_2 = random_choice(constraints) + + col = f'col_{j} {col_data_type} {col_constraint_1} {col_constraint_2 if col_constraint_2 != col_constraint_1 else ""}' if j != pk else f'col_{j} {col_data_type} PRIMARY KEY NOT NULL' + + cols.append(col) + + schema[f'col_{j}'] = { + 'data_type': col_data_type, + 'constraint1': col_constraint_1 if j != pk else 'PRIMARY KEY', + 'constraint2': col_constraint_2 if col_constraint_1 != col_constraint_2 else "" if j != pk else 'NOT NULL', + } + + cols_str = ', '.join(cols) + + schemas.append(schema) + cur_init.execute(f"INSERT INTO schemas (schema, tbl) VALUES ('{json.dumps(schema)}', {i})") + + cur.execute(f''' + CREATE TABLE tbl_{i} ({cols_str}) + ''') + +print(f'DB Schemas\n------------\n{json.dumps(schemas, indent=2)}') \ No newline at end of file diff --git a/antithesis-tests/stress-composer/parallel_driver_delete.py b/antithesis-tests/stress-composer/parallel_driver_delete.py new file mode 100755 index 000000000..6d0331f56 --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_delete.py @@ -0,0 +1,33 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) + +# get primary key column +pk = tbl_schema['pk'] +# get non-pk columns +cols = [f'col_{col}' for col in range(tbl_schema['colCount']) if col != pk] + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +deletions = get_random() % 100 +print(f'Attempt to delete {deletions} rows in tbl_{selected_tbl}...') + +for i in range(deletions): + where_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + + cur.execute(f''' + DELETE FROM tbl_{selected_tbl} WHERE {where_clause} + ''') + diff --git a/antithesis-tests/stress-composer/parallel_driver_insert.py b/antithesis-tests/stress-composer/parallel_driver_insert.py new file mode 100755 index 000000000..89d4daea0 --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_insert.py @@ -0,0 +1,31 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) +cols = ', '.join([f'col_{col}' for col in range(tbl_schema['colCount'])]) + +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +# insert up to 100 rows in the selected table +insertions = get_random() % 100 +print(f'Inserting {insertions} rows...') + +for i in range(insertions): + values = [generate_random_value(tbl_schema[f'col_{col}']['data_type']) for col in range(tbl_schema['colCount'])] + cur.execute(f''' + INSERT INTO tbl_{selected_tbl} ({cols}) + VALUES ({', '.join(values)}) + ''') + diff --git 
a/antithesis-tests/stress-composer/parallel_driver_update.py b/antithesis-tests/stress-composer/parallel_driver_update.py new file mode 100755 index 000000000..fc707cb8b --- /dev/null +++ b/antithesis-tests/stress-composer/parallel_driver_update.py @@ -0,0 +1,45 @@ +#!/usr/bin/env -S python3 -u + +import json +import limbo +from utils import generate_random_value +from antithesis.random import get_random + +# Get initial state +con_init = limbo.connect('init_state.db') +cur_init = con_init.cursor() + +tbl_len = cur_init.execute('SELECT count FROM tables').fetchone()[0] +selected_tbl = get_random() % tbl_len +tbl_schema = json.loads(cur_init.execute(f'SELECT schema FROM schemas WHERE tbl = {selected_tbl}').fetchone()[0]) + +# get primary key column +pk = tbl_schema['pk'] +# get non-pk columns +cols = [f'col_{col}' for col in range(tbl_schema['colCount']) if col != pk] +# print(cols) +con = limbo.connect('stress_composer.db') +cur = con.cursor() + +# insert up to 100 rows in the selected table +updates = get_random() % 100 +print(f'Attempt to update {updates} rows in tbl_{selected_tbl}...') + +for i in range(updates): + set_clause = '' + if tbl_schema['colCount'] == 1: + set_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + else: + values = [] + for col in cols: + # print(col) + values.append(f"{col} = {generate_random_value(tbl_schema[col]['data_type'])}") + set_clause = ', '.join(values) + + where_clause = f"col_{pk} = {generate_random_value(tbl_schema[f'col_{pk}']['data_type'])}" + # print(where_clause) + + cur.execute(f''' + UPDATE tbl_{selected_tbl} SET {set_clause} WHERE {where_clause} + ''') + diff --git a/antithesis-tests/stress-composer/utils.py b/antithesis-tests/stress-composer/utils.py new file mode 100755 index 000000000..f99052bf3 --- /dev/null +++ b/antithesis-tests/stress-composer/utils.py @@ -0,0 +1,20 @@ +import string +from antithesis.random import get_random, random_choice + +def generate_random_identifier(type: str, num: int): + return ''.join(type, '_', get_random() % num) + +def generate_random_value(type: str): + match type: + case 'INTEGER': + return str(get_random() % 100) + case 'REAL': + return '{:.2f}'.format(get_random() % 100 / 100.0) + case 'TEXT': + return f"'{''.join(random_choice(string.ascii_lowercase) for _ in range(5))}'" + case 'BLOB': + return f"x'{''.join(random_choice(string.ascii_lowercase) for _ in range(5)).encode().hex()}'" + case 'NUMERIC': + return str(get_random() % 100) + case _: + return NULL \ No newline at end of file diff --git a/antithesis-tests/stress/singleton_driver_stress.sh b/antithesis-tests/stress/singleton_driver_stress.sh new file mode 100755 index 000000000..06f27223f --- /dev/null +++ b/antithesis-tests/stress/singleton_driver_stress.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +/bin/limbo_stress \ No newline at end of file From 364a78b270bc11fd3f60b4ac0b28f8de110152e2 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:22:03 -0400 Subject: [PATCH 368/425] Cargo.toml: add profile for antithesis builds for full debug --- Cargo.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index a2dfb3e3e..5178b9fb1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,13 @@ codegen-units = 1 panic = "abort" lto = true +[profile.antithesis] +inherits = "release" +debug = true +codegen-units = 1 +panic = "abort" +lto = true + [profile.bench-profile] inherits = "release" debug = true From 62e2745c3c081eb960f347719894d974f48d1bd1 Mon Sep 17 00:00:00 
2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:23:22 -0400 Subject: [PATCH 369/425] Dockerfile.antithesis: install dependencies --- Dockerfile.antithesis | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 6305c12f0..15041cc89 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -62,7 +62,11 @@ RUN if [ "$antithesis" = "true" ]; then \ # FROM debian:bullseye-slim AS runtime -RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y bash curl xz-utils python3 sqlite3 bc binutils pip && rm -rf /var/lib/apt/lists/* +RUN curl --proto '=https' --tlsv1.2 -LsSf \ + https://github.com/tursodatabase/limbo/releases/latest/download/limbo_cli-installer.sh | sh +RUN bash -c "source $HOME/.limbo/env" +RUN pip install antithesis pylimbo --break-system-packages WORKDIR /app EXPOSE 8080 From 5953d32e4d32bfb47383adaa476ab59c7d74e434 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:24:44 -0400 Subject: [PATCH 370/425] Dockerfile.antithesis: add symbols for rust, cataloging for python, and antithesis tests to image, update entrypoint --- Dockerfile.antithesis | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 15041cc89..997b2c7a2 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -72,8 +72,16 @@ WORKDIR /app EXPOSE 8080 COPY --from=builder /usr/lib/libvoidstar.so* /usr/lib/ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress +COPY --from=builder /app/target/antithesis/limbo_stress /symbols COPY stress/docker-entrypoint.sh /bin RUN chmod +x /bin/docker-entrypoint.sh -ENTRYPOINT ["/bin/docker-entrypoint.sh"] -ENV RUST_BACKTRACE=1 -CMD ["/bin/limbo_stress"] + +COPY ./antithesis-tests/bank-test/*.py /opt/antithesis/test/v1/bank-test/ +COPY ./antithesis-tests/stress-composer/*.py /opt/antithesis/test/v1/stress-composer/ +COPY ./antithesis-tests/stress /opt/antithesis/test/v1/stress +RUN chmod 777 -R /opt/antithesis/test/v1 + +RUN mkdir /opt/antithesis/catalog +RUN ln -s /opt/antithesis/test/v1/bank-test/*.py /opt/antithesis/catalog + +ENTRYPOINT ["/bin/docker-entrypoint.sh"] \ No newline at end of file From 8390233b994586b13a82f36c7751fb422606fbd3 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:25:19 -0400 Subject: [PATCH 371/425] Dockerfile.antithesis: update limbo_stress build step --- Dockerfile.antithesis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 997b2c7a2..7308ea97b 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -52,7 +52,7 @@ COPY --from=planner /app/vendored ./vendored/ RUN if [ "$antithesis" = "true" ]; then \ cp /opt/antithesis/libvoidstar.so /usr/lib/libvoidstar.so && \ export RUSTFLAGS="-Ccodegen-units=1 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc-guard -Clink-args=-Wl,--build-id -L/usr/lib/ -lvoidstar" && \ - cargo build --bin limbo_stress --release; \ + cargo build --bin limbo_stress --antithesis; \ else \ cargo build --bin limbo_stress --release; \ fi From 75ae5dbd13ddcbc8e0d800ae6f1da25d25f338e0 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:26:00 -0400 Subject: [PATCH 372/425] stress: update docker-compose --- stress/docker-compose.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 
deletions(-) diff --git a/stress/docker-compose.yaml b/stress/docker-compose.yaml index 13b1149a8..c6e081aac 100644 --- a/stress/docker-compose.yaml +++ b/stress/docker-compose.yaml @@ -1,4 +1,7 @@ services: - workload: - image: us-central1-docker.pkg.dev/molten-verve-216720/turso-repository/limbo-workload:antithesis-latest - command: [ "/bin/limbo_stress" ] + limbo: + container_name: limbo + hostname: limbo + image: limbo:latest + entrypoint: ["/bin/docker-entrypoint.sh"] + command: ["sleep", "infinity"] \ No newline at end of file From b8885777dc4d656dc3bbdf13d38ad1c88323fe0c Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:27:05 -0400 Subject: [PATCH 373/425] stress: move sdk setup_complete from limbo_stress to docker-entrypoint --- stress/docker-entrypoint.sh | 2 ++ stress/main.rs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/stress/docker-entrypoint.sh b/stress/docker-entrypoint.sh index a09822694..1aa226912 100644 --- a/stress/docker-entrypoint.sh +++ b/stress/docker-entrypoint.sh @@ -1,5 +1,7 @@ #!/bin/bash +echo '{"antithesis_setup": { "status": "complete", "details": null }}' > $ANTITHESIS_OUTPUT_DIR/sdk.jsonl + set -Eeuo pipefail exec "$@" diff --git a/stress/main.rs b/stress/main.rs index e8b61b459..13f6ec795 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -396,7 +396,7 @@ async fn main() -> Result<(), Box> { "num_nodes": num_nodes, "main_node_id": main_id, }); - lifecycle::setup_complete(&startup_data); + // lifecycle::setup_complete(&startup_data); antithesis_init(); let mut opts = Opts::parse(); From 27e15364c4ea57ffe2c92acd85f0a01d15b19dd8 Mon Sep 17 00:00:00 2001 From: eric-dinh-antithesis Date: Thu, 24 Apr 2025 12:27:58 -0400 Subject: [PATCH 374/425] stress: suppress logfile since it's too big --- stress/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stress/main.rs b/stress/main.rs index 13f6ec795..a00b9fb66 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -321,7 +321,7 @@ fn generate_plan(opts: &Opts) -> Result Date: Thu, 24 Apr 2025 20:46:26 +0300 Subject: [PATCH 375/425] stress: Make Clippy happy --- Cargo.lock | 1 - stress/Cargo.toml | 1 - stress/main.rs | 7 ------- 3 files changed, 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0b9a8bd75..31a99b8f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1922,7 +1922,6 @@ dependencies = [ "clap", "hex", "limbo", - "serde_json", "tokio", "tracing", "tracing-appender", diff --git a/stress/Cargo.toml b/stress/Cargo.toml index 9c0097d45..84e7dc6a4 100644 --- a/stress/Cargo.toml +++ b/stress/Cargo.toml @@ -18,7 +18,6 @@ path = "main.rs" antithesis_sdk = "0.2.5" clap = { version = "4.5", features = ["derive"] } limbo = { path = "../bindings/rust" } -serde_json = "1.0.139" tokio = { version = "1.29.1", features = ["full"] } anarchist-readable-name-generator-lib = "0.1.0" hex = "0.4" diff --git a/stress/main.rs b/stress/main.rs index a00b9fb66..743ff5722 100644 --- a/stress/main.rs +++ b/stress/main.rs @@ -8,7 +8,6 @@ use core::panic; use hex; use limbo::Builder; use opts::Opts; -use serde_json::json; use std::collections::HashSet; use std::fs::File; use std::io::{Read, Write}; @@ -391,12 +390,6 @@ pub fn init_tracing() -> Result { #[tokio::main] async fn main() -> Result<(), Box> { let _g = init_tracing()?; - let (num_nodes, main_id) = (1, "n-001"); - let startup_data = json!({ - "num_nodes": num_nodes, - "main_node_id": main_id, - }); - // lifecycle::setup_complete(&startup_data); antithesis_init(); let mut opts = Opts::parse(); From 
31677c9c94cefc6d901fb3781c22e0957aaa4d24 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 24 Apr 2025 20:55:30 +0300 Subject: [PATCH 376/425] scripts/antithesis: Build Docker image for x86-64 --- scripts/antithesis/publish-docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/antithesis/publish-docker.sh b/scripts/antithesis/publish-docker.sh index 74de3981b..ee59f3367 100755 --- a/scripts/antithesis/publish-docker.sh +++ b/scripts/antithesis/publish-docker.sh @@ -18,6 +18,6 @@ fi DOCKER_IMAGE=$DOCKER_REPO_URL/$IMAGE_NAME:$DOCKER_IMAGE_VERSION -docker build -f $DOCKERFILE -t $DOCKER_IMAGE $DOCKER_BUILD_ARGS $DOCKER_DIR +docker build --platform linux/amd64 -f $DOCKERFILE -t $DOCKER_IMAGE $DOCKER_BUILD_ARGS $DOCKER_DIR docker push $DOCKER_IMAGE From fa5d6dcf6baaf10fc4aac2d00f3f7ed0746969da Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 24 Apr 2025 21:03:19 +0300 Subject: [PATCH 377/425] Fix Antithesis Docker file --- Dockerfile.antithesis | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 7308ea97b..319dbc5f4 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -63,10 +63,7 @@ RUN if [ "$antithesis" = "true" ]; then \ FROM debian:bullseye-slim AS runtime RUN apt-get update && apt-get install -y bash curl xz-utils python3 sqlite3 bc binutils pip && rm -rf /var/lib/apt/lists/* -RUN curl --proto '=https' --tlsv1.2 -LsSf \ - https://github.com/tursodatabase/limbo/releases/latest/download/limbo_cli-installer.sh | sh -RUN bash -c "source $HOME/.limbo/env" -RUN pip install antithesis pylimbo --break-system-packages +RUN pip install antithesis pylimbo WORKDIR /app EXPOSE 8080 From 117dbe6c8ce7dbf68661def907de43e75cf7941f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 24 Apr 2025 21:12:40 +0300 Subject: [PATCH 378/425] Fix Antithesis Docker file some more --- Dockerfile.antithesis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 319dbc5f4..acac11f45 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -52,7 +52,7 @@ COPY --from=planner /app/vendored ./vendored/ RUN if [ "$antithesis" = "true" ]; then \ cp /opt/antithesis/libvoidstar.so /usr/lib/libvoidstar.so && \ export RUSTFLAGS="-Ccodegen-units=1 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc-guard -Clink-args=-Wl,--build-id -L/usr/lib/ -lvoidstar" && \ - cargo build --bin limbo_stress --antithesis; \ + cargo build --bin limbo_stress; \ else \ cargo build --bin limbo_stress --release; \ fi From 4d0c40a4353453906775ad7856d1f069ba65feb0 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 24 Apr 2025 21:17:36 +0300 Subject: [PATCH 379/425] One more fix to Antithesis Dockerfile --- Dockerfile.antithesis | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index acac11f45..6f10a91bd 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -69,7 +69,6 @@ WORKDIR /app EXPOSE 8080 COPY --from=builder /usr/lib/libvoidstar.so* /usr/lib/ COPY --from=builder /app/target/release/limbo_stress /bin/limbo_stress -COPY --from=builder /app/target/antithesis/limbo_stress /symbols COPY stress/docker-entrypoint.sh /bin RUN chmod +x /bin/docker-entrypoint.sh From dd7c0ad1c8cb9d3eef8af6bf1c274a0308df5c23 Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Thu, 24 Apr 2025 22:28:00 +0300 Subject: [PATCH 380/425] Give name to hard-coded page_size 
values --- core/storage/sqlite3_ondisk.rs | 12 ++++++++---- core/storage/wal.rs | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index fccf233b5..6ee192380 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -63,6 +63,8 @@ pub const DATABASE_HEADER_SIZE: usize = 100; // DEFAULT_CACHE_SIZE negative values mean that we store the amount of pages a XKiB of memory can hold. // We can calculate "real" cache size by diving by page size. const DEFAULT_CACHE_SIZE: i32 = -2000; +// The size of db page in bytes. +const DEFAULT_PAGE_SIZE: u16 = 4096; // Minimum number of pages that cache can hold. pub const MIN_PAGE_CACHE_SIZE: usize = 10; @@ -217,7 +219,7 @@ impl Default for DatabaseHeader { fn default() -> Self { Self { magic: *b"SQLite format 3\0", - page_size: 4096, + page_size: DEFAULT_PAGE_SIZE, write_version: 2, read_version: 2, reserved_space: 0, @@ -1475,6 +1477,7 @@ pub fn begin_write_wal_frame( io: &Arc, offset: usize, page: &PageRef, + page_size: u16, db_size: u32, write_counter: Rc>, wal_header: &WalHeader, @@ -1511,15 +1514,16 @@ pub fn begin_write_wal_frame( let content_len = contents_buf.len(); buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + content_len] .copy_from_slice(contents_buf); - if content_len < 4096 { - buf[WAL_FRAME_HEADER_SIZE + content_len..WAL_FRAME_HEADER_SIZE + 4096].fill(0); + if content_len < page_size as usize { + buf[WAL_FRAME_HEADER_SIZE + content_len..WAL_FRAME_HEADER_SIZE + page_size as usize] + .fill(0); } let expects_be = wal_header.magic & 1; let use_native_endian = cfg!(target_endian = "big") as u32 == expects_be; let header_checksum = checksum_wal(&buf[0..8], wal_header, checksums, use_native_endian); // Only 8 bytes let final_checksum = checksum_wal( - &buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + 4096], + &buf[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + page_size as usize], wal_header, header_checksum, use_native_endian, diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 2d1f17776..e332d3108 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -462,6 +462,7 @@ impl Wal for WalFile { &shared.file, offset, &page, + self.page_size as u16, db_size, write_counter, &header, From f464d15f8b048b0dcc83d7a04c9d9e97838f3abe Mon Sep 17 00:00:00 2001 From: meteorgan Date: Fri, 25 Apr 2025 21:45:18 +0800 Subject: [PATCH 381/425] refactor database open_file and open --- bindings/javascript/src/lib.rs | 17 +++-------------- bindings/wasm/lib.rs | 16 ++-------------- core/lib.rs | 26 ++++++++++++-------------- 3 files changed, 17 insertions(+), 42 deletions(-) diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 2e0054358..aa18b208c 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -4,7 +4,7 @@ use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; -use limbo_core::{Clock, Instant}; +use limbo_core::{maybe_init_database_file, Clock, Instant}; use napi::{Env, JsUnknown, Result as NapiResult}; use napi_derive::napi; @@ -29,20 +29,9 @@ impl Database { let file = io .open_file(&path, limbo_core::OpenFlags::Create, false) .unwrap(); - limbo_core::maybe_init_database_file(&file, &io).unwrap(); + maybe_init_database_file(&file, &io).unwrap(); let db_file = Arc::new(DatabaseFile::new(file)); - let db_header = limbo_core::Pager::begin_open(db_file.clone()).unwrap(); - - // ensure db header is there - io.run_once().unwrap(); - - let page_size = 
db_header.lock().page_size; - - let wal_path = format!("{}-wal", path); - let wal_shared = - limbo_core::WalFileShared::open_shared(&io, wal_path.as_str(), page_size).unwrap(); - - let db = limbo_core::Database::open(io, db_file, wal_shared, false).unwrap(); + let db = limbo_core::Database::open(io, &path, db_file, false).unwrap(); let conn = db.connect().unwrap(); Self { memory, diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index a704706be..95984ebcf 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -19,22 +19,10 @@ impl Database { #[wasm_bindgen(constructor)] pub fn new(path: &str) -> Database { let io: Arc = Arc::new(PlatformIO { vfs: VFS::new() }); - let file = io - .open_file(path, limbo_core::OpenFlags::Create, false) - .unwrap(); + let file = io.open_file(path, OpenFlags::Create, false).unwrap(); maybe_init_database_file(&file, &io).unwrap(); let db_file = Arc::new(DatabaseFile::new(file)); - let db_header = Pager::begin_open(db_file.clone()).unwrap(); - - // ensure db header is there - io.run_once().unwrap(); - - let page_size = db_header.lock().page_size; - - let wal_path = format!("{}-wal", path); - let wal_shared = WalFileShared::open_shared(&io, wal_path.as_str(), page_size).unwrap(); - - let db = limbo_core::Database::open(io, db_file, wal_shared, false).unwrap(); + let db = limbo_core::Database::open(io, path, db_file, false).unwrap(); let conn = db.connect().unwrap(); Database { db, conn } } diff --git a/core/lib.rs b/core/lib.rs index 9d5508e2d..aa25f3c94 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -109,48 +109,46 @@ unsafe impl Sync for Database {} impl Database { #[cfg(feature = "fs")] pub fn open_file(io: Arc, path: &str, enable_mvcc: bool) -> Result> { - use storage::wal::WalFileShared; - let file = io.open_file(path, OpenFlags::Create, true)?; maybe_init_database_file(&file, &io)?; let db_file = Arc::new(DatabaseFile::new(file)); - let wal_path = format!("{}-wal", path); - let db_header = Pager::begin_open(db_file.clone())?; - io.run_once()?; - let page_size = db_header.lock().page_size; - let wal_shared = WalFileShared::open_shared(&io, wal_path.as_str(), page_size)?; - Self::open(io, db_file, wal_shared, enable_mvcc) + Self::open(io, path, db_file, enable_mvcc) } #[allow(clippy::arc_with_non_send_sync)] pub fn open( io: Arc, + path: &str, db_file: Arc, - shared_wal: Arc>, enable_mvcc: bool, ) -> Result> { let db_header = Pager::begin_open(db_file.clone())?; io.run_once()?; + + let page_size = db_header.lock().page_size; + let wal_path = format!("{}-wal", path); + let shared_wal = WalFileShared::open_shared(&io, wal_path.as_str(), page_size)?; + DATABASE_VERSION.get_or_init(|| { let version = db_header.lock().version_number; version.to_string() }); + let mv_store = if enable_mvcc { Some(Rc::new(MvStore::new( - crate::mvcc::LocalClock::new(), - crate::mvcc::persistent_storage::Storage::new_noop(), + mvcc::LocalClock::new(), + mvcc::persistent_storage::Storage::new_noop(), ))) } else { None }; + let shared_page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); - let page_size = db_header.lock().page_size; - let header = db_header; let schema = Arc::new(RwLock::new(Schema::new())); let db = Database { mv_store, schema: schema.clone(), - header: header.clone(), + header: db_header.clone(), shared_page_cache: shared_page_cache.clone(), shared_wal: shared_wal.clone(), db_file, From 0202fa3ed0f4ca2668a1faea20ae061f393e4f3d Mon Sep 17 00:00:00 2001 From: meteorgan Date: Fri, 25 Apr 2025 21:57:35 +0800 Subject: [PATCH 382/425] add back one 
comment --- core/lib.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index aa25f3c94..9b36a184a 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -85,9 +85,9 @@ enum TransactionState { None, } -pub(crate) type MvStore = crate::mvcc::MvStore; +pub(crate) type MvStore = mvcc::MvStore; -pub(crate) type MvCursor = crate::mvcc::cursor::ScanCursor; +pub(crate) type MvCursor = mvcc::cursor::ScanCursor; pub struct Database { mv_store: Option>, @@ -123,6 +123,7 @@ impl Database { enable_mvcc: bool, ) -> Result> { let db_header = Pager::begin_open(db_file.clone())?; + // ensure db header is there io.run_once()?; let page_size = db_header.lock().page_size; @@ -216,7 +217,7 @@ impl Database { #[cfg(feature = "fs")] #[allow(clippy::arc_with_non_send_sync)] pub fn open_new(path: &str, vfs: &str) -> Result<(Arc, Arc)> { - let vfsmods = crate::ext::add_builtin_vfs_extensions(None)?; + let vfsmods = ext::add_builtin_vfs_extensions(None)?; let io: Arc = match vfsmods.iter().find(|v| v.0 == vfs).map(|v| v.1.clone()) { Some(vfs) => vfs, None => match vfs.trim() { From 6a860e75b8ee4cc9a1d398adb226f17dadc1abbe Mon Sep 17 00:00:00 2001 From: meteorgan Date: Fri, 25 Apr 2025 22:06:44 +0800 Subject: [PATCH 383/425] fix cargo clippy --- bindings/wasm/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 95984ebcf..02f9a2e35 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -1,7 +1,5 @@ use js_sys::{Array, Object}; -use limbo_core::{ - maybe_init_database_file, Clock, Instant, OpenFlags, Pager, Result, WalFileShared, -}; +use limbo_core::{maybe_init_database_file, Clock, Instant, OpenFlags, Result}; use std::cell::RefCell; use std::rc::Rc; use std::sync::Arc; From bde2d4f0a37806c17a171d26f1e5dba89034124c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Apr 2025 09:14:24 +0300 Subject: [PATCH 384/425] Fix Antithesis docker-compose.yaml --- stress/docker-compose.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stress/docker-compose.yaml b/stress/docker-compose.yaml index c6e081aac..38c77ec2f 100644 --- a/stress/docker-compose.yaml +++ b/stress/docker-compose.yaml @@ -1,7 +1,6 @@ services: limbo: - container_name: limbo - hostname: limbo - image: limbo:latest - entrypoint: ["/bin/docker-entrypoint.sh"] - command: ["sleep", "infinity"] \ No newline at end of file + image: us-central1-docker.pkg.dev/molten-verve-216720/turso-repository/limbo-workload:antithesis-latest + environment: + SANDBOX: "composed" + command: ["sleep", "infinity"] From a7537be2b6dd9e283d93f544ba3874ec0c70508a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Apr 2025 09:38:03 +0300 Subject: [PATCH 385/425] antithesis-tests: Fix accounts to be at least one --- antithesis-tests/bank-test/first_setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/antithesis-tests/bank-test/first_setup.py b/antithesis-tests/bank-test/first_setup.py index df833b96e..86580315d 100755 --- a/antithesis-tests/bank-test/first_setup.py +++ b/antithesis-tests/bank-test/first_setup.py @@ -20,7 +20,7 @@ cur.execute(f''' # randomly create up to 100 accounts with a balance up to 1e9 total = 0 -num_accts = get_random() % 100 +num_accts = get_random() % 100 + 1 for i in range(num_accts): bal = get_random() % 1e9 total += bal From e8bc3086f2fa7d8654ae399c848485a91006a0a4 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Apr 2025 10:53:31 +0300 Subject: [PATCH 386/425] 
antithesis-tests: Fix generate_random_value() on older Python versions --- antithesis-tests/stress-composer/utils.py | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/antithesis-tests/stress-composer/utils.py b/antithesis-tests/stress-composer/utils.py index f99052bf3..358e44670 100755 --- a/antithesis-tests/stress-composer/utils.py +++ b/antithesis-tests/stress-composer/utils.py @@ -4,17 +4,16 @@ from antithesis.random import get_random, random_choice def generate_random_identifier(type: str, num: int): return ''.join(type, '_', get_random() % num) -def generate_random_value(type: str): - match type: - case 'INTEGER': - return str(get_random() % 100) - case 'REAL': - return '{:.2f}'.format(get_random() % 100 / 100.0) - case 'TEXT': - return f"'{''.join(random_choice(string.ascii_lowercase) for _ in range(5))}'" - case 'BLOB': - return f"x'{''.join(random_choice(string.ascii_lowercase) for _ in range(5)).encode().hex()}'" - case 'NUMERIC': - return str(get_random() % 100) - case _: - return NULL \ No newline at end of file +def generate_random_value(type_str): + if type_str == 'INTEGER': + return str(get_random() % 100) + elif type_str == 'REAL': + return '{:.2f}'.format(get_random() % 100 / 100.0) + elif type_str == 'TEXT': + return f"'{''.join(random_choice(string.ascii_lowercase) for _ in range(5))}'" + elif type_str == 'BLOB': + return f"x'{''.join(random_choice(string.ascii_lowercase) for _ in range(5)).encode().hex()}'" + elif type_str == 'NUMERIC': + return str(get_random() % 100) + else: + return NULL From 23ae5a27c473b142e45cea1966d72af18f35f2a8 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 26 Apr 2025 11:32:24 +0300 Subject: [PATCH 387/425] btree/tablebtree_move_to: micro-optimizations --- core/storage/btree.rs | 29 +++++++---------------------- core/storage/sqlite3_ondisk.rs | 17 +++++++++++++++++ core/types.rs | 1 + 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d191227a4..02c60363d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1043,18 +1043,8 @@ impl BTreeCursor { loop { if min > max { if let Some(leftmost_matching_cell) = leftmost_matching_cell { - self.stack.set_cell_index(leftmost_matching_cell as i32); - let matching_cell = contents.cell_get( - leftmost_matching_cell, - payload_overflow_threshold_max( - contents.page_type(), - self.usable_space() as u16, - ), - payload_overflow_threshold_min( - contents.page_type(), - self.usable_space() as u16, - ), - self.usable_space(), + let left_child_page = contents.cell_table_interior_read_left_child_page( + leftmost_matching_cell as usize, )?; // If we found our target rowid in the left subtree, // we need to move the parent cell pointer forwards or backwards depending on the iteration direction. @@ -1064,15 +1054,11 @@ impl BTreeCursor { // this parent: rowid 666 // left child has: 664,665,666 // we need to move to the previous parent (with e.g. rowid 663) when iterating backwards. - self.stack.next_cell_in_direction(iter_dir); - let BTreeCell::TableInteriorCell(TableInteriorCell { - _left_child_page, - .. 
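// Editor's sketch (not diff content): the code this hunk introduces no longer
// parses the whole interior cell just to reach its pointer. An interior cell
// begins with the left-child page number as a 4-byte big-endian integer, so
// cell_table_interior_read_left_child_page (added to sqlite3_ondisk.rs below)
// only resolves the cell pointer and decodes those four bytes. Assuming
// `cell` points at the start of the cell:
//
//     fn left_child(cell: &[u8]) -> u32 {
//         u32::from_be_bytes([cell[0], cell[1], cell[2], cell[3]])
//     }
//
//     assert_eq!(left_child(&[0x00, 0x00, 0x00, 0x07, 0x85]), 7); // child page 7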
- }) = matching_cell - else { - unreachable!("unexpected cell type: {:?}", matching_cell); - }; - let mem_page = self.pager.read_page(_left_child_page as usize)?; + let index_change = + -1 + (iter_dir == IterationDirection::Forwards) as i32 * 2; + self.stack + .set_cell_index(leftmost_matching_cell as i32 + index_change); + let mem_page = self.pager.read_page(left_child_page as usize)?; self.stack.push(mem_page); continue 'outer; } @@ -1089,7 +1075,6 @@ impl BTreeCursor { } } let cur_cell_idx = (min + max) / 2; - self.stack.set_cell_index(cur_cell_idx as i32); let cell_rowid = contents.cell_table_interior_read_rowid(cur_cell_idx as usize)?; // in sqlite btrees left child pages have <= keys. // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 8e091ef64..315d01460 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -617,6 +617,23 @@ impl PageContent { Ok(rowid) } + /// Read the left child page of a table interior cell. + #[inline(always)] + pub fn cell_table_interior_read_left_child_page(&self, idx: usize) -> Result { + assert!(self.page_type() == PageType::TableInterior); + let buf = self.as_ptr(); + const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; + let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; + let cell_pointer = cell_pointer_array_start + (idx * 2); + let cell_pointer = self.read_u16(cell_pointer) as usize; + Ok(u32::from_be_bytes([ + buf[cell_pointer], + buf[cell_pointer + 1], + buf[cell_pointer + 2], + buf[cell_pointer + 3], + ])) + } + /// Read the rowid of a table leaf cell. #[inline(always)] pub fn cell_table_leaf_read_rowid(&self, idx: usize) -> Result { diff --git a/core/types.rs b/core/types.rs index b2a7a053b..5d86f97f3 100644 --- a/core/types.rs +++ b/core/types.rs @@ -1402,6 +1402,7 @@ impl SeekOp { /// A seek with SeekOp::LE implies: /// Find the last table/index key that compares less than or equal to the seek key /// -> used in backwards iteration. 
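// Editor's sketch (not diff content): in the btree hunk above, the direction
// returned by iteration_direction() is folded into a branchless cell-index
// delta. `(flag as i32)` is 1 for true and 0 for false, so `-1 + flag * 2`
// evaluates to +1 when iterating forwards and -1 when iterating backwards:
//
//     fn index_change(forwards: bool) -> i32 {
//         -1 + (forwards as i32) * 2
//     }
//
//     assert_eq!(index_change(true), 1);   // Forwards: step past the matched cell
//     assert_eq!(index_change(false), -1); // Backwards: step to the previous cell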
+ #[inline(always)] pub fn iteration_direction(&self) -> IterationDirection { match self { SeekOp::EQ | SeekOp::GE | SeekOp::GT => IterationDirection::Forwards, From 6d3c63fb012605f2eddfcbb752bad8b224589c2c Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Sat, 26 Apr 2025 12:04:37 +0300 Subject: [PATCH 388/425] Add the .indexes command --- cli/app.rs | 54 ++++++++++++++++++++++++++++++++++++++++++++ cli/commands/args.rs | 6 +++++ cli/commands/mod.rs | 7 ++++-- cli/input.rs | 2 ++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 8212ce8c7..196face0a 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -634,6 +634,11 @@ impl<'a> Limbo<'a> { let _ = self.writeln(v); }); } + Command::ListIndexes(args) => { + if let Err(e) = self.display_indexes(args.tbl_name) { + let _ = self.writeln(e.to_string()); + } + } Command::Timer(timer_mode) => { self.opts.timer = match timer_mode.mode { TimerMode::On => true, @@ -909,6 +914,55 @@ impl<'a> Limbo<'a> { Ok(()) } + fn display_indexes(&mut self, maybe_table: Option) -> anyhow::Result<()> { + let sql = match maybe_table { + Some(ref tbl_name) => format!( + "SELECT name FROM sqlite_schema WHERE type='index' AND tbl_name = '{}' ORDER BY 1", + tbl_name + ), + None => String::from("SELECT name FROM sqlite_schema WHERE type='index' ORDER BY 1"), + }; + + match self.conn.query(&sql) { + Ok(Some(ref mut rows)) => { + let mut indexes = String::new(); + loop { + match rows.step()? { + StepResult::Row => { + let row = rows.row().unwrap(); + if let Ok(OwnedValue::Text(idx)) = row.get::<&OwnedValue>(0) { + indexes.push_str(idx.as_str()); + indexes.push(' '); + } + } + StepResult::IO => { + self.io.run_once()?; + } + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + let _ = self.writeln("database is busy"); + break; + } + } + } + if !indexes.is_empty() { + let _ = self.writeln(indexes.trim_end()); + } + } + Err(err) => { + if err.to_string().contains("no such table: sqlite_schema") { + return Err(anyhow::anyhow!("Unable to access database schema. 
The database may be using an older SQLite version or may not be properly initialized.")); + } else { + return Err(anyhow::anyhow!("Error querying schema: {}", err)); + } + } + Ok(None) => {} + } + + Ok(()) + } + fn display_tables(&mut self, pattern: Option<&str>) -> anyhow::Result<()> { let sql = match pattern { Some(pattern) => format!( diff --git a/cli/commands/args.rs b/cli/commands/args.rs index 750895049..4c36e6ef6 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -3,6 +3,12 @@ use clap_complete::{ArgValueCompleter, CompletionCandidate, PathCompleter}; use crate::{input::OutputMode, opcodes_dictionary::OPCODE_DESCRIPTIONS}; +#[derive(Debug, Clone, Args)] +pub struct IndexesArgs { + /// Name of table + pub tbl_name: Option, +} + #[derive(Debug, Clone, Args)] pub struct ExitArgs { /// Exit code diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index e01828517..bd94c6051 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -2,8 +2,8 @@ pub mod args; pub mod import; use args::{ - CwdArgs, EchoArgs, ExitArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, OpenArgs, - OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs, + CwdArgs, EchoArgs, ExitArgs, IndexesArgs, LoadExtensionArgs, NullValueArgs, OpcodesArgs, + OpenArgs, OutputModeArgs, SchemaArgs, SetOutputArgs, TablesArgs, TimerArgs, }; use clap::Parser; use import::ImportArgs; @@ -72,6 +72,9 @@ pub enum Command { /// List vfs modules available #[command(name = "vfslist", display_name = ".vfslist")] ListVfs, + /// Show names of indexes + #[command(name = "indexes", display_name = ".indexes")] + ListIndexes(IndexesArgs), #[command(name = "timer", display_name = ".timer")] Timer(TimerArgs), } diff --git a/cli/input.rs b/cli/input.rs index eac5312dc..e20d5a71a 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -218,6 +218,8 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples: 13. To list all available VFS: .listvfs +14. To show names of indexes: + .indexes ?TABLE? Note: - All SQL commands must end with a semicolon (;). From 5060f1a1fa937aa712accfd44d9aa8f463373129 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 26 Apr 2025 12:12:09 +0300 Subject: [PATCH 389/425] don't use integer division in binary search halfpoint calculation --- core/storage/btree.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 02c60363d..d42d5f95a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1074,7 +1074,7 @@ impl BTreeCursor { } } } - let cur_cell_idx = (min + max) / 2; + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. let cell_rowid = contents.cell_table_interior_read_rowid(cur_cell_idx as usize)?; // in sqlite btrees left child pages have <= keys. // table btrees can have a duplicate rowid in the interior cell, so for example if we are looking for rowid=10, @@ -1186,7 +1186,7 @@ impl BTreeCursor { continue 'outer; } - let cur_cell_idx = (min + max) / 2; + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. self.stack.set_cell_index(cur_cell_idx as i32); let cell = contents.cell_get( cur_cell_idx as usize, @@ -1340,7 +1340,7 @@ impl BTreeCursor { return Ok(CursorResult::Ok(Some(cell_rowid))); } - let cur_cell_idx = (min + max) / 2; + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. 
we know min&max are >=0 here. self.stack.set_cell_index(cur_cell_idx as i32); let cell_rowid = contents.cell_table_leaf_read_rowid(cur_cell_idx as usize)?; @@ -1509,7 +1509,7 @@ impl BTreeCursor { return Ok(CursorResult::Ok(Some(rowid))); } - let cur_cell_idx = (min + max) / 2; + let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. self.stack.set_cell_index(cur_cell_idx as i32); let cell = contents.cell_get( From e46c01928c25f4c61c869d2da9b1bd0ff8759de5 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sat, 26 Apr 2025 12:59:19 +0300 Subject: [PATCH 390/425] antithesis: Enable Rust backtraces again --- Dockerfile.antithesis | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile.antithesis b/Dockerfile.antithesis index 6f10a91bd..b3ce828d5 100644 --- a/Dockerfile.antithesis +++ b/Dockerfile.antithesis @@ -80,4 +80,6 @@ RUN chmod 777 -R /opt/antithesis/test/v1 RUN mkdir /opt/antithesis/catalog RUN ln -s /opt/antithesis/test/v1/bank-test/*.py /opt/antithesis/catalog -ENTRYPOINT ["/bin/docker-entrypoint.sh"] \ No newline at end of file +ENV RUST_BACKTRACE=1 + +ENTRYPOINT ["/bin/docker-entrypoint.sh"] From ac1bc17ea45f918e0cc37683cc11da85cc0fce1b Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 26 Apr 2025 13:41:30 +0300 Subject: [PATCH 391/425] btree/tablebtree_seek: remove some more useless calls to set_cell_index() --- core/storage/btree.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d42d5f95a..46c6ef9f2 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1306,7 +1306,6 @@ impl BTreeCursor { let Some(nearest_matching_cell) = nearest_matching_cell else { return Ok(CursorResult::Ok(None)); }; - self.stack.set_cell_index(nearest_matching_cell as i32); let matching_cell = contents.cell_get( nearest_matching_cell, payload_overflow_threshold_max( @@ -1335,13 +1334,16 @@ impl BTreeCursor { first_overflow_page, payload_size )); - self.stack.next_cell_in_direction(iter_dir); - + let cell_idx = if iter_dir == IterationDirection::Forwards { + nearest_matching_cell as i32 + 1 + } else { + nearest_matching_cell as i32 - 1 + }; + self.stack.set_cell_index(cell_idx as i32); return Ok(CursorResult::Ok(Some(cell_rowid))); } let cur_cell_idx = (min + max) >> 1; // rustc generates extra insns for (min+max)/2 due to them being isize. we know min&max are >=0 here. - self.stack.set_cell_index(cur_cell_idx as i32); let cell_rowid = contents.cell_table_leaf_read_rowid(cur_cell_idx as usize)?; let cmp = cell_rowid.cmp(&rowid); @@ -1383,7 +1385,12 @@ impl BTreeCursor { first_overflow_page, payload_size )); - self.stack.next_cell_in_direction(iter_dir); + let cell_idx = if iter_dir == IterationDirection::Forwards { + cur_cell_idx + 1 + } else { + cur_cell_idx - 1 + }; + self.stack.set_cell_index(cell_idx as i32); return Ok(CursorResult::Ok(Some(cell_rowid))); } From 75c6678a06a57029c0de80521ad35d8c03094cd1 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 26 Apr 2025 14:47:45 +0300 Subject: [PATCH 392/425] sqlite3_ondisk: use debug asserts for cell_table_interior_read... 
funcs --- core/storage/sqlite3_ondisk.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 315d01460..200dd5490 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -606,7 +606,7 @@ impl PageContent { /// Read the rowid of a table interior cell. #[inline(always)] pub fn cell_table_interior_read_rowid(&self, idx: usize) -> Result { - assert!(self.page_type() == PageType::TableInterior); + debug_assert!(self.page_type() == PageType::TableInterior); let buf = self.as_ptr(); const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; @@ -620,7 +620,7 @@ impl PageContent { /// Read the left child page of a table interior cell. #[inline(always)] pub fn cell_table_interior_read_left_child_page(&self, idx: usize) -> Result { - assert!(self.page_type() == PageType::TableInterior); + debug_assert!(self.page_type() == PageType::TableInterior); let buf = self.as_ptr(); const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12; let cell_pointer_array_start = INTERIOR_PAGE_HEADER_SIZE_BYTES; @@ -637,7 +637,7 @@ impl PageContent { /// Read the rowid of a table leaf cell. #[inline(always)] pub fn cell_table_leaf_read_rowid(&self, idx: usize) -> Result { - assert!(self.page_type() == PageType::TableLeaf); + debug_assert!(self.page_type() == PageType::TableLeaf); let buf = self.as_ptr(); const LEAF_PAGE_HEADER_SIZE_BYTES: usize = 8; let cell_pointer_array_start = LEAF_PAGE_HEADER_SIZE_BYTES; From 46d45a6bf49f1d31bc9c244569b5f9583ca11fff Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sat, 26 Apr 2025 14:47:56 +0300 Subject: [PATCH 393/425] don't recompute cell_count --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 46c6ef9f2..bc3e450c8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1062,7 +1062,7 @@ impl BTreeCursor { self.stack.push(mem_page); continue 'outer; } - self.stack.set_cell_index(contents.cell_count() as i32 + 1); + self.stack.set_cell_index(cell_count as i32 + 1); match contents.rightmost_pointer() { Some(right_most_pointer) => { let mem_page = self.pager.read_page(right_most_pointer as usize)?; From f3f09a5b7b6917f6abfeadefd1efb1f23c7c1f8a Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 26 Apr 2025 00:00:29 +0800 Subject: [PATCH 394/425] Fix pragma page_count --- core/vdbe/execute.rs | 9 ++------- testing/pragma.test | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 928f7f94a..18f3ecf4f 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4229,13 +4229,8 @@ pub fn op_page_count( // TODO: implement temp databases todo!("temp databases not implemented yet"); } - // SQLite returns "0" on an empty database, and 2 on the first insertion, - // so we'll mimic that behavior. 
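// Editor's sketch (not diff content): the lines removed here special-cased a
// one-page database so `PRAGMA page_count` reported 0; the fix returns the
// header's database_size as-is, so a fresh database reports 1 (page 1 holds
// the file header and the sqlite_schema root). In effect, as free functions
// for illustration:
//
//     fn old_page_count(database_size: u32) -> i64 {
//         let pages = database_size as i64;
//         if pages == 1 { 0 } else { pages }
//     }
//
//     fn new_page_count(database_size: u32) -> i64 {
//         database_size as i64 // no 1 -> 0 remapping
//     }
//
// which is why the pragma test below moves its expectation from {0} to {1}
// (and is then temporarily disabled in the following patch, pending #1407).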
- let mut pages = pager.db_header.lock().database_size.into(); - if pages == 1 { - pages = 0; - } - state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(pages)); + let count = pager.db_header.lock().database_size.into(); + state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(count)); state.pc += 1; Ok(InsnFunctionStepResult::Step) } diff --git a/testing/pragma.test b/testing/pragma.test index c478c032c..29a460c65 100755 --- a/testing/pragma.test +++ b/testing/pragma.test @@ -35,7 +35,7 @@ do_execsql_test pragma-table-info-invalid-table { do_execsql_test_on_specific_db ":memory:" pragma-page-count-empty { PRAGMA page_count -} {0} +} {1} do_execsql_test_on_specific_db ":memory:" pragma-page-count-table { CREATE TABLE foo(bar); From eabe5e1631a1d4ddeeeaeb463e9bf970330946ef Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 26 Apr 2025 00:42:18 +0800 Subject: [PATCH 395/425] temporarily comment the pragma-page-count-empty test case --- testing/pragma.test | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/testing/pragma.test b/testing/pragma.test index 29a460c65..4d56e06ab 100755 --- a/testing/pragma.test +++ b/testing/pragma.test @@ -33,9 +33,10 @@ do_execsql_test pragma-table-info-invalid-table { PRAGMA table_info=pekka } {} -do_execsql_test_on_specific_db ":memory:" pragma-page-count-empty { - PRAGMA page_count -} {1} +# temporarily skip this test case. The issue is detailed in #1407 +#do_execsql_test_on_specific_db ":memory:" pragma-page-count-empty { +# PRAGMA page_count +#} {0} do_execsql_test_on_specific_db ":memory:" pragma-page-count-table { CREATE TABLE foo(bar); From 29d463aa89c9cf633bf026741ab36818fad5d616 Mon Sep 17 00:00:00 2001 From: Peter Hayman Date: Mon, 28 Apr 2025 00:22:39 +1000 Subject: [PATCH 396/425] implement Clone for Arc types --- bindings/rust/src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 8c57e7909..783012c5d 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -129,6 +129,14 @@ pub struct Statement { inner: Arc>, } +impl Clone for Statement { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + unsafe impl Send for Statement {} unsafe impl Sync for Statement {} @@ -241,6 +249,14 @@ pub struct Rows { inner: Arc>, } +impl Clone for Rows { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + unsafe impl Send for Rows {} unsafe impl Sync for Rows {} From 33d230771f52bf21502c1a51dda849ce5a2fc10e Mon Sep 17 00:00:00 2001 From: Piotr Rzysko Date: Mon, 28 Apr 2025 08:35:04 +0200 Subject: [PATCH 397/425] Save history on exit --- cli/app.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index d214d139f..82fa34b63 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -558,11 +558,13 @@ impl Limbo { } Ok(cmd) => match cmd.command { Command::Exit(args) => { + self.save_history(); std::process::exit(args.code); } Command::Quit => { let _ = self.writeln("Exiting Limbo SQL Shell."); let _ = self.close_conn(); + self.save_history(); std::process::exit(0) } Command::Open(args) => { @@ -1008,12 +1010,16 @@ impl Limbo { Ok(input) } } -} -impl Drop for Limbo { - fn drop(&mut self) { + fn save_history(&mut self) { if let Some(rl) = &mut self.rl { let _ = rl.save_history(HISTORY_FILE.as_path()); } } } + +impl Drop for Limbo { + fn drop(&mut self) { + self.save_history() + } +} From d2dce740f78e8876504bd1dbc9c9e49f54047c6d Mon 
Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 26 Apr 2025 20:23:26 +0800 Subject: [PATCH 398/425] fix some issues about page_size --- core/lib.rs | 10 ++++----- core/storage/btree.rs | 12 +++++------ core/storage/pager.rs | 4 ++-- core/storage/sqlite3_ondisk.rs | 38 ++++++++++++++++++++++++++++++---- core/storage/wal.rs | 10 ++++----- core/translate/pragma.rs | 2 +- core/vdbe/execute.rs | 2 +- 7 files changed, 54 insertions(+), 24 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 9b36a184a..ddf741ffb 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -96,7 +96,7 @@ pub struct Database { header: Arc>, db_file: Arc, io: Arc, - page_size: u16, + page_size: u32, // Shared structures of a Database are the parts that are common to multiple threads that might // create DB connections. shared_page_cache: Arc>, @@ -126,7 +126,7 @@ impl Database { // ensure db header is there io.run_once()?; - let page_size = db_header.lock().page_size; + let page_size = db_header.lock().get_page_size(); let wal_path = format!("{}-wal", path); let shared_wal = WalFileShared::open_shared(&io, wal_path.as_str(), page_size)?; @@ -181,7 +181,7 @@ impl Database { let wal = Rc::new(RefCell::new(WalFile::new( self.io.clone(), - self.page_size as usize, + self.page_size, self.shared_wal.clone(), buffer_pool.clone(), ))); @@ -244,7 +244,7 @@ pub fn maybe_init_database_file(file: &Arc, io: &Arc) -> Resul let db_header = DatabaseHeader::default(); let page1 = allocate_page( 1, - &Rc::new(BufferPool::new(db_header.page_size as usize)), + &Rc::new(BufferPool::new(db_header.get_page_size() as usize)), DATABASE_HEADER_SIZE, ); { @@ -256,7 +256,7 @@ pub fn maybe_init_database_file(file: &Arc, io: &Arc) -> Resul &page1, storage::sqlite3_ondisk::PageType::TableLeaf, DATABASE_HEADER_SIZE, - db_header.page_size - db_header.reserved_space as u16, + (db_header.get_page_size() - db_header.reserved_space as u32) as u16, ); let contents = page1.get().contents.as_mut().unwrap(); diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 36b39eb15..d47de3bd0 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -5537,15 +5537,15 @@ mod tests { fn empty_btree() -> (Rc, usize) { let db_header = DatabaseHeader::default(); - let page_size = db_header.page_size as usize; + let page_size = db_header.get_page_size(); #[allow(clippy::arc_with_non_send_sync)] let io: Arc = Arc::new(MemoryIO::new()); let io_file = io.open_file("test.db", OpenFlags::Create, false).unwrap(); let db_file = Arc::new(DatabaseFile::new(io_file)); - let buffer_pool = Rc::new(BufferPool::new(db_header.page_size as usize)); - let wal_shared = WalFileShared::open_shared(&io, "test.wal", db_header.page_size).unwrap(); + let buffer_pool = Rc::new(BufferPool::new(page_size as usize)); + let wal_shared = WalFileShared::open_shared(&io, "test.wal", page_size as u32).unwrap(); let wal_file = WalFile::new(io.clone(), page_size, wal_shared, buffer_pool.clone()); let wal = Rc::new(RefCell::new(wal_file)); @@ -5908,7 +5908,7 @@ mod tests { fn setup_test_env(database_size: u32) -> (Rc, Arc>) { let page_size = 512; let mut db_header = DatabaseHeader::default(); - db_header.page_size = page_size; + db_header.update_page_size(page_size); db_header.database_size = database_size; let db_header = Arc::new(SpinLock::new(db_header)); @@ -5940,7 +5940,7 @@ mod tests { let wal_shared = WalFileShared::open_shared(&io, "test.wal", page_size).unwrap(); let wal = Rc::new(RefCell::new(WalFile::new( io.clone(), - page_size as usize, + page_size, wal_shared, buffer_pool.clone(), ))); 
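The u16-to-u32 widening threaded through this patch exists because the header's on-disk page-size field is a u16 that cannot represent 65536: SQLite stores the value 1 to mean a 64 KiB page. A standalone round-trip sketch of the encoding that update_page_size and get_page_size implement later in this diff, with the MIN_PAGE_SIZE/MAX_PAGE_SIZE bounds inlined and the function names illustrative:

    fn encode_page_size(size: u32) -> Option<u16> {
        // must be a power of two between 512 and 65536, per the header format
        if !(512..=65536).contains(&size) || size & (size - 1) != 0 {
            return None;
        }
        Some(if size == 65536 { 1 } else { size as u16 })
    }

    fn decode_page_size(raw: u16) -> u32 {
        if raw == 1 { 65536 } else { raw as u32 }
    }

    fn main() {
        assert_eq!(encode_page_size(65536), Some(1)); // the 1-encoded maximum
        assert_eq!(decode_page_size(1), 65536);
        assert_eq!(encode_page_size(4096).map(decode_page_size), Some(4096));
    }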
@@ -5980,7 +5980,7 @@ mod tests { let drop_fn = Rc::new(|_buf| {}); #[allow(clippy::arc_with_non_send_sync)] let buf = Arc::new(RefCell::new(Buffer::allocate( - db_header.lock().page_size as usize, + db_header.lock().get_page_size() as usize, drop_fn, ))); let write_complete = Box::new(|_| {}); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 02aff9164..36a0936a3 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -257,7 +257,7 @@ impl Pager { /// In other words, if the page size is 512, then the reserved space size cannot exceed 32. pub fn usable_space(&self) -> usize { let db_header = self.db_header.lock(); - (db_header.page_size - db_header.reserved_space as u16) as usize + (db_header.get_page_size() - db_header.reserved_space as u32) as usize } #[inline(always)] @@ -685,7 +685,7 @@ impl Pager { pub fn usable_size(&self) -> usize { let db_header = self.db_header.lock(); - (db_header.page_size - db_header.reserved_space as u16) as usize + (db_header.get_page_size() - db_header.reserved_space as u32) as usize } } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 200dd5490..637a8bd1e 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -65,11 +65,19 @@ pub const DATABASE_HEADER_SIZE: usize = 100; // DEFAULT_CACHE_SIZE negative values mean that we store the amount of pages a XKiB of memory can hold. // We can calculate "real" cache size by diving by page size. const DEFAULT_CACHE_SIZE: i32 = -2000; -// The size of db page in bytes. -const DEFAULT_PAGE_SIZE: u16 = 4096; + // Minimum number of pages that cache can hold. pub const MIN_PAGE_CACHE_SIZE: usize = 10; +/// The minimum page size in bytes. +const MIN_PAGE_SIZE: u32 = 512; + +/// The maximum page size in bytes. +const MAX_PAGE_SIZE: u32 = 65536; + +/// The default page size in bytes. +const DEFAULT_PAGE_SIZE: u16 = 4096; + /// The database header. /// The first 100 bytes of the database file comprise the database file header. /// The database file header is divided into fields as shown by the table below. @@ -81,7 +89,7 @@ pub struct DatabaseHeader { /// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive, /// or the value 1 representing a page size of 65536. - pub page_size: u16, + page_size: u16, /// File format write version. 1 for legacy; 2 for WAL. write_version: u8, @@ -172,7 +180,7 @@ pub struct WalHeader { /// WAL format version. Currently 3007000 pub file_format: u32, - /// Database page size in bytes. Power of two between 512 and 32768 inclusive + /// Database page size in bytes. Power of two between 512 and 65536 inclusive pub page_size: u32, /// Checkpoint sequence number. 
Increases with each checkpoint @@ -247,6 +255,28 @@ impl Default for DatabaseHeader { } } +impl DatabaseHeader { + pub fn update_page_size(&mut self, size: u32) { + if !(MIN_PAGE_SIZE..=MAX_PAGE_SIZE).contains(&size) || (size & (size - 1) != 0) { + return; + } + + self.page_size = if size == MAX_PAGE_SIZE { + 1u16 + } else { + size as u16 + }; + } + + pub fn get_page_size(&self) -> u32 { + if self.page_size == 1 { + MAX_PAGE_SIZE + } else { + self.page_size as u32 + } + } +} + pub fn begin_read_database_header( db_file: Arc, ) -> Result>> { diff --git a/core/storage/wal.rs b/core/storage/wal.rs index e332d3108..fd41af51b 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -246,7 +246,7 @@ pub struct WalFile { sync_state: RefCell, syncing: Rc>, - page_size: usize, + page_size: u32, shared: Arc>, ongoing_checkpoint: OngoingCheckpoint, @@ -688,7 +688,7 @@ impl Wal for WalFile { impl WalFile { pub fn new( io: Arc, - page_size: usize, + page_size: u32, shared: Arc>, buffer_pool: Rc, ) -> Self { @@ -728,7 +728,7 @@ impl WalFile { fn frame_offset(&self, frame_id: u64) -> usize { assert!(frame_id > 0, "Frame ID must be 1-based"); let page_size = self.page_size; - let page_offset = (frame_id - 1) * (page_size + WAL_FRAME_HEADER_SIZE) as u64; + let page_offset = (frame_id - 1) * (page_size + WAL_FRAME_HEADER_SIZE as u32) as u64; let offset = WAL_HEADER_SIZE as u64 + page_offset; offset as usize } @@ -743,7 +743,7 @@ impl WalFileShared { pub fn open_shared( io: &Arc, path: &str, - page_size: u16, + page_size: u32, ) -> Result>> { let file = io.open_file(path, crate::io::OpenFlags::Create, false)?; let header = if file.size()? > 0 { @@ -764,7 +764,7 @@ impl WalFileShared { let mut wal_header = WalHeader { magic, file_format: 3007000, - page_size: page_size as u32, + page_size, checkpoint_seq: 0, // TODO implement sequence number salt_1: io.generate_random_number() as u32, salt_2: io.generate_random_number() as u32, diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs index a3662c9c2..950803c7c 100644 --- a/core/translate/pragma.rs +++ b/core/translate/pragma.rs @@ -261,7 +261,7 @@ fn query_pragma( program.emit_result_row(register, 1); } PragmaName::PageSize => { - program.emit_int(database_header.lock().page_size.into(), register); + program.emit_int(database_header.lock().get_page_size().into(), register); program.emit_result_row(register, 1); } } diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index f76bc5add..210e775cd 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4522,7 +4522,7 @@ pub fn op_open_ephemeral( let db_file = Arc::new(FileMemoryStorage::new(file)); let db_header = Pager::begin_open(db_file.clone())?; - let buffer_pool = Rc::new(BufferPool::new(db_header.lock().page_size as usize)); + let buffer_pool = Rc::new(BufferPool::new(db_header.lock().get_page_size() as usize)); let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); let pager = Rc::new(Pager::finish_open( From d1a50f8a694f627949919ce5b3abe76a26aa2c63 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Sat, 26 Apr 2025 21:44:31 +0800 Subject: [PATCH 399/425] skip unneccessary conversion --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d47de3bd0..fde75c771 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -5545,7 +5545,7 @@ mod tests { let db_file = Arc::new(DatabaseFile::new(io_file)); let buffer_pool = Rc::new(BufferPool::new(page_size as usize)); - let wal_shared 
= WalFileShared::open_shared(&io, "test.wal", page_size as u32).unwrap(); + let wal_shared = WalFileShared::open_shared(&io, "test.wal", page_size).unwrap(); let wal_file = WalFile::new(io.clone(), page_size, wal_shared, buffer_pool.clone()); let wal = Rc::new(RefCell::new(wal_file)); From 3e70cc3b68479be0034ce1051c31c13c8abefc80 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Mon, 28 Apr 2025 11:33:46 +0300 Subject: [PATCH 400/425] fix: old name --- fuzz/fuzz_targets/cast_real.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fuzz/fuzz_targets/cast_real.rs b/fuzz/fuzz_targets/cast_real.rs index 65f550ec8..4ef4ab2ba 100644 --- a/fuzz/fuzz_targets/cast_real.rs +++ b/fuzz/fuzz_targets/cast_real.rs @@ -1,5 +1,6 @@ #![no_main] use libfuzzer_sys::{fuzz_target, Corpus}; +use limbo_core::numeric::StrToF64; use std::error::Error; fn do_fuzz(text: String) -> Result> { @@ -10,8 +11,11 @@ fn do_fuzz(text: String) -> Result> { })? }; - let actual = limbo_core::numeric::atof(&text) - .map(|(non_nan, _)| f64::from(non_nan)) + let actual = limbo_core::numeric::str_to_f64(&text) + .map(|v| { + let (StrToF64::Fractional(non_nan) | StrToF64::Decimal(non_nan)) = v; + f64::from(non_nan) + }) .unwrap_or(0.0); assert_eq!(expected, actual); From a30241ca9150cf37e64937cc789c78355aebeee5 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Mon, 28 Apr 2025 12:45:23 +0300 Subject: [PATCH 401/425] Add state machine for op_idx_delete + DeleteState simplification DeleteState had a bit too many unnecessary states so I removed them. Usually we care about having a different state when I/O is triggered requiring a state to be stored for later. Furthermore, there was a bug with op_idx_delete where if balance is triggered, op_idx_delete wouldn't be re-entrant. So a state machine was added to prevent that from happening. --- core/storage/btree.rs | 94 ++++++++++++------------------------------- core/types.rs | 20 +++++++++ core/vdbe/execute.rs | 73 +++++++++++++++++++++++---------- core/vdbe/mod.rs | 4 +- 4 files changed, 100 insertions(+), 91 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 36b39eb15..185887afc 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -165,21 +165,13 @@ enum DeleteState { cell_idx: usize, original_child_pointer: Option, }, - DropCell { - cell_idx: usize, - }, CheckNeedsBalancing, - StartBalancing { - target_key: DeleteSavepoint, - }, WaitForBalancingToComplete { target_key: DeleteSavepoint, }, SeekAfterBalancing { target_key: DeleteSavepoint, }, - StackRetreat, - Finish, } #[derive(Clone)] @@ -3530,15 +3522,12 @@ impl BTreeCursor { /// 1. Start -> check if the rowid to be delete is present in the page or not. If not we early return /// 2. LoadPage -> load the page. /// 3. FindCell -> find the cell to be deleted in the page. - /// 4. ClearOverflowPages -> clear overflow pages associated with the cell. here if the cell is a leaf page go to DropCell state - /// or else go to InteriorNodeReplacement + /// 4. ClearOverflowPages -> Clear the overflow pages if there are any before dropping the cell, then if we are in a leaf page we just drop the cell in place. + /// if we are in interior page, we need to rotate keys in order to replace current cell (InteriorNodeReplacement). /// 5. InteriorNodeReplacement -> we copy the left subtree leaf node into the deleted interior node's place. - /// 6. DropCell -> only for leaf nodes. drop the cell. - /// 7. CheckNeedsBalancing -> check if balancing is needed. 
If yes, move to StartBalancing else move to StackRetreat - /// 8. WaitForBalancingToComplete -> perform balancing - /// 9. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish - /// 10. StackRetreat -> perform stack retreat for cursor positioning. only when balancing is not needed. go to Finish - /// 11. Finish -> Delete operation is done. Return CursorResult(Ok()) + /// 6. WaitForBalancingToComplete -> perform balancing + /// 7. SeekAfterBalancing -> adjust the cursor to a node that is closer to the deleted value. go to Finish + /// 8. Finish -> Delete operation is done. Return CursorResult(Ok()) pub fn delete(&mut self) -> Result> { assert!(self.mv_cursor.is_none()); @@ -3554,10 +3543,13 @@ impl BTreeCursor { let delete_info = self.state.delete_info().expect("cannot get delete info"); delete_info.state.clone() }; + tracing::debug!("delete state: {:?}", delete_state); match delete_state { DeleteState::Start => { let page = self.stack.top(); + page.set_dirty(); + self.pager.add_dirty(page.get().id); if matches!( page.get_contents().page_type(), PageType::TableLeaf | PageType::TableInterior @@ -3646,7 +3638,11 @@ impl BTreeCursor { original_child_pointer, }; } else { - delete_info.state = DeleteState::DropCell { cell_idx }; + let contents = page.get().contents.as_mut().unwrap(); + drop_cell(contents, cell_idx, self.usable_space() as u16)?; + + let delete_info = self.state.mut_delete_info().unwrap(); + delete_info.state = DeleteState::CheckNeedsBalancing; } } @@ -3724,33 +3720,9 @@ impl BTreeCursor { delete_info.state = DeleteState::CheckNeedsBalancing; } - DeleteState::DropCell { cell_idx } => { - let page = self.stack.top(); - return_if_locked!(page); - - if !page.is_loaded() { - self.pager.load_page(page.clone())?; - return Ok(CursorResult::IO); - } - - page.set_dirty(); - self.pager.add_dirty(page.get().id); - - let contents = page.get().contents.as_mut().unwrap(); - drop_cell(contents, cell_idx, self.usable_space() as u16)?; - - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::CheckNeedsBalancing; - } - DeleteState::CheckNeedsBalancing => { let page = self.stack.top(); - return_if_locked!(page); - - if !page.is_loaded() { - self.pager.load_page(page.clone())?; - return Ok(CursorResult::IO); - } + return_if_locked_maybe_load!(self.pager, page); let contents = page.get().contents.as_ref().unwrap(); let free_space = compute_free_space(contents, self.usable_space() as u16); @@ -3764,24 +3736,20 @@ impl BTreeCursor { let delete_info = self.state.mut_delete_info().unwrap(); if needs_balancing { - delete_info.state = DeleteState::StartBalancing { target_key }; + if delete_info.balance_write_info.is_none() { + let mut write_info = WriteInfo::new(); + write_info.state = WriteState::BalanceStart; + delete_info.balance_write_info = Some(write_info); + } + + delete_info.state = DeleteState::WaitForBalancingToComplete { target_key } } else { - delete_info.state = DeleteState::StackRetreat; + self.stack.retreat(); + self.state = CursorState::None; + return Ok(CursorResult::Ok(())); } } - DeleteState::StartBalancing { target_key } => { - let delete_info = self.state.mut_delete_info().unwrap(); - - if delete_info.balance_write_info.is_none() { - let mut write_info = WriteInfo::new(); - write_info.state = WriteState::BalanceStart; - delete_info.balance_write_info = Some(write_info); - } - - delete_info.state = DeleteState::WaitForBalancingToComplete { target_key } - } - DeleteState::WaitForBalancingToComplete { 
target_key } => { let delete_info = self.state.mut_delete_info().unwrap(); @@ -3806,6 +3774,7 @@ impl BTreeCursor { } CursorResult::IO => { + // Move to seek state // Save balance progress and return IO let write_info = match &self.state { CursorState::Write(wi) => wi.clone(), @@ -3830,19 +3799,6 @@ impl BTreeCursor { }; return_if_io!(self.seek(key, SeekOp::EQ)); - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::Finish; - delete_info.balance_write_info = None; - } - - DeleteState::StackRetreat => { - self.stack.retreat(); - let delete_info = self.state.mut_delete_info().unwrap(); - delete_info.state = DeleteState::Finish; - delete_info.balance_write_info = None; - } - - DeleteState::Finish => { self.state = CursorState::None; return Ok(CursorResult::Ok(())); } diff --git a/core/types.rs b/core/types.rs index 5d86f97f3..6ed66cc0c 100644 --- a/core/types.rs +++ b/core/types.rs @@ -903,6 +903,26 @@ impl ImmutableRecord { } } +impl Display for ImmutableRecord { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for value in &self.values { + match value { + RefValue::Null => write!(f, "NULL")?, + RefValue::Integer(i) => write!(f, "Integer({})", *i)?, + RefValue::Float(flo) => write!(f, "Float({})", *flo)?, + RefValue::Text(text_ref) => write!(f, "Text({})", text_ref.as_str())?, + RefValue::Blob(raw_slice) => { + write!(f, "Blob({})", String::from_utf8_lossy(raw_slice.to_slice()))? + } + } + if value != self.values.last().unwrap() { + write!(f, ", ")?; + } + } + Ok(()) + } +} + impl Clone for ImmutableRecord { fn clone(&self) -> Self { let mut new_values = Vec::new(); diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index f76bc5add..1a1af59d2 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -3,6 +3,7 @@ use crate::numeric::{NullableInteger, Numeric}; use crate::storage::database::FileMemoryStorage; use crate::storage::page_cache::DumbLruPageCache; use crate::storage::pager::CreateBTreeFlags; +use crate::types::ImmutableRecord; use crate::{ error::{LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_PRIMARYKEY}, ext::ExtValue, @@ -3756,6 +3757,11 @@ pub fn op_delete( { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); + tracing::debug!( + "op_delete(record={:?}, rowid={:?})", + cursor.record(), + cursor.rowid()? + ); return_if_io!(cursor.delete()); } let prev_changes = program.n_change.get(); @@ -3764,6 +3770,10 @@ pub fn op_delete( Ok(InsnFunctionStepResult::Step) } +pub enum OpIdxDeleteState { + Seeking(ImmutableRecord), // First seek row to delete + Deleting, +} pub fn op_idx_delete( program: &Program, state: &mut ProgramState, @@ -3779,29 +3789,50 @@ pub fn op_idx_delete( else { unreachable!("unexpected Insn {:?}", insn) }; - let record = make_record(&state.registers, start_reg, num_regs); - { - let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)); - - if cursor.rowid()?.is_none() { - // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching - // index entry is found. This happens when running an UPDATE or DELETE statement and the - // index entry to be updated or deleted is not found. For some uses of IdxDelete - // (example: the EXCEPT operator) it does not matter that no matching entry is found. - // For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. 
- return Err(LimboError::Corrupt(format!( - "IdxDelete: no matching index entry found for record {:?}", - record - ))); + loop { + match &state.op_idx_delete_state { + Some(OpIdxDeleteState::Seeking(record)) => { + { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.seek(SeekKey::IndexKey(&record), SeekOp::EQ)); + tracing::debug!( + "op_idx_delete(seek={}, record={} rowid={:?})", + &record, + cursor.record().as_ref().unwrap(), + cursor.rowid() + ); + if cursor.rowid()?.is_none() { + // If P5 is not zero, then raise an SQLITE_CORRUPT_INDEX error if no matching + // index entry is found. This happens when running an UPDATE or DELETE statement and the + // index entry to be updated or deleted is not found. For some uses of IdxDelete + // (example: the EXCEPT operator) it does not matter that no matching entry is found. + // For those cases, P5 is zero. Also, do not raise this (self-correcting and non-critical) error if in writable_schema mode. + return Err(LimboError::Corrupt(format!( + "IdxDelete: no matching index entry found for record {:?}", + record + ))); + } + } + state.op_idx_delete_state = Some(OpIdxDeleteState::Deleting); + } + Some(OpIdxDeleteState::Deleting) => { + { + let mut cursor = state.get_cursor(*cursor_id); + let cursor = cursor.as_btree_mut(); + return_if_io!(cursor.delete()); + } + let n_change = program.n_change.get(); + program.n_change.set(n_change + 1); + state.pc += 1; + return Ok(InsnFunctionStepResult::Step); + } + None => { + let record = make_record(&state.registers, start_reg, num_regs); + state.op_idx_delete_state = Some(OpIdxDeleteState::Seeking(record)); + } } - return_if_io!(cursor.delete()); } - let n_change = program.n_change.get(); - program.n_change.set(n_change + 1); - state.pc += 1; - Ok(InsnFunctionStepResult::Step) } pub fn op_idx_insert( diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 45b23c538..9652dafef 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -43,7 +43,7 @@ use crate::CheckpointStatus; #[cfg(feature = "json")] use crate::json::JsonCacheCell; use crate::{Connection, MvStore, Result, TransactionState}; -use execute::{InsnFunction, InsnFunctionStepResult}; +use execute::{InsnFunction, InsnFunctionStepResult, OpIdxDeleteState}; use rand::{ distributions::{Distribution, Uniform}, @@ -257,6 +257,7 @@ pub struct ProgramState { halt_state: Option, #[cfg(feature = "json")] json_cache: JsonCacheCell, + op_idx_delete_state: Option, } impl ProgramState { @@ -280,6 +281,7 @@ impl ProgramState { halt_state: None, #[cfg(feature = "json")] json_cache: JsonCacheCell::new(), + op_idx_delete_state: None, } } From 51d43074f3fa474fcfb6b0d91965fe398b5d7c30 Mon Sep 17 00:00:00 2001 From: meteorgan Date: Tue, 29 Apr 2025 22:34:20 +0800 Subject: [PATCH 402/425] Support literal-value current_time, current_date and current_timestamp --- core/translate/expr.rs | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 79ccb1fe9..dba075ac2 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,8 +1,12 @@ use limbo_sqlite3_parser::ast::{self, UnaryOperator}; +use super::emitter::Resolver; +use super::optimizer::Optimizable; +use super::plan::{Operation, TableReference}; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc}; +use crate::functions::datetime; use crate::schema::{Table, Type}; use 
crate::util::{exprs_are_equivalent, normalize_ident}; use crate::vdbe::{ @@ -12,10 +16,6 @@ use crate::vdbe::{ }; use crate::Result; -use super::emitter::Resolver; -use super::optimizer::Optimizable; -use super::plan::{Operation, TableReference}; - #[derive(Debug, Clone, Copy)] pub struct ConditionMetadata { pub jump_if_condition_is_true: bool, @@ -2020,9 +2020,27 @@ pub fn translate_expr( }); Ok(target_register) } - ast::Literal::CurrentDate => todo!(), - ast::Literal::CurrentTime => todo!(), - ast::Literal::CurrentTimestamp => todo!(), + ast::Literal::CurrentDate => { + program.emit_insn(Insn::String8 { + value: datetime::exec_date(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::CurrentTime => { + program.emit_insn(Insn::String8 { + value: datetime::exec_time(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::CurrentTimestamp => { + program.emit_insn(Insn::String8 { + value: datetime::exec_datetime_full(&[]).to_string(), + dest: target_register, + }); + Ok(target_register) + } }, ast::Expr::Name(_) => todo!(), ast::Expr::NotNull(_) => todo!(), From be5ae7d0e3d78d8da8d920d990c1c4d733a63e45 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:38:01 -0400 Subject: [PATCH 403/425] Bump io_uring dependency to 0.7.5 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index f23aeeeb0..8633cc3b0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -30,7 +30,7 @@ testvfs = ["limbo_ext_tests/static"] fuzz = [] [target.'cfg(target_os = "linux")'.dependencies] -io-uring = { version = "0.6.1", optional = true } +io-uring = { version = "0.7.5", optional = true } [target.'cfg(target_family = "unix")'.dependencies] polling = "3.7.2" From 2785fd5d4af05574ee6fe354397b4d5d176f4180 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:38:46 -0400 Subject: [PATCH 404/425] Bump polling crate dependency to 3.7.4 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 8633cc3b0..507c6d99f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,7 +33,7 @@ fuzz = [] io-uring = { version = "0.7.5", optional = true } [target.'cfg(target_family = "unix")'.dependencies] -polling = "3.7.2" +polling = "3.7.4" rustix = "0.38.34" [target.'cfg(not(target_family = "wasm"))'.dependencies] From 582ca686403409d396a079025fe47796c977bc81 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:39:26 -0400 Subject: [PATCH 405/425] Bump rustix dependency to v1.0.5 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 507c6d99f..fbb7a4ad0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -34,7 +34,7 @@ io-uring = { version = "0.7.5", optional = true } [target.'cfg(target_family = "unix")'.dependencies] polling = "3.7.4" -rustix = "0.38.34" +rustix = "1.0.5" [target.'cfg(not(target_family = "wasm"))'.dependencies] mimalloc = { version = "0.1", default-features = false } From ba225ade0d9ca858a85d33d65f66fdd747e19977 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:42:10 -0400 Subject: [PATCH 406/425] Bump libc dependency to 0.2.172 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index fbb7a4ad0..df8a0b97f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -45,7 +45,7 @@ limbo_ext = { workspace = true, features 
= ["core_only"] } cfg_block = "0.1.1" fallible-iterator = "0.3.0" hex = "0.4.3" -libc = { version = "0.2.155", optional = true } +libc = { version = "0.2.172", optional = true } limbo_sqlite3_parser = { workspace = true } thiserror = "1.0.61" getrandom = { version = "0.2.15" } From f581d1de3a5dc0408b8555307be1f96a40984b55 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:43:07 -0400 Subject: [PATCH 407/425] Bump miette dependency to 7.6.0 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index df8a0b97f..5ee0b4e52 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -67,7 +67,7 @@ limbo_series = { workspace = true, optional = true, features = ["static"] } limbo_ipaddr = { workspace = true, optional = true, features = ["static"] } limbo_completion = { workspace = true, optional = true, features = ["static"] } limbo_ext_tests = { workspace = true, optional = true, features = ["static"] } -miette = "7.4.0" +miette = "7.6.0" strum = "0.26" parking_lot = "0.12.3" crossbeam-skiplist = "0.1.3" From 7a3d949bd19ad66e6657bdfad1844a0677ad1db0 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:43:46 -0400 Subject: [PATCH 408/425] Bump mimalloc dependency to 0.1.46 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 5ee0b4e52..b71e72489 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -37,7 +37,7 @@ polling = "3.7.4" rustix = "1.0.5" [target.'cfg(not(target_family = "wasm"))'.dependencies] -mimalloc = { version = "0.1", default-features = false } +mimalloc = { version = "0.1.46", default-features = false } libloading = "0.8.6" [dependencies] From 7b6452034be91dc05cd4117df112fc95ab4a9afd Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 10:44:26 -0400 Subject: [PATCH 409/425] Bump lru dependency to 0.14.0 --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index b71e72489..de2ae29cc 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -96,7 +96,7 @@ rand = "0.8.5" # Required for quickcheck rand_chacha = "0.9.0" env_logger = "0.11.6" test-log = { version = "0.2.17", features = ["trace"] } -lru = "0.13.0" +lru = "0.14.0" [[bench]] name = "benchmark" From 1e2be35e3b878d7c5010c3a25a70d351dec70811 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 29 Apr 2025 23:07:28 -0400 Subject: [PATCH 410/425] Add fs feature to rustix dependency --- core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index de2ae29cc..61c0249c8 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -34,7 +34,7 @@ io-uring = { version = "0.7.5", optional = true } [target.'cfg(target_family = "unix")'.dependencies] polling = "3.7.4" -rustix = "1.0.5" +rustix = { version = "1.0.5", features = ["fs"]} [target.'cfg(not(target_family = "wasm"))'.dependencies] mimalloc = { version = "0.1.46", default-features = false } From 525b7fdbaab388dae9d61cf6193cc18da6489949 Mon Sep 17 00:00:00 2001 From: Anton Harniakou Date: Wed, 30 Apr 2025 09:00:35 +0300 Subject: [PATCH 411/425] Add PRAGMA schema_version --- core/storage/sqlite3_ondisk.rs | 2 +- core/translate/pragma.rs | 12 ++++++++++++ core/vdbe/execute.rs | 1 + vendored/sqlite3-parser/src/parser/ast/mod.rs | 2 ++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 200dd5490..ee6b3fb34 100644 --- 
a/core/storage/sqlite3_ondisk.rs
+++ b/core/storage/sqlite3_ondisk.rs
@@ -117,7 +117,7 @@ pub struct DatabaseHeader {
     pub freelist_pages: u32,
 
     /// The schema cookie. Incremented when the database schema changes.
-    schema_cookie: u32,
+    pub schema_cookie: u32,
 
     /// The schema format number. Supported formats are 1, 2, 3, and 4.
     schema_format: u32,
diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs
index a3662c9c2..46eba30c6 100644
--- a/core/translate/pragma.rs
+++ b/core/translate/pragma.rs
@@ -154,6 +154,10 @@ fn update_pragma(
             // TODO: Implement updating user_version
             todo!("updating user_version not yet implemented")
         }
+        PragmaName::SchemaVersion => {
+            // TODO: Implement updating schema_version
+            todo!("updating schema_version not yet implemented")
+        }
         PragmaName::TableInfo => {
             // because we need control over the write parameter for the transaction,
             // this should be unreachable. We have to force-call query_pragma before
@@ -260,6 +264,14 @@ fn query_pragma(
             });
             program.emit_result_row(register, 1);
         }
+        PragmaName::SchemaVersion => {
+            program.emit_insn(Insn::ReadCookie {
+                db: 0,
+                dest: register,
+                cookie: Cookie::SchemaVersion,
+            });
+            program.emit_result_row(register, 1);
+        }
         PragmaName::PageSize => {
             program.emit_int(database_header.lock().page_size.into(), register);
             program.emit_result_row(register, 1);
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index f76bc5add..a3571a749 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -4336,6 +4336,7 @@ pub fn op_read_cookie(
     }
     let cookie_value = match cookie {
         Cookie::UserVersion => pager.db_header.lock().user_version.into(),
+        Cookie::SchemaVersion => pager.db_header.lock().schema_cookie.into(),
         cookie => todo!("{cookie:?} is not yet implement for ReadCookie"),
     };
     state.registers[*dest] = Register::OwnedValue(OwnedValue::Integer(cookie_value));
diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs
index 74da5b647..f2275e952 100644
--- a/vendored/sqlite3-parser/src/parser/ast/mod.rs
+++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs
@@ -1624,6 +1624,8 @@ pub enum PragmaName {
     PageCount,
     /// Return the page size of the database in bytes.
     PageSize,
+    /// Returns the schema version of the database file.
+    SchemaVersion,
     /// returns information about the columns of a table
     TableInfo,
     /// Returns the user version of the database file.

From 8f202e80155bacfb00310a46030fa39a4b6b8c99 Mon Sep 17 00:00:00 2001
From: Peter Hayman
Date: Thu, 1 May 2025 15:31:38 +1000
Subject: [PATCH 412/425] add Row::column_count

---
 bindings/rust/src/lib.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs
index 783012c5d..6c04c231a 100644
--- a/bindings/rust/src/lib.rs
+++ b/bindings/rust/src/lib.rs
@@ -297,4 +297,8 @@ impl Row {
             limbo_core::OwnedValue::Blob(items) => Ok(Value::Blob(items.to_vec())),
         }
     }
+
+    pub fn column_count(&self) -> usize {
+        self.values.len()
+    }
 }

From 1a2a383635ab9e3302594e59235d87c3c6eb7d96 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Tue, 29 Apr 2025 15:22:40 +0300
Subject: [PATCH 413/425] fix setting default value for primary key on UPDATE

I noticed that when updating a table with a primary key, the primary key
column would sometimes be set to NULL. The primary key can be wrongly
nullified when it isn't a rowid alias, meaning it isn't declared as
INTEGER PRIMARY KEY.
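For illustration, here is a sketch of the failure mode in the style of the
integration tests (the helpers are the ones from tests/integration/common.rs
and test_write_path.rs, assumed here for the example rather than taken from
this patch):

    // `x REAL PRIMARY KEY` is a primary key but not a rowid alias (only a
    // column declared INTEGER PRIMARY KEY aliases the rowid), so emitting
    // NULL for it in the update record used to wipe out its value.
    let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE t (x REAL PRIMARY KEY, y TEXT);");
    let conn = tmp_db.connect_limbo();
    run_query(&tmp_db, &conn, "INSERT INTO t VALUES (1.0, 'foo')")?;
    run_query(&tmp_db, &conn, "UPDATE t SET y = 'bar' WHERE x = 1.0")?;
    // After the fix, x is still 1.0 here instead of NULL.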
---
 core/translate/emitter.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs
index 77a3efdce..5ea2f9cfa 100644
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -686,7 +686,7 @@ fn emit_update_insns(
 
             // don't emit null for pkey of virtual tables. they require first two args
             // before the 'record' to be explicitly non-null
-            if table_column.primary_key && !is_virtual {
+            if table_column.is_rowid_alias && !is_virtual {
                 program.emit_null(target_reg, None);
             } else if is_virtual {
                 program.emit_insn(Insn::VColumn {

From 64a12ed88788c1f524f1e16bc94ad82387ef1de8 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Thu, 1 May 2025 10:54:11 +0300
Subject: [PATCH 414/425] update index on indexed columns

Previously, columns that were indexed were updated only in the table
b-tree, but not in the index b-tree. This commit enables updates on the
affected indexes as well.
---
 core/translate/emitter.rs | 62 +++++++++++++++++++++++++++++++++++++--
 core/translate/plan.rs    |  1 +
 core/translate/update.rs  | 15 ++++++++++
 3 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs
index 5ea2f9cfa..09a9bbb9f 100644
--- a/core/translate/emitter.rs
+++ b/core/translate/emitter.rs
@@ -10,8 +10,8 @@ use crate::function::Func;
 use crate::schema::Index;
 use crate::translate::plan::{DeletePlan, Plan, Search};
 use crate::util::exprs_are_equivalent;
-use crate::vdbe::builder::ProgramBuilder;
-use crate::vdbe::insn::RegisterOrLiteral;
+use crate::vdbe::builder::{CursorType, ProgramBuilder};
+use crate::vdbe::insn::{IdxInsertFlags, RegisterOrLiteral};
 use crate::vdbe::{insn::Insn, BranchOffset};
 use crate::{Result, SymbolTable};
 
@@ -546,19 +546,34 @@ fn emit_program_for_update(
             target_pc: after_main_loop_label,
         });
     }
+
     init_loop(
         program,
         &mut t_ctx,
         &plan.table_references,
         OperationMode::UPDATE,
     )?;
+    // Open indexes for update.
+    let mut index_cursors = vec![];
+    // TODO: do not reopen if there is table reference using it.
+    for index in &plan.indexes_to_update {
+        let index_cursor = program.alloc_cursor_id(
+            Some(index.table_name.clone()),
+            CursorType::BTreeIndex(index.clone()),
+        );
+        program.emit_insn(Insn::OpenWrite {
+            cursor_id: index_cursor,
+            root_page: RegisterOrLiteral::Literal(index.root_page),
+        });
+        index_cursors.push(index_cursor);
+    }
     open_loop(
         program,
         &mut t_ctx,
         &plan.table_references,
         &plan.where_clause,
     )?;
 
-    emit_update_insns(&plan, &t_ctx, program)?;
+    emit_update_insns(&plan, &t_ctx, program, index_cursors)?;
 
     close_loop(program, &mut t_ctx, &plan.table_references)?;
 
     program.preassign_label_to_next_insn(after_main_loop_label);
@@ -573,6 +588,7 @@ fn emit_update_insns(
     plan: &UpdatePlan,
     t_ctx: &TranslateCtx,
     program: &mut ProgramBuilder,
+    index_cursors: Vec<usize>,
 ) -> crate::Result<()> {
     let table_ref = &plan.table_references.first().unwrap();
     let loop_labels = t_ctx.labels_main_loop.first().unwrap();
@@ -663,6 +679,46 @@ fn emit_update_insns(
         )?;
     }
 
+    // Update the indexes first. Columns that are updated are translated from their SET expressions, and those that aren't modified are
+    // read from the table. A multi-column index key may be updated only partially.
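+    // Each index record is laid out as (indexed columns..., rowid): the loop
+    // below allocates one register per index column plus a trailing register
+    // for the rowid, so the rebuilt entry still points back at the table row.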
+ for (index, index_cursor) in plan.indexes_to_update.iter().zip(index_cursors) { + let index_record_reg_count = index.columns.len() + 1; + let index_record_reg_start = program.alloc_registers(index_record_reg_count); + for (idx, column) in index.columns.iter().enumerate() { + if let Some((_, expr)) = plan.set_clauses.iter().find(|(i, _)| *i == idx) { + translate_expr( + program, + Some(&plan.table_references), + expr, + index_record_reg_start + idx, + &t_ctx.resolver, + )?; + } else { + program.emit_insn(Insn::Column { + cursor_id: cursor_id, + column: column.pos_in_table, + dest: index_record_reg_start + idx, + }); + } + } + program.emit_insn(Insn::RowId { + cursor_id: cursor_id, + dest: index_record_reg_start + index.columns.len(), + }); + let index_record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: index_record_reg_start, + count: index_record_reg_count, + dest_reg: index_record_reg, + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: index_cursor, + record_reg: index_record_reg, + unpacked_start: Some(index_record_reg_start), + unpacked_count: Some(index_record_reg_count as u16), + flags: IdxInsertFlags::new(), + }); + } // we scan a column at a time, loading either the column's values, or the new value // from the Set expression, into registers so we can emit a MakeRecord and update the row. let start = if is_virtual { beg + 2 } else { beg + 1 }; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index ad6514247..2baf59d32 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -315,6 +315,7 @@ pub struct UpdatePlan { pub returning: Option>, // whether the WHERE clause is always false pub contains_constant_false_condition: bool, + pub indexes_to_update: Vec>, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] diff --git a/core/translate/update.rs b/core/translate/update.rs index a0e32e640..70b275396 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -183,6 +183,20 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< .map(|l| parse_limit(l)) .unwrap_or(Ok((None, None)))?; + let mut indexes_to_update = vec![]; + let indexes = schema.get_indices(&table_name.0); + for (set_column_index, _) in &set_clauses { + if let Some(index) = indexes.iter().find(|index| { + index + .columns + .iter() + .find(|column| column.pos_in_table == *set_column_index) + .is_some() + }) { + indexes_to_update.push(index.clone()); + } + } + Ok(Plan::Update(UpdatePlan { table_references, set_clauses, @@ -192,5 +206,6 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< limit, offset, contains_constant_false_condition: false, + indexes_to_update, })) } From e503bb46414e968c0db24f55fac0ad80d07cdb4d Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 1 May 2025 11:16:25 +0300 Subject: [PATCH 415/425] run_query helper for test_write_path --- .../query_processing/test_write_path.rs | 359 +++++------------- 1 file changed, 104 insertions(+), 255 deletions(-) diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 9c6107d58..bab91e3ff 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -1,6 +1,6 @@ use crate::common::{self, maybe_setup_tracing}; use crate::common::{compare_string, do_flush, TempDatabase}; -use limbo_core::{Connection, OwnedValue, StepResult}; +use limbo_core::{Connection, OwnedValue, Row, StepResult}; use 
log::debug; use std::rc::Rc; @@ -153,52 +153,19 @@ fn test_sequential_write() -> anyhow::Result<()> { println!("progress {:.1}%", progress); } let insert_query = format!("INSERT INTO test VALUES ({})", i); - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, &insert_query)?; let mut current_read_index = 0; - match conn.query(list_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let first_value = row.get::<&OwnedValue>(0).expect("missing id"); - let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, - _ => unreachable!(), - }; - assert_eq!(current_read_index, id); - current_read_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => { - panic!("Database is busy"); - } - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, &list_query, |row: &Row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(current_read_index, id); + current_read_index += 1; + })?; common::do_flush(&conn, &tmp_db)?; } Ok(()) @@ -215,55 +182,22 @@ fn test_regression_multi_row_insert() -> anyhow::Result<()> { let insert_query = "INSERT INTO test VALUES (-2), (-3), (-1)"; let list_query = "SELECT * FROM test"; - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, insert_query)?; common::do_flush(&conn, &tmp_db)?; let mut current_read_index = 1; let expected_ids = vec![-3, -2, -1]; let mut actual_ids = Vec::new(); - match conn.query(list_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let first_value = row.get::<&OwnedValue>(0).expect("missing id"); - let id = match first_value { - OwnedValue::Float(f) => *f as i32, - _ => panic!("expected float"), - }; - actual_ids.push(id); - current_read_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => { - panic!("Database is busy"); - } - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + OwnedValue::Float(f) => *f as i32, + _ => panic!("expected float"), + }; + actual_ids.push(id); + current_read_index += 1; + })?; assert_eq!(current_read_index, 4); // Verify we read all rows // sort ids @@ -331,49 +265,18 @@ fn test_wal_checkpoint() -> anyhow::Result<()> { let insert_query = format!("INSERT INTO test VALUES ({})", i); do_flush(&conn, &tmp_db)?; conn.checkpoint()?; - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? 
{ - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, &insert_query)?; } do_flush(&conn, &tmp_db)?; conn.clear_page_cache()?; let list_query = "SELECT * FROM test LIMIT 1"; let mut current_index = 0; - match conn.query(list_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let id = row.get::(0).unwrap(); - assert_eq!(current_index, id as usize); - current_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let id = row.get::(0).unwrap(); + assert_eq!(current_index, id as usize); + current_index += 1; + })?; do_flush(&conn, &tmp_db)?; Ok(()) } @@ -387,21 +290,7 @@ fn test_wal_restart() -> anyhow::Result<()> { fn insert(i: usize, conn: &Rc, tmp_db: &TempDatabase) -> anyhow::Result<()> { debug!("inserting {}", i); let insert_query = format!("INSERT INTO test VALUES ({})", i); - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(tmp_db, conn, &insert_query)?; debug!("inserted {}", i); tmp_db.io.run_once()?; Ok(()) @@ -410,26 +299,13 @@ fn test_wal_restart() -> anyhow::Result<()> { fn count(conn: &Rc, tmp_db: &TempDatabase) -> anyhow::Result { debug!("counting"); let list_query = "SELECT count(x) FROM test"; - loop { - if let Some(ref mut rows) = conn.query(list_query)? { - loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let count = row.get::(0).unwrap(); - debug!("counted {}", count); - return Ok(count as usize); - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => panic!("Database is busy"), - } - } - } - } + let mut count = None; + run_query_on_row(tmp_db, conn, list_query, |row: &Row| { + assert!(count.is_none()); + count = Some(row.get::(0).unwrap() as usize); + debug!("counted {:?}", count); + })?; + Ok(count.unwrap()) } { @@ -476,113 +352,86 @@ fn test_write_delete_with_index() -> anyhow::Result<()> { let max_iterations = 1000; for i in 0..max_iterations { println!("inserting {} ", i); - if (i % 100) == 0 { - let progress = (i as f64 / max_iterations as f64) * 100.0; - println!("progress {:.1}%", progress); - } let insert_query = format!("INSERT INTO test VALUES ({})", i); - match conn.query(insert_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, &insert_query)?; } for i in 0..max_iterations { println!("deleting {} ", i); - if (i % 100) == 0 { - let progress = (i as f64 / max_iterations as f64) * 100.0; - println!("progress {:.1}%", progress); - } let delete_query = format!("delete from test where x={}", i); - match conn.query(delete_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? 
{ - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Done => break, - _ => unreachable!(), - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - }; + run_query(&tmp_db, &conn, &delete_query)?; println!("listing after deleting {} ", i); let mut current_read_index = i + 1; - match conn.query(list_query) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let first_value = row.get::<&OwnedValue>(0).expect("missing id"); - let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, - _ => unreachable!(), - }; - assert_eq!(current_read_index, id); - current_read_index += 1; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => { - panic!("Database is busy"); - } - } - }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + run_query_on_row(&tmp_db, &conn, list_query, |row: &Row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(current_read_index, id); + current_read_index += 1; + })?; for i in i + 1..max_iterations { // now test with seek - match conn.query(format!("select * from test where x = {}", i)) { - Ok(Some(ref mut rows)) => loop { - match rows.step()? { - StepResult::Row => { - let row = rows.row().unwrap(); - let first_value = row.get::<&OwnedValue>(0).expect("missing id"); - let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, - _ => unreachable!(), - }; - assert_eq!(i, id); - break; - } - StepResult::IO => { - tmp_db.io.run_once()?; - } - StepResult::Interrupt => break, - StepResult::Done => break, - StepResult::Busy => { - panic!("Database is busy"); - } - } + run_query_on_row( + &tmp_db, + &conn, + &format!("select * from test where x = {}", i), + |row| { + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); + let id = match first_value { + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, + _ => unreachable!(), + }; + assert_eq!(i, id); }, - Ok(None) => {} - Err(err) => { - eprintln!("{}", err); - } - } + )?; } } Ok(()) } + +fn run_query(tmp_db: &TempDatabase, conn: &Rc, query: &str) -> anyhow::Result<()> { + run_query_core(tmp_db, conn, query, None::) +} + +fn run_query_on_row( + tmp_db: &TempDatabase, + conn: &Rc, + query: &str, + on_row: impl FnMut(&Row), +) -> anyhow::Result<()> { + run_query_core(tmp_db, conn, query, Some(on_row)) +} + +fn run_query_core( + tmp_db: &TempDatabase, + conn: &Rc, + query: &str, + mut on_row: Option, +) -> anyhow::Result<()> { + match conn.query(query) { + Ok(Some(ref mut rows)) => loop { + match rows.step()? 
{ + StepResult::IO => { + tmp_db.io.run_once()?; + } + StepResult::Done => break, + StepResult::Row => { + if let Some(on_row) = on_row.as_mut() { + let row = rows.row().unwrap(); + on_row(row) + } + } + _ => unreachable!(), + } + }, + Ok(None) => {} + Err(err) => { + eprintln!("{}", err); + } + }; + Ok(()) +} From c808863256b1973df354b4bb70f83334b9f11678 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 1 May 2025 11:43:42 +0300 Subject: [PATCH 416/425] test update with index --- core/vdbe/mod.rs | 9 +++++ .../query_processing/test_write_path.rs | 36 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 9652dafef..2adf438b6 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -623,6 +623,15 @@ impl<'a> FromValueRow<'a> for i64 { } } +impl<'a> FromValueRow<'a> for f64 { + fn from_value(value: &'a OwnedValue) -> Result { + match value { + OwnedValue::Float(f) => Ok(*f), + _ => Err(LimboError::ConversionError("Expected integer value".into())), + } + } +} + impl<'a> FromValueRow<'a> for String { fn from_value(value: &'a OwnedValue) -> Result { match value { diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index bab91e3ff..407d1e366 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -393,6 +393,42 @@ fn test_write_delete_with_index() -> anyhow::Result<()> { Ok(()) } +#[test] +fn test_update_with_index() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + maybe_setup_tracing(); + + let tmp_db = TempDatabase::new_with_rusqlite("CREATE TABLE test (x REAL PRIMARY KEY, y TEXT);"); + let conn = tmp_db.connect_limbo(); + + run_query(&tmp_db, &conn, "INSERT INTO test VALUES (1.0, 'foo')")?; + run_query(&tmp_db, &conn, "INSERT INTO test VALUES (2.0, 'bar')")?; + + run_query_on_row(&tmp_db, &conn, "SELECT * from test WHERE x=10.0", |row| { + assert_eq!(row.get::(0).unwrap(), 1.0); + })?; + run_query(&tmp_db, &conn, "UPDATE test SET x=10.0 WHERE x=1.0")?; + run_query_on_row(&tmp_db, &conn, "SELECT * from test WHERE x=10.0", |row| { + assert_eq!(row.get::(0).unwrap(), 10.0); + })?; + + let mut count_1 = 0; + let mut count_10 = 0; + run_query_on_row(&tmp_db, &conn, "SELECT * from test", |row| { + let v = row.get::(0).unwrap(); + if v == 1.0 { + count_1 += 1; + } else if v == 10.0 { + count_10 += 1; + } + })?; + assert_eq!(count_1, 0, "1.0 shouldn't be inside table"); + assert_eq!(count_10, 1, "10.0 should have existed"); + + Ok(()) +} + fn run_query(tmp_db: &TempDatabase, conn: &Rc, query: &str) -> anyhow::Result<()> { run_query_core(tmp_db, conn, query, None::) } From f15a17699b37471a3a32e1aa1bbd962f9438a517 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 1 May 2025 12:10:44 +0300 Subject: [PATCH 417/425] check indexes are not added twice in update plan --- core/translate/emitter.rs | 2 +- core/translate/update.rs | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 09a9bbb9f..e2d3f78c4 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -554,7 +554,7 @@ fn emit_program_for_update( OperationMode::UPDATE, )?; // Open indexes for update. - let mut index_cursors = vec![]; + let mut index_cursors = Vec::with_capacity(plan.indexes_to_update.len()); // TODO: do not reopen if there is table reference using it. 
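     // One write cursor is opened per index that a SET clause can touch; they
     // are kept in index_cursors so the update loop can rewrite table and
     // index entries in the same pass.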
for index in &plan.indexes_to_update { let index_cursor = program.alloc_cursor_id( diff --git a/core/translate/update.rs b/core/translate/update.rs index 70b275396..3e36583dc 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -183,19 +183,20 @@ pub fn prepare_update_plan(schema: &Schema, body: &mut Update) -> crate::Result< .map(|l| parse_limit(l)) .unwrap_or(Ok((None, None)))?; - let mut indexes_to_update = vec![]; + // Check what indexes will need to be updated by checking set_clauses and see + // if a column is contained in an index. let indexes = schema.get_indices(&table_name.0); - for (set_column_index, _) in &set_clauses { - if let Some(index) = indexes.iter().find(|index| { - index - .columns - .iter() - .find(|column| column.pos_in_table == *set_column_index) - .is_some() - }) { - indexes_to_update.push(index.clone()); - } - } + let indexes_to_update = indexes + .iter() + .filter(|index| { + index.columns.iter().any(|index_column| { + set_clauses + .iter() + .any(|(set_index_column, _)| index_column.pos_in_table == *set_index_column) + }) + }) + .cloned() + .collect(); Ok(Plan::Update(UpdatePlan { table_references, From f025f7e91e4d2743ff6dd3eda4855271a8b6d8f0 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 28 Apr 2025 19:49:29 -0400 Subject: [PATCH 418/425] Fix panic on async io due to reading locked page --- core/storage/btree.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 141a2b35d..37b1c935b 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1944,6 +1944,7 @@ impl BTreeCursor { let current_sibling = sibling_pointer; for i in (0..=current_sibling).rev() { let page = self.pager.read_page(pgno as usize)?; + return_if_locked!(page); debug_validate_cells!(&page.get_contents(), self.usable_space() as u16); pages_to_balance[i].replace(page); assert_eq!( From d4cf8367ba67adb489773f8d069cbd9c0089c63c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 2 May 2025 10:54:50 -0400 Subject: [PATCH 419/425] Wrap return_if_locked in balance non root in debug assertion cfg --- core/storage/btree.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 37b1c935b..0523edcb8 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1944,8 +1944,11 @@ impl BTreeCursor { let current_sibling = sibling_pointer; for i in (0..=current_sibling).rev() { let page = self.pager.read_page(pgno as usize)?; - return_if_locked!(page); - debug_validate_cells!(&page.get_contents(), self.usable_space() as u16); + #[cfg(debug_assertions)] + { + return_if_locked!(page); + debug_validate_cells!(&page.get_contents(), self.usable_space() as u16); + } pages_to_balance[i].replace(page); assert_eq!( parent_contents.overflow_cells.len(), From 0c22382f3c9f8aecfae2072d200a0021733e4377 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 2 May 2025 16:30:48 -0300 Subject: [PATCH 420/425] shared lock on file and throw ReadOnly error in transaction --- Cargo.lock | 1 + core/Cargo.toml | 1 + core/error.rs | 2 ++ core/io/generic.rs | 14 +++++++++----- core/io/io_uring.rs | 16 ++++++++++------ core/io/mod.rs | 23 +++++++++++++---------- core/io/unix.rs | 21 ++++++++++++--------- core/io/vfs.rs | 2 +- core/io/windows.rs | 14 +++++++++----- core/lib.rs | 27 +++++++++++++++++++++++++-- core/vdbe/execute.rs | 13 +++++-------- 11 files changed, 88 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a4c6069c..3fb8760ac 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -1726,6 +1726,7 @@ dependencies = [ name = "limbo_core" version = "0.0.19" dependencies = [ + "bitflags 2.9.0", "built", "cfg_block", "chrono", diff --git a/core/Cargo.toml b/core/Cargo.toml index f23aeeeb0..9fd71b059 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -73,6 +73,7 @@ parking_lot = "0.12.3" crossbeam-skiplist = "0.1.3" tracing = "0.1.41" ryu = "1.0.19" +bitflags = "2.9.0" [build-dependencies] chrono = { version = "0.4.38", default-features = false } diff --git a/core/error.rs b/core/error.rs index e8eb83a5a..1eca50305 100644 --- a/core/error.rs +++ b/core/error.rs @@ -51,6 +51,8 @@ pub enum LimboError { IntegerOverflow, #[error("Schema is locked for write")] SchemaLocked, + #[error("Database Connection is read-only")] + ReadOnly, } #[macro_export] diff --git a/core/io/generic.rs b/core/io/generic.rs index fd59ece88..aab5f2687 100644 --- a/core/io/generic.rs +++ b/core/io/generic.rs @@ -20,11 +20,15 @@ unsafe impl Sync for GenericIO {} impl IO for GenericIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; Ok(Arc::new(GenericFile { file: RefCell::new(file), memory_io: Arc::new(MemoryIO::new()), diff --git a/core/io/io_uring.rs b/core/io/io_uring.rs index b4b21aca8..25d6aa33e 100644 --- a/core/io/io_uring.rs +++ b/core/io/io_uring.rs @@ -139,11 +139,15 @@ impl WrappedIOUring { impl IO for UringIO { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; // Let's attempt to enable direct I/O. Not all filesystems support it // so ignore any errors. let fd = file.as_fd(); @@ -158,7 +162,7 @@ impl IO for UringIO { file, }); if std::env::var(common::ENV_DISABLE_FILE_LOCK).is_err() { - uring_file.lock_file(true)?; + uring_file.lock_file(!flags.contains(OpenFlags::ReadOnly))?; } Ok(uring_file) } diff --git a/core/io/mod.rs b/core/io/mod.rs index 6f161d114..6f75e9bea 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -1,4 +1,5 @@ use crate::Result; +use bitflags::bitflags; use cfg_block::cfg_block; use std::fmt; use std::sync::Arc; @@ -19,18 +20,20 @@ pub trait File: Send + Sync { fn size(&self) -> Result; } -#[derive(Copy, Clone)] -pub enum OpenFlags { - None, - Create, +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct OpenFlags(i32); + +bitflags! 
{ + impl OpenFlags: i32 { + const None = 0b00000000; + const Create = 0b0000001; + const ReadOnly = 0b0000010; + } } -impl OpenFlags { - pub fn to_flags(&self) -> i32 { - match self { - Self::None => 0, - Self::Create => 1, - } +impl Default for OpenFlags { + fn default() -> Self { + Self::Create } } diff --git a/core/io/unix.rs b/core/io/unix.rs index c232ed3ad..721ba20f3 100644 --- a/core/io/unix.rs +++ b/core/io/unix.rs @@ -3,6 +3,7 @@ use crate::io::common; use crate::Result; use super::{Completion, File, MemoryIO, OpenFlags, IO}; +use crate::io::clock::{Clock, Instant}; use polling::{Event, Events, Poller}; use rustix::{ fd::{AsFd, AsRawFd}, @@ -18,7 +19,6 @@ use std::{ sync::Arc, }; use tracing::{debug, trace}; -use crate::io::clock::{Clock, Instant}; struct OwnedCallbacks(UnsafeCell); // We assume we locking on IO level is done by user. @@ -197,12 +197,15 @@ impl Clock for UnixIO { impl IO for UnixIO { fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .custom_flags(OFlags::NONBLOCK.bits() as i32) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true).custom_flags(OFlags::NONBLOCK.bits() as i32); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; #[allow(clippy::arc_with_non_send_sync)] let unix_file = Arc::new(UnixFile { @@ -211,7 +214,7 @@ impl IO for UnixIO { callbacks: BorrowedCallbacks(self.callbacks.as_mut().into()), }); if std::env::var(common::ENV_DISABLE_FILE_LOCK).is_err() { - unix_file.lock_file(true)?; + unix_file.lock_file(!flags.contains(OpenFlags::ReadOnly))?; } Ok(unix_file) } @@ -258,7 +261,7 @@ impl IO for UnixIO { getrandom::getrandom(&mut buf).unwrap(); i64::from_ne_bytes(buf) } - + fn get_memory_io(&self) -> Arc { Arc::new(MemoryIO::new()) } diff --git a/core/io/vfs.rs b/core/io/vfs.rs index 95b4055d0..d02f7d345 100644 --- a/core/io/vfs.rs +++ b/core/io/vfs.rs @@ -24,7 +24,7 @@ impl IO for VfsMod { })?; let ctx = self.ctx as *mut c_void; let vfs = unsafe { &*self.ctx }; - let file = unsafe { (vfs.open)(ctx, c_path.as_ptr(), flags.to_flags(), direct) }; + let file = unsafe { (vfs.open)(ctx, c_path.as_ptr(), flags.0, direct) }; if file.is_null() { return Err(LimboError::ExtensionError("File not found".to_string())); } diff --git a/core/io/windows.rs b/core/io/windows.rs index 6c46d1973..a329abc14 100644 --- a/core/io/windows.rs +++ b/core/io/windows.rs @@ -19,11 +19,15 @@ unsafe impl Sync for WindowsIO {} impl IO for WindowsIO { fn open_file(&self, path: &str, flags: OpenFlags, direct: bool) -> Result> { trace!("open_file(path = {})", path); - let file = std::fs::File::options() - .read(true) - .write(true) - .create(matches!(flags, OpenFlags::Create)) - .open(path)?; + let mut file = std::fs::File::options(); + file.read(true); + + if !flags.contains(OpenFlags::ReadOnly) { + file.write(true); + file.create(flags.contains(OpenFlags::Create)); + } + + let file = file.open(path)?; Ok(Arc::new(WindowsFile { file: RefCell::new(file), })) diff --git a/core/lib.rs b/core/lib.rs index ddf741ffb..2f7ab0577 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -101,6 +101,7 @@ pub struct Database { // create DB connections. 
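     // These fields are shared by every connection to the same database file.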
shared_page_cache: Arc>, shared_wal: Arc>, + open_flags: OpenFlags, } unsafe impl Send for Database {} @@ -109,10 +110,20 @@ unsafe impl Sync for Database {} impl Database { #[cfg(feature = "fs")] pub fn open_file(io: Arc, path: &str, enable_mvcc: bool) -> Result> { - let file = io.open_file(path, OpenFlags::Create, true)?; + Self::open_file_with_flags(io, path, OpenFlags::default(), enable_mvcc) + } + + #[cfg(feature = "fs")] + pub fn open_file_with_flags( + io: Arc, + path: &str, + flags: OpenFlags, + enable_mvcc: bool, + ) -> Result> { + let file = io.open_file(path, flags, true)?; maybe_init_database_file(&file, &io)?; let db_file = Arc::new(DatabaseFile::new(file)); - Self::open(io, path, db_file, enable_mvcc) + Self::open_with_flags(io, path, db_file, flags, enable_mvcc) } #[allow(clippy::arc_with_non_send_sync)] @@ -121,6 +132,17 @@ impl Database { path: &str, db_file: Arc, enable_mvcc: bool, + ) -> Result> { + Self::open_with_flags(io, path, db_file, OpenFlags::default(), enable_mvcc) + } + + #[allow(clippy::arc_with_non_send_sync)] + pub fn open_with_flags( + io: Arc, + path: &str, + db_file: Arc, + flags: OpenFlags, + enable_mvcc: bool, ) -> Result> { let db_header = Pager::begin_open(db_file.clone())?; // ensure db header is there @@ -155,6 +177,7 @@ impl Database { db_file, io: io.clone(), page_size, + open_flags: flags, }; let db = Arc::new(db); { diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 948f0a5b8..30f9e6781 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1564,20 +1564,17 @@ pub fn op_transaction( let Insn::Transaction { write } = insn else { unreachable!("unexpected Insn {:?}", insn) }; + let connection = program.connection.upgrade().unwrap(); + if *write && connection._db.open_flags.contains(OpenFlags::ReadOnly) { + return Err(LimboError::ReadOnly); + } if let Some(mv_store) = &mv_store { if state.mv_tx_id.is_none() { let tx_id = mv_store.begin_tx(); - program - .connection - .upgrade() - .unwrap() - .mv_transactions - .borrow_mut() - .push(tx_id); + connection.mv_transactions.borrow_mut().push(tx_id); state.mv_tx_id = Some(tx_id); } } else { - let connection = program.connection.upgrade().unwrap(); let current_state = connection.transaction_state.get(); let (new_transaction_state, updated) = match (current_state, write) { (TransactionState::Write, true) => (TransactionState::Write, false), From 2b3285d66905f14c1e587d2c8b1a9cefb75d0148 Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Fri, 2 May 2025 16:31:11 -0300 Subject: [PATCH 421/425] test opening in read only mode --- tests/integration/common.rs | 141 +++++++++++++++++++++++++++++++++- tests/integration/fuzz/mod.rs | 67 +--------------- 2 files changed, 140 insertions(+), 68 deletions(-) diff --git a/tests/integration/common.rs b/tests/integration/common.rs index 2c668a12f..1ef890db9 100644 --- a/tests/integration/common.rs +++ b/tests/integration/common.rs @@ -1,6 +1,7 @@ use limbo_core::{CheckpointStatus, Connection, Database, IO}; use rand::{rng, RngCore}; -use std::path::PathBuf; +use rusqlite::params; +use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::Arc; use tempfile::TempDir; @@ -28,6 +29,14 @@ impl TempDatabase { Self { path, io } } + pub fn new_with_existent(db_path: &Path) -> Self { + let io: Arc = Arc::new(limbo_core::PlatformIO::new().unwrap()); + Self { + path: db_path.to_path_buf(), + io, + } + } + pub fn new_with_rusqlite(table_sql: &str) -> Self { let mut path = TempDir::new().unwrap().into_path(); path.push("test.db"); @@ -44,8 +53,21 @@ impl 
TempDatabase { } pub fn connect_limbo(&self) -> Rc { + Self::connect_limbo_with_flags(&self, limbo_core::OpenFlags::default()) + } + + pub fn connect_limbo_with_flags( + &self, + flags: limbo_core::OpenFlags, + ) -> Rc { log::debug!("conneting to limbo"); - let db = Database::open_file(self.io.clone(), self.path.to_str().unwrap(), false).unwrap(); + let db = Database::open_file_with_flags( + self.io.clone(), + self.path.to_str().unwrap(), + flags, + false, + ) + .unwrap(); let conn = db.connect().unwrap(); log::debug!("connected to limbo"); @@ -104,9 +126,97 @@ pub fn maybe_setup_tracing() { .with(EnvFilter::from_default_env()) .try_init(); } + +pub(crate) fn sqlite_exec_rows( + conn: &rusqlite::Connection, + query: &str, +) -> Vec> { + let mut stmt = conn.prepare(&query).unwrap(); + let mut rows = stmt.query(params![]).unwrap(); + let mut results = Vec::new(); + while let Some(row) = rows.next().unwrap() { + let mut result = Vec::new(); + for i in 0.. { + let column: rusqlite::types::Value = match row.get(i) { + Ok(column) => column, + Err(rusqlite::Error::InvalidColumnIndex(_)) => break, + Err(err) => panic!("unexpected rusqlite error: {}", err), + }; + result.push(column); + } + results.push(result) + } + + results +} + +pub(crate) fn limbo_exec_rows( + db: &TempDatabase, + conn: &Rc, + query: &str, +) -> Vec> { + let mut stmt = conn.prepare(query).unwrap(); + let mut rows = Vec::new(); + 'outer: loop { + let row = loop { + let result = stmt.step().unwrap(); + match result { + limbo_core::StepResult::Row => { + let row = stmt.row().unwrap(); + break row; + } + limbo_core::StepResult::IO => { + db.io.run_once().unwrap(); + continue; + } + limbo_core::StepResult::Done => break 'outer, + r => panic!("unexpected result {:?}: expecting single row", r), + } + }; + let row = row + .get_values() + .map(|x| match x { + limbo_core::OwnedValue::Null => rusqlite::types::Value::Null, + limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x), + limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x), + limbo_core::OwnedValue::Text(x) => { + rusqlite::types::Value::Text(x.as_str().to_string()) + } + limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()), + }) + .collect(); + rows.push(row); + } + rows +} + +pub(crate) fn limbo_exec_rows_error( + db: &TempDatabase, + conn: &Rc, + query: &str, +) -> limbo_core::Result<()> { + let mut stmt = conn.prepare(query)?; + loop { + let result = stmt.step()?; + match result { + limbo_core::StepResult::IO => { + db.io.run_once()?; + continue; + } + limbo_core::StepResult::Done => return Ok(()), + r => panic!("unexpected result {:?}: expecting single row", r), + } + } +} + #[cfg(test)] mod tests { - use super::TempDatabase; + use std::vec; + + use tempfile::TempDir; + + use super::{limbo_exec_rows, limbo_exec_rows_error, TempDatabase}; + use rusqlite::types::Value; #[test] fn test_statement_columns() -> anyhow::Result<()> { @@ -145,4 +255,29 @@ mod tests { Ok(()) } + + #[test] + fn test_limbo_open_read_only() -> anyhow::Result<()> { + let path = TempDir::new().unwrap().into_path().join("temp_read_only"); + let db = TempDatabase::new_with_existent(&path); + { + let conn = db.connect_limbo(); + let ret = limbo_exec_rows(&db, &conn, "CREATE table t(a)"); + assert!(ret.is_empty(), "{:?}", ret); + limbo_exec_rows(&db, &conn, "INSERT INTO t values (1)"); + conn.close().unwrap() + } + + { + let conn = db.connect_limbo_with_flags( + limbo_core::OpenFlags::default() | limbo_core::OpenFlags::ReadOnly, + ); + let ret = 
+            let ret = limbo_exec_rows(&db, &conn, "SELECT * from t");
+            assert_eq!(ret, vec![vec![Value::Integer(1)]]);
+
+            let err = limbo_exec_rows_error(&db, &conn, "INSERT INTO t values (1)").unwrap_err();
+            assert!(matches!(err, limbo_core::LimboError::ReadOnly), "{:?}", err);
+        }
+        Ok(())
+    }
 }
diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs
index fb38455ac..82d1c11ac 100644
--- a/tests/integration/fuzz/mod.rs
+++ b/tests/integration/fuzz/mod.rs
@@ -2,14 +2,14 @@ pub mod grammar_generator;
 
 #[cfg(test)]
 mod tests {
-    use std::{collections::HashSet, rc::Rc};
+    use std::collections::HashSet;
 
     use rand::{seq::IndexedRandom, Rng, SeedableRng};
     use rand_chacha::ChaCha8Rng;
     use rusqlite::params;
 
     use crate::{
-        common::TempDatabase,
+        common::{limbo_exec_rows, sqlite_exec_rows, TempDatabase},
         fuzz::grammar_generator::{const_str, rand_int, rand_str, GrammarGenerator},
     };
 
@@ -24,69 +24,6 @@ mod tests {
         (rng, seed)
     }
 
-    fn sqlite_exec_rows(
-        conn: &rusqlite::Connection,
-        query: &str,
-    ) -> Vec<Vec<rusqlite::types::Value>> {
-        let mut stmt = conn.prepare(&query).unwrap();
-        let mut rows = stmt.query(params![]).unwrap();
-        let mut results = Vec::new();
-        while let Some(row) = rows.next().unwrap() {
-            let mut result = Vec::new();
-            for i in 0.. {
-                let column: rusqlite::types::Value = match row.get(i) {
-                    Ok(column) => column,
-                    Err(rusqlite::Error::InvalidColumnIndex(_)) => break,
-                    Err(err) => panic!("unexpected rusqlite error: {}", err),
-                };
-                result.push(column);
-            }
-            results.push(result)
-        }
-
-        results
-    }
-
-    fn limbo_exec_rows(
-        db: &TempDatabase,
-        conn: &Rc<limbo_core::Connection>,
-        query: &str,
-    ) -> Vec<Vec<rusqlite::types::Value>> {
-        let mut stmt = conn.prepare(query).unwrap();
-        let mut rows = Vec::new();
-        'outer: loop {
-            let row = loop {
-                let result = stmt.step().unwrap();
-                match result {
-                    limbo_core::StepResult::Row => {
-                        let row = stmt.row().unwrap();
-                        break row;
-                    }
-                    limbo_core::StepResult::IO => {
-                        db.io.run_once().unwrap();
-                        continue;
-                    }
-                    limbo_core::StepResult::Done => break 'outer,
-                    r => panic!("unexpected result {:?}: expecting single row", r),
-                }
-            };
-            let row = row
-                .get_values()
-                .map(|x| match x {
-                    limbo_core::OwnedValue::Null => rusqlite::types::Value::Null,
-                    limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x),
-                    limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x),
-                    limbo_core::OwnedValue::Text(x) => {
-                        rusqlite::types::Value::Text(x.as_str().to_string())
-                    }
-                    limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()),
-                })
-                .collect();
-            rows.push(row);
-        }
-        rows
-    }
-
     #[test]
     pub fn arithmetic_expression_fuzz_ex1() {
         let db = TempDatabase::new_empty();

From 7cc190a12b37de55559c2fa629206c1b1b0bf219 Mon Sep 17 00:00:00 2001
From: pedrocarlo
Date: Fri, 2 May 2025 19:26:44 -0300
Subject: [PATCH 422/425] reset statement before executing

---
 bindings/rust/src/lib.rs | 5 +++++
 db.sqlite                | Bin 0 -> 4096 bytes
 2 files changed, 5 insertions(+)
 create mode 100644 db.sqlite

diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs
index 8c57e7909..955f069af 100644
--- a/bindings/rust/src/lib.rs
+++ b/bindings/rust/src/lib.rs
@@ -153,6 +153,10 @@ impl Statement {
     }
 
     pub async fn execute(&mut self, params: impl IntoParams) -> Result<u64> {
+        {
+            // Reset the statement before executing
+            self.inner.lock().unwrap().reset();
+        }
         let params = params.into_params()?;
         match params {
             params::Params::None => (),
@@ -263,6 +267,7 @@ impl Rows {
     }
 }
 
+#[derive(Debug)]
 pub struct Row {
     values: Vec<Value>,
 }
diff --git a/db.sqlite b/db.sqlite
new file mode 100644
index 0000000000000000000000000000000000000000..a7582c3c97f3b7989dfc514a25271a8d1051e5d1
GIT binary patch
literal 4096
zcmWFz^vNtqRY=P(%1ta$FlG>7U}9o$P*7lCU|@t|AO#03K;bV?9!MTP(5s7J0AgN{
wDgpdTM`cGtU^E0qLtr!nMnhmU1V%$(Gz3ONU^E0qLtr!nMnhmU1ZWom02k^A

literal 0
HcmV?d00001


From: Anton Harniakou
Date: Sat, 3 May 2025 12:37:43 +0300
Subject: [PATCH 423/425] Eliminate a superfluous read transaction when doing
 PRAGMA user_version

---
 core/translate/pragma.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs
index fdf307b45..4e49a55d1 100644
--- a/core/translate/pragma.rs
+++ b/core/translate/pragma.rs
@@ -256,7 +256,6 @@ fn query_pragma(
             }
         }
         PragmaName::UserVersion => {
-            program.emit_transaction(false);
             program.emit_insn(Insn::ReadCookie {
                 db: 0,
                 dest: register,

From b6a5cbe6265a6e2070ff957b58d907a054a9329f Mon Sep 17 00:00:00 2001
From: Anton Harniakou
Date: Sat, 3 May 2025 12:41:19 +0300
Subject: [PATCH 424/425] Test that DROP TABLE also deletes the related indices

---
 testing/drop_table.test | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/testing/drop_table.test b/testing/drop_table.test
index c6daf04d4..e1c48ec0c 100755
--- a/testing/drop_table.test
+++ b/testing/drop_table.test
@@ -26,23 +26,23 @@ do_execsql_test_on_specific_db {:memory:} drop-table-if-exists-2 {
 } {success}
 
 # Test dropping table with index
-#do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 {
-#    CREATE TABLE t3(x INTEGER PRIMARY KEY, y TEXT);
-#    CREATE INDEX idx_t3_y ON t3(y);
-#    INSERT INTO t3 VALUES(1, 'one');
-#    DROP TABLE t3;
-#    SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3';
-#} {0}
+do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 {
+    CREATE TABLE t3(x INTEGER PRIMARY KEY, y TEXT);
+    CREATE INDEX idx_t3_y ON t3(y);
+    INSERT INTO t3 VALUES(1, 'one');
+    DROP TABLE t3;
+    SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3';
+} {0}
 
 # Test dropping table cleans up related schema entries
-#do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 {
-#    CREATE TABLE t4(x INTEGER PRIMARY KEY, y TEXT);
-#    CREATE INDEX idx1_t4 ON t4(x);
-#    CREATE INDEX idx2_t4 ON t4(y);
-#    INSERT INTO t4 VALUES(1, 'one');
-#    DROP TABLE t4;
-#    SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4';
-#} {0}
+do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 {
+    CREATE TABLE t4(x INTEGER PRIMARY KEY, y TEXT);
+    CREATE INDEX idx1_t4 ON t4(x);
+    CREATE INDEX idx2_t4 ON t4(y);
+    INSERT INTO t4 VALUES(1, 'one');
+    DROP TABLE t4;
+    SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4';
+} {0}
 
 # Test dropping table after multiple inserts and deletes
 do_execsql_test_on_specific_db {:memory:} drop-table-after-ops-1 {

From 7920161efc46dbc19eb8c512424a926d4b4264ae Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Sat, 3 May 2025 18:32:58 +0300
Subject: [PATCH 425/425] update Cargo.lock

---
 Cargo.lock | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1a4c6069c..fb99fa590 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -911,7 +911,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
  "cfg-if",
- "rustix 1.0.3",
+ "rustix 1.0.7",
  "windows-sys 0.59.0",
 ]
 
@@ -1412,11 +1412,12 @@
 [[package]]
 name = "io-uring"
-version = "0.6.4"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "595a0399f411a508feb2ec1e970a4a30c249351e30208960d58298de8660b0e5"
+checksum = "3c2f96dfbc20c12b9b4f12eef60472d8c29b9c3f29463570dcb47e4a48551168"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.9.0",
+ "cfg-if",
  "libc",
 ]
 
@@ -1609,9 +1610,9 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
 
 [[package]]
 name = "libmimalloc-sys"
-version = "0.1.40"
+version = "0.1.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07d0e07885d6a754b9c7993f2625187ad694ee985d60f23355ff0e7077261502"
+checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4"
 dependencies = [
  "cc",
  "libc",
 ]
 
@@ -1766,7 +1767,7 @@ dependencies = [
  "regex-syntax 0.8.5",
  "rstest",
  "rusqlite",
- "rustix 0.38.44",
+ "rustix 1.0.7",
  "ryu",
  "strum",
  "tempfile",
@@ -2012,9 +2013,9 @@ checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
 
 [[package]]
 name = "lru"
-version = "0.13.0"
+version = "0.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465"
+checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
 dependencies = [
  "hashbrown",
 ]
 
@@ -2060,9 +2061,9 @@ dependencies = [
 
 [[package]]
 name = "miette"
-version = "7.5.0"
+version = "7.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a955165f87b37fd1862df2a59547ac542c77ef6d17c666f619d1ad22dd89484"
+checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7"
 dependencies = [
  "backtrace",
  "backtrace-ext",
@@ -2074,15 +2075,14 @@ dependencies = [
  "supports-unicode",
  "terminal_size",
  "textwrap",
- "thiserror 1.0.69",
  "unicode-width 0.1.14",
 ]
 
 [[package]]
 name = "miette-derive"
-version = "7.5.0"
+version = "7.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf45bf44ab49be92fd1227a3be6fc6f617f1a337c06af54981048574d8783147"
+checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2091,9 +2091,9 @@ dependencies = [
 
 [[package]]
 name = "mimalloc"
-version = "0.1.44"
+version = "0.1.46"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99585191385958383e13f6b822e6b6d8d9cf928e7d286ceb092da92b43c87bc1"
+checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af"
 dependencies = [
  "libmimalloc-sys",
 ]
@@ -3000,9 +3000,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "1.0.3"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96"
+checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
 dependencies = [
  "bitflags 2.9.0",
  "errno",
@@ -3328,7 +3328,7 @@ dependencies = [
  "fastrand",
  "getrandom 0.3.2",
  "once_cell",
- "rustix 1.0.3",
+ "rustix 1.0.7",
  "windows-sys 0.59.0",
 ]
 
@@ -3347,7 +3347,7 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed"
 dependencies = [
- "rustix 1.0.3",
+ "rustix 1.0.7",
  "windows-sys 0.59.0",
 ]