From 9a8b94ef9358929d91a7507295b4442eb42e7ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 13:10:50 +0900 Subject: [PATCH 01/39] First successful implementation of delete planning --- core/pseudo.rs | 4 + core/storage/btree.rs | 6 + core/translate/delete.rs | 25 +++++ core/translate/emitter.rs | 216 ++++++++++++++++++++++++++++++++++++ core/translate/mod.rs | 21 +++- core/translate/optimizer.rs | 9 ++ core/translate/planner.rs | 54 ++++++++- core/types.rs | 1 + core/vdbe/explain.rs | 18 +++ core/vdbe/mod.rs | 24 +++- core/vdbe/sorter.rs | 4 + 11 files changed, 378 insertions(+), 4 deletions(-) create mode 100644 core/translate/delete.rs diff --git a/core/pseudo.rs b/core/pseudo.rs index a87647d2b..45f47856e 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -79,6 +79,10 @@ impl Cursor for PseudoCursor { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn get_null_flag(&self) -> bool { false } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 19ba4c7fa..d500e7727 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1753,6 +1753,12 @@ impl Cursor for BTreeCursor { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + println!("ROWID: {:?}", self.rowid.borrow()); + return Ok(CursorResult::Ok(())); + unimplemented!() + } + fn set_null_flag(&mut self, flag: bool) { self.null_flag = flag; } diff --git a/core/translate/delete.rs b/core/translate/delete.rs new file mode 100644 index 000000000..dd34d957b --- /dev/null +++ b/core/translate/delete.rs @@ -0,0 +1,25 @@ +use crate::translate::emitter::emit_program_for_delete; +use crate::translate::optimizer::optimize_delete_plan; +use crate::translate::planner::prepare_delete_plan; +use crate::{ + schema::Schema, + storage::sqlite3_ondisk::DatabaseHeader, + vdbe::Program, +}; +use crate::{Connection, Result}; +use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; +use std::rc::Weak; +use std::{cell::RefCell, rc::Rc}; + +pub fn translate_delete( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, + _returning: &Option>, + database_header: Rc>, + connection: Weak, +) -> Result { + let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; + let optimized_plan = optimize_delete_plan(delete_plan)?; + emit_program_for_delete(database_header, optimized_plan, connection) +} diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 38311b9d9..7a32336be 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -272,6 +272,51 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } +pub fn emit_program_for_delete( + database_header: Rc>, + mut plan: Plan, + connection: Weak, +) -> Result { + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 + let skip_loops_label = if plan.contains_constant_false_condition { + let skip_loops_label = program.allocate_label(); + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: skip_loops_label, + }, + skip_loops_label, + ); + Some(skip_loops_label) + } else { + None + }; + + // Initialize cursors and other resources needed for query execution + init_source_for_delete(&mut program, &plan.source, &mut metadata)?; + + // Set up main query execution loop + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + // Close the loop and handle deletion + close_loop_for_delete(&mut program, &plan.source, &mut metadata)?; + + if let Some(skip_loops_label) = skip_loops_label { + program.resolve_label(skip_loops_label, program.offset()); + } + + // Finalize program + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + /// Initialize resources needed for ORDER BY processing fn init_order_by( program: &mut ProgramBuilder, @@ -466,6 +511,74 @@ fn init_source( } } +fn init_source_for_delete( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { .. } => { + unreachable!() + } + SourceOperator::Scan { + id, + table_reference, + .. + } => { + let cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + let root_page = table_reference.table.root_page; + let next_row_label = program.allocate_label(); + metadata.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenWriteAsync{ + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + + let next_row_label = program.allocate_label(); + + metadata.next_row_labels.insert(*id, next_row_label); + + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = program + .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + + Ok(()) + } + SourceOperator::Nothing => { + Ok(()) + } + } +} + /// Set up the main query execution loop /// For example in the case of a nested table scan, this means emitting the RewindAsync instruction /// for all tables involved, outermost first. @@ -1121,6 +1234,109 @@ fn close_loop( } } +fn close_loop_for_delete( + program: &mut ProgramBuilder, + source: &SourceOperator, + metadata: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Scan { + id, + table_reference, + iter_dir, + .. + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); + + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + + // Emit the NextAsync or PrevAsync instruction to continue the loop + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + + // Emit the NextAwait or PrevAwait instruction with label dependency + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn_with_label_dependency( + Insn::PrevAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let cursor_id = match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + }; + + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + + // resolve labels after calling Delete opcodes + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); + + // Emit the NextAsync instruction to continue the loop + if !matches!(search, Search::RowidEq { .. }) { + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + + Ok(()) + } + _ => Ok(()), + } +} + /// Emits the bytecode for processing a GROUP BY clause. /// This is called when the main query execution loop has finished processing, /// and we now have data in the GROUP BY sorter. diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 2e5d86141..5ea44f05a 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -14,6 +14,7 @@ pub(crate) mod optimizer; pub(crate) mod plan; pub(crate) mod planner; pub(crate) mod select; +pub(crate) mod delete; use std::cell::RefCell; use std::fmt::Display; @@ -29,6 +30,7 @@ use insert::translate_insert; use select::translate_select; use sqlite3_parser::ast::fmt::ToTokens; use sqlite3_parser::ast::{self, PragmaName}; +use crate::translate::delete::translate_delete; /// Translate SQL statement into bytecode program. pub fn translate( @@ -68,7 +70,24 @@ pub fn translate( ast::Stmt::CreateVirtualTable { .. } => { bail_parse_error!("CREATE VIRTUAL TABLE not supported yet") } - ast::Stmt::Delete { .. } => bail_parse_error!("DELETE not supported yet"), + ast::Stmt::Delete { + with, + tbl_name, + indexed, + where_clause, + returning, + order_by, + limit + } => { + translate_delete( + schema, + &tbl_name, + where_clause, + &returning, + database_header, + connection + ) + } ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4763f8b1e..6ca17f217 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -41,6 +41,15 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { Ok(select_plan) } +pub fn optimize_delete_plan(mut delete_plan: Plan) -> Result { + use_indexes( + &mut delete_plan.source, + &delete_plan.referenced_tables, + &delete_plan.available_indexes, + )?; + Ok(delete_plan) +} + fn _operator_is_already_ordered_by( operator: &mut SourceOperator, key: &mut ast::Expr, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 14757e00a..373fa498f 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,11 +1,10 @@ use super::{ - optimizer::Optimizable, plan::{ Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; -use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; +use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; pub struct OperatorIdCounter { id: usize, @@ -738,6 +737,57 @@ fn parse_join( )) } +pub fn prepare_delete_plan( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, +) -> Result { + let table_name = tbl_name.name.0.clone(); + + let table = if let Some(table) = schema.get_table(&table_name) { + table + } else { + crate::bail_parse_error!("Table {} not found", table_name); + }; + + let table_ref = BTreeTableReference { + table: table.clone(), + table_identifier: table_name.clone(), + table_index: 0 + }; + + // Parse and resolve the where_clause + let mut resolved_where_clause = None; + if let Some(where_expr) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(where_expr, &mut predicates); + for expr in predicates.iter_mut() { + bind_column_references(expr, &[table_ref.clone()])?; + } + resolved_where_clause = Some(predicates); + } + + let plan = Plan { + source: SourceOperator::Scan { + id: 0, + table_reference: table_ref.clone(), + predicates: resolved_where_clause.clone(), + iter_dir: None + }, + result_columns: vec![], + where_clause: resolved_where_clause, + group_by: None, + order_by: None, + aggregates: vec![], + limit: None, + referenced_tables: vec![table_ref], + available_indexes: vec![], + contains_constant_false_condition: false + }; + + Ok(plan) +} + fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/types.rs b/core/types.rs index 5f1b55d7b..c545fa514 100644 --- a/core/types.rs +++ b/core/types.rs @@ -484,6 +484,7 @@ pub trait Cursor { record: &OwnedRecord, moved_before: bool, /* Tells inserter that it doesn't need to traverse in order to find leaf page */ ) -> Result>; // + fn delete(&mut self) -> Result>; fn exists(&mut self, key: &OwnedValue) -> Result>; fn set_null_flag(&mut self, flag: bool); fn get_null_flag(&self) -> bool; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce03a53fd..94d89f94c 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -834,6 +834,24 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::DeleteAsync { cursor_id } => ( + "DeleteAsync", + *cursor_id as i32, + 0, + 0, + OwnedValue::Text(Rc::new("".to_string())), + 0, + "".to_string(), + ), + Insn::DeleteAwait { cursor_id } => ( + "DeleteAwait", + *cursor_id as i32, + 0, + 0, + OwnedValue::Text(Rc::new("".to_string())), + 0, + "".to_string(), + ), Insn::NewRowid { cursor, rowid_reg, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 362f60042..3db275f63 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -484,6 +484,14 @@ pub enum Insn { cursor_id: usize, }, + DeleteAsync { + cursor_id: CursorID, + }, + + DeleteAwait { + cursor_id: CursorID + }, + NewRowid { cursor: CursorID, // P1 rowid_reg: usize, // P2 Destination register to store the new rowid @@ -2648,7 +2656,17 @@ impl Program { } } state.pc += 1; - } + }, + Insn::DeleteAsync { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + return_if_io!(cursor.delete()); + state.pc += 1; + }, + Insn::DeleteAwait { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + cursor.wait_for_completion()?; + state.pc += 1; + }, Insn::NewRowid { cursor, rowid_reg, .. } => { @@ -3879,6 +3897,10 @@ mod tests { unimplemented!() } + fn delete(&mut self, key: &OwnedValue) -> Result> { + unimplemented!() + } + fn wait_for_completion(&mut self) -> Result<()> { unimplemented!() } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 0365007a4..e3962b096 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -96,6 +96,10 @@ impl Cursor for Sorter { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn set_null_flag(&mut self, _flag: bool) { todo!(); } From a42b185ecec078f4845206c48eaa2a4dbfe99e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:22:10 +0900 Subject: [PATCH 02/39] Nit --- core/storage/btree.rs | 5 ++--- core/vdbe/explain.rs | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d500e7727..b2761a5aa 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1754,9 +1754,8 @@ impl Cursor for BTreeCursor { } fn delete(&mut self) -> Result> { - println!("ROWID: {:?}", self.rowid.borrow()); - return Ok(CursorResult::Ok(())); - unimplemented!() + debug!("rowid: {:?}", self.rowid.borrow()); + Ok(CursorResult::Ok(())) } fn set_null_flag(&mut self, flag: bool) { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 94d89f94c..a5e4f91bd 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -839,7 +839,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), @@ -848,7 +848,7 @@ pub fn insn_to_str( *cursor_id as i32, 0, 0, - OwnedValue::Text(Rc::new("".to_string())), + OwnedValue::build_text(Rc::new("".to_string())), 0, "".to_string(), ), From 57c7a56e35aa855370710732726bfc5012e1d4f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:27:21 +0900 Subject: [PATCH 03/39] Apply fmt, clippy --- core/translate/delete.rs | 6 +----- core/translate/emitter.rs | 6 ++---- core/translate/mod.rs | 24 +++++++++++------------- core/translate/planner.rs | 12 +++++------- core/vdbe/mod.rs | 10 +++++----- 5 files changed, 24 insertions(+), 34 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index dd34d957b..d3d5fd346 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,11 +1,7 @@ use crate::translate::emitter::emit_program_for_delete; use crate::translate::optimizer::optimize_delete_plan; use crate::translate::planner::prepare_delete_plan; -use crate::{ - schema::Schema, - storage::sqlite3_ondisk::DatabaseHeader, - vdbe::Program, -}; +use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; use std::rc::Weak; diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 7a32336be..8779dbc25 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -532,7 +532,7 @@ fn init_source_for_delete( let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync{ + program.emit_insn(Insn::OpenWriteAsync { cursor_id, root_page, }); @@ -573,9 +573,7 @@ fn init_source_for_delete( Ok(()) } - SourceOperator::Nothing => { - Ok(()) - } + SourceOperator::Nothing => Ok(()), } } diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 5ea44f05a..381c82df1 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -7,6 +7,7 @@ //! a SELECT statement will be translated into a sequence of instructions that //! will read rows from the database and filter them according to a WHERE clause. +pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod insert; @@ -14,7 +15,6 @@ pub(crate) mod optimizer; pub(crate) mod plan; pub(crate) mod planner; pub(crate) mod select; -pub(crate) mod delete; use std::cell::RefCell; use std::fmt::Display; @@ -24,13 +24,13 @@ use std::str::FromStr; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; +use crate::translate::delete::translate_delete; use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Connection, Result}; use insert::translate_insert; use select::translate_select; use sqlite3_parser::ast::fmt::ToTokens; use sqlite3_parser::ast::{self, PragmaName}; -use crate::translate::delete::translate_delete; /// Translate SQL statement into bytecode program. pub fn translate( @@ -77,17 +77,15 @@ pub fn translate( where_clause, returning, order_by, - limit - } => { - translate_delete( - schema, - &tbl_name, - where_clause, - &returning, - database_header, - connection - ) - } + limit, + } => translate_delete( + schema, + &tbl_name, + where_clause, + &returning, + database_header, + connection, + ), ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 373fa498f..e613dd14d 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,7 +1,5 @@ -use super::{ - plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, - }, +use super::plan::{ + Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; @@ -753,7 +751,7 @@ pub fn prepare_delete_plan( let table_ref = BTreeTableReference { table: table.clone(), table_identifier: table_name.clone(), - table_index: 0 + table_index: 0, }; // Parse and resolve the where_clause @@ -772,7 +770,7 @@ pub fn prepare_delete_plan( id: 0, table_reference: table_ref.clone(), predicates: resolved_where_clause.clone(), - iter_dir: None + iter_dir: None, }, result_columns: vec![], where_clause: resolved_where_clause, @@ -782,7 +780,7 @@ pub fn prepare_delete_plan( limit: None, referenced_tables: vec![table_ref], available_indexes: vec![], - contains_constant_false_condition: false + contains_constant_false_condition: false, }; Ok(plan) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 3db275f63..26f125675 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -489,7 +489,7 @@ pub enum Insn { }, DeleteAwait { - cursor_id: CursorID + cursor_id: CursorID, }, NewRowid { @@ -2656,17 +2656,17 @@ impl Program { } } state.pc += 1; - }, + } Insn::DeleteAsync { cursor_id } => { let cursor = cursors.get_mut(cursor_id).unwrap(); return_if_io!(cursor.delete()); state.pc += 1; - }, + } Insn::DeleteAwait { cursor_id } => { let cursor = cursors.get_mut(cursor_id).unwrap(); cursor.wait_for_completion()?; state.pc += 1; - }, + } Insn::NewRowid { cursor, rowid_reg, .. } => { @@ -3897,7 +3897,7 @@ mod tests { unimplemented!() } - fn delete(&mut self, key: &OwnedValue) -> Result> { + fn delete(&mut self) -> Result> { unimplemented!() } From 9bacf80f2e29bc83e1c6859b9e70f89bdfc0ae24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 14:41:12 +0900 Subject: [PATCH 04/39] Change to println! --- core/storage/btree.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b2761a5aa..3f6b87da3 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1754,7 +1754,7 @@ impl Cursor for BTreeCursor { } fn delete(&mut self) -> Result> { - debug!("rowid: {:?}", self.rowid.borrow()); + println!("rowid: {:?}", self.rowid.borrow()); Ok(CursorResult::Ok(())) } From 1d3ce528122fb4a81bdeaca6749958df1f517de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 15:11:26 +0900 Subject: [PATCH 05/39] Refactor planner and optimizer to be DRY --- core/lib.rs | 4 +- core/translate/optimizer.rs | 65 ++++++++++++-------- core/translate/planner.rs | 119 ++++++++++++++++++------------------ core/translate/select.rs | 9 ++- 4 files changed, 105 insertions(+), 92 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 1f5668d76..e39a5fa7a 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -34,12 +34,12 @@ pub use storage::wal::WalFile; pub use storage::wal::WalFileShared; use util::parse_schema_rows; -use translate::optimizer::optimize_plan; use translate::planner::prepare_select_plan; pub use error::LimboError; pub type Result = std::result::Result; +use crate::translate::optimizer::optimize_select_plan; pub use io::OpenFlags; #[cfg(feature = "fs")] pub use io::PlatformIO; @@ -267,7 +267,7 @@ impl Connection { match stmt { ast::Stmt::Select(select) => { let plan = prepare_select_plan(&*self.schema.borrow(), select)?; - let plan = optimize_plan(plan)?; + let plan = optimize_select_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 6ca17f217..9ccd8e7f7 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -9,12 +9,25 @@ use super::plan::{ Direction, IterationDirection, Plan, Search, SourceOperator, }; +pub fn optimize_select_plan(plan: Plan) -> Result { + optimize_plan(plan, true, true, true) +} + +pub fn optimize_delete_plan(plan: Plan) -> Result { + optimize_plan(plan, false, true, false) +} + /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result { +fn optimize_plan( + mut select_plan: Plan, + optimize_push_predicates: bool, + optimize_use_indexes: bool, + optimize_eliminate_unnecessary_order_by: bool, +) -> Result { eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? @@ -22,32 +35,32 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { select_plan.contains_constant_false_condition = true; return Ok(select_plan); } - push_predicates( - &mut select_plan.source, - &mut select_plan.where_clause, - &select_plan.referenced_tables, - )?; - use_indexes( - &mut select_plan.source, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - eliminate_unnecessary_orderby( - &mut select_plan.source, - &mut select_plan.order_by, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - Ok(select_plan) -} -pub fn optimize_delete_plan(mut delete_plan: Plan) -> Result { - use_indexes( - &mut delete_plan.source, - &delete_plan.referenced_tables, - &delete_plan.available_indexes, - )?; - Ok(delete_plan) + if optimize_push_predicates { + push_predicates( + &mut select_plan.source, + &mut select_plan.where_clause, + &select_plan.referenced_tables, + )?; + } + + if optimize_use_indexes { + use_indexes( + &mut select_plan.source, + &select_plan.referenced_tables, + &select_plan.available_indexes, + )?; + } + + if optimize_eliminate_unnecessary_order_by { + eliminate_unnecessary_orderby( + &mut select_plan.source, + &mut select_plan.order_by, + &select_plan.referenced_tables, + &select_plan.available_indexes, + )?; + } + Ok(select_plan) } fn _operator_is_already_ordered_by( diff --git a/core/translate/planner.rs b/core/translate/planner.rs index e613dd14d..d32eba6c4 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -283,14 +283,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

, +) -> Result { + let table_name = tbl_name.name.0.clone(); + + let table = if let Some(table) = schema.get_table(&table_name) { + table + } else { + crate::bail_parse_error!("Table {} not found", table_name); + }; + + let table_ref = BTreeTableReference { + table: table.clone(), + table_identifier: table_name.clone(), + table_index: 0, + }; + + // Parse and resolve the where_clause + let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + + let plan = Plan { + source: SourceOperator::Scan { + id: 0, + table_reference: table_ref.clone(), + predicates: resolved_where_clauses.clone(), + iter_dir: None, + }, + result_columns: vec![], + where_clause: resolved_where_clauses, + group_by: None, + order_by: None, + aggregates: vec![], + limit: None, // TODO: add support for limit + referenced_tables: vec![table_ref], + available_indexes: vec![], + contains_constant_false_condition: false, + }; + + Ok(plan) +} + #[allow(clippy::type_complexity)] fn parse_from( schema: &Schema, @@ -552,6 +588,22 @@ fn parse_from( Ok((operator, tables)) } +fn parse_where( + where_clause: Option, + referenced_tables: &[BTreeTableReference], +) -> Result>> { + if let Some(where_expr) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(where_expr, &mut predicates); + for expr in predicates.iter_mut() { + bind_column_references(expr, referenced_tables)?; + } + Ok(Some(predicates)) + } else { + Ok(None) + } +} + fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, @@ -735,57 +787,6 @@ fn parse_join( )) } -pub fn prepare_delete_plan( - schema: &Schema, - tbl_name: &QualifiedName, - where_clause: Option, -) -> Result { - let table_name = tbl_name.name.0.clone(); - - let table = if let Some(table) = schema.get_table(&table_name) { - table - } else { - crate::bail_parse_error!("Table {} not found", table_name); - }; - - let table_ref = BTreeTableReference { - table: table.clone(), - table_identifier: table_name.clone(), - table_index: 0, - }; - - // Parse and resolve the where_clause - let mut resolved_where_clause = None; - if let Some(where_expr) = where_clause { - let mut predicates = vec![]; - break_predicate_at_and_boundaries(where_expr, &mut predicates); - for expr in predicates.iter_mut() { - bind_column_references(expr, &[table_ref.clone()])?; - } - resolved_where_clause = Some(predicates); - } - - let plan = Plan { - source: SourceOperator::Scan { - id: 0, - table_reference: table_ref.clone(), - predicates: resolved_where_clause.clone(), - iter_dir: None, - }, - result_columns: vec![], - where_clause: resolved_where_clause, - group_by: None, - order_by: None, - aggregates: vec![], - limit: None, - referenced_tables: vec![table_ref], - available_indexes: vec![], - contains_constant_false_condition: false, - }; - - Ok(plan) -} - fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/translate/select.rs b/core/translate/select.rs index 6d846ded8..a0a0fa36c 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,15 +1,14 @@ use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; +use super::emitter::emit_program; +use super::planner::prepare_select_plan; use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::translate::optimizer::optimize_select_plan; use crate::Connection; use crate::{schema::Schema, vdbe::Program, Result}; use sqlite3_parser::ast; -use super::emitter::emit_program; -use super::optimizer::optimize_plan; -use super::planner::prepare_select_plan; - pub fn translate_select( schema: &Schema, select: ast::Select, @@ -17,6 +16,6 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let optimized_plan = optimize_plan(select_plan)?; + let optimized_plan = optimize_select_plan(select_plan)?; emit_program(database_header, optimized_plan, connection) } From e83819ef3094c882658c323fb5065f2d045484cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 16:00:35 +0900 Subject: [PATCH 06/39] Extract the appending delete related opcodes to `emit_delete_opcodes` --- core/translate/emitter.rs | 233 +++++++++++++++----------------------- 1 file changed, 91 insertions(+), 142 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 8779dbc25..49b85e17b 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -101,6 +101,15 @@ pub struct Metadata { pub result_columns_to_skip_in_orderby_sorter: Option>, } +/// Used to distinguish database operations +#[derive(Debug, Clone)] +pub enum OperationMode { + SELECT, + INSERT, + UPDATE, + DELETE, +} + /// Initialize the program with basic setup and return initial metadata and labels fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); @@ -201,7 +210,12 @@ pub fn emit_program( if let Some(ref mut group_by) = plan.group_by { init_group_by(&mut program, group_by, &plan.aggregates, &mut metadata)?; } - init_source(&mut program, &plan.source, &mut metadata)?; + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::SELECT, + )?; // Set up main query execution loop open_loop( @@ -294,7 +308,12 @@ pub fn emit_program_for_delete( }; // Initialize cursors and other resources needed for query execution - init_source_for_delete(&mut program, &plan.source, &mut metadata)?; + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::DELETE, + )?; // Set up main query execution loop open_loop( @@ -304,8 +323,15 @@ pub fn emit_program_for_delete( &mut metadata, )?; + emit_delete_insns(&mut program, &plan.source)?; + // Close the loop and handle deletion - close_loop_for_delete(&mut program, &plan.source, &mut metadata)?; + close_loop( + &mut program, + &plan.source, + &mut metadata, + &plan.referenced_tables, + )?; if let Some(skip_loops_label) = skip_loops_label { program.resolve_label(skip_loops_label, program.offset()); @@ -430,6 +456,7 @@ fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, metadata: &mut Metadata, + mode: &OperationMode, ) -> Result<()> { match source { SourceOperator::Join { @@ -447,10 +474,10 @@ fn init_source( }; metadata.left_joins.insert(*id, lj_metadata); } - init_source(program, left, metadata)?; - init_source(program, right, metadata)?; + init_source(program, left, metadata, mode)?; + init_source(program, right, metadata, mode)?; - return Ok(()); + Ok(()) } SourceOperator::Scan { id, @@ -464,80 +491,27 @@ fn init_source( let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - return Ok(()); - } - SourceOperator::Search { - id, - table_reference, - search, - .. - } => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - - let next_row_label = program.allocate_label(); - - metadata.next_row_labels.insert(*id, next_row_label); - - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - if let Search::IndexSearch { index, .. } = search { - let index_cursor_id = program - .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } } - return Ok(()); - } - SourceOperator::Nothing => { - return Ok(()); - } - } -} - -fn init_source_for_delete( - program: &mut ProgramBuilder, - source: &SourceOperator, - metadata: &mut Metadata, -) -> Result<()> { - match source { - SourceOperator::Join { .. } => { - unreachable!() - } - SourceOperator::Scan { - id, - table_reference, - .. - } => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - let root_page = table_reference.table.root_page; - let next_row_label = program.allocate_label(); - metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - Ok(()) } SourceOperator::Search { @@ -555,20 +529,49 @@ fn init_source_for_delete( metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } if let Search::IndexSearch { index, .. } = search { let index_cursor_id = program .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); + + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } } Ok(()) @@ -1232,11 +1235,7 @@ fn close_loop( } } -fn close_loop_for_delete( - program: &mut ProgramBuilder, - source: &SourceOperator, - metadata: &mut Metadata, -) -> Result<()> { +fn emit_delete_insns(program: &mut ProgramBuilder, source: &SourceOperator) -> Result<()> { match source { SourceOperator::Scan { id, @@ -1255,40 +1254,6 @@ fn close_loop_for_delete( program.emit_insn(Insn::DeleteAsync { cursor_id }); program.emit_insn(Insn::DeleteAwait { cursor_id }); - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); - - // Emit the NextAsync or PrevAsync instruction to continue the loop - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); - - // Emit the NextAwait or PrevAwait instruction with label dependency - if iter_dir - .as_ref() - .is_some_and(|dir| *dir == IterationDirection::Backwards) - { - program.emit_insn_with_label_dependency( - Insn::PrevAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } Ok(()) } SourceOperator::Search { @@ -1313,22 +1278,6 @@ fn close_loop_for_delete( program.emit_insn(Insn::DeleteAsync { cursor_id }); program.emit_insn(Insn::DeleteAwait { cursor_id }); - // resolve labels after calling Delete opcodes - program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); - - // Emit the NextAsync instruction to continue the loop - if !matches!(search, Search::RowidEq { .. }) { - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } - Ok(()) } _ => Ok(()), From 6f235e6f6c03fe6ee2e3e80b266d6280d4cbdce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Sun, 22 Dec 2024 21:06:54 +0900 Subject: [PATCH 07/39] Fix comment --- core/translate/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 49b85e17b..3f3bdf1b2 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -325,7 +325,7 @@ pub fn emit_program_for_delete( emit_delete_insns(&mut program, &plan.source)?; - // Close the loop and handle deletion + // Clean up and close the main execution loop close_loop( &mut program, &plan.source, From 82c127b7a3491af8841b2ab2546fbf0fcdf2bda1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 04:47:05 +0900 Subject: [PATCH 08/39] Remove bool args in optimize_plan --- core/translate/optimizer.rs | 80 ++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 9ccd8e7f7..9e93b1d71 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -9,58 +9,58 @@ use super::plan::{ Direction, IterationDirection, Plan, Search, SourceOperator, }; -pub fn optimize_select_plan(plan: Plan) -> Result { - optimize_plan(plan, true, true, true) -} - -pub fn optimize_delete_plan(plan: Plan) -> Result { - optimize_plan(plan, false, true, false) -} - /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -fn optimize_plan( - mut select_plan: Plan, - optimize_push_predicates: bool, - optimize_use_indexes: bool, - optimize_eliminate_unnecessary_order_by: bool, -) -> Result { - eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; +pub fn optimize_select_plan(mut plan: Plan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = - eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? + eliminate_constants(&mut plan.source, &mut plan.where_clause)? { - select_plan.contains_constant_false_condition = true; - return Ok(select_plan); + plan.contains_constant_false_condition = true; + return Ok(plan); } - if optimize_push_predicates { - push_predicates( - &mut select_plan.source, - &mut select_plan.where_clause, - &select_plan.referenced_tables, - )?; + push_predicates( + &mut plan.source, + &mut plan.where_clause, + &plan.referenced_tables, + )?; + + use_indexes( + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + eliminate_unnecessary_orderby( + &mut plan.source, + &mut plan.order_by, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + Ok(plan) +} + +pub fn optimize_delete_plan(mut plan: Plan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; + if let ConstantConditionEliminationResult::ImpossibleCondition = + eliminate_constants(&mut plan.source, &mut plan.where_clause)? + { + plan.contains_constant_false_condition = true; + return Ok(plan); } - if optimize_use_indexes { - use_indexes( - &mut select_plan.source, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - } + use_indexes( + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, + )?; - if optimize_eliminate_unnecessary_order_by { - eliminate_unnecessary_orderby( - &mut select_plan.source, - &mut select_plan.order_by, - &select_plan.referenced_tables, - &select_plan.available_indexes, - )?; - } - Ok(select_plan) + Ok(plan) } fn _operator_is_already_ordered_by( From f8d4edc8d7495aebde190b0955c3f35429a35916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 04:54:40 +0900 Subject: [PATCH 09/39] Use schema.get_table(...) instead of referencing directly --- core/translate/planner.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index d32eba6c4..0c417fa66 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -489,17 +489,15 @@ pub fn prepare_delete_plan( tbl_name: &QualifiedName, where_clause: Option, ) -> Result { - let table_name = tbl_name.name.0.clone(); - - let table = if let Some(table) = schema.get_table(&table_name) { - table - } else { - crate::bail_parse_error!("Table {} not found", table_name); + // let table_name = tbl_name.name.0.clone(); + let table = match schema.get_table(tbl_name.name.0.as_str()) { + Some(table) => table, + None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), }; let table_ref = BTreeTableReference { table: table.clone(), - table_identifier: table_name.clone(), + table_identifier: table.name.clone(), table_index: 0, }; From 5cdcb8d78ce08226168474bd32d856d1e6147585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 05:45:23 +0900 Subject: [PATCH 10/39] Split `Plan` into `Select` and `Delete` --- core/lib.rs | 4 ++-- core/translate/delete.rs | 8 ++++---- core/translate/emitter.rs | 21 ++++++++++++++----- core/translate/optimizer.rs | 13 +++++++++--- core/translate/plan.rs | 40 ++++++++++++++++++++++++++++++++----- core/translate/planner.rs | 13 ++++++------ core/translate/select.rs | 4 ++-- 7 files changed, 75 insertions(+), 28 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index e39a5fa7a..f49d8bd3b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -39,7 +39,7 @@ use translate::planner::prepare_select_plan; pub use error::LimboError; pub type Result = std::result::Result; -use crate::translate::optimizer::optimize_select_plan; +use crate::translate::optimizer::optimize_plan; pub use io::OpenFlags; #[cfg(feature = "fs")] pub use io::PlatformIO; @@ -267,7 +267,7 @@ impl Connection { match stmt { ast::Stmt::Select(select) => { let plan = prepare_select_plan(&*self.schema.borrow(), select)?; - let plan = optimize_select_plan(plan)?; + let plan = optimize_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index d3d5fd346..b0ecbdc69 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -1,5 +1,5 @@ -use crate::translate::emitter::emit_program_for_delete; -use crate::translate::optimizer::optimize_delete_plan; +use crate::translate::emitter::emit_program; +use crate::translate::optimizer::optimize_plan; use crate::translate::planner::prepare_delete_plan; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; @@ -16,6 +16,6 @@ pub fn translate_delete( connection: Weak, ) -> Result { let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; - let optimized_plan = optimize_delete_plan(delete_plan)?; - emit_program_for_delete(database_header, optimized_plan, connection) + let optimized_plan = optimize_plan(delete_plan)?; + emit_program(database_header, optimized_plan, connection) } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 3f3bdf1b2..5e5f0d601 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -9,7 +9,7 @@ use sqlite3_parser::ast::{self}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::plan::{IterationDirection, Search}; +use crate::translate::plan::{DeletePlan, IterationDirection, Plan, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -20,7 +20,7 @@ use super::expr::{ translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; -use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, Plan}; +use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, SelectPlan}; use super::plan::{ResultSetColumn, SourceOperator}; // Metadata for handling LEFT JOIN operations @@ -175,6 +175,17 @@ pub fn emit_program( database_header: Rc>, mut plan: Plan, connection: Weak, +) -> Result { + match plan { + Plan::Select(plan) => emit_program_for_select(database_header, plan, connection), + Plan::Delete(plan) => emit_program_for_delete(database_header, plan, connection), + } +} + +fn emit_program_for_select( + database_header: Rc>, + mut plan: SelectPlan, + connection: Weak, ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; @@ -286,9 +297,9 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } -pub fn emit_program_for_delete( +fn emit_program_for_delete( database_header: Rc>, - mut plan: Plan, + mut plan: DeletePlan, connection: Weak, ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; @@ -925,7 +936,7 @@ pub enum InnerLoopEmitTarget<'a> { /// At this point the cursors for all tables have been opened and rewound. fn inner_loop_emit( program: &mut ProgramBuilder, - plan: &mut Plan, + plan: &mut SelectPlan, metadata: &mut Metadata, ) -> Result<()> { // if we have a group by, we emit a record into the group by sorter. diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 9e93b1d71..f86c20c26 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -6,15 +6,22 @@ use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Plan, Search, SourceOperator, + DeletePlan, Direction, IterationDirection, Plan, Search, SelectPlan, SourceOperator, }; +pub fn optimize_plan(mut plan: Plan) -> Result { + match plan { + Plan::Select(plan) => optimize_select_plan(plan).map(Plan::Select), + Plan::Delete(plan) => optimize_delete_plan(plan).map(Plan::Delete), + } +} + /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_select_plan(mut plan: Plan) -> Result { +fn optimize_select_plan(mut plan: SelectPlan) -> Result { eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? @@ -45,7 +52,7 @@ pub fn optimize_select_plan(mut plan: Plan) -> Result { Ok(plan) } -pub fn optimize_delete_plan(mut plan: Plan) -> Result { +fn optimize_delete_plan(mut plan: DeletePlan) -> Result { eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = eliminate_constants(&mut plan.source, &mut plan.where_clause)? diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8e0fa326e..4cd30d08b 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,11 +1,12 @@ use core::fmt; +use sqlite3_parser::ast; +use std::ptr::write; use std::{ fmt::{Display, Formatter}, rc::Rc, }; -use sqlite3_parser::ast; - +use crate::translate::plan::Plan::{Delete, Select}; use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index}, @@ -27,7 +28,13 @@ pub struct GroupBy { } #[derive(Debug)] -pub struct Plan { +pub enum Plan { + Select(SelectPlan), + Delete(DeletePlan), +} + +#[derive(Debug)] +pub struct SelectPlan { /// A tree of sources (tables). pub source: SourceOperator, /// the columns inside SELECT ... FROM @@ -50,9 +57,32 @@ pub struct Plan { pub contains_constant_false_condition: bool, } +#[derive(Debug)] +pub struct DeletePlan { + /// A tree of sources (tables). + pub source: SourceOperator, + /// the columns inside SELECT ... FROM + pub result_columns: Vec, + /// where clause split into a vec at 'AND' boundaries. + pub where_clause: Option>, + /// order by clause + pub order_by: Option>, + /// limit clause + pub limit: Option, + /// all the tables referenced in the query + pub referenced_tables: Vec, + /// all the indexes available + pub available_indexes: Vec>, + /// query contains a constant condition that is always false + pub contains_constant_false_condition: bool, +} + impl Display for Plan { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.source) + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Select(select_plan) => write!(f, "{}", select_plan.source), + Delete(delete_plan) => write!(f, "{}", delete_plan.source), + } } } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0c417fa66..f9b941571 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,5 +1,6 @@ use super::plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, + Aggregate, BTreeTableReference, DeletePlan, Direction, GroupBy, Plan, ResultSetColumn, + SelectPlan, SourceOperator, }; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; @@ -269,7 +270,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

todo!(), } @@ -504,7 +505,7 @@ pub fn prepare_delete_plan( // Parse and resolve the where_clause let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; - let plan = Plan { + let plan = DeletePlan { source: SourceOperator::Scan { id: 0, table_reference: table_ref.clone(), @@ -513,16 +514,14 @@ pub fn prepare_delete_plan( }, result_columns: vec![], where_clause: resolved_where_clauses, - group_by: None, order_by: None, - aggregates: vec![], limit: None, // TODO: add support for limit referenced_tables: vec![table_ref], available_indexes: vec![], contains_constant_false_condition: false, }; - Ok(plan) + Ok(Plan::Delete(plan)) } #[allow(clippy::type_complexity)] diff --git a/core/translate/select.rs b/core/translate/select.rs index a0a0fa36c..b79560fda 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -4,7 +4,7 @@ use std::{cell::RefCell, rc::Rc}; use super::emitter::emit_program; use super::planner::prepare_select_plan; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::optimizer::optimize_select_plan; +use crate::translate::optimizer::optimize_plan; use crate::Connection; use crate::{schema::Schema, vdbe::Program, Result}; use sqlite3_parser::ast; @@ -16,6 +16,6 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let optimized_plan = optimize_select_plan(select_plan)?; + let optimized_plan = optimize_plan(select_plan)?; emit_program(database_header, optimized_plan, connection) } From 357ab551a5d0243d5c9252c971e3ac81fbace62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Mon, 23 Dec 2024 07:42:12 +0900 Subject: [PATCH 11/39] nit --- core/translate/planner.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index f9b941571..70df2eee9 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -490,7 +490,6 @@ pub fn prepare_delete_plan( tbl_name: &QualifiedName, where_clause: Option, ) -> Result { - // let table_name = tbl_name.name.0.clone(); let table = match schema.get_table(tbl_name.name.0.as_str()) { Some(table) => table, None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), From 906975e1ca7010fdf70391eaa15b1cb224de30d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B9=80=EC=84=A0=EC=9A=B0?= Date: Tue, 24 Dec 2024 12:25:04 +0900 Subject: [PATCH 12/39] Add limit support --- core/translate/delete.rs | 6 +-- core/translate/emitter.rs | 87 +++++++++++++++++++++------------------ core/translate/mod.rs | 2 +- core/translate/planner.rs | 35 +++++++++------- core/vdbe/builder.rs | 11 +++++ 5 files changed, 82 insertions(+), 59 deletions(-) diff --git a/core/translate/delete.rs b/core/translate/delete.rs index b0ecbdc69..135c9d76d 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -3,7 +3,7 @@ use crate::translate::optimizer::optimize_plan; use crate::translate::planner::prepare_delete_plan; use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; use crate::{Connection, Result}; -use sqlite3_parser::ast::{Expr, QualifiedName, ResultColumn}; +use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; @@ -11,11 +11,11 @@ pub fn translate_delete( schema: &Schema, tbl_name: &QualifiedName, where_clause: Option, - _returning: &Option>, + limit: Option, database_header: Rc>, connection: Weak, ) -> Result { - let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause)?; + let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; let optimized_plan = optimize_plan(delete_plan)?; emit_program(database_header, optimized_plan, connection) } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5e5f0d601..a032aa09e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -334,7 +334,7 @@ fn emit_program_for_delete( &mut metadata, )?; - emit_delete_insns(&mut program, &plan.source)?; + emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?; // Clean up and close the main execution loop close_loop( @@ -1246,53 +1246,58 @@ fn close_loop( } } -fn emit_delete_insns(program: &mut ProgramBuilder, source: &SourceOperator) -> Result<()> { - match source { +fn emit_delete_insns( + program: &mut ProgramBuilder, + source: &SourceOperator, + limit: &Option, + metadata: &Metadata, +) -> Result<()> { + let cursor_id = match source { SourceOperator::Scan { - id, - table_reference, - iter_dir, - .. - } => { - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - - // Emit the instructions to delete the row - let key_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: key_reg, - }); - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); - - Ok(()) - } + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier), SourceOperator::Search { - id, table_reference, search, .. - } => { - let cursor_id = match search { - Search::RowidEq { .. } | Search::RowidSearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier) - } - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), - }; + } => match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + }, + _ => return Ok(()), + }; - // Emit the instructions to delete the row - let key_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id, - dest: key_reg, - }); - program.emit_insn(Insn::DeleteAsync { cursor_id }); - program.emit_insn(Insn::DeleteAwait { cursor_id }); - - Ok(()) - } - _ => Ok(()), + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + let jump_label_on_limit_reached = metadata + .termination_label_stack + .last() + .expect("termination_label_stack should not be empty."); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label_on_limit_reached, + }, + *jump_label_on_limit_reached, + ) } + + Ok(()) } /// Emits the bytecode for processing a GROUP BY clause. diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 381c82df1..8d082c1f3 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -82,7 +82,7 @@ pub fn translate( schema, &tbl_name, where_clause, - &returning, + limit, database_header, connection, ), diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 70df2eee9..8a20a4029 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -2,8 +2,10 @@ use super::plan::{ Aggregate, BTreeTableReference, DeletePlan, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan, SourceOperator, }; -use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; -use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, QualifiedName, ResultColumn}; +use crate::{bail_parse_error, function::Func, schema::Schema, util::normalize_ident, Result}; +use sqlite3_parser::ast::{ + self, Expr, FromClause, JoinType, Limit, QualifiedName, ResultColumn, SortedColumn, +}; pub struct OperatorIdCounter { id: usize, @@ -468,15 +470,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - let l = n.parse()?; - Some(l) - } - _ => todo!(), - } - } + plan.limit = select.limit.and_then(|limit| parse_limit(limit)); // Return the unoptimized query plan Ok(Plan::Select(plan)) @@ -489,6 +483,7 @@ pub fn prepare_delete_plan( schema: &Schema, tbl_name: &QualifiedName, where_clause: Option, + limit: Option, ) -> Result { let table = match schema.get_table(tbl_name.name.0.as_str()) { Some(table) => table, @@ -500,10 +495,14 @@ pub fn prepare_delete_plan( table_identifier: table.name.clone(), table_index: 0, }; + let referenced_tables = vec![table_ref.clone()]; - // Parse and resolve the where_clause + // Parse the WHERE clause let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + // Parse the LIMIT clause + let resolved_limit = limit.and_then(|limit| parse_limit(limit)); + let plan = DeletePlan { source: SourceOperator::Scan { id: 0, @@ -514,8 +513,8 @@ pub fn prepare_delete_plan( result_columns: vec![], where_clause: resolved_where_clauses, order_by: None, - limit: None, // TODO: add support for limit - referenced_tables: vec![table_ref], + limit: resolved_limit, + referenced_tables, available_indexes: vec![], contains_constant_false_condition: false, }; @@ -783,6 +782,14 @@ fn parse_join( )) } +fn parse_limit(limit: Limit) -> Option { + if let Expr::Literal(ast::Literal::Numeric(n)) = limit.expr { + n.parse().ok() + } else { + None + } +} + fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 8dd1cd4de..e3c6dc322 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -144,6 +144,17 @@ impl ProgramBuilder { .push((label, insn_reference)); } + /// Resolve unresolved labels to a specific offset in the instruction list. + /// + /// This function updates all instructions that reference the given label + /// to point to the specified offset. It ensures that the label and offset + /// are valid and updates the target program counter (PC) of each instruction + /// that references the label. + /// + /// # Arguments + /// + /// * `label` - The label to resolve. + /// * `to_offset` - The offset to which the labeled instructions should be resolved to. pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { assert!(label < 0); assert!(to_offset >= 0); From 91cca0d5b7b337249a555cfdfa78fc7a4c5ba878 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 10:28:53 +0200 Subject: [PATCH 13/39] use more descriptive names in BTreeCursor::insert_into_cell() --- core/storage/btree.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bdd27932b..d62173342 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -742,7 +742,8 @@ impl BTreeCursor { /// i.e. whether we need to balance the btree after the insert. fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { let free = self.compute_free_space(page, RefCell::borrow(&self.database_header)); - let enough_space = payload.len() + 2 <= free as usize; + const CELL_POINTER_SIZE_BYTES: usize = 2; + let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize; if !enough_space { // add to overflow cell page.overflow_cells.push(OverflowCell { @@ -753,27 +754,30 @@ impl BTreeCursor { } // TODO: insert into cell payload in internal page - let pc = self.allocate_cell_space(page, payload.len() as u16); + let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16); let buf = page.as_ptr(); // copy data - buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload); + buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()] + .copy_from_slice(payload); // memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region(); - let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx); + let (cell_pointer_array_start, _) = page.cell_get_raw_pointer_region(); + let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx); - // move previous pointers forward and insert new pointer there - let n_cells_forward = 2 * (page.cell_count() - cell_idx); - if n_cells_forward > 0 { + // move existing pointers forward by CELL_POINTER_SIZE_BYTES... + let n_cells_forward = page.cell_count() - cell_idx; + let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward; + if n_bytes_forward > 0 { buf.copy_within( - pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward, - pointer_area_pc_by_idx + 2, + cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward, + cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES, ); } - page.write_u16(pointer_area_pc_by_idx - page.offset, pc); + // ...and insert new cell pointer at the current index + page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer); - // update first byte of content area - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc); + // update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data) + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer); // update cell count let new_n_cells = (page.cell_count() + 1) as u16; From c6b7ddf77adee9e57fcfa99e34b295914ad3e438 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 10:30:27 +0200 Subject: [PATCH 14/39] Improve comments in BTreeCursor::compute_free_space() --- core/storage/btree.rs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index d62173342..5a0a51e33 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1441,10 +1441,14 @@ impl BTreeCursor { let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; // The amount of free space is the sum of: - // 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space) - // 2. fragmented_free_bytes. + // #1. space to the left of the cell content area pointer + // #2. fragmented_free_bytes (isolated 1-3 byte chunks of free space within the cell content area) + // #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) + + // #1 and #2 are known from the page header let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; + // #3 is computed by iterating over the freeblocks linked list let mut pc = free_block_pointer as usize; if pc > 0 { if pc < first_byte_in_cell_content as usize { @@ -1457,28 +1461,33 @@ impl BTreeCursor { let mut size = 0; loop { // TODO: check corruption icellast - next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; - size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; + next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; // first 2 bytes in freeblock = next freeblock pointer + size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; // next 2 bytes in freeblock = size of current freeblock nfree += size; + // Freeblocks are in order from left to right on the page, + // so next pointer should > current pointer + its size, or 0 if no next block exists. if next <= pc + size + 3 { break; } pc = next; } - if next > 0 { - todo!("corrupted page ascending order"); - } + // Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list + assert!( + next == 0, + "corrupted page: freeblocks list not in ascending order" + ); - if pc + size > usable_space { - todo!("corrupted page last freeblock extends last page end"); - } + assert!( + pc + size <= usable_space, + "corrupted page: last freeblock extends last page end" + ); } // if( nFree>usableSize || nFree Date: Tue, 24 Dec 2024 18:50:16 +0200 Subject: [PATCH 15/39] refactor compute_free_space() --- core/storage/btree.rs | 63 +++++++++++++++++++--------------- core/storage/sqlite3_ondisk.rs | 50 +++++++++++++++++---------- 2 files changed, 67 insertions(+), 46 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 5a0a51e33..1ff53e7d2 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1415,10 +1415,12 @@ impl BTreeCursor { #[allow(unused_assignments)] fn compute_free_space(&self, page: &PageContent, db_header: Ref) -> u16 { // TODO(pere): maybe free space is not calculated correctly with offset - let buf = page.as_ptr(); + // Usable space, not the same as free space, simply means: + // space that is not reserved for extensions by sqlite. Usually reserved_space is 0. let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; - let mut first_byte_in_cell_content = page.cell_content_area(); + + let mut cell_content_area_start = page.cell_content_area(); // A zero value for the cell content area pointer is interpreted as 65536. // See https://www.sqlite.org/fileformat.html // The max page size for a sqlite database is 64kiB i.e. 65536 bytes. @@ -1428,30 +1430,23 @@ impl BTreeCursor { // 1. the page size is 64kiB // 2. there are no cells on the page // 3. there is no reserved space at the end of the page - if first_byte_in_cell_content == 0 { - first_byte_in_cell_content = u16::MAX; + if cell_content_area_start == 0 { + cell_content_area_start = u16::MAX; } - let fragmented_free_bytes = page.num_frag_free_bytes(); - let free_block_pointer = page.first_freeblock(); - let ncell = page.cell_count(); - - // 8 + 4 == header end - let child_pointer_size = if page.is_leaf() { 0 } else { 4 }; - let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; - // The amount of free space is the sum of: - // #1. space to the left of the cell content area pointer - // #2. fragmented_free_bytes (isolated 1-3 byte chunks of free space within the cell content area) + // #1. the size of the unallocated region + // #2. fragments (isolated 1-3 byte chunks of free space within the cell content area) // #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) - // #1 and #2 are known from the page header - let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; + let mut free_space_bytes = + page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize; // #3 is computed by iterating over the freeblocks linked list - let mut pc = free_block_pointer as usize; - if pc > 0 { - if pc < first_byte_in_cell_content as usize { + let mut cur_freeblock_ptr = page.first_freeblock() as usize; + let page_buf = page.as_ptr(); + if cur_freeblock_ptr > 0 { + if cur_freeblock_ptr < cell_content_area_start as usize { // Freeblocks exist in the cell content area e.g. after deletions // They should never exist in the unused area of the page. todo!("corrupted page"); @@ -1461,15 +1456,23 @@ impl BTreeCursor { let mut size = 0; loop { // TODO: check corruption icellast - next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; // first 2 bytes in freeblock = next freeblock pointer - size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; // next 2 bytes in freeblock = size of current freeblock - nfree += size; + next = u16::from_be_bytes( + page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2] + .try_into() + .unwrap(), + ) as usize; // first 2 bytes in freeblock = next freeblock pointer + size = u16::from_be_bytes( + page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4] + .try_into() + .unwrap(), + ) as usize; // next 2 bytes in freeblock = size of current freeblock + free_space_bytes += size; // Freeblocks are in order from left to right on the page, // so next pointer should > current pointer + its size, or 0 if no next block exists. - if next <= pc + size + 3 { + if next <= cur_freeblock_ptr + size + 3 { break; } - pc = next; + cur_freeblock_ptr = next; } // Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list @@ -1479,17 +1482,21 @@ impl BTreeCursor { ); assert!( - pc + size <= usable_space, + cur_freeblock_ptr + size <= usable_space, "corrupted page: last freeblock extends last page end" ); } + assert!( + free_space_bytes <= usable_space, + "corrupted page: free space is greater than usable space" + ); + // if( nFree>usableSize || nFree usize { self.read_u16(3) as usize } + /// The size of the cell pointer array in bytes. + /// 2 bytes per cell pointer + pub fn cell_pointer_array_size(&self) -> usize { + const CELL_POINTER_SIZE_BYTES: usize = 2; + self.cell_count() * CELL_POINTER_SIZE_BYTES + } + + /// The start of the unallocated region. + /// Effectively: the offset after the page header + the cell pointer array. + pub fn unallocated_region_start(&self) -> usize { + self.offset + self.header_size() + self.cell_pointer_array_size() + } + + pub fn unallocated_region_size(&self) -> usize { + self.cell_content_area() as usize - self.unallocated_region_start() + } + /// The start of the cell content area. /// SQLite strives to place cells as far toward the end of the b-tree page as it can, /// in order to leave space for future growth of the cell pointer array. @@ -497,6 +515,17 @@ impl PageContent { self.read_u16(5) } + /// The size of the page header in bytes. + /// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer) + pub fn header_size(&self) -> usize { + match self.page_type() { + PageType::IndexInterior => 12, + PageType::TableInterior => 12, + PageType::IndexLeaf => 8, + PageType::TableLeaf => 8, + } + } + /// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header. /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. pub fn num_frag_free_bytes(&self) -> u8 { @@ -526,12 +555,7 @@ impl PageContent { let ncells = self.cell_count(); // the page header is 12 bytes for interior pages, 8 bytes for leaf pages // this is because the 4 last bytes in the interior page's header are used for the rightmost pointer. - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); let cell_pointer = self.read_u16(cell_pointer) as usize; @@ -553,12 +577,7 @@ impl PageContent { /// - left-most cell (the cell with the smallest key) first and /// - the right-most cell (the cell with the largest key) last. pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { - let cell_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_start = self.header_size(); (self.offset + cell_start, self.cell_count() * 2) } @@ -572,12 +591,7 @@ impl PageContent { ) -> (usize, usize) { let buf = self.as_ptr(); let ncells = self.cell_count(); - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each let cell_pointer = self.read_u16(cell_pointer) as usize; From 42ea9041e1f3ce30ca95a843426d1fa64b0f9015 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 24 Dec 2024 19:21:44 +0200 Subject: [PATCH 16/39] rename cell_get_raw_pointer_region() and refactor a bit --- core/storage/btree.rs | 11 ++++------- core/storage/sqlite3_ondisk.rs | 9 +++++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 1ff53e7d2..387351225 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -761,7 +761,7 @@ impl BTreeCursor { buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()] .copy_from_slice(payload); // memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - let (cell_pointer_array_start, _) = page.cell_get_raw_pointer_region(); + let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size(); let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx); // move existing pointers forward by CELL_POINTER_SIZE_BYTES... @@ -1232,7 +1232,7 @@ impl BTreeCursor { if is_page_1 { // Remove header from child and set offset to 0 let contents = child.get().contents.as_mut().unwrap(); - let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region(); + let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size(); // change cell pointers for cell_idx in 0..contents.cell_count() { let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset; @@ -1288,7 +1288,7 @@ impl BTreeCursor { fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 { let amount = amount as usize; - let (cell_offset, _) = page_ref.cell_get_raw_pointer_region(); + let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size(); let gap = cell_offset + 2 * page_ref.cell_count(); let mut top = page_ref.cell_content_area() as usize; @@ -1330,10 +1330,7 @@ impl BTreeCursor { // TODO: implement fast algorithm let last_cell = usable_space - 4; - let first_cell = { - let (start, end) = cloned_page.cell_get_raw_pointer_region(); - start + end - }; + let first_cell = cloned_page.unallocated_region_start() as u64; if cloned_page.cell_count() > 0 { let page_type = page.page_type(); diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index eb57ff041..0403bee87 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -500,7 +500,8 @@ impl PageContent { /// The start of the unallocated region. /// Effectively: the offset after the page header + the cell pointer array. pub fn unallocated_region_start(&self) -> usize { - self.offset + self.header_size() + self.cell_pointer_array_size() + let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size(); + cell_ptr_array_start + cell_ptr_array_size } pub fn unallocated_region_size(&self) -> usize { @@ -576,9 +577,9 @@ impl PageContent { /// The cell pointers are arranged in key order with: /// - left-most cell (the cell with the smallest key) first and /// - the right-most cell (the cell with the largest key) last. - pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { - let cell_start = self.header_size(); - (self.offset + cell_start, self.cell_count() * 2) + pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) { + let header_size = self.header_size(); + (self.offset + header_size, self.cell_pointer_array_size()) } /* Get region of a cell's payload */ From 51541dd8dc93ba2085988e572403c62220a13d78 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 14:46:38 +0200 Subject: [PATCH 17/39] fix issues with insert --- core/translate/insert.rs | 315 ++++++++++++++++++++++++++++----------- 1 file changed, 232 insertions(+), 83 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 614cde8b2..89665a726 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -2,10 +2,11 @@ use std::rc::Weak; use std::{cell::RefCell, ops::Deref, rc::Rc}; use sqlite3_parser::ast::{ - DistinctNames, InsertBody, QualifiedName, ResolveType, ResultColumn, With, + DistinctNames, Expr, InsertBody, QualifiedName, ResolveType, ResultColumn, With, }; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; +use crate::util::normalize_ident; use crate::{ schema::{Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, @@ -14,13 +15,117 @@ use crate::{ }; use crate::{Connection, Result}; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Helper enum to indicate how a column is being inserted. +/// For example: +/// CREATE TABLE t (a, b, c, d); +/// INSERT INTO t (c, b) VALUES (1, 2); +/// +/// resolve_columns_for_insert() returns [ +/// ColumnToInsert::AutomaticNull, +/// ColumnToInsert::UserProvided { index_in_value_tuple: 1 }, +/// ColumnToInsert::UserProvided { index_in_value_tuple: 0 }, +/// ColumnToInsert::AutomaticNull, +/// ] +enum ColumnToInsert { + /// The column is provided by the user. + UserProvided { index_in_value_tuple: usize }, + /// The column is automatically set to NULL since it was not provided by the user. + AutomaticNull, +} + +/// Resolves how each column in a table should be populated during an INSERT. +/// For each column, determines whether it will: +/// 1. Use a user-provided value from the VALUES clause, or +/// 2. Be automatically set to NULL +/// +/// Two cases are handled: +/// 1. No column list specified in INSERT statement: +/// - Values are assigned to columns in table definition order +/// - If fewer values than columns, remaining columns are NULL +/// 2. Column list specified in INSERT statement: +/// - For specified columns, a ColumnToInsert::UserProvided entry is created. +/// - Any columns not listed are set to NULL, i.e. a ColumnToInsert::AutomaticNull entry is created. +/// +/// Returns a Vec with an entry for each column in the table, +/// indicating how that column should be populated. +fn resolve_columns_for_insert( + table: Rc, + columns: &Option, + values: &[Vec], +) -> Result> { + assert!(table.has_rowid()); + if values.is_empty() { + crate::bail_parse_error!("no values to insert"); + } + + let num_cols_in_table = table.columns().len(); + + if columns.is_none() { + let num_cols = values[0].len(); + // ensure value tuples dont have more columns than the table + if num_cols > num_cols_in_table { + crate::bail_parse_error!( + "table {} has {} columns but {} values were supplied", + table.get_name(), + num_cols_in_table, + num_cols + ); + } + // ensure each value tuple has the same number of columns + for value in values.iter().skip(1) { + if value.len() != num_cols { + crate::bail_parse_error!("all VALUES must have the same number of terms"); + } + } + let columns: Vec = (0..num_cols_in_table) + .map(|i| { + if i < num_cols { + ColumnToInsert::UserProvided { + index_in_value_tuple: i, + } + } else { + ColumnToInsert::AutomaticNull + } + }) + .collect(); + return Ok(columns); + } + + // resolve the given columns to actual table column names and ensure they exist + let columns = columns.as_ref().unwrap(); + let mut resolved_columns: Vec = (0..num_cols_in_table) + .map(|i| ColumnToInsert::AutomaticNull) + .collect(); + for (index_in_value_tuple, column) in columns.iter().enumerate() { + let column_name = normalize_ident(column.0.as_str()); + let column_idx = table + .columns() + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&column_name)); + if let Some(i) = column_idx { + resolved_columns[i] = ColumnToInsert::UserProvided { + index_in_value_tuple, + }; + } else { + crate::bail_parse_error!( + "table {} has no column named {}", + table.get_name(), + column_name + ); + } + } + + Ok(resolved_columns) +} + #[allow(clippy::too_many_arguments)] pub fn translate_insert( schema: &Schema, with: &Option, or_conflict: &Option, tbl_name: &QualifiedName, - _columns: &Option, + columns: &Option, body: &InsertBody, _returning: &Option>, database_header: Rc>, @@ -46,6 +151,10 @@ pub fn translate_insert( None => crate::bail_corrupt_error!("Parse error: no such table: {}", table_name), }; let table = Rc::new(Table::BTree(table)); + if !table.has_rowid() { + crate::bail_parse_error!("INSERT into WITHOUT ROWID table is not supported"); + } + let cursor_id = program.alloc_cursor_id( Some(table_name.0.clone()), Some(table.clone().deref().clone()), @@ -55,18 +164,44 @@ pub fn translate_insert( Table::Index(index) => index.root_page, Table::Pseudo(_) => todo!(), }; + let values = match body { + InsertBody::Select(select, None) => match &select.body.select { + sqlite3_parser::ast::OneSelect::Values(values) => values, + _ => todo!(), + }, + _ => todo!(), + }; - let mut num_cols = table.columns().len(); - if table.has_rowid() { - num_cols += 1; - } - // column_registers_start[0] == rowid if has rowid - let column_registers_start = program.alloc_registers(num_cols); + let columns = resolve_columns_for_insert(table.clone(), columns, values)?; + // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) + let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias); + let has_user_provided_rowid = { + assert!(columns.len() == table.columns().len()); + if let Some(index) = rowid_alias_index { + matches!(columns[index], ColumnToInsert::UserProvided { .. }) + } else { + false + } + }; + + // allocate a register for each column in the table. if not provided by user, they will simply be set as null. + // allocate an extra register for rowid regardless of whether user provided a rowid alias column. + let num_cols = table.columns().len(); + let rowid_reg = program.alloc_registers(num_cols + 1); + let column_registers_start = rowid_reg + 1; + let rowid_alias_reg = { + if has_user_provided_rowid { + Some(column_registers_start + rowid_alias_index.unwrap()) + } else { + None + } + }; - // Coroutine for values let yield_reg = program.alloc_register(); let jump_on_definition_label = program.allocate_label(); { + // Coroutine for values + // TODO/efficiency: only use coroutine when there are multiple values to insert program.emit_insn_with_label_dependency( Insn::InitCoroutine { yield_reg, @@ -75,40 +210,41 @@ pub fn translate_insert( }, jump_on_definition_label, ); - match body { - InsertBody::Select(select, None) => match &select.body.select { - sqlite3_parser::ast::OneSelect::Select { - distinctness: _, - columns: _, - from: _, - where_clause: _, - group_by: _, - window_clause: _, - } => todo!(), - sqlite3_parser::ast::OneSelect::Values(values) => { - for value in values { - for (col, expr) in value.iter().enumerate() { - let mut col = col; - if table.has_rowid() { - col += 1; - } - translate_expr( - &mut program, - None, - expr, - column_registers_start + col, - None, - )?; - } - program.emit_insn(Insn::Yield { - yield_reg, - end_offset: 0, + + for value in values { + // Process each value according to resolved columns + for (i, column) in columns.iter().enumerate() { + match column { + ColumnToInsert::UserProvided { + index_in_value_tuple, + } => { + translate_expr( + &mut program, + None, + value.get(*index_in_value_tuple).expect( + format!( + "values tuple has no value for column {}", + table.column_index_to_name(i).unwrap() + ) + .as_str(), + ), + column_registers_start + i, + None, + )?; + } + ColumnToInsert::AutomaticNull => { + program.emit_insn(Insn::Null { + dest: column_registers_start + i, + dest_end: None, }); + program.mark_last_insn_constant(); } } - }, - InsertBody::DefaultValues => todo!("default values not yet supported"), - _ => todo!(), + } + program.emit_insn(Insn::Yield { + yield_reg, + end_offset: 0, + }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); } @@ -121,6 +257,8 @@ pub fn translate_insert( program.emit_insn(Insn::OpenWriteAwait {}); // Main loop + // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, + // the other row will still be inserted. let record_register = program.alloc_register(); let halt_label = program.allocate_label(); let loop_start_offset = program.offset(); @@ -132,68 +270,79 @@ pub fn translate_insert( halt_label, ); - if table.has_rowid() { - let row_id_reg = column_registers_start; - if let Some(rowid_alias_column) = table.get_rowid_alias_column() { - let key_reg = column_registers_start + 1 + rowid_alias_column.0; - // copy key to rowid - program.emit_insn(Insn::Copy { - src_reg: key_reg, - dst_reg: row_id_reg, - amount: 0, - }); - program.emit_insn(Insn::SoftNull { reg: key_reg }); - } - - let notnull_label = program.allocate_label(); + let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); + if let Some(reg) = rowid_alias_reg { + program.emit_insn(Insn::Copy { + src_reg: reg, + dst_reg: rowid_reg, + amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 + }); + // for the row record, the rowid alias column is always set to NULL + program.emit_insn(Insn::SoftNull { reg }); + // the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction. program.emit_insn_with_label_dependency( Insn::NotNull { - reg: row_id_reg, - target_pc: notnull_label, + reg: rowid_reg, + target_pc: check_rowid_is_integer_label.unwrap(), }, - notnull_label, + check_rowid_is_integer_label.unwrap(), ); - program.emit_insn(Insn::NewRowid { - cursor: cursor_id, - rowid_reg: row_id_reg, - prev_largest_reg: 0, - }); + } - program.resolve_label(notnull_label, program.offset()); - program.emit_insn(Insn::MustBeInt { reg: row_id_reg }); + // Create new rowid if a) not provided by user or b) provided by user but is NULL + program.emit_insn(Insn::NewRowid { + cursor: cursor_id, + rowid_reg: rowid_reg, + prev_largest_reg: 0, + }); + + if let Some(must_be_int_label) = check_rowid_is_integer_label { + program.resolve_label(must_be_int_label, program.offset()); + // If the user provided a rowid, it must be an integer. + program.emit_insn(Insn::MustBeInt { reg: rowid_reg }); + } + + // Check uniqueness constraint for rowid if it was provided by user. + // When the DB allocates it there are no need for separate uniqueness checks. + if has_user_provided_rowid { let make_record_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::NotExists { cursor: cursor_id, - rowid_reg: row_id_reg, + rowid_reg: rowid_reg, target_pc: make_record_label, }, make_record_label, ); - // TODO: rollback + let rowid_column_name = if let Some(index) = rowid_alias_index { + table.column_index_to_name(index).unwrap() + } else { + "rowid" + }; + program.emit_insn(Insn::Halt { err_code: SQLITE_CONSTRAINT_PRIMARYKEY, - description: format!( - "{}.{}", - table.get_name(), - table.column_index_to_name(0).unwrap() - ), + description: format!("{}.{}", table.get_name(), rowid_column_name), }); + program.resolve_label(make_record_label, program.offset()); - program.emit_insn(Insn::MakeRecord { - start_reg: column_registers_start + 1, - count: num_cols - 1, - dest_reg: record_register, - }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: column_registers_start, - record_reg: record_register, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); } + // Create and insert the record + program.emit_insn(Insn::MakeRecord { + start_reg: column_registers_start, + count: num_cols, + dest_reg: record_register, + }); + + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg: record_register, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + program.emit_insn(Insn::Goto { target_pc: loop_start_offset, }); From fa5ca68eec2f327434520c215833c3aad68f68b6 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 15:12:53 +0200 Subject: [PATCH 18/39] Add multi-row insert to simulator --- simulator/generation/plan.rs | 4 ++-- simulator/generation/query.rs | 13 +++++++++---- simulator/model/query.rs | 17 ++++++++++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 61b115f01..82c75c4e3 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -106,7 +106,7 @@ impl Interactions { .iter_mut() .find(|t| t.name == insert.table) .unwrap(); - table.rows.push(insert.values.clone()); + table.rows.extend(insert.values.clone()); } Query::Delete(_) => todo!(), Query::Select(_) => {} @@ -320,7 +320,7 @@ fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Inte // Insert the row let insert_query = Interaction::Query(Query::Insert(Insert { table: table.name.clone(), - values: row.clone(), + values: vec![row.clone()], })); // Select the row diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index ca6926650..0ff9d44e1 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -37,10 +37,15 @@ impl ArbitraryFrom> for Select { impl ArbitraryFrom
for Insert { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let values = table - .columns - .iter() - .map(|c| Value::arbitrary_from(rng, &c.column_type)) + let num_rows = rng.gen_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| Value::arbitrary_from(rng, &c.column_type)) + .collect() + }) .collect(); Insert { table: table.name.clone(), diff --git a/simulator/model/query.rs b/simulator/model/query.rs index eeec68d08..7a12def8d 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -75,7 +75,7 @@ pub(crate) struct Select { #[derive(Clone, Debug, PartialEq)] pub(crate) struct Insert { pub(crate) table: String, - pub(crate) values: Vec, + pub(crate) values: Vec>, } #[derive(Clone, Debug, PartialEq)] @@ -104,14 +104,21 @@ impl Display for Query { predicate: guard, }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), Query::Insert(Insert { table, values }) => { - write!(f, "INSERT INTO {} VALUES (", table)?; - for (i, v) in values.iter().enumerate() { + write!(f, "INSERT INTO {} VALUES ", table)?; + for (i, row) in values.iter().enumerate() { if i != 0 { write!(f, ", ")?; } - write!(f, "{}", v)?; + write!(f, "(")?; + for (j, value) in row.iter().enumerate() { + if j != 0 { + write!(f, ", ")?; + } + write!(f, "{}", value)?; + } + write!(f, ")")?; } - write!(f, ")") + Ok(()) } Query::Delete(Delete { table, From c78a3e952a73854002a0b72b3a550ccc6064d73c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 19:33:16 +0200 Subject: [PATCH 19/39] clean up implementation --- core/translate/insert.rs | 173 +++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 90 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 89665a726..aee6e0ec4 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -8,115 +8,109 @@ use sqlite3_parser::ast::{ use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; use crate::util::normalize_ident; use crate::{ - schema::{Schema, Table}, + schema::{Column, Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, translate::expr::translate_expr, vdbe::{builder::ProgramBuilder, Insn, Program}, }; use crate::{Connection, Result}; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -/// Helper enum to indicate how a column is being inserted. -/// For example: -/// CREATE TABLE t (a, b, c, d); -/// INSERT INTO t (c, b) VALUES (1, 2); -/// -/// resolve_columns_for_insert() returns [ -/// ColumnToInsert::AutomaticNull, -/// ColumnToInsert::UserProvided { index_in_value_tuple: 1 }, -/// ColumnToInsert::UserProvided { index_in_value_tuple: 0 }, -/// ColumnToInsert::AutomaticNull, -/// ] -enum ColumnToInsert { - /// The column is provided by the user. - UserProvided { index_in_value_tuple: usize }, - /// The column is automatically set to NULL since it was not provided by the user. - AutomaticNull, +#[derive(Debug)] +/// Represents how a column should be populated during an INSERT. +/// Contains both the column definition and optionally the index into the VALUES tuple. +struct ColumnMapping<'a> { + /// Reference to the column definition from the table schema + column: &'a Column, + /// If Some(i), use the i-th value from the VALUES tuple + /// If None, use NULL (column was not specified in INSERT statement) + value_index: Option, } /// Resolves how each column in a table should be populated during an INSERT. -/// For each column, determines whether it will: -/// 1. Use a user-provided value from the VALUES clause, or -/// 2. Be automatically set to NULL +/// Returns a Vec of ColumnMapping, one for each column in the table's schema. +/// +/// For each column, specifies: +/// 1. The column definition (type, constraints, etc) +/// 2. Where to get the value from: +/// - Some(i) -> use i-th value from the VALUES tuple +/// - None -> use NULL (column wasn't specified in INSERT) /// /// Two cases are handled: -/// 1. No column list specified in INSERT statement: +/// 1. No column list specified (INSERT INTO t VALUES ...): /// - Values are assigned to columns in table definition order -/// - If fewer values than columns, remaining columns are NULL -/// 2. Column list specified in INSERT statement: -/// - For specified columns, a ColumnToInsert::UserProvided entry is created. -/// - Any columns not listed are set to NULL, i.e. a ColumnToInsert::AutomaticNull entry is created. -/// -/// Returns a Vec with an entry for each column in the table, -/// indicating how that column should be populated. -fn resolve_columns_for_insert( - table: Rc
, +/// - If fewer values than columns, remaining columns map to None +/// 2. Column list specified (INSERT INTO t (col1, col3) VALUES ...): +/// - Named columns map to their corresponding value index +/// - Unspecified columns map to None +fn resolve_columns_for_insert<'a>( + table: &'a Table, columns: &Option, values: &[Vec], -) -> Result> { - assert!(table.has_rowid()); +) -> Result>> { if values.is_empty() { crate::bail_parse_error!("no values to insert"); } - let num_cols_in_table = table.columns().len(); + let table_columns = table.columns(); + // Case 1: No columns specified - map values to columns in order if columns.is_none() { - let num_cols = values[0].len(); - // ensure value tuples dont have more columns than the table - if num_cols > num_cols_in_table { + let num_values = values[0].len(); + if num_values > table_columns.len() { crate::bail_parse_error!( "table {} has {} columns but {} values were supplied", table.get_name(), - num_cols_in_table, - num_cols + table_columns.len(), + num_values ); } - // ensure each value tuple has the same number of columns + + // Verify all value tuples have same length for value in values.iter().skip(1) { - if value.len() != num_cols { + if value.len() != num_values { crate::bail_parse_error!("all VALUES must have the same number of terms"); } } - let columns: Vec = (0..num_cols_in_table) - .map(|i| { - if i < num_cols { - ColumnToInsert::UserProvided { - index_in_value_tuple: i, - } - } else { - ColumnToInsert::AutomaticNull - } + + // Map each column to either its corresponding value index or None + return Ok(table_columns + .iter() + .enumerate() + .map(|(i, col)| ColumnMapping { + column: col, + value_index: if i < num_values { Some(i) } else { None }, }) - .collect(); - return Ok(columns); + .collect()); } - // resolve the given columns to actual table column names and ensure they exist - let columns = columns.as_ref().unwrap(); - let mut resolved_columns: Vec = (0..num_cols_in_table) - .map(|i| ColumnToInsert::AutomaticNull) + // Case 2: Columns specified - map named columns to their values + let mut mappings: Vec<_> = table_columns + .iter() + .map(|col| ColumnMapping { + column: col, + value_index: None, + }) .collect(); - for (index_in_value_tuple, column) in columns.iter().enumerate() { - let column_name = normalize_ident(column.0.as_str()); - let column_idx = table - .columns() + + // Map each named column to its value index + for (value_index, column_name) in columns.as_ref().unwrap().iter().enumerate() { + let column_name = normalize_ident(column_name.0.as_str()); + let table_index = table_columns .iter() .position(|c| c.name.eq_ignore_ascii_case(&column_name)); - if let Some(i) = column_idx { - resolved_columns[i] = ColumnToInsert::UserProvided { - index_in_value_tuple, - }; - } else { + + if table_index.is_none() { crate::bail_parse_error!( "table {} has no column named {}", table.get_name(), column_name ); } + + mappings[table_index.unwrap()].value_index = Some(value_index); } - Ok(resolved_columns) + Ok(mappings) } #[allow(clippy::too_many_arguments)] @@ -172,13 +166,13 @@ pub fn translate_insert( _ => todo!(), }; - let columns = resolve_columns_for_insert(table.clone(), columns, values)?; + let column_mappings = resolve_columns_for_insert(&table, columns, values)?; // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias); let has_user_provided_rowid = { - assert!(columns.len() == table.columns().len()); + assert!(column_mappings.len() == table.columns().len()); if let Some(index) = rowid_alias_index { - matches!(columns[index], ColumnToInsert::UserProvided { .. }) + column_mappings[index].value_index.is_some() } else { false } @@ -213,31 +207,30 @@ pub fn translate_insert( for value in values { // Process each value according to resolved columns - for (i, column) in columns.iter().enumerate() { - match column { - ColumnToInsert::UserProvided { - index_in_value_tuple, - } => { - translate_expr( - &mut program, - None, - value.get(*index_in_value_tuple).expect( - format!( - "values tuple has no value for column {}", - table.column_index_to_name(i).unwrap() - ) - .as_str(), - ), - column_registers_start + i, - None, - )?; - } - ColumnToInsert::AutomaticNull => { + for (i, mapping) in column_mappings.iter().enumerate() { + let target_reg = column_registers_start + i; + + if let Some(value_index) = mapping.value_index { + // Column has a value in the VALUES tuple + translate_expr( + &mut program, + None, + value.get(value_index).expect("value index out of bounds"), + target_reg, + None, + )?; + } else { + // Column was not specified - use NULL if it is nullable, otherwise error. + // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. + let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; + if is_nullable { program.emit_insn(Insn::Null { - dest: column_registers_start + i, + dest: target_reg, dest_end: None, }); program.mark_last_insn_constant(); + } else { + crate::bail_parse_error!("column {} is not nullable", mapping.column.name); } } } From 050b8744eaabe0e7914624869ff7c2c508e8a5a0 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 23:21:48 +0200 Subject: [PATCH 20/39] Dont use coroutine when inserting a single row --- core/translate/insert.rs | 192 ++++++++++++++++++++++++++------------- 1 file changed, 128 insertions(+), 64 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index aee6e0ec4..bd9caae4c 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -113,6 +113,58 @@ fn resolve_columns_for_insert<'a>( Ok(mappings) } +/// Populates the column registers with values for a single row +fn populate_column_registers( + program: &mut ProgramBuilder, + value: &[Expr], + column_mappings: &[ColumnMapping], + column_registers_start: usize, + inserting_multiple_rows: bool, + rowid_reg: usize, +) -> Result<()> { + for (i, mapping) in column_mappings.iter().enumerate() { + let target_reg = column_registers_start + i; + + // Column has a value in the VALUES tuple + if let Some(value_index) = mapping.value_index { + // When inserting a single row, SQLite writes the value provided for the rowid alias column (INTEGER PRIMARY KEY) + // directly into the rowid register and writes a NULL into the rowid alias column. Not sure why this only happens + // in the single row case, but let's copy it. + let write_directly_to_rowid_reg = + mapping.column.is_rowid_alias && !inserting_multiple_rows; + let reg = if write_directly_to_rowid_reg { + rowid_reg + } else { + target_reg + }; + translate_expr( + program, + None, + value.get(value_index).expect("value index out of bounds"), + reg, + None, + )?; + if write_directly_to_rowid_reg { + program.emit_insn(Insn::SoftNull { reg: target_reg }); + } + } else { + // Column was not specified - use NULL if it is nullable, otherwise error + // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. + let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; + if is_nullable { + program.emit_insn(Insn::Null { + dest: target_reg, + dest_end: None, + }); + program.mark_last_insn_constant(); + } else { + crate::bail_parse_error!("column {} is not nullable", mapping.column.name); + } + } + } + Ok(()) +} + #[allow(clippy::too_many_arguments)] pub fn translate_insert( schema: &Schema, @@ -191,11 +243,16 @@ pub fn translate_insert( } }; - let yield_reg = program.alloc_register(); - let jump_on_definition_label = program.allocate_label(); - { - // Coroutine for values - // TODO/efficiency: only use coroutine when there are multiple values to insert + let record_register = program.alloc_register(); + let halt_label = program.allocate_label(); + let mut loop_start_offset = 0; + + let inserting_multiple_rows = values.len() > 1; + + // Multiple rows - use coroutine for value population + if inserting_multiple_rows { + let yield_reg = program.alloc_register(); + let jump_on_definition_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::InitCoroutine { yield_reg, @@ -206,72 +263,75 @@ pub fn translate_insert( ); for value in values { - // Process each value according to resolved columns - for (i, mapping) in column_mappings.iter().enumerate() { - let target_reg = column_registers_start + i; - - if let Some(value_index) = mapping.value_index { - // Column has a value in the VALUES tuple - translate_expr( - &mut program, - None, - value.get(value_index).expect("value index out of bounds"), - target_reg, - None, - )?; - } else { - // Column was not specified - use NULL if it is nullable, otherwise error. - // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. - let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; - if is_nullable { - program.emit_insn(Insn::Null { - dest: target_reg, - dest_end: None, - }); - program.mark_last_insn_constant(); - } else { - crate::bail_parse_error!("column {} is not nullable", mapping.column.name); - } - } - } + populate_column_registers( + &mut program, + value, + &column_mappings, + column_registers_start, + true, + rowid_reg, + )?; program.emit_insn(Insn::Yield { yield_reg, end_offset: 0, }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); + program.resolve_label(jump_on_definition_label, program.offset()); + + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + // Main loop + // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, + // the other row will still be inserted. + loop_start_offset = program.offset(); + program.emit_insn_with_label_dependency( + Insn::Yield { + yield_reg, + end_offset: halt_label, + }, + halt_label, + ); + } else { + // Single row - populate registers directly + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + populate_column_registers( + &mut program, + &values[0], + &column_mappings, + column_registers_start, + false, + rowid_reg, + )?; } - program.resolve_label(jump_on_definition_label, program.offset()); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - - // Main loop - // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, - // the other row will still be inserted. - let record_register = program.alloc_register(); - let halt_label = program.allocate_label(); - let loop_start_offset = program.offset(); - program.emit_insn_with_label_dependency( - Insn::Yield { - yield_reg, - end_offset: halt_label, - }, - halt_label, - ); - + // Common record insertion logic for both single and multiple rows let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); if let Some(reg) = rowid_alias_reg { - program.emit_insn(Insn::Copy { - src_reg: reg, - dst_reg: rowid_reg, - amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 - }); - // for the row record, the rowid alias column is always set to NULL - program.emit_insn(Insn::SoftNull { reg }); + // for the row record, the rowid alias column (INTEGER PRIMARY KEY) is always set to NULL + // and its value is copied to the rowid register. in the case where a single row is inserted, + // the value is written directly to the rowid register (see populate_column_registers()). + // again, not sure why this only happens in the single row case, but let's mimic sqlite. + // in the single row case we save a Copy instruction, but in the multiple rows case we do + // it here in the loop. + if inserting_multiple_rows { + program.emit_insn(Insn::Copy { + src_reg: reg, + dst_reg: rowid_reg, + amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 + }); + // for the row record, the rowid alias column is always set to NULL + program.emit_insn(Insn::SoftNull { reg }); + } // the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction. program.emit_insn_with_label_dependency( Insn::NotNull { @@ -336,15 +396,19 @@ pub fn translate_insert( }); program.emit_insn(Insn::InsertAwait { cursor_id }); - program.emit_insn(Insn::Goto { - target_pc: loop_start_offset, - }); + if inserting_multiple_rows { + // For multiple rows, loop back + program.emit_insn(Insn::Goto { + target_pc: loop_start_offset, + }); + } program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); + program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Transaction { write: true }); program.emit_constant_insns(); From c4e2a344ae2462008b20900ab002d2b719b0a871 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 21 Dec 2024 23:44:41 +0200 Subject: [PATCH 21/39] parse error instead of assert! for unsupported features --- core/translate/insert.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/translate/insert.rs b/core/translate/insert.rs index bd9caae4c..12c9ed016 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -169,7 +169,7 @@ fn populate_column_registers( pub fn translate_insert( schema: &Schema, with: &Option, - or_conflict: &Option, + on_conflict: &Option, tbl_name: &QualifiedName, columns: &Option, body: &InsertBody, @@ -177,8 +177,12 @@ pub fn translate_insert( database_header: Rc>, connection: Weak, ) -> Result { - assert!(with.is_none()); - assert!(or_conflict.is_none()); + if with.is_some() { + crate::bail_parse_error!("WITH clause is not supported"); + } + if on_conflict.is_some() { + crate::bail_parse_error!("ON CONFLICT clause is not supported"); + } let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); program.emit_insn_with_label_dependency( From 78da71c72a8e427ad1866d32cce523e34d223353 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:08:11 +0200 Subject: [PATCH 22/39] encode integers with proper varint types --- core/types.rs | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/core/types.rs b/core/types.rs index 5f1b55d7b..ec86efe93 100644 --- a/core/types.rs +++ b/core/types.rs @@ -394,11 +394,20 @@ impl OwnedRecord { pub fn serialize(&self, buf: &mut Vec) { let initial_i = buf.len(); + let mut serial_types = Vec::with_capacity(self.values.len()); + // First pass: calculate serial types and store them for value in &self.values { let serial_type = match value { OwnedValue::Null => 0, - OwnedValue::Integer(_) => 6, // for now let's only do i64 + OwnedValue::Integer(i) => match i { + i if *i >= -128 && *i <= 127 => 1, // 8-bit + i if *i >= -32768 && *i <= 32767 => 2, // 16-bit + i if *i >= -8388608 && *i <= 8388607 => 3, // 24-bit + i if *i >= -2147483648 && *i <= 2147483647 => 4, // 32-bit + i if *i >= -140737488355328 && *i <= 140737488355327 => 5, // 48-bit + _ => 6, // 64-bit + }, OwnedValue::Float(_) => 7, OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, @@ -411,15 +420,24 @@ impl OwnedRecord { let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type); buf.truncate(buf.len() - 9 + n); // Remove unused bytes + + serial_types.push(serial_type); } let mut header_size = buf.len() - initial_i; // write content - for value in &self.values { - // TODO: make integers and floats with smaller serial types + for (value, &serial_type) in self.values.iter().zip(serial_types.iter()) { match value { OwnedValue::Null => {} - OwnedValue::Integer(i) => buf.extend_from_slice(&i.to_be_bytes()), + OwnedValue::Integer(i) => match serial_type { + 1 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + 2 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + 3 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), + 4 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + 5 => buf.extend_from_slice(&i.to_be_bytes()[2..]), + 6 => buf.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + }, OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), From 6bf1ab7726fe06dd4879036314b8f470d907a070 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:34:34 +0200 Subject: [PATCH 23/39] add consts for integer lo/hi values and serial types --- core/types.rs | 51 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/core/types.rs b/core/types.rs index ec86efe93..ad201572e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -387,6 +387,27 @@ pub struct OwnedRecord { pub values: Vec, } +const I8_LOW: i64 = -128; +const I8_HIGH: i64 = 127; +const I16_LOW: i64 = -32768; +const I16_HIGH: i64 = 32767; +const I24_LOW: i64 = -8388608; +const I24_HIGH: i64 = 8388607; +const I32_LOW: i64 = -2147483648; +const I32_HIGH: i64 = 2147483647; +const I48_LOW: i64 = -140737488355328; +const I48_HIGH: i64 = 140737488355327; + +// https://www.sqlite.org/fileformat.html#record_format +const SERIAL_TYPE_INTEGER_ZERO: u64 = 0; +const SERIAL_TYPE_I8: u64 = 1; +const SERIAL_TYPE_I16: u64 = 2; +const SERIAL_TYPE_I24: u64 = 3; +const SERIAL_TYPE_I32: u64 = 4; +const SERIAL_TYPE_I48: u64 = 5; +const SERIAL_TYPE_I64: u64 = 6; +const SERIAL_TYPE_F64: u64 = 7; + impl OwnedRecord { pub fn new(values: Vec) -> Self { Self { values } @@ -399,16 +420,16 @@ impl OwnedRecord { // First pass: calculate serial types and store them for value in &self.values { let serial_type = match value { - OwnedValue::Null => 0, + OwnedValue::Null => SERIAL_TYPE_INTEGER_ZERO, OwnedValue::Integer(i) => match i { - i if *i >= -128 && *i <= 127 => 1, // 8-bit - i if *i >= -32768 && *i <= 32767 => 2, // 16-bit - i if *i >= -8388608 && *i <= 8388607 => 3, // 24-bit - i if *i >= -2147483648 && *i <= 2147483647 => 4, // 32-bit - i if *i >= -140737488355328 && *i <= 140737488355327 => 5, // 48-bit - _ => 6, // 64-bit + i if *i >= I8_LOW && *i <= I8_HIGH => SERIAL_TYPE_I8, + i if *i >= I16_LOW && *i <= I16_HIGH => SERIAL_TYPE_I16, + i if *i >= I24_LOW && *i <= I24_HIGH => SERIAL_TYPE_I24, + i if *i >= I32_LOW && *i <= I32_HIGH => SERIAL_TYPE_I32, + i if *i >= I48_LOW && *i <= I48_HIGH => SERIAL_TYPE_I48, + _ => SERIAL_TYPE_I64, }, - OwnedValue::Float(_) => 7, + OwnedValue::Float(_) => SERIAL_TYPE_F64, OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, // not serializable values @@ -416,7 +437,7 @@ impl OwnedRecord { OwnedValue::Record(_) => unreachable!(), }; - buf.resize(buf.len() + 9, 0); // Ensure space for varint + buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); let n = write_varint(&mut buf[len - 9..], serial_type); buf.truncate(buf.len() - 9 + n); // Remove unused bytes @@ -430,12 +451,12 @@ impl OwnedRecord { match value { OwnedValue::Null => {} OwnedValue::Integer(i) => match serial_type { - 1 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - 2 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - 3 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), - 4 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - 5 => buf.extend_from_slice(&i.to_be_bytes()[2..]), - 6 => buf.extend_from_slice(&i.to_be_bytes()), + SERIAL_TYPE_I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SERIAL_TYPE_I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SERIAL_TYPE_I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SERIAL_TYPE_I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SERIAL_TYPE_I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SERIAL_TYPE_I64 => buf.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), }, OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), From 381335724a12deb1f3033bc2428cb46633a61c1f Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 22:57:55 +0200 Subject: [PATCH 24/39] add tests for serialize() --- core/types.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/core/types.rs b/core/types.rs index ad201572e..20f1943ab 100644 --- a/core/types.rs +++ b/core/types.rs @@ -528,3 +528,206 @@ pub trait Cursor { fn get_null_flag(&self) -> bool; fn btree_create(&mut self, flags: usize) -> u32; } + +#[cfg(test)] +mod tests { + use super::*; + use std::rc::Rc; + + #[test] + fn test_serialize_null() { + let record = OwnedRecord::new(vec![OwnedValue::Null]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + // Check that the buffer is empty after the header + assert_eq!(buf.len(), header_length); + } + + #[test] + fn test_serialize_integers() { + let record = OwnedRecord::new(vec![ + OwnedValue::Integer(42), // Should use SERIAL_TYPE_I8 + OwnedValue::Integer(1000), // Should use SERIAL_TYPE_I16 + OwnedValue::Integer(1_000_000), // Should use SERIAL_TYPE_I24 + OwnedValue::Integer(1_000_000_000), // Should use SERIAL_TYPE_I32 + OwnedValue::Integer(1_000_000_000_000), // Should use SERIAL_TYPE_I48 + OwnedValue::Integer(i64::MAX), // Should use SERIAL_TYPE_I64 + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert!(header[0] == header_length as u8); // Header should be larger than number of values + + // Check that correct serial types were chosen + assert_eq!(header[1], SERIAL_TYPE_I8 as u8); + assert_eq!(header[2], SERIAL_TYPE_I16 as u8); + assert_eq!(header[3], SERIAL_TYPE_I24 as u8); + assert_eq!(header[4], SERIAL_TYPE_I32 as u8); + assert_eq!(header[5], SERIAL_TYPE_I48 as u8); + assert_eq!(header[6], SERIAL_TYPE_I64 as u8); + + // test that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i16_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i24_bytes = &buf[cur_offset..cur_offset + size_of::() - 1]; + cur_offset += size_of::() - 1; // i24 + let i32_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i48_bytes = &buf[cur_offset..cur_offset + size_of::() - 2]; + cur_offset += size_of::() - 2; // i48 + let i64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_int16 = i16::from_be_bytes(i16_bytes.try_into().unwrap()); + + let mut leading_0 = vec![0]; + leading_0.extend(i24_bytes); + let val_int24 = i32::from_be_bytes(leading_0.try_into().unwrap()); + + let val_int32 = i32::from_be_bytes(i32_bytes.try_into().unwrap()); + + let mut leading_00 = vec![0, 0]; + leading_00.extend(i48_bytes); + let val_int48 = i64::from_be_bytes(leading_00.try_into().unwrap()); + + let val_int64 = i64::from_be_bytes(i64_bytes.try_into().unwrap()); + + assert_eq!(val_int8, 42); + assert_eq!(val_int16, 1000); + assert_eq!(val_int24, 1_000_000); + assert_eq!(val_int32, 1_000_000_000); + assert_eq!(val_int48, 1_000_000_000_000); + assert_eq!(val_int64, i64::MAX); + + // assert correct size of buffer: header + values (bytes per value depends on serial type) + assert_eq!( + buf.len(), + header_length + + size_of::() + + size_of::() + + (size_of::() - 1) // i24 + + size_of::() + + (size_of::() - 2) // i48 + + size_of::() + ); + } + + #[test] + fn test_serialize_float() { + let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for FLOAT + assert_eq!(header[1], SERIAL_TYPE_F64 as u8); + // Check that the bytes after the header can be interpreted as the float + let float_bytes = &buf[header_length..header_length + size_of::()]; + let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); + assert_eq!(float, 3.14159); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + size_of::()); + } + + #[test] + fn test_serialize_text() { + let text = Rc::new("hello".to_string()); + let record = OwnedRecord::new(vec![OwnedValue::Text(LimboText::new(text.clone()))]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[1], (5 * 2 + 13) as u8); + // Check the actual text bytes + assert_eq!(&buf[2..7], b"hello"); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + text.len()); + } + + #[test] + fn test_serialize_blob() { + let blob = Rc::new(vec![1, 2, 3, 4, 5]); + let record = OwnedRecord::new(vec![OwnedValue::Blob(blob.clone())]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for BLOB, which is (len * 2 + 12) + assert_eq!(header[1], (5 * 2 + 12) as u8); + // Check the actual blob bytes + assert_eq!(&buf[2..7], &[1, 2, 3, 4, 5]); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + blob.len()); + } + + #[test] + fn test_serialize_mixed_types() { + let text = Rc::new("test".to_string()); + let record = OwnedRecord::new(vec![ + OwnedValue::Null, + OwnedValue::Integer(42), + OwnedValue::Float(3.14), + OwnedValue::Text(LimboText::new(text.clone())), + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + // Third byte should be serial type for I8 + assert_eq!(header[2], SERIAL_TYPE_I8 as u8); + // Fourth byte should be serial type for F64 + assert_eq!(header[3], SERIAL_TYPE_F64 as u8); + // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[4], (4 * 2 + 13) as u8); + + // Check that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let f64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let text_bytes = &buf[cur_offset..cur_offset + text.len()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_float = f64::from_be_bytes(f64_bytes.try_into().unwrap()); + let val_text = String::from_utf8(text_bytes.to_vec()).unwrap(); + + assert_eq!(val_int8, 42); + assert_eq!(val_float, 3.14); + assert_eq!(val_text, "test"); + + // Check that buffer length is correct + assert_eq!( + buf.len(), + header_length + size_of::() + size_of::() + text.len() + ); + } +} From 80933a32e9bc46d65b117cb1c051e526fdb3b705 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 25 Dec 2024 23:09:23 +0200 Subject: [PATCH 25/39] remove space allocated for overflow pointer in non-overflow cases --- core/storage/btree.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bdd27932b..e72cbfa6d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1514,7 +1514,6 @@ impl BTreeCursor { if record_buf.len() <= payload_overflow_threshold_max { // enough allowed space to fit inside a btree page cell_payload.extend_from_slice(record_buf.as_slice()); - cell_payload.resize(cell_payload.len() + 4, 0); return; } log::debug!("fill_cell_payload(overflow)"); From c7448d29176adc599cca43fab4a4115751502674 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Thu, 26 Dec 2024 10:49:43 +0200 Subject: [PATCH 26/39] no allocation for serial types --- core/types.rs | 139 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 54 deletions(-) diff --git a/core/types.rs b/core/types.rs index 20f1943ab..b0e6e679d 100644 --- a/core/types.rs +++ b/core/types.rs @@ -398,15 +398,63 @@ const I32_HIGH: i64 = 2147483647; const I48_LOW: i64 = -140737488355328; const I48_HIGH: i64 = 140737488355327; -// https://www.sqlite.org/fileformat.html#record_format -const SERIAL_TYPE_INTEGER_ZERO: u64 = 0; -const SERIAL_TYPE_I8: u64 = 1; -const SERIAL_TYPE_I16: u64 = 2; -const SERIAL_TYPE_I24: u64 = 3; -const SERIAL_TYPE_I32: u64 = 4; -const SERIAL_TYPE_I48: u64 = 5; -const SERIAL_TYPE_I64: u64 = 6; -const SERIAL_TYPE_F64: u64 = 7; +/// Sqlite Serial Types +/// https://www.sqlite.org/fileformat.html#record_format +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum SerialType { + Null, + I8, + I16, + I24, + I32, + I48, + I64, + F64, + Text { content_size: usize }, + Blob { content_size: usize }, +} + +impl From<&OwnedValue> for SerialType { + fn from(value: &OwnedValue) -> Self { + match value { + OwnedValue::Null => SerialType::Null, + OwnedValue::Integer(i) => match i { + i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8, + i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16, + i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24, + i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::I32, + i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::I48, + _ => SerialType::I64, + }, + OwnedValue::Float(_) => SerialType::F64, + OwnedValue::Text(t) => SerialType::Text { + content_size: t.value.len(), + }, + OwnedValue::Blob(b) => SerialType::Blob { + content_size: b.len(), + }, + OwnedValue::Agg(_) => unreachable!(), + OwnedValue::Record(_) => unreachable!(), + } + } +} + +impl From for u64 { + fn from(serial_type: SerialType) -> Self { + match serial_type { + SerialType::Null => 0, + SerialType::I8 => 1, + SerialType::I16 => 2, + SerialType::I24 => 3, + SerialType::I32 => 4, + SerialType::I48 => 5, + SerialType::I64 => 6, + SerialType::F64 => 7, + SerialType::Text { content_size } => (content_size * 2 + 13) as u64, + SerialType::Blob { content_size } => (content_size * 2 + 12) as u64, + } + } +} impl OwnedRecord { pub fn new(values: Vec) -> Self { @@ -415,50 +463,33 @@ impl OwnedRecord { pub fn serialize(&self, buf: &mut Vec) { let initial_i = buf.len(); - let mut serial_types = Vec::with_capacity(self.values.len()); - // First pass: calculate serial types and store them + // write serial types for value in &self.values { - let serial_type = match value { - OwnedValue::Null => SERIAL_TYPE_INTEGER_ZERO, - OwnedValue::Integer(i) => match i { - i if *i >= I8_LOW && *i <= I8_HIGH => SERIAL_TYPE_I8, - i if *i >= I16_LOW && *i <= I16_HIGH => SERIAL_TYPE_I16, - i if *i >= I24_LOW && *i <= I24_HIGH => SERIAL_TYPE_I24, - i if *i >= I32_LOW && *i <= I32_HIGH => SERIAL_TYPE_I32, - i if *i >= I48_LOW && *i <= I48_HIGH => SERIAL_TYPE_I48, - _ => SERIAL_TYPE_I64, - }, - OwnedValue::Float(_) => SERIAL_TYPE_F64, - OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, - OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, - // not serializable values - OwnedValue::Agg(_) => unreachable!(), - OwnedValue::Record(_) => unreachable!(), - }; - + let serial_type = SerialType::from(value); buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); - let n = write_varint(&mut buf[len - 9..], serial_type); + let n = write_varint(&mut buf[len - 9..], serial_type.into()); buf.truncate(buf.len() - 9 + n); // Remove unused bytes - - serial_types.push(serial_type); } let mut header_size = buf.len() - initial_i; // write content - for (value, &serial_type) in self.values.iter().zip(serial_types.iter()) { + for value in &self.values { match value { OwnedValue::Null => {} - OwnedValue::Integer(i) => match serial_type { - SERIAL_TYPE_I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - SERIAL_TYPE_I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - SERIAL_TYPE_I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SERIAL_TYPE_I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - SERIAL_TYPE_I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SERIAL_TYPE_I64 => buf.extend_from_slice(&i.to_be_bytes()), - _ => unreachable!(), - }, + OwnedValue::Integer(i) => { + let serial_type = SerialType::from(value); + match serial_type { + SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + } + } OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), @@ -545,7 +576,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); // Check that the buffer is empty after the header assert_eq!(buf.len(), header_length); } @@ -569,12 +600,12 @@ mod tests { assert!(header[0] == header_length as u8); // Header should be larger than number of values // Check that correct serial types were chosen - assert_eq!(header[1], SERIAL_TYPE_I8 as u8); - assert_eq!(header[2], SERIAL_TYPE_I16 as u8); - assert_eq!(header[3], SERIAL_TYPE_I24 as u8); - assert_eq!(header[4], SERIAL_TYPE_I32 as u8); - assert_eq!(header[5], SERIAL_TYPE_I48 as u8); - assert_eq!(header[6], SERIAL_TYPE_I64 as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::I8)); + assert_eq!(header[2] as u64, u64::from(SerialType::I16)); + assert_eq!(header[3] as u64, u64::from(SerialType::I24)); + assert_eq!(header[4] as u64, u64::from(SerialType::I32)); + assert_eq!(header[5] as u64, u64::from(SerialType::I48)); + assert_eq!(header[6] as u64, u64::from(SerialType::I64)); // test that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; @@ -636,7 +667,7 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for FLOAT - assert_eq!(header[1], SERIAL_TYPE_F64 as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::F64)); // Check that the bytes after the header can be interpreted as the float let float_bytes = &buf[header_length..header_length + size_of::()]; let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); @@ -700,13 +731,13 @@ mod tests { // First byte should be header size assert_eq!(header[0], header_length as u8); // Second byte should be serial type for NULL - assert_eq!(header[1], SERIAL_TYPE_INTEGER_ZERO as u8); + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); // Third byte should be serial type for I8 - assert_eq!(header[2], SERIAL_TYPE_I8 as u8); + assert_eq!(header[2] as u64, u64::from(SerialType::I8)); // Fourth byte should be serial type for F64 - assert_eq!(header[3], SERIAL_TYPE_F64 as u8); + assert_eq!(header[3] as u64, u64::from(SerialType::F64)); // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) - assert_eq!(header[4], (4 * 2 + 13) as u8); + assert_eq!(header[4] as u64, (4 * 2 + 13) as u64); // Check that the bytes after the header can be interpreted as the correct values let mut cur_offset = header_length; From 4368e8767ba363768bfb7ab25fed1f2293e9e7a2 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 26 Dec 2024 22:38:54 +0530 Subject: [PATCH 27/39] Fix like function giving wrong results --- core/Cargo.toml | 3 ++- core/vdbe/mod.rs | 48 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 4d731eb2b..1f2285f5f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -44,7 +44,8 @@ sieve-cache = "0.1.4" sqlite3-parser = { path = "../vendored/sqlite3-parser" } thiserror = "1.0.61" getrandom = { version = "0.2.15", features = ["js"] } -regex = "1.10.5" +regex = "1.11.1" +regex-syntax = { version = "0.8.5", default-features = false, features = ["unicode"] } chrono = "0.4.38" julian_day_converter = "0.3.2" jsonb = { version = "0.4.4", optional = true } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 427770155..f75df4a5f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -46,7 +46,7 @@ use datetime::{exec_date, exec_time, exec_unixepoch}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; -use regex::Regex; +use regex::{Regex, RegexBuilder}; use std::borrow::{Borrow, BorrowMut}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; @@ -3166,10 +3166,32 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::from("(?i)^"); - regex_pattern.push_str(&pattern.replace('%', ".*").replace('_', ".")); + + let mut regex_pattern = String::with_capacity(pattern.len() * 2); + + regex_pattern.push('^'); + + for c in pattern.chars() { + match c { + '\\' => regex_pattern.push_str("\\\\"), + '%' => regex_pattern.push_str(".*"), + '_' => regex_pattern.push('.'), + ch => { + if regex_syntax::is_meta_character(c) { + regex_pattern.push('\\'); + } + regex_pattern.push(ch); + } + } + } + regex_pattern.push('$'); - Regex::new(®ex_pattern).unwrap() + + RegexBuilder::new(®ex_pattern) + .case_insensitive(true) + .dot_matches_new_line(true) + .build() + .unwrap() } // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided @@ -4316,12 +4338,18 @@ mod tests { ); } + #[test] + fn test_like_with_escape_or_regexmeta_chars() { + assert!(exec_like(None, r#"\%A"#, r#"\A"#)); + assert!(exec_like(None, "%a%a", "aaaa")); + } + #[test] fn test_like_no_cache() { assert!(exec_like(None, "a%", "aaaa")); assert!(exec_like(None, "%a%a", "aaaa")); - assert!(exec_like(None, "%a.a", "aaaa")); - assert!(exec_like(None, "a.a%", "aaaa")); + assert!(!exec_like(None, "%a.a", "aaaa")); + assert!(!exec_like(None, "a.a%", "aaaa")); assert!(!exec_like(None, "%a.ab", "aaaa")); } @@ -4330,15 +4358,15 @@ mod tests { let mut cache = HashMap::new(); assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); // again after values have been cached assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); } From 12e49da1d0b9f66c6353f2141f071d6e41c61f51 Mon Sep 17 00:00:00 2001 From: psvri Date: Thu, 26 Dec 2024 22:42:46 +0530 Subject: [PATCH 28/39] fmt fixes --- core/vdbe/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f75df4a5f..e9df3a139 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -3166,7 +3166,6 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::with_capacity(pattern.len() * 2); regex_pattern.push('^'); From 28244b10d6bccb2d32bc5f4742735993798da11a Mon Sep 17 00:00:00 2001 From: Peter Sooley Date: Thu, 26 Dec 2024 14:36:44 -0800 Subject: [PATCH 29/39] implement json_array_length --- COMPAT.md | 4 +- core/function.rs | 4 + core/json/mod.rs | 164 ++++++++++++++++++++++++++++++++++++- core/json/path.rs | 181 +++++++++++++++++++++++++++++++++++++++++ core/translate/expr.rs | 45 ++++++++++ core/vdbe/mod.rs | 17 +++- testing/json.test | 32 ++++++++ 7 files changed, 443 insertions(+), 4 deletions(-) create mode 100644 core/json/path.rs diff --git a/COMPAT.md b/COMPAT.md index a7baaca83..71a00290b 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -234,8 +234,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | jsonb(json) | | | | json_array(value1,value2,...) | Yes | | | jsonb_array(value1,value2,...) | | | -| json_array_length(json) | | | -| json_array_length(json,path) | | | +| json_array_length(json) | Yes | | +| json_array_length(json,path) | Yes | | | json_error_position(json) | | | | json_extract(json,path,...) | | | | jsonb_extract(json,path,...) | | | diff --git a/core/function.rs b/core/function.rs index 8681a4fdf..0b19a5474 100644 --- a/core/function.rs +++ b/core/function.rs @@ -6,6 +6,7 @@ use std::fmt::Display; pub enum JsonFunc { Json, JsonArray, + JsonArrayLength, } #[cfg(feature = "json")] @@ -17,6 +18,7 @@ impl Display for JsonFunc { match self { JsonFunc::Json => "json".to_string(), JsonFunc::JsonArray => "json_array".to_string(), + JsonFunc::JsonArrayLength => "json_array_length".to_string(), } ) } @@ -334,6 +336,8 @@ impl Func { "json" => Ok(Func::Json(JsonFunc::Json)), #[cfg(feature = "json")] "json_array" => Ok(Func::Json(JsonFunc::JsonArray)), + #[cfg(feature = "json")] + "json_array_length" => Ok(Func::Json(JsonFunc::JsonArrayLength)), "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), diff --git a/core/json/mod.rs b/core/json/mod.rs index b1394b2bd..046f66237 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -1,5 +1,6 @@ mod de; mod error; +mod path; mod ser; use std::rc::Rc; @@ -8,9 +9,10 @@ pub use crate::json::de::from_str; pub use crate::json::ser::to_string; use crate::types::{LimboText, OwnedValue, TextSubtype}; use indexmap::IndexMap; +use path::get_json_val_by_path; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] pub enum Val { Null, @@ -88,6 +90,49 @@ pub fn json_array(values: Vec<&OwnedValue>) -> crate::Result { Ok(OwnedValue::Text(LimboText::json(Rc::new(s)))) } +pub fn json_array_length( + json_value: &OwnedValue, + json_path: Option<&OwnedValue>, +) -> crate::Result { + let path = match json_path { + Some(OwnedValue::Text(t)) => Some(t.value.to_string()), + Some(OwnedValue::Integer(i)) => Some(i.to_string()), + Some(OwnedValue::Float(f)) => Some(f.to_string()), + _ => None::, + }; + + let top_val = match json_value { + OwnedValue::Text(ref t) => crate::json::from_str::(&t.value), + OwnedValue::Blob(b) => match jsonb::from_slice(b) { + Ok(j) => { + let json = j.to_string(); + crate::json::from_str(&json) + } + Err(_) => crate::bail_parse_error!("malformed JSON"), + }, + _ => return Ok(OwnedValue::Integer(0)), + }; + + let Ok(top_val) = top_val else { + crate::bail_parse_error!("malformed JSON") + }; + + let arr_val = if let Some(path) = path { + match get_json_val_by_path(&top_val, &path) { + Ok(Some(val)) => val, + Ok(None) => return Ok(OwnedValue::Null), + Err(e) => return Err(e), + } + } else { + &top_val + }; + + if let Val::Array(val) = &arr_val { + return Ok(OwnedValue::Integer(val.len() as i64)); + } + Ok(OwnedValue::Integer(0)) +} + #[cfg(test)] mod tests { use super::*; @@ -266,4 +311,121 @@ mod tests { Err(e) => assert!(e.to_string().contains("JSON cannot hold BLOB values")), } } + + #[test] + fn test_json_array_length() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_empty() { + let input = OwnedValue::build_text(Rc::new("[]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_root() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_not_array() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.one".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index() { + let input = OwnedValue::build_text(Rc::new("[[1,2,3,4]]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[0]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_not_array() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[2]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_bad_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.two".to_string()))), + ) + .unwrap(); + assert_eq!(OwnedValue::Null, result); + } + + #[test] + fn test_json_array_length_simple_json_subtype() { + let input = OwnedValue::build_text(Rc::new("[1,2,3]".to_string())); + let wrapped = get_json(&input).unwrap(); + let result = json_array_length(&wrapped, None).unwrap(); + + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 3); + } else { + panic!("Expected OwnedValue::Integer"); + } + } } diff --git a/core/json/path.rs b/core/json/path.rs new file mode 100644 index 000000000..e475f6647 --- /dev/null +++ b/core/json/path.rs @@ -0,0 +1,181 @@ +use super::Val; + +pub fn get_json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + match path.strip_prefix('$') { + Some(tail) => json_val_by_path(val, tail), + None => crate::bail_parse_error!("malformed path"), + } +} + +fn json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + if path.is_empty() { + return Ok(Some(val)); + } + + match val { + Val::Array(inner) => { + if inner.is_empty() { + return Ok(None); + } + let Some(tail) = path.strip_prefix('[') else { + return Ok(None); + }; + let (from_end, tail) = if let Some(updated_tail) = tail.strip_prefix("#-") { + (true, updated_tail) + } else { + (false, tail) + }; + + let Some((idx_str, tail)) = tail.split_once("]") else { + crate::bail_parse_error!("malformed path"); + }; + + if idx_str.is_empty() { + return Ok(None); + } + let Ok(idx) = idx_str.parse::() else { + crate::bail_parse_error!("malformed path"); + }; + let result = if from_end { + inner.get(inner.len() - 1 - idx) + } else { + inner.get(idx) + }; + + if let Some(result) = result { + return json_val_by_path(result, tail); + } + Ok(None) + } + Val::Object(inner) => { + let Some(tail) = path.strip_prefix('.') else { + return Ok(None); + }; + + let (property, tail) = if let Some(tail) = tail.strip_prefix('"') { + if let Some((property, tail)) = tail.split_once('"') { + (property, tail) + } else { + crate::bail_parse_error!("malformed path"); + } + } else if let Some(idx) = tail.find('.') { + (&tail[..idx], &tail[idx..]) + } else { + (tail, "") + }; + + if let Some(result) = inner.get(property) { + return json_val_by_path(result, tail); + } + Ok(None) + } + _ => Ok(None), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_path_root() { + assert_eq!( + get_json_val_by_path(&Val::Bool(true), "$",).unwrap(), + Some(&Val::Bool(true)) + ); + } + + #[test] + fn test_path_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[2]", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_negative_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[#-2]", + ) + .unwrap(), + Some(&Val::Integer(33)) + ); + } + + #[test] + fn test_path_index_deep() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Array(vec![ + Val::Integer(33), + Val::Integer(55), + Val::Integer(66) + ])]), + "$[0][1]", + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } + + #[test] + fn test_path_prop_simple() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + "$.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_nested() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [( + "foo".into(), + Val::Object([("bar".into(), Val::Integer(66))].into()) + )] + .into() + ), + "$.foo.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_quoted() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo.baz".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + r#"$."foo.baz""#, + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 734dbb98e..afdb6c7ad 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -913,6 +913,51 @@ pub fn translate_expr( }); Ok(target_register) } + JsonFunc::JsonArrayLength => { + let args = if let Some(args) = args { + if args.len() > 2 { + crate::bail_parse_error!( + "{} function with wrong number of arguments", + j.to_string() + ) + } + args + } else { + crate::bail_parse_error!( + "{} function with no arguments", + j.to_string() + ); + }; + + let json_reg = program.alloc_register(); + let path_reg = program.alloc_register(); + + translate_expr( + program, + referenced_tables, + &args[0], + json_reg, + precomputed_exprs_to_registers, + )?; + + if args.len() == 2 { + translate_expr( + program, + referenced_tables, + &args[1], + path_reg, + precomputed_exprs_to_registers, + )?; + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: json_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } }, Func::Scalar(srf) => { match srf { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 427770155..da5a32c3f 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -37,7 +37,7 @@ use crate::types::{ }; use crate::util::parse_schema_rows; #[cfg(feature = "json")] -use crate::{function::JsonFunc, json::get_json, json::json_array}; +use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; use limbo_macros::Description; @@ -2281,6 +2281,21 @@ impl Program { Err(e) => return Err(e), } } + #[cfg(feature = "json")] + crate::function::Func::Json(JsonFunc::JsonArrayLength) => { + let json_value = &state.registers[*start_reg]; + let path_value = if arg_count > 1 { + Some(&state.registers[*start_reg + 1]) + } else { + None + }; + let json_array_length = json_array_length(json_value, path_value); + + match json_array_length { + Ok(length) => state.registers[*dest] = length, + Err(e) => return Err(e), + } + } crate::function::Func::Scalar(scalar_func) => match scalar_func { ScalarFunc::Cast => { assert!(arg_count == 2); diff --git a/testing/json.test b/testing/json.test index a62040555..7c33fdc5a 100755 --- a/testing/json.test +++ b/testing/json.test @@ -83,3 +83,35 @@ do_execsql_test json_array_json { do_execsql_test json_array_nested { SELECT json_array(json_array(1,2,3), json('[1,2,3]'), '[1,2,3]') } {{[[1,2,3],[1,2,3],"[1,2,3]"]}} + +do_execsql_test json_array_length { + SELECT json_array_length('[1,2,3,4]'); +} {{4}} + +do_execsql_test json_array_length_empty { + SELECT json_array_length('[]'); +} {{0}} + +do_execsql_test json_array_length_root { + SELECT json_array_length('[1,2,3,4]', '$'); +} {{4}} + +do_execsql_test json_array_length_not_array { + SELECT json_array_length('{"one":[1,2,3]}'); +} {{0}} + +do_execsql_test json_array_length_via_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.one'); +} {{3}} + +do_execsql_test json_array_length_via_index { + SELECT json_array_length('[[1,2,3,4]]', '$[0]'); +} {{4}} + +do_execsql_test json_array_length_via_index_not_array { + SELECT json_array_length('[1,2,3,4]', '$[2]'); +} {{0}} + +do_execsql_test json_array_length_via_bad_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.two'); +} {{}} \ No newline at end of file From f2ecebc3574e26a7500c3ecc2ba05cbf9a52f83b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:20:26 +0200 Subject: [PATCH 30/39] Rename RowResult to StepResult The name "row result" is confusing because it really *is* a result from a step() call. The only difference is how a row is represented as we return from VDBE or from a statement. Therefore, rename RowResult to StepResult. --- bindings/python/src/lib.rs | 20 +++---- bindings/wasm/lib.rs | 20 +++---- cli/app.rs | 42 +++++++------- core/benches/benchmark.rs | 30 +++++----- core/lib.rs | 16 +++--- core/util.rs | 12 ++-- perf/latency/limbo/src/main.rs | 6 +- simulator/generation/plan.rs | 12 ++-- simulator/main.rs | 2 +- sqlite3/src/lib.rs | 10 ++-- test/src/lib.rs | 100 ++++++++++++++++----------------- 11 files changed, 135 insertions(+), 135 deletions(-) diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index c31520a82..1b3514032 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -128,22 +128,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); return Ok(Some(py_row)); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(None); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(None); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); @@ -167,22 +167,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); results.push(py_row); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(results); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(results); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index a2ae5b266..a06321f16 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -75,7 +75,7 @@ impl Statement { pub fn get(&self) -> JsValue { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -83,10 +83,10 @@ impl Statement { } JsValue::from(row_array) } - Ok(limbo_core::RowResult::IO) - | Ok(limbo_core::RowResult::Done) - | Ok(limbo_core::RowResult::Interrupt) - | Ok(limbo_core::RowResult::Busy) => JsValue::UNDEFINED, + Ok(limbo_core::StepResult::IO) + | Ok(limbo_core::StepResult::Done) + | Ok(limbo_core::StepResult::Interrupt) + | Ok(limbo_core::StepResult::Busy) => JsValue::UNDEFINED, Err(e) => panic!("Error: {:?}", e), } } @@ -95,7 +95,7 @@ impl Statement { let array = js_sys::Array::new(); loop { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -103,10 +103,10 @@ impl Statement { } array.push(&row_array); } - Ok(limbo_core::RowResult::IO) => {} - Ok(limbo_core::RowResult::Interrupt) => break, - Ok(limbo_core::RowResult::Done) => break, - Ok(limbo_core::RowResult::Busy) => break, + Ok(limbo_core::StepResult::IO) => {} + Ok(limbo_core::StepResult::Interrupt) => break, + Ok(limbo_core::StepResult::Done) => break, + Ok(limbo_core::StepResult::Busy) => break, Err(e) => panic!("Error: {:?}", e), } } diff --git a/cli/app.rs b/cli/app.rs index cbce1ca5c..0593066c5 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,6 +1,6 @@ use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS; use cli_table::{Cell, Table}; -use limbo_core::{Database, LimboError, RowResult, Value}; +use limbo_core::{Database, LimboError, StepResult, Value}; use clap::{Parser, ValueEnum}; use std::{ @@ -498,7 +498,7 @@ impl Limbo { } match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { for (i, value) in row.values.iter().enumerate() { if i > 0 { let _ = self.writer.write(b"|"); @@ -518,14 +518,14 @@ impl Limbo { } let _ = self.writeln(""); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => { + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => { break; } - Ok(RowResult::Busy) => { + Ok(StepResult::Busy) => { self.writeln("database is busy"); break; } @@ -543,7 +543,7 @@ impl Limbo { let mut table_rows: Vec> = vec![]; loop { match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { table_rows.push( row.values .iter() @@ -559,12 +559,12 @@ impl Limbo { .collect(), ); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => break, - Ok(RowResult::Busy) => { + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => break, + Ok(StepResult::Busy) => { self.writeln("database is busy"); break; } @@ -607,18 +607,18 @@ impl Limbo { let mut found = false; loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(schema)) = row.values.first() { let _ = self.write_fmt(format_args!("{};", schema)); found = true; } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { self.writeln("database is busy"); break; } @@ -664,18 +664,18 @@ impl Limbo { let mut tables = String::new(); loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(table)) = row.values.first() { tables.push_str(table); tables.push(' '); } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { self.writeln("database is busy"); break; } diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 0fe17d991..0dff08b5b 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -40,19 +40,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!(); } } @@ -68,19 +68,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } @@ -97,19 +97,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } diff --git a/core/lib.rs b/core/lib.rs index 255c47217..445786cdf 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -374,14 +374,14 @@ impl Statement { self.state.interrupt(); } - pub fn step(&mut self) -> Result> { + pub fn step(&mut self) -> Result> { let result = self.program.step(&mut self.state, self.pager.clone())?; match result { - vdbe::StepResult::Row(row) => Ok(RowResult::Row(Row { values: row.values })), - vdbe::StepResult::IO => Ok(RowResult::IO), - vdbe::StepResult::Done => Ok(RowResult::Done), - vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt), - vdbe::StepResult::Busy => Ok(RowResult::Busy), + vdbe::StepResult::Row(row) => Ok(StepResult::Row(Row { values: row.values })), + vdbe::StepResult::IO => Ok(StepResult::IO), + vdbe::StepResult::Done => Ok(StepResult::Done), + vdbe::StepResult::Interrupt => Ok(StepResult::Interrupt), + vdbe::StepResult::Busy => Ok(StepResult::Busy), } } @@ -393,7 +393,7 @@ impl Statement { pub fn reset(&self) {} } -pub enum RowResult<'a> { +pub enum StepResult<'a> { Row(Row<'a>), IO, Done, @@ -421,7 +421,7 @@ impl Rows { Self { stmt } } - pub fn next_row(&mut self) -> Result> { + pub fn next_row(&mut self) -> Result> { self.stmt.step() } } diff --git a/core/util.rs b/core/util.rs index a57186890..dcf04ac81 100644 --- a/core/util.rs +++ b/core/util.rs @@ -4,7 +4,7 @@ use sqlite3_parser::ast::{Expr, FunctionTail, Literal}; use crate::{ schema::{self, Schema}, - Result, RowResult, Rows, IO, + Result, Rows, StepResult, IO, }; // https://sqlite.org/lang_keywords.html @@ -27,7 +27,7 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc { + StepResult::Row(row) => { let ty = row.get::<&str>(0)?; if ty != "table" && ty != "index" { continue; @@ -53,14 +53,14 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc continue, } } - RowResult::IO => { + StepResult::IO => { // TODO: How do we ensure that the I/O we submitted to // read the schema is actually complete? io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => break, + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => break, } } } diff --git a/perf/latency/limbo/src/main.rs b/perf/latency/limbo/src/main.rs index c790c6bc8..b51ffb406 100644 --- a/perf/latency/limbo/src/main.rs +++ b/perf/latency/limbo/src/main.rs @@ -38,11 +38,11 @@ fn main() { loop { let row = rows.next_row().unwrap(); match row { - limbo_core::RowResult::Row(_) => { + limbo_core::StepResult::Row(_) => { count += 1; } - limbo_core::RowResult::IO => yield, - limbo_core::RowResult::Done => break, + limbo_core::StepResult::IO => yield, + limbo_core::StepResult::Done => break, } } assert!(count == 100); diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 82c75c4e3..ea2392f4e 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -1,6 +1,6 @@ use std::{fmt::Display, rc::Rc}; -use limbo_core::{Connection, Result, RowResult}; +use limbo_core::{Connection, Result, StepResult}; use rand::SeedableRng; use rand_chacha::ChaCha8Rng; @@ -215,7 +215,7 @@ impl Interaction { let mut out = Vec::new(); while let Ok(row) = rows.next_row() { match row { - RowResult::Row(row) => { + StepResult::Row(row) => { let mut r = Vec::new(); for el in &row.values { let v = match el { @@ -230,12 +230,12 @@ impl Interaction { out.push(r); } - RowResult::IO => {} - RowResult::Interrupt => {} - RowResult::Done => { + StepResult::IO => {} + StepResult::Interrupt => {} + StepResult::Done => { break; } - RowResult::Busy => {} + StepResult::Busy => {} } } diff --git a/simulator/main.rs b/simulator/main.rs index 9f70abed2..b12018062 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,7 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick_index, ArbitraryFrom}; -use limbo_core::{Connection, Database, Result, RowResult, IO}; +use limbo_core::{Connection, Database, Result, StepResult, IO}; use model::table::Value; use rand::prelude::*; use rand_chacha::ChaCha8Rng; diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 6bd5b23d6..cd09ef62b 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -239,14 +239,14 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in let stmt = &mut *stmt; if let Ok(result) = stmt.stmt.step() { match result { - limbo_core::RowResult::IO => SQLITE_BUSY, - limbo_core::RowResult::Done => SQLITE_DONE, - limbo_core::RowResult::Interrupt => SQLITE_INTERRUPT, - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::IO => SQLITE_BUSY, + limbo_core::StepResult::Done => SQLITE_DONE, + limbo_core::StepResult::Interrupt => SQLITE_INTERRUPT, + limbo_core::StepResult::Row(row) => { stmt.row.replace(Some(row)); SQLITE_ROW } - limbo_core::RowResult::Busy => SQLITE_BUSY, + limbo_core::StepResult::Busy => SQLITE_BUSY, } } else { SQLITE_ERROR diff --git a/test/src/lib.rs b/test/src/lib.rs index 8bd6feea2..931c9b1bf 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -40,7 +40,7 @@ impl TempDatabase { #[cfg(test)] mod tests { use super::*; - use limbo_core::{CheckpointStatus, Connection, RowResult, Value}; + use limbo_core::{CheckpointStatus, Connection, StepResult, Value}; use log::debug; #[ignore] @@ -63,10 +63,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -80,7 +80,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = row.values.first().expect("missing id"); let id = match first_value { Value::Integer(i) => *i as i32, @@ -90,12 +90,12 @@ mod tests { assert_eq!(current_read_index, id); current_read_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { panic!("Database is busy"); } } @@ -127,10 +127,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -146,7 +146,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -161,12 +161,12 @@ mod tests { assert_eq!(1, id); compare_string(&huge_text, text); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -200,10 +200,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -219,7 +219,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -236,12 +236,12 @@ mod tests { compare_string(huge_text, text); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -269,10 +269,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -290,7 +290,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let id = match first_value { Value::Integer(i) => *i as i32, @@ -300,12 +300,12 @@ mod tests { assert_eq!(current_index, id as usize); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -329,10 +329,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -353,7 +353,7 @@ mod tests { if let Some(ref mut rows) = conn.query(list_query).unwrap() { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let count = match first_value { Value::Integer(i) => *i as i32, @@ -362,12 +362,12 @@ mod tests { log::debug!("counted {}", count); return Ok(count as usize); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -436,10 +436,10 @@ mod tests { if let Some(ref mut rows) = insert_query { loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } } @@ -450,17 +450,17 @@ mod tests { if let Some(ref mut rows) = select_query { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { assert_eq!(id, 1, "First insert should have rowid 1"); } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -469,10 +469,10 @@ mod tests { match conn.query("INSERT INTO test_rowid (id, val) VALUES (5, 'test2')") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -485,17 +485,17 @@ mod tests { match conn.query("SELECT last_insert_rowid()") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { last_id = id; } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } }, Ok(None) => {} From 75992a84d8093e4f8df9953a57266c7708c8e1b7 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:30:15 +0200 Subject: [PATCH 31/39] cli: Fix unused result warnings --- cli/app.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 0593066c5..7e6155543 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -526,7 +526,7 @@ impl Limbo { break; } Ok(StepResult::Busy) => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -565,7 +565,7 @@ impl Limbo { Ok(StepResult::Interrupt) => break, Ok(StepResult::Done) => break, Ok(StepResult::Busy) => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -619,7 +619,7 @@ impl Limbo { StepResult::Interrupt => break, StepResult::Done => break, StepResult::Busy => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } } @@ -676,7 +676,7 @@ impl Limbo { StepResult::Interrupt => break, StepResult::Done => break, StepResult::Busy => { - self.writeln("database is busy"); + let _ = self.writeln("database is busy"); break; } } From 244326ee572183d383b7bf4c4607b74afbd3bed1 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:33:47 +0200 Subject: [PATCH 32/39] core: Remove unused imports --- core/storage/wal.rs | 3 +-- core/translate/planner.rs | 7 ++----- core/vdbe/explain.rs | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 3cdad9263..8648efe1c 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::RwLock; use std::{cell::RefCell, rc::Rc, sync::Arc}; @@ -16,7 +16,6 @@ use crate::{Completion, Page}; use self::sqlite3_ondisk::{checksum_wal, PageContent, WAL_MAGIC_BE, WAL_MAGIC_LE}; use super::buffer_pool::BufferPool; -use super::page_cache::PageCacheKey; use super::pager::{PageRef, Pager}; use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader}; diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0bdc447f3..75e37d1a5 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,8 +1,5 @@ -use super::{ - optimizer::Optimizable, - plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, - }, +use super::plan::{ + Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, }; use crate::{ function::Func, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce03a53fd..cdcbe70bb 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,5 +1,4 @@ use super::{Insn, InsnReference, OwnedValue, Program}; -use crate::types::LimboText; use std::rc::Rc; pub fn insn_to_str( From 464508bb298640198c130e607291934ceb55e186 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:35:25 +0200 Subject: [PATCH 33/39] core/vdbe: Kill unused next_free_register() --- core/vdbe/builder.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 8dd1cd4de..3a687b441 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -59,10 +59,6 @@ impl ProgramBuilder { reg } - pub fn next_free_register(&self) -> usize { - self.next_free_register - } - pub fn alloc_cursor_id( &mut self, table_identifier: Option, From 9680471876c027fc8b220505faa39f0e5fc1d256 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:36:20 +0200 Subject: [PATCH 34/39] core: Remove unreachable pragma patterns --- core/translate/mod.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index db69f1578..0a459e3fb 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -369,7 +369,6 @@ fn update_pragma( query_pragma("journal_mode", header, program)?; Ok(()) } - _ => todo!("pragma `{name}`"), } } @@ -396,9 +395,6 @@ fn query_pragma( dest: register, }); } - _ => { - todo!("pragma `{name}`"); - } } program.emit_insn(Insn::ResultRow { From b2f96ddfbd66fdbc9861477813a9d0c490e8863e Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 10:39:24 +0200 Subject: [PATCH 35/39] core/translate: Remove unnecessary mut --- core/translate/planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 75e37d1a5..7eb269b9a 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -264,7 +264,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ let col_count = columns.len(); From 9dea335a0ab8afd762ffaa3b44aa371d6af27328 Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 27 Dec 2024 11:39:02 -0300 Subject: [PATCH 36/39] Add test function with regex --- testing/tester.tcl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/testing/tester.tcl b/testing/tester.tcl index 04a43c3eb..b8cbff17f 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -26,6 +26,23 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { } } +proc do_execsql_test_regex {test_name sql_statements expected_regex} { + foreach db $::test_dbs { + puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] + + # Validate the actual output against the regular expression + if {![regexp $expected_regex $actual_output]} { + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual_output'" + puts "expected to match regex '$expected_regex'" + exit 1 + } + } +} + + proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_outputs} { puts [format "(%s) %s Running test: %s" $db_name [string repeat " " [expr {40 - [string length $db_name]}]] $test_name] set combined_sql [string trim $sql_statements] From 2d0c16c428d8de0da8d63df969dd15dd9878f3da Mon Sep 17 00:00:00 2001 From: Diego Reis Date: Fri, 27 Dec 2024 11:39:33 -0300 Subject: [PATCH 37/39] Fix sqlite_version() out of bound --- core/translate/expr.rs | 2 +- testing/scalar-functions.test | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 734dbb98e..31463f8d6 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1555,7 +1555,7 @@ pub fn translate_expr( program.emit_insn(Insn::Copy { src_reg: output_register, dst_reg: target_register, - amount: 1, + amount: 0, }); Ok(target_register) } diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index e7f1c1b10..f04fa1765 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -809,6 +809,10 @@ do_execsql_test cast-small-float-to-numeric { SELECT typeof(CAST('1.23' AS NUMERIC)), CAST('1.23' AS NUMERIC); } {real|1.23} +do_execsql_test_regex sqlite-version-should-return-valid-output { + SELECT sqlite_version(); +} {\d+\.\d+\.\d+} + # TODO COMPAT: sqlite returns 9.22337203685478e+18, do we care...? # do_execsql_test cast-large-text-to-numeric { # SELECT typeof(CAST('9223372036854775808' AS NUMERIC)), CAST('9223372036854775808' AS NUMERIC); From 5470ea2344424f3b64ce5ef440a894cb1c678948 Mon Sep 17 00:00:00 2001 From: psvri Date: Fri, 27 Dec 2024 21:49:26 +0530 Subject: [PATCH 38/39] Add tests in like.test --- testing/like.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/testing/like.test b/testing/like.test index edd6ba5e5..a52b90b60 100755 --- a/testing/like.test +++ b/testing/like.test @@ -77,3 +77,15 @@ Robert|Roberts} do_execsql_test where-like-impossible { select * from products where 'foobar' like 'fooba'; } {} + +do_execsql_test like-with-backslash { + select like('\%A', '\A') +} {1} + +do_execsql_test like-with-dollar { + select like('A$%', 'A$') +} {1} + +do_execsql_test like-with-dot { + select like('%a.a', 'aaaa') +} {0} From 3bc554f27c1de2a302e16065464d5aca19cc070f Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Fri, 27 Dec 2024 18:39:38 +0200 Subject: [PATCH 39/39] core: Remove unused import --- core/translate/plan.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 4cd30d08b..abfab41fb 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,6 +1,5 @@ use core::fmt; use sqlite3_parser::ast; -use std::ptr::write; use std::{ fmt::{Display, Formatter}, rc::Rc,