From 76e2d98381aaa36d32883fdefdaa08f5f567693a Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Wed, 5 Feb 2025 20:33:32 +0530 Subject: [PATCH 01/20] drop table: addresses issue https://github.com/tursodatabase/limbo/issues/894 which requires DROP TABLE to be implemented this is the initial commit is for the implementation of DROP TABLE. It adds support for the DROP TABLE instruction and adds a DropBTree instruction. It also implements the btree_drop method in btree.rs which makes use of free_page method which will be implemented via PR https://github.com/tursodatabase/limbo/pull/785 --- core/lib.rs | 5 + core/storage/btree.rs | 106 ++++++++++++++++++++ core/translate/mod.rs | 226 +++++++++++++++++++++++++++++++++++++++++- core/vdbe/explain.rs | 9 ++ core/vdbe/insn.rs | 8 ++ core/vdbe/mod.rs | 8 ++ 6 files changed, 361 insertions(+), 1 deletion(-) diff --git a/core/lib.rs b/core/lib.rs index 00889db98..b9f0a25d1 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -316,8 +316,10 @@ impl Connection { pub(crate) fn run_cmd(self: &Rc, cmd: Cmd) -> Result> { let db = self.db.clone(); let syms: &SymbolTable = &db.syms.borrow(); + println!("the command {:?}", cmd); match cmd { Cmd::Stmt(stmt) => { + println!("AST: {:?}", stmt); let program = Rc::new(translate::translate( &self.schema.borrow(), stmt, @@ -327,6 +329,7 @@ impl Connection { syms, QueryMode::Normal, )?); + println!("the generated program {:?}", program); let stmt = Statement::new(program, self.pager.clone()); Ok(Some(stmt)) } @@ -367,11 +370,13 @@ impl Connection { } pub fn execute(self: &Rc, sql: impl AsRef) -> Result<()> { + println!("running execute"); let sql = sql.as_ref(); let db = &self.db; let syms: &SymbolTable = &db.syms.borrow(); let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; + println!("the command {:?}", cmd); if let Some(cmd) = cmd { match cmd { Cmd::Explain(stmt) => { diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 40d9c59d8..22f9d08ee 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -12,6 +12,7 @@ use crate::{LimboError, Result}; use std::cell::{Ref, RefCell}; use std::pin::Pin; use std::rc::Rc; +use std::sync::Arc; use super::pager::PageRef; use super::sqlite3_ondisk::{ @@ -2234,6 +2235,111 @@ impl BTreeCursor { Ok(Some(n_overflow)) } + + pub fn btree_drop(&mut self) -> Result> { + self.move_to_root(); + + loop { + let page = self.stack.top(); + return_if_locked!(page); + + if !page.is_loaded() { + self.pager.load_page(Arc::clone(&page))?; + return Ok(CursorResult::IO); + } + + let contents = page.get().contents.as_ref().unwrap(); + // TOOD: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 merged + // let current_page_id = page.get().id; + + if !contents.is_leaf() { + let mut has_unprocessed_children = false; + + // Process all the regular cells first + for cell_idx in 0..contents.cell_count() { + let cell = contents.cell_get( + cell_idx, + Rc::clone(&self.pager), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), + self.usable_space(), + )?; + if let BTreeCell::TableInteriorCell(interior) = cell { + let child_page = + self.pager.read_page(interior._left_child_page as usize)?; + self.stack.push(child_page); + has_unprocessed_children = true; + break; + } + } + + if !has_unprocessed_children { + if let Some(rightmost) = contents.rightmost_pointer() { + let rightmost_page = self.pager.read_page(rightmost as usize)?; + self.stack.push(rightmost_page); + continue; + } + } + + if has_unprocessed_children { + continue; + } + } else { + for cell_idx in 0..contents.cell_count() { + let cell = contents.cell_get( + cell_idx, + Rc::clone(&self.pager), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), + self.usable_space(), + )?; + if let BTreeCell::TableLeafCell(TableLeafCell { + _rowid, + _payload, + first_overflow_page: Some(overflow_page_id), + }) = cell + { + let mut current_overflow_id = overflow_page_id; + loop { + let overflow_page = + self.pager.read_page(current_overflow_id as usize)?; + return_if_locked!(overflow_page); + + if !overflow_page.is_loaded() { + self.pager.load_page(Arc::clone(&overflow_page))?; + return Ok(CursorResult::IO); + } + + let overflow_contents = overflow_page.get().contents.as_ref().unwrap(); + let next_overflow_id = u32::from_be_bytes( + overflow_contents.as_ptr()[..4].try_into().unwrap(), + ); + + // TODO: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 is merged + // self.pager + // .free_page(Some(overflow_page), current_overflow_id as usize)?; + + if next_overflow_id == 0 { + break; + } + current_overflow_id = next_overflow_id; + } + } + } + } + + // All children & overflow pages have been processed + // TODO: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 is merged + // self.pager.free_page(Some(page), current_page_id)?; + + if self.stack.has_parent() { + self.stack.pop(); + } else { + break; + } + } + Ok(CursorResult::Ok(())) + } } impl PageStack { diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 3ee8f4ce0..19d522813 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -89,7 +89,13 @@ pub fn translate( } ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), - ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), + // ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), + ast::Stmt::DropTable { + if_exists, + tbl_name, + } => { + translate_drop_table(&mut program, tbl_name, if_exists, schema)?; + } ast::Stmt::DropTrigger { .. } => bail_parse_error!("DROP TRIGGER not supported yet"), ast::Stmt::DropView { .. } => bail_parse_error!("DROP VIEW not supported yet"), ast::Stmt::Pragma(name, body) => pragma::translate_pragma( @@ -515,6 +521,224 @@ fn translate_create_table( Ok(program) } +fn translate_drop_table( + program: &mut ProgramBuilder, + tbl_name: ast::QualifiedName, + if_exists: bool, + schema: &Schema, +) -> Result<()> { + let table = schema.get_table(tbl_name.name.0.as_str()); + if table.is_none() { + if if_exists { + let init_label = program.emit_init(); + let start_offset = program.offset(); + program.emit_halt(); + program.resolve_label(init_label, program.offset()); + program.emit_transaction(true); + program.emit_constant_insns(); + program.emit_goto(start_offset); + + return Ok(()); + } + bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); + } + let table = table.unwrap(); // safe to do since we have a check before this + + let init_label = program.emit_init(); + let start_offset = program.offset(); + + // 1. Drop the table BTree + program.emit_insn(Insn::DropBtree { + db: 0, + root: table.root_page, + }); + + // TODO: Drop indexes? + + // 2. Delete table metadata from sqlite_schema + let table_name = "sqlite_schema"; + let table = schema.get_table(&table_name).unwrap(); + let sqlite_schema_cursor_id = program.alloc_cursor_id( + Some(table_name.to_string()), + CursorType::BTreeTable(table.clone()), + ); + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: sqlite_schema_cursor_id, + root_page: 1, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + // Rewind to the very beginning of the cursor + program.emit_insn(Insn::RewindAsync { + cursor_id: sqlite_schema_cursor_id, + }); + let end_metadata_label = program.allocate_label(); + program.emit_insn(Insn::RewindAwait { + cursor_id: sqlite_schema_cursor_id, + pc_if_empty: end_metadata_label, + }); + let metadata_loop = program.allocate_label(); + program.resolve_label(metadata_loop, program.offset()); + + // Load row details + let tbl_name_reg = program.alloc_register(); + program.emit_insn(Insn::Column { + cursor_id: sqlite_schema_cursor_id, + column: 2, + dest: tbl_name_reg, + }); + let string_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); + let next_label = program.allocate_label(); + program.emit_insn(Insn::Ne { + lhs: tbl_name_reg, + rhs: string_reg, + target_pc: next_label, + flags: CmpInsFlags::default(), + }); + + // Delete matching row + program.emit_insn(Insn::DeleteAsync { + cursor_id: sqlite_schema_cursor_id, + }); + program.emit_insn(Insn::DeleteAwait { + cursor_id: sqlite_schema_cursor_id, + }); + + // Move to next row + program.resolve_label(next_label, program.offset()); + program.emit_insn(Insn::NextAsync { + cursor_id: sqlite_schema_cursor_id, + }); + program.emit_insn(Insn::NextAwait { + cursor_id: sqlite_schema_cursor_id, + pc_if_next: metadata_loop, + }); + program.resolve_label(end_metadata_label, program.offset()); + + // Update schema + let parse_schema_where_clause = format!("tbl_name = {}", tbl_name.name.0); + program.emit_insn(Insn::ParseSchema { + db: 0, + where_clause: parse_schema_where_clause, + }); + + program.emit_halt(); + program.resolve_label(init_label, program.offset()); + program.emit_transaction(true); + program.emit_goto(start_offset); + + Ok(program) +} + +fn translate_drop_table( + program: &mut ProgramBuilder, + tbl_name: ast::QualifiedName, + if_exists: bool, + schema: &Schema, +) -> Result<()> { + let table = schema.get_table(tbl_name.name.0.as_str()); + if table.is_none() { + if if_exists { + let init_label = program.emit_init(); + let start_offset = program.offset(); + program.emit_halt(); + program.resolve_label(init_label, program.offset()); + program.emit_transaction(true); + program.emit_constant_insns(); + program.emit_goto(start_offset); + + return Ok(()); + } + bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); + } + let table = table.unwrap(); // safe to do since we have a check before this + + let init_label = program.emit_init(); + let start_offset = program.offset(); + + // 1. Drop the table BTree + program.emit_insn(Insn::DropBtree { + db: 0, + root: table.root_page, + }); + + // TODO: Drop indexes? + + // 2. Delete table metadata from sqlite_schema + let table_name = "sqlite_schema"; + let table = schema.get_table(&table_name).unwrap(); + let sqlite_schema_cursor_id = program.alloc_cursor_id( + Some(table_name.to_string()), + CursorType::BTreeTable(table.clone()), + ); + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: sqlite_schema_cursor_id, + root_page: 1, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + // Rewind to the very beginning of the cursor + program.emit_insn(Insn::RewindAsync { + cursor_id: sqlite_schema_cursor_id, + }); + let end_metadata_label = program.allocate_label(); + program.emit_insn(Insn::RewindAwait { + cursor_id: sqlite_schema_cursor_id, + pc_if_empty: end_metadata_label, + }); + let metadata_loop = program.allocate_label(); + program.resolve_label(metadata_loop, program.offset()); + + // Load row details + let tbl_name_reg = program.alloc_register(); + program.emit_insn(Insn::Column { + cursor_id: sqlite_schema_cursor_id, + column: 2, + dest: tbl_name_reg, + }); + let string_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); + let next_label = program.allocate_label(); + program.emit_insn(Insn::Ne { + lhs: tbl_name_reg, + rhs: string_reg, + target_pc: next_label, + flags: CmpInsFlags::default(), + }); + + // Delete matching row + program.emit_insn(Insn::DeleteAsync { + cursor_id: sqlite_schema_cursor_id, + }); + program.emit_insn(Insn::DeleteAwait { + cursor_id: sqlite_schema_cursor_id, + }); + + // Move to next row + program.resolve_label(next_label, program.offset()); + program.emit_insn(Insn::NextAsync { + cursor_id: sqlite_schema_cursor_id, + }); + program.emit_insn(Insn::NextAwait { + cursor_id: sqlite_schema_cursor_id, + pc_if_next: metadata_loop, + }); + program.resolve_label(end_metadata_label, program.offset()); + + // Update schema + let parse_schema_where_clause = format!("tbl_name = {}", tbl_name.name.0); + program.emit_insn(Insn::ParseSchema { + db: 0, + where_clause: parse_schema_where_clause, + }); + + program.emit_halt(); + program.resolve_label(init_label, program.offset()); + program.emit_transaction(true); + program.emit_goto(start_offset); + + Ok(()) +} + enum PrimaryKeyDefinitionType<'a> { Simple { typename: Option<&'a str>, diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 2b61310d5..ac46a5362 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1101,6 +1101,15 @@ pub fn insn_to_str( 0, format!("r[{}]=root iDb={} flags={}", root, db, flags), ), + Insn::DropBtree { db, root } => ( + "DropBtree", + *db as i32, + *root as i32, + 0, + OwnedValue::build_text(Rc::new("".to_string())), + 0, + format!("root iDb={}", db), + ), Insn::Close { cursor_id } => ( "Close", *cursor_id as i32, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 6b04ef462..0dbde7b8b 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -590,6 +590,14 @@ pub enum Insn { flags: usize, }, + // Drop a b-tree + DropBtree { + // The database within which this b-tree needs to be dropped (P1). + db: usize, + // The root page of this b-tree (P2). + root: usize, + }, + /// Close a cursor. Close { cursor_id: CursorID, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a07145dc9..71a7f6531 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2688,6 +2688,14 @@ impl Program { state.registers[*root] = OwnedValue::Integer(root_page as i64); state.pc += 1; } + Insn::DropBtree { db, root } => { + if *db > 0 { + todo!("temp databases not implemented yet"); + } + let mut cursor = Box::new(BTreeCursor::new(pager.clone(), *root)); + cursor.btree_drop()?; + state.pc += 1; + } Insn::Close { cursor_id } => { let mut cursors = state.cursors.borrow_mut(); cursors.get_mut(*cursor_id).unwrap().take(); From b8bebf3fa32fddf1ae2aff8cb2274766ea66d01d Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Thu, 6 Feb 2025 22:56:06 +0530 Subject: [PATCH 02/20] translate: updated the command to more closely match SQLite semantics the command for drop table translation has been updated so that it more closely matches the semantics of SQLite's drop table command. there are a few more things missing like ephemeral tables, destroy etc. --- core/translate/mod.rs | 53 +++++++++++++++++++++++++------------------ core/vdbe/explain.rs | 4 ++-- core/vdbe/insn.rs | 4 ++-- core/vdbe/mod.rs | 2 +- 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 19d522813..46b9c26eb 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -651,20 +651,17 @@ fn translate_drop_table( } bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); } - let table = table.unwrap(); // safe to do since we have a check before this let init_label = program.emit_init(); let start_offset = program.offset(); - // 1. Drop the table BTree - program.emit_insn(Insn::DropBtree { - db: 0, - root: table.root_page, - }); + let null_reg = program.alloc_register(); // r1 + program.emit_null(null_reg); + let tbl_name_reg = program.alloc_register(); // r2 + let table_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); // r3 + let table_type = program.emit_string8_new_reg("trigger".to_string()); // r4 + let row_id_reg = program.alloc_register(); // r5 - // TODO: Drop indexes? - - // 2. Delete table metadata from sqlite_schema let table_name = "sqlite_schema"; let table = schema.get_table(&table_name).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id( @@ -677,7 +674,7 @@ fn translate_drop_table( }); program.emit_insn(Insn::OpenWriteAwait {}); - // Rewind to the very beginning of the cursor + // loop to beginning of schema table program.emit_insn(Insn::RewindAsync { cursor_id: sqlite_schema_cursor_id, }); @@ -686,26 +683,37 @@ fn translate_drop_table( cursor_id: sqlite_schema_cursor_id, pc_if_empty: end_metadata_label, }); + + // start loop on schema table let metadata_loop = program.allocate_label(); program.resolve_label(metadata_loop, program.offset()); - - // Load row details - let tbl_name_reg = program.alloc_register(); program.emit_insn(Insn::Column { cursor_id: sqlite_schema_cursor_id, column: 2, dest: tbl_name_reg, }); - let string_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); let next_label = program.allocate_label(); program.emit_insn(Insn::Ne { lhs: tbl_name_reg, - rhs: string_reg, + rhs: table_reg, target_pc: next_label, flags: CmpInsFlags::default(), }); - - // Delete matching row + program.emit_insn(Insn::Column { + cursor_id: sqlite_schema_cursor_id, + column: 0, + dest: tbl_name_reg, + }); + program.emit_insn(Insn::Eq { + lhs: tbl_name_reg, + rhs: table_type, + target_pc: next_label, + flags: CmpInsFlags::default(), + }); + program.emit_insn(Insn::RowId { + cursor_id: sqlite_schema_cursor_id, + dest: row_id_reg, + }); program.emit_insn(Insn::DeleteAsync { cursor_id: sqlite_schema_cursor_id, }); @@ -713,7 +721,6 @@ fn translate_drop_table( cursor_id: sqlite_schema_cursor_id, }); - // Move to next row program.resolve_label(next_label, program.offset()); program.emit_insn(Insn::NextAsync { cursor_id: sqlite_schema_cursor_id, @@ -723,17 +730,19 @@ fn translate_drop_table( pc_if_next: metadata_loop, }); program.resolve_label(end_metadata_label, program.offset()); + // end of loop on schema table - // Update schema - let parse_schema_where_clause = format!("tbl_name = {}", tbl_name.name.0); - program.emit_insn(Insn::ParseSchema { + // 2. Drop the table structure + program.emit_insn(Insn::DropTable { db: 0, - where_clause: parse_schema_where_clause, + root: table.root_page, }); + // end of the program program.emit_halt(); program.resolve_label(init_label, program.offset()); program.emit_transaction(true); + program.emit_goto(start_offset); Ok(()) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ac46a5362..be0e4f92a 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1101,8 +1101,8 @@ pub fn insn_to_str( 0, format!("r[{}]=root iDb={} flags={}", root, db, flags), ), - Insn::DropBtree { db, root } => ( - "DropBtree", + Insn::DropTable { db, root } => ( + "DropTable", *db as i32, *root as i32, 0, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 0dbde7b8b..897842d4d 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -590,8 +590,8 @@ pub enum Insn { flags: usize, }, - // Drop a b-tree - DropBtree { + // Drop a table + DropTable { // The database within which this b-tree needs to be dropped (P1). db: usize, // The root page of this b-tree (P2). diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 71a7f6531..b28a1faf9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2688,7 +2688,7 @@ impl Program { state.registers[*root] = OwnedValue::Integer(root_page as i64); state.pc += 1; } - Insn::DropBtree { db, root } => { + Insn::DropTable { db, root } => { if *db > 0 { todo!("temp databases not implemented yet"); } From caab6129013e007c4e29103bbcc045753eb5b28b Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sat, 8 Feb 2025 18:45:59 +0530 Subject: [PATCH 03/20] resolved all conflicts after rebase from upstream/main --- core/translate/mod.rs | 128 ++++-------------------------------------- 1 file changed, 12 insertions(+), 116 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 46b9c26eb..3d6aba0ae 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -30,6 +30,7 @@ use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::translate::delete::translate_delete; use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX; use crate::vdbe::builder::{CursorType, ProgramBuilderOpts, QueryMode}; +use crate::vdbe::insn::CmpInsFlags; use crate::vdbe::{builder::ProgramBuilder, insn::Insn, Program}; use crate::{bail_parse_error, Connection, LimboError, Result, SymbolTable}; use insert::translate_insert; @@ -93,9 +94,7 @@ pub fn translate( ast::Stmt::DropTable { if_exists, tbl_name, - } => { - translate_drop_table(&mut program, tbl_name, if_exists, schema)?; - } + } => translate_drop_table(query_mode, tbl_name, if_exists, schema)?, ast::Stmt::DropTrigger { .. } => bail_parse_error!("DROP TRIGGER not supported yet"), ast::Stmt::DropView { .. } => bail_parse_error!("DROP VIEW not supported yet"), ast::Stmt::Pragma(name, body) => pragma::translate_pragma( @@ -522,11 +521,17 @@ fn translate_create_table( } fn translate_drop_table( - program: &mut ProgramBuilder, + query_mode: QueryMode, tbl_name: ast::QualifiedName, if_exists: bool, schema: &Schema, -) -> Result<()> { +) -> Result { + let mut program = ProgramBuilder::new(ProgramBuilderOpts { + query_mode, + num_cursors: 1, + approx_num_insns: 30, + approx_num_labels: 1, + }); let table = schema.get_table(tbl_name.name.0.as_str()); if table.is_none() { if if_exists { @@ -538,116 +543,7 @@ fn translate_drop_table( program.emit_constant_insns(); program.emit_goto(start_offset); - return Ok(()); - } - bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); - } - let table = table.unwrap(); // safe to do since we have a check before this - - let init_label = program.emit_init(); - let start_offset = program.offset(); - - // 1. Drop the table BTree - program.emit_insn(Insn::DropBtree { - db: 0, - root: table.root_page, - }); - - // TODO: Drop indexes? - - // 2. Delete table metadata from sqlite_schema - let table_name = "sqlite_schema"; - let table = schema.get_table(&table_name).unwrap(); - let sqlite_schema_cursor_id = program.alloc_cursor_id( - Some(table_name.to_string()), - CursorType::BTreeTable(table.clone()), - ); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id: sqlite_schema_cursor_id, - root_page: 1, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - - // Rewind to the very beginning of the cursor - program.emit_insn(Insn::RewindAsync { - cursor_id: sqlite_schema_cursor_id, - }); - let end_metadata_label = program.allocate_label(); - program.emit_insn(Insn::RewindAwait { - cursor_id: sqlite_schema_cursor_id, - pc_if_empty: end_metadata_label, - }); - let metadata_loop = program.allocate_label(); - program.resolve_label(metadata_loop, program.offset()); - - // Load row details - let tbl_name_reg = program.alloc_register(); - program.emit_insn(Insn::Column { - cursor_id: sqlite_schema_cursor_id, - column: 2, - dest: tbl_name_reg, - }); - let string_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); - let next_label = program.allocate_label(); - program.emit_insn(Insn::Ne { - lhs: tbl_name_reg, - rhs: string_reg, - target_pc: next_label, - flags: CmpInsFlags::default(), - }); - - // Delete matching row - program.emit_insn(Insn::DeleteAsync { - cursor_id: sqlite_schema_cursor_id, - }); - program.emit_insn(Insn::DeleteAwait { - cursor_id: sqlite_schema_cursor_id, - }); - - // Move to next row - program.resolve_label(next_label, program.offset()); - program.emit_insn(Insn::NextAsync { - cursor_id: sqlite_schema_cursor_id, - }); - program.emit_insn(Insn::NextAwait { - cursor_id: sqlite_schema_cursor_id, - pc_if_next: metadata_loop, - }); - program.resolve_label(end_metadata_label, program.offset()); - - // Update schema - let parse_schema_where_clause = format!("tbl_name = {}", tbl_name.name.0); - program.emit_insn(Insn::ParseSchema { - db: 0, - where_clause: parse_schema_where_clause, - }); - - program.emit_halt(); - program.resolve_label(init_label, program.offset()); - program.emit_transaction(true); - program.emit_goto(start_offset); - - Ok(program) -} - -fn translate_drop_table( - program: &mut ProgramBuilder, - tbl_name: ast::QualifiedName, - if_exists: bool, - schema: &Schema, -) -> Result<()> { - let table = schema.get_table(tbl_name.name.0.as_str()); - if table.is_none() { - if if_exists { - let init_label = program.emit_init(); - let start_offset = program.offset(); - program.emit_halt(); - program.resolve_label(init_label, program.offset()); - program.emit_transaction(true); - program.emit_constant_insns(); - program.emit_goto(start_offset); - - return Ok(()); + return Ok(program); } bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); } @@ -745,7 +641,7 @@ fn translate_drop_table( program.emit_goto(start_offset); - Ok(()) + Ok(program) } enum PrimaryKeyDefinitionType<'a> { From 713465c5920ace078a415e8292868f4df27e862b Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 00:59:51 +0530 Subject: [PATCH 04/20] instruction: added destroy instruction added required functionality for destroy. minor cleanup of print statements --- core/lib.rs | 5 ----- core/vdbe/explain.rs | 16 ++++++++++++++++ core/vdbe/insn.rs | 10 ++++++++++ core/vdbe/mod.rs | 16 +++++++++++++--- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index b9f0a25d1..00889db98 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -316,10 +316,8 @@ impl Connection { pub(crate) fn run_cmd(self: &Rc, cmd: Cmd) -> Result> { let db = self.db.clone(); let syms: &SymbolTable = &db.syms.borrow(); - println!("the command {:?}", cmd); match cmd { Cmd::Stmt(stmt) => { - println!("AST: {:?}", stmt); let program = Rc::new(translate::translate( &self.schema.borrow(), stmt, @@ -329,7 +327,6 @@ impl Connection { syms, QueryMode::Normal, )?); - println!("the generated program {:?}", program); let stmt = Statement::new(program, self.pager.clone()); Ok(Some(stmt)) } @@ -370,13 +367,11 @@ impl Connection { } pub fn execute(self: &Rc, sql: impl AsRef) -> Result<()> { - println!("running execute"); let sql = sql.as_ref(); let db = &self.db; let syms: &SymbolTable = &db.syms.borrow(); let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next()?; - println!("the command {:?}", cmd); if let Some(cmd) = cmd { match cmd { Cmd::Explain(stmt) => { diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index be0e4f92a..98dd5b80e 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1101,6 +1101,22 @@ pub fn insn_to_str( 0, format!("r[{}]=root iDb={} flags={}", root, db, flags), ), + Insn::Destroy { + root, + former_root_reg, + is_temp, + } => ( + "Destroy", + *root as i32, + *former_root_reg as i32, + *is_temp as i32, + OwnedValue::build_text(Rc::new("".to_string())), + 0, + format!( + "root iDb={} former_root={} is_temp={}", + root, former_root_reg, is_temp + ), + ), Insn::DropTable { db, root } => ( "DropTable", *db as i32, diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 897842d4d..2402d17ed 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -590,6 +590,16 @@ pub enum Insn { flags: usize, }, + /// Deletes an entire database table or index whose root page in the database file is given by P1. + Destroy { + /// The root page of the table/index to destroy + root: usize, + /// Register to store the former value of any moved root page (for AUTOVACUUM) + former_root_reg: usize, + /// Whether this is a temporary table (1) or main database table (0) + is_temp: usize, + }, + // Drop a table DropTable { // The database within which this b-tree needs to be dropped (P1). diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index b28a1faf9..f18b16f29 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2688,14 +2688,24 @@ impl Program { state.registers[*root] = OwnedValue::Integer(root_page as i64); state.pc += 1; } - Insn::DropTable { db, root } => { - if *db > 0 { - todo!("temp databases not implemented yet"); + Insn::Destroy { + root, + former_root_reg: _, + is_temp, + } => { + if *is_temp == 1 { + todo!("temp databases not implemented yet."); } let mut cursor = Box::new(BTreeCursor::new(pager.clone(), *root)); cursor.btree_drop()?; state.pc += 1; } + Insn::DropTable { db, root: _ } => { + if *db > 0 { + todo!("temp databases not implemented yet"); + } + // TODO (Zaid): implement the functionality to clean up in-memory structures + } Insn::Close { cursor_id } => { let mut cursors = state.cursors.borrow_mut(); cursors.get_mut(*cursor_id).unwrap().take(); From 508dad77aba9926aa6683a8c3af5ded3688308cb Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 10:58:06 +0530 Subject: [PATCH 05/20] DROP TABLE: modified program instructions correctly emitting constant instructions and also calling Destroy instead of DropTable instruction for removing btree pages --- core/translate/mod.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 3d6aba0ae..fb8878df8 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -547,6 +547,7 @@ fn translate_drop_table( } bail_parse_error!("No such table: {}", tbl_name.name.0.as_str()); } + let table = table.unwrap(); // safe since we just checked for None let init_label = program.emit_init(); let start_offset = program.offset(); @@ -555,14 +556,16 @@ fn translate_drop_table( program.emit_null(null_reg); let tbl_name_reg = program.alloc_register(); // r2 let table_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); // r3 + program.mark_last_insn_constant(); let table_type = program.emit_string8_new_reg("trigger".to_string()); // r4 + program.mark_last_insn_constant(); let row_id_reg = program.alloc_register(); // r5 let table_name = "sqlite_schema"; - let table = schema.get_table(&table_name).unwrap(); + let schema_table = schema.get_table(&table_name).unwrap(); let sqlite_schema_cursor_id = program.alloc_cursor_id( Some(table_name.to_string()), - CursorType::BTreeTable(table.clone()), + CursorType::BTreeTable(schema_table.clone()), ); program.emit_insn(Insn::OpenWriteAsync { cursor_id: sqlite_schema_cursor_id, @@ -628,16 +631,18 @@ fn translate_drop_table( program.resolve_label(end_metadata_label, program.offset()); // end of loop on schema table - // 2. Drop the table structure - program.emit_insn(Insn::DropTable { - db: 0, + // 2. Destroy the table structure + program.emit_insn(Insn::Destroy { root: table.root_page, + former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy) + is_temp: 0, }); // end of the program program.emit_halt(); program.resolve_label(init_label, program.offset()); program.emit_transaction(true); + program.emit_constant_insns(); program.emit_goto(start_offset); From 97d87955cc1c90ed330b5d44f52b62bf724d5e28 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 11:03:30 +0530 Subject: [PATCH 06/20] DROP TABLE: renamed BTreeCusor::btree_drop to BTreeCursor::btree_destroy this more closely matches semantics --- core/storage/btree.rs | 2 +- core/vdbe/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 22f9d08ee..fe5347091 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2236,7 +2236,7 @@ impl BTreeCursor { Ok(Some(n_overflow)) } - pub fn btree_drop(&mut self) -> Result> { + pub fn btree_destroy(&mut self) -> Result> { self.move_to_root(); loop { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f18b16f29..34be4a235 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2697,7 +2697,7 @@ impl Program { todo!("temp databases not implemented yet."); } let mut cursor = Box::new(BTreeCursor::new(pager.clone(), *root)); - cursor.btree_drop()?; + cursor.btree_destroy()?; state.pc += 1; } Insn::DropTable { db, root: _ } => { From fbc8cd7e700b13bdd0dcfe5f8a780375219546d4 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 11:16:57 +0530 Subject: [PATCH 07/20] vdbe: modified the Null instruction modified the Null instruction to more closely match SQLite semantics. Allows passing in a second register and all registers from r1..r2 area set to null --- core/translate/mod.rs | 11 ++++++++--- core/translate/pragma.rs | 2 +- core/vdbe/builder.rs | 7 ++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index fb8878df8..1d7489ee7 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -224,7 +224,7 @@ fn emit_schema_entry( if let Some(sql) = sql { program.emit_string8(sql, sql_reg); } else { - program.emit_null(sql_reg); + program.emit_null(sql_reg, None); } let record_reg = program.alloc_register(); @@ -552,8 +552,8 @@ fn translate_drop_table( let init_label = program.emit_init(); let start_offset = program.offset(); - let null_reg = program.alloc_register(); // r1 - program.emit_null(null_reg); + let r1 = program.alloc_register(); // r1 + program.emit_null(r1, None); let tbl_name_reg = program.alloc_register(); // r2 let table_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); // r3 program.mark_last_insn_constant(); @@ -573,6 +573,7 @@ fn translate_drop_table( }); program.emit_insn(Insn::OpenWriteAwait {}); + // 1. Remove all entries from the schema table related to the table we are dropping, except for triggers // loop to beginning of schema table program.emit_insn(Insn::RewindAsync { cursor_id: sqlite_schema_cursor_id, @@ -638,6 +639,10 @@ fn translate_drop_table( is_temp: 0, }); + let r6 = program.alloc_register(); + let r7 = program.alloc_register(); + program.emit_null(r6, Some(r7)); + // end of the program program.emit_halt(); program.resolve_label(init_label, program.offset()); diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs index 1fd738acd..eb19c31e9 100644 --- a/core/translate/pragma.rs +++ b/core/translate/pragma.rs @@ -233,7 +233,7 @@ fn query_pragma( // dflt_value match &column.default { None => { - program.emit_null(base_reg + 4); + program.emit_null(base_reg + 4, None); } Some(expr) => { program.emit_string8(expr.to_string(), base_reg + 4); diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 716610b71..35863a6ac 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -138,11 +138,8 @@ impl ProgramBuilder { }); } - pub fn emit_null(&mut self, dest: usize) { - self.emit_insn(Insn::Null { - dest, - dest_end: None, - }); + pub fn emit_null(&mut self, dest: usize, dest_end: Option) { + self.emit_insn(Insn::Null { dest, dest_end }); } pub fn emit_result_row(&mut self, start_reg: usize, count: usize) { From 40b08c559c1326e1d89d23d7da91ac1b7d450bd8 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 12:55:09 +0530 Subject: [PATCH 08/20] vdbe: modified instruction DropTable the instruction DropTable has been modified to correctly match SQLite semantics --- core/translate/mod.rs | 14 ++++++++++++++ core/vdbe/explain.rs | 13 +++++++++---- core/vdbe/insn.rs | 10 +++++++--- core/vdbe/mod.rs | 9 +++++++-- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 1d7489ee7..f87739f45 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -643,6 +643,20 @@ fn translate_drop_table( let r7 = program.alloc_register(); program.emit_null(r6, Some(r7)); + // 3. TODO: Open an ephemeral table, and read over triggers from schema table into ephemeral table + // Requires support via https://github.com/tursodatabase/limbo/pull/768 + + // 4. TODO: Open a write cursor to the schema table and re-insert all triggers into the sqlite schema table from the ephemeral table and delete old trigger + // Requires support via https://github.com/tursodatabase/limbo/pull/768 + + // Drop the in-memory structures for the table + program.emit_insn(Insn::DropTable { + db: 0, + _p2: 0, + _p3: 0, + table_name: tbl_name.name.0, + }); + // end of the program program.emit_halt(); program.resolve_label(init_label, program.offset()); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 98dd5b80e..750c79f42 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1117,14 +1117,19 @@ pub fn insn_to_str( root, former_root_reg, is_temp ), ), - Insn::DropTable { db, root } => ( + Insn::DropTable { + db, + _p2, + _p3, + table_name, + } => ( "DropTable", *db as i32, - *root as i32, 0, - OwnedValue::build_text(Rc::new("".to_string())), 0, - format!("root iDb={}", db), + OwnedValue::build_text(Rc::new(table_name.clone())), + 0, + format!("DROP TABLE {}", table_name), ), Insn::Close { cursor_id } => ( "Close", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 2402d17ed..542161311 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -602,10 +602,14 @@ pub enum Insn { // Drop a table DropTable { - // The database within which this b-tree needs to be dropped (P1). + /// The database within which this b-tree needs to be dropped (P1). db: usize, - // The root page of this b-tree (P2). - root: usize, + /// unused register p2 + _p2: usize, + /// unused register p3 + _p3: usize, + // The name of the table being dropped + table_name: String, }, /// Close a cursor. diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 34be4a235..45e23fa85 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2700,11 +2700,16 @@ impl Program { cursor.btree_destroy()?; state.pc += 1; } - Insn::DropTable { db, root: _ } => { + Insn::DropTable { + db, + _p2, + _p3, + table_name, + } => { if *db > 0 { todo!("temp databases not implemented yet"); } - // TODO (Zaid): implement the functionality to clean up in-memory structures + // TODO (Zaid): implement the functionality to clean up in-memory structures for table_name } Insn::Close { cursor_id } => { let mut cursors = state.cursors.borrow_mut(); From dc2bb7cb9b3e2727301280135632daee9df6e784 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 14:07:19 +0530 Subject: [PATCH 09/20] DropTable: implementation complete added helper methods to Schema to remove table and indices from in-memory structures completed the implementation for DropTable using that --- core/schema.rs | 10 ++++++++++ core/vdbe/mod.rs | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/schema.rs b/core/schema.rs index 38bcf86a5..f4a1643a9 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -30,6 +30,11 @@ impl Schema { self.tables.insert(name, table); } + pub fn remove_table(&mut self, table_name: &str) { + let name = normalize_ident(table_name); + self.tables.remove(&name); + } + pub fn get_table(&self, name: &str) -> Option> { let name = normalize_ident(name); self.tables.get(&name).cloned() @@ -42,6 +47,11 @@ impl Schema { .or_default() .push(index.clone()) } + + pub fn remove_indices_for_table(&mut self, table_name: &str) { + let name = normalize_ident(table_name); + self.indexes.remove(&name); + } } #[derive(Clone, Debug)] diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 45e23fa85..fc458157d 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2709,7 +2709,12 @@ impl Program { if *db > 0 { todo!("temp databases not implemented yet"); } - // TODO (Zaid): implement the functionality to clean up in-memory structures for table_name + if let Some(conn) = self.connection.upgrade() { + let mut schema = RefCell::borrow_mut(&conn.schema); + schema.remove_indices_for_table(table_name); + schema.remove_table(table_name); + } + state.pc += 1; } Insn::Close { cursor_id } => { let mut cursors = state.cursors.borrow_mut(); From e0105398a6f0f7e17c83d9dcf740814cd4d58d44 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 18:34:17 +0530 Subject: [PATCH 10/20] btree: modified the btree_destroy subroutine modified the btree_destroy subroutine to do an iterative DFS and use the stack cell counters to keep track of whether children have been removed. adds a unit test. --- core/storage/btree.rs | 141 ++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 54 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index fe5347091..aadaa1ac2 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2236,6 +2236,20 @@ impl BTreeCursor { Ok(Some(n_overflow)) } + /// Destroys a B-tree by freeing all its pages in an iterative depth-first order. + /// This ensures child pages are freed before their parents + /// + /// # Example + /// For a B-tree with this structure: + /// ```text + /// 1 (root) + /// / \ + /// 2 3 + /// / \ / \ + /// 4 5 6 7 + /// ``` + /// + /// The destruction order would be: [4,5,2,6,7,3,1] pub fn btree_destroy(&mut self) -> Result> { self.move_to_root(); @@ -2249,89 +2263,54 @@ impl BTreeCursor { } let contents = page.get().contents.as_ref().unwrap(); - // TOOD: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 merged - // let current_page_id = page.get().id; + let current_page_id = page.get().id; + // if this node is an interior cell, process all it's children first if !contents.is_leaf() { - let mut has_unprocessed_children = false; - - // Process all the regular cells first - for cell_idx in 0..contents.cell_count() { + let cell_idx = self.stack.current_cell_index(); + // process interior nodes first + if cell_idx < contents.cell_count() as i32 { let cell = contents.cell_get( - cell_idx, + cell_idx as usize, Rc::clone(&self.pager), self.payload_overflow_threshold_max(contents.page_type()), self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; + // load the actual page this cell points to if let BTreeCell::TableInteriorCell(interior) = cell { let child_page = self.pager.read_page(interior._left_child_page as usize)?; + self.stack.advance(); self.stack.push(child_page); - has_unprocessed_children = true; - break; + continue; } - } - - if !has_unprocessed_children { + // process the right child + } else if cell_idx == contents.cell_count() as i32 { if let Some(rightmost) = contents.rightmost_pointer() { let rightmost_page = self.pager.read_page(rightmost as usize)?; + self.stack.advance(); self.stack.push(rightmost_page); continue; } } - - if has_unprocessed_children { - continue; - } } else { - for cell_idx in 0..contents.cell_count() { + // if the node is a leaf node, clear overflow pages + let cell_idx = self.stack.current_cell_index(); + for i in cell_idx..contents.cell_count() as i32 { let cell = contents.cell_get( - cell_idx, + i as usize, Rc::clone(&self.pager), self.payload_overflow_threshold_max(contents.page_type()), self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; - if let BTreeCell::TableLeafCell(TableLeafCell { - _rowid, - _payload, - first_overflow_page: Some(overflow_page_id), - }) = cell - { - let mut current_overflow_id = overflow_page_id; - loop { - let overflow_page = - self.pager.read_page(current_overflow_id as usize)?; - return_if_locked!(overflow_page); - - if !overflow_page.is_loaded() { - self.pager.load_page(Arc::clone(&overflow_page))?; - return Ok(CursorResult::IO); - } - - let overflow_contents = overflow_page.get().contents.as_ref().unwrap(); - let next_overflow_id = u32::from_be_bytes( - overflow_contents.as_ptr()[..4].try_into().unwrap(), - ); - - // TODO: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 is merged - // self.pager - // .free_page(Some(overflow_page), current_overflow_id as usize)?; - - if next_overflow_id == 0 { - break; - } - current_overflow_id = next_overflow_id; - } - } + return_if_io!(self.clear_overflow_pages(&cell)); + self.stack.advance(); } } - // All children & overflow pages have been processed - // TODO: Uncomment after Krishvishal's PR https://github.com/tursodatabase/limbo/pull/785 is merged - // self.pager.free_page(Some(page), current_page_id)?; - + self.pager.free_page(Some(page), current_page_id)?; if self.stack.has_parent() { self.stack.pop(); } else { @@ -2986,4 +2965,58 @@ mod tests { Ok(()) } + + #[test] + fn test_btree_destroy() -> Result<()> { + let initial_size = 3; + let (pager, db_header) = setup_test_env(initial_size); + let mut cursor = BTreeCursor::new(pager.clone(), 2); + assert_eq!( + db_header.borrow().database_size, + initial_size, + "Database should initially have 3 pages" + ); + + // Initialize page 2 as a leaf page + let root_page = cursor.pager.read_page(2)?; + { + let contents = root_page.get().contents.as_mut().unwrap(); + contents.write_u8(0, PageType::TableLeaf as u8); // Set page type + contents.write_u16(1, 0); // First freeblock + contents.write_u16(3, 0); // Number of cells + contents.write_u16(5, contents.buffer.borrow().len() as u16); // Cell content area + contents.write_u8(7, 0); // Fragment bytes + } + + // Insert records until we force a split + for i in 0..100 { + let record = Record::new(vec![OwnedValue::Integer(i)]); + cursor.insert(&OwnedValue::Integer(i), &record, false)?; + } + + // Verify split occurred + assert_eq!( + db_header.borrow().database_size, + initial_size + 2, + "Split should add 2 pages" + ); + + // Track freelist state before destruction + let initial_free_pages = db_header.borrow().freelist_pages; + assert_eq!(initial_free_pages, 0, "Should start with no free pages"); + + // Destroy the btree + match cursor.btree_destroy()? { + CursorResult::Ok(_) => { + let pages_freed = db_header.borrow().freelist_pages - initial_free_pages; + // We expect 3 pages to be freed: root became interior + 2 leaf pages + assert_eq!(pages_freed, 3, "Should free 3 pages (root + 2 leaves)"); + } + CursorResult::IO => { + cursor.pager.io.run_once()?; + } + } + + Ok(()) + } } From e57e0cb4977c8b93ef8c60a7b6fd387619432c1f Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 19:03:36 +0530 Subject: [PATCH 11/20] tcl tests: added DROP TABLE tcl tests --- testing/drop_table.test | 56 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100755 testing/drop_table.test diff --git a/testing/drop_table.test b/testing/drop_table.test new file mode 100755 index 000000000..9b5ae8d7a --- /dev/null +++ b/testing/drop_table.test @@ -0,0 +1,56 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# Basic DROP TABLE functionality +do_execsql_test_on_specific_db {:memory:} drop-table-basic-1 { + CREATE TABLE t1(x INTEGER PRIMARY KEY); + INSERT INTO t1 VALUES (1); + INSERT INTO t1 VALUES (2); + DROP TABLE t1; + SELECT count(*) FROM sqlite_schema WHERE type='table' AND name='t1'; +} {0} + +# Test DROP TABLE IF EXISTS on existing table +do_execsql_test_on_specific_db {:memory:} drop-table-if-exists-1 { + CREATE TABLE t2(x INTEGER PRIMARY KEY); + DROP TABLE IF EXISTS t2; + SELECT count(*) FROM sqlite_schema WHERE type='table' AND name='t2'; +} {0} + +# Test DROP TABLE IF EXISTS on non-existent table +do_execsql_test_on_specific_db {:memory:} drop-table-if-exists-2 { + DROP TABLE IF EXISTS nonexistent_table; + SELECT 'success'; +} {success} + +# Test dropping table with index +do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 { + CREATE TABLE t3(x INTEGER PRIMARY KEY, y TEXT); + CREATE INDEX idx_t3_y ON t3(y); + INSERT INTO t3 VALUES(1, 'one'); + DROP TABLE t3; + SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3'; +} {0} + +# Test dropping table cleans up related schema entries +do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 { + CREATE TABLE t4(x INTEGER PRIMARY KEY, y TEXT); + CREATE INDEX idx1_t4 ON t4(x); + CREATE INDEX idx2_t4 ON t4(y); + INSERT INTO t4 VALUES(1, 'one'); + DROP TABLE t4; + SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4'; +} {0} + +# Test dropping table after multiple inserts and deletes +do_execsql_test_on_specific_db {:memory:} drop-table-after-ops-1 { + CREATE TABLE t6(x INTEGER PRIMARY KEY); + INSERT INTO t6 VALUES (1); + INSERT INTO t6 VALUES (2); + DELETE FROM t6 WHERE x = 1; + INSERT INTO t6 VALUES (3); + DROP TABLE t6; + SELECT count(*) FROM sqlite_schema WHERE type='table' AND name='t6'; +} {0} \ No newline at end of file From 34f390abc959b5cab7c878b62af1eef8809e874e Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 9 Feb 2025 19:16:46 +0530 Subject: [PATCH 12/20] cleanup: removed commented code --- core/translate/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index f87739f45..29055c537 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -90,7 +90,6 @@ pub fn translate( } ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), - // ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), ast::Stmt::DropTable { if_exists, tbl_name, From 07efc9b90258b8109de9eb160ebdf56c4d491aa3 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Mon, 10 Feb 2025 10:34:01 +0530 Subject: [PATCH 13/20] DROP TABLE: index drop added missing instructions to drop the indices for a table physically from btree pages in a loop --- core/schema.rs | 7 +++++++ core/translate/mod.rs | 21 ++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/core/schema.rs b/core/schema.rs index f4a1643a9..61ec6d384 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -48,6 +48,13 @@ impl Schema { .push(index.clone()) } + pub fn get_indices(&self, table_name: &str) -> &[Rc] { + let name = normalize_ident(table_name); + self.indexes + .get(&name) + .map_or_else(|| &[] as &[Rc], |v| v.as_slice()) + } + pub fn remove_indices_for_table(&mut self, table_name: &str) { let name = normalize_ident(table_name); self.indexes.remove(&name); diff --git a/core/translate/mod.rs b/core/translate/mod.rs index 29055c537..71e089dbf 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -631,7 +631,26 @@ fn translate_drop_table( program.resolve_label(end_metadata_label, program.offset()); // end of loop on schema table - // 2. Destroy the table structure + // 2. Destroy the indices within a loop + let indices = schema.get_indices(&tbl_name.name.0); + for index in indices { + program.emit_insn(Insn::Destroy { + root: index.root_page, + former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy) + is_temp: 0, + }); + let null_reg_1 = program.alloc_register(); + let null_reg_2 = program.alloc_register(); + program.emit_null(null_reg_1, Some(null_reg_2)); + + // 3. TODO: Open an ephemeral table, and read over triggers from schema table into ephemeral table + // Requires support via https://github.com/tursodatabase/limbo/pull/768 + + // 4. TODO: Open a write cursor to the schema table and re-insert all triggers into the sqlite schema table from the ephemeral table and delete old trigger + // Requires support via https://github.com/tursodatabase/limbo/pull/768 + } + + // 3. Destroy the table structure program.emit_insn(Insn::Destroy { root: table.root_page, former_root_reg: 0, // no autovacuum (https://www.sqlite.org/opcode.html#Destroy) From f7dc0d42dfeaf84853156335e06d6efb5d26cc85 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 16 Feb 2025 13:01:59 +0530 Subject: [PATCH 14/20] btree_destroy: implemented functionality with state machine --- core/storage/btree.rs | 273 +++++++++++++++++++++++++++++++++++------- 1 file changed, 232 insertions(+), 41 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index aadaa1ac2..a04542299 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -75,6 +75,25 @@ macro_rules! return_if_locked { }}; } +/// State machine of destroy operations +/// Keep track of traversal so that it can be resumed when IO is encountered +#[derive(Debug, Clone)] +enum DestroyState { + Start, + LoadPage, + ProcessPage, + ClearOverflowPages { + cell_idx: i32, + overflow_chain: Vec, + next_to_free: usize, + }, + FreePage, +} + +struct DestroyInfo { + state: DestroyState, +} + /// State machine of a write operation. /// May involve balancing due to overflow. #[derive(Debug, Clone, Copy)] @@ -120,6 +139,7 @@ impl WriteInfo { enum CursorState { None, Write(WriteInfo), + Destroy(DestroyInfo), } impl CursorState { @@ -135,6 +155,19 @@ impl CursorState { _ => None, } } + + fn destroy_info(&self) -> Option<&DestroyInfo> { + match self { + CursorState::Destroy(x) => Some(x), + _ => None, + } + } + fn mut_destroy_info(&mut self) -> Option<&mut DestroyInfo> { + match self { + CursorState::Destroy(x) => Some(x), + _ => None, + } + } } pub struct BTreeCursor { @@ -2251,25 +2284,85 @@ impl BTreeCursor { /// /// The destruction order would be: [4,5,2,6,7,3,1] pub fn btree_destroy(&mut self) -> Result> { - self.move_to_root(); + if let CursorState::None = &self.state { + self.move_to_root(); + self.state = CursorState::Destroy(DestroyInfo { + state: DestroyState::Start, + }); + } loop { - let page = self.stack.top(); - return_if_locked!(page); + let destroy_state = { + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state.clone() + }; - if !page.is_loaded() { - self.pager.load_page(Arc::clone(&page))?; - return Ok(CursorResult::IO); - } + match destroy_state { + DestroyState::Start => { + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::LoadPage; + } + DestroyState::LoadPage => { + let page = self.stack.top(); + return_if_locked!(page); - let contents = page.get().contents.as_ref().unwrap(); - let current_page_id = page.get().id; + if !page.is_loaded() { + self.pager.load_page(Arc::clone(&page)); + return Ok(CursorResult::IO); + } - // if this node is an interior cell, process all it's children first - if !contents.is_leaf() { - let cell_idx = self.stack.current_cell_index(); - // process interior nodes first - if cell_idx < contents.cell_count() as i32 { + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::ProcessPage; + } + DestroyState::ProcessPage => { + let page = self.stack.top(); + assert!(page.is_loaded()); // page should be loaded at this time + + let contents = page.get().contents.as_ref().unwrap(); + + let cell_idx = self.stack.current_cell_index(); + if cell_idx > contents.cell_count() as i32 { + // If all the cells in this page have been processed, move state machine to freeing current page + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::FreePage; + continue; + } else if cell_idx == contents.cell_count() as i32 { + // At the last cell index + // If interior page, free right page + if !contents.is_leaf() { + if let Some(rightmost) = contents.rightmost_pointer() { + let rightmost_page = self.pager.read_page(rightmost as usize)?; + self.stack.advance(); + self.stack.push(rightmost_page); + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::LoadPage; + continue; + } + } + // If leaf page, free current page + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::FreePage; + continue; + } + + // There are still cells left to process in this page let cell = contents.cell_get( cell_idx as usize, Rc::clone(&self.pager), @@ -2277,47 +2370,145 @@ impl BTreeCursor { self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; - // load the actual page this cell points to - if let BTreeCell::TableInteriorCell(interior) = cell { - let child_page = - self.pager.read_page(interior._left_child_page as usize)?; + if !contents.is_leaf() { + // Load the left child of this interior cell + let child_page_id = match &cell { + BTreeCell::TableInteriorCell(cell) => cell._left_child_page, + BTreeCell::IndexInteriorCell(cell) => cell.left_child_page, + _ => panic!("expected interior cell"), + }; + let child_page = self.pager.read_page(child_page_id as usize)?; self.stack.advance(); self.stack.push(child_page); + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::LoadPage; continue; - } - // process the right child - } else if cell_idx == contents.cell_count() as i32 { - if let Some(rightmost) = contents.rightmost_pointer() { - let rightmost_page = self.pager.read_page(rightmost as usize)?; - self.stack.advance(); - self.stack.push(rightmost_page); - continue; + } else { + // Clear the overflow pages for this leaf cell + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::ClearOverflowPages { + cell_idx: self.stack.current_cell_index(), + overflow_chain: Vec::new(), + next_to_free: 0, + }; } } - } else { - // if the node is a leaf node, clear overflow pages - let cell_idx = self.stack.current_cell_index(); - for i in cell_idx..contents.cell_count() as i32 { + DestroyState::ClearOverflowPages { cell_idx, .. } => { + let page = self.stack.top(); + let contents = page.get().contents.as_ref().unwrap(); let cell = contents.cell_get( - i as usize, + cell_idx as usize, Rc::clone(&self.pager), self.payload_overflow_threshold_max(contents.page_type()), self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; - return_if_io!(self.clear_overflow_pages(&cell)); - self.stack.advance(); + + match self.clear_overflow_pages(&cell)? { + CursorResult::Ok(_) => { + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::ProcessPage; + self.stack.advance(); + } + CursorResult::IO => return Ok(CursorResult::IO), + } + } + DestroyState::FreePage => { + let page = self.stack.top(); + let page_id = page.get().id; + + self.pager.free_page(Some(page), page_id)?; + + if self.stack.has_parent() { + self.stack.pop(); + let destroy_info = self + .state + .mut_destroy_info() + .expect("unable to get a mut reference to destroy state in cursor"); + destroy_info.state = DestroyState::ProcessPage; + } else { + self.state = CursorState::None; + return Ok(CursorResult::Ok(())); + } } } - - self.pager.free_page(Some(page), current_page_id)?; - if self.stack.has_parent() { - self.stack.pop(); - } else { - break; - } } - Ok(CursorResult::Ok(())) + // self.move_to_root(); + + // loop { + // let page = self.stack.top(); + // return_if_locked!(page); + + // if !page.is_loaded() { + // self.pager.load_page(Arc::clone(&page))?; + // return Ok(CursorResult::IO); + // } + + // let contents = page.get().contents.as_ref().unwrap(); + // let current_page_id = page.get().id; + + // // if this node is an interior cell, process all it's children first + // if !contents.is_leaf() { + // let cell_idx = self.stack.current_cell_index(); + // // process interior nodes first + // if cell_idx < contents.cell_count() as i32 { + // let cell = contents.cell_get( + // cell_idx as usize, + // Rc::clone(&self.pager), + // self.payload_overflow_threshold_max(contents.page_type()), + // self.payload_overflow_threshold_min(contents.page_type()), + // self.usable_space(), + // )?; + // // load the actual page this cell points to + // if let BTreeCell::TableInteriorCell(interior) = cell { + // let child_page = + // self.pager.read_page(interior._left_child_page as usize)?; + // self.stack.advance(); + // self.stack.push(child_page); + // continue; + // } + // // process the right child + // } else if cell_idx == contents.cell_count() as i32 { + // if let Some(rightmost) = contents.rightmost_pointer() { + // let rightmost_page = self.pager.read_page(rightmost as usize)?; + // self.stack.advance(); + // self.stack.push(rightmost_page); + // continue; + // } + // } + // } else { + // // if the node is a leaf node, clear overflow pages + // let cell_idx = self.stack.current_cell_index(); + // for i in cell_idx..contents.cell_count() as i32 { + // let cell = contents.cell_get( + // i as usize, + // Rc::clone(&self.pager), + // self.payload_overflow_threshold_max(contents.page_type()), + // self.payload_overflow_threshold_min(contents.page_type()), + // self.usable_space(), + // )?; + // return_if_io!(self.clear_overflow_pages(&cell)); + // self.stack.advance(); + // } + // } + + // self.pager.free_page(Some(page), current_page_id)?; + // if self.stack.has_parent() { + // self.stack.pop(); + // } else { + // break; + // } + // } + // Ok(CursorResult::Ok(())) } } From 8ce305ad4401ad0ff83e5d833976fd86e93c9950 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 16 Feb 2025 14:47:57 +0530 Subject: [PATCH 15/20] fixed compiler errors after rebase --- core/vdbe/explain.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 750c79f42..6733ebb45 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1110,7 +1110,7 @@ pub fn insn_to_str( *root as i32, *former_root_reg as i32, *is_temp as i32, - OwnedValue::build_text(Rc::new("".to_string())), + OwnedValue::build_text(&Rc::new("".to_string())), 0, format!( "root iDb={} former_root={} is_temp={}", @@ -1127,7 +1127,7 @@ pub fn insn_to_str( *db as i32, 0, 0, - OwnedValue::build_text(Rc::new(table_name.clone())), + OwnedValue::build_text(&Rc::new(table_name.clone())), 0, format!("DROP TABLE {}", table_name), ), From 2ff27834ffd20ffd796091a5bb3e0971c1ef514b Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 16 Feb 2025 14:49:11 +0530 Subject: [PATCH 16/20] fix: addresses comment https://github.com/tursodatabase/limbo/pull/897#discussion_r1948154297 by @pereman2 this commit adds the final touches to the state machine for btree_destroy and also introduces a state machine for clear_overflow_pages this addresses the issue of IO interrupting an operation in progress and solves the following problems: * performance issues with repeating an operation * missing page issues with clear overflow pages --- core/storage/btree.rs | 227 ++++++++++++------------------------------ 1 file changed, 64 insertions(+), 163 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a04542299..16a853cec 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -16,8 +16,7 @@ use std::sync::Arc; use super::pager::PageRef; use super::sqlite3_ondisk::{ - payload_overflows, write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, - DATABASE_HEADER_SIZE, + write_varint_to_vec, IndexInteriorCell, IndexLeafCell, OverflowCell, DATABASE_HEADER_SIZE, }; /* @@ -82,11 +81,7 @@ enum DestroyState { Start, LoadPage, ProcessPage, - ClearOverflowPages { - cell_idx: i32, - overflow_chain: Vec, - next_to_free: usize, - }, + ClearOverflowPages { cell_idx: i32 }, FreePage, } @@ -170,6 +165,12 @@ impl CursorState { } } +enum OverflowState { + Start, + ProcessPage { next_page: u32 }, + Done, +} + pub struct BTreeCursor { pager: Rc, /// Page id of the root page used to go back up fast. @@ -184,6 +185,9 @@ pub struct BTreeCursor { going_upwards: bool, /// Information maintained across execution attempts when an operation yields due to I/O. state: CursorState, + /// Information maintained while freeing overflow pages. Maintained separately from cursor state since + /// any method could require freeing overflow pages + overflow_state: Option, /// Page stack used to traverse the btree. /// Each cursor has a stack because each cursor traverses the btree independently. stack: PageStack, @@ -217,6 +221,7 @@ impl BTreeCursor { null_flag: false, going_upwards: false, state: CursorState::None, + overflow_state: None, stack: PageStack { current_page: RefCell::new(-1), cell_indices: RefCell::new([0; BTCURSOR_MAX_DEPTH + 1]), @@ -2183,106 +2188,71 @@ impl BTreeCursor { id as u32 } - fn clear_overflow_pages(&self, cell: &BTreeCell) -> Result> { - // Get overflow info based on cell type - let (first_overflow_page, n_overflow) = match cell { - BTreeCell::TableLeafCell(leaf_cell) => { - match self.calculate_overflow_info(leaf_cell._payload.len(), PageType::TableLeaf)? { - Some(n_overflow) => (leaf_cell.first_overflow_page, n_overflow), - None => return Ok(CursorResult::Ok(())), + fn clear_overflow_pages(&mut self, cell: &BTreeCell) -> Result> { + loop { + let state = self.overflow_state.take().unwrap_or(OverflowState::Start); + + match state { + OverflowState::Start => { + let first_overflow_page = match cell { + BTreeCell::TableLeafCell(leaf_cell) => leaf_cell.first_overflow_page, + BTreeCell::IndexLeafCell(leaf_cell) => leaf_cell.first_overflow_page, + BTreeCell::IndexInteriorCell(interior_cell) => { + interior_cell.first_overflow_page + } + BTreeCell::TableInteriorCell(_) => return Ok(CursorResult::Ok(())), // No overflow pages + }; + + if let Some(page) = first_overflow_page { + self.overflow_state = Some(OverflowState::ProcessPage { next_page: page }); + continue; + } else { + self.overflow_state = Some(OverflowState::Done); + } } - } - BTreeCell::IndexLeafCell(leaf_cell) => { - match self.calculate_overflow_info(leaf_cell.payload.len(), PageType::IndexLeaf)? { - Some(n_overflow) => (leaf_cell.first_overflow_page, n_overflow), - None => return Ok(CursorResult::Ok(())), + OverflowState::ProcessPage { next_page } => { + if next_page < 2 + || next_page as usize > self.pager.db_header.borrow().database_size as usize + { + self.overflow_state = None; + return Err(LimboError::Corrupt("Invalid overflow page number".into())); + } + let page = self.pager.read_page(next_page as usize)?; + return_if_locked!(page); + + let contents = page.get().contents.as_ref().unwrap(); + let next = contents.read_u32(0); + + self.pager.free_page(Some(page), next_page as usize)?; + + if next != 0 { + self.overflow_state = Some(OverflowState::ProcessPage { next_page: next }); + } else { + self.overflow_state = Some(OverflowState::Done); + } } - } - BTreeCell::IndexInteriorCell(interior_cell) => { - match self - .calculate_overflow_info(interior_cell.payload.len(), PageType::IndexInterior)? - { - Some(n_overflow) => (interior_cell.first_overflow_page, n_overflow), - None => return Ok(CursorResult::Ok(())), + OverflowState::Done => { + self.overflow_state = None; + return Ok(CursorResult::Ok(())); } - } - BTreeCell::TableInteriorCell(_) => return Ok(CursorResult::Ok(())), // No overflow pages - }; - - let Some(first_page) = first_overflow_page else { - return Ok(CursorResult::Ok(())); - }; - let page_count = self.pager.db_header.borrow().database_size as usize; - let mut pages_left = n_overflow; - let mut current_page = first_page; - // Clear overflow pages - while pages_left > 0 { - pages_left -= 1; - - // Validate overflow page number - if current_page < 2 || current_page as usize > page_count { - return Err(LimboError::Corrupt("Invalid overflow page number".into())); - } - - let page = self.pager.read_page(current_page as usize)?; - return_if_locked!(page); - let contents = page.get().contents.as_ref().unwrap(); - - let next_page = if pages_left > 0 { - contents.read_u32(0) - } else { - 0 }; - - // Free the current page - self.pager.free_page(Some(page), current_page as usize)?; - - current_page = next_page; } - Ok(CursorResult::Ok(())) - } - - fn calculate_overflow_info( - &self, - payload_len: usize, - page_type: PageType, - ) -> Result> { - let max_local = self.payload_overflow_threshold_max(page_type.clone()); - let min_local = self.payload_overflow_threshold_min(page_type.clone()); - let usable_size = self.usable_space(); - - let (_, local_size) = payload_overflows(payload_len, max_local, min_local, usable_size); - - assert!( - local_size != payload_len, - "Trying to clear overflow pages when there are no overflow pages" - ); - - // Calculate expected overflow pages - let overflow_page_size = self.usable_space() - 4; - let n_overflow = - (payload_len - local_size + overflow_page_size).div_ceil(overflow_page_size); - if n_overflow == 0 { - return Err(LimboError::Corrupt("Invalid overflow calculation".into())); - } - - Ok(Some(n_overflow)) } /// Destroys a B-tree by freeing all its pages in an iterative depth-first order. /// This ensures child pages are freed before their parents /// /// # Example - /// For a B-tree with this structure: + /// For a B-tree with this structure (where 4' is an overflow page): /// ```text /// 1 (root) /// / \ /// 2 3 /// / \ / \ - /// 4 5 6 7 + /// 4' <- 4 5 6 7 /// ``` /// - /// The destruction order would be: [4,5,2,6,7,3,1] + /// The destruction order would be: [4',4,5,2,6,7,3,1] pub fn btree_destroy(&mut self) -> Result> { if let CursorState::None = &self.state { self.move_to_root(); @@ -2295,7 +2265,7 @@ impl BTreeCursor { let destroy_state = { let destroy_info = self .state - .mut_destroy_info() + .destroy_info() .expect("unable to get a mut reference to destroy state in cursor"); destroy_info.state.clone() }; @@ -2313,7 +2283,7 @@ impl BTreeCursor { return_if_locked!(page); if !page.is_loaded() { - self.pager.load_page(Arc::clone(&page)); + self.pager.load_page(Arc::clone(&page))?; return Ok(CursorResult::IO); } @@ -2394,12 +2364,10 @@ impl BTreeCursor { .expect("unable to get a mut reference to destroy state in cursor"); destroy_info.state = DestroyState::ClearOverflowPages { cell_idx: self.stack.current_cell_index(), - overflow_chain: Vec::new(), - next_to_free: 0, }; } } - DestroyState::ClearOverflowPages { cell_idx, .. } => { + DestroyState::ClearOverflowPages { cell_idx } => { let page = self.stack.top(); let contents = page.get().contents.as_ref().unwrap(); let cell = contents.cell_get( @@ -2442,73 +2410,6 @@ impl BTreeCursor { } } } - // self.move_to_root(); - - // loop { - // let page = self.stack.top(); - // return_if_locked!(page); - - // if !page.is_loaded() { - // self.pager.load_page(Arc::clone(&page))?; - // return Ok(CursorResult::IO); - // } - - // let contents = page.get().contents.as_ref().unwrap(); - // let current_page_id = page.get().id; - - // // if this node is an interior cell, process all it's children first - // if !contents.is_leaf() { - // let cell_idx = self.stack.current_cell_index(); - // // process interior nodes first - // if cell_idx < contents.cell_count() as i32 { - // let cell = contents.cell_get( - // cell_idx as usize, - // Rc::clone(&self.pager), - // self.payload_overflow_threshold_max(contents.page_type()), - // self.payload_overflow_threshold_min(contents.page_type()), - // self.usable_space(), - // )?; - // // load the actual page this cell points to - // if let BTreeCell::TableInteriorCell(interior) = cell { - // let child_page = - // self.pager.read_page(interior._left_child_page as usize)?; - // self.stack.advance(); - // self.stack.push(child_page); - // continue; - // } - // // process the right child - // } else if cell_idx == contents.cell_count() as i32 { - // if let Some(rightmost) = contents.rightmost_pointer() { - // let rightmost_page = self.pager.read_page(rightmost as usize)?; - // self.stack.advance(); - // self.stack.push(rightmost_page); - // continue; - // } - // } - // } else { - // // if the node is a leaf node, clear overflow pages - // let cell_idx = self.stack.current_cell_index(); - // for i in cell_idx..contents.cell_count() as i32 { - // let cell = contents.cell_get( - // i as usize, - // Rc::clone(&self.pager), - // self.payload_overflow_threshold_max(contents.page_type()), - // self.payload_overflow_threshold_min(contents.page_type()), - // self.usable_space(), - // )?; - // return_if_io!(self.clear_overflow_pages(&cell)); - // self.stack.advance(); - // } - // } - - // self.pager.free_page(Some(page), current_page_id)?; - // if self.stack.has_parent() { - // self.stack.pop(); - // } else { - // break; - // } - // } - // Ok(CursorResult::Ok(())) } } @@ -3021,7 +2922,7 @@ mod tests { #[test] fn test_clear_overflow_pages() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let cursor = BTreeCursor::new(pager.clone(), 1); + let mut cursor = BTreeCursor::new(pager.clone(), 1); let max_local = cursor.payload_overflow_threshold_max(PageType::TableLeaf); let usable_size = cursor.usable_space(); @@ -3118,7 +3019,7 @@ mod tests { #[test] fn test_clear_overflow_pages_no_overflow() -> Result<()> { let (pager, db_header) = setup_test_env(5); - let cursor = BTreeCursor::new(pager.clone(), 1); + let mut cursor = BTreeCursor::new(pager.clone(), 1); let small_payload = vec![b'A'; 10]; From 77815b20d709d38e225114bfa81d57b0a6a8c2be Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 16 Feb 2025 14:55:08 +0530 Subject: [PATCH 17/20] docs: added documentation for btree_destroy and clear_overflow_pages --- core/storage/btree.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 16a853cec..e5824667e 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2188,6 +2188,9 @@ impl BTreeCursor { id as u32 } + /// Clear the overflow pages linked to a specific page provided by the leaf cell + /// Uses a state machine to keep track of it's operations so that traversal can be + /// resumed from last point after IO interruption fn clear_overflow_pages(&mut self, cell: &BTreeCell) -> Result> { loop { let state = self.overflow_state.take().unwrap_or(OverflowState::Start); @@ -2241,6 +2244,7 @@ impl BTreeCursor { /// Destroys a B-tree by freeing all its pages in an iterative depth-first order. /// This ensures child pages are freed before their parents + /// Uses a state machine to keep track of the operation to ensure IO doesn't cause repeated traversals /// /// # Example /// For a B-tree with this structure (where 4' is an overflow page): From fdf4166a7c77ed029c374978249f63f567f7168f Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 23 Feb 2025 12:41:17 +0530 Subject: [PATCH 18/20] fix: addresses comment https://github.com/tursodatabase/limbo/pull/897#discussion_r1962102119 by @pereman2 this commit changes the contents of the state machine state for ClearOverflowPages to contain the BTreeCell instead of the cell index. this avoids having to repeatedly do an operation to get the BTreeCell from the cell index each time the state is reached --- core/storage/btree.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index e5824667e..b8fbb077c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -81,7 +81,7 @@ enum DestroyState { Start, LoadPage, ProcessPage, - ClearOverflowPages { cell_idx: i32 }, + ClearOverflowPages { cell: BTreeCell }, FreePage, } @@ -2362,26 +2362,21 @@ impl BTreeCursor { continue; } else { // Clear the overflow pages for this leaf cell + let cell = contents.cell_get( + cell_idx as usize, + Rc::clone(&self.pager), + self.payload_overflow_threshold_max(contents.page_type()), + self.payload_overflow_threshold_min(contents.page_type()), + self.usable_space(), + )?; let destroy_info = self .state .mut_destroy_info() .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::ClearOverflowPages { - cell_idx: self.stack.current_cell_index(), - }; + destroy_info.state = DestroyState::ClearOverflowPages { cell }; } } - DestroyState::ClearOverflowPages { cell_idx } => { - let page = self.stack.top(); - let contents = page.get().contents.as_ref().unwrap(); - let cell = contents.cell_get( - cell_idx as usize, - Rc::clone(&self.pager), - self.payload_overflow_threshold_max(contents.page_type()), - self.payload_overflow_threshold_min(contents.page_type()), - self.usable_space(), - )?; - + DestroyState::ClearOverflowPages { cell } => { match self.clear_overflow_pages(&cell)? { CursorResult::Ok(_) => { let destroy_info = self From 662b34c8b69534e42c62bd4d8d93f73c2fe08052 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sun, 23 Feb 2025 16:12:28 +0530 Subject: [PATCH 19/20] fix: addresses comments https://github.com/tursodatabase/limbo/pull/897#discussion_r1962100891 and https://github.com/tursodatabase/limbo/pull/897#discussion_r1962100299 by @pereman2 this commit introduces the ability of the state machine traversal to handle index interior cell overflow pages. it also makes the state machine states more explicit with match arms. --- core/storage/btree.rs | 161 +++++++++++++++++++++++++----------------- 1 file changed, 98 insertions(+), 63 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index b8fbb077c..d6e00cde9 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -2302,41 +2302,52 @@ impl BTreeCursor { assert!(page.is_loaded()); // page should be loaded at this time let contents = page.get().contents.as_ref().unwrap(); - let cell_idx = self.stack.current_cell_index(); - if cell_idx > contents.cell_count() as i32 { - // If all the cells in this page have been processed, move state machine to freeing current page - let destroy_info = self - .state - .mut_destroy_info() - .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::FreePage; - continue; - } else if cell_idx == contents.cell_count() as i32 { - // At the last cell index - // If interior page, free right page - if !contents.is_leaf() { - if let Some(rightmost) = contents.rightmost_pointer() { - let rightmost_page = self.pager.read_page(rightmost as usize)?; - self.stack.advance(); - self.stack.push(rightmost_page); + + // If we've processed all cells in this page, figure out what to do with this page + if cell_idx >= contents.cell_count() as i32 { + match (contents.is_leaf(), cell_idx) { + // Leaf pages with all cells processed + (true, n) if n >= contents.cell_count() as i32 => { let destroy_info = self.state.mut_destroy_info().expect( "unable to get a mut reference to destroy state in cursor", ); - destroy_info.state = DestroyState::LoadPage; + destroy_info.state = DestroyState::FreePage; continue; } + // Non-leaf page which has processed all children but not it's potential right child + (false, n) if n == contents.cell_count() as i32 => { + if let Some(rightmost) = contents.rightmost_pointer() { + let rightmost_page = + self.pager.read_page(rightmost as usize)?; + self.stack.advance(); + self.stack.push(rightmost_page); + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::LoadPage; + } else { + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::FreePage; + } + continue; + } + // Non-leaf page which has processed all children and it's right child + (false, n) if n > contents.cell_count() as i32 => { + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::FreePage; + continue; + } + _ => unreachable!("Invalid cell idx state"), } - // If leaf page, free current page - let destroy_info = self - .state - .mut_destroy_info() - .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::FreePage; - continue; } - // There are still cells left to process in this page + // We have not yet processed all cells in this page + // Get the current cell let cell = contents.cell_get( cell_idx as usize, Rc::clone(&self.pager), @@ -2344,48 +2355,72 @@ impl BTreeCursor { self.payload_overflow_threshold_min(contents.page_type()), self.usable_space(), )?; - if !contents.is_leaf() { - // Load the left child of this interior cell - let child_page_id = match &cell { - BTreeCell::TableInteriorCell(cell) => cell._left_child_page, - BTreeCell::IndexInteriorCell(cell) => cell.left_child_page, - _ => panic!("expected interior cell"), - }; - let child_page = self.pager.read_page(child_page_id as usize)?; - self.stack.advance(); - self.stack.push(child_page); - let destroy_info = self - .state - .mut_destroy_info() - .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::LoadPage; - continue; - } else { - // Clear the overflow pages for this leaf cell - let cell = contents.cell_get( - cell_idx as usize, - Rc::clone(&self.pager), - self.payload_overflow_threshold_max(contents.page_type()), - self.payload_overflow_threshold_min(contents.page_type()), - self.usable_space(), - )?; - let destroy_info = self - .state - .mut_destroy_info() - .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::ClearOverflowPages { cell }; - } - } - DestroyState::ClearOverflowPages { cell } => { - match self.clear_overflow_pages(&cell)? { - CursorResult::Ok(_) => { + + match contents.is_leaf() { + // For a leaf cell, clear the overflow pages associated with this cell + true => { let destroy_info = self .state .mut_destroy_info() .expect("unable to get a mut reference to destroy state in cursor"); - destroy_info.state = DestroyState::ProcessPage; - self.stack.advance(); + destroy_info.state = DestroyState::ClearOverflowPages { cell }; + continue; } + // For interior cells, check the type of cell to determine what to do + false => match &cell { + // For index interior cells, remove the overflow pages + BTreeCell::IndexInteriorCell(_) => { + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::ClearOverflowPages { cell }; + continue; + } + // For all other interior cells, load the left child page + _ => { + let child_page_id = match &cell { + BTreeCell::TableInteriorCell(cell) => cell._left_child_page, + BTreeCell::IndexInteriorCell(cell) => cell.left_child_page, + _ => panic!("expected interior cell"), + }; + let child_page = self.pager.read_page(child_page_id as usize)?; + self.stack.advance(); + self.stack.push(child_page); + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::LoadPage; + continue; + } + }, + } + } + DestroyState::ClearOverflowPages { cell } => { + match self.clear_overflow_pages(&cell)? { + CursorResult::Ok(_) => match cell { + // For an index interior cell, clear the left child page now that overflow pages have been cleared + BTreeCell::IndexInteriorCell(index_int_cell) => { + let child_page = self + .pager + .read_page(index_int_cell.left_child_page as usize)?; + self.stack.advance(); + self.stack.push(child_page); + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::LoadPage; + continue; + } + // For any leaf cell, advance the index now that overflow pages have been cleared + BTreeCell::TableLeafCell(_) | BTreeCell::IndexLeafCell(_) => { + self.stack.advance(); + let destroy_info = self.state.mut_destroy_info().expect( + "unable to get a mut reference to destroy state in cursor", + ); + destroy_info.state = DestroyState::LoadPage; + } + _ => panic!("unexpected cell type"), + }, CursorResult::IO => return Ok(CursorResult::IO), } } From 2f14b5ddbcc644918c3f61a358c78dee99253482 Mon Sep 17 00:00:00 2001 From: Zaid Humayun Date: Sat, 1 Mar 2025 01:26:00 +0530 Subject: [PATCH 20/20] fix: addresses comment https://github.com/tursodatabase/limbo/pull/897#discussion_r1975723452 renames the variable r1 to null_reg since that's the purpose anyway --- core/translate/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/mod.rs b/core/translate/mod.rs index da69999d8..fd4f5a149 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -557,8 +557,8 @@ fn translate_drop_table( let init_label = program.emit_init(); let start_offset = program.offset(); - let r1 = program.alloc_register(); // r1 - program.emit_null(r1, None); + let null_reg = program.alloc_register(); // r1 + program.emit_null(null_reg, None); let tbl_name_reg = program.alloc_register(); // r2 let table_reg = program.emit_string8_new_reg(tbl_name.name.0.clone()); // r3 program.mark_last_insn_constant();