From 49b9a69c409cf599236ac3a4b290b795520017c6 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 17 Jul 2025 18:26:14 +0300 Subject: [PATCH] fix/btree: fix insert_into_cell() logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During running simulations for #1988 I ran into a post-balance validation error where the correct divider cell could not be found from the parent. This was caused by divider cell insertion happening this way: - First divider cell caused overflow - Second technically had space to fit, so we didn't add it to overflow cells I looked at SQLite source, and it seems SQLite always adds the cell to overflow cells if there are existing overflow cells: ```c if( pPage->nOverflow || sz+2>pPage->nFree ){ ...add to overflow cells... } ``` So, I changed our implementation to do the same, which fixed the balance validation issue. However, then I ran into another issue: A cell inserted during balancing in the `edit_page()` stage was added to overflow cells, which should not happen. The reason for this was the changed logic in `insert_into_page()`, outlined above. It looks like SQLite doesn't use `insert_into_cell()ยด in its implementation of `page_insert_array()` which explains this. For simplicity, I made a second version of `insert_into_cell()` called `insert_into_cell_during_balance()` which allows regular cell insertion despite existing overflow cells, since the existing overflow cells are what caused the balance to happen in the first place and will be cleared as soon as `edit_page()` is done. --- core/storage/btree.rs | 64 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 871d49d78..bcda6159d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3042,6 +3042,10 @@ impl BTreeCursor { .get_contents() .write_u32(offset::BTREE_RIGHTMOST_PTR, right_pointer); } + turso_assert!( + parent_contents.overflow_cells.is_empty(), + "parent page overflow cells should be empty before divider cell reinsertion" + ); // TODO: pointer map update (vacuum support) // Update divider cells in parent for (sibling_page_idx, page) in pages_to_balance_new @@ -3400,7 +3404,7 @@ impl BTreeCursor { parent_contents: &mut PageContent, pages_to_balance_new: [Option; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], page_type: PageType, - leaf_data: bool, + is_table_leaf: bool, mut cells_debug: Vec>, sibling_count_new: usize, right_page_id: u32, @@ -3649,7 +3653,7 @@ impl BTreeCursor { } } if was_overflow { - if !leaf_data { + if !is_table_leaf { // remember to increase cell if this cell was moved to parent current_index_cell += 1; } @@ -3669,7 +3673,7 @@ impl BTreeCursor { ); valid = false; } - if leaf_data { + if is_table_leaf { // If we are in a table leaf page, we just need to check that this cell that should be a divider cell is in the parent // This means we already check cell in leaf pages but not on parent so we don't advance current_index_cell let last_sibling_idx = balance_info.sibling_count - 1; @@ -3728,7 +3732,7 @@ impl BTreeCursor { } } if was_overflow { - if !leaf_data { + if !is_table_leaf { // remember to increase cell if this cell was moved to parent current_index_cell += 1; } @@ -3781,7 +3785,10 @@ impl BTreeCursor { } } } - assert!(valid, "corrupted database, cells were to balanced properly"); + assert!( + valid, + "corrupted database, cells were not balanced properly" + ); } /// Balance the root page. @@ -5995,7 +6002,7 @@ fn page_insert_array( page.page_type() ); for i in first..first + count { - insert_into_cell( + insert_into_cell_during_balance( page, cell_array.cell_payloads[i], start_insert, @@ -6196,11 +6203,12 @@ fn debug_validate_cells_core(page: &PageContent, usable_space: u16) { /// insert_into_cell() is called from insert_into_page(), /// and the overflow cell count is used to determine if the page overflows, /// i.e. whether we need to balance the btree after the insert. -fn insert_into_cell( +fn _insert_into_cell( page: &mut PageContent, payload: &[u8], cell_idx: usize, usable_space: u16, + allow_regular_insert_despite_overflow: bool, // see [insert_into_cell_during_balance()] ) -> Result<()> { assert!( cell_idx <= page.cell_count() + page.overflow_cells.len(), @@ -6208,8 +6216,14 @@ fn insert_into_cell( cell_idx, page.cell_count() ); - let free = compute_free_space(page, usable_space); - let enough_space = payload.len() + CELL_PTR_SIZE_BYTES <= free as usize; + let already_has_overflow = !page.overflow_cells.is_empty(); + let enough_space = if already_has_overflow && !allow_regular_insert_despite_overflow { + false + } else { + // otherwise, we need to check if we have enough space + let free = compute_free_space(page, usable_space); + payload.len() + CELL_PTR_SIZE_BYTES <= free as usize + }; if !enough_space { // add to overflow cell page.overflow_cells.push(OverflowCell { @@ -6218,6 +6232,10 @@ fn insert_into_cell( }); return Ok(()); } + assert!( + cell_idx <= page.cell_count(), + "cell_idx > page.cell_count() without overflow cells" + ); let new_cell_data_pointer = allocate_cell_space(page, payload.len() as u16, usable_space)?; tracing::debug!( @@ -6255,6 +6273,34 @@ fn insert_into_cell( Ok(()) } +fn insert_into_cell( + page: &mut PageContent, + payload: &[u8], + cell_idx: usize, + usable_space: u16, +) -> Result<()> { + _insert_into_cell(page, payload, cell_idx, usable_space, false) +} + +/// Normally in [insert_into_cell()], if a page already has overflow cells, all +/// new insertions are also added to the overflow cells vector. +/// SQLite doesn't use regular [insert_into_cell()] during balancing, +/// so we have a specialized function for use during balancing that allows regular cell insertion +/// despite the presence of existing overflow cells (overflow cells are one of the reasons we are balancing in the first place). +/// During balancing cells are first repositioned with [edit_page()] +/// and then inserted via [page_insert_array()] which calls [insert_into_cell_during_balance()], +/// and finally the existing overflow cells are cleared. +/// If we would not allow the cell insert to proceed normally despite overflow cells being present, +/// the new insertions would also be added as overflow cells which defeats the point of balancing. +fn insert_into_cell_during_balance( + page: &mut PageContent, + payload: &[u8], + cell_idx: usize, + usable_space: u16, +) -> Result<()> { + _insert_into_cell(page, payload, cell_idx, usable_space, true) +} + /// The amount of free space is the sum of: /// #1. The size of the unallocated region /// #2. Fragments (isolated 1-3 byte chunks of free space within the cell content area)