mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-23 08:55:40 +01:00
Merge 'btree: fix interior cell replacement in btrees with depth >=3' from Jussi Saurio
## Background When a divider cell is deleted from an index interior page, the following algorithm is used: 1. Find predecessor: Move to largest key in left subtree of the current page. This is always a leaf page. 2. Create replacement: Convert this predecessor leaf cell to interior cell format, using original cell's left child page pointer 3. Replace: Drop original cell from parent page, insert replacement at same position 4. Cleanup: Delete the taken predecessor cell from the leaf page <img width="845" height="266" alt="Screenshot 2025-07-16 at 10 39 18" src="https://github.com/user- attachments/assets/30517da4-a4dc-471e-a8f5-c27ba0979c86" /> ## The faulty code leading to the bug The error in our logic was that we always expected to only traverse down one level of the btree: ```rust let parent_page = self.stack.parent_page().unwrap(); let leaf_page = self.stack.top(); ``` This meant that when the deletion happened on say, level 1, and the replacement cell was taken from level 3, we actually inserted the replacement cell into level 2 instead of level 1. ## Manifestation of the bug in issue 2106 In #2106, this manifested as the following chain of pages, going from parent to children: 3 -> 111 -> 119 - Cell to be deleted was on page 3 (whose left pointer is 111) - Going to the largest key in the left subtree meant traversing from 3 to 111 and then from 111 to 119 - a replacement cell was taken from 119 - incorrectly inserted into 111 - and its left child pointer also set as 111! - now whenever page 111 wanted to go to its left child page, it would just traverse back to itself, eventually causing a crash because we have a hard limit of the number of pages on the page stack. ## The fix The fix is quite trivial: store the page we are on before we start traversing down. Closes #2106 Closes #2108
This commit is contained in:
@@ -190,6 +190,7 @@ enum DeleteState {
|
||||
post_balancing_seek_key: Option<DeleteSavepoint>,
|
||||
},
|
||||
InteriorNodeReplacement {
|
||||
page: PageRef,
|
||||
cell_idx: usize,
|
||||
original_child_pointer: Option<u32>,
|
||||
post_balancing_seek_key: Option<DeleteSavepoint>,
|
||||
@@ -4349,6 +4350,7 @@ impl BTreeCursor {
|
||||
let delete_info = self.state.mut_delete_info().unwrap();
|
||||
if !contents.is_leaf() {
|
||||
delete_info.state = DeleteState::InteriorNodeReplacement {
|
||||
page: page.clone(),
|
||||
cell_idx,
|
||||
original_child_pointer,
|
||||
post_balancing_seek_key,
|
||||
@@ -4366,6 +4368,7 @@ impl BTreeCursor {
|
||||
}
|
||||
|
||||
DeleteState::InteriorNodeReplacement {
|
||||
page,
|
||||
cell_idx,
|
||||
original_child_pointer,
|
||||
post_balancing_seek_key,
|
||||
@@ -4376,6 +4379,8 @@ impl BTreeCursor {
|
||||
// 3. Delete that key from the child page.
|
||||
|
||||
// Step 1: Move cursor to the largest key in the left subtree.
|
||||
// The largest key is always in a leaf, and so this traversal may involvegoing multiple pages downwards,
|
||||
// so we store the page we are currently on.
|
||||
return_if_io!(self.prev());
|
||||
let (cell_payload, leaf_cell_idx) = {
|
||||
let leaf_page_ref = self.stack.top();
|
||||
@@ -4416,18 +4421,26 @@ impl BTreeCursor {
|
||||
(cell_payload, leaf_cell_idx)
|
||||
};
|
||||
|
||||
let parent_page = self.stack.parent_page().unwrap();
|
||||
let leaf_page = self.stack.top();
|
||||
|
||||
parent_page.get().set_dirty();
|
||||
self.pager.add_dirty(parent_page.get().get().id);
|
||||
page.set_dirty();
|
||||
self.pager.add_dirty(page.get().id);
|
||||
leaf_page.get().set_dirty();
|
||||
self.pager.add_dirty(leaf_page.get().get().id);
|
||||
|
||||
// Step 2: Replace the cell in the parent (interior) page.
|
||||
{
|
||||
let parent_page_ref = parent_page.get();
|
||||
let parent_contents = parent_page_ref.get().contents.as_mut().unwrap();
|
||||
let parent_contents = page.get_contents();
|
||||
let parent_page_id = page.get().id;
|
||||
let left_child_page = u32::from_be_bytes(
|
||||
cell_payload[..4].try_into().expect("invalid cell payload"),
|
||||
);
|
||||
turso_assert!(
|
||||
left_child_page as usize != parent_page_id,
|
||||
"corrupt: current page and left child page of cell {} are both {}",
|
||||
left_child_page,
|
||||
parent_page_id
|
||||
);
|
||||
|
||||
// First, drop the old cell that is being replaced.
|
||||
drop_cell(parent_contents, cell_idx, self.usable_space() as u16)?;
|
||||
@@ -4443,7 +4456,7 @@ impl BTreeCursor {
|
||||
// Step 3: Delete the predecessor cell from the leaf page.
|
||||
{
|
||||
let leaf_page_ref = leaf_page.get();
|
||||
let leaf_contents = leaf_page_ref.get().contents.as_mut().unwrap();
|
||||
let leaf_contents = leaf_page_ref.get_contents();
|
||||
drop_cell(leaf_contents, leaf_cell_idx, self.usable_space() as u16)?;
|
||||
}
|
||||
|
||||
@@ -5561,18 +5574,6 @@ impl PageStack {
|
||||
fn clear(&self) {
|
||||
self.current_page.set(-1);
|
||||
}
|
||||
pub fn parent_page(&self) -> Option<BTreePage> {
|
||||
if self.current_page.get() > 0 {
|
||||
Some(
|
||||
self.stack.borrow()[self.current() - 1]
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.clone(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Used for redistributing cells during a balance operation.
|
||||
|
||||
Reference in New Issue
Block a user