fix/btree/balance: allow exactly 1 parent overflow cell for index balancing

This commit is contained in:
Jussi Saurio
2025-07-23 18:42:31 +03:00
parent 025ea8808a
commit d773a7924d

View File

@@ -2376,16 +2376,38 @@ impl BTreeCursor {
let parent_page = self.stack.top();
return_if_locked_maybe_load!(self.pager, parent_page);
let parent_page = parent_page.get();
// If `move_to` moved to rightmost page, cell index will be out of bounds. Meaning cell_count+1.
// In any other case, `move_to` will stay in the correct index.
if self.stack.current_cell_index() as usize
== parent_page.get_contents().cell_count() + 1
{
let parent_contents = parent_page.get_contents();
let page_type = parent_contents.page_type();
turso_assert!(
matches!(page_type, PageType::IndexInterior | PageType::TableInterior),
"expected index or table interior page"
);
let number_of_cells_in_parent =
parent_contents.cell_count() + parent_contents.overflow_cells.len();
// If `seek` moved to rightmost page, cell index will be out of bounds. Meaning cell_count+1.
// In any other case, `seek` will stay in the correct index.
let past_rightmost_pointer =
self.stack.current_cell_index() as usize == number_of_cells_in_parent + 1;
if past_rightmost_pointer {
self.stack.retreat();
} else if self.stack.current_cell_index() == -1 {
// We might've retreated in CheckRequiresBalancing, so advance to the next cell
// to prevent panic in the asserts below due to -1 index
self.stack.advance();
} else if !parent_contents.overflow_cells.is_empty() {
// The ONLY way we can have an overflow cell in the parent is if we replaced an interior cell from a cell in the child, and that replacement did not fit.
// This can only happen on index btrees.
if matches!(page_type, PageType::IndexInterior) {
turso_assert!(parent_contents.overflow_cells.len() == 1, "index interior page must have no more than 1 overflow cell, as a result of InteriorNodeReplacement");
} else {
turso_assert!(false, "{page_type:?} must have no overflow cells");
}
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
let parent_page_cell_idx = self.stack.current_cell_index() as usize;
// Parent page must be positioned at the divider cell that overflowed due to the replacement.
turso_assert!(
overflow_cell.index == parent_page_cell_idx,
"overflow cell index must be the result of InteriorNodeReplacement that leaves both child and parent (id={}) unbalanced, and hence parent page's position must = overflow_cell.index. Instead got: parent_page_cell_idx={parent_page_cell_idx} overflow_cell.index={}",
parent_page.get().id,
overflow_cell.index
);
}
self.pager.add_dirty(&parent_page);
let parent_contents = parent_page.get().contents.as_ref().unwrap();
@@ -2396,23 +2418,9 @@ impl BTreeCursor {
parent_page.get().id,
page_to_balance_idx
);
turso_assert!(
matches!(
parent_contents.page_type(),
PageType::IndexInterior | PageType::TableInterior
),
"expected index or table interior page"
);
// Part 1: Find the sibling pages to balance
let mut pages_to_balance: [Option<BTreePage>; MAX_SIBLING_PAGES_TO_BALANCE] =
[const { None }; MAX_SIBLING_PAGES_TO_BALANCE];
let number_of_cells_in_parent =
parent_contents.cell_count() + parent_contents.overflow_cells.len();
turso_assert!(
parent_contents.overflow_cells.is_empty(),
"balancing child page with overflowed parent not yet implemented"
);
turso_assert!(
page_to_balance_idx <= parent_contents.cell_count(),
"page_to_balance_idx={page_to_balance_idx} is out of bounds for parent cell count {number_of_cells_in_parent}"
@@ -2449,10 +2457,41 @@ impl BTreeCursor {
let right_pointer = if last_sibling_is_right_pointer {
parent_contents.rightmost_pointer_raw().unwrap()
} else {
let (start_of_cell, _) = parent_contents.cell_get_raw_region(
first_cell_divider + sibling_pointer,
self.usable_space(),
let max_overflow_cells = if matches!(page_type, PageType::IndexInterior) {
1
} else {
0
};
turso_assert!(
parent_contents.overflow_cells.len() <= max_overflow_cells,
"must have at most {max_overflow_cells} overflow cell in the parent"
);
// OVERFLOW CELL ADJUSTMENT:
// Let there be parent with cells [0,1,2,3,4].
// Let's imagine the cell at idx 2 gets replaced with a new payload that causes it to overflow.
// See handling of InteriorNodeReplacement in btree.rs.
//
// In this case the rightmost divider is going to be 3 (2 is the middle one and we pick neighbors 1-3).
// drop_cell(): [0,1,2,3,4] -> [0,1,3,4] <-- cells on right side get shifted left!
// insert_into_cell(): [0,1,3,4] -> [0,1,3,4] + overflow cell (2) <-- crucially, no physical shifting happens, overflow cell is stored separately
//
// This means '3' is actually physically located at index '2'.
// So IF the parent has an overflow cell, we need to subtract 1 to get the actual rightmost divider cell idx to physically read from.
// The formula for the actual cell idx is:
// first_cell_divider + sibling_pointer - parent_contents.overflow_cells.len()
// so in the above case:
// actual_cell_idx = 1 + 2 - 1 = 2
//
// In the case where the last divider cell is the overflow cell, there would be no left-shifting of cells in drop_cell(),
// because they are still positioned correctly (imagine .pop() from a vector).
// However, note that we are always looking for the _rightmost_ child page pointer between the (max 2) dividers, and for any case where the last divider cell is the overflow cell,
// the 'last_sibling_is_right_pointer' condition will also be true (since the overflow cell's left child will be the middle page), so we won't enter this code branch.
//
// Hence: when we enter this branch with overflow_cells.len() == 1, we know that left-shifting has happened and we need to subtract 1.
let actual_cell_idx =
first_cell_divider + sibling_pointer - parent_contents.overflow_cells.len();
let (start_of_cell, _) =
parent_contents.cell_get_raw_region(actual_cell_idx, self.usable_space());
let buf = parent_contents.as_ptr().as_mut_ptr();
unsafe { buf.add(start_of_cell) }
};
@@ -2477,25 +2516,58 @@ impl BTreeCursor {
);
}
pages_to_balance[i].replace(page);
turso_assert!(
parent_contents.overflow_cells.is_empty(),
"overflow in parent is not yet implented while balancing it"
);
if i == 0 {
break;
}
let next_cell_divider = i + first_cell_divider - 1;
pgno = match parent_contents.cell_get(next_cell_divider, self.usable_space())? {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page, ..
})
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page, ..
}) => left_child_page,
other => {
crate::bail_corrupt_error!("expected interior cell, got {:?}", other)
}
};
let divider_is_overflow_cell = parent_contents
.overflow_cells
.first()
.is_some_and(|overflow_cell| overflow_cell.index == next_cell_divider);
if divider_is_overflow_cell {
turso_assert!(
matches!(parent_contents.page_type(), PageType::IndexInterior),
"expected index interior page, got {:?}",
parent_contents.page_type()
);
turso_assert!(
parent_contents.overflow_cells.len() == 1,
"must have a single overflow cell in the parent, as a result of InteriorNodeReplacement"
);
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
pgno = u32::from_be_bytes(overflow_cell.payload[0..4].try_into().unwrap());
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
// here we only subtract 1 if the divider cell has been shifted left, i.e. the overflow cell was placed to the left
// this cell.
let actual_cell_idx =
if let Some(overflow_cell) = parent_contents.overflow_cells.first() {
if next_cell_divider < overflow_cell.index {
next_cell_divider
} else {
next_cell_divider - 1
}
} else {
next_cell_divider
};
pgno =
match parent_contents.cell_get(actual_cell_idx, self.usable_space())? {
BTreeCell::TableInteriorCell(TableInteriorCell {
left_child_page,
..
})
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page,
..
}) => left_child_page,
other => {
crate::bail_corrupt_error!(
"expected interior cell, got {:?}",
other
)
}
};
}
}
#[cfg(debug_assertions)]
@@ -2547,11 +2619,6 @@ impl BTreeCursor {
let parent_contents = parent_page.get_contents();
let parent_is_root = !self.stack.has_parent();
turso_assert!(
parent_contents.overflow_cells.is_empty(),
"overflow parent not yet implemented"
);
// 1. Collect cell data from divider cells, and count the total number of cells to be distributed.
// The count includes: all cells and overflow cells from the sibling pages, and divider cells from the parent page,
// excluding the rightmost divider, which will not be dropped from the parent; instead it will be updated at the end.
@@ -2575,10 +2642,33 @@ impl BTreeCursor {
}
// Since we know we have a left sibling, take the divider that points to left sibling of this page
let cell_idx = balance_info.first_divider_cell + i;
let (cell_start, cell_len) =
parent_contents.cell_get_raw_region(cell_idx, self.usable_space());
let buf = parent_contents.as_ptr();
let cell_buf = &buf[cell_start..cell_start + cell_len];
let divider_is_overflow_cell = parent_contents
.overflow_cells
.first()
.is_some_and(|overflow_cell| overflow_cell.index == cell_idx);
let cell_buf = if divider_is_overflow_cell {
turso_assert!(
matches!(parent_contents.page_type(), PageType::IndexInterior),
"expected index interior page, got {:?}",
parent_contents.page_type()
);
turso_assert!(
parent_contents.overflow_cells.len() == 1,
"must have a single overflow cell in the parent, as a result of InteriorNodeReplacement"
);
let overflow_cell = parent_contents.overflow_cells.first().unwrap();
&overflow_cell.payload
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
// here we can subtract overflow_cells.len() every time, because we are iterating right-to-left,
// so if we are to the left of the overflow cell, it has already been cleared from the parent and overflow_cells.len() is 0.
let actual_cell_idx = cell_idx - parent_contents.overflow_cells.len();
let (cell_start, cell_len) = parent_contents
.cell_get_raw_region(actual_cell_idx, self.usable_space());
let buf = parent_contents.as_ptr();
&buf[cell_start..cell_start + cell_len]
};
// Count the divider cell itself (which will be dropped from the parent)
total_cells_to_redistribute += 1;
@@ -2591,12 +2681,24 @@ impl BTreeCursor {
// TODO(pere): make this reference and not copy
balance_info.divider_cell_payloads[i].replace(cell_buf.to_vec());
tracing::trace!(
"dropping divider cell from parent cell_idx={} count={}",
cell_idx,
parent_contents.cell_count()
);
drop_cell(parent_contents, cell_idx, self.usable_space() as u16)?;
if divider_is_overflow_cell {
tracing::debug!(
"clearing overflow cells from parent cell_idx={}",
cell_idx
);
parent_contents.overflow_cells.clear();
} else {
// grep for 'OVERFLOW CELL ADJUSTMENT' for explanation.
// here we can subtract overflow_cells.len() every time, because we are iterating right-to-left,
// so if we are to the left of the overflow cell, it has already been cleared from the parent and overflow_cells.len() is 0.
let actual_cell_idx = cell_idx - parent_contents.overflow_cells.len();
tracing::trace!(
"dropping divider cell from parent cell_idx={} count={}",
actual_cell_idx,
parent_contents.cell_count()
);
drop_cell(parent_contents, actual_cell_idx, self.usable_space() as u16)?;
}
}
/* 2. Initialize CellArray with all the cells used for distribution, this includes divider cells if !leaf. */
@@ -4428,7 +4530,17 @@ impl BTreeCursor {
// Step 1: Move cursor to the largest key in the left subtree.
// The largest key is always in a leaf, and so this traversal may involvegoing multiple pages downwards,
// so we store the page we are currently on.
return_if_io!(self.get_prev_record()); // avoid calling prev() because it internally calls restore_context() which may cause unintended behavior.
// avoid calling prev() because it internally calls restore_context() which may cause unintended behavior.
return_if_io!(self.get_prev_record());
// Ensure we keep the parent page at the same position as before the replacement.
self.stack
.node_states
.borrow_mut()
.get_mut(btree_depth)
.expect("parent page should be on the stack")
.cell_idx = cell_idx as i32;
let (cell_payload, leaf_cell_idx) = {
let leaf_page_ref = self.stack.top();
let leaf_page = leaf_page_ref.get();