From 1e0fb143f6feb02b23a968c5774793ac9a4e2fa6 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 18 Sep 2025 14:27:36 +0400 Subject: [PATCH] remove io.blocks from btree balancing code --- core/storage/btree.rs | 186 ++++++++++++++++++++++++++++++++---------- 1 file changed, 145 insertions(+), 41 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 78b92fa80..9d001d254 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -206,10 +206,35 @@ pub enum OverwriteCellState { }, } -#[derive(Debug, PartialEq)] +struct BalanceContext { + pages_to_balance_new: [Option>; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], + sibling_count_new: usize, + cell_array: CellArray, + old_cell_count_per_page_cumulative: [u16; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], + #[cfg(debug_assertions)] + cells_debug: Vec>, +} + +impl std::fmt::Debug for BalanceContext { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BalanceContext") + .field("pages_to_balance_new", &self.pages_to_balance_new) + .field("sibling_count_new", &self.sibling_count_new) + .field("cell_array", &self.cell_array) + .field( + "old_cell_count_per_page_cumulative", + &self.old_cell_count_per_page_cumulative, + ) + .finish() + } +} + +#[derive(Debug)] /// State machine of a btree rebalancing operation. enum BalanceSubState { Start, + BalanceRoot, + Decide, Quick, /// Choose which sibling pages to balance (max 3). /// Generally, the siblings involved will be the page that triggered the balancing and its left and right siblings. @@ -220,6 +245,13 @@ enum BalanceSubState { /// Perform the actual balancing. This will result in 1-5 pages depending on the number of total cells to be distributed /// from the source pages. NonRootDoBalancing, + NonRootDoBalancingAllocate { + i: usize, + context: Option, + }, + NonRootDoBalancingFinish { + context: BalanceContext, + }, /// Free pages that are not used anymore after balancing. FreePages { curr_page: usize, @@ -2345,7 +2377,7 @@ impl BTreeCursor { let overflows = !page.get_contents().overflow_cells.is_empty(); if overflows { *write_state = WriteState::Balancing; - assert!(self.balance_state.sub_state == BalanceSubState::Start, "There should be no balancing operation in progress when insert state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); + assert!(matches!(self.balance_state.sub_state, BalanceSubState::Start), "There should be no balancing operation in progress when insert state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); // If we balance, we must save the cursor position and seek to it later. self.save_context(CursorContext::seek_eq_only(bkey)); } else { @@ -2388,7 +2420,7 @@ impl BTreeCursor { }; if overflows || underflows { *write_state = WriteState::Balancing; - assert!(self.balance_state.sub_state == BalanceSubState::Start, "There should be no balancing operation in progress when overwrite state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); + assert!(matches!(self.balance_state.sub_state, BalanceSubState::Start), "There should be no balancing operation in progress when overwrite state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); // If we balance, we must save the cursor position and seek to it later. self.save_context(CursorContext::seek_eq_only(bkey)); } else { @@ -2473,11 +2505,19 @@ impl BTreeCursor { return Ok(IOResult::Done(())); } } - if !self.stack.has_parent() { - let _res = self.balance_root()?; + *sub_state = BalanceSubState::BalanceRoot; + } else { + *sub_state = BalanceSubState::Decide; } + } + BalanceSubState::BalanceRoot => { + return_if_io!(self.balance_root()); + let BalanceState { sub_state, .. } = &mut self.balance_state; + *sub_state = BalanceSubState::Decide; + } + BalanceSubState::Decide => { let cur_page = self.stack.top_ref(); let cur_page_contents = cur_page.get_contents(); @@ -2522,6 +2562,8 @@ impl BTreeCursor { } BalanceSubState::NonRootPickSiblings | BalanceSubState::NonRootDoBalancing + | BalanceSubState::NonRootDoBalancingAllocate { .. } + | BalanceSubState::NonRootDoBalancingFinish { .. } | BalanceSubState::FreePages { .. } => { return_if_io!(self.balance_non_root()); } @@ -2623,7 +2665,10 @@ impl BTreeCursor { tracing::debug!(?sub_state); match sub_state { - BalanceSubState::Start | BalanceSubState::Quick => { + BalanceSubState::Start + | BalanceSubState::BalanceRoot + | BalanceSubState::Decide + | BalanceSubState::Quick => { panic!("balance_non_root: unexpected state {sub_state:?}") } BalanceSubState::NonRootPickSiblings => { @@ -2880,13 +2925,12 @@ impl BTreeCursor { // Start balancing. let parent_page = self.stack.top_ref(); let parent_contents = parent_page.get_contents(); - let parent_is_root = !self.stack.has_parent(); // 1. Collect cell data from divider cells, and count the total number of cells to be distributed. // The count includes: all cells and overflow cells from the sibling pages, and divider cells from the parent page, // excluding the rightmost divider, which will not be dropped from the parent; instead it will be updated at the end. let mut total_cells_to_redistribute = 0; - let mut pages_to_balance_new: [Option>; + let pages_to_balance_new: [Option>; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] = [const { None }; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE]; for i in (0..balance_info.sibling_count).rev() { @@ -3335,31 +3379,84 @@ impl BTreeCursor { ); } + *sub_state = BalanceSubState::NonRootDoBalancingAllocate { + i: 0, + context: Some(BalanceContext { + pages_to_balance_new, + sibling_count_new, + cell_array, + old_cell_count_per_page_cumulative, + #[cfg(debug_assertions)] + cells_debug, + }), + }; + } + BalanceSubState::NonRootDoBalancingAllocate { i, context } => { + let BalanceContext { + pages_to_balance_new, + old_cell_count_per_page_cumulative, + cell_array, + sibling_count_new, + .. + } = context.as_mut().unwrap(); let pager = self.pager.clone(); - + let mut balance_info = balance_info.borrow_mut(); + let balance_info = balance_info.as_mut().unwrap(); + let page_type = balance_info.pages_to_balance[0] + .as_ref() + .unwrap() + .get_contents() + .page_type(); // Allocate pages or set dirty if not needed - for i in 0..sibling_count_new { - if i < balance_info.sibling_count { - let page = balance_info.pages_to_balance[i].as_ref().unwrap(); - turso_assert!( - page.is_dirty(), - "sibling page must be already marked dirty" - ); - pages_to_balance_new[i].replace(page.clone()); - } else { - // FIXME: handle page cache is full - // FIXME: add new state machine state instead of this sync IO hack - let page = pager.io.block(|| { - pager.do_allocate_page(page_type, 0, BtreePageAllocMode::Any) - })?; - pages_to_balance_new[i].replace(page); - // Since this page didn't exist before, we can set it to cells length as it - // marks them as empty since it is a prefix sum of cells. - old_cell_count_per_page_cumulative[i] = - cell_array.cell_payloads.len() as u16; - } + if *i < balance_info.sibling_count { + let page = balance_info.pages_to_balance[*i].as_ref().unwrap(); + turso_assert!(page.is_dirty(), "sibling page must be already marked dirty"); + pages_to_balance_new[*i].replace(page.clone()); + } else { + // FIXME: handle page cache is full + let page = return_if_io!(pager.do_allocate_page( + page_type, + 0, + BtreePageAllocMode::Any + )); + pages_to_balance_new[*i].replace(page); + // Since this page didn't exist before, we can set it to cells length as it + // marks them as empty since it is a prefix sum of cells. + old_cell_count_per_page_cumulative[*i] = + cell_array.cell_payloads.len() as u16; } - + if *i + 1 < *sibling_count_new { + *i += 1; + continue; + } else { + *sub_state = BalanceSubState::NonRootDoBalancingFinish { + context: context.take().unwrap(), + }; + } + } + BalanceSubState::NonRootDoBalancingFinish { + context: + BalanceContext { + pages_to_balance_new, + sibling_count_new, + cell_array, + old_cell_count_per_page_cumulative, + #[cfg(debug_assertions)] + cells_debug, + }, + } => { + let mut balance_info = balance_info.borrow_mut(); + let balance_info = balance_info.as_mut().unwrap(); + let page_type = balance_info.pages_to_balance[0] + .as_ref() + .unwrap() + .get_contents() + .page_type(); + let parent_is_root = !self.stack.has_parent(); + let parent_page = self.stack.top_ref(); + let parent_contents = parent_page.get_contents(); + let mut sibling_count_new = *sibling_count_new; + let is_table_leaf = matches!(page_type, PageType::TableLeaf); // Reassign page numbers in increasing order { let mut page_numbers: [usize; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE] = @@ -3652,7 +3749,7 @@ impl BTreeCursor { start_old_cells, start_new_cells, number_new_cells, - &cell_array, + cell_array, usable_space, )?; debug_validate_cells!(page_contents, usable_space); @@ -3831,10 +3928,10 @@ impl BTreeCursor { parent_page: &PageRef, balance_info: &BalanceInfo, parent_contents: &mut PageContent, - pages_to_balance_new: [Option; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], + pages_to_balance_new: &[Option; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], page_type: PageType, is_table_leaf: bool, - mut cells_debug: Vec>, + cells_debug: &mut [Vec], sibling_count_new: usize, right_page_id: u32, usable_space: usize, @@ -4228,15 +4325,16 @@ impl BTreeCursor { let _ = self.move_to_right_state.1.take(); let root = self.stack.top(); - let is_page_1 = root.get().id == 1; - let offset = if is_page_1 { DatabaseHeader::SIZE } else { 0 }; let root_contents = root.get_contents(); // FIXME: handle page cache is full - // FIXME: remove sync IO hack - let child = self.pager.io.block(|| { - self.pager - .do_allocate_page(root_contents.page_type(), 0, BtreePageAllocMode::Any) - })?; + let child = return_if_io!(self.pager.do_allocate_page( + root_contents.page_type(), + 0, + BtreePageAllocMode::Any + )); + + let is_page_1 = root.get().id == 1; + let offset = if is_page_1 { DatabaseHeader::SIZE } else { 0 }; tracing::debug!( "balance_root(root={}, rightmost={}, page_type={:?})", @@ -4961,7 +5059,7 @@ impl BTreeCursor { } } let balance_both = leaf_underflows && interior_overflows_or_underflows; - assert!(self.balance_state.sub_state == BalanceSubState::Start, "There should be no balancing operation in progress when delete state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); + assert!(matches!(self.balance_state.sub_state, BalanceSubState::Start), "There should be no balancing operation in progress when delete state is {:?}, got: {:?}", self.state, self.balance_state.sub_state); let post_balancing_seek_key = post_balancing_seek_key .take() .expect("post_balancing_seek_key should be Some"); @@ -6435,6 +6533,12 @@ struct CellArray { cell_count_per_page_cumulative: [u16; MAX_NEW_SIBLING_PAGES_AFTER_BALANCE], } +impl std::fmt::Debug for CellArray { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CellArray").finish() + } +} + impl CellArray { pub fn cell_size_bytes(&self, cell_idx: usize) -> u16 { self.cell_payloads[cell_idx].len() as u16