From cd3fe523a35858914c12da5e96d99c706770253d Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 6 Aug 2025 07:46:51 +0300 Subject: [PATCH 1/2] core/types: add IOResult::is_io() helper --- core/types.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/types.rs b/core/types.rs index 8a6d65d76..fb26fd50a 100644 --- a/core/types.rs +++ b/core/types.rs @@ -2466,6 +2466,12 @@ pub enum IOResult { IO, } +impl IOResult { + pub fn is_io(&self) -> bool { + matches!(self, IOResult::IO) + } +} + /// Evaluate a Result>, if IO return IO. #[macro_export] macro_rules! return_if_io { From 839d428e36cbd6705f512d818bb1e9f6eb12beac Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 6 Aug 2025 07:48:21 +0300 Subject: [PATCH 2/2] core/btree: fix re-entrancy bug in insert_into_page() We currently clone WriteState on every iteration of `insert_into_page()`, presumably for Borrow Checker Reasons (tm). There was a bug in `WriteState::Insert` handling where if `fill_cell_payload()` returned IO, the `fill_cell_payload_state` was not updated in `write_info.state`, leading to an infinite loop of allocating new pages. This bug was surfaced by, but not caused by, #2400. --- core/storage/btree.rs | 67 +++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index ff56d2857..5b4ffe24a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -269,7 +269,9 @@ enum WriteState { Insert { page: Arc, cell_idx: usize, - new_payload: Vec, + /// Dumb double-indirection via Arc because we clone [WriteState] for some reason and we use unsafe in [FillCellPayloadState::AllocateOverflowPages] + /// so the underlying Vec must not be cloned in upper layers. + new_payload: Arc>>, fill_cell_payload_state: FillCellPayloadState, }, BalanceStart, @@ -2260,7 +2262,9 @@ impl BTreeCursor { write_info.state = WriteState::Insert { page: page.clone(), cell_idx, - new_payload: Vec::with_capacity(record_values.len() + 4), + new_payload: Arc::new(Mutex::new(Vec::with_capacity( + record_values.len() + 4, + ))), fill_cell_payload_state: FillCellPayloadState::Start, }; continue; @@ -2268,19 +2272,42 @@ impl BTreeCursor { WriteState::Insert { page, cell_idx, - mut new_payload, + new_payload, mut fill_cell_payload_state, } => { - return_if_io!(fill_cell_payload( - page.get().get().contents.as_ref().unwrap(), - bkey.maybe_rowid(), - &mut new_payload, - cell_idx, - record, - self.usable_space(), - self.pager.clone(), - &mut fill_cell_payload_state, - )); + { + let mut new_payload_mut = new_payload.lock(); + if fill_cell_payload( + page.get().get().contents.as_ref().unwrap(), + bkey.maybe_rowid(), + &mut new_payload_mut, + cell_idx, + record, + self.usable_space(), + self.pager.clone(), + &mut fill_cell_payload_state, + )? + .is_io() + { + let write_info = self + .state + .mut_write_info() + .expect("write info should be present"); + let WriteState::Insert { + fill_cell_payload_state: old_fill_cell_state, + .. + } = &mut write_info.state + else { + panic!("expected insert state"); + }; + // FIXME: don't clone WriteState on every iteration, it's really wasteful and leads to having + // to do things like this to ensure the next state is consistent. + // The reason WriteState is cloned is probably to work around the borrow checker, but we should + // be able to not clone here regardless. + *old_fill_cell_state = fill_cell_payload_state; + return Ok(IOResult::IO); + } + } { let page = page.get(); @@ -2289,7 +2316,7 @@ impl BTreeCursor { insert_into_cell( contents, - new_payload.as_slice(), + new_payload.lock().as_slice(), cell_idx, self.usable_space() as u16, )?; @@ -2326,10 +2353,10 @@ impl BTreeCursor { "page {}is not loaded", page.get().get().id ); - if matches!( - self.overwrite_cell(page.clone(), cell_idx, record, &mut state)?, - IOResult::IO - ) { + if self + .overwrite_cell(page.clone(), cell_idx, record, &mut state)? + .is_io() + { let write_info = self .state .mut_write_info() @@ -2340,6 +2367,10 @@ impl BTreeCursor { else { panic!("expected overwrite state"); }; + // FIXME: don't clone WriteState on every iteration, it's really wasteful and leads to having + // to do things like this to ensure the next state is consistent. + // The reason WriteState is cloned is probably to work around the borrow checker, but we should + // be able to not clone here regardless. *old_state = state; return Ok(IOResult::IO); }