From 4f3f66d55e4fa17717290da5babfe4fdd748df03 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Mon, 4 Aug 2025 10:25:14 +0300
Subject: [PATCH] fix/wal: remove start_pages_in_frames hack to prevent
 checkpoint data loss

We have a transaction-local hack (`start_pages_in_frames`) that records
how many pages were in the in-memory WAL frame cache when the
transaction started, presumably for performance reasons.

`wal.rollback()` clears every frame from `shared.frame_cache` that the
rolling-back transaction is allowed to clear, and then truncates
`shared.pages_in_frames` back to its local `start_pages_in_frames`
value.

In `complete_append_frame`, we check whether `frame_cache` already
contains that key (page), and only if it does not do we add the page to
`pages_in_frames`. However, `wal.rollback()` never _removes_ a key whose
value is empty, so we can end up in a state where the `frame_cache` key
for `page P` exists but holds no frames, and `page P` therefore never
gets added to `pages_in_frames` in `complete_append_frame`.

This leads to a checkpoint data loss scenario (sketched below):

- A transaction rolls back with `start_pages_in_frames = 0`, so it
  truncates the shared `pages_in_frames` to an empty vec. Suppose the
  `page P` key in `frame_cache` remains, but with no frames.
- The next time someone commits a frame for `page P`, the page does NOT
  get added to `pages_in_frames`, because `frame_cache` already has that
  key.
- At some point, a PASSIVE checkpoint checkpoints `n` frames, but since
  `pages_in_frames` does not contain `page P`, it never actually writes
  that page back, and all the "checkpointed" frames are simply thrown
  away - very similar to the scenario in #2366.

Remove the `start_pages_in_frames` hack entirely and make
`pages_in_frames` effectively mirror `frame_cache.keys`. I think we
could also get rid of `pages_in_frames` altogether and use
`frame_cache.contains_key(p)` instead, but maybe Pere can chime in here.
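To make the failure mode concrete, here is a minimal, self-contained
sketch of the bookkeeping bug. Toy types only, not the actual `WalFile`
code: the map and vec stand in for `shared.frame_cache` and
`shared.pages_in_frames`.

```rust
// Toy model of the bug, not the real WalFile code: frame_cache maps
// page id -> WAL frame ids; pages_in_frames lists pages that have frames.
use std::collections::HashMap;

fn main() {
    let mut frame_cache: HashMap<u64, Vec<u64>> = HashMap::new();
    let mut pages_in_frames: Vec<u64> = Vec::new();

    // A transaction appends frame 1 for page P (= 42), then rolls back:
    // rollback clears the frames but leaves the now-empty key behind,
    // and truncates pages_in_frames to start_pages_in_frames (= 0).
    frame_cache.insert(42, vec![1]);
    pages_in_frames.push(42);
    frame_cache.get_mut(&42).unwrap().clear();
    pages_in_frames.truncate(0);

    // The next commit appends frame 2 for page P. The key already
    // exists, so the path that pushes to pages_in_frames is skipped.
    match frame_cache.get_mut(&42) {
        Some(frames) => frames.push(2), // taken: stale empty entry
        None => {
            frame_cache.insert(42, vec![2]);
            pages_in_frames.push(42); // never reached for page P
        }
    }

    // A PASSIVE checkpoint walks pages_in_frames, so page P would never
    // be written back even though its frame counts as checkpointed.
    assert!(!pages_in_frames.contains(&42));
}
```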
---
 core/storage/wal.rs | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/core/storage/wal.rs b/core/storage/wal.rs
index bb7c31c4a..633e04f41 100644
--- a/core/storage/wal.rs
+++ b/core/storage/wal.rs
@@ -437,9 +437,6 @@ pub struct WalFile {
     /// Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
     last_checksum: (u32, u32),
 
-    /// Hack for now in case of rollback, will not be needed once we remove this bullshit frame cache.
-    start_pages_in_frames: usize,
-
     /// Count of possible pages to checkpoint, and number of backfilled
     prev_checkpoint: CheckpointResult,
 
@@ -734,7 +731,7 @@ impl Wal for WalFile {
         // Now take a shared read on that slot, and if we are successful,
         // grab another snapshot of the shared state.
-        let (mx2, nb2, cksm2, start_pages, ckpt_seq2) = {
+        let (mx2, nb2, cksm2, ckpt_seq2) = {
             let shared = self.get_shared();
             if !shared.read_locks[best_idx as usize].read() {
                 // TODO: we should retry here instead of always returning Busy
                 return Err(LimboError::Busy);
@@ -744,7 +741,6 @@ impl Wal for WalFile {
                 shared.max_frame.load(Ordering::SeqCst),
                 shared.nbackfills.load(Ordering::SeqCst),
                 shared.last_checksum,
-                shared.pages_in_frames.lock().len(),
                 shared.wal_header.lock().checkpoint_seq,
             )
         };
@@ -778,7 +774,6 @@ impl Wal for WalFile {
             return Err(LimboError::Busy);
         }
         self.max_frame = best_mark as u64;
-        self.start_pages_in_frames = start_pages;
         self.max_frame_read_lock_index.set(best_idx as usize);
         tracing::debug!(
             "begin_read_tx(min={}, max={}, slot={}, max_frame_in_wal={})",
@@ -1148,7 +1143,9 @@ impl Wal for WalFile {
                 frames.truncate(last_valid_frame);
             }
             let mut pages_in_frames = shared.pages_in_frames.lock();
-            pages_in_frames.truncate(self.start_pages_in_frames);
+            pages_in_frames.retain(|page| {
+                frame_cache.contains_key(page) && !frame_cache.get(page).unwrap().is_empty()
+            });
         }
         self.last_checksum = shared.last_checksum;
     }
@@ -1191,7 +1188,6 @@ impl WalFile {
         let header = unsafe { shared.get().as_mut().unwrap().wal_header.lock() };
         let last_checksum = unsafe { (*shared.get()).last_checksum };
-        let start_pages_in_frames = unsafe { (*shared.get()).pages_in_frames.lock().len() };
         Self {
             io,
             // default to max frame in WAL, so that when we read schema we can read from WAL too if it's there.
@@ -1215,7 +1211,6 @@ impl WalFile {
             last_checksum,
             prev_checkpoint: CheckpointResult::default(),
             checkpoint_guard: None,
-            start_pages_in_frames,
             header: *header,
         }
     }
@@ -1243,8 +1238,12 @@ impl WalFile {
         {
             let mut frame_cache = shared.frame_cache.lock();
             let frames = frame_cache.get_mut(&page_id);
-            match frames {
-                Some(frames) => frames.push(frame_id),
+            match frames.filter(|frames| !frames.is_empty()) {
+                Some(frames) => {
+                    let pages_in_frames = shared.pages_in_frames.lock();
+                    turso_assert!(pages_in_frames.contains(&page_id), "page_id={page_id} must be in pages_in_frames if it's also already in frame_cache: pages_in_frames={:?}, frames={:?}", *pages_in_frames, frames);
+                    frames.push(frame_id);
+                }
                 None => {
                     frame_cache.insert(page_id, vec![frame_id]);
                     shared.pages_in_frames.lock().push(page_id);
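
For reference, here is a sketch of the invariant the patched rollback
path enforces: after rollback, `pages_in_frames` contains exactly the
`frame_cache` keys that still hold at least one frame, which is what the
new `filter(|frames| !frames.is_empty())` match in
`complete_append_frame` relies on. Toy types again, not the real code;
`rollback_retain` is a hypothetical stand-in for the `retain` call in
`wal.rollback()`.

```rust
// Sketch of the post-patch rollback bookkeeping; simplified types, not
// the actual WalFile code.
use std::collections::HashMap;

// Hypothetical stand-in for the patched `pages_in_frames.retain(...)`:
// keep only pages whose frame_cache entry still has frames.
fn rollback_retain(frame_cache: &HashMap<u64, Vec<u64>>, pages_in_frames: &mut Vec<u64>) {
    pages_in_frames.retain(|page| frame_cache.get(page).is_some_and(|f| !f.is_empty()));
}

fn main() {
    let mut frame_cache: HashMap<u64, Vec<u64>> = HashMap::new();
    frame_cache.insert(42, vec![]); // stale key emptied by a rollback
    frame_cache.insert(7, vec![1, 2]); // page that still has live frames

    let mut pages_in_frames = vec![42, 7];
    rollback_retain(&frame_cache, &mut pages_in_frames);
    assert_eq!(pages_in_frames, vec![7]); // only pages with frames survive
}
```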