From 8558675c4cfd42000046dbd37d59db6c9ff19238 Mon Sep 17 00:00:00 2001
From: Jussi Saurio
Date: Fri, 11 Jul 2025 16:32:15 +0300
Subject: [PATCH] page cache: pin pages on the stack

Pin each page while it is on a cursor's PageStack so the page cache
cannot evict it while its contents may still be needed. Pins are
released when a page is popped and when the stack is cleared or
dropped. Detaching a pinned page from the cache now fails with
CacheError::Pinned; detach_even_if_pinned bypasses the check where the
cache must drop pages unconditionally.
---
 core/storage/btree.rs      | 40 +++++++++++++++++++++++++++++++++++++-
 core/storage/page_cache.rs | 31 ++++++++++++++++++++++++++---
 core/storage/pager.rs      | 39 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index d0e2e3577..f5a3ed98c 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -5513,6 +5513,10 @@ impl PageStack {
             "corrupted database, stack is bigger than expected"
         );
         assert!(current >= 0);
+
+        // Pin the page to prevent it from being evicted while on the stack
+        page.get().pin();
+
         self.stack.borrow_mut()[current as usize] = Some(page);
         self.node_states.borrow_mut()[current as usize] = BTreeNodeState {
             cell_idx: starting_cell_idx,
@@ -5525,7 +5529,7 @@ impl PageStack {
     /// without having to perform IO to get the ancestor page contents.
     ///
     /// This rests on the assumption that the parent page is already in memory whenever a child is pushed onto the stack.
-    /// We currently ensure this by pinning interior pages to the page cache so that they cannot be evicted.
+    /// We currently ensure this by pinning all the pages on [PageStack] to the page cache so that they cannot be evicted.
     fn populate_parent_cell_count(&self) {
         let stack_empty = self.current_page.get() == -1;
         if stack_empty {
@@ -5535,6 +5539,16 @@ impl PageStack {
         let stack = self.stack.borrow();
         let page = stack[current].as_ref().unwrap();
         let page = page.get();
+        turso_assert!(
+            page.is_pinned(),
+            "parent page {} is not pinned",
+            page.get().id
+        );
+        turso_assert!(
+            page.is_loaded(),
+            "parent page {} is not loaded",
+            page.get().id
+        );
         let contents = page.get_contents();
         let cell_count = contents.cell_count() as i32;
         self.node_states.borrow_mut()[current].cell_count = Some(cell_count);
@@ -5555,6 +5569,12 @@ impl PageStack {
         let current = self.current_page.get();
         assert!(current >= 0);
         tracing::trace!(current);
+
+        // Unpin the page before removing it from the stack
+        if let Some(page) = &self.stack.borrow()[current as usize] {
+            page.get().unpin();
+        }
+
         self.node_states.borrow_mut()[current as usize] = BTreeNodeState::default();
         self.stack.borrow_mut()[current as usize] = None;
         self.decrement_current();
@@ -5623,11 +5643,29 @@ impl PageStack {
         self.current_page.get() > 0
     }
 
+    fn unpin_all_if_pinned(&self) {
+        self.stack
+            .borrow_mut()
+            .iter_mut()
+            .flatten()
+            .for_each(|page| {
+                let _ = page.get().try_unpin();
+            });
+    }
+
     fn clear(&self) {
+        self.unpin_all_if_pinned();
+
         self.current_page.set(-1);
     }
 }
 
+impl Drop for PageStack {
+    fn drop(&mut self) {
+        self.unpin_all_if_pinned();
+    }
+}
+
 /// Used for redistributing cells during a balance operation.
 struct CellArray {
     /// The actual cell data.
diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs
index 93016ab35..3e69c1cf9 100644
--- a/core/storage/page_cache.rs
+++ b/core/storage/page_cache.rs
@@ -47,6 +47,7 @@ pub enum CacheError {
     InternalError(String),
     Locked,
     Dirty { pgno: usize },
+    Pinned { pgno: usize },
     ActiveRefs,
     Full,
     KeyExists,
@@ -134,6 +135,7 @@ impl DumbLruPageCache {
         }
 
         let ptr = *self.map.borrow().get(&key).unwrap();
+        // Try to detach from LRU list first, can fail
         self.detach(ptr, clean_page)?;
 
         let ptr = self.map.borrow_mut().remove(&key).unwrap();
@@ -177,10 +179,11 @@ impl DumbLruPageCache {
         }
     }
 
-    fn detach(
+    fn _detach(
         &mut self,
         mut entry: NonNull<PageCacheEntry>,
         clean_page: bool,
+        allow_detach_pinned: bool,
     ) -> Result<(), CacheError> {
         let entry_mut = unsafe { entry.as_mut() };
         if entry_mut.page.is_locked() {
@@ -191,6 +194,11 @@ impl DumbLruPageCache {
                 pgno: entry_mut.page.get().id,
             });
         }
+        if entry_mut.page.is_pinned() && !allow_detach_pinned {
+            return Err(CacheError::Pinned {
+                pgno: entry_mut.page.get().id,
+            });
+        }
 
         if clean_page {
             entry_mut.page.clear_loaded();
@@ -201,6 +209,22 @@ impl DumbLruPageCache {
         Ok(())
     }
 
+    fn detach(
+        &mut self,
+        entry: NonNull<PageCacheEntry>,
+        clean_page: bool,
+    ) -> Result<(), CacheError> {
+        self._detach(entry, clean_page, false)
+    }
+
+    fn detach_even_if_pinned(
+        &mut self,
+        entry: NonNull<PageCacheEntry>,
+        clean_page: bool,
+    ) -> Result<(), CacheError> {
+        self._detach(entry, clean_page, true)
+    }
+
     fn unlink(&mut self, mut entry: NonNull<PageCacheEntry>) {
         let (next, prev) = unsafe {
             let c = entry.as_mut();
@@ -276,7 +300,8 @@ impl DumbLruPageCache {
         while need_to_evict > 0 && current_opt.is_some() {
             let current = current_opt.unwrap();
             let entry = unsafe { current.as_ref() };
-            current_opt = entry.prev; // Pick prev before modifying entry
+            // Pick prev before modifying entry
+            current_opt = entry.prev;
             match self.delete(entry.key.clone()) {
                 Err(_) => {}
                 Ok(_) => need_to_evict -= 1,
@@ -296,7 +321,7 @@ impl DumbLruPageCache {
             self.map.borrow_mut().remove(&current_entry.as_ref().key);
         }
         let next = unsafe { current_entry.as_ref().next };
-        self.detach(current_entry, true)?;
+        self.detach_even_if_pinned(current_entry, true)?;
         unsafe {
             assert!(!current_entry.as_ref().page.is_dirty());
         }
diff --git a/core/storage/pager.rs b/core/storage/pager.rs
index 3e227a011..c3f9f1cf9 100644
--- a/core/storage/pager.rs
+++ b/core/storage/pager.rs
@@ -7,7 +7,7 @@ use crate::storage::sqlite3_ondisk::{self, DatabaseHeader, PageContent, PageType
 use crate::storage::wal::{CheckpointResult, Wal};
 use crate::types::IOResult;
 use crate::{return_if_io, Completion};
-use crate::{Buffer, Connection, LimboError, Result};
+use crate::{turso_assert, Buffer, Connection, LimboError, Result};
 use parking_lot::RwLock;
 use std::cell::{Cell, OnceCell, RefCell, UnsafeCell};
 use std::collections::HashSet;
@@ -28,6 +28,7 @@ pub struct PageInner {
     pub flags: AtomicUsize,
     pub contents: Option<PageContent>,
     pub id: usize,
+    pub pin_count: AtomicUsize,
 }
 
 #[derive(Debug)]
@@ -57,6 +58,7 @@ impl Page {
                 flags: AtomicUsize::new(0),
                 contents: None,
                 id,
+                pin_count: AtomicUsize::new(0),
             }),
         }
     }
@@ -139,6 +141,41 @@ impl Page {
             PageType::TableLeaf | PageType::TableInterior => false,
         }
     }
+
+    /// Pin the page to prevent it from being evicted from the page cache.
+    pub fn pin(&self) {
+        self.get().pin_count.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Unpin the page to allow it to be evicted from the page cache.
+    pub fn unpin(&self) {
+        let was_pinned = self.try_unpin();
+
+        turso_assert!(
+            was_pinned,
+            "Attempted to unpin page {} that was not pinned",
+            self.get().id
+        );
+    }
+
+    /// Try to unpin the page if it's pinned, otherwise do nothing.
+    /// Returns true if the page was originally pinned.
+    pub fn try_unpin(&self) -> bool {
+        self.get()
+            .pin_count
+            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
+                if current == 0 {
+                    None
+                } else {
+                    Some(current - 1)
+                }
+            })
+            .is_ok()
+    }
+
+    pub fn is_pinned(&self) -> bool {
+        self.get().pin_count.load(Ordering::SeqCst) > 0
+    }
 }
 
 #[derive(Clone, Copy, Debug)]
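
The pin/unpin scheme above boils down to a reference count on an AtomicUsize that saturates at zero: pin() increments, try_unpin() decrements only when the count is non-zero (so blanket calls such as PageStack::unpin_all_if_pinned never underflow), and is_pinned() treats any non-zero count as "do not evict". The standalone sketch below illustrates that logic; it is not part of the patch, and the PinCount type and the demo main are hypothetical names used only for illustration.

// Illustrative sketch only: PinCount and main are hypothetical, not part of
// core/storage/pager.rs. It mirrors the counter logic that the patch's
// Page::pin / Page::try_unpin / Page::is_pinned implement.
use std::sync::atomic::{AtomicUsize, Ordering};

struct PinCount(AtomicUsize);

impl PinCount {
    fn new() -> Self {
        PinCount(AtomicUsize::new(0))
    }

    /// Increment the pin count; a pinned page must not be evicted.
    fn pin(&self) {
        self.0.fetch_add(1, Ordering::Relaxed);
    }

    /// Decrement the pin count only if it is non-zero, returning whether a
    /// pin was actually released. Callers that unpin defensively (as
    /// unpin_all_if_pinned does in the patch) can call this blindly without
    /// ever underflowing the counter.
    fn try_unpin(&self) -> bool {
        self.0
            .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| {
                if current == 0 {
                    None // nothing to unpin; fetch_update returns Err
                } else {
                    Some(current - 1)
                }
            })
            .is_ok()
    }

    /// Any non-zero count means the page must stay in the cache.
    fn is_pinned(&self) -> bool {
        self.0.load(Ordering::SeqCst) > 0
    }
}

fn main() {
    let pc = PinCount::new();
    pc.pin();
    pc.pin(); // pins nest: two independent holders
    assert!(pc.is_pinned());
    assert!(pc.try_unpin()); // releases one pin
    assert!(pc.try_unpin()); // releases the last pin
    assert!(!pc.try_unpin()); // already zero: no underflow, just reports false
    assert!(!pc.is_pinned()); // eviction would now be allowed
}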