page cache: pin pages on the stack

This commit is contained in:
Jussi Saurio
2025-07-11 16:32:15 +03:00
parent f7b9265c26
commit 8558675c4c
3 changed files with 105 additions and 5 deletions

View File

@@ -5513,6 +5513,10 @@ impl PageStack {
"corrupted database, stack is bigger than expected"
);
assert!(current >= 0);
// Pin the page to prevent it from being evicted while on the stack
page.get().pin();
self.stack.borrow_mut()[current as usize] = Some(page);
self.node_states.borrow_mut()[current as usize] = BTreeNodeState {
cell_idx: starting_cell_idx,
@@ -5525,7 +5529,7 @@ impl PageStack {
/// without having to perform IO to get the ancestor page contents.
///
/// This rests on the assumption that the parent page is already in memory whenever a child is pushed onto the stack.
/// We currently ensure this by pinning interior pages to the page cache so that they cannot be evicted.
/// We currently ensure this by pinning all the pages on [PageStack] to the page cache so that they cannot be evicted.
fn populate_parent_cell_count(&self) {
let stack_empty = self.current_page.get() == -1;
if stack_empty {
@@ -5535,6 +5539,16 @@ impl PageStack {
let stack = self.stack.borrow();
let page = stack[current].as_ref().unwrap();
let page = page.get();
turso_assert!(
page.is_pinned(),
"parent page {} is not pinned",
page.get().id
);
turso_assert!(
page.is_loaded(),
"parent page {} is not loaded",
page.get().id
);
let contents = page.get_contents();
let cell_count = contents.cell_count() as i32;
self.node_states.borrow_mut()[current].cell_count = Some(cell_count);
@@ -5555,6 +5569,12 @@ impl PageStack {
let current = self.current_page.get();
assert!(current >= 0);
tracing::trace!(current);
// Unpin the page before removing it from the stack
if let Some(page) = &self.stack.borrow()[current as usize] {
page.get().unpin();
}
self.node_states.borrow_mut()[current as usize] = BTreeNodeState::default();
self.stack.borrow_mut()[current as usize] = None;
self.decrement_current();
@@ -5623,11 +5643,29 @@ impl PageStack {
self.current_page.get() > 0
}
/// Best-effort release of every pin held by this stack.
/// Uses `try_unpin` so slots whose page was never pinned are skipped
/// silently instead of panicking.
fn unpin_all_if_pinned(&self) {
    let mut slots = self.stack.borrow_mut();
    for slot in slots.iter_mut() {
        if let Some(page) = slot {
            let _ = page.get().try_unpin();
        }
    }
}
/// Reset the stack to empty.
/// Pages are pinned while on the stack, so release those pins first;
/// otherwise the page cache could never evict them again.
fn clear(&self) {
    self.unpin_all_if_pinned();
    self.current_page.set(-1);
}
}
impl Drop for PageStack {
    fn drop(&mut self) {
        // Pages on the stack hold pins on the page cache; drop them when the
        // stack itself is destroyed so the pages become evictable again.
        self.unpin_all_if_pinned();
    }
}
/// Used for redistributing cells during a balance operation.
struct CellArray {
/// The actual cell data.

View File

@@ -47,6 +47,7 @@ pub enum CacheError {
InternalError(String),
Locked,
Dirty { pgno: usize },
Pinned { pgno: usize },
ActiveRefs,
Full,
KeyExists,
@@ -134,6 +135,7 @@ impl DumbLruPageCache {
}
let ptr = *self.map.borrow().get(&key).unwrap();
// Try to detach from LRU list first, can fail
self.detach(ptr, clean_page)?;
let ptr = self.map.borrow_mut().remove(&key).unwrap();
@@ -177,10 +179,11 @@ impl DumbLruPageCache {
}
}
fn detach(
fn _detach(
&mut self,
mut entry: NonNull<PageCacheEntry>,
clean_page: bool,
allow_detach_pinned: bool,
) -> Result<(), CacheError> {
let entry_mut = unsafe { entry.as_mut() };
if entry_mut.page.is_locked() {
@@ -191,6 +194,11 @@ impl DumbLruPageCache {
pgno: entry_mut.page.get().id,
});
}
if entry_mut.page.is_pinned() && !allow_detach_pinned {
return Err(CacheError::Pinned {
pgno: entry_mut.page.get().id,
});
}
if clean_page {
entry_mut.page.clear_loaded();
@@ -201,6 +209,22 @@ impl DumbLruPageCache {
Ok(())
}
/// Detach an entry from the LRU list, refusing to touch pinned pages
/// (returns `CacheError::Pinned` for those via `_detach`).
fn detach(
    &mut self,
    entry: NonNull<PageCacheEntry>,
    clean_page: bool,
) -> Result<(), CacheError> {
    self._detach(entry, clean_page, false)
}
/// Detach an entry from the LRU list even when the page is pinned.
/// Intended for teardown paths (e.g. clearing the whole cache) where the
/// pin no longer needs to protect the page from eviction.
fn detach_even_if_pinned(
    &mut self,
    entry: NonNull<PageCacheEntry>,
    clean_page: bool,
) -> Result<(), CacheError> {
    self._detach(entry, clean_page, true)
}
fn unlink(&mut self, mut entry: NonNull<PageCacheEntry>) {
let (next, prev) = unsafe {
let c = entry.as_mut();
@@ -276,7 +300,8 @@ impl DumbLruPageCache {
while need_to_evict > 0 && current_opt.is_some() {
let current = current_opt.unwrap();
let entry = unsafe { current.as_ref() };
current_opt = entry.prev; // Pick prev before modifying entry
// Pick prev before modifying entry
current_opt = entry.prev;
match self.delete(entry.key.clone()) {
Err(_) => {}
Ok(_) => need_to_evict -= 1,
@@ -296,7 +321,7 @@ impl DumbLruPageCache {
self.map.borrow_mut().remove(&current_entry.as_ref().key);
}
let next = unsafe { current_entry.as_ref().next };
self.detach(current_entry, true)?;
self.detach_even_if_pinned(current_entry, true)?;
unsafe {
assert!(!current_entry.as_ref().page.is_dirty());
}

View File

@@ -7,7 +7,7 @@ use crate::storage::sqlite3_ondisk::{self, DatabaseHeader, PageContent, PageType
use crate::storage::wal::{CheckpointResult, Wal};
use crate::types::IOResult;
use crate::{return_if_io, Completion};
use crate::{Buffer, Connection, LimboError, Result};
use crate::{turso_assert, Buffer, Connection, LimboError, Result};
use parking_lot::RwLock;
use std::cell::{Cell, OnceCell, RefCell, UnsafeCell};
use std::collections::HashSet;
@@ -28,6 +28,7 @@ pub struct PageInner {
pub flags: AtomicUsize,
pub contents: Option<PageContent>,
pub id: usize,
pub pin_count: AtomicUsize,
}
#[derive(Debug)]
@@ -57,6 +58,7 @@ impl Page {
flags: AtomicUsize::new(0),
contents: None,
id,
pin_count: AtomicUsize::new(0),
}),
}
}
@@ -139,6 +141,41 @@ impl Page {
PageType::TableLeaf | PageType::TableInterior => false,
}
}
/// Pin the page to prevent it from being evicted from the page cache.
/// Pins are counted: each call to `pin` must be balanced by a later
/// `unpin` (or `try_unpin`) before the page becomes evictable again.
/// Uses `SeqCst` so all pin-count accesses (`pin`, `try_unpin`,
/// `is_pinned`) agree on a single total order instead of mixing
/// `Relaxed` and `SeqCst` on the same counter.
pub fn pin(&self) {
    self.get().pin_count.fetch_add(1, Ordering::SeqCst);
}
/// Unpin the page to allow it to be evicted from the page cache.
/// Asserts (via `turso_assert!`) that a pin was actually held; unpinning
/// an unpinned page is a bookkeeping bug.
pub fn unpin(&self) {
    turso_assert!(
        self.try_unpin(),
        "Attempted to unpin page {} that was not pinned",
        self.get().id
    );
}
/// Try to unpin the page if it's pinned, otherwise do nothing.
/// Returns true if a pin was actually released.
/// `checked_sub(1)` yields `None` at zero, which makes `fetch_update`
/// leave the counter untouched and report failure — exactly the
/// "don't underflow" guard, expressed without an explicit branch.
pub fn try_unpin(&self) -> bool {
    self.get()
        .pin_count
        .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |count| {
            count.checked_sub(1)
        })
        .is_ok()
}
/// Whether at least one pin is currently held on this page.
pub fn is_pinned(&self) -> bool {
    self.get().pin_count.load(Ordering::SeqCst) != 0
}
}
#[derive(Clone, Copy, Debug)]