From 03d5598cfbde33a1e4ca99403011f53670abeb62 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Mon, 1 Sep 2025 22:57:25 -0400
Subject: [PATCH 01/13] Use sieve algorithm in page cache in place of full LRU

---
 core/lib.rs                |   10 +-
 core/storage/btree.rs      |    4 +-
 core/storage/page_cache.rs | 1845 +++++++++++++++++++-----------------
 core/storage/pager.rs      |   22 +-
 core/vdbe/execute.rs       |    4 +-
 5 files changed, 1009 insertions(+), 876 deletions(-)

diff --git a/core/lib.rs b/core/lib.rs
index 2f32774df..e08cfa026 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -80,7 +80,7 @@ use std::{
 use storage::database::DatabaseFile;
 pub use storage::database::IOContext;
 pub use storage::encryption::{EncryptionContext, EncryptionKey};
-use storage::page_cache::DumbLruPageCache;
+use storage::page_cache::PageCache;
 use storage::pager::{AtomicDbState, DbState};
 use storage::sqlite3_ondisk::PageSize;
 pub use storage::{
@@ -187,7 +187,7 @@ pub struct Database {
     buffer_pool: Arc<BufferPool>,
     // Shared structures of a Database are the parts that are common to multiple threads that might
     // create DB connections.
-    _shared_page_cache: Arc<RwLock<DumbLruPageCache>>,
+    _shared_page_cache: Arc<RwLock<PageCache>>,
     shared_wal: Arc<RwLock<WalFileShared>>,
     db_state: Arc<AtomicDbState>,
     init_lock: Arc<Mutex<()>>,
@@ -385,7 +385,7 @@ impl Database {
             DbState::Initialized
         };
 
-        let shared_page_cache = Arc::new(RwLock::new(DumbLruPageCache::default()));
+        let shared_page_cache = Arc::new(RwLock::new(PageCache::default()));
         let syms = SymbolTable::new();
         let arena_size = if std::env::var("TESTING").is_ok_and(|v| v.eq_ignore_ascii_case("true")) {
             BufferPool::TEST_ARENA_SIZE
@@ -576,7 +576,7 @@ impl Database {
             self.db_file.clone(),
             Some(wal),
             self.io.clone(),
-            Arc::new(RwLock::new(DumbLruPageCache::default())),
+            Arc::new(RwLock::new(PageCache::default())),
             buffer_pool.clone(),
             db_state,
             self.init_lock.clone(),
@@ -599,7 +599,7 @@ impl Database {
             self.db_file.clone(),
             None,
             self.io.clone(),
-            Arc::new(RwLock::new(DumbLruPageCache::default())),
+            Arc::new(RwLock::new(PageCache::default())),
             buffer_pool.clone(),
             db_state,
             Arc::new(Mutex::new(())),
diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 8e52ef3ba..15d50e70c 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -7376,7 +7376,7 @@ mod tests {
         schema::IndexColumn,
         storage::{
             database::DatabaseFile,
-            page_cache::DumbLruPageCache,
+            page_cache::PageCache,
             pager::{AtomicDbState, DbState},
             sqlite3_ondisk::PageSize,
         },
@@ -8631,7 +8631,7 @@ mod tests {
             db_file,
             Some(wal),
             io,
-            Arc::new(parking_lot::RwLock::new(DumbLruPageCache::new(10))),
+            Arc::new(parking_lot::RwLock::new(PageCache::new(10))),
             buffer_pool,
             Arc::new(AtomicDbState::new(DbState::Uninitialized)),
             Arc::new(Mutex::new(())),
diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs
index f4b3f7fe9..886fdf20c 100644
--- a/core/storage/page_cache.rs
+++ b/core/storage/page_cache.rs
@@ -1,8 +1,9 @@
+use std::cell::Cell;
+use std::cell::RefCell;
 use std::sync::atomic::Ordering;
-use std::{cell::RefCell, ptr::NonNull};
 use std::sync::Arc;
 
-use tracing::{debug, trace};
+use tracing::trace;
 
 use crate::turso_assert;
 
@@ -12,41 +13,87 @@ use super::pager::PageRef;
 const DEFAULT_PAGE_CACHE_SIZE_IN_PAGES_MAKE_ME_SMALLER_ONCE_WAL_SPILL_IS_IMPLEMENTED: usize =
     100000;
 
-#[derive(Debug, Eq, Hash, PartialEq, Clone, Copy)]
-pub struct PageCacheKey {
-    pgno: usize,
-}
+#[derive(Debug, Copy, Eq, Hash, PartialEq, Clone)]
+#[repr(transparent)]
+pub struct PageCacheKey(usize);
 
-#[allow(dead_code)]
+type SlotIndex = usize;
+
+const NULL: SlotIndex = usize::MAX;
+
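// Editor's note: illustrative sketch, not part of the patch. The new cache links
// entries through `usize` slot indices into a fixed Vec, with `usize::MAX` as a
// NULL sentinel, instead of `NonNull` pointers. A minimal standalone version of
// that index-linked list pattern (all names here are hypothetical):

type Slot = usize;
const NIL: Slot = usize::MAX;

struct Node {
    next: Slot,
    prev: Slot,
}

// Push `slot` to the front of the list rooted at `head`.
fn push_front(nodes: &mut [Node], head: &mut Slot, slot: Slot) {
    nodes[slot].next = *head;
    nodes[slot].prev = NIL;
    if *head != NIL {
        nodes[*head].prev = slot; // old head gains a predecessor
    }
    *head = slot;
}

// Indices stay valid if the backing Vec reallocates and need no raw-pointer
// `unsafe`, which is what lets this patch delete the old `Box::into_raw` and
// `NonNull` plumbing.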
+#[derive(Clone, Debug)]
 struct PageCacheEntry {
+    /// Key identifying this page
     key: PageCacheKey,
-    page: PageRef,
-    prev: Option<NonNull<PageCacheEntry>>,
-    next: Option<NonNull<PageCacheEntry>>,
+    /// The cached page, None if this slot is free
+    page: Option<PageRef>,
+    /// Reference bit for the SIEVE algorithm - set on access, cleared during the eviction scan
+    ref_bit: Cell<bool>,
+    /// Index of next entry in SIEVE queue (newer/toward head)
+    next: Cell<SlotIndex>,
+    /// Index of previous entry in SIEVE queue (older/toward tail)
+    prev: Cell<SlotIndex>,
 }
 
-pub struct DumbLruPageCache {
-    capacity: usize,
-    map: RefCell<PageHashMap>,
-    head: RefCell<Option<NonNull<PageCacheEntry>>>,
-    tail: RefCell<Option<NonNull<PageCacheEntry>>>,
+impl Default for PageCacheEntry {
+    fn default() -> Self {
+        Self {
+            key: PageCacheKey(0),
+            page: None,
+            ref_bit: Cell::new(false),
+            next: Cell::new(NULL),
+            prev: Cell::new(NULL),
+        }
+    }
 }
-
-unsafe impl Send for DumbLruPageCache {}
-unsafe impl Sync for DumbLruPageCache {}
+
+impl PageCacheEntry {
+    #[inline]
+    fn empty() -> Self {
+        Self::default()
+    }
+    #[inline]
+    fn reset_links(&self) {
+        self.next.set(NULL);
+        self.prev.set(NULL);
+    }
+}
+
+/// PageCache implements the SIEVE eviction algorithm, a simpler and more efficient
+/// alternative to LRU that achieves comparable or better hit ratios with lower overhead.
+///
+/// # Algorithm Overview
+///
+/// SIEVE maintains a queue of cached pages and uses a "second chance" mechanism:
+/// - New pages enter at the head (MRU position)
+/// - Eviction candidates are examined from the tail (LRU position)
+/// - Each page has a reference bit that is set when accessed
+/// - During eviction, if a page's reference bit is set, it gets a "second chance":
+///   the bit is cleared and the page moves to the head
+/// - Pages with clear reference bits are evicted immediately
+pub struct PageCache {
+    /// Capacity in pages
+    capacity: usize,
+    /// Map of key -> SlotIndex into the entries array
+    map: RefCell<PageHashMap>,
+    /// Head (MRU end) of the intrusive doubly-linked eviction list
+    head: Cell<SlotIndex>,
+    /// Tail (LRU end) of the intrusive doubly-linked eviction list
+    tail: Cell<SlotIndex>,
+    /// Fixed-size vec holding page entries
+    entries: RefCell<Vec<PageCacheEntry>>,
+    /// Free list: stack of available slot indices
+    freelist: RefCell<Vec<SlotIndex>>,
+}
+
+unsafe impl Send for PageCache {}
+unsafe impl Sync for PageCache {}
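// Editor's note: minimal SIEVE sketch, not part of the patch; simplified to a
// VecDeque with no pinning, locking, or dirty state. Note that this patch's
// variant moves survivors to the head rather than keeping SIEVE's stationary
// "hand" pointer, and the sketch mirrors that choice:

/// Evict one victim from `queue` (front = MRU, back = LRU). Each element is
/// (page_id, ref_bit). Returns the evicted page id, or None if empty.
fn sieve_evict(queue: &mut std::collections::VecDeque<(usize, bool)>) -> Option<usize> {
    while let Some((id, ref_bit)) = queue.pop_back() {
        if ref_bit {
            // Second chance: clear the bit and recycle the page to the head.
            queue.push_front((id, false));
        } else {
            return Some(id);
        }
    }
    None
}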
 
 struct PageHashMap {
-    // FIXME: do we prefer array buckets or list? Deletes will be slower here which I guess happens often. I will do this for now to test how well it does.
     buckets: Vec<Vec<HashMapNode>>,
     capacity: usize,
     size: usize,
 }
 
-#[derive(Clone)]
-struct HashMapNode {
-    key: PageCacheKey,
-    value: NonNull<PageCacheEntry>,
-}
-
 #[derive(Debug, Clone, PartialEq, thiserror::Error)]
 pub enum CacheError {
     #[error("{0}")]
@@ -73,28 +120,81 @@ pub enum CacheResizeResult {
 
 impl PageCacheKey {
     pub fn new(pgno: usize) -> Self {
-        Self { pgno }
+        Self(pgno)
     }
 }
 
-impl DumbLruPageCache {
+impl PageCache {
     pub fn new(capacity: usize) -> Self {
-        assert!(capacity > 0, "capacity of cache should be at least 1");
+        assert!(capacity > 0);
+        let freelist = (0..capacity).rev().collect::<Vec<SlotIndex>>();
         Self {
             capacity,
             map: RefCell::new(PageHashMap::new(capacity)),
-            head: RefCell::new(None),
-            tail: RefCell::new(None),
+            head: Cell::new(NULL),
+            tail: Cell::new(NULL),
+            entries: RefCell::new(vec![PageCacheEntry::empty(); capacity]),
+            freelist: RefCell::new(freelist),
         }
     }
 
+    /// Link `slot` in at the head (MRU end) of the eviction list.
+    #[inline]
+    fn link_front(&self, slot: SlotIndex) {
+        let entries = self.entries.borrow();
+        let old_head = self.head.replace(slot);
+
+        entries[slot].next.set(old_head);
+        entries[slot].prev.set(NULL);
+
+        if old_head != NULL {
+            entries[old_head].prev.set(slot);
+        } else {
+            // list was empty
+            self.tail.set(slot);
+        }
+    }
+
+    /// Unlink `slot` from the eviction list and reset its links.
+    #[inline]
+    fn unlink(&self, slot: SlotIndex) {
+        let entries = self.entries.borrow();
+
+        let p = entries[slot].prev.get();
+        let n = entries[slot].next.get();
+
+        if p != NULL {
+            entries[p].next.set(n);
+        } else {
+            self.head.set(n);
+        }
+        if n != NULL {
+            entries[n].prev.set(p);
+        } else {
+            self.tail.set(p);
+        }
+
+        entries[slot].reset_links();
+    }
+
+    #[inline]
+    fn move_to_front(&self, slot: SlotIndex) {
+        if self.head.get() == slot {
+            return;
+        }
+        turso_assert!(self.entries.borrow()[slot].page.is_some(), "must be linked");
+        self.unlink(slot);
+        self.link_front(slot);
+    }
+
-    pub fn contains_key(&mut self, key: &PageCacheKey) -> bool {
+    pub fn contains_key(&self, key: &PageCacheKey) -> bool {
         self.map.borrow().contains_key(key)
     }
 
+    #[inline]
     pub fn insert(&mut self, key: PageCacheKey, value: PageRef) -> Result<(), CacheError> {
         self._insert(key, value, false)
     }
 
+    #[inline]
     pub fn insert_ignore_existing(
         &mut self,
         key: PageCacheKey,
@@ -111,208 +211,231 @@ impl DumbLruPageCache {
     ) -> Result<(), CacheError> {
         trace!("insert(key={:?})", key);
         // Check first if page already exists in cache
-        let existing_ptr = self.map.borrow().get(&key).copied();
-        if let Some(ptr) = existing_ptr {
+        if let Some(slot) = self.map.borrow().get(&key) {
             if !ignore_exists {
-                if let Some(existing_page_ref) = self.get(&key)? {
-                    assert!(
-                        Arc::ptr_eq(&value, &existing_page_ref),
-                        "Attempted to insert different page with same key: {key:?}"
-                    );
-                    return Err(CacheError::KeyExists);
-                }
-            } else {
-                // ignore_exists is called when the existing entry needs to be updated in place
-                unsafe {
-                    let entry = ptr.as_ptr();
-                    (*entry).page = value;
-                }
-                self.unlink(ptr);
-                self.touch(ptr);
-                return Ok(());
+                let existing = self.entries.borrow()[slot]
+                    .page
+                    .as_ref()
+                    .expect("map points to empty slot")
+                    .clone();
+                turso_assert!(
+                    Arc::ptr_eq(&existing, &value),
+                    "Attempted to insert different page with same key: {key:?}"
+                );
+                return Err(CacheError::KeyExists);
             }
+            // ignore_exists: update the existing entry in place; it keeps its
+            // slot, list position, and reference bit.
+            self.entries.borrow_mut()[slot].page = Some(value);
+            return Ok(());
         }
-        // Key doesn't exist, proceed with new entry
         self.make_room_for(1)?;
-        let entry = Box::new(PageCacheEntry {
-            key,
-            next: None,
-            prev: None,
-            page: value,
-        });
-        let ptr_raw = Box::into_raw(entry);
-        let ptr = unsafe { NonNull::new_unchecked(ptr_raw) };
-        self.touch(ptr);
-        self.map.borrow_mut().insert(key, ptr);
+        let slot_index = self.find_free_slot()?;
+        {
+            let mut entries = self.entries.borrow_mut();
+            let s = &mut entries[slot_index];
+            turso_assert!(s.page.is_none(), "page must be None in free slot");
+            s.key = key;
+            s.page = Some(value);
+            // Sieve ref bit starts cleared, will be set on first access
+            s.ref_bit.set(false);
+        }
+        self.map.borrow_mut().insert(key, slot_index);
+        self.link_front(slot_index);
         Ok(())
     }
 
+    fn find_free_slot(&self) -> Result<SlotIndex, CacheError> {
+        let slot = self.freelist.borrow_mut().pop().ok_or_else(|| {
+            CacheError::InternalError("No free slots available after make_room_for".into())
+        })?;
+        #[cfg(debug_assertions)]
+        {
+            let entries = self.entries.borrow();
+            turso_assert!(
+                entries[slot].page.is_none(),
+                "allocating non-free slot {}",
+                slot
+            );
+        }
+        // Reset linkage on entry itself
+        self.entries.borrow()[slot].reset_links();
+        Ok(slot)
+    }
+
+    fn _delete(&mut self, key: PageCacheKey, clean_page: bool) -> Result<(), CacheError> {
+        if !self.contains_key(&key) {
+            return Ok(());
+        }
+
+        let (entry, slot_idx) = {
+            let map = self.map.borrow();
+            let idx = map.get(&key).ok_or_else(|| {
+                CacheError::InternalError("Key exists but not found in map".into())
+            })?;
+            (
+                self.entries.borrow()[idx]
+                    .page
+                    .as_ref()
+                    .expect("page in map was None")
+                    .clone(),
+                idx,
+            )
+        };
+
+        if entry.is_locked() {
+            return Err(CacheError::Locked {
+                pgno: entry.get().id,
+            });
+        }
+        if entry.is_dirty() {
+            return Err(CacheError::Dirty {
+                pgno: entry.get().id,
+            });
+        }
+        if entry.is_pinned() {
+            return Err(CacheError::Pinned {
+                pgno: entry.get().id,
+            });
+        }
+
+        if clean_page {
+            entry.clear_loaded();
+            let _ = entry.get().contents.take();
+        }
+
+        self.unlink(slot_idx);
+        self.map.borrow_mut().remove(&key);
+        {
+            let e = &mut self.entries.borrow_mut()[slot_idx];
+            e.page = None;
+            e.ref_bit.set(false);
+            e.reset_links();
+            self.freelist.borrow_mut().push(slot_idx);
+        }
+        Ok(())
+    }
+
+    /// Deletes a page from the cache
+    #[inline]
     pub fn delete(&mut self, key: PageCacheKey) -> Result<(), CacheError> {
         trace!("cache_delete(key={:?})", key);
         self._delete(key, true)
     }
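// Editor's note: hypothetical usage sketch, not part of the patch. The typed
// errors let a caller distinguish retryable failures (flush the dirty page,
// then retry) from pages that are simply in use:

fn try_evict_once(cache: &mut PageCache, key: PageCacheKey) -> bool {
    match cache.delete(key) {
        Ok(()) => true,
        // A dirty page needs write-back before it can leave the cache.
        Err(CacheError::Dirty { .. }) => false,
        // Locked/pinned pages are in active use; skip them for now.
        Err(CacheError::Locked { .. } | CacheError::Pinned { .. }) => false,
        Err(_) => false,
    }
}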
 
-    // Returns Ok if key is not found
-    pub fn _delete(&mut self, key: PageCacheKey, clean_page: bool) -> Result<(), CacheError> {
-        if !self.contains_key(&key) {
-            return Ok(());
-        }
-
-        let ptr = *self.map.borrow().get(&key).unwrap();
-
-        // Try to detach from LRU list first, can fail
-        self.detach(ptr, clean_page)?;
-        let ptr = self.map.borrow_mut().remove(&key).unwrap();
-        unsafe {
-            let _ = Box::from_raw(ptr.as_ptr());
-        };
-        Ok(())
-    }
-
-    fn get_ptr(&mut self, key: &PageCacheKey) -> Option<NonNull<PageCacheEntry>> {
-        let m = self.map.borrow_mut();
-        let ptr = m.get(key);
-        ptr.copied()
-    }
-
-    pub fn get(&mut self, key: &PageCacheKey) -> Result<Option<PageRef>, CacheError> {
-        if let Some(page) = self.peek(key, true) {
-            // Because we can abort a read_page completion, this means a page can be in the cache but be unloaded and unlocked.
-            // However, if we do not evict that page from the page cache, we will return an unloaded page later which will trigger
-            // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion
-            // in one Statement, and trigger some error in the next one if we don't evict the page here.
-            if !page.is_loaded() && !page.is_locked() {
-                self.delete(*key)?;
-                Ok(None)
-            } else {
-                Ok(Some(page))
-            }
-        } else {
-            Ok(None)
-        }
-    }
+    #[inline]
+    pub fn get(&mut self, key: &PageCacheKey) -> crate::Result<Option<PageRef>> {
+        let Some(slot) = self.map.borrow().get(key) else {
+            return Ok(None);
+        };
+        // Because we can abort a read_page completion, this means a page can be in the cache but be unloaded and unlocked.
+        // However, if we do not evict that page from the page cache, we will return an unloaded page later which will trigger
+        // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion
+        // in one Statement, and trigger some error in the next one if we don't evict the page here.
+        let page = self.entries.borrow()[slot]
+            .page
+            .as_ref()
+            .expect("page in the map to exist")
+            .clone();
+        if !page.is_loaded() && !page.is_locked() {
+            self.delete(*key)?;
+            Ok(None)
+        } else {
+            Ok(Some(page))
+        }
+    }
 
-    /// Get page without promoting entry
-    pub fn peek(&mut self, key: &PageCacheKey, touch: bool) -> Option<PageRef> {
-        trace!("cache_get(key={:?})", key);
-        let mut ptr = self.get_ptr(key)?;
-        let page = unsafe { ptr.as_mut().page.clone() };
+    /// Get the page without relinking it; if `touch` is set, mark it as accessed.
+    #[inline]
+    pub fn peek(&self, key: &PageCacheKey, touch: bool) -> Option<PageRef> {
+        let slot = self.map.borrow().get(key)?;
+        let entries = self.entries.borrow();
+        let page = entries[slot].page.as_ref()?.clone();
         if touch {
-            self.unlink(ptr);
-            self.touch(ptr);
+            // set reference bit to 'touch' the page
+            entries[slot].ref_bit.set(true);
         }
         Some(page)
     }
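// Editor's note: illustrative sketch, not part of the patch. Under SIEVE a hit
// only sets a flag; nothing is unlinked or relinked, which is the main cost win
// over the old LRU list. `get` marks the page so the next sweep spares it once,
// while `peek(key, false)` reads without marking:

fn scan_without_promoting(cache: &PageCache, keys: &[PageCacheKey]) {
    for key in keys {
        // Does not set the reference bit: a one-off scan leaves these pages
        // just as evictable as they were before.
        if let Some(page) = cache.peek(key, false) {
            let _ = page.get().id;
        }
    }
}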
 
-    // To match SQLite behavior, just set capacity and try to shrink as much as possible.
-    // In case of failure, the caller should request further evictions (e.g. after I/O).
-    pub fn resize(&mut self, capacity: usize) -> CacheResizeResult {
-        let new_map = self.map.borrow().rehash(capacity);
+    /// Resizes the cache to a new capacity.
+    ///
+    /// Survivors are repacked compactly in their original order; if shrinking,
+    /// any entries beyond the new capacity are dropped along with their links.
+    /// If growing, this simply increases capacity.
+    pub fn resize(&mut self, new_cap: usize) -> CacheResizeResult {
+        if new_cap == self.capacity {
+            return CacheResizeResult::Done;
+        }
+        assert!(new_cap > 0);
+        // Collect survivors as payload, no linkage
+        struct Payload {
+            key: PageCacheKey,
+            page: PageRef,
+            ref_bit: bool,
+        }
+        let survivors: Vec<Payload> = {
+            let entries = self.entries.borrow();
+            let mut v = Vec::with_capacity(self.len());
+            // walk tail..head to preserve recency when re-linking via link_front
+            let mut cur = self.tail.get();
+            while cur != NULL {
+                let e = &entries[cur];
+                if let Some(ref p) = e.page {
+                    v.push(Payload {
+                        key: e.key,
+                        page: p.clone(),
+                        ref_bit: e.ref_bit.get(),
+                    });
+                }
+                cur = entries[cur].prev.get();
+            }
+            v
+        };
+
+        // Resize entry array; reset heads
+        self.entries
+            .borrow_mut()
+            .resize(new_cap, PageCacheEntry::empty());
+        self.capacity = new_cap;
+        let mut new_map = PageHashMap::new(new_cap);
+        self.head.set(NULL);
+        self.tail.set(NULL);
+
+        // Repack compactly: survivors[tail..head] pushed to front -> final order == original
+        {
+            let mut entries_mut = self.entries.borrow_mut();
+            for (slot, pl) in survivors.iter().enumerate().take(new_cap) {
+                let e = &mut entries_mut[slot];
+                e.key = pl.key;
+                e.page = Some(pl.page.clone());
+                e.ref_bit.set(pl.ref_bit);
+                e.reset_links();
+                new_map.insert(pl.key, slot);
+            }
+        }
+        for slot in 0..survivors.len().min(new_cap) {
+            self.link_front(slot);
+        }
         self.map.replace(new_map);
-        self.capacity = capacity;
 
-        match self.make_room_for(0) {
-            Ok(_) => CacheResizeResult::Done,
-            Err(_) => CacheResizeResult::PendingEvictions,
-        }
-    }
-
-    fn _detach(
-        &mut self,
-        mut entry: NonNull<PageCacheEntry>,
-        clean_page: bool,
-        allow_detach_pinned: bool,
-    ) -> Result<(), CacheError> {
-        let entry_mut = unsafe { entry.as_mut() };
-        if entry_mut.page.is_locked() {
-            return Err(CacheError::Locked {
-                pgno: entry_mut.page.get().id,
-            });
-        }
-        if entry_mut.page.is_dirty() {
-            return Err(CacheError::Dirty {
-                pgno: entry_mut.page.get().id,
-            });
-        }
-        if entry_mut.page.is_pinned() && !allow_detach_pinned {
-            return Err(CacheError::Pinned {
-                pgno: entry_mut.page.get().id,
-            });
-        }
-
-        if clean_page {
-            entry_mut.page.clear_loaded();
-            debug!("clean(page={})", entry_mut.page.get().id);
-            let _ = entry_mut.page.get().contents.take();
-        }
-        self.unlink(entry);
-        Ok(())
-    }
-
-    fn detach(
-        &mut self,
-        entry: NonNull<PageCacheEntry>,
-        clean_page: bool,
-    ) -> Result<(), CacheError> {
-        self._detach(entry, clean_page, false)
-    }
-
-    fn detach_even_if_pinned(
-        &mut self,
-        entry: NonNull<PageCacheEntry>,
-        clean_page: bool,
-    ) -> Result<(), CacheError> {
-        self._detach(entry, clean_page, true)
-    }
-
-    fn unlink(&mut self, mut entry: NonNull<PageCacheEntry>) {
-        let (next, prev) = unsafe {
-            let c = entry.as_mut();
-            let next = c.next;
-            let prev = c.prev;
-            c.prev = None;
-            c.next = None;
-            (next, prev)
-        };
-
-        match (prev, next) {
-            (None, None) => {
-                self.head.replace(None);
-                self.tail.replace(None);
-            }
-            (None, Some(mut n)) => {
-                unsafe { n.as_mut().prev = None };
-                self.head.borrow_mut().replace(n);
-            }
-            (Some(mut p), None) => {
-                unsafe { p.as_mut().next = None };
-                self.tail = RefCell::new(Some(p));
-            }
-            (Some(mut p), Some(mut n)) => unsafe {
-                let p_mut = p.as_mut();
-                p_mut.next = Some(n);
-                let n_mut = n.as_mut();
-                n_mut.prev = Some(p);
-            },
-        };
-    }
-
-    /// inserts into head, assuming we detached first
-    fn touch(&mut self, mut entry: NonNull<PageCacheEntry>) {
-        if let Some(mut head) = *self.head.borrow_mut() {
-            unsafe {
-                entry.as_mut().next.replace(head);
-                let head = head.as_mut();
-                head.prev = Some(entry);
-            }
-        }
-        if self.tail.borrow().is_none() {
-            self.tail.borrow_mut().replace(entry);
-        }
-        self.head.borrow_mut().replace(entry);
+        // Rebuild freelist
+        let used = survivors.len().min(new_cap);
+        {
+            let mut fl = self.freelist.borrow_mut();
+            fl.clear();
+            for i in (used..new_cap).rev() {
+                fl.push(i);
+            }
+        }
+        CacheResizeResult::Done
     }
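// Editor's note: hypothetical usage sketch, not part of the patch. Resize is now
// a single synchronous repack; unlike the old LRU resize there is no
// PendingEvictions state left for the caller to drain after I/O:
//
//     let mut cache = PageCache::new(5);
//     for i in 1..=5 {
//         let _ = cache.insert(PageCacheKey::new(i), some_page(i));
//     }
//     assert_eq!(cache.resize(3), CacheResizeResult::Done);
//     assert_eq!(cache.len(), 3);      // overflow entries were dropped
//     assert_eq!(cache.capacity(), 3);
//
// (`some_page` stands in for a test helper like `page_with_content` below.)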
 
+    /// Ensures at least `n` free slots are available.
+    ///
+    /// Uses the SIEVE algorithm to evict pages if necessary:
+    /// 1. Start at the tail (LRU position)
+    /// 2. If the page is marked, unmark it and move it to the head (second chance)
+    /// 3. If the page is unmarked, evict it
+    /// 4. If the page is unevictable (dirty/locked/pinned), move it to the head
+    ///
+    /// Returns `CacheError::Full` if not enough pages can be evicted.
     pub fn make_room_for(&mut self, n: usize) -> Result<(), CacheError> {
         if n > self.capacity {
             return Err(CacheError::Full);
@@ -324,117 +447,131 @@ impl DumbLruPageCache {
             return Ok(());
         }
 
-        let tail = self.tail.borrow().ok_or_else(|| {
-            CacheError::InternalError(format!(
-                "Page cache of len {} expected to have a tail pointer",
-                self.len()
-            ))
-        })?;
-
-        // Handle len > capacity, too
-        let available = self.capacity.saturating_sub(len);
-        let x = n.saturating_sub(available);
-        let mut need_to_evict = x.saturating_add(len.saturating_sub(self.capacity));
-
-        let mut current_opt = Some(tail);
-        while need_to_evict > 0 && current_opt.is_some() {
-            let current = current_opt.unwrap();
-            let entry = unsafe { current.as_ref() };
-            // Pick prev before modifying entry
-            current_opt = entry.prev;
-            match self.delete(entry.key) {
-                Err(_) => {}
-                Ok(_) => need_to_evict -= 1,
-            }
-        }
-
-        match need_to_evict > 0 {
-            true => Err(CacheError::Full),
-            false => Ok(()),
-        }
+        let mut need = n - available;
+        let mut examined = 0usize;
+        let max_examinations = self.len().saturating_mul(2);
+
+        while need > 0 && examined < max_examinations {
+            let tail_idx = match self.tail.get() {
+                NULL => {
+                    return Err(CacheError::InternalError(
+                        "Tail is NULL but map not empty".into(),
+                    ))
+                }
+                t => t,
+            };
+
+            let (was_marked, key) = {
+                let mut entries = self.entries.borrow_mut();
+                let s = &mut entries[tail_idx];
+                turso_assert!(s.page.is_some(), "tail points to empty slot");
+                (s.ref_bit.replace(false), s.key)
+            };
+
+            examined += 1;
+
+            if was_marked {
+                self.move_to_front(tail_idx);
+                continue;
+            }
+
+            match self._delete(key, true) {
+                Ok(_) => {
+                    need -= 1;
+                    examined = 0;
+                }
+                Err(
+                    CacheError::Dirty { .. }
+                    | CacheError::Locked { .. }
+                    | CacheError::Pinned { .. },
+                ) => {
+                    self.move_to_front(tail_idx);
+                }
+                Err(e) => return Err(e),
+            }
+        }
+        if need > 0 {
+            return Err(CacheError::Full);
+        }
+        Ok(())
     }
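// Editor's note: worked example, not part of the patch. Capacity 3, pages
// inserted 1,2,3 (head..tail = [3,2,1]), then page 1 is read, setting its
// reference bit. Inserting page 4 calls make_room_for(1), which scans from the
// tail:
//
//     tail=1, bit set   -> clear bit, move 1 to head   list: [1,3,2]
//     tail=2, bit clear -> evict 2                     list: [1,3]
//     link 4 at head                                   list: [4,1,3]
//
// A marked page costs one extra examination; an unmarked one is evicted on
// first touch. test_sieve_second_chance_preserves_marked_tail below encodes
// exactly this trace.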
 
     pub fn clear(&mut self) -> Result<(), CacheError> {
-        let mut current = *self.head.borrow();
-        while let Some(current_entry) = current {
-            unsafe {
-                self.map.borrow_mut().remove(&current_entry.as_ref().key);
-            }
-            let next = unsafe { current_entry.as_ref().next };
-            self.detach_even_if_pinned(current_entry, true)?;
-            unsafe {
-                assert!(!current_entry.as_ref().page.is_dirty());
-            }
-            unsafe {
-                let _ = Box::from_raw(current_entry.as_ptr());
-            };
-            current = next;
+        for e in self.entries.borrow().iter() {
+            if let Some(ref p) = e.page {
+                if p.is_dirty() {
+                    return Err(CacheError::Dirty { pgno: p.get().id });
+                }
+                p.clear_loaded();
+                let _ = p.get().contents.take();
+            }
         }
-        let _ = self.head.take();
-        let _ = self.tail.take();
-
-        assert!(self.head.borrow().is_none());
-        assert!(self.tail.borrow().is_none());
-        assert!(self.map.borrow().is_empty());
+        self.entries.borrow_mut().fill(PageCacheEntry::empty());
+        self.map.borrow_mut().clear();
+        self.head.set(NULL);
+        self.tail.set(NULL);
+        {
+            let mut fl = self.freelist.borrow_mut();
+            fl.clear();
+            for i in (0..self.capacity).rev() {
+                fl.push(i);
+            }
+        }
         Ok(())
     }
 
     /// Removes all pages from the cache with pgno greater than len
     pub fn truncate(&mut self, len: usize) -> Result<(), CacheError> {
-        let head_ptr = *self.head.borrow();
-        let mut current = head_ptr;
-        while let Some(node) = current {
-            let node_ref = unsafe { node.as_ref() };
-
-            current = node_ref.next;
-            if node_ref.key.pgno <= len {
-                continue;
-            }
-
-            self.map.borrow_mut().remove(&node_ref.key);
-            turso_assert!(!node_ref.page.is_dirty(), "page must be clean");
-            turso_assert!(!node_ref.page.is_locked(), "page must be unlocked");
-            turso_assert!(!node_ref.page.is_pinned(), "page must be unpinned");
-            self.detach(node, true)?;
-
-            unsafe {
-                let _ = Box::from_raw(node.as_ptr());
-            }
+        let keys_to_delete: Vec<PageCacheKey> = self
+            .entries
+            .borrow()
+            .iter()
+            .filter_map(|entry| match entry.page {
+                Some(_) if entry.key.0 > len => Some(entry.key),
+                _ => None,
+            })
+            .collect();
+        for key in keys_to_delete.iter() {
+            self.delete(*key)?;
         }
         Ok(())
     }
 
     pub fn print(&self) {
         tracing::debug!("page_cache_len={}", self.map.borrow().len());
-        let head_ptr = *self.head.borrow();
-        let mut current = head_ptr;
-        while let Some(node) = current {
-            unsafe {
+        let entries = self.entries.borrow();
+
+        for (i, entry) in entries.iter().enumerate() {
+            if let Some(ref page) = entry.page {
                 tracing::debug!(
-                    "page={:?}, flags={}, pin_count={}",
-                    node.as_ref().key,
-                    node.as_ref().page.get().flags.load(Ordering::SeqCst),
-                    node.as_ref().page.get().pin_count.load(Ordering::SeqCst),
+                    "slot={}, page={:?}, flags={}, pin_count={}, ref_bit={}",
+                    i,
+                    entry.key,
+                    page.get().flags.load(Ordering::Relaxed),
+                    page.get().pin_count.load(Ordering::Relaxed),
+                    entry.ref_bit.get(),
                 );
-                let node_ref = node.as_ref();
-                current = node_ref.next;
             }
         }
     }
 
     #[cfg(test)]
     pub fn keys(&mut self) -> Vec<PageCacheKey> {
-        let mut this_keys = Vec::new();
-        let head_ptr = *self.head.borrow();
-        let mut current = head_ptr;
-        while let Some(node) = current {
-            unsafe {
-                this_keys.push(node.as_ref().key);
-                let node_ref = node.as_ref();
-                current = node_ref.next;
+        let mut keys = Vec::with_capacity(self.len());
+        let entries = self.entries.borrow();
+        for entry in entries.iter() {
+            if entry.page.is_none() {
continue; } + keys.push(entry.key); } - this_keys + keys } pub fn len(&self) -> usize { @@ -445,146 +582,129 @@ impl DumbLruPageCache { self.capacity } - #[cfg(test)] - fn get_entry_ptr(&self, key: &PageCacheKey) -> Option> { - self.map.borrow().get(key).copied() - } - - #[cfg(test)] - fn verify_list_integrity(&self) { - let map_len = self.map.borrow().len(); - let head_ptr = *self.head.borrow(); - let tail_ptr: Option> = *self.tail.borrow(); - - if map_len == 0 { - assert!(head_ptr.is_none(), "Head should be None when map is empty"); - assert!(tail_ptr.is_none(), "Tail should be None when map is empty"); - return; - } - - assert!( - head_ptr.is_some(), - "Head should be Some when map is not empty" - ); - assert!( - tail_ptr.is_some(), - "Tail should be Some when map is not empty" - ); - - unsafe { - assert!( - head_ptr.unwrap().as_ref().prev.is_none(), - "Head's prev pointer mismatch" - ); - } - - unsafe { - assert!( - tail_ptr.unwrap().as_ref().next.is_none(), - "Tail's next pointer mismatch" - ); - } - - // Forward traversal - let mut forward_count = 0; - let mut current = head_ptr; - let mut last_ptr: Option> = None; - while let Some(node) = current { - forward_count += 1; - unsafe { - let node_ref = node.as_ref(); - assert_eq!( - node_ref.prev, last_ptr, - "Backward pointer mismatch during forward traversal for key {:?}", - node_ref.key - ); - assert!( - self.map.borrow().contains_key(&node_ref.key), - "Node key {:?} not found in map during forward traversal", - node_ref.key - ); - assert_eq!( - self.map.borrow().get(&node_ref.key).copied(), - Some(node), - "Map pointer mismatch for key {:?}", - node_ref.key - ); - - last_ptr = Some(node); - current = node_ref.next; - } - - if forward_count > map_len + 5 { - panic!( - "Infinite loop suspected in forward integrity check. Size {map_len}, count {forward_count}" - ); - } - } - assert_eq!( - forward_count, map_len, - "Forward count mismatch (counted {forward_count}, map has {map_len})" - ); - assert_eq!( - tail_ptr, last_ptr, - "Tail pointer mismatch after forward traversal" - ); - - // Backward traversal - let mut backward_count = 0; - current = tail_ptr; - last_ptr = None; - while let Some(node) = current { - backward_count += 1; - unsafe { - let node_ref = node.as_ref(); - assert_eq!( - node_ref.next, last_ptr, - "Forward pointer mismatch during backward traversal for key {:?}", - node_ref.key - ); - assert!( - self.map.borrow().contains_key(&node_ref.key), - "Node key {:?} not found in map during backward traversal", - node_ref.key - ); - - last_ptr = Some(node); - current = node_ref.prev; - } - if backward_count > map_len + 5 { - panic!( - "Infinite loop suspected in backward integrity check. 
Size {map_len}, count {backward_count}"
-                );
-            }
-        }
-        assert_eq!(
-            backward_count, map_len,
-            "Backward count mismatch (counted {backward_count}, map has {map_len})"
-        );
-        assert_eq!(
-            head_ptr, last_ptr,
-            "Head pointer mismatch after backward traversal"
-        );
-    }
-
     pub fn unset_dirty_all_pages(&mut self) {
-        for node in self.map.borrow_mut().iter_mut() {
-            unsafe {
-                let entry = node.value.as_mut();
-                entry.page.clear_dirty()
-            };
+        let entries = self.entries.borrow();
+        for entry in entries.iter() {
+            if entry.page.is_none() {
+                continue;
+            }
+            entry.page.as_ref().unwrap().clear_dirty();
         }
     }
 
+    #[cfg(test)]
+    fn verify_cache_integrity(&self) {
+        let entries = self.entries.borrow();
+        let map = self.map.borrow();
+
+        let head = self.head.get();
+        let tail = self.tail.get();
+
+        // Head/tail base constraints
+        if head == NULL {
+            assert_eq!(tail, NULL, "tail must be NULL when head is NULL");
+            assert_eq!(map.len(), 0, "map not empty but list is empty");
+        } else {
+            assert_eq!(entries[head].prev.get(), NULL, "head.prev must be NULL");
+        }
+        if tail != NULL {
+            assert_eq!(entries[tail].next.get(), NULL, "tail.next must be NULL");
+        }
+
+        // 0 = unseen, 1 = freelist, 2 = sieve
+        let mut seen = vec![0u8; self.capacity];
+
+        // Walk SIEVE forward from head, check links and detect cycles
+        let mut cnt = 0usize;
+        let mut cur = head;
+        let mut prev = NULL;
+        let mut hops = 0usize;
+        while cur != NULL {
+            hops += 1;
+            assert!(hops <= self.capacity, "SIEVE cycle detected");
+            let e = &entries[cur];
+
+            assert!(e.page.is_some(), "SIEVE points to empty slot {cur}");
+            assert_eq!(e.prev.get(), prev, "prev link broken at slot {cur}");
+            assert_eq!(seen[cur], 0, "slot {cur} appears twice (SIEVE/freelist)");
+
+            seen[cur] = 2;
+            cnt += 1;
+            prev = cur;
+            cur = e.next.get();
+        }
+        assert_eq!(tail, prev, "tail mismatch");
+        assert_eq!(
+            cnt,
+            map.len(),
+            "list length {} != map size {}",
+            cnt,
+            map.len()
+        );
+
+        // Map bijection: every map entry must be on the SIEVE list with matching key
+        for node in map.iter() {
+            let slot = node.slot_index;
+            assert!(
+                entries[slot].page.is_some(),
+                "map points to empty slot {slot}",
+            );
+            assert_eq!(
+                entries[slot].key, node.key,
+                "map key mismatch at slot {slot}",
+            );
+            assert_eq!(seen[slot], 2, "map slot {slot} not on SIEVE list");
+        }
+
+        // Freelist disjointness and shape: free slots must be unlinked and empty
+        let freelist = self.freelist.borrow();
+        let mut free_count = 0usize;
+        for &s in freelist.iter() {
+            free_count += 1;
+            assert_eq!(seen[s], 0, "slot {s} in both freelist and SIEVE");
+            assert!(entries[s].page.is_none(), "freelist slot {s} has a page");
+            assert_eq!(
+                entries[s].next.get(),
+                NULL,
+                "freelist slot {s} next != NULL",
+            );
+            assert_eq!(
+                entries[s].prev.get(),
+                NULL,
+                "freelist slot {s} prev != NULL",
+            );
+            seen[s] = 1;
+        }
+
+        // No orphans; partition covers capacity
+        let orphans = seen.iter().filter(|&&v| v == 0).count();
+        assert_eq!(orphans, 0, "orphan slots detected: {orphans}");
+        assert_eq!(
+            free_count + cnt,
+            self.capacity,
+            "free {} + sieve {} != capacity {}",
+            free_count,
+            cnt,
+            self.capacity
+        );
+    }
 }
 
-impl Default for DumbLruPageCache {
+impl Default for PageCache {
     fn default() -> Self {
-        DumbLruPageCache::new(
+        PageCache::new(
             DEFAULT_PAGE_CACHE_SIZE_IN_PAGES_MAKE_ME_SMALLER_ONCE_WAL_SPILL_IS_IMPLEMENTED,
         )
     }
 }
 
+#[derive(Clone)]
+struct HashMapNode {
+    key: PageCacheKey,
+    slot_index: SlotIndex,
+}
+
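// Editor's note: illustrative check, not part of the patch. PageHashMap below is
// a plain separate-chaining table keyed by page number. Its hash is just a
// modulo, specialized to a bit mask when the capacity is a power of two, since
// x % 2^k == x & (2^k - 1):

#[cfg(test)]
mod hash_sketch {
    #[test]
    fn mask_equals_modulo_for_power_of_two() {
        let capacity = 64usize; // power of two
        for pgno in [0usize, 1, 63, 64, 65, 100_000] {
            assert_eq!(pgno % capacity, pgno & (capacity - 1));
        }
    }
}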
+#[allow(dead_code)]
 impl PageHashMap {
     pub fn new(capacity: usize) -> PageHashMap {
         PageHashMap {
@@ -594,26 +714,20 @@ impl PageHashMap {
         }
     }
 
-    /// Insert page into hashmap. If a key was already in the hashmap, then update it and return the previous value.
-    pub fn insert(
-        &mut self,
-        key: PageCacheKey,
-        value: NonNull<PageCacheEntry>,
-    ) -> Option<NonNull<PageCacheEntry>> {
+    /// Insert into the hashmap, updating the slot index in place if the key already exists.
+    pub fn insert(&mut self, key: PageCacheKey, slot_index: SlotIndex) {
         let bucket = self.hash(&key);
         let bucket = &mut self.buckets[bucket];
         let mut idx = 0;
         while let Some(node) = bucket.get_mut(idx) {
             if node.key == key {
-                let prev = node.value;
-                node.value = value;
-                return Some(prev);
+                node.slot_index = slot_index;
+                return;
             }
             idx += 1;
         }
-        bucket.push(HashMapNode { key, value });
+        bucket.push(HashMapNode { key, slot_index });
         self.size += 1;
-        None
     }
 
     pub fn contains_key(&self, key: &PageCacheKey) -> bool {
         let bucket = self.hash(key);
         self.buckets[bucket].iter().any(|node| node.key == *key)
     }
 
-    pub fn get(&self, key: &PageCacheKey) -> Option<&NonNull<PageCacheEntry>> {
+    pub fn get(&self, key: &PageCacheKey) -> Option<SlotIndex> {
         let bucket = self.hash(key);
         let bucket = &self.buckets[bucket];
-        let mut idx = 0;
-        while let Some(node) = bucket.get(idx) {
+        for node in bucket {
             if node.key == *key {
-                return Some(&node.value);
+                return Some(node.slot_index);
             }
-            idx += 1;
         }
         None
     }
 
-    pub fn remove(&mut self, key: &PageCacheKey) -> Option<NonNull<PageCacheEntry>> {
+    pub fn remove(&mut self, key: &PageCacheKey) -> Option<SlotIndex> {
         let bucket = self.hash(key);
         let bucket = &mut self.buckets[bucket];
         let mut idx = 0;
@@ -649,38 +761,37 @@ impl PageHashMap {
         } else {
             let v = bucket.remove(idx);
             self.size -= 1;
-            Some(v.value)
+            Some(v.slot_index)
         }
     }
 
-    pub fn is_empty(&self) -> bool {
-        self.size == 0
+    pub fn clear(&mut self) {
+        for bucket in &mut self.buckets {
+            bucket.clear();
+        }
+        self.size = 0;
     }
 
     pub fn len(&self) -> usize {
         self.size
     }
 
-    pub fn iter(&self) -> impl Iterator<Item = &HashMapNode> {
-        self.buckets.iter().flat_map(|bucket| bucket.iter())
-    }
-
-    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut HashMapNode> {
-        self.buckets.iter_mut().flat_map(|bucket| bucket.iter_mut())
+    fn iter(&self) -> impl Iterator<Item = &HashMapNode> {
+        self.buckets.iter().flat_map(|b| b.iter())
     }
 
     fn hash(&self, key: &PageCacheKey) -> usize {
         if self.capacity.is_power_of_two() {
-            key.pgno & (self.capacity - 1)
+            key.0 & (self.capacity - 1)
         } else {
-            key.pgno % self.capacity
+            key.0 % self.capacity
         }
     }
 
-    pub fn rehash(&self, new_capacity: usize) -> PageHashMap {
+    fn rehash(&self, new_capacity: usize) -> PageHashMap {
         let mut new_hash_map = PageHashMap::new(new_capacity);
         for node in self.iter() {
-            new_hash_map.insert(node.key, node.value);
+            new_hash_map.insert(node.key, node.slot_index);
        }
         new_hash_map
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::storage::pager::{Page, PageRef};
     use crate::storage::sqlite3_ondisk::PageContent;
     use crate::{BufferPool, IO};
-    use std::ptr::NonNull;
-    use std::sync::OnceLock;
-    use std::{num::NonZeroUsize, sync::Arc};
-
-    use lru::LruCache;
     use rand_chacha::{
         rand_core::{RngCore, SeedableRng},
         ChaCha8Rng,
     };
+    use std::sync::Arc;
+    use std::sync::OnceLock;
 
     fn create_key(id: usize) -> PageCacheKey {
         PageCacheKey::new(id)
     }
 
@@ -728,7 +836,7 @@ mod tests {
         page
     }
 
-    fn insert_page(cache: &mut DumbLruPageCache, id: usize) -> PageCacheKey {
+    fn insert_page(cache: &mut PageCache, id: usize) -> PageCacheKey {
         let key = create_key(id);
         let page = page_with_content(id);
         assert!(cache.insert(key, page).is_ok());
         key
     }
 
     fn page_has_content(page: &PageRef) -> bool {
         page.is_loaded() && page.get().contents.is_some()
     }
 
-    fn insert_and_get_entry(
-        cache: &mut DumbLruPageCache,
-        id: usize,
-    ) -> (PageCacheKey, NonNull<PageCacheEntry>) {
+    fn
insert_and_get_entry(cache: &mut PageCache, id: usize) -> (PageCacheKey, PageCacheEntry) { let key = create_key(id); let page = page_with_content(id); assert!(cache.insert(key, page).is_ok()); - let entry = cache.get_ptr(&key).expect("Entry should exist"); + let entry = cache.map.borrow().get(&key).expect("Key should exist"); + let entry = cache.entries.borrow()[entry].clone(); (key, entry) } #[test] - fn test_detach_only_element() { - let mut cache = DumbLruPageCache::default(); + fn test_delete_only_element() { + let mut cache = PageCache::default(); let key1 = insert_page(&mut cache, 1); - cache.verify_list_integrity(); + cache.verify_cache_integrity(); assert_eq!(cache.len(), 1); - assert!(cache.head.borrow().is_some()); - assert!(cache.tail.borrow().is_some()); - assert_eq!(*cache.head.borrow(), *cache.tail.borrow()); assert!(cache.delete(key1).is_ok()); @@ -768,198 +871,156 @@ mod tests { "Length should be 0 after deleting only element" ); assert!( - cache.map.borrow().get(&key1).is_none(), - "Map should not contain key after delete" - ); - assert!(cache.head.borrow().is_none(), "Head should be None"); - assert!(cache.tail.borrow().is_none(), "Tail should be None"); - cache.verify_list_integrity(); - } - - #[test] - fn test_detach_head() { - let mut cache = DumbLruPageCache::default(); - let _key1 = insert_page(&mut cache, 1); // Tail - let key2 = insert_page(&mut cache, 2); // Middle - let key3 = insert_page(&mut cache, 3); // Head - cache.verify_list_integrity(); - assert_eq!(cache.len(), 3); - - let head_ptr_before = cache.head.borrow().unwrap(); - assert_eq!( - unsafe { &head_ptr_before.as_ref().key }, - &key3, - "Initial head check" - ); - - assert!(cache.delete(key3).is_ok()); - - assert_eq!(cache.len(), 2, "Length should be 2 after deleting head"); - assert!( - cache.map.borrow().get(&key3).is_none(), - "Map should not contain deleted head key" - ); - cache.verify_list_integrity(); - - let new_head_ptr = cache.head.borrow().unwrap(); - assert_eq!( - unsafe { &new_head_ptr.as_ref().key }, - &key2, - "New head should be key2" - ); - assert!( - unsafe { new_head_ptr.as_ref().prev.is_none() }, - "New head's prev should be None" - ); - - let tail_ptr = cache.tail.borrow().unwrap(); - assert_eq!( - unsafe { new_head_ptr.as_ref().next }, - Some(tail_ptr), - "New head's next should point to tail (key1)" + !cache.contains_key(&key1), + "Cache should not contain key after delete" ); + cache.verify_cache_integrity(); } #[test] fn test_detach_tail() { - let mut cache = DumbLruPageCache::default(); - let key1 = insert_page(&mut cache, 1); // Tail - let key2 = insert_page(&mut cache, 2); // Middle - let _key3 = insert_page(&mut cache, 3); // Head - cache.verify_list_integrity(); + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); // Will be tail + let key2 = insert_page(&mut cache, 2); // Will be middle + let _key3 = insert_page(&mut cache, 3); // Will be head + cache.verify_cache_integrity(); assert_eq!(cache.len(), 3); - let tail_ptr_before = cache.tail.borrow().unwrap(); + // Verify initial tail + let tail_slot = cache.tail.get(); assert_eq!( - unsafe { &tail_ptr_before.as_ref().key }, - &key1, + cache.entries.borrow()[tail_slot].key, + key1, "Initial tail check" ); - assert!(cache.delete(key1).is_ok()); // Delete tail - + // Delete tail + assert!(cache.delete(key1).is_ok()); assert_eq!(cache.len(), 2, "Length should be 2 after deleting tail"); assert!( cache.map.borrow().get(&key1).is_none(), "Map should not contain deleted tail key" ); - 
cache.verify_list_integrity(); + cache.verify_cache_integrity(); - let new_tail_ptr = cache.tail.borrow().unwrap(); + // Check new tail is key2 + let new_tail_slot = cache.tail.get(); + let entries = cache.entries.borrow(); + assert_eq!(entries[new_tail_slot].key, key2, "New tail should be key2"); assert_eq!( - unsafe { &new_tail_ptr.as_ref().key }, - &key2, - "New tail should be key2" - ); - assert!( - unsafe { new_tail_ptr.as_ref().next.is_none() }, - "New tail's next should be None" + entries[new_tail_slot].next.get(), + NULL, + "New tail's next should be NULL" ); - let head_ptr = cache.head.borrow().unwrap(); + let head_slot = cache.head.get(); assert_eq!( - unsafe { head_ptr.as_ref().prev }, - None, - "Head's prev should point to new tail (key2)" + entries[head_slot].prev.get(), + NULL, + "Head's prev should be NULL" ); assert_eq!( - unsafe { head_ptr.as_ref().next }, - Some(new_tail_ptr), - "Head's next should point to new tail (key2)" - ); - assert_eq!( - unsafe { new_tail_ptr.as_ref().next }, - None, - "Double check new tail's next is None" + entries[head_slot].next.get(), + new_tail_slot, + "Head's next should point to new tail" ); } #[test] fn test_detach_middle() { - let mut cache = DumbLruPageCache::default(); - let key1 = insert_page(&mut cache, 1); // Tail - let key2 = insert_page(&mut cache, 2); // Middle - let key3 = insert_page(&mut cache, 3); // Middle - let _key4 = insert_page(&mut cache, 4); // Head - cache.verify_list_integrity(); + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); // Will be tail + let key2 = insert_page(&mut cache, 2); // Will be middle + let key3 = insert_page(&mut cache, 3); // Will be middle + let key4 = insert_page(&mut cache, 4); // Will be head + cache.verify_cache_integrity(); assert_eq!(cache.len(), 4); + assert_eq!( + cache.entries.borrow()[cache.tail.get()].key, + key1, + "Initial tail check" + ); + assert_eq!( + cache.entries.borrow()[cache.head.get()].key, + key4, + "Initial head check" + ); - let head_ptr_before = cache.head.borrow().unwrap(); - let tail_ptr_before = cache.tail.borrow().unwrap(); - - assert!(cache.delete(key2).is_ok()); // Detach a middle element (key2) + let head_slot_before = cache.head.get(); + let tail_slot_before = cache.tail.get(); + // Delete middle element (key2) + assert!(cache.delete(key2).is_ok()); assert_eq!(cache.len(), 3, "Length should be 3 after deleting middle"); assert!( cache.map.borrow().get(&key2).is_none(), "Map should not contain deleted middle key2" ); - cache.verify_list_integrity(); + cache.verify_cache_integrity(); - // Check neighbors - let key1_ptr = cache.get_entry_ptr(&key1).expect("Key1 should still exist"); - let key3_ptr = cache.get_entry_ptr(&key3).expect("Key3 should still exist"); + // Head and tail should remain the same assert_eq!( - unsafe { key3_ptr.as_ref().next }, - Some(key1_ptr), - "Key3's next should point to key1" - ); - assert_eq!( - unsafe { key1_ptr.as_ref().prev }, - Some(key3_ptr), - "Key1's prev should point to key2" - ); - - assert_eq!( - cache.head.borrow().unwrap(), - head_ptr_before, + cache.head.get(), + head_slot_before, "Head should remain key4" ); assert_eq!( - cache.tail.borrow().unwrap(), - tail_ptr_before, + cache.tail.get(), + tail_slot_before, "Tail should remain key1" ); + + // Check that key3 and key1 are now linked + let entries = cache.entries.borrow(); + let key3_slot = cache.map.borrow().get(&key3).unwrap(); + let key1_slot = cache.map.borrow().get(&key1).unwrap(); + + assert_eq!( + entries[key3_slot].next.get(), + 
key1_slot, + "Key3's next should point to key1" + ); + assert_eq!( + entries[key1_slot].prev.get(), + key3_slot, + "Key1's prev should point to key3" + ); } #[test] - #[ignore = "for now let's not track active refs"] - fn test_detach_via_delete() { - let mut cache = DumbLruPageCache::default(); + fn test_insert_existing_key_updates_in_place() { + let mut cache = PageCache::default(); let key1 = create_key(1); - let page1 = page_with_content(1); - assert!(cache.insert(key1, page1.clone()).is_ok()); - assert!(page_has_content(&page1)); - cache.verify_list_integrity(); + let page1_v1 = page_with_content(1); + let page1_v2 = page1_v1.clone(); - let result = cache.delete(key1); - assert!(result.is_err()); - assert_eq!(result.unwrap_err(), CacheError::ActiveRefs); + assert!(cache.insert(key1, page1_v1.clone()).is_ok()); assert_eq!(cache.len(), 1); - - drop(page1); - - assert!(cache.delete(key1).is_ok()); - assert_eq!(cache.len(), 0); - cache.verify_list_integrity(); + // This should return KeyExists error but not panic + let result = cache.insert(key1, page1_v2.clone()); + assert_eq!(result, Err(CacheError::KeyExists)); + assert_eq!(cache.len(), 1); + // Verify the page is still accessible + assert!(cache.get(&key1).unwrap().is_some()); + cache.verify_cache_integrity(); } #[test] #[should_panic(expected = "Attempted to insert different page with same key")] - fn test_insert_existing_key_fail() { - let mut cache = DumbLruPageCache::default(); + fn test_insert_different_page_same_key_panics() { + let mut cache = PageCache::default(); let key1 = create_key(1); let page1_v1 = page_with_content(1); - let page1_v2 = page_with_content(1); + let page1_v2 = page_with_content(1); // Different instance assert!(cache.insert(key1, page1_v1.clone()).is_ok()); assert_eq!(cache.len(), 1); - cache.verify_list_integrity(); + cache.verify_cache_integrity(); let _ = cache.insert(key1, page1_v2.clone()); // Panic } #[test] - fn test_detach_nonexistent_key() { - let mut cache = DumbLruPageCache::default(); + fn test_delete_nonexistent_key() { + let mut cache = PageCache::default(); let key_nonexist = create_key(99); assert!(cache.delete(key_nonexist).is_ok()); // no-op @@ -967,127 +1028,338 @@ mod tests { #[test] fn test_page_cache_evict() { - let mut cache = DumbLruPageCache::new(1); + let mut cache = PageCache::new(1); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); assert!(cache.get(&key1).unwrap().is_none()); - } - - #[test] - fn test_detach_locked_page() { - let mut cache = DumbLruPageCache::default(); - let (_, mut entry) = insert_and_get_entry(&mut cache, 1); - unsafe { entry.as_mut().page.set_locked() }; - assert_eq!( - cache.detach(entry, false), - Err(CacheError::Locked { pgno: 1 }) + assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); + assert!( + cache.get(&key1).unwrap().is_none(), + "capacity=1 should evict the older page" ); - cache.verify_list_integrity(); } #[test] - fn test_detach_dirty_page() { - let mut cache = DumbLruPageCache::default(); - let (key, mut entry) = insert_and_get_entry(&mut cache, 1); - cache.get(&key).expect("Page should exist"); - unsafe { entry.as_mut().page.set_dirty() }; - assert_eq!( - cache.detach(entry, false), - Err(CacheError::Dirty { pgno: 1 }) + fn test_sieve_touch_non_tail_does_not_affect_immediate_eviction() { + // Insert 1,2,3 -> [3,2,1], tail=1 + let mut cache = PageCache::new(3); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let 
key3 = insert_page(&mut cache, 3); + + // Touch key2 (not tail) to mark it. + assert!(cache.get(&key2).unwrap().is_some()); + + // Insert 4: tail is still 1 (unmarked) -> evict 1 (not 2). + let key4 = insert_page(&mut cache, 4); + + assert!( + cache.get(&key2).unwrap().is_some(), + "marked non-tail should remain" ); - cache.verify_list_integrity(); + assert!(cache.get(&key3).unwrap().is_some()); + assert!(cache.get(&key4).unwrap().is_some()); + assert!( + cache.get(&key1).unwrap().is_none(), + "unmarked tail should be evicted" + ); + cache.verify_cache_integrity(); } #[test] - #[ignore = "for now let's not track active refs"] - fn test_detach_with_active_reference_clean() { - let mut cache = DumbLruPageCache::default(); - let (key, entry) = insert_and_get_entry(&mut cache, 1); - let page_ref = cache.get(&key); - assert_eq!(cache.detach(entry, true), Err(CacheError::ActiveRefs)); - drop(page_ref); - cache.verify_list_integrity(); + fn test_sieve_second_chance_preserves_marked_tail() { + // Capacity 3, insert 1,2,3 → order(head..tail) = [3,2,1] + let mut cache = PageCache::new(3); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let key3 = insert_page(&mut cache, 3); + assert_eq!(cache.len(), 3); + + // Mark the TAIL (key1). SIEVE should not move it yet + assert!(cache.get(&key1).unwrap().is_some()); + + // Insert 4: must evict exactly one unmarked page. + // Tail is 1 (marked): give second chance: move 1 to head & clear its bit. + // New tail becomes 2 (unmarked): evict 2. Final set: (1,3,4). + let key4 = insert_page(&mut cache, 4); + + assert!( + cache.get(&key1).unwrap().is_some(), + "key1 is marked tail and should survive" + ); + assert!(cache.get(&key3).unwrap().is_some(), "key3 should remain"); + assert!(cache.get(&key4).unwrap().is_some(), "key4 just inserted"); + assert!( + cache.get(&key2).unwrap().is_none(), + "key2 should be the one evicted by SIEVE" + ); + assert_eq!(cache.len(), 3); + cache.verify_cache_integrity(); } #[test] - #[ignore = "for now let's not track active refs"] - fn test_detach_with_active_reference_no_clean() { - let mut cache = DumbLruPageCache::default(); - let (key, entry) = insert_and_get_entry(&mut cache, 1); - cache.get(&key).expect("Page should exist"); - assert!(cache.detach(entry, false).is_ok()); - assert!(cache.map.borrow_mut().remove(&key).is_some()); - cache.verify_list_integrity(); + fn test_delete_locked_page() { + let mut cache = PageCache::default(); + let key = insert_page(&mut cache, 1); + let page = cache.get(&key).unwrap().unwrap(); + page.set_locked(); + + assert_eq!(cache.delete(key), Err(CacheError::Locked { pgno: 1 })); + cache.verify_cache_integrity(); } #[test] - fn test_detach_without_cleaning() { - let mut cache = DumbLruPageCache::default(); - let (key, entry) = insert_and_get_entry(&mut cache, 1); - assert!(cache.detach(entry, false).is_ok()); - assert!(cache.map.borrow_mut().remove(&key).is_some()); - cache.verify_list_integrity(); + fn test_delete_dirty_page() { + let mut cache = PageCache::default(); + let key = insert_page(&mut cache, 1); + let page = cache.get(&key).expect("Page should exist").unwrap(); + page.set_dirty(); + + assert_eq!(cache.delete(key), Err(CacheError::Dirty { pgno: 1 })); + cache.verify_cache_integrity(); + } + + #[test] + fn test_delete_pinned_page() { + let mut cache = PageCache::default(); + let key = insert_page(&mut cache, 1); + let page = cache.get(&key).expect("Page should exist").unwrap(); + page.pin(); + + assert_eq!(cache.delete(key), Err(CacheError::Pinned { 
pgno: 1 })); + cache.verify_cache_integrity(); + } + + #[test] + fn test_make_room_for_with_dirty_pages() { + let mut cache = PageCache::new(2); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + + // Make both pages dirty + cache.get(&key1).unwrap().unwrap().set_dirty(); + cache.get(&key2).unwrap().unwrap().set_dirty(); + + // Try to insert a third page, should fail because can't evict dirty pages + let key3 = create_key(3); + let page3 = page_with_content(3); + let result = cache.insert(key3, page3); + + assert_eq!(result, Err(CacheError::Full)); + assert_eq!(cache.len(), 2); + } + + #[test] + fn test_page_cache_insert_and_get() { + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + assert_eq!(cache.get(&key1).unwrap().unwrap().get().id, 1); + assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); + } + + #[test] + fn test_page_cache_over_capacity() { + // Capacity 2, insert 1,2 -> [2,1] with tail=1 (unmarked) + let mut cache = PageCache::new(2); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + + // Insert 3 -> tail(1) is unmarked → evict 1; keep 2. + let key3 = insert_page(&mut cache, 3); + + assert_eq!(cache.len(), 2); + assert!(cache.get(&key2).unwrap().is_some(), "key2 should remain"); + assert!(cache.get(&key3).unwrap().is_some(), "key3 just inserted"); + assert!( + cache.get(&key1).unwrap().is_none(), + "key1 (tail, unmarked) must be evicted" + ); + cache.verify_cache_integrity(); + } + + #[test] + fn test_page_cache_delete() { + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); + assert!(cache.delete(key1).is_ok()); + assert!(cache.get(&key1).unwrap().is_none()); + } + + #[test] + fn test_page_cache_clear() { + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + assert!(cache.clear().is_ok()); + assert!(cache.get(&key1).unwrap().is_none()); + assert!(cache.get(&key2).unwrap().is_none()); assert_eq!(cache.len(), 0); } #[test] - fn test_detach_with_cleaning() { - let mut cache = DumbLruPageCache::default(); - let (key, entry) = insert_and_get_entry(&mut cache, 1); - let page = cache.get(&key).unwrap().expect("Page should exist"); - assert!(page_has_content(&page)); - drop(page); - assert!(cache.detach(entry, true).is_ok()); - // Internal testing: the page is still in map, so we use it to check content - let page = cache.peek(&key, false).expect("Page should exist in map"); - assert!(!page_has_content(&page)); - assert!(cache.map.borrow_mut().remove(&key).is_some()); - cache.verify_list_integrity(); - } - - #[test] - fn test_detach_only_element_preserves_integrity() { - let mut cache = DumbLruPageCache::default(); - let (_, entry) = insert_and_get_entry(&mut cache, 1); - assert!(cache.detach(entry, false).is_ok()); - assert!( - cache.head.borrow().is_none(), - "Head should be None after detaching only element" - ); - assert!( - cache.tail.borrow().is_none(), - "Tail should be None after detaching only element" - ); + fn test_resize_smaller_success() { + let mut cache = PageCache::default(); + for i in 1..=5 { + let _ = insert_page(&mut cache, i); + } + assert_eq!(cache.len(), 5); + let result = cache.resize(3); + assert_eq!(result, CacheResizeResult::Done); + assert_eq!(cache.len(), 3); + assert_eq!(cache.capacity(), 3); + assert!(cache.insert(create_key(6), page_with_content(6)).is_ok()); } #[test] fn test_detach_with_multiple_pages() { - let 
mut cache = DumbLruPageCache::default(); - let (key1, _) = insert_and_get_entry(&mut cache, 1); - let (key2, entry2) = insert_and_get_entry(&mut cache, 2); - let (key3, _) = insert_and_get_entry(&mut cache, 3); - let head_key = unsafe { cache.head.borrow().unwrap().as_ref().key }; - let tail_key = unsafe { cache.tail.borrow().unwrap().as_ref().key }; - assert_eq!(head_key, key3, "Head should be key3"); - assert_eq!(tail_key, key1, "Tail should be key1"); - assert!(cache.detach(entry2, false).is_ok()); - let head_entry = unsafe { cache.head.borrow().unwrap().as_ref() }; - let tail_entry = unsafe { cache.tail.borrow().unwrap().as_ref() }; - assert_eq!(head_entry.key, key3, "Head should still be key3"); - assert_eq!(tail_entry.key, key1, "Tail should still be key1"); + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let key3 = insert_page(&mut cache, 3); + + assert_eq!(cache.head.get(), 2, "Head should be slot for key3"); + assert_eq!(cache.tail.get(), 0, "Tail should be slot for key1"); + + // Delete middle element + assert!(cache.delete(key2).is_ok()); + + // Verify structure after deletion + assert_eq!(cache.len(), 2); + assert!(cache.map.borrow().get(&key2).is_none()); + + // Head should still be key3's slot, tail should still be key1's slot + let head_slot = cache.head.get(); + let tail_slot = cache.tail.get(); + let entries = cache.entries.borrow(); + assert_eq!(entries[head_slot].key, key3, "Head should still be key3"); + assert_eq!(entries[tail_slot].key, key1, "Tail should still be key1"); + + // Check linkage assert_eq!( - unsafe { head_entry.next.unwrap().as_ref().key }, - key1, - "Head's next should point to tail after middle element detached" + entries[head_slot].next.get(), + tail_slot, + "Head's next should point directly to tail after middle deleted" ); assert_eq!( - unsafe { tail_entry.prev.unwrap().as_ref().key }, - key3, - "Tail's prev should point to head after middle element detached" + entries[tail_slot].prev.get(), + head_slot, + "Tail's prev should point directly to head after middle deleted" ); - assert!(cache.map.borrow_mut().remove(&key2).is_some()); - cache.verify_list_integrity(); + + cache.verify_cache_integrity(); + } + + #[test] + fn test_delete_multiple_elements() { + let mut cache = PageCache::default(); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let key3 = insert_page(&mut cache, 3); + cache.verify_cache_integrity(); + assert_eq!(cache.len(), 3); + + // Delete head (key3) + assert!(cache.delete(key3).is_ok()); + assert_eq!(cache.len(), 2, "Length should be 2 after deleting head"); + assert!( + cache.map.borrow().get(&key3).is_none(), + "Map should not contain deleted head key" + ); + cache.verify_cache_integrity(); + + // Check new head is key2 + let head_slot = cache.head.get(); + { + let entries = cache.entries.borrow_mut(); + assert_eq!(entries[head_slot].key, key2, "New head should be key2"); + assert_eq!( + entries[head_slot].prev.get(), + NULL, + "New head's prev should be NULL" + ); + } + // Delete another + assert!(cache.delete(key1).is_ok()); + assert_eq!(cache.len(), 1, "Length should be 1 after deleting two"); + cache.verify_cache_integrity(); + + // Delete last + assert!(cache.delete(key2).is_ok()); + assert_eq!(cache.len(), 0, "Length should be 0 after deleting all"); + assert_eq!(cache.head.get(), NULL, "Head should be NULL when empty"); + assert_eq!(cache.tail.get(), NULL, "Tail should be NULL when empty"); + 
+        cache.verify_cache_integrity();
+    }
+
+    #[test]
+    fn test_resize_larger() {
+        let mut cache = PageCache::new(2);
+        let key1 = insert_page(&mut cache, 1);
+        let key2 = insert_page(&mut cache, 2);
+        assert_eq!(cache.len(), 2);
+
+        let result = cache.resize(5);
+        assert_eq!(result, CacheResizeResult::Done);
+        assert_eq!(cache.len(), 2);
+        assert_eq!(cache.capacity(), 5);
+
+        assert!(cache.get(&key1).is_ok_and(|p| p.is_some()));
+        assert!(cache.get(&key2).is_ok_and(|p| p.is_some()));
+
+        // Now we should be able to add 3 more
+        for i in 3..=5 {
+            let _ = insert_page(&mut cache, i);
+        }
+        assert_eq!(cache.len(), 5);
+        cache.verify_cache_integrity();
+    }
+
+    #[test]
+    fn test_resize_same_capacity() {
+        let mut cache = PageCache::new(3);
+        for i in 1..=3 {
+            let _ = insert_page(&mut cache, i);
+        }
+        let result = cache.resize(3);
+        assert_eq!(result, CacheResizeResult::Done);
+        assert_eq!(cache.len(), 3);
+        assert_eq!(cache.capacity(), 3);
+        cache.verify_cache_integrity();
+    }
+
+    #[test]
+    fn test_truncate_page_cache() {
+        let mut cache = PageCache::new(10);
+        let _ = insert_page(&mut cache, 1);
+        let _ = insert_page(&mut cache, 4);
+        let _ = insert_page(&mut cache, 8);
+        let _ = insert_page(&mut cache, 10);
+
+        cache.truncate(4).unwrap();
+
+        assert!(cache.contains_key(&PageCacheKey(1)));
+        assert!(cache.contains_key(&PageCacheKey(4)));
+        assert!(!cache.contains_key(&PageCacheKey(8)));
+        assert!(!cache.contains_key(&PageCacheKey(10)));
+        assert_eq!(cache.len(), 2);
+        assert_eq!(cache.capacity(), 10);
+        cache.verify_cache_integrity();
+    }
+
+    #[test]
+    fn test_truncate_page_cache_remove_all() {
+        let mut cache = PageCache::new(10);
+        let _ = insert_page(&mut cache, 8);
+        let _ = insert_page(&mut cache, 10);
+
+        cache.truncate(4).unwrap();
+
+        assert!(!cache.contains_key(&PageCacheKey(8)));
+        assert!(!cache.contains_key(&PageCacheKey(10)));
+        assert_eq!(cache.len(), 0);
+        assert_eq!(cache.capacity(), 10);
+        cache.verify_cache_integrity();
     }
 
     #[test]
@@ -1099,14 +1371,12 @@ mod tests {
         let mut rng = ChaCha8Rng::seed_from_u64(seed);
         tracing::info!("super seed: {}", seed);
         let max_pages = 10;
-        let mut cache = DumbLruPageCache::new(10);
-        let mut lru = LruCache::new(NonZeroUsize::new(10).unwrap());
+        let mut cache = PageCache::new(10);
+        let mut reference_map = std::collections::HashMap::new();
 
         for _ in 0..10000 {
             cache.print();
-            for (key, _) in &lru {
-                tracing::debug!("lru_page={:?}", key);
-            }
+
             match rng.next_u64() % 2 {
                 0 => {
                     // add
                     let id_page: u64 = rng.next_u64() % max_pages;
                     let key = PageCacheKey::new(id_page as usize);
                     #[allow(clippy::arc_with_non_send_sync)]
                     let page = Arc::new(Page::new(id_page as usize));
+
                     if cache.peek(&key, false).is_some() {
                         continue; // skip duplicate page ids
                     }
+
                     tracing::debug!("inserting page {:?}", key);
                     match cache.insert(key, page.clone()) {
                         Err(CacheError::Full | CacheError::ActiveRefs) => {} // Ignore
                         Err(err) => {
-                            // Any other error should fail the test
                             panic!("Cache insertion failed: {err:?}");
                         }
                         Ok(_) => {
-                            lru.push(key, page);
+                            reference_map.insert(key, page);
+                            // Clean up reference_map if cache evicted something
+                            if cache.len() < reference_map.len() {
+                                reference_map.retain(|k, _| cache.contains_key(k));
+                            }
                         }
                     }
                     assert!(cache.len() <= 10);
                 }
                 1 => {
                     // remove
                     let random = rng.next_u64() % 2 == 0;
-                    let key = if random || lru.is_empty() {
+                    let key = if random || reference_map.is_empty() {
                         let id_page: u64 = rng.next_u64() % max_pages;
-
                         PageCacheKey::new(id_page as usize)
                     } else {
-                        let i = rng.next_u64() as usize % lru.len();
-                        let
key: PageCacheKey = *lru.iter().nth(i).unwrap().0; - key + let i = rng.next_u64() as usize % reference_map.len(); + *reference_map.keys().nth(i).unwrap() }; + tracing::debug!("removing page {:?}", key); - lru.pop(&key); + reference_map.remove(&key); assert!(cache.delete(key).is_ok()); } _ => unreachable!(), } - compare_to_lru(&mut cache, &lru); - cache.print(); - for (key, _) in &lru { - tracing::debug!("lru_page={:?}", key); - } - cache.verify_list_integrity(); - for (key, page) in &lru { - println!("getting page {key:?}"); - cache.peek(key, false).unwrap(); - assert_eq!(page.get().id, key.pgno); - } - } - } - pub fn compare_to_lru(cache: &mut DumbLruPageCache, lru: &LruCache) { - let this_keys = cache.keys(); - let mut lru_keys = Vec::new(); - for (lru_key, _) in lru { - lru_keys.push(*lru_key); - } - if this_keys != lru_keys { - cache.print(); - for (lru_key, _) in lru { - tracing::debug!("lru_page={:?}", lru_key); + cache.verify_cache_integrity(); + + // Verify all pages in reference_map are in cache + for (key, page) in &reference_map { + let cached_page = cache.peek(key, false).expect("Page should be in cache"); + assert_eq!(cached_page.get().id, key.0); + assert_eq!(page.get().id, key.0); } - assert_eq!(&this_keys, &lru_keys) } } #[test] - fn test_page_cache_insert_and_get() { - let mut cache = DumbLruPageCache::default(); + fn test_peek_without_touch() { + // Capacity 2: insert 1,2 -> [2,1] tail=1 + let mut cache = PageCache::new(2); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); assert_eq!(cache.get(&key1).unwrap().unwrap().get().id, 1); assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); - } - #[test] - fn test_page_cache_over_capacity() { - let mut cache = DumbLruPageCache::new(2); - let key1 = insert_page(&mut cache, 1); - let key2 = insert_page(&mut cache, 2); + // Peek without touching DOES NOT mark key1 + assert!(cache.peek(&key1, false).is_some()); + + // Insert 3 -> tail(1) still unmarked → evict 1. 
let key3 = insert_page(&mut cache, 3); assert!(cache.get(&key1).unwrap().is_none()); assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); assert_eq!(cache.get(&key3).unwrap().unwrap().get().id, 3); - } - #[test] - fn test_page_cache_delete() { - let mut cache = DumbLruPageCache::default(); - let key1 = insert_page(&mut cache, 1); - assert!(cache.delete(key1).is_ok()); - assert!(cache.get(&key1).unwrap().is_none()); - } - - #[test] - fn test_page_cache_clear() { - let mut cache = DumbLruPageCache::default(); - let key1 = insert_page(&mut cache, 1); - let key2 = insert_page(&mut cache, 2); - assert!(cache.clear().is_ok()); - assert!(cache.get(&key1).unwrap().is_none()); - assert!(cache.get(&key2).unwrap().is_none()); - } - - #[test] - fn test_page_cache_insert_sequential() { - let mut cache = DumbLruPageCache::default(); - for i in 0..10000 { - let key = insert_page(&mut cache, i); - assert_eq!(cache.peek(&key, false).unwrap().get().id, i); - } - } - - #[test] - fn test_resize_smaller_success() { - let mut cache = DumbLruPageCache::default(); - for i in 1..=5 { - let _ = insert_page(&mut cache, i); - } - assert_eq!(cache.len(), 5); - let result = cache.resize(3); - assert_eq!(result, CacheResizeResult::Done); - assert_eq!(cache.len(), 3); - assert_eq!(cache.capacity, 3); - assert!(cache.insert(create_key(6), page_with_content(6)).is_ok()); - } - - #[test] - #[should_panic(expected = "Attempted to insert different page with same key")] - fn test_resize_larger() { - let mut cache = DumbLruPageCache::default(); - let _ = insert_page(&mut cache, 1); - let _ = insert_page(&mut cache, 2); + assert!(cache.get(&key3).is_ok_and(|p| p.is_some())); + assert!(cache.get(&key2).is_ok_and(|p| p.is_some())); + assert!( + cache.get(&key1).is_ok_and(|p| p.is_none()), + "key1 should be evicted since peek(false) didn't mark" + ); assert_eq!(cache.len(), 2); - let result = cache.resize(5); - assert_eq!(result, CacheResizeResult::Done); - assert_eq!(cache.len(), 2); - assert_eq!(cache.capacity, 5); - assert!(cache.get(&create_key(1)).unwrap().is_some()); - assert!(cache.get(&create_key(2)).unwrap().is_some()); - for i in 3..=5 { - let _ = insert_page(&mut cache, i); - } - assert_eq!(cache.len(), 5); - // FIXME: For now this will assert because we cannot insert a page with same id but different contents of page. 
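// A sketch, assuming the `upsert_page` API introduced in patch 03 of this
// series, of the contract behind the FIXME above (helpers such as
// page_with_content are the test utilities defined in this module):
//
// let mut cache = PageCache::new(2);
// let key = PageCacheKey::new(1);
// let page_a = page_with_content(1);
// assert!(cache.insert(key, page_a.clone()).is_ok());
// // Re-inserting the identical Arc reports KeyExists instead of panicking:
// assert_eq!(cache.insert(key, page_a.clone()), Err(CacheError::KeyExists));
// // A *different* page under the same key trips the turso_assert in insert;
// // upsert_page instead replaces the slot contents in place:
// let page_b = page_with_content(1);
// assert!(cache.upsert_page(key, page_b).is_ok());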
- assert!(cache.insert(create_key(4), page_with_content(4)).is_err()); - cache.verify_list_integrity(); - } - - #[test] - #[ignore = "for now let's not track active refs"] - fn test_resize_with_active_references() { - let mut cache = DumbLruPageCache::default(); - let page1 = page_with_content(1); - let page2 = page_with_content(2); - let page3 = page_with_content(3); - assert!(cache.insert(create_key(1), page1.clone()).is_ok()); - assert!(cache.insert(create_key(2), page2.clone()).is_ok()); - assert!(cache.insert(create_key(3), page3.clone()).is_ok()); - assert_eq!(cache.len(), 3); - cache.verify_list_integrity(); - assert_eq!(cache.resize(2), CacheResizeResult::PendingEvictions); - assert_eq!(cache.capacity, 2); - assert_eq!(cache.len(), 3); - drop(page2); - drop(page3); - assert_eq!(cache.resize(1), CacheResizeResult::Done); // Evicted 2 and 3 - assert_eq!(cache.len(), 1); - assert!(cache.insert(create_key(4), page_with_content(4)).is_err()); - cache.verify_list_integrity(); - } - - #[test] - fn test_resize_same_capacity() { - let mut cache = DumbLruPageCache::new(3); - for i in 1..=3 { - let _ = insert_page(&mut cache, i); - } - let result = cache.resize(3); - assert_eq!(result, CacheResizeResult::Done); // no-op - assert_eq!(cache.len(), 3); - assert_eq!(cache.capacity, 3); - cache.verify_list_integrity(); - assert!(cache.insert(create_key(4), page_with_content(4)).is_ok()); - } - - #[test] - fn test_truncate_page_cache() { - let mut cache = DumbLruPageCache::new(10); - let _ = insert_page(&mut cache, 1); - let _ = insert_page(&mut cache, 4); - let _ = insert_page(&mut cache, 8); - let _ = insert_page(&mut cache, 10); - cache.truncate(4).unwrap(); - assert!(cache.contains_key(&PageCacheKey { pgno: 1 })); - assert!(cache.contains_key(&PageCacheKey { pgno: 4 })); - assert!(!cache.contains_key(&PageCacheKey { pgno: 8 })); - assert!(!cache.contains_key(&PageCacheKey { pgno: 10 })); - assert_eq!(cache.len(), 2); - assert_eq!(cache.capacity, 10); - cache.verify_list_integrity(); - assert!(cache.insert(create_key(8), page_with_content(8)).is_ok()); - } - - #[test] - fn test_truncate_page_cache_remove_all() { - let mut cache = DumbLruPageCache::new(10); - let _ = insert_page(&mut cache, 8); - let _ = insert_page(&mut cache, 10); - cache.truncate(4).unwrap(); - assert!(!cache.contains_key(&PageCacheKey { pgno: 8 })); - assert!(!cache.contains_key(&PageCacheKey { pgno: 10 })); - assert_eq!(cache.len(), 0); - assert_eq!(cache.capacity, 10); - cache.verify_list_integrity(); - assert!(cache.insert(create_key(8), page_with_content(8)).is_ok()); + cache.verify_cache_integrity(); } #[test] @@ -1335,7 +1468,7 @@ mod tests { let initial_memory = memory_stats::memory_stats().unwrap().physical_mem; for _ in 0..100000 { - let mut cache = DumbLruPageCache::new(1000); + let mut cache = PageCache::new(1000); for i in 0..1000 { let key = create_key(i); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index da6a2380e..7be2cf1b2 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -25,7 +25,7 @@ use std::sync::{Arc, Mutex}; use tracing::{instrument, trace, Level}; use super::btree::btree_init_page; -use super::page_cache::{CacheError, CacheResizeResult, DumbLruPageCache, PageCacheKey}; +use super::page_cache::{CacheError, CacheResizeResult, PageCache, PageCacheKey}; use super::sqlite3_ondisk::begin_write_btree_page; use super::wal::CheckpointMode; use crate::storage::encryption::{CipherMode, EncryptionContext, EncryptionKey}; @@ -129,7 +129,7 @@ pub struct PageInner { /// requests 
unpinning via [Page::unpin], the pin count will still be >0 if the outer /// code path has not yet requested to unpin the page as well. /// - /// Note that [DumbLruPageCache::clear] evicts the pages even if pinned, so as long as + /// Note that [PageCache::clear] evicts the pages even if pinned, so as long as /// we clear the page cache on errors, pins will not 'leak'. pub pin_count: AtomicUsize, /// The WAL frame number this page was loaded from (0 if loaded from main DB file) @@ -464,7 +464,7 @@ pub struct Pager { /// in-memory databases, ephemeral tables and ephemeral indexes do not have a WAL. pub(crate) wal: Option>>, /// A page cache for the database. - page_cache: Arc>, + page_cache: Arc>, /// Buffer pool for temporary data storage. pub buffer_pool: Arc, /// I/O interface for input/output operations. @@ -564,7 +564,7 @@ impl Pager { db_file: Arc, wal: Option>>, io: Arc, - page_cache: Arc>, + page_cache: Arc>, buffer_pool: Arc, db_state: Arc, init_lock: Arc>, @@ -1153,7 +1153,7 @@ impl Pager { &self, page_idx: usize, page: PageRef, - page_cache: &mut DumbLruPageCache, + page_cache: &mut PageCache, ) -> Result<()> { let page_key = PageCacheKey::new(page_idx); match page_cache.insert(page_key, page.clone()) { @@ -1981,7 +1981,7 @@ impl Pager { trunk_page.get_contents().as_ptr().fill(0); let page_key = PageCacheKey::new(trunk_page.get().id); { - let mut page_cache = self.page_cache.write(); + let page_cache = self.page_cache.read(); turso_assert!( page_cache.contains_key(&page_key), "page {} is not in cache", @@ -2013,7 +2013,7 @@ impl Pager { leaf_page.get_contents().as_ptr().fill(0); let page_key = PageCacheKey::new(leaf_page.get().id); { - let mut page_cache = self.page_cache.write(); + let page_cache = self.page_cache.read(); turso_assert!( page_cache.contains_key(&page_key), "page {} is not in cache", @@ -2402,14 +2402,14 @@ mod tests { use parking_lot::RwLock; - use crate::storage::page_cache::{DumbLruPageCache, PageCacheKey}; + use crate::storage::page_cache::{PageCache, PageCacheKey}; use super::Page; #[test] fn test_shared_cache() { // ensure cache can be shared between threads - let cache = Arc::new(RwLock::new(DumbLruPageCache::new(10))); + let cache = Arc::new(RwLock::new(PageCache::new(10))); let thread = { let cache = cache.clone(); @@ -2442,7 +2442,7 @@ mod ptrmap_tests { use crate::io::{MemoryIO, OpenFlags, IO}; use crate::storage::buffer_pool::BufferPool; use crate::storage::database::{DatabaseFile, DatabaseStorage}; - use crate::storage::page_cache::DumbLruPageCache; + use crate::storage::page_cache::PageCache; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::PageSize; use crate::storage::wal::{WalFile, WalFileShared}; @@ -2471,7 +2471,7 @@ mod ptrmap_tests { let pages = initial_db_pages + 10; let sz = std::cmp::max(std::cmp::min(pages, 64), pages); let buffer_pool = BufferPool::begin_init(&io, (sz * page_size) as usize); - let page_cache = Arc::new(RwLock::new(DumbLruPageCache::new(sz as usize))); + let page_cache = Arc::new(RwLock::new(PageCache::new(sz as usize))); let wal = Rc::new(RefCell::new(WalFile::new( io.clone(), diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 27336a32d..1ad42ee7a 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -6,7 +6,7 @@ use crate::storage::btree::{ integrity_check, IntegrityCheckError, IntegrityCheckState, PageCategory, }; use crate::storage::database::DatabaseFile; -use crate::storage::page_cache::DumbLruPageCache; +use crate::storage::page_cache::PageCache; use 
crate::storage::pager::{AtomicDbState, CreateBTreeFlags, DbState}; use crate::storage::sqlite3_ondisk::read_varint; use crate::translate::collate::CollationSeq; @@ -6996,7 +6996,7 @@ pub fn op_open_ephemeral( .get(); let buffer_pool = program.connection._db.buffer_pool.clone(); - let page_cache = Arc::new(RwLock::new(DumbLruPageCache::default())); + let page_cache = Arc::new(RwLock::new(PageCache::default())); let pager = Rc::new(Pager::new( db_file, From 3a0b9b360a9775dd5be39fa2f870e28266b7edff Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 2 Sep 2025 00:37:58 -0400 Subject: [PATCH 02/13] Fix clippy warnings --- core/storage/page_cache.rs | 4 ---- core/storage/pager.rs | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index 886fdf20c..0cf05841c 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -843,10 +843,6 @@ mod tests { key } - fn page_has_content(page: &PageRef) -> bool { - page.is_loaded() && page.get().contents.is_some() - } - fn insert_and_get_entry(cache: &mut PageCache, id: usize) -> (PageCacheKey, PageCacheEntry) { let key = create_key(id); let page = page_with_content(id); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 7be2cf1b2..345f8335c 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -2423,7 +2423,7 @@ mod tests { }) }; let _ = thread.join(); - let mut cache = cache.write(); + let cache = cache.read(); let page_key = PageCacheKey::new(1); let page = cache.get(&page_key).unwrap(); assert_eq!(page.unwrap().get().id, 1); From 254a0a9342c84d8318989d2a65a705a122e82bd2 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 2 Sep 2025 09:45:45 -0400 Subject: [PATCH 03/13] Apply fix and rename ignore_existing to upsert --- core/storage/page_cache.rs | 44 ++++++++++++++++++++++---------------- core/storage/pager.rs | 12 +++++------ 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index 0cf05841c..ba18b48f4 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -195,11 +195,7 @@ impl PageCache { } #[inline] - pub fn insert_ignore_existing( - &mut self, - key: PageCacheKey, - value: PageRef, - ) -> Result<(), CacheError> { + pub fn upsert_page(&mut self, key: PageCacheKey, value: PageRef) -> Result<(), CacheError> { self._insert(key, value, true) } @@ -207,23 +203,35 @@ impl PageCache { &mut self, key: PageCacheKey, value: PageRef, - ignore_exists: bool, + update_in_place: bool, ) -> Result<(), CacheError> { trace!("insert(key={:?})", key); // Check first if page already exists in cache - if let Some(slot) = self.map.borrow().get(&key) { - if !ignore_exists { - let existing = self.entries.borrow()[slot] - .page - .as_ref() - .expect("map points to empty slot") - .clone(); - turso_assert!( - Arc::ptr_eq(&existing, &value), - "Attempted to insert different page with same key: {key:?}" - ); - return Err(CacheError::KeyExists); + let slot = { self.map.borrow().get(&key) }; + if let Some(slot) = slot { + { + let existing = &mut self.entries.borrow_mut()[slot]; + existing.ref_bit.set(true); + if update_in_place { + // update it in place + existing.page = Some(value); + } else { + // the page we are inserting must match the existing page + turso_assert!( + Arc::ptr_eq( + existing.page.as_ref().expect("map points to empty slot"), + &value + ), + "Attempted to insert different page with same key: {key:?}" + ); + } } + self.move_to_front(slot); + return if 
update_in_place { + Ok(()) + } else { + Err(CacheError::KeyExists) + }; } // Key doesn't exist, proceed with new entry self.make_room_for(1)?; diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 345f8335c..737abf7c1 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -2090,13 +2090,11 @@ impl Pager { // FIXME: use specific page key for writer instead of max frame, this will make readers not conflict assert!(page.is_dirty()); - cache - .insert_ignore_existing(page_key, page.clone()) - .map_err(|e| { - LimboError::InternalError(format!( - "Failed to insert loaded page {id} into cache: {e:?}" - )) - })?; + cache.upsert_page(page_key, page.clone()).map_err(|e| { + LimboError::InternalError(format!( + "Failed to insert loaded page {id} into cache: {e:?}" + )) + })?; page.set_loaded(); Ok(()) } From 582e25241ea38331e6b0f1beff4dabe95f605a6a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 2 Sep 2025 21:27:29 -0400 Subject: [PATCH 04/13] Implement GClock algorithm to distinguish between hot pages and scan touches --- core/storage/page_cache.rs | 570 ++++++++++++++++++++++++++----------- core/storage/pager.rs | 2 +- 2 files changed, 405 insertions(+), 167 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index ba18b48f4..b295f58b2 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -21,6 +21,17 @@ type SlotIndex = usize; const NULL: SlotIndex = usize::MAX; +#[derive(Clone, Ord, Eq, PartialOrd, PartialEq, Copy, Debug)] +/// To distinguish between a page touched by a scan vs a 'hot' page, +/// we use a Reference Bit with 4 states, incrementing this bit up to a maximum +/// of 3 on each access, and decrementing on each eviction scan pass. +enum RefBit { + Clear, + Min, + Med, + Max, +} + #[derive(Clone, Debug)] struct PageCacheEntry { /// Key identifying this page @@ -28,7 +39,7 @@ struct PageCacheEntry { /// The cached page, None if this slot is free page: Option, /// Reference bit for SIEVE algorithm - set on access, cleared during eviction scan - ref_bit: Cell, + ref_bit: Cell, /// Index of next entry in SIEVE queue (newer/toward head) next: Cell, /// Index of previous entry in SIEVE queue (older/toward tail) @@ -40,7 +51,7 @@ impl Default for PageCacheEntry { Self { key: PageCacheKey(0), page: None, - ref_bit: Cell::new(false), + ref_bit: Cell::new(RefBit::Clear), next: Cell::new(NULL), prev: Cell::new(NULL), } @@ -48,6 +59,31 @@ impl Default for PageCacheEntry { } impl PageCacheEntry { + #[inline] + fn bump_ref(&self) { + self.ref_bit.set(match self.ref_bit.get() { + RefBit::Clear => RefBit::Min, + RefBit::Min => RefBit::Med, + _ => RefBit::Max, + }); + } + + #[inline] + /// Returns the old value + fn decrement_ref(&self) -> RefBit { + let old = self.ref_bit.get(); + let new = match old { + RefBit::Max => RefBit::Med, + RefBit::Med => RefBit::Min, + _ => RefBit::Clear, + }; + self.ref_bit.set(new); + old + } + #[inline] + fn clear_ref(&self) { + self.ref_bit.set(RefBit::Clear); + } #[inline] fn empty() -> Self { Self::default() @@ -206,44 +242,57 @@ impl PageCache { update_in_place: bool, ) -> Result<(), CacheError> { trace!("insert(key={:?})", key); - // Check first if page already exists in cache let slot = { self.map.borrow().get(&key) }; if let Some(slot) = slot { + let stale = { + let entries = self.entries.borrow(); + let e = &entries[slot]; + let p = e.page.as_ref().expect("slot must have a page"); + !p.is_loaded() && !p.is_locked() + }; + if stale { + // evict, then continue with fresh insert + 
self._delete(key, true)?; + let slot_index = self.find_free_slot()?; + { + let mut entries = self.entries.borrow_mut(); + let entry = &mut entries[slot_index]; + entry.key = key; + entry.page = Some(value); + entry.clear_ref(); + } + self.map.borrow_mut().insert(key, slot_index); + self.link_front(slot_index); + return Ok(()); + } + + self.move_to_front(slot); { let existing = &mut self.entries.borrow_mut()[slot]; - existing.ref_bit.set(true); + existing.bump_ref(); if update_in_place { - // update it in place existing.page = Some(value); + return Ok(()); } else { - // the page we are inserting must match the existing page turso_assert!( - Arc::ptr_eq( - existing.page.as_ref().expect("map points to empty slot"), - &value - ), + Arc::ptr_eq(existing.page.as_ref().unwrap(), &value), "Attempted to insert different page with same key: {key:?}" ); + return Err(CacheError::KeyExists); } } - self.move_to_front(slot); - return if update_in_place { - Ok(()) - } else { - Err(CacheError::KeyExists) - }; } // Key doesn't exist, proceed with new entry self.make_room_for(1)?; let slot_index = self.find_free_slot()?; { let mut entries = self.entries.borrow_mut(); - let s = &mut entries[slot_index]; - turso_assert!(s.page.is_none(), "page must be None in free slot"); - s.key = key; - s.page = Some(value); + let entry = &mut entries[slot_index]; + turso_assert!(entry.page.is_none(), "page must be None in free slot"); + entry.key = key; + entry.page = Some(value); // Sieve ref bit starts cleared, will be set on first access - s.ref_bit.set(false); + entry.clear_ref(); } self.map.borrow_mut().insert(key, slot_index); self.link_front(slot_index); @@ -314,7 +363,7 @@ impl PageCache { { let e = &mut self.entries.borrow_mut()[slot_idx]; e.page = None; - e.ref_bit.set(false); + e.clear_ref(); e.reset_links(); self.freelist.borrow_mut().push(slot_idx); } @@ -337,11 +386,15 @@ impl PageCache { // However, if we do not evict that page from the page cache, we will return an unloaded page later which will trigger // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion // in one Statement, and trigger some error in the next one if we don't evict the page here. 
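// The staleness rule described above boils down to a small predicate; a
// sketch using the `is_loaded`/`is_locked` accessors visible in this diff:
//
// fn is_stale(page: &PageRef) -> bool {
//     // Not loaded and not mid-I/O: the read completion was aborted, so the
//     // entry holds no usable contents and must not be returned to callers.
//     !page.is_loaded() && !page.is_locked()
// }
//
// `get` treats such a hit as a miss: it evicts the entry and returns
// Ok(None), forcing the pager to re-read the page.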
- let page = self.entries.borrow()[slot] - .page - .as_ref() - .expect("page in the map to exist") - .clone(); + let page = { + let entry = &self.entries.borrow()[slot]; + entry.bump_ref(); + entry + .page + .as_ref() + .expect("page in the map to exist") + .clone() + }; if !page.is_loaded() && !page.is_locked() { self.delete(*key)?; Ok(None) @@ -357,7 +410,7 @@ impl PageCache { let page = entries[slot].page.as_ref()?.clone(); if touch { // set reference bit to 'touch' page - entries[slot].ref_bit.set(true); + entries[slot].bump_ref(); } Some(page) } @@ -370,12 +423,24 @@ impl PageCache { if new_cap == self.capacity { return CacheResizeResult::Done; } + if new_cap < self.len() { + let need = self.len() - new_cap; + // repeat SIEVE passes until we evict `need` pages or give up + let mut evicted = 0; + while evicted < need { + match self.make_room_for(1) { + Ok(()) => evicted += 1, + Err(CacheError::Full) => return CacheResizeResult::PendingEvictions, + Err(_) => return CacheResizeResult::PendingEvictions, + } + } + } assert!(new_cap > 0); // Collect survivors as payload, no linkage struct Payload { key: PageCacheKey, page: PageRef, - ref_bit: bool, + ref_bit: RefBit, } let survivors: Vec = { let entries = self.entries.borrow(); @@ -439,8 +504,8 @@ impl PageCache { /// /// Uses the SIEVE algorithm to evict pages if necessary: /// 1. Start at tail (LRU position) - /// 2. If page is marked, unmark and move to head (second chance) - /// 3. If page is unmarked, evict it + /// 2. If page is marked, decrement mark and move to head + /// 3. If page mark was already Cleared, evict it /// 4. If page is unevictable (dirty/locked/pinned), move to head /// /// Returns `CacheError::Full` if not enough pages can be evicted @@ -473,7 +538,7 @@ impl PageCache { let mut entries = self.entries.borrow_mut(); let s = &mut entries[tail_idx]; turso_assert!(s.page.is_some(), "tail points to empty slot"); - (s.ref_bit.replace(false), s.key) + (s.decrement_ref() != RefBit::Clear, s.key) }; examined += 1; @@ -558,7 +623,7 @@ impl PageCache { for (i, entry_opt) in entries.iter().enumerate() { if let Some(ref page) = entry_opt.page { tracing::debug!( - "slot={}, page={:?}, flags={}, pin_count={}, ref_bit={}", + "slot={}, page={:?}, flags={}, pin_count={}, ref_bit={:?}", i, entry_opt.key, page.get().flags.load(Ordering::Relaxed), @@ -696,6 +761,16 @@ impl PageCache { self.capacity ); } + + #[cfg(test)] + fn slot_of(&self, key: &PageCacheKey) -> Option { + self.map.borrow().get(key) + } + #[cfg(test)] + fn ref_of(&self, key: &PageCacheKey) -> Option { + self.slot_of(key) + .map(|i| self.entries.borrow()[i].ref_bit.get()) + } } impl Default for PageCache { @@ -811,28 +886,20 @@ mod tests { use crate::storage::page_cache::CacheError; use crate::storage::pager::{Page, PageRef}; use crate::storage::sqlite3_ondisk::PageContent; - use crate::{BufferPool, IO}; use rand_chacha::{ rand_core::{RngCore, SeedableRng}, ChaCha8Rng, }; use std::sync::Arc; - use std::sync::OnceLock; fn create_key(id: usize) -> PageCacheKey { PageCacheKey::new(id) } - static TEST_BUFFER_POOL: OnceLock> = OnceLock::new(); - - #[allow(clippy::arc_with_non_send_sync)] pub fn page_with_content(page_id: usize) -> PageRef { let page = Arc::new(Page::new(page_id)); { - let mock_io = Arc::new(crate::PlatformIO::new().unwrap()) as Arc; - let pool = TEST_BUFFER_POOL - .get_or_init(|| BufferPool::begin_init(&mock_io, BufferPool::TEST_ARENA_SIZE)); - let buffer = pool.allocate(4096); + let buffer = crate::Buffer::new_temporary(4096); let page_content = PageContent { 
offset: 0, buffer: Arc::new(buffer), @@ -851,15 +918,6 @@ mod tests { key } - fn insert_and_get_entry(cache: &mut PageCache, id: usize) -> (PageCacheKey, PageCacheEntry) { - let key = create_key(id); - let page = page_with_content(id); - assert!(cache.insert(key, page).is_ok()); - let entry = cache.map.borrow().get(&key).expect("Key should exist"); - let entry = cache.entries.borrow()[entry].clone(); - (key, entry) - } - #[test] fn test_delete_only_element() { let mut cache = PageCache::default(); @@ -878,37 +936,41 @@ mod tests { !cache.contains_key(&key1), "Cache should not contain key after delete" ); + assert_eq!(cache.head.get(), NULL, "Head should be NULL when empty"); + assert_eq!(cache.tail.get(), NULL, "Tail should be NULL when empty"); cache.verify_cache_integrity(); } #[test] fn test_detach_tail() { let mut cache = PageCache::default(); - let key1 = insert_page(&mut cache, 1); // Will be tail - let key2 = insert_page(&mut cache, 2); // Will be middle - let _key3 = insert_page(&mut cache, 3); // Will be head + let key1 = insert_page(&mut cache, 1); // tail + let key2 = insert_page(&mut cache, 2); // middle + let key3 = insert_page(&mut cache, 3); // head cache.verify_cache_integrity(); assert_eq!(cache.len(), 3); - // Verify initial tail + // Verify initial tail (key1 should be at tail) let tail_slot = cache.tail.get(); + assert_ne!(tail_slot, NULL, "Tail should not be NULL"); assert_eq!( cache.entries.borrow()[tail_slot].key, key1, - "Initial tail check" + "Initial tail should be key1" ); // Delete tail assert!(cache.delete(key1).is_ok()); assert_eq!(cache.len(), 2, "Length should be 2 after deleting tail"); assert!( - cache.map.borrow().get(&key1).is_none(), - "Map should not contain deleted tail key" + !cache.contains_key(&key1), + "Cache should not contain deleted tail key" ); cache.verify_cache_integrity(); - // Check new tail is key2 + // Check new tail is key2 (next oldest) let new_tail_slot = cache.tail.get(); + assert_ne!(new_tail_slot, NULL, "New tail should not be NULL"); let entries = cache.entries.borrow(); assert_eq!(entries[new_tail_slot].key, key2, "New tail should be key2"); assert_eq!( @@ -917,17 +979,15 @@ mod tests { "New tail's next should be NULL" ); + // Verify head is key3 let head_slot = cache.head.get(); + assert_ne!(head_slot, NULL, "Head should not be NULL"); + assert_eq!(entries[head_slot].key, key3, "Head should be key3"); assert_eq!( entries[head_slot].prev.get(), NULL, "Head's prev should be NULL" ); - assert_eq!( - entries[head_slot].next.get(), - new_tail_slot, - "Head's next should point to new tail" - ); } #[test] @@ -939,55 +999,57 @@ mod tests { let key4 = insert_page(&mut cache, 4); // Will be head cache.verify_cache_integrity(); assert_eq!(cache.len(), 4); + + // Verify initial state + let tail_slot = cache.tail.get(); + let head_slot = cache.head.get(); assert_eq!( - cache.entries.borrow()[cache.tail.get()].key, + cache.entries.borrow()[tail_slot].key, key1, "Initial tail check" ); assert_eq!( - cache.entries.borrow()[cache.head.get()].key, + cache.entries.borrow()[head_slot].key, key4, "Initial head check" ); - let head_slot_before = cache.head.get(); - let tail_slot_before = cache.tail.get(); - // Delete middle element (key2) assert!(cache.delete(key2).is_ok()); assert_eq!(cache.len(), 3, "Length should be 3 after deleting middle"); assert!( - cache.map.borrow().get(&key2).is_none(), - "Map should not contain deleted middle key2" + !cache.contains_key(&key2), + "Cache should not contain deleted middle key2" ); 
cache.verify_cache_integrity(); - // Head and tail should remain the same + // Verify head and tail keys remain the same + let new_head_slot = cache.head.get(); + let new_tail_slot = cache.tail.get(); + let entries = cache.entries.borrow(); assert_eq!( - cache.head.get(), - head_slot_before, - "Head should remain key4" + entries[new_head_slot].key, key4, + "Head should still be key4" ); assert_eq!( - cache.tail.get(), - tail_slot_before, - "Tail should remain key1" + entries[new_tail_slot].key, key1, + "Tail should still be key1" ); - // Check that key3 and key1 are now linked - let entries = cache.entries.borrow(); + // Check that key3 and key1 are now properly linked let key3_slot = cache.map.borrow().get(&key3).unwrap(); let key1_slot = cache.map.borrow().get(&key1).unwrap(); + // key3 should be between head(key4) and tail(key1) + assert_eq!( + entries[key3_slot].prev.get(), + new_head_slot, + "Key3's prev should point to head (key4)" + ); assert_eq!( entries[key3_slot].next.get(), key1_slot, - "Key3's next should point to key1" - ); - assert_eq!( - entries[key1_slot].prev.get(), - key3_slot, - "Key1's prev should point to key3" + "Key3's next should point to tail (key1)" ); } @@ -996,14 +1058,16 @@ mod tests { let mut cache = PageCache::default(); let key1 = create_key(1); let page1_v1 = page_with_content(1); - let page1_v2 = page1_v1.clone(); + let page1_v2 = page1_v1.clone(); // Same Arc instance assert!(cache.insert(key1, page1_v1.clone()).is_ok()); assert_eq!(cache.len(), 1); - // This should return KeyExists error but not panic + + // Inserting same page instance should return KeyExists error let result = cache.insert(key1, page1_v2.clone()); assert_eq!(result, Err(CacheError::KeyExists)); assert_eq!(cache.len(), 1); + // Verify the page is still accessible assert!(cache.get(&key1).unwrap().is_some()); cache.verify_cache_integrity(); @@ -1015,11 +1079,14 @@ mod tests { let mut cache = PageCache::default(); let key1 = create_key(1); let page1_v1 = page_with_content(1); - let page1_v2 = page_with_content(1); // Different instance + let page1_v2 = page_with_content(1); // Different Arc instance + assert!(cache.insert(key1, page1_v1.clone()).is_ok()); assert_eq!(cache.len(), 1); cache.verify_cache_integrity(); - let _ = cache.insert(key1, page1_v2.clone()); // Panic + + // This should panic because it's a different page instance + let _ = cache.insert(key1, page1_v2.clone()); } #[test] @@ -1027,7 +1094,10 @@ mod tests { let mut cache = PageCache::default(); let key_nonexist = create_key(99); - assert!(cache.delete(key_nonexist).is_ok()); // no-op + // Deleting non-existent key should be a no-op (returns Ok) + assert!(cache.delete(key_nonexist).is_ok()); + assert_eq!(cache.len(), 0); + cache.verify_cache_integrity(); } #[test] @@ -1035,68 +1105,84 @@ mod tests { let mut cache = PageCache::new(1); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); - assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); - assert!(cache.get(&key1).unwrap().is_none()); + + // With capacity=1, inserting key2 should evict key1 assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); assert!( cache.get(&key1).unwrap().is_none(), - "capacity=1 should evict the older page" + "key1 should be evicted" ); + + // key2 should still be accessible + assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); + assert!( + cache.get(&key1).unwrap().is_none(), + "capacity=1 should have evicted the older page" + ); + cache.verify_cache_integrity(); } #[test] fn 
test_sieve_touch_non_tail_does_not_affect_immediate_eviction() { - // Insert 1,2,3 -> [3,2,1], tail=1 + // SIEVE algorithm: touching a non-tail page marks it but doesn't move it. + // The tail (if unmarked) will still be the first eviction candidate. + + // Insert 1,2,3 -> order [3,2,1] with tail=1 let mut cache = PageCache::new(3); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); let key3 = insert_page(&mut cache, 3); - // Touch key2 (not tail) to mark it. + // Touch key2 (middle) to mark it with reference bit assert!(cache.get(&key2).unwrap().is_some()); - // Insert 4: tail is still 1 (unmarked) -> evict 1 (not 2). + // Insert 4: SIEVE examines tail (key1, unmarked) -> evict key1 let key4 = insert_page(&mut cache, 4); assert!( cache.get(&key2).unwrap().is_some(), - "marked non-tail should remain" + "marked non-tail (key2) should remain" + ); + assert!(cache.get(&key3).unwrap().is_some(), "key3 should remain"); + assert!( + cache.get(&key4).unwrap().is_some(), + "key4 was just inserted" ); - assert!(cache.get(&key3).unwrap().is_some()); - assert!(cache.get(&key4).unwrap().is_some()); assert!( cache.get(&key1).unwrap().is_none(), - "unmarked tail should be evicted" + "unmarked tail (key1) should be evicted first" ); cache.verify_cache_integrity(); } #[test] fn test_sieve_second_chance_preserves_marked_tail() { - // Capacity 3, insert 1,2,3 → order(head..tail) = [3,2,1] + // SIEVE gives a "second chance" to marked pages at the tail + // by clearing their bit and moving them to head + let mut cache = PageCache::new(3); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); let key3 = insert_page(&mut cache, 3); assert_eq!(cache.len(), 3); - // Mark the TAIL (key1). SIEVE should not move it yet + // Mark the tail (key1) by accessing it assert!(cache.get(&key1).unwrap().is_some()); - // Insert 4: must evict exactly one unmarked page. - // Tail is 1 (marked): give second chance: move 1 to head & clear its bit. - // New tail becomes 2 (unmarked): evict 2. Final set: (1,3,4). + // Insert 4: SIEVE process: + // 1. Examines tail (key1, marked) -> clear bit, move to head + // 2. 
New tail is key2 (unmarked) -> evict key2 let key4 = insert_page(&mut cache, 4); assert!( cache.get(&key1).unwrap().is_some(), - "key1 is marked tail and should survive" + "key1 had ref bit set, got second chance" ); assert!(cache.get(&key3).unwrap().is_some(), "key3 should remain"); assert!(cache.get(&key4).unwrap().is_some(), "key4 just inserted"); assert!( cache.get(&key2).unwrap().is_none(), - "key2 should be the one evicted by SIEVE" + "key2 became new tail after key1's second chance and was evicted" ); assert_eq!(cache.len(), 3); cache.verify_cache_integrity(); @@ -1110,6 +1196,7 @@ mod tests { page.set_locked(); assert_eq!(cache.delete(key), Err(CacheError::Locked { pgno: 1 })); + assert_eq!(cache.len(), 1, "Locked page should not be deleted"); cache.verify_cache_integrity(); } @@ -1117,10 +1204,11 @@ mod tests { fn test_delete_dirty_page() { let mut cache = PageCache::default(); let key = insert_page(&mut cache, 1); - let page = cache.get(&key).expect("Page should exist").unwrap(); + let page = cache.get(&key).unwrap().unwrap(); page.set_dirty(); assert_eq!(cache.delete(key), Err(CacheError::Dirty { pgno: 1 })); + assert_eq!(cache.len(), 1, "Dirty page should not be deleted"); cache.verify_cache_integrity(); } @@ -1128,10 +1216,11 @@ mod tests { fn test_delete_pinned_page() { let mut cache = PageCache::default(); let key = insert_page(&mut cache, 1); - let page = cache.get(&key).expect("Page should exist").unwrap(); + let page = cache.get(&key).unwrap().unwrap(); page.pin(); assert_eq!(cache.delete(key), Err(CacheError::Pinned { pgno: 1 })); + assert_eq!(cache.len(), 1, "Pinned page should not be deleted"); cache.verify_cache_integrity(); } @@ -1141,7 +1230,7 @@ mod tests { let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); - // Make both pages dirty + // Make both pages dirty (unevictable) cache.get(&key1).unwrap().unwrap().set_dirty(); cache.get(&key2).unwrap().unwrap().set_dirty(); @@ -1152,6 +1241,7 @@ mod tests { assert_eq!(result, Err(CacheError::Full)); assert_eq!(cache.len(), 2); + cache.verify_cache_integrity(); } #[test] @@ -1159,18 +1249,20 @@ mod tests { let mut cache = PageCache::default(); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); + assert_eq!(cache.get(&key1).unwrap().unwrap().get().id, 1); assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); + cache.verify_cache_integrity(); } #[test] fn test_page_cache_over_capacity() { - // Capacity 2, insert 1,2 -> [2,1] with tail=1 (unmarked) + // Test SIEVE eviction when exceeding capacity let mut cache = PageCache::new(2); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); - // Insert 3 -> tail(1) is unmarked → evict 1; keep 2. 
+ // Insert 3: tail (key1, unmarked) should be evicted let key3 = insert_page(&mut cache, 3); assert_eq!(cache.len(), 2); @@ -1178,7 +1270,7 @@ mod tests { assert!(cache.get(&key3).unwrap().is_some(), "key3 just inserted"); assert!( cache.get(&key1).unwrap().is_none(), - "key1 (tail, unmarked) must be evicted" + "key1 (oldest, unmarked) should be evicted" ); cache.verify_cache_integrity(); } @@ -1187,8 +1279,11 @@ mod tests { fn test_page_cache_delete() { let mut cache = PageCache::default(); let key1 = insert_page(&mut cache, 1); + assert!(cache.delete(key1).is_ok()); assert!(cache.get(&key1).unwrap().is_none()); + assert_eq!(cache.len(), 0); + cache.verify_cache_integrity(); } #[test] @@ -1196,10 +1291,14 @@ mod tests { let mut cache = PageCache::default(); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); + assert!(cache.clear().is_ok()); assert!(cache.get(&key1).unwrap().is_none()); assert!(cache.get(&key2).unwrap().is_none()); assert_eq!(cache.len(), 0); + assert_eq!(cache.head.get(), NULL); + assert_eq!(cache.tail.get(), NULL); + cache.verify_cache_integrity(); } #[test] @@ -1209,11 +1308,16 @@ mod tests { let _ = insert_page(&mut cache, i); } assert_eq!(cache.len(), 5); + let result = cache.resize(3); assert_eq!(result, CacheResizeResult::Done); assert_eq!(cache.len(), 3); assert_eq!(cache.capacity(), 3); + + // Should still be able to insert after resize assert!(cache.insert(create_key(6), page_with_content(6)).is_ok()); + assert_eq!(cache.len(), 3); // One was evicted to make room + cache.verify_cache_integrity(); } #[test] @@ -1223,32 +1327,49 @@ mod tests { let key2 = insert_page(&mut cache, 2); let key3 = insert_page(&mut cache, 3); - assert_eq!(cache.head.get(), 2, "Head should be slot for key3"); - assert_eq!(cache.tail.get(), 0, "Tail should be slot for key1"); + // Verify initial ordering (head=key3, tail=key1) + let head_slot = cache.head.get(); + let tail_slot = cache.tail.get(); + assert_eq!( + cache.entries.borrow()[head_slot].key, + key3, + "Head should be key3" + ); + assert_eq!( + cache.entries.borrow()[tail_slot].key, + key1, + "Tail should be key1" + ); - // Delete middle element + // Delete middle element (key2) assert!(cache.delete(key2).is_ok()); // Verify structure after deletion assert_eq!(cache.len(), 2); - assert!(cache.map.borrow().get(&key2).is_none()); + assert!(!cache.contains_key(&key2)); - // Head should still be key3's slot, tail should still be key1's slot - let head_slot = cache.head.get(); - let tail_slot = cache.tail.get(); + // Head and tail keys should remain the same + let new_head_slot = cache.head.get(); + let new_tail_slot = cache.tail.get(); let entries = cache.entries.borrow(); - assert_eq!(entries[head_slot].key, key3, "Head should still be key3"); - assert_eq!(entries[tail_slot].key, key1, "Tail should still be key1"); - - // Check linkage assert_eq!( - entries[head_slot].next.get(), - tail_slot, + entries[new_head_slot].key, key3, + "Head should still be key3" + ); + assert_eq!( + entries[new_tail_slot].key, key1, + "Tail should still be key1" + ); + + // Check direct linkage between head and tail + assert_eq!( + entries[new_head_slot].next.get(), + new_tail_slot, "Head's next should point directly to tail after middle deleted" ); assert_eq!( - entries[tail_slot].prev.get(), - head_slot, + entries[new_tail_slot].prev.get(), + new_head_slot, "Tail's prev should point directly to head after middle deleted" ); @@ -1268,28 +1389,30 @@ mod tests { assert!(cache.delete(key3).is_ok()); assert_eq!(cache.len(), 2, 
"Length should be 2 after deleting head"); assert!( - cache.map.borrow().get(&key3).is_none(), - "Map should not contain deleted head key" + !cache.contains_key(&key3), + "Cache should not contain deleted head key" ); cache.verify_cache_integrity(); // Check new head is key2 let head_slot = cache.head.get(); - { - let entries = cache.entries.borrow_mut(); - assert_eq!(entries[head_slot].key, key2, "New head should be key2"); - assert_eq!( - entries[head_slot].prev.get(), - NULL, - "New head's prev should be NULL" - ); - } - // Delete another + assert_eq!( + cache.entries.borrow()[head_slot].key, + key2, + "New head should be key2" + ); + assert_eq!( + cache.entries.borrow()[head_slot].prev.get(), + NULL, + "New head's prev should be NULL" + ); + + // Delete tail (key1) assert!(cache.delete(key1).is_ok()); assert_eq!(cache.len(), 1, "Length should be 1 after deleting two"); cache.verify_cache_integrity(); - // Delete last + // Delete last element (key2) assert!(cache.delete(key2).is_ok()); assert_eq!(cache.len(), 0, "Length should be 0 after deleting all"); assert_eq!(cache.head.get(), NULL, "Head should be NULL when empty"); @@ -1297,6 +1420,7 @@ mod tests { cache.verify_cache_integrity(); } + #[test] fn test_resize_larger() { let mut cache = PageCache::new(2); let key1 = insert_page(&mut cache, 1); @@ -1308,10 +1432,11 @@ mod tests { assert_eq!(cache.len(), 2); assert_eq!(cache.capacity(), 5); + // Existing pages should still be accessible assert!(cache.get(&key1).is_ok_and(|p| p.is_some())); assert!(cache.get(&key2).is_ok_and(|p| p.is_some())); - // Now we should be able to add 3 more + // Now we should be able to add 3 more without eviction for i in 3..=5 { let _ = insert_page(&mut cache, i); } @@ -1325,6 +1450,7 @@ mod tests { for i in 1..=3 { let _ = insert_page(&mut cache, i); } + let result = cache.resize(3); assert_eq!(result, CacheResizeResult::Done); assert_eq!(cache.len(), 3); @@ -1340,6 +1466,7 @@ mod tests { let _ = insert_page(&mut cache, 8); let _ = insert_page(&mut cache, 10); + // Truncate to keep only pages <= 4 cache.truncate(4).unwrap(); assert!(cache.contains_key(&PageCacheKey(1))); @@ -1357,6 +1484,7 @@ mod tests { let _ = insert_page(&mut cache, 8); let _ = insert_page(&mut cache, 10); + // Truncate to 4 (removes all pages since they're > 4) cache.truncate(4).unwrap(); assert!(!cache.contains_key(&PageCacheKey(8))); @@ -1373,7 +1501,8 @@ mod tests { .unwrap() .as_secs(); let mut rng = ChaCha8Rng::seed_from_u64(seed); - tracing::info!("super seed: {}", seed); + tracing::info!("fuzz test seed: {}", seed); + let max_pages = 10; let mut cache = PageCache::new(10); let mut reference_map = std::collections::HashMap::new(); @@ -1383,21 +1512,21 @@ mod tests { match rng.next_u64() % 2 { 0 => { - // add + // Insert operation let id_page = rng.next_u64() % max_pages; let key = PageCacheKey::new(id_page as usize); #[allow(clippy::arc_with_non_send_sync)] let page = Arc::new(Page::new(id_page as usize)); if cache.peek(&key, false).is_some() { - continue; // skip duplicate page ids + continue; // Skip duplicate page ids } tracing::debug!("inserting page {:?}", key); match cache.insert(key, page.clone()) { - Err(CacheError::Full | CacheError::ActiveRefs) => {} // Ignore + Err(CacheError::Full | CacheError::ActiveRefs) => {} // Expected, ignore Err(err) => { - panic!("Cache insertion failed: {err:?}"); + panic!("Cache insertion failed unexpectedly: {err:?}"); } Ok(_) => { reference_map.insert(key, page); @@ -1407,10 +1536,10 @@ mod tests { } } } - assert!(cache.len() <= 10); + 
assert!(cache.len() <= 10, "Cache size exceeded capacity"); } 1 => { - // remove + // Delete operation let random = rng.next_u64() % 2 == 0; let key = if random || reference_map.is_empty() { let id_page: u64 = rng.next_u64() % max_pages; @@ -1440,34 +1569,62 @@ mod tests { #[test] fn test_peek_without_touch() { - // Capacity 2: insert 1,2 -> [2,1] tail=1 + // Test that peek with touch=false doesn't mark pages let mut cache = PageCache::new(2); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); - assert_eq!(cache.get(&key1).unwrap().unwrap().get().id, 1); - assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); - // Peek without touching DOES NOT mark key1 + // Peek key1 without touching (no ref bit set) assert!(cache.peek(&key1, false).is_some()); - // Insert 3 -> tail(1) still unmarked → evict 1. + // Insert 3: should evict unmarked tail (key1) let key3 = insert_page(&mut cache, 3); - assert!(cache.get(&key1).unwrap().is_none()); - assert_eq!(cache.get(&key2).unwrap().unwrap().get().id, 2); - assert_eq!(cache.get(&key3).unwrap().unwrap().get().id, 3); - assert!(cache.get(&key3).is_ok_and(|p| p.is_some())); - assert!(cache.get(&key2).is_ok_and(|p| p.is_some())); + assert!(cache.get(&key2).unwrap().is_some(), "key2 should remain"); assert!( - cache.get(&key1).is_ok_and(|p| p.is_none()), - "key1 should be evicted since peek(false) didn't mark" + cache.get(&key3).unwrap().is_some(), + "key3 was just inserted" + ); + assert!( + cache.get(&key1).unwrap().is_none(), + "key1 should be evicted since peek(false) didn't mark it" ); assert_eq!(cache.len(), 2); cache.verify_cache_integrity(); } #[test] - #[ignore = "long running test, remove to verify"] + fn test_peek_with_touch() { + // Test that peek with touch=true marks pages for SIEVE + let mut cache = PageCache::new(2); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + + // Peek key1 WITH touching (sets ref bit) + assert!(cache.peek(&key1, true).is_some()); + + // Insert 3: key1 is marked, so it gets second chance + // key2 becomes new tail and gets evicted + let key3 = insert_page(&mut cache, 3); + + assert!( + cache.get(&key1).unwrap().is_some(), + "key1 should survive (was marked)" + ); + assert!( + cache.get(&key3).unwrap().is_some(), + "key3 was just inserted" + ); + assert!( + cache.get(&key2).unwrap().is_none(), + "key2 should be evicted after key1's second chance" + ); + assert_eq!(cache.len(), 2); + cache.verify_cache_integrity(); + } + + #[test] + #[ignore = "long running test, remove ignore to verify memory stability"] fn test_clear_memory_stability() { let initial_memory = memory_stats::memory_stats().unwrap().physical_mem; @@ -1485,12 +1642,93 @@ mod tests { } let final_memory = memory_stats::memory_stats().unwrap().physical_mem; - let growth = final_memory.saturating_sub(initial_memory); - println!("Growth: {growth}"); + + println!("Memory growth: {growth} bytes"); assert!( growth < 10_000_000, - "Memory grew by {growth} bytes over 10 cycles" + "Memory grew by {growth} bytes over test cycles (limit: 10MB)", ); } + + #[test] + fn gclock_ref_saturates_and_decays() { + let mut c = PageCache::new(3); + let k = insert_page(&mut c, 1); + + // No credit on insert. + assert_eq!(c.ref_of(&k), Some(RefBit::Clear)); + + // Three touches saturate to Max. 
+ assert!(c.get(&k).unwrap().is_some()); + assert!(c.get(&k).unwrap().is_some()); + assert!(c.get(&k).unwrap().is_some()); + assert_eq!(c.ref_of(&k), Some(RefBit::Max)); + + // Force three eviction passes; it should survive until credit drains. + insert_page(&mut c, 2); // pass 1, bump to Med and moved to head + assert_eq!(c.ref_of(&k), Some(RefBit::Med)); + insert_page(&mut c, 3); // pass 2, Min + assert_eq!(c.ref_of(&k), Some(RefBit::Min)); + insert_page(&mut c, 4); // pass 3, Clear then evict on next encounter + // It got moved to head on pass 3; now force one more pass to evict it: + insert_page(&mut c, 5); + assert!( + c.get(&k).unwrap().is_none(), + "k should be evicted after credit drains" + ); + c.verify_cache_integrity(); + } + + #[test] + fn gclock_hot_survives_scan_pages() { + let mut c = PageCache::new(4); + let _k1 = insert_page(&mut c, 1); + let k2 = insert_page(&mut c, 2); + let _k3 = insert_page(&mut c, 3); + let _k4 = insert_page(&mut c, 4); + + // Make k2 truly hot: three real touches + for _ in 0..3 { + assert!(c.get(&k2).unwrap().is_some()); + } + assert_eq!(c.ref_of(&k2), Some(RefBit::Max)); + + // Now simulate a scan inserting new pages 5..10 (one-hit wonders). + for id in 5..=10 { + let _ = insert_page(&mut c, id); + } + + // Hot k2 should still be present; most single-hit scan pages should churn. + assert!( + c.get(&k2).unwrap().is_some(), + "hot page should survive scan" + ); + // The earliest single-hit page should be gone. + assert!(c.get(&create_key(5)).unwrap().is_none()); + c.verify_cache_integrity(); + } + + #[test] + fn resize_preserves_ref_and_recency() { + let mut c = PageCache::new(4); + let _k1 = insert_page(&mut c, 1); + let k2 = insert_page(&mut c, 2); + let _k3 = insert_page(&mut c, 3); + let _k4 = insert_page(&mut c, 4); + // Make k2 hot. + for _ in 0..3 { + assert!(c.get(&k2).unwrap().is_some()); + } + let r_before = c.ref_of(&k2); + + // Shrink to 3 (one page will be evicted during repack/next insert) + assert_eq!(c.resize(3), CacheResizeResult::Done); + assert_eq!(c.ref_of(&k2), r_before); + + // Force an eviction; hot k2 should survive more passes. + let _ = insert_page(&mut c, 5); + assert!(c.get(&k2).unwrap().is_some()); + c.verify_cache_integrity(); + } } diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 737abf7c1..00ca36e00 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -2421,7 +2421,7 @@ mod tests { }) }; let _ = thread.join(); - let cache = cache.read(); + let mut cache = cache.write(); let page_key = PageCacheKey::new(1); let page = cache.get(&page_key).unwrap(); assert_eq!(page.unwrap().get().id, 1); From 246b62d513c18859a6732e70a918dddf0f3c7072 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 2 Sep 2025 23:49:18 -0400 Subject: [PATCH 05/13] Remove unnecessary refcells, as PageCacheEntry has interior mutability --- core/storage/page_cache.rs | 222 ++++++++++++++----------------------- core/storage/pager.rs | 2 +- 2 files changed, 85 insertions(+), 139 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index b295f58b2..db491937b 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -1,5 +1,4 @@ use std::cell::Cell; -use std::cell::RefCell; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -95,30 +94,29 @@ impl PageCacheEntry { } } -/// PageCache implements the SIEVE eviction algorithm, a simpler and more efficient -/// alternative to LRU that achieves comparable or better hit ratios with lower overhead. 
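// Why the RefCells can be dropped (a sketch of the reasoning, using the field
// shapes shown in this patch): the intrusive links and the reference bit are
// Cells inside each PageCacheEntry, so relinking needs only &self, while
// anything that moves a page in or out of a slot already takes &mut self.
//
// struct PageCacheEntry {
//     key: PageCacheKey,
//     page: Option<PageRef>,  // mutated through &mut self (insert/delete)
//     ref_bit: Cell<RefBit>,  // interior mutability: bump/decay through &self
//     next: Cell<SlotIndex>,  // interior mutability: unlink/link_front
//     prev: Cell<SlotIndex>,
// }
//
// Borrowing is then checked statically, with no RefCell bookkeeping at runtime.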
+/// PageCache implements a variation of the SIEVE algorithm: it maintains an intrusive linked-list queue of
+/// pages, each of which keeps a 'reference_bit' recording how recently/frequently the page has been accessed.
+/// The bit is set to `Clear` on initial insertion, bumped on each access, and decremented
+/// during eviction scans.
 ///
-/// # Algorithm Overview
-///
-/// SIEVE maintains a queue of cached pages and uses a "second chance" mechanism:
 /// - New pages enter at the head (MRU position)
 /// - Eviction candidates are examined from the tail (LRU position)
 /// - Each page has a reference bit that is set when accessed
-/// - During eviction, if a page's reference bit is set, it gets a "second chance":
-/// the bit is cleared and the page moves to the head
-/// - Pages with clear reference bits are evicted immediately
+/// - During eviction, a page's reference bit is decremented and the page
+/// moves to the head; pages whose reference bit is already Clear are
+/// evicted immediately
 pub struct PageCache {
 /// Capacity in pages
 capacity: usize,
 /// Map of Key -> SlotIndex in entries array
- map: RefCell<PageHashMap>,
- /// Pointers to intrusive doubly-linked list for eviction order (head=MRU, tail=LRU)
+ map: PageHashMap,
+ /// Pointers to intrusive doubly-linked list for eviction order
 head: Cell<SlotIndex>,
 tail: Cell<SlotIndex>,
 /// Fixed-size vec holding page entries
- entries: RefCell<Vec<PageCacheEntry>>,
+ entries: Vec<PageCacheEntry>,
 /// Free list: Stack of available slot indices
- freelist: RefCell<Vec<SlotIndex>>,
+ freelist: Vec<SlotIndex>,
 }

 unsafe impl Send for PageCache {}
@@ -166,24 +164,23 @@ impl PageCache {
 let freelist = (0..capacity).rev().collect::<Vec<SlotIndex>>();
 Self {
 capacity,
- map: RefCell::new(PageHashMap::new(capacity)),
+ map: PageHashMap::new(capacity),
 head: Cell::new(NULL),
 tail: Cell::new(NULL),
- entries: RefCell::new(vec![PageCacheEntry::empty(); capacity]),
- freelist: RefCell::new(freelist),
+ entries: vec![PageCacheEntry::empty(); capacity],
+ freelist,
 }
 }

 #[inline]
 fn link_front(&self, slot: SlotIndex) {
- let entries = self.entries.borrow();
 let old_head = self.head.replace(slot);
- entries[slot].next.set(old_head);
- entries[slot].prev.set(NULL);
+ self.entries[slot].next.set(old_head);
+ self.entries[slot].prev.set(NULL);
 if old_head != NULL {
- entries[old_head].prev.set(slot);
+ self.entries[old_head].prev.set(slot);
 } else {
 // list was empty
 self.tail.set(slot);
 }
 }

 #[inline]
 fn unlink(&self, slot: SlotIndex) {
- let entries = self.entries.borrow();
-
- let p = entries[slot].prev.get();
- let n = entries[slot].next.get();
+ let p = self.entries[slot].prev.get();
+ let n = self.entries[slot].next.get();

 if p != NULL {
- entries[p].next.set(n);
+ self.entries[p].next.set(n);
 } else {
 self.head.set(n);
 }

 if n != NULL {
- entries[n].prev.set(p);
+ self.entries[n].prev.set(p);
 } else {
 self.tail.set(p);
 }

- entries[slot].reset_links();
+ self.entries[slot].reset_links();
 }

 #[inline]
@@ -216,13 +211,13 @@ impl PageCache {
 if self.head.get() == slot {
 return;
 }
- turso_assert!(self.entries.borrow()[slot].page.is_some(), "must be linked");
+ turso_assert!(self.entries[slot].page.is_some(), "must be linked");
 self.unlink(slot);
 self.link_front(slot);
 }

 pub fn contains_key(&self, key: &PageCacheKey) -> bool {
- self.map.borrow().contains_key(key)
+ self.map.contains_key(key)
 }

 #[inline]
@@ -242,11 +237,10 @@ impl PageCache {
 update_in_place: bool,
 ) -> Result<(), CacheError> {
 trace!("insert(key={:?})", key);
- let slot = { self.map.borrow().get(&key) };
+ let slot = { self.map.get(&key) };
 if let Some(slot) = slot {
let stale = { - let entries = self.entries.borrow(); - let e = &entries[slot]; + let e = &self.entries[slot]; let p = e.page.as_ref().expect("slot must have a page"); !p.is_loaded() && !p.is_locked() }; @@ -255,20 +249,20 @@ impl PageCache { self._delete(key, true)?; let slot_index = self.find_free_slot()?; { - let mut entries = self.entries.borrow_mut(); + let entries = &mut self.entries; let entry = &mut entries[slot_index]; entry.key = key; entry.page = Some(value); entry.clear_ref(); } - self.map.borrow_mut().insert(key, slot_index); + self.map.insert(key, slot_index); self.link_front(slot_index); return Ok(()); } self.move_to_front(slot); { - let existing = &mut self.entries.borrow_mut()[slot]; + let existing = &mut self.entries[slot]; existing.bump_ref(); if update_in_place { existing.page = Some(value); @@ -286,7 +280,7 @@ impl PageCache { self.make_room_for(1)?; let slot_index = self.find_free_slot()?; { - let mut entries = self.entries.borrow_mut(); + let entries = &mut self.entries; let entry = &mut entries[slot_index]; turso_assert!(entry.page.is_none(), "page must be None in free slot"); entry.key = key; @@ -294,26 +288,25 @@ impl PageCache { // Sieve ref bit starts cleared, will be set on first access entry.clear_ref(); } - self.map.borrow_mut().insert(key, slot_index); + self.map.insert(key, slot_index); self.link_front(slot_index); Ok(()) } - fn find_free_slot(&self) -> Result { - let slot = self.freelist.borrow_mut().pop().ok_or_else(|| { + fn find_free_slot(&mut self) -> Result { + let slot = self.freelist.pop().ok_or_else(|| { CacheError::InternalError("No free slots available after make_room_for".into()) })?; #[cfg(debug_assertions)] { - let entries = self.entries.borrow(); turso_assert!( - entries[slot].page.is_none(), + self.entries[slot].page.is_none(), "allocating non-free slot {}", slot ); } // Reset linkage on entry itself - self.entries.borrow()[slot].reset_links(); + self.entries[slot].reset_links(); Ok(slot) } @@ -323,12 +316,12 @@ impl PageCache { } let (entry, slot_idx) = { - let map = self.map.borrow(); + let map = &self.map; let idx = map.get(&key).ok_or_else(|| { CacheError::InternalError("Key exists but not found in map".into()) })?; ( - self.entries.borrow()[idx] + self.entries[idx] .page .as_ref() .expect("page in map was None") @@ -359,13 +352,13 @@ impl PageCache { } self.unlink(slot_idx); - self.map.borrow_mut().remove(&key); + self.map.remove(&key); { - let e = &mut self.entries.borrow_mut()[slot_idx]; - e.page = None; - e.clear_ref(); - e.reset_links(); - self.freelist.borrow_mut().push(slot_idx); + let entry = &mut self.entries[slot_idx]; + entry.page = None; + entry.clear_ref(); + entry.reset_links(); + self.freelist.push(slot_idx); } Ok(()) } @@ -379,7 +372,7 @@ impl PageCache { #[inline] pub fn get(&mut self, key: &PageCacheKey) -> crate::Result> { - let Some(slot) = self.map.borrow().get(key) else { + let Some(slot) = self.map.get(key) else { return Ok(None); }; // Because we can abort a read_page completion, this means a page can be in the cache but be unloaded and unlocked. @@ -387,7 +380,7 @@ impl PageCache { // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion // in one Statement, and trigger some error in the next one if we don't evict the page here. 
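// The body below bumps the entry's reference credit on every hit, and the
// eviction scan in make_room_for drains one level per pass. A self-contained
// sketch of that decay, mirroring the RefBit/decrement_ref definitions from
// patch 04 (names reused here for illustration only):
//
// #[derive(Clone, Copy, PartialEq, Debug)]
// enum RefBit { Clear, Min, Med, Max }
//
// /// Returns the decayed level plus whether the page still had credit,
// /// i.e. whether it earns another trip to the head instead of eviction.
// fn decay(r: RefBit) -> (RefBit, bool) {
//     match r {
//         RefBit::Max => (RefBit::Med, true),
//         RefBit::Med => (RefBit::Min, true),
//         RefBit::Min => (RefBit::Clear, true),
//         RefBit::Clear => (RefBit::Clear, false),
//     }
// }
//
// A page saturated at Max therefore survives three scan passes and becomes
// evictable on the fourth, matching the gclock_ref_saturates_and_decays test
// added in patch 04.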
let page = { - let entry = &self.entries.borrow()[slot]; + let entry = &self.entries[slot]; entry.bump_ref(); entry .page @@ -405,8 +398,8 @@ impl PageCache { #[inline] pub fn peek(&self, key: &PageCacheKey, touch: bool) -> Option { - let slot = self.map.borrow().get(key)?; - let entries = self.entries.borrow(); + let slot = self.map.get(key)?; + let entries = &self.entries; let page = entries[slot].page.as_ref()?.clone(); if touch { // set reference bit to 'touch' page @@ -443,7 +436,7 @@ impl PageCache { ref_bit: RefBit, } let survivors: Vec = { - let entries = self.entries.borrow(); + let entries = &self.entries; let mut v = Vec::with_capacity(self.len()); // walk tail..head to preserve recency when re-linking via link_front let mut cur = self.tail.get(); @@ -462,9 +455,7 @@ impl PageCache { }; // Resize entry array; reset heads - self.entries - .borrow_mut() - .resize(new_cap, PageCacheEntry::empty()); + self.entries.resize(new_cap, PageCacheEntry::empty()); self.capacity = new_cap; let mut new_map = PageHashMap::new(new_cap); self.head.set(NULL); @@ -472,7 +463,7 @@ impl PageCache { // Repack compactly: survivors[tail..head] pushed to front -> final order == original { - let mut entries_mut = self.entries.borrow_mut(); + let entries_mut = &mut self.entries; for (slot, pl) in survivors.iter().enumerate().take(new_cap) { let e = &mut entries_mut[slot]; e.key = pl.key; @@ -485,12 +476,12 @@ impl PageCache { for slot in 0..survivors.len().min(new_cap) { self.link_front(slot); } - self.map.replace(new_map); + self.map = new_map; // Rebuild freelist let used = survivors.len().min(new_cap); { - let mut fl = self.freelist.borrow_mut(); + let fl = &mut self.freelist; fl.clear(); for i in (used..new_cap).rev() { fl.push(i); @@ -520,9 +511,13 @@ impl PageCache { return Ok(()); } + const MAX_REF: usize = 3; + let len = self.len(); + let available = self.capacity.saturating_sub(len); let mut need = n - available; let mut examined = 0usize; - let max_examinations = self.len().saturating_mul(2); + // allow enough rotations to drain max credits (+ one for the eviction pass) + let max_examinations = len.saturating_mul(MAX_REF + 1); while need > 0 && examined < max_examinations { let tail_idx = match self.tail.get() { @@ -535,7 +530,7 @@ impl PageCache { }; let (was_marked, key) = { - let mut entries = self.entries.borrow_mut(); + let entries = &mut self.entries; let s = &mut entries[tail_idx]; turso_assert!(s.page.is_some(), "tail points to empty slot"); (s.decrement_ref() != RefBit::Clear, s.key) @@ -570,7 +565,7 @@ impl PageCache { } pub fn clear(&mut self) -> Result<(), CacheError> { - for e in self.entries.borrow().iter() { + for e in self.entries.iter() { if let Some(ref p) = e.page { if p.is_dirty() { return Err(CacheError::Dirty { pgno: p.get().id }); @@ -579,12 +574,12 @@ impl PageCache { let _ = p.get().contents.take(); } } - self.entries.borrow_mut().fill(PageCacheEntry::empty()); - self.map.borrow_mut().clear(); + self.entries.fill(PageCacheEntry::empty()); + self.map.clear(); self.head.set(NULL); self.tail.set(NULL); { - let mut fl = self.freelist.borrow_mut(); + let fl = &mut self.freelist; fl.clear(); for i in (0..self.capacity).rev() { fl.push(i); @@ -597,7 +592,6 @@ impl PageCache { pub fn truncate(&mut self, len: usize) -> Result<(), CacheError> { let keys_to_delete: Vec = { self.entries - .borrow() .iter() .filter_map(|entry| { entry.page.as_ref().and({ @@ -617,8 +611,8 @@ impl PageCache { } pub fn print(&self) { - tracing::debug!("page_cache_len={}", self.map.borrow().len()); - let 
entries = self.entries.borrow(); + tracing::debug!("page_cache_len={}", self.map.len()); + let entries = &self.entries; for (i, entry_opt) in entries.iter().enumerate() { if let Some(ref page) = entry_opt.page { @@ -637,7 +631,7 @@ impl PageCache { #[cfg(test)] pub fn keys(&mut self) -> Vec { let mut keys = Vec::with_capacity(self.len()); - let entries = self.entries.borrow(); + let entries = &self.entries; for entry in entries.iter() { if entry.page.is_none() { continue; @@ -648,7 +642,7 @@ impl PageCache { } pub fn len(&self) -> usize { - self.map.borrow().len() + self.map.len() } pub fn capacity(&self) -> usize { @@ -656,7 +650,7 @@ impl PageCache { } pub fn unset_dirty_all_pages(&mut self) { - let entries = self.entries.borrow(); + let entries = &self.entries; for entry in entries.iter() { if entry.page.is_none() { continue; @@ -667,8 +661,8 @@ impl PageCache { #[cfg(test)] fn verify_cache_integrity(&self) { - let entries = self.entries.borrow(); - let map = self.map.borrow(); + let entries = &self.entries; + let map = &self.map; let head = self.head.get(); let tail = self.tail.get(); @@ -730,7 +724,7 @@ impl PageCache { } // Freelist disjointness and shape: free slots must be unlinked and empty - let freelist = self.freelist.borrow(); + let freelist = &self.freelist; let mut free_count = 0usize; for &s in freelist.iter() { free_count += 1; @@ -764,12 +758,11 @@ impl PageCache { #[cfg(test)] fn slot_of(&self, key: &PageCacheKey) -> Option { - self.map.borrow().get(key) + self.map.get(key) } #[cfg(test)] fn ref_of(&self, key: &PageCacheKey) -> Option { - self.slot_of(key) - .map(|i| self.entries.borrow()[i].ref_bit.get()) + self.slot_of(key).map(|i| self.entries[i].ref_bit.get()) } } @@ -954,8 +947,7 @@ mod tests { let tail_slot = cache.tail.get(); assert_ne!(tail_slot, NULL, "Tail should not be NULL"); assert_eq!( - cache.entries.borrow()[tail_slot].key, - key1, + cache.entries[tail_slot].key, key1, "Initial tail should be key1" ); @@ -971,7 +963,7 @@ mod tests { // Check new tail is key2 (next oldest) let new_tail_slot = cache.tail.get(); assert_ne!(new_tail_slot, NULL, "New tail should not be NULL"); - let entries = cache.entries.borrow(); + let entries = &cache.entries; assert_eq!(entries[new_tail_slot].key, key2, "New tail should be key2"); assert_eq!( entries[new_tail_slot].next.get(), @@ -1003,16 +995,8 @@ mod tests { // Verify initial state let tail_slot = cache.tail.get(); let head_slot = cache.head.get(); - assert_eq!( - cache.entries.borrow()[tail_slot].key, - key1, - "Initial tail check" - ); - assert_eq!( - cache.entries.borrow()[head_slot].key, - key4, - "Initial head check" - ); + assert_eq!(cache.entries[tail_slot].key, key1, "Initial tail check"); + assert_eq!(cache.entries[head_slot].key, key4, "Initial head check"); // Delete middle element (key2) assert!(cache.delete(key2).is_ok()); @@ -1026,7 +1010,7 @@ mod tests { // Verify head and tail keys remain the same let new_head_slot = cache.head.get(); let new_tail_slot = cache.tail.get(); - let entries = cache.entries.borrow(); + let entries = &cache.entries; assert_eq!( entries[new_head_slot].key, key4, "Head should still be key4" @@ -1037,8 +1021,8 @@ mod tests { ); // Check that key3 and key1 are now properly linked - let key3_slot = cache.map.borrow().get(&key3).unwrap(); - let key1_slot = cache.map.borrow().get(&key1).unwrap(); + let key3_slot = cache.map.get(&key3).unwrap(); + let key1_slot = cache.map.get(&key1).unwrap(); // key3 should be between head(key4) and tail(key1) assert_eq!( @@ -1330,16 +1314,8 @@ mod 
tests { // Verify initial ordering (head=key3, tail=key1) let head_slot = cache.head.get(); let tail_slot = cache.tail.get(); - assert_eq!( - cache.entries.borrow()[head_slot].key, - key3, - "Head should be key3" - ); - assert_eq!( - cache.entries.borrow()[tail_slot].key, - key1, - "Tail should be key1" - ); + assert_eq!(cache.entries[head_slot].key, key3, "Head should be key3"); + assert_eq!(cache.entries[tail_slot].key, key1, "Tail should be key1"); // Delete middle element (key2) assert!(cache.delete(key2).is_ok()); @@ -1351,7 +1327,7 @@ mod tests { // Head and tail keys should remain the same let new_head_slot = cache.head.get(); let new_tail_slot = cache.tail.get(); - let entries = cache.entries.borrow(); + let entries = &cache.entries; assert_eq!( entries[new_head_slot].key, key3, "Head should still be key3" @@ -1397,12 +1373,11 @@ mod tests { // Check new head is key2 let head_slot = cache.head.get(); assert_eq!( - cache.entries.borrow()[head_slot].key, - key2, + cache.entries[head_slot].key, key2, "New head should be key2" ); assert_eq!( - cache.entries.borrow()[head_slot].prev.get(), + cache.entries[head_slot].prev.get(), NULL, "New head's prev should be NULL" ); @@ -1651,35 +1626,6 @@ mod tests { ); } - #[test] - fn gclock_ref_saturates_and_decays() { - let mut c = PageCache::new(3); - let k = insert_page(&mut c, 1); - - // No credit on insert. - assert_eq!(c.ref_of(&k), Some(RefBit::Clear)); - - // Three touches saturate to Max. - assert!(c.get(&k).unwrap().is_some()); - assert!(c.get(&k).unwrap().is_some()); - assert!(c.get(&k).unwrap().is_some()); - assert_eq!(c.ref_of(&k), Some(RefBit::Max)); - - // Force three eviction passes; it should survive until credit drains. - insert_page(&mut c, 2); // pass 1, bump to Med and moved to head - assert_eq!(c.ref_of(&k), Some(RefBit::Med)); - insert_page(&mut c, 3); // pass 2, Min - assert_eq!(c.ref_of(&k), Some(RefBit::Min)); - insert_page(&mut c, 4); // pass 3, Clear then evict on next encounter - // It got moved to head on pass 3; now force one more pass to evict it: - insert_page(&mut c, 5); - assert!( - c.get(&k).unwrap().is_none(), - "k should be evicted after credit drains" - ); - c.verify_cache_integrity(); - } - #[test] fn gclock_hot_survives_scan_pages() { let mut c = PageCache::new(4); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 00ca36e00..13cb758a3 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -1171,7 +1171,7 @@ impl Pager { tracing::trace!("read_page(page_idx = {})", page_idx); let mut page_cache = self.page_cache.write(); let page_key = PageCacheKey::new(page_idx); - Ok(page_cache.get(&page_key)?) 
+ page_cache.get(&page_key) } /// Get a page from cache only if it matches the target frame From 5ba273eea53256e121f6b6147f53ff11dae05d5c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 3 Sep 2025 08:09:49 -0400 Subject: [PATCH 06/13] remove unused impl for refbit --- core/storage/page_cache.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index db491937b..567ab906e 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -20,7 +20,7 @@ type SlotIndex = usize; const NULL: SlotIndex = usize::MAX; -#[derive(Clone, Ord, Eq, PartialOrd, PartialEq, Copy, Debug)] +#[derive(Clone, Copy, Debug)] /// To distinguish between a page touched by a scan vs a 'hot' page, /// we use a Reference Bit with 4 states, incrementing this bit up to a maximum /// of 3 on each access, and decrementing on each eviction scan pass. @@ -533,7 +533,7 @@ impl PageCache { let entries = &mut self.entries; let s = &mut entries[tail_idx]; turso_assert!(s.page.is_some(), "tail points to empty slot"); - (s.decrement_ref() != RefBit::Clear, s.key) + (!matches!(s.decrement_ref(), RefBit::Clear), s.key) }; examined += 1; @@ -1638,7 +1638,7 @@ mod tests { for _ in 0..3 { assert!(c.get(&k2).unwrap().is_some()); } - assert_eq!(c.ref_of(&k2), Some(RefBit::Max)); + assert!(matches!(c.ref_of(&k2), Some(RefBit::Max))); // Now simulate a scan inserting new pages 5..10 (one-hit wonders). for id in 5..=10 { @@ -1670,7 +1670,7 @@ mod tests { // Shrink to 3 (one page will be evicted during repack/next insert) assert_eq!(c.resize(3), CacheResizeResult::Done); - assert_eq!(c.ref_of(&k2), r_before); + assert!(matches!(c.ref_of(&k2), r_before)); // Force an eviction; hot k2 should survive more passes. 
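        // Why `matches!` replaces `assert_eq!` throughout this patch: with
        // the `PartialEq` derive removed from `RefBit`, `==` on
        // `Option` no longer compiles, but unit variants still work
        // as patterns. A minimal illustration (sketch, not part of the diff):
        //
        //     fn is_max(bit: RefBit) -> bool {
        //         matches!(bit, RefBit::Max) // no PartialEq impl required
        //     }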
let _ = insert_page(&mut c, 5); From c85a61442f8e257e4cf73d666de7dbe1d9de6638 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Wed, 3 Sep 2025 16:27:15 -0400 Subject: [PATCH 07/13] Remove type alias in page cache --- core/storage/page_cache.rs | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index 567ab906e..4d96bf17f 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -16,9 +16,7 @@ const DEFAULT_PAGE_CACHE_SIZE_IN_PAGES_MAKE_ME_SMALLER_ONCE_WAL_SPILL_IS_IMPLEME #[repr(transparent)] pub struct PageCacheKey(usize); -type SlotIndex = usize; - -const NULL: SlotIndex = usize::MAX; +const NULL: usize = usize::MAX; #[derive(Clone, Copy, Debug)] /// To distinguish between a page touched by a scan vs a 'hot' page, @@ -40,9 +38,9 @@ struct PageCacheEntry { /// Reference bit for SIEVE algorithm - set on access, cleared during eviction scan ref_bit: Cell, /// Index of next entry in SIEVE queue (newer/toward head) - next: Cell, + next: Cell, /// Index of previous entry in SIEVE queue (older/toward tail) - prev: Cell, + prev: Cell, } impl Default for PageCacheEntry { @@ -108,15 +106,15 @@ impl PageCacheEntry { pub struct PageCache { /// Capacity in pages capacity: usize, - /// Map of Key -> SlotIndex in entries array + /// Map of Key -> usize in entries array map: PageHashMap, /// Pointers to intrusive doubly-linked list for eviction order - head: Cell, - tail: Cell, + head: Cell, + tail: Cell, /// Fixed-size vec holding page entries entries: Vec, /// Free list: Stack of available slot indices - freelist: Vec, + freelist: Vec, } unsafe impl Send for PageCache {} @@ -173,7 +171,7 @@ impl PageCache { } #[inline] - fn link_front(&self, slot: SlotIndex) { + fn link_front(&self, slot: usize) { let old_head = self.head.replace(slot); self.entries[slot].next.set(old_head); @@ -188,7 +186,7 @@ impl PageCache { } #[inline] - fn unlink(&self, slot: SlotIndex) { + fn unlink(&self, slot: usize) { let p = self.entries[slot].prev.get(); let n = self.entries[slot].next.get(); @@ -207,7 +205,7 @@ impl PageCache { } #[inline] - fn move_to_front(&self, slot: SlotIndex) { + fn move_to_front(&self, slot: usize) { if self.head.get() == slot { return; } @@ -777,7 +775,7 @@ impl Default for PageCache { #[derive(Clone)] struct HashMapNode { key: PageCacheKey, - slot_index: SlotIndex, + slot_index: usize, } #[allow(dead_code)] @@ -790,7 +788,7 @@ impl PageHashMap { } } - pub fn insert(&mut self, key: PageCacheKey, slot_index: SlotIndex) { + pub fn insert(&mut self, key: PageCacheKey, slot_index: usize) { let bucket = self.hash(&key); let bucket = &mut self.buckets[bucket]; let mut idx = 0; @@ -811,7 +809,7 @@ impl PageHashMap { self.buckets[bucket].iter().any(|node| node.key == *key) } - pub fn get(&self, key: &PageCacheKey) -> Option { + pub fn get(&self, key: &PageCacheKey) -> Option { let bucket = self.hash(key); let bucket = &self.buckets[bucket]; for node in bucket { @@ -822,7 +820,7 @@ impl PageHashMap { None } - pub fn remove(&mut self, key: &PageCacheKey) -> Option { + pub fn remove(&mut self, key: &PageCacheKey) -> Option { let bucket = self.hash(key); let bucket = &mut self.buckets[bucket]; let mut idx = 0; @@ -1666,11 +1664,11 @@ mod tests { for _ in 0..3 { assert!(c.get(&k2).unwrap().is_some()); } - let r_before = c.ref_of(&k2); + let _r_before = c.ref_of(&k2); // Shrink to 3 (one page will be evicted during repack/next insert) assert_eq!(c.resize(3), CacheResizeResult::Done); - 
assert!(matches!(c.ref_of(&k2), r_before));
+        assert!(matches!(c.ref_of(&k2), Some(RefBit::Max)));

         // Force an eviction; hot k2 should survive more passes.
         let _ = insert_page(&mut c, 5);

From e418a902e52553fb6d6adb134f2e1d62c4765d30 Mon Sep 17 00:00:00 2001
From: PThorpe92
Date: Wed, 3 Sep 2025 20:06:36 -0400
Subject: [PATCH 08/13] Fix scoping issues now that refcells are gone, to
 prevent extra destructors

---
 core/storage/page_cache.rs | 167 ++++++++++++++++---------------------
 1 file changed, 70 insertions(+), 97 deletions(-)

diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs
index 4d96bf17f..1fff880fa 100644
--- a/core/storage/page_cache.rs
+++ b/core/storage/page_cache.rs
@@ -237,55 +237,47 @@ impl PageCache {
         trace!("insert(key={:?})", key);
         let slot = { self.map.get(&key) };
         if let Some(slot) = slot {
-            let stale = {
-                let e = &self.entries[slot];
-                let p = e.page.as_ref().expect("slot must have a page");
-                !p.is_loaded() && !p.is_locked()
-            };
-            if stale {
+            let p = self.entries[slot]
+                .page
+                .as_ref()
+                .expect("slot must have a page");
+
+            if !p.is_loaded() && !p.is_locked() {
                 // evict, then continue with fresh insert
                 self._delete(key, true)?;
                 let slot_index = self.find_free_slot()?;
-                {
-                    let entries = &mut self.entries;
-                    let entry = &mut entries[slot_index];
-                    entry.key = key;
-                    entry.page = Some(value);
-                    entry.clear_ref();
-                }
+                let entry = &mut self.entries[slot_index];
+                entry.key = key;
+                entry.page = Some(value);
+                entry.clear_ref();
                 self.map.insert(key, slot_index);
                 self.link_front(slot_index);
                 return Ok(());
             }

             self.move_to_front(slot);
-            {
-                let existing = &mut self.entries[slot];
-                existing.bump_ref();
-                if update_in_place {
-                    existing.page = Some(value);
-                    return Ok(());
-                } else {
-                    turso_assert!(
-                        Arc::ptr_eq(existing.page.as_ref().unwrap(), &value),
-                        "Attempted to insert different page with same key: {key:?}"
-                    );
-                    return Err(CacheError::KeyExists);
-                }
+            let existing = &mut self.entries[slot];
+            existing.bump_ref();
+            if update_in_place {
+                existing.page = Some(value);
+                return Ok(());
+            } else {
+                turso_assert!(
+                    Arc::ptr_eq(existing.page.as_ref().unwrap(), &value),
+                    "Attempted to insert different page with same key: {key:?}"
+                );
+                return Err(CacheError::KeyExists);
             }
         }

         // Key doesn't exist, proceed with new entry
         self.make_room_for(1)?;
         let slot_index = self.find_free_slot()?;
-        {
-            let entries = &mut self.entries;
-            let entry = &mut entries[slot_index];
-            turso_assert!(entry.page.is_none(), "page must be None in free slot");
-            entry.key = key;
-            entry.page = Some(value);
-            // Sieve ref bit starts cleared, will be set on first access
-            entry.clear_ref();
-        }
+        let entry = &mut self.entries[slot_index];
+        turso_assert!(entry.page.is_none(), "page must be None in free slot");
+        entry.key = key;
+        entry.page = Some(value);
+        // Sieve ref bit starts cleared, will be set on first access
+        entry.clear_ref();
         self.map.insert(key, slot_index);
         self.link_front(slot_index);
         Ok(())
     }
@@ -313,20 +305,15 @@ impl PageCache {
             return Ok(());
         }

-        let (entry, slot_idx) = {
-            let map = &self.map;
-            let idx = map.get(&key).ok_or_else(|| {
-                CacheError::InternalError("Key exists but not found in map".into())
-            })?;
-            (
-                self.entries[idx]
-                    .page
-                    .as_ref()
-                    .expect("page in map was None")
-                    .clone(),
-                idx,
-            )
-        };
+        let slot_idx = self
+            .map
+            .get(&key)
+            .ok_or_else(|| CacheError::InternalError("Key exists but not found in map".into()))?;
+        let entry = self.entries[slot_idx]
+            .page
+            .as_ref()
+            .expect("page in map was None")
+            .clone();

         if
entry.is_locked() { return Err(CacheError::Locked { @@ -351,13 +338,11 @@ impl PageCache { self.unlink(slot_idx); self.map.remove(&key); - { - let entry = &mut self.entries[slot_idx]; - entry.page = None; - entry.clear_ref(); - entry.reset_links(); - self.freelist.push(slot_idx); - } + let entry = &mut self.entries[slot_idx]; + entry.page = None; + entry.clear_ref(); + entry.reset_links(); + self.freelist.push(slot_idx); Ok(()) } @@ -377,15 +362,13 @@ impl PageCache { // However, if we do not evict that page from the page cache, we will return an unloaded page later which will trigger // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion // in one Statement, and trigger some error in the next one if we don't evict the page here. - let page = { - let entry = &self.entries[slot]; - entry.bump_ref(); - entry - .page - .as_ref() - .expect("page in the map to exist") - .clone() - }; + let entry = &self.entries[slot]; + entry.bump_ref(); + let page = entry + .page + .as_ref() + .expect("page in the map to exist") + .clone(); if !page.is_loaded() && !page.is_locked() { self.delete(*key)?; Ok(None) @@ -397,11 +380,11 @@ impl PageCache { #[inline] pub fn peek(&self, key: &PageCacheKey, touch: bool) -> Option { let slot = self.map.get(key)?; - let entries = &self.entries; - let page = entries[slot].page.as_ref()?.clone(); + let entry = &self.entries[slot]; + let page = entry.page.as_ref()?.clone(); if touch { // set reference bit to 'touch' page - entries[slot].bump_ref(); + entry.bump_ref(); } Some(page) } @@ -460,16 +443,13 @@ impl PageCache { self.tail.set(NULL); // Repack compactly: survivors[tail..head] pushed to front -> final order == original - { - let entries_mut = &mut self.entries; - for (slot, pl) in survivors.iter().enumerate().take(new_cap) { - let e = &mut entries_mut[slot]; - e.key = pl.key; - e.page = Some(pl.page.clone()); - e.ref_bit.set(pl.ref_bit); - e.reset_links(); - new_map.insert(pl.key, slot); - } + for (slot, pl) in survivors.iter().enumerate().take(new_cap) { + let e = &mut self.entries[slot]; + e.key = pl.key; + e.page = Some(pl.page.clone()); + e.ref_bit.set(pl.ref_bit); + e.reset_links(); + new_map.insert(pl.key, slot); } for slot in 0..survivors.len().min(new_cap) { self.link_front(slot); @@ -478,12 +458,10 @@ impl PageCache { // Rebuild freelist let used = survivors.len().min(new_cap); - { - let fl = &mut self.freelist; - fl.clear(); - for i in (used..new_cap).rev() { - fl.push(i); - } + let fl = &mut self.freelist; + fl.clear(); + for i in (used..new_cap).rev() { + fl.push(i); } CacheResizeResult::Done @@ -527,13 +505,10 @@ impl PageCache { t => t, }; - let (was_marked, key) = { - let entries = &mut self.entries; - let s = &mut entries[tail_idx]; - turso_assert!(s.page.is_some(), "tail points to empty slot"); - (!matches!(s.decrement_ref(), RefBit::Clear), s.key) - }; - + let s = &mut self.entries[tail_idx]; + turso_assert!(s.page.is_some(), "tail points to empty slot"); + let key = s.key; + let was_marked = !matches!(s.decrement_ref(), RefBit::Clear); examined += 1; if was_marked { @@ -576,12 +551,10 @@ impl PageCache { self.map.clear(); self.head.set(NULL); self.tail.set(NULL); - { - let fl = &mut self.freelist; - fl.clear(); - for i in (0..self.capacity).rev() { - fl.push(i); - } + let fl = &mut self.freelist; + fl.clear(); + for i in (0..self.capacity).rev() { + fl.push(i); } Ok(()) } From f45a7538febb6ecb033a2135fc9478a7e23e34b9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 5 Sep 2025 
08:36:57 -0400
Subject: [PATCH 09/13] Use true sieve/gclock algo instead of lru, don't link
 pages circularly

---
 core/storage/page_cache.rs | 119 ++++++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 56 deletions(-)

diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs
index 1fff880fa..771ddba6b 100644
--- a/core/storage/page_cache.rs
+++ b/core/storage/page_cache.rs
@@ -111,6 +111,7 @@ pub struct PageCache {
     /// Pointers to intrusive doubly-linked list for eviction order
     head: Cell,
     tail: Cell,
+    clock_hand: Cell,
     /// Fixed-size vec holding page entries
     entries: Vec,
     /// Free list: Stack of available slot indices
@@ -165,24 +166,28 @@ impl PageCache {
             map: PageHashMap::new(capacity),
             head: Cell::new(NULL),
             tail: Cell::new(NULL),
+            clock_hand: Cell::new(NULL),
             entries: vec![PageCacheEntry::empty(); capacity],
             freelist,
         }
     }

     #[inline]
-    fn link_front(&self, slot: usize) {
+    fn link_at_head(&self, slot: usize) {
         let old_head = self.head.replace(slot);
-
         self.entries[slot].next.set(old_head);
         self.entries[slot].prev.set(NULL);

         if old_head != NULL {
             self.entries[old_head].prev.set(slot);
         } else {
-            // list was empty
+            // List was empty, this is now both head and tail
             self.tail.set(slot);
         }
+        // If hand was NULL/list was empty, set it to the new element
+        if self.clock_hand.get() == NULL {
+            self.clock_hand.set(slot);
+        }
     }

     #[inline]
@@ -204,16 +209,6 @@ impl PageCache {
         self.entries[slot].reset_links();
     }

-    #[inline]
-    fn move_to_front(&self, slot: usize) {
-        if self.head.get() == slot {
-            return;
-        }
-        turso_assert!(self.entries[slot].page.is_some(), "must be linked");
-        self.unlink(slot);
-        self.link_front(slot);
-    }
-
     pub fn contains_key(&self, key: &PageCacheKey) -> bool {
         self.map.contains_key(key)
     }
@@ -251,11 +246,10 @@ impl PageCache {
             entry.page = Some(value);
             entry.clear_ref();
             self.map.insert(key, slot_index);
-            self.link_front(slot_index);
+            self.link_at_head(slot_index);
             return Ok(());
         }

-        self.move_to_front(slot);
         let existing = &mut self.entries[slot];
         existing.bump_ref();
         if update_in_place {
@@ -279,7 +273,7 @@ impl PageCache {
         // Sieve ref bit starts cleared, will be set on first access
         entry.clear_ref();
         self.map.insert(key, slot_index);
-        self.link_front(slot_index);
+        self.link_at_head(slot_index);
         Ok(())
     }

@@ -452,7 +446,7 @@ impl PageCache {
             new_map.insert(pl.key, slot);
         }
         for slot in 0..survivors.len().min(new_cap) {
-            self.link_front(slot);
+            self.link_at_head(slot);
         }
         self.map = new_map;

@@ -481,56 +475,69 @@ impl PageCache {
             return Err(CacheError::Full);
         }

-        let len = self.len();
-        let available = self.capacity.saturating_sub(len);
-        if n <= available && len <= self.capacity {
+        let available = self.capacity.saturating_sub(self.len());
+        if n <= available {
             return Ok(());
         }

-        const MAX_REF: usize = 3;
-        let len = self.len();
-        let available = self.capacity.saturating_sub(len);
         let mut need = n - available;
-        let mut examined = 0usize;
-        // allow enough rotations to drain max credits (+ one for the eviction pass)
-        let max_examinations = len.saturating_mul(MAX_REF + 1);
+        let mut examined = 0;
+        // Max ref bit value + 1
+        let max_examinations = self.len() * 4;
+
+        // Start from where we left off, or from tail if hand is invalid
+        let mut current = self.clock_hand.get();
+        if current == NULL || self.entries[current].page.is_none() {
+            current = self.tail.get();
+        }
+        let start_position = current;
+        let mut wrapped = false;

         while need > 0 && examined < max_examinations {
-            let tail_idx = match self.tail.get() {
-                NULL => {
-                    return
Err(CacheError::InternalError( - "Tail is None but map not empty".into(), - )) - } - t => t, - }; - - let s = &mut self.entries[tail_idx]; - turso_assert!(s.page.is_some(), "tail points to empty slot"); - let key = s.key; - let was_marked = !matches!(s.decrement_ref(), RefBit::Clear); - examined += 1; - - if was_marked { - self.move_to_front(tail_idx); - continue; + if current == NULL { + break; } + let entry = &self.entries[current]; + let next = entry.next.get(); - match self._delete(key, true) { - Ok(_) => { - need -= 1; - examined = 0; + if let Some(ref page) = entry.page { + if !page.is_dirty() && !page.is_locked() && !page.is_pinned() { + if matches!(entry.ref_bit.get(), RefBit::Clear) { + // Evict this page + let key = entry.key; + self.clock_hand + .set(if next != NULL { next } else { self.tail.get() }); + self._delete(key, true)?; + need -= 1; + examined = 0; + // After deletion, current is invalid, use hand + current = self.clock_hand.get(); + continue; + } else { + // Decrement and continue + entry.decrement_ref(); + examined += 1; + } + } else { + examined += 1; } - Err( - CacheError::Dirty { .. } - | CacheError::Locked { .. } - | CacheError::Pinned { .. }, - ) => { - self.move_to_front(tail_idx); - } - Err(e) => return Err(e), + } + // Move to next + if next != NULL { + current = next; + } else if !wrapped { + // Wrap around to tail once + current = self.tail.get(); + wrapped = true; + } else { + // We've wrapped and hit the end again + break; + } + // Stop if we've come full circle + if wrapped && current == start_position { + break; } } + self.clock_hand.set(current); if need > 0 { return Err(CacheError::Full); } From 39a47d67e692588aa0bc044d40c75bbd036a2e3d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 5 Sep 2025 11:54:02 -0400 Subject: [PATCH 10/13] Apply PR suggestions --- core/storage/page_cache.rs | 259 +++++++++++++++++++++++-------------- 1 file changed, 159 insertions(+), 100 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index 771ddba6b..211b13c9d 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -18,16 +18,8 @@ pub struct PageCacheKey(usize); const NULL: usize = usize::MAX; -#[derive(Clone, Copy, Debug)] -/// To distinguish between a page touched by a scan vs a 'hot' page, -/// we use a Reference Bit with 4 states, incrementing this bit up to a maximum -/// of 3 on each access, and decrementing on each eviction scan pass. 
-enum RefBit { - Clear, - Min, - Med, - Max, -} +const CLEAR: u8 = 0; +const REF_MAX: u8 = 3; #[derive(Clone, Debug)] struct PageCacheEntry { @@ -36,10 +28,10 @@ struct PageCacheEntry { /// The cached page, None if this slot is free page: Option, /// Reference bit for SIEVE algorithm - set on access, cleared during eviction scan - ref_bit: Cell, - /// Index of next entry in SIEVE queue (newer/toward head) + ref_bit: Cell, + /// Index of next entry in SIEVE queue (older/toward tail) next: Cell, - /// Index of previous entry in SIEVE queue (older/toward tail) + /// Index of previous entry in SIEVE queue (newer/toward head) prev: Cell, } @@ -48,7 +40,7 @@ impl Default for PageCacheEntry { Self { key: PageCacheKey(0), page: None, - ref_bit: Cell::new(RefBit::Clear), + ref_bit: Cell::new(CLEAR), next: Cell::new(NULL), prev: Cell::new(NULL), } @@ -58,28 +50,20 @@ impl Default for PageCacheEntry { impl PageCacheEntry { #[inline] fn bump_ref(&self) { - self.ref_bit.set(match self.ref_bit.get() { - RefBit::Clear => RefBit::Min, - RefBit::Min => RefBit::Med, - _ => RefBit::Max, - }); + self.ref_bit + .set(std::cmp::min(self.ref_bit.get() + 1, REF_MAX)); } #[inline] /// Returns the old value - fn decrement_ref(&self) -> RefBit { + fn decrement_ref(&self) -> u8 { let old = self.ref_bit.get(); - let new = match old { - RefBit::Max => RefBit::Med, - RefBit::Med => RefBit::Min, - _ => RefBit::Clear, - }; - self.ref_bit.set(new); + self.ref_bit.set(old.saturating_sub(1)); old } #[inline] fn clear_ref(&self) { - self.ref_bit.set(RefBit::Clear); + self.ref_bit.set(CLEAR); } #[inline] fn empty() -> Self { @@ -182,6 +166,10 @@ impl PageCache { self.entries[old_head].prev.set(slot); } else { // List was empty, this is now both head and tail + turso_assert!( + self.tail.get() == NULL, + "tail must be NULL if head was NULL" + ); self.tail.set(slot); } // If hand was NULL/list was empty, set it to the new element @@ -192,20 +180,19 @@ impl PageCache { #[inline] fn unlink(&self, slot: usize) { - let p = self.entries[slot].prev.get(); - let n = self.entries[slot].next.get(); + let prev = self.entries[slot].prev.get(); + let next = self.entries[slot].next.get(); - if p != NULL { - self.entries[p].next.set(n); + if prev != NULL { + self.entries[prev].next.set(next); } else { - self.head.set(n); + self.head.set(next); } - if n != NULL { - self.entries[n].prev.set(p); + if next != NULL { + self.entries[next].prev.set(prev); } else { - self.tail.set(p); + self.tail.set(prev); } - self.entries[slot].reset_links(); } @@ -230,7 +217,7 @@ impl PageCache { update_in_place: bool, ) -> Result<(), CacheError> { trace!("insert(key={:?})", key); - let slot = { self.map.get(&key) }; + let slot = self.map.get(&key); if let Some(slot) = slot { let p = self.entries[slot] .page @@ -289,8 +276,11 @@ impl PageCache { slot ); } - // Reset linkage on entry itself - self.entries[slot].reset_links(); + turso_assert!( + self.entries[slot].next.get() == NULL && self.entries[slot].prev.get() == NULL, + "freelist slot {} has non-NULL links", + slot + ); Ok(slot) } @@ -330,6 +320,16 @@ impl PageCache { let _ = entry.get().contents.take(); } + let next = self.entries[slot_idx].next.get(); + let prev = self.entries[slot_idx].prev.get(); + if self.clock_hand.get() == slot_idx { + self.clock_hand.set(match (prev, next) { + // prefer forward progress when possible + (_, n) if n != NULL => n, + (p, _) if p != NULL => p, + _ => NULL, // sole element + }); + } self.unlink(slot_idx); self.map.remove(&key); let entry = &mut self.entries[slot_idx]; @@ 
-357,7 +357,6 @@ impl PageCache { // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion // in one Statement, and trigger some error in the next one if we don't evict the page here. let entry = &self.entries[slot]; - entry.bump_ref(); let page = entry .page .as_ref() @@ -365,10 +364,10 @@ impl PageCache { .clone(); if !page.is_loaded() && !page.is_locked() { self.delete(*key)?; - Ok(None) - } else { - Ok(Some(page)) + return Ok(None); } + entry.bump_ref(); + Ok(Some(page)) } #[inline] @@ -408,7 +407,7 @@ impl PageCache { struct Payload { key: PageCacheKey, page: PageRef, - ref_bit: RefBit, + ref_bit: u8, } let survivors: Vec = { let entries = &self.entries; @@ -457,84 +456,84 @@ impl PageCache { for i in (used..new_cap).rev() { fl.push(i); } - + self.clock_hand.set(self.tail.get()); CacheResizeResult::Done } /// Ensures at least `n` free slots are available /// /// Uses the SIEVE algorithm to evict pages if necessary: - /// 1. Start at tail (LRU position) - /// 2. If page is marked, decrement mark and move to head - /// 3. If page mark was already Cleared, evict it - /// 4. If page is unevictable (dirty/locked/pinned), move to head + /// Start at tail (LRU position) + /// If page is marked, decrement mark + /// If page mark was already Cleared, evict it + /// If page is unevictable (dirty/locked/pinned), continue sweep /// /// Returns `CacheError::Full` if not enough pages can be evicted pub fn make_room_for(&mut self, n: usize) -> Result<(), CacheError> { if n > self.capacity { return Err(CacheError::Full); } - let available = self.capacity.saturating_sub(self.len()); if n <= available { return Ok(()); } + const MAX_REF: usize = 3; let mut need = n - available; - let mut examined = 0; - // Max ref bit value + 1 - let max_examinations = self.len() * 4; + let mut examined = 0usize; + let max_examinations = self.len().saturating_mul(MAX_REF + 1); - // Start from where we left off, or from tail if hand is invalid + // start where the hand left off, else from tail let mut current = self.clock_hand.get(); - if current == NULL || self.entries[current].page.is_none() { + if current == NULL || current >= self.capacity || self.entries[current].page.is_none() { current = self.tail.get(); } - let start_position = current; - let mut wrapped = false; + while need > 0 && examined < max_examinations { if current == NULL { break; } - let entry = &self.entries[current]; - let next = entry.next.get(); - - if let Some(ref page) = entry.page { - if !page.is_dirty() && !page.is_locked() && !page.is_pinned() { - if matches!(entry.ref_bit.get(), RefBit::Clear) { - // Evict this page - let key = entry.key; - self.clock_hand - .set(if next != NULL { next } else { self.tail.get() }); - self._delete(key, true)?; - need -= 1; - examined = 0; - // After deletion, current is invalid, use hand - current = self.clock_hand.get(); - continue; - } else { - // Decrement and continue - entry.decrement_ref(); + let forward = self.entries[current].prev.get(); + let evictable_and_clear = { + let e = &self.entries[current]; + if let Some(ref p) = e.page { + if p.is_dirty() || p.is_locked() || p.is_pinned() { examined += 1; + false + } else { + match e.ref_bit.get() { + CLEAR => true, + _ => { + e.decrement_ref(); // second chance + examined += 1; + false + } + } } } else { examined += 1; + false } - } - // Move to next - if next != NULL { - current = next; - } else if !wrapped { - // Wrap around to tail once - current = self.tail.get(); - wrapped = true; + }; + + 
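+            // A standalone restatement of the predicate above (sketch only;
+            // no `can_evict` helper exists in this patch, and this pure form
+            // drops the decrement side effect the sweep applies to pages
+            // that still hold credit):
+            //
+            //     fn can_evict(e: &PageCacheEntry) -> bool {
+            //         e.page
+            //             .as_ref()
+            //             .is_some_and(|p| !p.is_dirty() && !p.is_locked() && !p.is_pinned())
+            //             && e.ref_bit.get() == CLEAR
+            //     }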
if evictable_and_clear { + let key = self.entries[current].key; + // hand moves forward, if we were at head (forward == NULL), wrap to tail + self.clock_hand.set(if forward != NULL { + forward + } else { + self.tail.get() + }); + self._delete(key, true)?; + need -= 1; + examined = 0; + current = self.clock_hand.get(); } else { - // We've wrapped and hit the end again - break; - } - // Stop if we've come full circle - if wrapped && current == start_position { - break; + current = if forward != NULL { + forward + } else { + self.tail.get() + }; } } self.clock_hand.set(current); @@ -558,6 +557,7 @@ impl PageCache { self.map.clear(); self.head.set(NULL); self.tail.set(NULL); + self.clock_hand.set(NULL); let fl = &mut self.freelist; fl.clear(); for i in (0..self.capacity).rev() { @@ -732,6 +732,15 @@ impl PageCache { cnt, self.capacity ); + + let hand = self.clock_hand.get(); + if hand != NULL { + assert!(hand < self.capacity, "clock_hand out of bounds"); + assert!( + entries[hand].page.is_some(), + "clock_hand points to empty slot" + ); + } } #[cfg(test)] @@ -739,7 +748,7 @@ impl PageCache { self.map.get(key) } #[cfg(test)] - fn ref_of(&self, key: &PageCacheKey) -> Option { + fn ref_of(&self, key: &PageCacheKey) -> Option { self.slot_of(key).map(|i| self.entries[i].ref_bit.get()) } } @@ -1119,23 +1128,15 @@ mod tests { #[test] fn test_sieve_second_chance_preserves_marked_tail() { - // SIEVE gives a "second chance" to marked pages at the tail - // by clearing their bit and moving them to head - let mut cache = PageCache::new(3); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); let key3 = insert_page(&mut cache, 3); assert_eq!(cache.len(), 3); - // Mark the tail (key1) by accessing it assert!(cache.get(&key1).unwrap().is_some()); - // Insert 4: SIEVE process: - // 1. Examines tail (key1, marked) -> clear bit, move to head - // 2. New tail is key2 (unmarked) -> evict key2 let key4 = insert_page(&mut cache, 4); - assert!( cache.get(&key1).unwrap().is_some(), "key1 had ref bit set, got second chance" @@ -1604,6 +1605,37 @@ mod tests { ); } + #[test] + fn clock_drains_hot_page_within_single_sweep_when_others_are_unevictable() { + // capacity 3: [3(head), 2, 1(tail)] + let mut c = PageCache::new(3); + let k1 = insert_page(&mut c, 1); + let k2 = insert_page(&mut c, 2); + let _k3 = insert_page(&mut c, 3); + + // Make k1 hot: bump to Max + for _ in 0..3 { + assert!(c.get(&k1).unwrap().is_some()); + } + assert!(matches!(c.ref_of(&k1), Some(REF_MAX))); + + // Make other pages unevictable; clock must keep revisiting k1. + c.get(&k2).unwrap().unwrap().set_dirty(); + c.get(&_k3).unwrap().unwrap().set_dirty(); + + // Insert 4 -> sweep rotates as needed, draining k1 and evicting it. + let _k4 = insert_page(&mut c, 4); + + assert!( + c.get(&k1).unwrap().is_none(), + "k1 should be evicted after its credit drains" + ); + assert!(c.get(&k2).unwrap().is_some(), "k2 is dirty (unevictable)"); + assert!(c.get(&_k3).unwrap().is_some(), "k3 is dirty (unevictable)"); + assert!(c.get(&_k4).unwrap().is_some(), "k4 just inserted"); + c.verify_cache_integrity(); + } + #[test] fn gclock_hot_survives_scan_pages() { let mut c = PageCache::new(4); @@ -1616,7 +1648,7 @@ mod tests { for _ in 0..3 { assert!(c.get(&k2).unwrap().is_some()); } - assert!(matches!(c.ref_of(&k2), Some(RefBit::Max))); + assert!(matches!(c.ref_of(&k2), Some(REF_MAX))); // Now simulate a scan inserting new pages 5..10 (one-hit wonders). 
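        // Credit arithmetic behind this scan (sketch, using this patch's
        // constants): k2 sits at ref_bit == REF_MAX == 3, so the sweeping
        // hand must pass it three times to drain its credit before a fourth
        // pass can evict it, while each one-hit wonder (ref_bit == 0) is
        // evicted the first time the hand reaches it; hence make_room_for's
        // max_examinations = len * (REF_MAX + 1).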
for id in 5..=10 { @@ -1633,6 +1665,33 @@ mod tests { c.verify_cache_integrity(); } + #[test] + fn hand_stays_valid_after_deleting_only_element() { + let mut c = PageCache::new(2); + let k = insert_page(&mut c, 1); + assert!(c.delete(k).is_ok()); + // Inserting again should not panic and should succeed + let _ = insert_page(&mut c, 2); + c.verify_cache_integrity(); + } + + #[test] + fn hand_is_reset_after_clear_and_resize() { + let mut c = PageCache::new(3); + for i in 1..=3 { + let _ = insert_page(&mut c, i); + } + c.clear().unwrap(); + // No elements; insert should not rely on stale hand + let _ = insert_page(&mut c, 10); + + // Resize from 1 -> 4 and back should not OOB the hand + assert_eq!(c.resize(4), CacheResizeResult::Done); + assert_eq!(c.resize(1), CacheResizeResult::Done); + let _ = insert_page(&mut c, 11); + c.verify_cache_integrity(); + } + #[test] fn resize_preserves_ref_and_recency() { let mut c = PageCache::new(4); From b89513f03112f2842005b0d9b88e9b6323a9037f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 5 Sep 2025 12:22:38 -0400 Subject: [PATCH 11/13] remove useless saturating sub --- core/storage/page_cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index 211b13c9d..a6086dbaf 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -473,7 +473,7 @@ impl PageCache { if n > self.capacity { return Err(CacheError::Full); } - let available = self.capacity.saturating_sub(self.len()); + let available = self.capacity - self.len(); if n <= available { return Ok(()); } From 644d0f270b698640052cd4242e1d06eb96256e0f Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 5 Sep 2025 13:20:03 -0400 Subject: [PATCH 12/13] Add evict slot method in page cache --- core/storage/page_cache.rs | 88 +++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 26 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index a6086dbaf..f7e60d1b9 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -157,24 +157,31 @@ impl PageCache { } #[inline] - fn link_at_head(&self, slot: usize) { - let old_head = self.head.replace(slot); - self.entries[slot].next.set(old_head); - self.entries[slot].prev.set(NULL); - - if old_head != NULL { - self.entries[old_head].prev.set(slot); + fn link_after(&self, a: usize, b: usize) { + // insert `b` after `a` + let an = self.entries[a].next.get(); + self.entries[b].prev.set(a); + self.entries[b].next.set(an); + if an != NULL { + self.entries[an].prev.set(b); } else { - // List was empty, this is now both head and tail - turso_assert!( - self.tail.get() == NULL, - "tail must be NULL if head was NULL" - ); - self.tail.set(slot); + self.tail.set(b); } - // If hand was NULL/list was empty, set it to the new element - if self.clock_hand.get() == NULL { + self.entries[a].next.set(b); + } + + #[inline] + fn link_new_node(&self, slot: usize) { + let hand = self.clock_hand.get(); + if hand == NULL { + // first element + self.head.set(slot); + self.tail.set(slot); + self.entries[slot].prev.set(NULL); + self.entries[slot].next.set(NULL); self.clock_hand.set(slot); + } else { + self.link_after(hand, slot); } } @@ -233,7 +240,7 @@ impl PageCache { entry.page = Some(value); entry.clear_ref(); self.map.insert(key, slot_index); - self.link_at_head(slot_index); + self.link_new_node(slot_index); return Ok(()); } @@ -260,7 +267,7 @@ impl PageCache { // Sieve ref bit starts cleared, will be set on first access 
entry.clear_ref(); self.map.insert(key, slot_index); - self.link_at_head(slot_index); + self.link_new_node(slot_index); Ok(()) } @@ -445,7 +452,7 @@ impl PageCache { new_map.insert(pl.key, slot); } for slot in 0..survivors.len().min(new_cap) { - self.link_at_head(slot); + self.link_new_node(slot); } self.map = new_map; @@ -517,14 +524,7 @@ impl PageCache { }; if evictable_and_clear { - let key = self.entries[current].key; - // hand moves forward, if we were at head (forward == NULL), wrap to tail - self.clock_hand.set(if forward != NULL { - forward - } else { - self.tail.get() - }); - self._delete(key, true)?; + self.evict_slot(current, true)?; need -= 1; examined = 0; current = self.clock_hand.get(); @@ -566,6 +566,42 @@ impl PageCache { Ok(()) } + #[inline] + /// preconditions: slot contains Some(page) and is clean/unlocked/unpinned + fn evict_slot(&mut self, slot: usize, clean_page: bool) -> Result<(), CacheError> { + let key = self.entries[slot].key; + + if clean_page { + if let Some(ref p) = self.entries[slot].page { + p.clear_loaded(); + let _ = p.get().contents.take(); + } + } + + // advance the hand off of `slot` using your forward direction (prev == toward head) + let next = self.entries[slot].next.get(); + let prev = self.entries[slot].prev.get(); + if self.clock_hand.get() == slot { + self.clock_hand.set(match (prev, next) { + // prefer forward progress + (_, n) if n != NULL => n, + (p, _) if p != NULL => p, + _ => NULL, + }); + } + self.unlink(slot); + let _ = self.map.remove(&key); + + // recycle the slot + let e = &mut self.entries[slot]; + e.page = None; + e.clear_ref(); + e.reset_links(); + self.freelist.push(slot); + + Ok(()) + } + /// Removes all pages from the cache with pgno greater than len pub fn truncate(&mut self, len: usize) -> Result<(), CacheError> { let keys_to_delete: Vec = { From 01d64977d7260f8bea11014102bea7056ce71337 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Fri, 5 Sep 2025 16:12:54 -0400 Subject: [PATCH 13/13] Use more efficient circular list and rely on clock hand for pagecache --- core/storage/page_cache.rs | 800 ++++++++++++++++++------------------- 1 file changed, 395 insertions(+), 405 deletions(-) diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs index f7e60d1b9..c125f44c8 100644 --- a/core/storage/page_cache.rs +++ b/core/storage/page_cache.rs @@ -1,4 +1,3 @@ -use std::cell::Cell; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -27,12 +26,13 @@ struct PageCacheEntry { key: PageCacheKey, /// The cached page, None if this slot is free page: Option, - /// Reference bit for SIEVE algorithm - set on access, cleared during eviction scan - ref_bit: Cell, + /// Reference counter (SIEVE/GClock): starts at zero, bumped on access, + /// decremented during eviction, only pages at 0 are evicted. 
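+    /// For example (sketch): three hits take the counter to REF_MAX == 3,
+    /// after which three sweep passes bring it back to 0, and the next
+    /// pass can evict the page.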
+ ref_bit: u8, /// Index of next entry in SIEVE queue (older/toward tail) - next: Cell, + next: usize, /// Index of previous entry in SIEVE queue (newer/toward head) - prev: Cell, + prev: usize, } impl Default for PageCacheEntry { @@ -40,39 +40,38 @@ impl Default for PageCacheEntry { Self { key: PageCacheKey(0), page: None, - ref_bit: Cell::new(CLEAR), - next: Cell::new(NULL), - prev: Cell::new(NULL), + ref_bit: CLEAR, + next: NULL, + prev: NULL, } } } impl PageCacheEntry { #[inline] - fn bump_ref(&self) { - self.ref_bit - .set(std::cmp::min(self.ref_bit.get() + 1, REF_MAX)); + fn bump_ref(&mut self) { + self.ref_bit = std::cmp::min(self.ref_bit + 1, REF_MAX); } #[inline] /// Returns the old value - fn decrement_ref(&self) -> u8 { - let old = self.ref_bit.get(); - self.ref_bit.set(old.saturating_sub(1)); + fn decrement_ref(&mut self) -> u8 { + let old = self.ref_bit; + self.ref_bit = old.saturating_sub(1); old } #[inline] - fn clear_ref(&self) { - self.ref_bit.set(CLEAR); + fn clear_ref(&mut self) { + self.ref_bit = CLEAR; } #[inline] fn empty() -> Self { Self::default() } #[inline] - fn reset_links(&self) { - self.next.set(NULL); - self.prev.set(NULL); + fn reset_links(&mut self) { + self.next = NULL; + self.prev = NULL; } } @@ -81,21 +80,16 @@ impl PageCacheEntry { /// The bit is set to `Clear` on initial insertion and then bumped on each access and decremented /// during eviction scans. /// -/// - New pages enter at the head (MRU position) -/// - Eviction candidates are examined from the tail (LRU position) -/// - Each page has a reference bit that is set when accessed -/// - During eviction, if a page's reference bit is decremented -/// and the page moves to the head, Pages with Clear reference -/// bits are evicted immediately +/// The ring is circular. `clock_hand` points at the tail (LRU). +/// Sweep order follows next: tail (LRU) -> head (MRU) -> .. -> tail +/// New pages are inserted after the clock hand in the `next` direction, +/// which places them at head (MRU) (i.e. `tail.next` is the head). 
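+///
+/// A worked example of that layout (sketch): insert pages 1, 2, 3 in order.
+/// The hand stays on 1 (tail/LRU) and each insert goes right after the hand,
+/// so the `next` chain reads 1 -> 3 -> 2 -> 1: 3 is the head (MRU), and a
+/// sweep from the hand visits 1, then 3, then 2.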
pub struct PageCache { /// Capacity in pages capacity: usize, /// Map of Key -> usize in entries array map: PageHashMap, - /// Pointers to intrusive doubly-linked list for eviction order - head: Cell, - tail: Cell, - clock_hand: Cell, + clock_hand: usize, /// Fixed-size vec holding page entries entries: Vec, /// Free list: Stack of available slot indices @@ -148,61 +142,60 @@ impl PageCache { Self { capacity, map: PageHashMap::new(capacity), - head: Cell::new(NULL), - tail: Cell::new(NULL), - clock_hand: Cell::new(NULL), + clock_hand: NULL, entries: vec![PageCacheEntry::empty(); capacity], freelist, } } #[inline] - fn link_after(&self, a: usize, b: usize) { - // insert `b` after `a` - let an = self.entries[a].next.get(); - self.entries[b].prev.set(a); - self.entries[b].next.set(an); - if an != NULL { - self.entries[an].prev.set(b); - } else { - self.tail.set(b); - } - self.entries[a].next.set(b); + fn link_after(&mut self, a: usize, b: usize) { + // insert `b` after `a` in a non-empty circular list + let an = self.entries[a].next; + self.entries[b].prev = a; + self.entries[b].next = an; + self.entries[an].prev = b; + self.entries[a].next = b; } #[inline] - fn link_new_node(&self, slot: usize) { - let hand = self.clock_hand.get(); + fn link_new_node(&mut self, slot: usize) { + let hand = self.clock_hand; if hand == NULL { - // first element - self.head.set(slot); - self.tail.set(slot); - self.entries[slot].prev.set(NULL); - self.entries[slot].next.set(NULL); - self.clock_hand.set(slot); + // first element → points to itself + self.entries[slot].prev = slot; + self.entries[slot].next = slot; + self.clock_hand = slot; } else { + // insert after the hand (LRU) self.link_after(hand, slot); } } #[inline] - fn unlink(&self, slot: usize) { - let prev = self.entries[slot].prev.get(); - let next = self.entries[slot].next.get(); + fn unlink(&mut self, slot: usize) { + let p = self.entries[slot].prev; + let n = self.entries[slot].next; - if prev != NULL { - self.entries[prev].next.set(next); + if p == slot && n == slot { + self.clock_hand = NULL; } else { - self.head.set(next); - } - if next != NULL { - self.entries[next].prev.set(prev); - } else { - self.tail.set(prev); + self.entries[p].next = n; + self.entries[n].prev = p; + if self.clock_hand == slot { + // stay at LRU position, second-oldest becomes oldest + self.clock_hand = p; + } } + self.entries[slot].reset_links(); } + #[inline] + fn forward_of(&self, i: usize) -> usize { + self.entries[i].next + } + pub fn contains_key(&self, key: &PageCacheKey) -> bool { self.map.contains_key(key) } @@ -284,7 +277,7 @@ impl PageCache { ); } turso_assert!( - self.entries[slot].next.get() == NULL && self.entries[slot].prev.get() == NULL, + self.entries[slot].next == NULL && self.entries[slot].prev == NULL, "freelist slot {} has non-NULL links", slot ); @@ -295,7 +288,6 @@ impl PageCache { if !self.contains_key(&key) { return Ok(()); } - let slot_idx = self .map .get(&key) @@ -305,7 +297,6 @@ impl PageCache { .as_ref() .expect("page in map was None") .clone(); - if entry.is_locked() { return Err(CacheError::Locked { pgno: entry.get().id, @@ -321,28 +312,17 @@ impl PageCache { pgno: entry.get().id, }); } - if clean_page { entry.clear_loaded(); let _ = entry.get().contents.take(); } - - let next = self.entries[slot_idx].next.get(); - let prev = self.entries[slot_idx].prev.get(); - if self.clock_hand.get() == slot_idx { - self.clock_hand.set(match (prev, next) { - // prefer forward progress when possible - (_, n) if n != NULL => n, - (p, _) if p != NULL => p, - 
_ => NULL, // sole element - }); - } + // unlink from circular list and advance hand if needed self.unlink(slot_idx); self.map.remove(&key); - let entry = &mut self.entries[slot_idx]; - entry.page = None; - entry.clear_ref(); - entry.reset_links(); + let e = &mut self.entries[slot_idx]; + e.page = None; + e.clear_ref(); + e.reset_links(); self.freelist.push(slot_idx); Ok(()) } @@ -363,7 +343,7 @@ impl PageCache { // However, if we do not evict that page from the page cache, we will return an unloaded page later which will trigger // assertions later on. This is worsened by the fact that page cache is not per `Statement`, so you can abort a completion // in one Statement, and trigger some error in the next one if we don't evict the page here. - let entry = &self.entries[slot]; + let entry = &mut self.entries[slot]; let page = entry .page .as_ref() @@ -378,20 +358,18 @@ impl PageCache { } #[inline] - pub fn peek(&self, key: &PageCacheKey, touch: bool) -> Option { + pub fn peek(&mut self, key: &PageCacheKey, touch: bool) -> Option { let slot = self.map.get(key)?; - let entry = &self.entries[slot]; + let entry = &mut self.entries[slot]; let page = entry.page.as_ref()?.clone(); if touch { - // set reference bit to 'touch' page entry.bump_ref(); } Some(page) } /// Resizes the cache to a new capacity - /// - /// If shrinking, attempts to evict pages using the SIEVE algorithm. + /// If shrinking, attempts to evict pages. /// If growing, simply increases capacity. pub fn resize(&mut self, new_cap: usize) -> CacheResizeResult { if new_cap == self.capacity { @@ -399,7 +377,6 @@ impl PageCache { } if new_cap < self.len() { let need = self.len() - new_cap; - // repeat SIEVE passes until we evict `need` pages or give up let mut evicted = 0; while evicted < need { match self.make_room_for(1) { @@ -410,60 +387,63 @@ impl PageCache { } } assert!(new_cap > 0); - // Collect survivors as payload, no linkage + // Collect survivors starting from hand, one full cycle struct Payload { key: PageCacheKey, page: PageRef, ref_bit: u8, } let survivors: Vec = { - let entries = &self.entries; let mut v = Vec::with_capacity(self.len()); - // walk tail..head to preserve recency when re-linking via link_front - let mut cur = self.tail.get(); - while cur != NULL { - let e = &entries[cur]; - if let Some(ref p) = e.page { - v.push(Payload { - key: e.key, - page: p.clone(), - ref_bit: e.ref_bit.get(), - }); + let start = self.clock_hand; + if start != NULL { + let mut cur = start; + let mut seen = 0usize; + loop { + let e = &self.entries[cur]; + if let Some(ref p) = e.page { + v.push(Payload { + key: e.key, + page: p.clone(), + ref_bit: e.ref_bit, + }); + seen += 1; + } + cur = e.next; + if cur == start || seen >= self.len() { + break; + } } - cur = entries[cur].prev.get(); } v }; - - // Resize entry array; reset heads + // Rebuild storage self.entries.resize(new_cap, PageCacheEntry::empty()); self.capacity = new_cap; let mut new_map = PageHashMap::new(new_cap); - self.head.set(NULL); - self.tail.set(NULL); - // Repack compactly: survivors[tail..head] pushed to front -> final order == original - for (slot, pl) in survivors.iter().enumerate().take(new_cap) { - let e = &mut self.entries[slot]; - e.key = pl.key; - e.page = Some(pl.page.clone()); - e.ref_bit.set(pl.ref_bit); - e.reset_links(); - new_map.insert(pl.key, slot); - } - for slot in 0..survivors.len().min(new_cap) { - self.link_new_node(slot); + let used = survivors.len().min(new_cap); + for (i, item) in survivors.iter().enumerate().take(used) { + let e = &mut 
self.entries[i]; + e.key = item.key; + e.page = Some(item.page.clone()); + e.ref_bit = item.ref_bit; + // link circularly to neighbors by index + let prev = if i == 0 { used - 1 } else { i - 1 }; + let next = if i + 1 == used { 0 } else { i + 1 }; + e.prev = prev; + e.next = next; + new_map.insert(item.key, i); } self.map = new_map; - - // Rebuild freelist - let used = survivors.len().min(new_cap); - let fl = &mut self.freelist; - fl.clear(); + // hand points to slot 0 if there are survivors, else NULL + self.clock_hand = if used > 0 { 0 } else { NULL }; + // rebuild freelist + self.freelist.clear(); for i in (used..new_cap).rev() { - fl.push(i); + self.freelist.push(i); } - self.clock_hand.set(self.tail.get()); + CacheResizeResult::Done } @@ -474,6 +454,10 @@ impl PageCache { /// If page is marked, decrement mark /// If page mark was already Cleared, evict it /// If page is unevictable (dirty/locked/pinned), continue sweep + /// On sweep, pages with ref_bit > 0 are given a second chance by decrementing + /// their ref_bit and leaving them in place; only pages with ref_bit == 0 are evicted. + /// We never relocate nodes during sweeping. + /// because the list is circular, `tail.next == head` and `head.prev == tail`. /// /// Returns `CacheError::Full` if not enough pages can be evicted pub fn make_room_for(&mut self, n: usize) -> Result<(), CacheError> { @@ -485,37 +469,31 @@ impl PageCache { return Ok(()); } - const MAX_REF: usize = 3; let mut need = n - available; let mut examined = 0usize; - let max_examinations = self.len().saturating_mul(MAX_REF + 1); + let max_examinations = self.len().saturating_mul(REF_MAX as usize + 1); - // start where the hand left off, else from tail - let mut current = self.clock_hand.get(); - if current == NULL || current >= self.capacity || self.entries[current].page.is_none() { - current = self.tail.get(); + let mut cur = self.clock_hand; + if cur == NULL || cur >= self.capacity || self.entries[cur].page.is_none() { + return Err(CacheError::Full); } while need > 0 && examined < max_examinations { - if current == NULL { - break; - } - let forward = self.entries[current].prev.get(); + // compute the next candidate before mutating anything + let next = self.forward_of(cur); + let evictable_and_clear = { - let e = &self.entries[current]; + let e = &mut self.entries[cur]; if let Some(ref p) = e.page { if p.is_dirty() || p.is_locked() || p.is_pinned() { examined += 1; false + } else if e.ref_bit == CLEAR { + true } else { - match e.ref_bit.get() { - CLEAR => true, - _ => { - e.decrement_ref(); // second chance - examined += 1; - false - } - } + e.decrement_ref(); + examined += 1; + false } } else { examined += 1; @@ -524,19 +502,25 @@ impl PageCache { }; if evictable_and_clear { - self.evict_slot(current, true)?; + // Evict the current slot, then continue from the next candidate in sweep direction + self.evict_slot(cur, true)?; need -= 1; examined = 0; - current = self.clock_hand.get(); + + // move on; if the ring became empty, self.clock_hand may be NULL + cur = if next == cur { self.clock_hand } else { next }; + if cur == NULL { + if need == 0 { + break; + } + return Err(CacheError::Full); + } } else { - current = if forward != NULL { - forward - } else { - self.tail.get() - }; + // keep sweeping + cur = next; } } - self.clock_hand.set(current); + self.clock_hand = cur; if need > 0 { return Err(CacheError::Full); } @@ -555,13 +539,10 @@ impl PageCache { } self.entries.fill(PageCacheEntry::empty()); self.map.clear(); - self.head.set(NULL); - self.tail.set(NULL); 
- self.clock_hand.set(NULL); - let fl = &mut self.freelist; - fl.clear(); + self.clock_hand = NULL; + self.freelist.clear(); for i in (0..self.capacity).rev() { - fl.push(i); + self.freelist.push(i); } Ok(()) } @@ -570,29 +551,16 @@ impl PageCache { /// preconditions: slot contains Some(page) and is clean/unlocked/unpinned fn evict_slot(&mut self, slot: usize, clean_page: bool) -> Result<(), CacheError> { let key = self.entries[slot].key; - if clean_page { if let Some(ref p) = self.entries[slot].page { p.clear_loaded(); let _ = p.get().contents.take(); } } - - // advance the hand off of `slot` using your forward direction (prev == toward head) - let next = self.entries[slot].next.get(); - let prev = self.entries[slot].prev.get(); - if self.clock_hand.get() == slot { - self.clock_hand.set(match (prev, next) { - // prefer forward progress - (_, n) if n != NULL => n, - (p, _) if p != NULL => p, - _ => NULL, - }); - } + // unlink will advance the hand if it pointed to `slot` self.unlink(slot); let _ = self.map.remove(&key); - // recycle the slot let e = &mut self.entries[slot]; e.page = None; e.clear_ref(); @@ -636,7 +604,7 @@ impl PageCache { entry_opt.key, page.get().flags.load(Ordering::Relaxed), page.get().pin_count.load(Ordering::Relaxed), - entry_opt.ref_bit.get(), + entry_opt.ref_bit, ); } } @@ -675,105 +643,88 @@ impl PageCache { #[cfg(test)] fn verify_cache_integrity(&self) { - let entries = &self.entries; let map = &self.map; + let hand = self.clock_hand; - let head = self.head.get(); - let tail = self.tail.get(); - - // Head/tail base constraints - if head == NULL { - assert_eq!(tail, NULL, "tail must be NULL when head is NULL"); + if hand == NULL { assert_eq!(map.len(), 0, "map not empty but list is empty"); } else { - assert_eq!(entries[head].prev.get(), NULL, "head.prev must be NULL"); - } - if tail != NULL { - assert_eq!(entries[tail].next.get(), NULL, "tail.next must be NULL"); - } + // 0 = unseen, 1 = freelist, 2 = in list + let mut seen = vec![0u8; self.capacity]; + // Walk exactly map.len steps from hand, ensure circular closure + let mut cnt = 0usize; + let mut cur = hand; + loop { + let e = &self.entries[cur]; - // 0 = unseen, 1 = freelist, 2 = sieve - let mut seen = vec![0u8; self.capacity]; + assert!(e.page.is_some(), "list points to empty slot {cur}"); + assert_eq!(seen[cur], 0, "slot {cur} appears twice (list/freelist)"); + seen[cur] = 2; + cnt += 1; - // Walk SIEVE forward from head, check links and detect cycles - let mut cnt = 0usize; - let mut cur = head; - let mut prev = NULL; - let mut hops = 0usize; - while cur != NULL { - hops += 1; - assert!(hops <= self.capacity, "SIEVE cycle detected"); - let e = &entries[cur]; + let n = e.next; + let p = e.prev; + assert_eq!(self.entries[n].prev, cur, "broken next.prev at {cur}"); + assert_eq!(self.entries[p].next, cur, "broken prev.next at {cur}"); - assert!(e.page.is_some(), "SIEVE points to empty slot {cur}"); - assert_eq!(e.prev.get(), prev, "prev link broken at slot {cur}"); - assert_eq!(seen[cur], 0, "slot {cur} appears twice (SIEVE/freelist)"); - - seen[cur] = 2; - cnt += 1; - prev = cur; - cur = e.next.get(); - } - assert_eq!(tail, prev, "tail mismatch"); - assert_eq!( - cnt, - map.len(), - "list length {} != map size {}", - cnt, - map.len() - ); - - // Map bijection: every map entry must be on the SIEVE list with matching key - for node in map.iter() { - let slot = node.slot_index; - assert!( - entries[slot].page.is_some(), - "map points to empty slot {cur}", - ); + if n == hand { + break; + } + assert!(cnt <= 
map.len(), "cycle longer than map len"); + cur = n; + } assert_eq!( - entries[slot].key, node.key, - "map key mismatch at slot {cur}", + cnt, + map.len(), + "list length {} != map size {}", + cnt, + map.len() ); - assert_eq!(seen[slot], 2, "map slot {slot} not on SIEVE list"); + + // Map bijection + for node in map.iter() { + let slot = node.slot_index; + assert!( + self.entries[slot].page.is_some(), + "map points to empty slot" + ); + assert_eq!(self.entries[slot].key, node.key, "map key mismatch"); + assert_eq!(seen[slot], 2, "map slot {slot} not on list"); + } + + // Freelist disjointness + let mut free_count = 0usize; + for &s in &self.freelist { + free_count += 1; + assert_eq!(seen[s], 0, "slot {s} in both freelist and list"); + assert!( + self.entries[s].page.is_none(), + "freelist slot {s} has a page" + ); + assert_eq!(self.entries[s].next, NULL, "freelist slot {s} next != NULL"); + assert_eq!(self.entries[s].prev, NULL, "freelist slot {s} prev != NULL"); + seen[s] = 1; + } + + // No orphans: every slot is in list or freelist or unused beyond capacity + let orphans = seen.iter().filter(|&&v| v == 0).count(); + assert_eq!( + free_count + cnt + orphans, + self.capacity, + "free {} + list {} + orphans {} != capacity {}", + free_count, + cnt, + orphans, + self.capacity + ); + // In practice orphans==0; assertion above detects mismatches. } - // Freelist disjointness and shape: free slots must be unlinked and empty - let freelist = &self.freelist; - let mut free_count = 0usize; - for &s in freelist.iter() { - free_count += 1; - assert_eq!(seen[s], 0, "slot {s} in both freelist and SIEVE"); - assert!(entries[s].page.is_none(), "freelist slot {s} has a page"); - assert_eq!( - entries[s].next.get(), - NULL, - "freelist slot {s} next != NULL", - ); - assert_eq!( - entries[s].prev.get(), - NULL, - "freelist slot {s} prev != NULL", - ); - seen[s] = 1; - } - - // No orphans; partition covers capacity - let orphans = seen.iter().filter(|&&v| v == 0).count(); - assert_eq!(orphans, 0, "orphan slots detected: {orphans}"); - assert_eq!( - free_count + cnt, - self.capacity, - "free {} + sieve {} != capacity {}", - free_count, - cnt, - self.capacity - ); - - let hand = self.clock_hand.get(); + // Hand sanity if hand != NULL { assert!(hand < self.capacity, "clock_hand out of bounds"); assert!( - entries[hand].page.is_some(), + self.entries[hand].page.is_some(), "clock_hand points to empty slot" ); } @@ -785,7 +736,7 @@ impl PageCache { } #[cfg(test)] fn ref_of(&self, key: &PageCacheKey) -> Option { - self.slot_of(key).map(|i| self.entries[i].ref_bit.get()) + self.slot_of(key).map(|i| self.entries[i].ref_bit) } } @@ -952,8 +903,6 @@ mod tests { !cache.contains_key(&key1), "Cache should not contain key after delete" ); - assert_eq!(cache.head.get(), NULL, "Head should be NULL when empty"); - assert_eq!(cache.tail.get(), NULL, "Tail should be NULL when empty"); cache.verify_cache_integrity(); } @@ -961,19 +910,11 @@ mod tests { fn test_detach_tail() { let mut cache = PageCache::default(); let key1 = insert_page(&mut cache, 1); // tail - let key2 = insert_page(&mut cache, 2); // middle - let key3 = insert_page(&mut cache, 3); // head + let _key2 = insert_page(&mut cache, 2); // middle + let _key3 = insert_page(&mut cache, 3); // head cache.verify_cache_integrity(); assert_eq!(cache.len(), 3); - // Verify initial tail (key1 should be at tail) - let tail_slot = cache.tail.get(); - assert_ne!(tail_slot, NULL, "Tail should not be NULL"); - assert_eq!( - cache.entries[tail_slot].key, key1, - "Initial tail 
should be key1" - ); - // Delete tail assert!(cache.delete(key1).is_ok()); assert_eq!(cache.len(), 2, "Length should be 2 after deleting tail"); @@ -982,82 +923,6 @@ mod tests { "Cache should not contain deleted tail key" ); cache.verify_cache_integrity(); - - // Check new tail is key2 (next oldest) - let new_tail_slot = cache.tail.get(); - assert_ne!(new_tail_slot, NULL, "New tail should not be NULL"); - let entries = &cache.entries; - assert_eq!(entries[new_tail_slot].key, key2, "New tail should be key2"); - assert_eq!( - entries[new_tail_slot].next.get(), - NULL, - "New tail's next should be NULL" - ); - - // Verify head is key3 - let head_slot = cache.head.get(); - assert_ne!(head_slot, NULL, "Head should not be NULL"); - assert_eq!(entries[head_slot].key, key3, "Head should be key3"); - assert_eq!( - entries[head_slot].prev.get(), - NULL, - "Head's prev should be NULL" - ); - } - - #[test] - fn test_detach_middle() { - let mut cache = PageCache::default(); - let key1 = insert_page(&mut cache, 1); // Will be tail - let key2 = insert_page(&mut cache, 2); // Will be middle - let key3 = insert_page(&mut cache, 3); // Will be middle - let key4 = insert_page(&mut cache, 4); // Will be head - cache.verify_cache_integrity(); - assert_eq!(cache.len(), 4); - - // Verify initial state - let tail_slot = cache.tail.get(); - let head_slot = cache.head.get(); - assert_eq!(cache.entries[tail_slot].key, key1, "Initial tail check"); - assert_eq!(cache.entries[head_slot].key, key4, "Initial head check"); - - // Delete middle element (key2) - assert!(cache.delete(key2).is_ok()); - assert_eq!(cache.len(), 3, "Length should be 3 after deleting middle"); - assert!( - !cache.contains_key(&key2), - "Cache should not contain deleted middle key2" - ); - cache.verify_cache_integrity(); - - // Verify head and tail keys remain the same - let new_head_slot = cache.head.get(); - let new_tail_slot = cache.tail.get(); - let entries = &cache.entries; - assert_eq!( - entries[new_head_slot].key, key4, - "Head should still be key4" - ); - assert_eq!( - entries[new_tail_slot].key, key1, - "Tail should still be key1" - ); - - // Check that key3 and key1 are now properly linked - let key3_slot = cache.map.get(&key3).unwrap(); - let key1_slot = cache.map.get(&key1).unwrap(); - - // key3 should be between head(key4) and tail(key1) - assert_eq!( - entries[key3_slot].prev.get(), - new_head_slot, - "Key3's prev should point to head (key4)" - ); - assert_eq!( - entries[key3_slot].next.get(), - key1_slot, - "Key3's next should point to tail (key1)" - ); } #[test] @@ -1163,25 +1028,20 @@ mod tests { } #[test] - fn test_sieve_second_chance_preserves_marked_tail() { + fn clock_second_chance_decrements_tail_then_evicts_next() { let mut cache = PageCache::new(3); let key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); let key3 = insert_page(&mut cache, 3); assert_eq!(cache.len(), 3); - assert!(cache.get(&key1).unwrap().is_some()); - let key4 = insert_page(&mut cache, 4); + assert!(cache.get(&key1).unwrap().is_some(), "key1 should survive"); + assert!(cache.get(&key2).unwrap().is_some(), "key2 remains"); + assert!(cache.get(&key4).unwrap().is_some(), "key4 inserted"); assert!( - cache.get(&key1).unwrap().is_some(), - "key1 had ref bit set, got second chance" - ); - assert!(cache.get(&key3).unwrap().is_some(), "key3 should remain"); - assert!(cache.get(&key4).unwrap().is_some(), "key4 just inserted"); - assert!( - cache.get(&key2).unwrap().is_none(), - "key2 became new tail after key1's second chance and was evicted" 
+ cache.get(&key3).unwrap().is_none(), + "key3 (next after tail) evicted" ); assert_eq!(cache.len(), 3); cache.verify_cache_integrity(); @@ -1295,8 +1155,6 @@ mod tests { assert!(cache.get(&key1).unwrap().is_none()); assert!(cache.get(&key2).unwrap().is_none()); assert_eq!(cache.len(), 0); - assert_eq!(cache.head.get(), NULL); - assert_eq!(cache.tail.get(), NULL); cache.verify_cache_integrity(); } @@ -1322,15 +1180,9 @@ mod tests { #[test] fn test_detach_with_multiple_pages() { let mut cache = PageCache::default(); - let key1 = insert_page(&mut cache, 1); + let _key1 = insert_page(&mut cache, 1); let key2 = insert_page(&mut cache, 2); - let key3 = insert_page(&mut cache, 3); - - // Verify initial ordering (head=key3, tail=key1) - let head_slot = cache.head.get(); - let tail_slot = cache.tail.get(); - assert_eq!(cache.entries[head_slot].key, key3, "Head should be key3"); - assert_eq!(cache.entries[tail_slot].key, key1, "Tail should be key1"); + let _key3 = insert_page(&mut cache, 3); // Delete middle element (key2) assert!(cache.delete(key2).is_ok()); @@ -1339,31 +1191,6 @@ mod tests { assert_eq!(cache.len(), 2); assert!(!cache.contains_key(&key2)); - // Head and tail keys should remain the same - let new_head_slot = cache.head.get(); - let new_tail_slot = cache.tail.get(); - let entries = &cache.entries; - assert_eq!( - entries[new_head_slot].key, key3, - "Head should still be key3" - ); - assert_eq!( - entries[new_tail_slot].key, key1, - "Tail should still be key1" - ); - - // Check direct linkage between head and tail - assert_eq!( - entries[new_head_slot].next.get(), - new_tail_slot, - "Head's next should point directly to tail after middle deleted" - ); - assert_eq!( - entries[new_tail_slot].prev.get(), - new_head_slot, - "Tail's prev should point directly to head after middle deleted" - ); - cache.verify_cache_integrity(); } @@ -1385,18 +1212,6 @@ mod tests { ); cache.verify_cache_integrity(); - // Check new head is key2 - let head_slot = cache.head.get(); - assert_eq!( - cache.entries[head_slot].key, key2, - "New head should be key2" - ); - assert_eq!( - cache.entries[head_slot].prev.get(), - NULL, - "New head's prev should be NULL" - ); - // Delete tail (key1) assert!(cache.delete(key1).is_ok()); assert_eq!(cache.len(), 1, "Length should be 1 after deleting two"); @@ -1405,8 +1220,6 @@ mod tests { // Delete last element (key2) assert!(cache.delete(key2).is_ok()); assert_eq!(cache.len(), 0, "Length should be 0 after deleting all"); - assert_eq!(cache.head.get(), NULL, "Head should be NULL when empty"); - assert_eq!(cache.tail.get(), NULL, "Tail should be NULL when empty"); cache.verify_cache_integrity(); } @@ -1750,4 +1563,181 @@ mod tests { assert!(c.get(&k2).unwrap().is_some()); c.verify_cache_integrity(); } + + #[test] + fn test_sieve_second_chance_preserves_marked_page() { + let mut cache = PageCache::new(3); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let key3 = insert_page(&mut cache, 3); + + // Mark key1 for second chance + assert!(cache.get(&key1).unwrap().is_some()); + + let key4 = insert_page(&mut cache, 4); + // CLOCK sweep from hand: + // - key1 marked -> decrement, continue + // - key3 (MRU) unmarked -> evict + assert!( + cache.get(&key1).unwrap().is_some(), + "key1 had ref bit set, got second chance" + ); + assert!( + cache.get(&key3).unwrap().is_none(), + "key3 (MRU) should be evicted" + ); + assert!(cache.get(&key4).unwrap().is_some(), "key4 just inserted"); + assert!( + cache.get(&key2).unwrap().is_some(), + "key2 (middle) 
should remain" + ); + cache.verify_cache_integrity(); + } + + #[test] + fn test_clock_sweep_wraps_around() { + // Test that clock hand properly wraps around the circular list + let mut cache = PageCache::new(3); + let key1 = insert_page(&mut cache, 1); + let key2 = insert_page(&mut cache, 2); + let key3 = insert_page(&mut cache, 3); + + // Mark all pages + assert!(cache.get(&key1).unwrap().is_some()); + assert!(cache.get(&key2).unwrap().is_some()); + assert!(cache.get(&key3).unwrap().is_some()); + + // Insert 4: hand will sweep full circle, decrementing all refs + // then sweep again and evict first unmarked page + let key4 = insert_page(&mut cache, 4); + + // One page was evicted after full sweep + assert_eq!(cache.len(), 3); + assert!(cache.get(&key4).unwrap().is_some()); + + // Verify exactly one of the original pages was evicted + let survivors = [key1, key2, key3] + .iter() + .filter(|k| cache.get(k).unwrap().is_some()) + .count(); + assert_eq!(survivors, 2, "Should have 2 survivors from original 3"); + cache.verify_cache_integrity(); + } + + #[test] + fn test_circular_list_single_element() { + let mut cache = PageCache::new(3); + let key1 = insert_page(&mut cache, 1); + + // Single element should point to itself + let slot = cache.slot_of(&key1).unwrap(); + assert_eq!(cache.entries[slot].next, slot); + assert_eq!(cache.entries[slot].prev, slot); + + // Delete single element + assert!(cache.delete(key1).is_ok()); + assert_eq!(cache.clock_hand, NULL); + + // Insert after empty should work + let key2 = insert_page(&mut cache, 2); + let slot2 = cache.slot_of(&key2).unwrap(); + assert_eq!(cache.entries[slot2].next, slot2); + assert_eq!(cache.entries[slot2].prev, slot2); + cache.verify_cache_integrity(); + } + + #[test] + fn test_hand_advances_on_eviction() { + let mut cache = PageCache::new(2); + let _key1 = insert_page(&mut cache, 1); + let _key2 = insert_page(&mut cache, 2); + + // Note initial hand position + let initial_hand = cache.clock_hand; + + // Force eviction + let _key3 = insert_page(&mut cache, 3); + + // Hand should have advanced + let new_hand = cache.clock_hand; + assert_ne!(new_hand, NULL); + // Hand moved during sweep (exact position depends on eviction) + assert!(initial_hand == NULL || new_hand != initial_hand || cache.len() < 2); + cache.verify_cache_integrity(); + } + + #[test] + fn test_multi_level_ref_counting() { + let mut cache = PageCache::new(2); + let key1 = insert_page(&mut cache, 1); + let _key2 = insert_page(&mut cache, 2); + + // Bump key1 to MAX (3 accesses) + for _ in 0..3 { + assert!(cache.get(&key1).unwrap().is_some()); + } + assert_eq!(cache.ref_of(&key1), Some(REF_MAX)); + + // Insert multiple new pages - key1 should survive longer + for i in 3..6 { + let _ = insert_page(&mut cache, i); + } + + // key1 might still be there due to high ref count + // (depends on exact sweep pattern, but it got multiple chances) + cache.verify_cache_integrity(); + } + + #[test] + fn test_resize_maintains_circular_structure() { + let mut cache = PageCache::new(5); + for i in 1..=4 { + let _ = insert_page(&mut cache, i); + } + + // Resize smaller + assert_eq!(cache.resize(2), CacheResizeResult::Done); + assert_eq!(cache.len(), 2); + + // Verify circular structure + if cache.clock_hand != NULL { + let start = cache.clock_hand; + let mut current = start; + let mut count = 0; + loop { + count += 1; + current = cache.entries[current].next; + if current == start { + break; + } + assert!(count <= cache.len(), "Circular list broken after resize"); + } + assert_eq!(count, 
cache.len());
+        }
+        cache.verify_cache_integrity();
+    }
+
+    #[test]
+    fn test_link_after_correctness() {
+        let mut cache = PageCache::new(4);
+        let key1 = insert_page(&mut cache, 1);
+        let key2 = insert_page(&mut cache, 2);
+        let key3 = insert_page(&mut cache, 3);
+
+        // Verify circular linkage
+        let slot1 = cache.slot_of(&key1).unwrap();
+        let slot2 = cache.slot_of(&key2).unwrap();
+        let slot3 = cache.slot_of(&key3).unwrap();
+
+        // Should form a circle: 3 -> 2 -> 1 -> 3 (newest to oldest via `next`)
+        assert_eq!(cache.entries[slot3].next, slot2);
+        assert_eq!(cache.entries[slot2].next, slot1);
+        assert_eq!(cache.entries[slot1].next, slot3);
+
+        assert_eq!(cache.entries[slot3].prev, slot1);
+        assert_eq!(cache.entries[slot2].prev, slot3);
+        assert_eq!(cache.entries[slot1].prev, slot2);
+
+        cache.verify_cache_integrity();
+    }
 }
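For intuition about the eviction policy this patch adopts, here is a minimal, self-contained sketch of a second-chance sweep over a circular slot array. The `Slot` struct and `sweep` function are illustrative stand-ins, not the `PageCache` API above, and the dirty/locked/pinned guards are omitted:

    // Hypothetical, simplified model of the second-chance sweep; not the
    // PageCache types from this patch.
    struct Slot {
        ref_bit: u8, // second-chance counter; 0 means evictable
        next: usize, // index of the next slot in the ring
    }

    // Walk the ring from `hand`: marked slots are decremented and skipped,
    // and the first slot whose counter is already zero is the victim.
    fn sweep(slots: &mut [Slot], hand: usize) -> usize {
        let mut cur = hand;
        loop {
            if slots[cur].ref_bit == 0 {
                return cur;
            }
            slots[cur].ref_bit -= 1; // give it a second chance
            cur = slots[cur].next;
        }
    }

    fn main() {
        // Ring 0 -> 1 -> 2 -> 0; only slot 0 was recently referenced.
        let mut slots = vec![
            Slot { ref_bit: 1, next: 1 },
            Slot { ref_bit: 0, next: 2 },
            Slot { ref_bit: 0, next: 0 },
        ];
        // The hand decrements slot 0 and moves on; slot 1 is the victim.
        assert_eq!(sweep(&mut slots, 0), 1);
    }

Because no slot is ever relocated, the hand makes forward progress on every step, and the sweep terminates as long as the ring is non-empty.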