Remove inefficient vec in WAL for tracking pages present in frame cache

This commit is contained in:
PThorpe92
2025-08-15 09:26:50 -04:00
parent d6d72d2966
commit d2c3ba14c8
2 changed files with 14 additions and 31 deletions

View File

@@ -1596,7 +1596,6 @@ pub fn read_entire_wal_dumb(file: &Arc<dyn File>) -> Result<Arc<UnsafeCell<WalFi
max_frame: AtomicU64::new(0),
nbackfills: AtomicU64::new(0),
frame_cache: Arc::new(SpinLock::new(HashMap::new())),
pages_in_frames: Arc::new(SpinLock::new(Vec::new())),
last_checksum: (0, 0),
file: file.clone(),
read_locks,
@@ -1762,10 +1761,6 @@ pub fn read_entire_wal_dumb(file: &Arc<dyn File>) -> Result<Arc<UnsafeCell<WalFi
.entry(frame_h_page_number as u64)
.or_default()
.push(frame_idx);
wfs_data
.pages_in_frames
.lock()
.push(frame_h_page_number as u64);
let is_commit_record = frame_h_db_size > 0;
if is_commit_record {
@@ -1780,20 +1775,23 @@ pub fn read_entire_wal_dumb(file: &Arc<dyn File>) -> Result<Arc<UnsafeCell<WalFi
let max_frame = wfs_data.max_frame.load(Ordering::SeqCst);
// cleanup in-memory index from tail frames which was written after the last commited frame
let mut pages_in_frames = wfs_data.pages_in_frames.lock();
// cleanup in-memory index from tail frames which was written after the last committed frame
let mut frame_cache = wfs_data.frame_cache.lock();
for i in max_frame as usize..pages_in_frames.len() {
let page = pages_in_frames[i];
let Some(cached) = frame_cache.get_mut(&page) else {
panic!("page from pages_in_frames must be in the frame_cache");
};
while !cached.is_empty() && *cached.last().unwrap() > max_frame {
cached.pop();
for (page, frames) in frame_cache.iter_mut() {
// remove any frame IDs > max_frame
let original_len = frames.len();
frames.retain(|&frame_id| frame_id <= max_frame);
if frames.len() < original_len {
tracing::debug!(
"removed {} frame(s) from page {} from the in-memory WAL index because they were written after the last committed frame {}",
original_len - frames.len(),
page,
max_frame
);
}
tracing::debug!("remove page {page} from the in-memory WAL index because it was written after the last commited frame");
}
pages_in_frames.truncate(max_frame as usize);
// also remove any pages that now have no frames
frame_cache.retain(|_page, frames| !frames.is_empty());
wfs_data.nbackfills.store(0, Ordering::SeqCst);
wfs_data.loaded.store(true, Ordering::SeqCst);

View File

@@ -647,8 +647,6 @@ pub struct WalFileShared {
// we don't need WAL's index file. So we can do stuff like this without shared memory.
// TODO: this will need refactoring because this is incredible memory inefficient.
pub frame_cache: Arc<SpinLock<HashMap<u64, Vec<u64>>>>,
// Another memory inefficient array made to just keep track of pages that are in frame_cache.
pub pages_in_frames: Arc<SpinLock<Vec<u64>>>,
pub last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
pub file: Arc<dyn File>,
@@ -676,7 +674,6 @@ impl fmt::Debug for WalFileShared {
.field("max_frame", &self.max_frame)
.field("nbackfills", &self.nbackfills)
.field("frame_cache", &self.frame_cache)
.field("pages_in_frames", &self.pages_in_frames)
.field("last_checksum", &self.last_checksum)
// Excluding `file`, `read_locks`, and `write_lock`
.finish()
@@ -1330,10 +1327,6 @@ impl Wal for WalFile {
}
frames.truncate(last_valid_frame);
}
let mut pages_in_frames = shared.pages_in_frames.lock();
pages_in_frames.retain(|page| {
frame_cache.contains_key(page) && !frame_cache.get(page).unwrap().is_empty()
});
}
self.last_checksum = shared.last_checksum;
}
@@ -1453,15 +1446,10 @@ impl WalFile {
let mut frame_cache = shared.frame_cache.lock();
match frame_cache.get_mut(&page_id) {
Some(frames) => {
if frames.is_empty() {
let mut pages_in_frames = shared.pages_in_frames.lock();
pages_in_frames.push(page_id);
}
frames.push(frame_id);
}
None => {
frame_cache.insert(page_id, vec![frame_id]);
shared.pages_in_frames.lock().push(page_id);
}
}
}
@@ -2033,7 +2021,6 @@ impl WalFileShared {
frame_cache: Arc::new(SpinLock::new(HashMap::new())),
last_checksum: (0, 0),
file,
pages_in_frames: Arc::new(SpinLock::new(Vec::new())),
read_locks,
write_lock: TursoRwLock::new(),
checkpoint_lock: TursoRwLock::new(),
@@ -2079,8 +2066,6 @@ impl WalFileShared {
}
self.frame_cache.lock().clear();
self.pages_in_frames.lock().clear();
// read-marks
self.read_locks[0].set_value_exclusive(0);
self.read_locks[1].set_value_exclusive(0);