From 2ca388d78dd27a60d4a175006cecc4867afe31f7 Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 16 Oct 2025 22:00:01 +0300 Subject: [PATCH] WAL: don't hold shared lock across IO operations Without this change, running: ``` cd stress && cargo run -- --nr-threads=4 -i 1000 --verbose --busy-timeout=0 ``` produces a deadlock quite reliably. With this change, I can't reproduce it. Even with a 5-second busy timeout (the default), the run makes progress, although it is slow as hell because of the busy timeout. --- core/storage/wal.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/core/storage/wal.rs b/core/storage/wal.rs index fed9f15ad..1fd114a94 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -1322,10 +1322,12 @@ impl Wal for WalFile { tracing::debug!("wal_sync finish"); syncing.store(false, Ordering::SeqCst); }); - let shared = self.get_shared(); + let file = { + let shared = self.get_shared(); + assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); + shared.file.as_ref().unwrap().clone() + }; self.syncing.store(true, Ordering::SeqCst); - assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); - let file = shared.file.as_ref().unwrap(); let c = file.sync(completion)?; Ok(c) } @@ -1575,9 +1577,11 @@ impl Wal for WalFile { let c = Completion::new_write_linked(cmp); - let shared = self.get_shared(); - assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); - let file = shared.file.as_ref().unwrap(); + let file = { + let shared = self.get_shared(); + assert!(shared.enabled.load(Ordering::SeqCst), "WAL must be enabled"); + shared.file.as_ref().unwrap().clone() + }; let c = file.pwritev(start_off, iovecs, c)?; Ok(c) }