From 1b514e6d0fce8b362ab61846df84ec2ab500af7e Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 24 Aug 2025 11:09:24 -0400 Subject: [PATCH 1/3] Only checkpoint final remaining DB connection, and use Truncate mode --- core/lib.rs | 37 +++++++++++++++++++++++++++++++++---- core/storage/pager.rs | 26 +++++++++++++++++++++----- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index 7f35f8861..aeffb58c5 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -71,7 +71,7 @@ use std::{ num::NonZero, ops::Deref, rc::Rc, - sync::{Arc, LazyLock, Mutex, Weak}, + sync::{atomic::AtomicUsize, Arc, LazyLock, Mutex, Weak}, }; #[cfg(feature = "fs")] use storage::database::DatabaseFile; @@ -137,6 +137,7 @@ pub struct Database { open_flags: OpenFlags, builtin_syms: RefCell, experimental_views: bool, + n_connections: AtomicUsize, } unsafe impl Send for Database {} @@ -185,6 +186,12 @@ impl fmt::Debug for Database { }; debug_struct.field("page_cache", &cache_info); + debug_struct.field( + "n_connections", + &self + .n_connections + .load(std::sync::atomic::Ordering::Relaxed), + ); debug_struct.finish() } } @@ -372,6 +379,7 @@ impl Database { init_lock: Arc::new(Mutex::new(())), experimental_views: enable_views, buffer_pool: BufferPool::begin_init(&io, arena_size), + n_connections: AtomicUsize::new(0), }); db.register_global_builtin_extensions() .expect("unable to register global extensions"); @@ -425,6 +433,8 @@ impl Database { .unwrap_or_default() .get(); + self.n_connections + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); let conn = Arc::new(Connection { _db: self.clone(), pager: RefCell::new(Rc::new(pager)), @@ -888,6 +898,17 @@ pub struct Connection { encryption_key: RefCell>, } +impl Drop for Connection { + fn drop(&mut self) { + if !self.closed.get() { + // if connection wasn't properly closed, decrement the connection counter + self._db + .n_connections + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); + } + } +} + impl Connection { #[instrument(skip_all, level = Level::INFO)] pub fn prepare(self: &Arc, sql: impl AsRef) -> Result { @@ -1506,9 +1527,17 @@ impl Connection { } } - self.pager - .borrow() - .checkpoint_shutdown(self.wal_auto_checkpoint_disabled.get()) + if self + ._db + .n_connections + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed) + .eq(&1) + { + self.pager + .borrow() + .checkpoint_shutdown(self.wal_auto_checkpoint_disabled.get()); + }; + Ok(()) } pub fn wal_auto_checkpoint_disable(&self) { diff --git a/core/storage/pager.rs b/core/storage/pager.rs index 1149acf61..cc528cd74 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -1555,8 +1555,18 @@ impl Pager { .expect("Failed to clear page cache"); } + /// Checkpoint in Truncate mode and delete the WAL file. This method is _only_ to be called + /// for shutting down the last remaining connection to a database. + /// + /// sqlite3.h + /// Usually, when a database in [WAL mode] is closed or detached from a + /// database handle, SQLite checks if if there are other connections to the + /// same database, and if there are no other database connection (if the + /// connection being closed is the last open connection to the database), + /// then SQLite performs a [checkpoint] before closing the connection and + /// deletes the WAL file. pub fn checkpoint_shutdown(&self, wal_auto_checkpoint_disabled: bool) -> Result<()> { - let mut _attempts = 0; + let mut attempts = 0; { let Some(wal) = self.wal.as_ref() else { return Err(LimboError::InternalError( @@ -1565,16 +1575,22 @@ impl Pager { }; let mut wal = wal.borrow_mut(); // fsync the wal syncronously before beginning checkpoint - // TODO: for now forget about timeouts as they fail regularly in SIM - // need to think of a better way to do this let c = wal.sync()?; self.io.wait_for_completion(c)?; } if !wal_auto_checkpoint_disabled { - self.wal_checkpoint(CheckpointMode::Passive { + while let Err(LimboError::Busy) = self.wal_checkpoint(CheckpointMode::Truncate { upper_bound_inclusive: None, - })?; + }) { + if attempts == 3 { + // don't return error on `close` if we are unable to checkpoint, we can + // silently fail + return Ok(()); + } + attempts += 1; + } } + // TODO: delete the WAL file here after truncate checkpoint Ok(()) } From 748e339f68a4b1bc259a7b84ba70c0488bc9451a Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Sun, 24 Aug 2025 12:07:42 -0400 Subject: [PATCH 2/3] Make clippy happy --- core/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/lib.rs b/core/lib.rs index aeffb58c5..f9f76fe91 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1535,7 +1535,7 @@ impl Connection { { self.pager .borrow() - .checkpoint_shutdown(self.wal_auto_checkpoint_disabled.get()); + .checkpoint_shutdown(self.wal_auto_checkpoint_disabled.get())?; }; Ok(()) } From 2d661e3304a3ded1678b7a289720b2b47d994aac Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 25 Aug 2025 16:56:43 -0400 Subject: [PATCH 3/3] Apply review suggestions, add logging --- core/lib.rs | 4 ++-- core/storage/pager.rs | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index f9f76fe91..4ea22bfd1 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -433,8 +433,6 @@ impl Database { .unwrap_or_default() .get(); - self.n_connections - .fetch_add(1, std::sync::atomic::Ordering::Relaxed); let conn = Arc::new(Connection { _db: self.clone(), pager: RefCell::new(Rc::new(pager)), @@ -466,6 +464,8 @@ impl Database { is_nested_stmt: Cell::new(false), encryption_key: RefCell::new(None), }); + self.n_connections + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); let builtin_syms = self.builtin_syms.borrow(); // add built-in extensions symbols to the connection to prevent having to load each time conn.syms.borrow_mut().extend(&builtin_syms); diff --git a/core/storage/pager.rs b/core/storage/pager.rs index cc528cd74..f4ba0fc5f 100644 --- a/core/storage/pager.rs +++ b/core/storage/pager.rs @@ -1583,14 +1583,17 @@ impl Pager { upper_bound_inclusive: None, }) { if attempts == 3 { - // don't return error on `close` if we are unable to checkpoint, we can - // silently fail + // don't return error on `close` if we are unable to checkpoint, we can silently fail + tracing::warn!( + "Failed to checkpoint WAL on shutdown after 3 attempts, giving up" + ); return Ok(()); } attempts += 1; } } - // TODO: delete the WAL file here after truncate checkpoint + // TODO: delete the WAL file here after truncate checkpoint, but *only* if we are sure that + // no other connections have opened since. Ok(()) }