diff --git a/Cargo.lock b/Cargo.lock
index e44b703e2..26d3b538c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4935,6 +4935,7 @@ dependencies = [
  "roaring",
  "rstest",
  "rusqlite",
+ "rustc-hash",
  "rustix 1.0.7",
  "ryu",
  "serde",
diff --git a/core/Cargo.toml b/core/Cargo.toml
index b913b62f2..30bd91e67 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -84,6 +84,7 @@ intrusive-collections = "0.9.7"
 roaring = "0.11.2"
 simsimd = "6.5.3"
 arc-swap = "1.7"
+rustc-hash = "2.0"
 
 [build-dependencies]
 chrono = { workspace = true, default-features = false }
diff --git a/core/lib.rs b/core/lib.rs
index b5a6f3e10..a276af239 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -67,6 +67,7 @@ pub use io::{
     SyscallIO, WriteCompletion, IO,
 };
 use parking_lot::RwLock;
+use rustc_hash::FxHashMap;
 use schema::Schema;
 use std::task::Waker;
 use std::{
@@ -601,7 +602,7 @@ impl Database {
             db: self.clone(),
             pager: ArcSwap::new(pager),
             schema: RwLock::new(self.schema.lock().unwrap().clone()),
-            database_schemas: RwLock::new(std::collections::HashMap::new()),
+            database_schemas: RwLock::new(FxHashMap::default()),
             auto_commit: AtomicBool::new(true),
             transaction_state: AtomicTransactionState::new(TransactionState::None),
             last_insert_rowid: AtomicI64::new(0),
@@ -1109,7 +1110,7 @@ pub struct Connection {
     schema: RwLock>,
     /// Per-database schema cache (database_index -> schema)
    /// Loaded lazily to avoid copying all schemas on connection open
-    database_schemas: RwLock>>,
+    database_schemas: RwLock>>,
     /// Whether to automatically commit transaction
     auto_commit: AtomicBool,
     transaction_state: AtomicTransactionState,
diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index ed1fbc84e..daae63d16 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -1,3 +1,4 @@
+use rustc_hash::FxHashMap;
 use tracing::{instrument, Level};
 
 use crate::{
@@ -43,7 +44,7 @@ use std::{
     any::Any,
     cell::{Cell, Ref, RefCell},
     cmp::{Ordering, Reverse},
-    collections::{BinaryHeap, HashMap},
+    collections::BinaryHeap,
     fmt::Debug,
     ops::DerefMut,
     pin::Pin,
@@ -5805,7 +5806,7 @@ pub struct IntegrityCheckState {
     page_stack: Vec,
     pub db_size: usize,
     first_leaf_level: Option,
-    pub page_reference: HashMap,
+    pub page_reference: FxHashMap,
     page: Option,
     pub freelist_count: CheckFreelist,
 }
@@ -5815,7 +5816,7 @@ impl IntegrityCheckState {
         Self {
             page_stack: Vec::new(),
             db_size,
-            page_reference: HashMap::new(),
+            page_reference: FxHashMap::default(),
             first_leaf_level: None,
             page: None,
             freelist_count: CheckFreelist {
diff --git a/core/storage/page_cache.rs b/core/storage/page_cache.rs
index 25bfe357a..e8ac4e657 100644
--- a/core/storage/page_cache.rs
+++ b/core/storage/page_cache.rs
@@ -1,8 +1,6 @@
 use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListLink};
-use std::{
-    collections::HashMap,
-    sync::{atomic::Ordering, Arc},
-};
+use rustc_hash::FxHashMap;
+use std::sync::{atomic::Ordering, Arc};
 use tracing::trace;
 
 use crate::turso_assert;
@@ -74,7 +72,7 @@ pub struct PageCache {
     /// Capacity in pages
     capacity: usize,
     /// Map of Key -> pointer to entry in the queue
-    map: HashMap,
+    map: FxHashMap,
     /// The eviction queue (intrusive doubly-linked list)
     queue: LinkedList,
     /// Clock hand cursor for SIEVE eviction (pointer to an entry in the queue, or null)
@@ -119,7 +117,7 @@ impl PageCache {
         assert!(capacity > 0);
         Self {
             capacity,
-            map: HashMap::new(),
+            map: FxHashMap::default(),
             queue: LinkedList::new(EntryAdapter::new()),
             clock_hand: std::ptr::null_mut(),
         }
diff --git a/core/storage/pager.rs b/core/storage/pager.rs
index 2d37f9d74..315e2557c 100644
--- a/core/storage/pager.rs
+++ b/core/storage/pager.rs
@@ -19,8 +19,7 @@ use crate::{io_yield_one, CompletionError, IOContext, OpenFlags, IO};
 use parking_lot::RwLock;
 use roaring::RoaringBitmap;
 use std::cell::{RefCell, UnsafeCell};
-use std::collections::HashSet;
-use std::hash;
+use std::collections::BTreeSet;
 use std::rc::Rc;
 use std::sync::atomic::{
     AtomicBool, AtomicU16, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering,
@@ -513,7 +512,11 @@ pub struct Pager {
     pub buffer_pool: Arc,
     /// I/O interface for input/output operations.
     pub io: Arc,
-    dirty_pages: Arc>>>,
+    /// Dirty pages sorted by page number.
+    ///
+    /// We need dirty pages in page number order when we flush them out to ensure
+    /// that the WAL we generate is compatible with SQLite.
+    dirty_pages: Arc>>,
     subjournal: RwLock>,
     savepoints: Arc>>,
     commit_info: RwLock,
@@ -635,9 +638,7 @@ impl Pager {
             wal,
             page_cache,
             io,
-            dirty_pages: Arc::new(RwLock::new(HashSet::with_hasher(
-                hash::BuildHasherDefault::new(),
-            ))),
+            dirty_pages: Arc::new(RwLock::new(BTreeSet::new())),
             subjournal: RwLock::new(None),
             savepoints: Arc::new(RwLock::new(Vec::new())),
             commit_info: RwLock::new(CommitInfo {
diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs
index 93f897dc1..c83131983 100644
--- a/core/storage/sqlite3_ondisk.rs
+++ b/core/storage/sqlite3_ondisk.rs
@@ -66,7 +66,8 @@ use crate::{
     bail_corrupt_error, turso_assert, CompletionError, File, IOContext, Result, WalFileShared,
 };
 use parking_lot::RwLock;
-use std::collections::{BTreeMap, HashMap};
+use rustc_hash::FxHashMap;
+use std::collections::BTreeMap;
 use std::mem::MaybeUninit;
 use std::pin::Pin;
 use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
@@ -1646,7 +1647,7 @@
         max_frame: AtomicU64::new(0),
         nbackfills: AtomicU64::new(0),
         transaction_count: AtomicU64::new(0),
-        frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+        frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
         last_checksum: (0, 0),
         file: Some(file.clone()),
         read_locks,
@@ -1711,7 +1712,7 @@ struct StreamingState {
     frame_idx: u64,
     cumulative_checksum: (u32, u32),
     last_valid_frame: u64,
-    pending_frames: HashMap>,
+    pending_frames: FxHashMap>,
     page_size: usize,
     use_native_endian: bool,
     header_valid: bool,
@@ -1736,7 +1737,7 @@ impl StreamingWalReader {
             frame_idx: 1,
             cumulative_checksum: (0, 0),
             last_valid_frame: 0,
-            pending_frames: HashMap::new(),
+            pending_frames: FxHashMap::default(),
             page_size: 0,
             use_native_endian: false,
             header_valid: false,
diff --git a/core/storage/wal.rs b/core/storage/wal.rs
index 941e07f4d..5983e89ae 100644
--- a/core/storage/wal.rs
+++ b/core/storage/wal.rs
@@ -1,8 +1,9 @@
 #![allow(clippy::not_unsafe_ptr_arg_deref)]
 
+use rustc_hash::{FxHashMap, FxHashSet};
 use std::array;
 use std::borrow::Cow;
-use std::collections::{BTreeMap, HashMap, HashSet};
+use std::collections::BTreeMap;
 use strum::EnumString;
 use tracing::{instrument, Level};
 
@@ -679,7 +680,7 @@ pub struct WalFileShared {
     // One difference between SQLite and limbo is that we will never support multi process, meaning
     // we don't need WAL's index file. So we can do stuff like this without shared memory.
     // TODO: this will need refactoring because this is incredible memory inefficient.
-    pub frame_cache: Arc>>>,
+    pub frame_cache: Arc>>>,
     pub last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
     pub file: Option>,
     /// Read locks advertise the maximum WAL frame a reader may access.
@@ -1394,7 +1395,7 @@ impl Wal for WalFile {
         let frame_count = self.get_max_frame();
         let page_size = self.page_size();
         let mut frame = vec![0u8; page_size as usize + WAL_FRAME_HEADER_SIZE];
-        let mut seen = HashSet::new();
+        let mut seen = FxHashSet::default();
         turso_assert!(
             frame_count >= frame_watermark,
             "frame_count must be not less than frame_watermark: {} vs {}",
@@ -2353,7 +2354,7 @@ impl WalFileShared {
             max_frame: AtomicU64::new(0),
             nbackfills: AtomicU64::new(0),
             transaction_count: AtomicU64::new(0),
-            frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+            frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
             last_checksum: (0, 0),
             file: None,
             read_locks,
@@ -2398,7 +2399,7 @@ impl WalFileShared {
             max_frame: AtomicU64::new(0),
             nbackfills: AtomicU64::new(0),
             transaction_count: AtomicU64::new(0),
-            frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+            frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
             last_checksum: (0, 0),
             file: Some(file),
             read_locks,
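For context, a minimal standalone sketch (not part of the patch) of what the two container swaps above do: `FxHashMap`/`FxHashSet` are the standard library collections with rustc-hash's faster, non-DoS-resistant hasher plugged in, while `BTreeSet` keeps the dirty-page set iterable in ascending page-number order, which is what the new `dirty_pages` doc comment relies on at flush time. The key and value types below are illustrative assumptions, not the ones elided from the Turso source.

```rust
use rustc_hash::FxHashMap;
use std::collections::BTreeSet;

fn main() {
    // FxHashMap is a type alias for std's HashMap with a non-default hasher,
    // so it is built with `default()` rather than `new()` -- hence the
    // mechanical `HashMap::new()` -> `FxHashMap::default()` edits in the diff.
    // The u64 -> Vec<u64> shape here is an assumption for illustration only.
    let mut frame_cache: FxHashMap<u64, Vec<u64>> = FxHashMap::default();
    frame_cache.entry(1).or_default().push(42);
    assert_eq!(frame_cache[&1], vec![42]);

    // A BTreeSet iterates in ascending key order regardless of insertion
    // order, so walking the set at flush time visits pages in page-number
    // order (the `usize` page id is likewise an assumption).
    let mut dirty_pages: BTreeSet<usize> = BTreeSet::new();
    for page in [7, 2, 5] {
        dirty_pages.insert(page);
    }
    let flush_order: Vec<usize> = dirty_pages.iter().copied().collect();
    assert_eq!(flush_order, vec![2, 5, 7]);
}
```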