Merge 'core: Switch to FxHash to improve performance' from Pekka Enberg

The default Rust hash map hasher is slow for integer keys. Switch to
FxHash instead to reduce the number of executed instructions in, for
example, the write throughput benchmark.

Before:
```
penberg@turing:~/src/tursodatabase/turso/perf/throughput/turso$ perf stat ../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000
Turso,1,100,0,106875.21

 Performance counter stats for '../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000':

          2,908.02 msec task-clock                       #    0.310 CPUs utilized
            30,508      context-switches                 #   10.491 K/sec
               261      cpu-migrations                   #   89.752 /sec
               813      page-faults                      #  279.572 /sec
    20,655,313,128      instructions                     #    1.73  insn per cycle
                                                  #    0.14  stalled cycles per insn
    11,930,088,949      cycles                           #    4.102 GHz
     2,845,040,381      stalled-cycles-frontend          #   23.85% frontend cycles idle
     3,814,652,892      branches                         #    1.312 G/sec
        54,760,600      branch-misses                    #    1.44% of all branches

       9.372979876 seconds time elapsed

       2.276835000 seconds user
       0.530135000 seconds sys
```
After:
```
penberg@turing:~/src/tursodatabase/turso/perf/throughput/turso$ perf stat ../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000
Turso,1,100,0,108663.84

 Performance counter stats for '../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000':

          2,838.65 msec task-clock                       #    0.308 CPUs utilized
            30,629      context-switches                 #   10.790 K/sec
               351      cpu-migrations                   #  123.650 /sec
               818      page-faults                      #  288.165 /sec
    19,887,102,451      instructions                     #    1.72  insn per cycle
                                                  #    0.14  stalled cycles per insn
    11,593,166,024      cycles                           #    4.084 GHz
     2,830,298,617      stalled-cycles-frontend          #   24.41% frontend cycles idle
     3,764,334,333      branches                         #    1.326 G/sec
        53,157,766      branch-misses                    #    1.41% of all branches

       9.218225731 seconds time elapsed

       2.231889000 seconds user
       0.508785000 seconds sys

```
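For context, a minimal sketch of what the swap looks like at a call site (not part of this patch; the names are illustrative). `FxHashMap`/`FxHashSet` are type aliases for the std collections with the Fx hasher, so `HashMap::new()` (defined only for the default `RandomState` hasher) becomes `default()`, while the rest of the map API is unchanged. The trade-off is that FxHash is not DoS-resistant like SipHash, which is acceptable for internal integer-keyed maps.

```rust
// Cargo.toml: rustc-hash = "2.0"
use rustc_hash::{FxHashMap, FxHashSet};

fn main() {
    // FxHashMap is std's HashMap with the Fx hasher, so construction goes
    // through `default()` instead of `new()`; every other method is the same.
    let mut pages: FxHashMap<u64, Vec<u64>> = FxHashMap::default();
    pages.entry(1).or_default().push(42);
    pages.entry(1).or_default().push(43);

    let mut seen: FxHashSet<u64> = FxHashSet::default();
    seen.insert(1);

    assert_eq!(pages[&1], vec![42, 43]);
    assert!(seen.contains(&1));
}
```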

Closes #3837
Authored by Pekka Enberg on 2025-10-28 14:49:09 +02:00, committed by GitHub.
8 changed files with 31 additions and 26 deletions

Cargo.lock (generated)

@@ -4935,6 +4935,7 @@ dependencies = [
"roaring",
"rstest",
"rusqlite",
"rustc-hash",
"rustix 1.0.7",
"ryu",
"serde",


@@ -84,6 +84,7 @@ intrusive-collections = "0.9.7"
roaring = "0.11.2"
simsimd = "6.5.3"
arc-swap = "1.7"
+rustc-hash = "2.0"
[build-dependencies]
chrono = { workspace = true, default-features = false }


@@ -67,6 +67,7 @@ pub use io::{
SyscallIO, WriteCompletion, IO,
};
use parking_lot::RwLock;
+use rustc_hash::FxHashMap;
use schema::Schema;
use std::task::Waker;
use std::{
@@ -601,7 +602,7 @@ impl Database {
db: self.clone(),
pager: ArcSwap::new(pager),
schema: RwLock::new(self.schema.lock().unwrap().clone()),
-database_schemas: RwLock::new(std::collections::HashMap::new()),
+database_schemas: RwLock::new(FxHashMap::default()),
auto_commit: AtomicBool::new(true),
transaction_state: AtomicTransactionState::new(TransactionState::None),
last_insert_rowid: AtomicI64::new(0),
@@ -1109,7 +1110,7 @@ pub struct Connection {
schema: RwLock<Arc<Schema>>,
/// Per-database schema cache (database_index -> schema)
/// Loaded lazily to avoid copying all schemas on connection open
-database_schemas: RwLock<std::collections::HashMap<usize, Arc<Schema>>>,
+database_schemas: RwLock<FxHashMap<usize, Arc<Schema>>>,
/// Whether to automatically commit transaction
auto_commit: AtomicBool,
transaction_state: AtomicTransactionState,
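To illustrate the "loaded lazily" comment above, a hypothetical lookup helper might populate the map only on first access for a given database index. The `Connection`/`Schema` stand-ins and the load step below are assumptions for illustration, not code from this patch.

```rust
use rustc_hash::FxHashMap;
use std::sync::Arc;

// Stand-ins for the real types; everything here is illustrative.
struct Schema {
    name: String,
}

struct Connection {
    // database_index -> schema, filled in on first use.
    database_schemas: FxHashMap<usize, Arc<Schema>>,
}

impl Connection {
    // Hypothetical lazy accessor: only the schema for the requested database
    // is materialized, so opening a connection never copies every schema.
    fn schema_for(&mut self, database_index: usize) -> Arc<Schema> {
        self.database_schemas
            .entry(database_index)
            .or_insert_with(|| Arc::new(load_schema(database_index)))
            .clone()
    }
}

// Placeholder for actually reading the schema from storage.
fn load_schema(database_index: usize) -> Schema {
    Schema {
        name: format!("db{database_index}"),
    }
}

fn main() {
    let mut conn = Connection {
        database_schemas: FxHashMap::default(),
    };
    println!("loaded: {}", conn.schema_for(2).name);
}
```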


@@ -1,3 +1,4 @@
+use rustc_hash::FxHashMap;
use tracing::{instrument, Level};
use crate::{
@@ -43,7 +44,7 @@ use std::{
any::Any,
cell::{Cell, Ref, RefCell},
cmp::{Ordering, Reverse},
-collections::{BinaryHeap, HashMap},
+collections::BinaryHeap,
fmt::Debug,
ops::DerefMut,
pin::Pin,
@@ -5805,7 +5806,7 @@ pub struct IntegrityCheckState {
page_stack: Vec<IntegrityCheckPageEntry>,
pub db_size: usize,
first_leaf_level: Option<usize>,
-pub page_reference: HashMap<i64, i64>,
+pub page_reference: FxHashMap<i64, i64>,
page: Option<PageRef>,
pub freelist_count: CheckFreelist,
}
@@ -5815,7 +5816,7 @@ impl IntegrityCheckState {
Self {
page_stack: Vec::new(),
db_size,
-page_reference: HashMap::new(),
+page_reference: FxHashMap::default(),
first_leaf_level: None,
page: None,
freelist_count: CheckFreelist {


@@ -1,8 +1,6 @@
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListLink};
-use std::{
-collections::HashMap,
-sync::{atomic::Ordering, Arc},
-};
+use rustc_hash::FxHashMap;
+use std::sync::{atomic::Ordering, Arc};
use tracing::trace;
use crate::turso_assert;
@@ -74,7 +72,7 @@ pub struct PageCache {
/// Capacity in pages
capacity: usize,
/// Map of Key -> pointer to entry in the queue
-map: HashMap<PageCacheKey, *mut PageCacheEntry>,
+map: FxHashMap<PageCacheKey, *mut PageCacheEntry>,
/// The eviction queue (intrusive doubly-linked list)
queue: LinkedList<EntryAdapter>,
/// Clock hand cursor for SIEVE eviction (pointer to an entry in the queue, or null)
@@ -119,7 +117,7 @@ impl PageCache {
assert!(capacity > 0);
Self {
capacity,
-map: HashMap::new(),
+map: FxHashMap::default(),
queue: LinkedList::new(EntryAdapter::new()),
clock_hand: std::ptr::null_mut(),
}
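As an aside on the surrounding structure: below is a minimal, self-contained sketch of SIEVE eviction over a plain Vec plus FxHashMap. It is not the crate's PageCache (which keeps entries in an intrusive linked list and stores raw pointers in the map); it only shows the visited-bit/clock-hand idea the field comments above refer to.

```rust
use rustc_hash::FxHashMap;

/// Illustrative SIEVE cache over bare keys.
struct Sieve {
    capacity: usize,
    /// key -> "visited since the hand last passed" bit
    visited: FxHashMap<u64, bool>,
    /// index 0 is the oldest entry; new entries are pushed at the end
    queue: Vec<u64>,
    /// clock hand: index of the next eviction candidate
    hand: usize,
}

impl Sieve {
    fn new(capacity: usize) -> Self {
        assert!(capacity > 0);
        Self {
            capacity,
            visited: FxHashMap::default(),
            queue: Vec::new(),
            hand: 0,
        }
    }

    /// On a hit, only the visited bit is set; nothing moves in the queue.
    fn touch(&mut self, key: u64) -> bool {
        match self.visited.get_mut(&key) {
            Some(v) => {
                *v = true;
                true
            }
            None => false,
        }
    }

    /// Insert a new key, evicting one unvisited entry if the cache is full.
    fn insert(&mut self, key: u64) {
        if self.touch(key) {
            return;
        }
        if self.queue.len() == self.capacity {
            self.evict();
        }
        self.visited.insert(key, false);
        self.queue.push(key);
    }

    /// Scan from the hand toward newer entries, clearing visited bits, and
    /// evict the first entry whose bit is already clear.
    fn evict(&mut self) {
        loop {
            if self.hand >= self.queue.len() {
                self.hand = 0; // wrap around to the oldest entry
            }
            let key = self.queue[self.hand];
            if self.visited[&key] {
                self.visited.insert(key, false);
                self.hand += 1;
            } else {
                self.visited.remove(&key);
                self.queue.remove(self.hand);
                return; // hand now points at the next-newer entry
            }
        }
    }
}

fn main() {
    let mut cache = Sieve::new(2);
    cache.insert(1);
    cache.insert(2);
    cache.touch(1); // page 1 is "visited"
    cache.insert(3); // evicts page 2, the first unvisited entry
    assert!(cache.touch(1));
    assert!(!cache.touch(2));
    assert!(cache.touch(3));
}
```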


@@ -19,8 +19,7 @@ use crate::{io_yield_one, CompletionError, IOContext, OpenFlags, IO};
use parking_lot::RwLock;
use roaring::RoaringBitmap;
use std::cell::{RefCell, UnsafeCell};
-use std::collections::HashSet;
-use std::hash;
+use std::collections::BTreeSet;
use std::rc::Rc;
use std::sync::atomic::{
AtomicBool, AtomicU16, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering,
@@ -513,7 +512,11 @@ pub struct Pager {
pub buffer_pool: Arc<BufferPool>,
/// I/O interface for input/output operations.
pub io: Arc<dyn crate::io::IO>,
-dirty_pages: Arc<RwLock<HashSet<usize, hash::BuildHasherDefault<hash::DefaultHasher>>>>,
+/// Dirty pages sorted by page number.
+///
+/// We need dirty pages in page number order when we flush them out to ensure
+/// that the WAL we generate is compatible with SQLite.
+dirty_pages: Arc<RwLock<BTreeSet<usize>>>,
subjournal: RwLock<Option<Subjournal>>,
savepoints: Arc<RwLock<Vec<Savepoint>>>,
commit_info: RwLock<CommitInfo>,
@@ -635,9 +638,7 @@ impl Pager {
wal,
page_cache,
io,
-dirty_pages: Arc::new(RwLock::new(HashSet::with_hasher(
-hash::BuildHasherDefault::new(),
-))),
+dirty_pages: Arc::new(RwLock::new(BTreeSet::new())),
subjournal: RwLock::new(None),
savepoints: Arc::new(RwLock::new(Vec::new())),
commit_info: RwLock::new(CommitInfo {
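A small sketch (not from the patch) of why BTreeSet fits here: unlike a hash set, iterating a BTreeSet<usize> always yields page numbers in ascending order, so a flush loop that walks the set emits pages in exactly the order the doc comment above requires.

```rust
use std::collections::BTreeSet;

fn main() {
    let mut dirty_pages: BTreeSet<usize> = BTreeSet::new();
    for page in [7, 2, 42, 3] {
        dirty_pages.insert(page);
    }

    // Iteration is in ascending page-number order regardless of insertion
    // order, so a flush loop over the set writes pages 2, 3, 7, 42.
    let flush_order: Vec<usize> = dirty_pages.iter().copied().collect();
    assert_eq!(flush_order, vec![2, 3, 7, 42]);
}
```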


@@ -66,7 +66,8 @@ use crate::{
bail_corrupt_error, turso_assert, CompletionError, File, IOContext, Result, WalFileShared,
};
use parking_lot::RwLock;
-use std::collections::{BTreeMap, HashMap};
+use rustc_hash::FxHashMap;
+use std::collections::BTreeMap;
use std::mem::MaybeUninit;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
@@ -1646,7 +1647,7 @@ pub fn build_shared_wal(
max_frame: AtomicU64::new(0),
nbackfills: AtomicU64::new(0),
transaction_count: AtomicU64::new(0),
-frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
last_checksum: (0, 0),
file: Some(file.clone()),
read_locks,
@@ -1711,7 +1712,7 @@ struct StreamingState {
frame_idx: u64,
cumulative_checksum: (u32, u32),
last_valid_frame: u64,
-pending_frames: HashMap<u64, Vec<u64>>,
+pending_frames: FxHashMap<u64, Vec<u64>>,
page_size: usize,
use_native_endian: bool,
header_valid: bool,
@@ -1736,7 +1737,7 @@ impl StreamingWalReader {
frame_idx: 1,
cumulative_checksum: (0, 0),
last_valid_frame: 0,
-pending_frames: HashMap::new(),
+pending_frames: FxHashMap::default(),
page_size: 0,
use_native_endian: false,
header_valid: false,


@@ -1,8 +1,9 @@
#![allow(clippy::not_unsafe_ptr_arg_deref)]
+use rustc_hash::{FxHashMap, FxHashSet};
use std::array;
use std::borrow::Cow;
-use std::collections::{BTreeMap, HashMap, HashSet};
+use std::collections::BTreeMap;
use strum::EnumString;
use tracing::{instrument, Level};
@@ -679,7 +680,7 @@ pub struct WalFileShared {
// One difference between SQLite and limbo is that we will never support multi process, meaning
// we don't need WAL's index file. So we can do stuff like this without shared memory.
// TODO: this will need refactoring because this is incredible memory inefficient.
-pub frame_cache: Arc<SpinLock<HashMap<u64, Vec<u64>>>>,
+pub frame_cache: Arc<SpinLock<FxHashMap<u64, Vec<u64>>>>,
pub last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
pub file: Option<Arc<dyn File>>,
/// Read locks advertise the maximum WAL frame a reader may access.
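For orientation only, a sketch under an assumption the patch does not spell out: if frame_cache maps a page number to the (ascending) WAL frame numbers that contain versions of that page, a reader would take the newest frame at or below its snapshot's maximum frame. The helper name and field semantics below are hypothetical; keeping a whole Vec<u64> per page is what the TODO above calls memory inefficient.

```rust
use rustc_hash::FxHashMap;

// Hypothetical lookup: find the newest frame for `page` that a reader whose
// snapshot ends at `max_frame` is allowed to see. The page -> frame-numbers
// mapping is an assumption for illustration only.
fn frame_for_page(
    frame_cache: &FxHashMap<u64, Vec<u64>>,
    page: u64,
    max_frame: u64,
) -> Option<u64> {
    frame_cache
        .get(&page)?
        .iter()
        .copied()
        .filter(|&frame| frame <= max_frame)
        .max()
}

fn main() {
    let mut frame_cache: FxHashMap<u64, Vec<u64>> = FxHashMap::default();
    frame_cache.entry(5).or_default().extend([3, 9, 17]);

    // A reader whose snapshot stops at frame 10 sees the version in frame 9.
    assert_eq!(frame_for_page(&frame_cache, 5, 10), Some(9));
    // Pages never written to the WAL fall back to the main database file.
    assert_eq!(frame_for_page(&frame_cache, 1, 10), None);
}
```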
@@ -1394,7 +1395,7 @@ impl Wal for WalFile {
let frame_count = self.get_max_frame();
let page_size = self.page_size();
let mut frame = vec![0u8; page_size as usize + WAL_FRAME_HEADER_SIZE];
-let mut seen = HashSet::new();
+let mut seen = FxHashSet::default();
turso_assert!(
frame_count >= frame_watermark,
"frame_count must be not less than frame_watermark: {} vs {}",
@@ -2353,7 +2354,7 @@ impl WalFileShared {
max_frame: AtomicU64::new(0),
nbackfills: AtomicU64::new(0),
transaction_count: AtomicU64::new(0),
-frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
last_checksum: (0, 0),
file: None,
read_locks,
@@ -2398,7 +2399,7 @@ impl WalFileShared {
max_frame: AtomicU64::new(0),
nbackfills: AtomicU64::new(0),
transaction_count: AtomicU64::new(0),
-frame_cache: Arc::new(SpinLock::new(HashMap::new())),
+frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
last_checksum: (0, 0),
file: Some(file),
read_locks,