mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-10 18:54:22 +01:00
Merge 'core: Switch to FxHash to improve performance' from Pekka Enberg
The default Rust hash map hasher is slow for integer keys. Switch to FxHash
instead to reduce the number of executed instructions in, for example, the
write-throughput benchmark.
Before:
```
penberg@turing:~/src/tursodatabase/turso/perf/throughput/turso$ perf stat ../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000
Turso,1,100,0,106875.21
Performance counter stats for '../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000':
2,908.02 msec task-clock # 0.310 CPUs utilized
30,508 context-switches # 10.491 K/sec
261 cpu-migrations # 89.752 /sec
813 page-faults # 279.572 /sec
20,655,313,128 instructions # 1.73 insn per cycle
# 0.14 stalled cycles per insn
11,930,088,949 cycles # 4.102 GHz
2,845,040,381 stalled-cycles-frontend # 23.85% frontend cycles idle
3,814,652,892 branches # 1.312 G/sec
54,760,600 branch-misses # 1.44% of all branches
9.372979876 seconds time elapsed
2.276835000 seconds user
0.530135000 seconds sys
```
After:
```
penberg@turing:~/src/tursodatabase/turso/perf/throughput/turso$ perf stat ../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000
Turso,1,100,0,108663.84
Performance counter stats for '../../../target/release/write-throughput --threads 1 --batch-size 100 --compute 0 -i 10000':
2,838.65 msec task-clock # 0.308 CPUs utilized
30,629 context-switches # 10.790 K/sec
351 cpu-migrations # 123.650 /sec
818 page-faults # 288.165 /sec
19,887,102,451 instructions # 1.72 insn per cycle
# 0.14 stalled cycles per insn
11,593,166,024 cycles # 4.084 GHz
2,830,298,617 stalled-cycles-frontend # 24.41% frontend cycles idle
3,764,334,333 branches # 1.326 G/sec
53,157,766 branch-misses # 1.41% of all branches
9.218225731 seconds time elapsed
2.231889000 seconds user
0.508785000 seconds sys
```
Closes #3837
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4935,6 +4935,7 @@ dependencies = [
|
||||
"roaring",
|
||||
"rstest",
|
||||
"rusqlite",
|
||||
"rustc-hash",
|
||||
"rustix 1.0.7",
|
||||
"ryu",
|
||||
"serde",
|
||||
|
||||
@@ -84,6 +84,7 @@ intrusive-collections = "0.9.7"
|
||||
roaring = "0.11.2"
|
||||
simsimd = "6.5.3"
|
||||
arc-swap = "1.7"
|
||||
rustc-hash = "2.0"
|
||||
|
||||
[build-dependencies]
|
||||
chrono = { workspace = true, default-features = false }
|
||||
|
||||
@@ -67,6 +67,7 @@ pub use io::{
|
||||
SyscallIO, WriteCompletion, IO,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use rustc_hash::FxHashMap;
|
||||
use schema::Schema;
|
||||
use std::task::Waker;
|
||||
use std::{
|
||||
@@ -601,7 +602,7 @@ impl Database {
|
||||
db: self.clone(),
|
||||
pager: ArcSwap::new(pager),
|
||||
schema: RwLock::new(self.schema.lock().unwrap().clone()),
|
||||
database_schemas: RwLock::new(std::collections::HashMap::new()),
|
||||
database_schemas: RwLock::new(FxHashMap::default()),
|
||||
auto_commit: AtomicBool::new(true),
|
||||
transaction_state: AtomicTransactionState::new(TransactionState::None),
|
||||
last_insert_rowid: AtomicI64::new(0),
|
||||
@@ -1109,7 +1110,7 @@ pub struct Connection {
|
||||
schema: RwLock<Arc<Schema>>,
|
||||
/// Per-database schema cache (database_index -> schema)
|
||||
/// Loaded lazily to avoid copying all schemas on connection open
|
||||
database_schemas: RwLock<std::collections::HashMap<usize, Arc<Schema>>>,
|
||||
database_schemas: RwLock<FxHashMap<usize, Arc<Schema>>>,
|
||||
/// Whether to automatically commit transaction
|
||||
auto_commit: AtomicBool,
|
||||
transaction_state: AtomicTransactionState,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use rustc_hash::FxHashMap;
|
||||
use tracing::{instrument, Level};
|
||||
|
||||
use crate::{
|
||||
@@ -43,7 +44,7 @@ use std::{
|
||||
any::Any,
|
||||
cell::{Cell, Ref, RefCell},
|
||||
cmp::{Ordering, Reverse},
|
||||
collections::{BinaryHeap, HashMap},
|
||||
collections::BinaryHeap,
|
||||
fmt::Debug,
|
||||
ops::DerefMut,
|
||||
pin::Pin,
|
||||
@@ -5805,7 +5806,7 @@ pub struct IntegrityCheckState {
|
||||
page_stack: Vec<IntegrityCheckPageEntry>,
|
||||
pub db_size: usize,
|
||||
first_leaf_level: Option<usize>,
|
||||
pub page_reference: HashMap<i64, i64>,
|
||||
pub page_reference: FxHashMap<i64, i64>,
|
||||
page: Option<PageRef>,
|
||||
pub freelist_count: CheckFreelist,
|
||||
}
|
||||
@@ -5815,7 +5816,7 @@ impl IntegrityCheckState {
|
||||
Self {
|
||||
page_stack: Vec::new(),
|
||||
db_size,
|
||||
page_reference: HashMap::new(),
|
||||
page_reference: FxHashMap::default(),
|
||||
first_leaf_level: None,
|
||||
page: None,
|
||||
freelist_count: CheckFreelist {
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use intrusive_collections::{intrusive_adapter, LinkedList, LinkedListLink};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{atomic::Ordering, Arc},
|
||||
};
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::sync::{atomic::Ordering, Arc};
|
||||
use tracing::trace;
|
||||
|
||||
use crate::turso_assert;
|
||||
@@ -74,7 +72,7 @@ pub struct PageCache {
|
||||
/// Capacity in pages
|
||||
capacity: usize,
|
||||
/// Map of Key -> pointer to entry in the queue
|
||||
map: HashMap<PageCacheKey, *mut PageCacheEntry>,
|
||||
map: FxHashMap<PageCacheKey, *mut PageCacheEntry>,
|
||||
/// The eviction queue (intrusive doubly-linked list)
|
||||
queue: LinkedList<EntryAdapter>,
|
||||
/// Clock hand cursor for SIEVE eviction (pointer to an entry in the queue, or null)
|
||||
@@ -119,7 +117,7 @@ impl PageCache {
|
||||
assert!(capacity > 0);
|
||||
Self {
|
||||
capacity,
|
||||
map: HashMap::new(),
|
||||
map: FxHashMap::default(),
|
||||
queue: LinkedList::new(EntryAdapter::new()),
|
||||
clock_hand: std::ptr::null_mut(),
|
||||
}
|
||||
|
||||
@@ -19,8 +19,7 @@ use crate::{io_yield_one, CompletionError, IOContext, OpenFlags, IO};
|
||||
use parking_lot::RwLock;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::cell::{RefCell, UnsafeCell};
|
||||
use std::collections::HashSet;
|
||||
use std::hash;
|
||||
use std::collections::BTreeSet;
|
||||
use std::rc::Rc;
|
||||
use std::sync::atomic::{
|
||||
AtomicBool, AtomicU16, AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering,
|
||||
@@ -513,7 +512,11 @@ pub struct Pager {
|
||||
pub buffer_pool: Arc<BufferPool>,
|
||||
/// I/O interface for input/output operations.
|
||||
pub io: Arc<dyn crate::io::IO>,
|
||||
dirty_pages: Arc<RwLock<HashSet<usize, hash::BuildHasherDefault<hash::DefaultHasher>>>>,
|
||||
/// Dirty pages sorted by page number.
|
||||
///
|
||||
/// We need dirty pages in page number order when we flush them out to ensure
|
||||
/// that the WAL we generate is compatible with SQLite.
|
||||
dirty_pages: Arc<RwLock<BTreeSet<usize>>>,
|
||||
subjournal: RwLock<Option<Subjournal>>,
|
||||
savepoints: Arc<RwLock<Vec<Savepoint>>>,
|
||||
commit_info: RwLock<CommitInfo>,
|
||||
@@ -635,9 +638,7 @@ impl Pager {
|
||||
wal,
|
||||
page_cache,
|
||||
io,
|
||||
dirty_pages: Arc::new(RwLock::new(HashSet::with_hasher(
|
||||
hash::BuildHasherDefault::new(),
|
||||
))),
|
||||
dirty_pages: Arc::new(RwLock::new(BTreeSet::new())),
|
||||
subjournal: RwLock::new(None),
|
||||
savepoints: Arc::new(RwLock::new(Vec::new())),
|
||||
commit_info: RwLock::new(CommitInfo {
|
||||
|
||||
@@ -66,7 +66,8 @@ use crate::{
|
||||
bail_corrupt_error, turso_assert, CompletionError, File, IOContext, Result, WalFileShared,
|
||||
};
|
||||
use parking_lot::RwLock;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use rustc_hash::FxHashMap;
|
||||
use std::collections::BTreeMap;
|
||||
use std::mem::MaybeUninit;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, AtomicU64, AtomicUsize, Ordering};
|
||||
@@ -1646,7 +1647,7 @@ pub fn build_shared_wal(
|
||||
max_frame: AtomicU64::new(0),
|
||||
nbackfills: AtomicU64::new(0),
|
||||
transaction_count: AtomicU64::new(0),
|
||||
frame_cache: Arc::new(SpinLock::new(HashMap::new())),
|
||||
frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
|
||||
last_checksum: (0, 0),
|
||||
file: Some(file.clone()),
|
||||
read_locks,
|
||||
@@ -1711,7 +1712,7 @@ struct StreamingState {
|
||||
frame_idx: u64,
|
||||
cumulative_checksum: (u32, u32),
|
||||
last_valid_frame: u64,
|
||||
pending_frames: HashMap<u64, Vec<u64>>,
|
||||
pending_frames: FxHashMap<u64, Vec<u64>>,
|
||||
page_size: usize,
|
||||
use_native_endian: bool,
|
||||
header_valid: bool,
|
||||
@@ -1736,7 +1737,7 @@ impl StreamingWalReader {
|
||||
frame_idx: 1,
|
||||
cumulative_checksum: (0, 0),
|
||||
last_valid_frame: 0,
|
||||
pending_frames: HashMap::new(),
|
||||
pending_frames: FxHashMap::default(),
|
||||
page_size: 0,
|
||||
use_native_endian: false,
|
||||
header_valid: false,
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#![allow(clippy::not_unsafe_ptr_arg_deref)]
|
||||
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use std::array;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::collections::BTreeMap;
|
||||
use strum::EnumString;
|
||||
use tracing::{instrument, Level};
|
||||
|
||||
@@ -679,7 +680,7 @@ pub struct WalFileShared {
|
||||
// One difference between SQLite and limbo is that we will never support multi process, meaning
|
||||
// we don't need WAL's index file. So we can do stuff like this without shared memory.
|
||||
// TODO: this will need refactoring because this is incredible memory inefficient.
|
||||
pub frame_cache: Arc<SpinLock<HashMap<u64, Vec<u64>>>>,
|
||||
pub frame_cache: Arc<SpinLock<FxHashMap<u64, Vec<u64>>>>,
|
||||
pub last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
|
||||
pub file: Option<Arc<dyn File>>,
|
||||
/// Read locks advertise the maximum WAL frame a reader may access.
|
||||
@@ -1394,7 +1395,7 @@ impl Wal for WalFile {
|
||||
let frame_count = self.get_max_frame();
|
||||
let page_size = self.page_size();
|
||||
let mut frame = vec![0u8; page_size as usize + WAL_FRAME_HEADER_SIZE];
|
||||
let mut seen = HashSet::new();
|
||||
let mut seen = FxHashSet::default();
|
||||
turso_assert!(
|
||||
frame_count >= frame_watermark,
|
||||
"frame_count must be not less than frame_watermark: {} vs {}",
|
||||
@@ -2353,7 +2354,7 @@ impl WalFileShared {
|
||||
max_frame: AtomicU64::new(0),
|
||||
nbackfills: AtomicU64::new(0),
|
||||
transaction_count: AtomicU64::new(0),
|
||||
frame_cache: Arc::new(SpinLock::new(HashMap::new())),
|
||||
frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
|
||||
last_checksum: (0, 0),
|
||||
file: None,
|
||||
read_locks,
|
||||
@@ -2398,7 +2399,7 @@ impl WalFileShared {
|
||||
max_frame: AtomicU64::new(0),
|
||||
nbackfills: AtomicU64::new(0),
|
||||
transaction_count: AtomicU64::new(0),
|
||||
frame_cache: Arc::new(SpinLock::new(HashMap::new())),
|
||||
frame_cache: Arc::new(SpinLock::new(FxHashMap::default())),
|
||||
last_checksum: (0, 0),
|
||||
file: Some(file),
|
||||
read_locks,
|
||||
|
||||
Reference in New Issue
Block a user