feat(homeserver): optimize chunk size for LMDB details

This commit is contained in:
nazeh
2024-10-16 17:52:10 +03:00
parent 8dde7b17da
commit fd88986a32
5 changed files with 52 additions and 4 deletions

1
Cargo.lock generated
View File

@@ -1725,6 +1725,7 @@ dependencies = [
"futures-util",
"heed",
"hex",
"libc",
"pkarr",
"postcard",
"pubky-common",

View File

@@ -15,6 +15,7 @@ flume = "0.11.0"
futures-util = "0.3.30"
heed = "0.20.3"
hex = "0.4.3"
libc = "0.2.159"
pkarr = { workspace = true }
postcard = { version = "1.0.8", features = ["alloc"] }
pubky-common = { version = "0.1.0", path = "../pubky-common" }

View File

@@ -15,6 +15,7 @@ pub struct DB {
pub(crate) tables: Tables,
pub(crate) config: Config,
pub(crate) buffers_dir: PathBuf,
pub(crate) max_chunk_size: usize,
}
impl DB {
@@ -39,8 +40,24 @@ impl DB {
tables,
config,
buffers_dir,
max_chunk_size: max_chunk_size(),
};
Ok(db)
}
}
/// Calculates the chunk size (in bytes) used when splitting a value into LMDB records,
/// derived from the operating system's page size:
/// - https://lmdb.readthedocs.io/en/release/#storage-efficiency-limits
/// - https://github.com/lmdbjava/benchmarks/blob/master/results/20160710/README.md#test-2-determine-24816-kb-byte-values
///
/// The budget is: half of a page (each page has to contain 2 records) after
/// removing the page header, minus the per-record header and the chunk key.
///
/// If the OS reports an error (`sysconf` returns `-1`) or a page size too small
/// to hold even a one-byte chunk, a 4096-byte page is assumed instead of
/// letting the arithmetic wrap or underflow.
fn max_chunk_size() -> usize {
    /// Header bytes per page (LMDB).
    const PAGE_HEADER: usize = 16;
    /// Header bytes per record (LMDB).
    const RECORD_HEADER: usize = 8;
    /// Key bytes: timestamp (8 bytes) + chunk index (4 bytes).
    const KEY_LEN: usize = 8 + 4;
    /// Page size assumed when the OS value is unusable.
    const FALLBACK_PAGE_SIZE: usize = 4096;
    /// Smallest page size for which the formula below yields at least 1 byte.
    const MIN_PAGE_SIZE: usize = PAGE_HEADER + 2 * (RECORD_HEADER + KEY_LEN + 1);

    // SAFETY: `sysconf` takes a plain integer selector and returns an integer;
    // it dereferences no pointers and `_SC_PAGESIZE` is a valid selector.
    let reported = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };

    // `reported` is signed: -1 signals failure, so validate before converting.
    let page_size = if reported >= MIN_PAGE_SIZE as libc::c_long {
        reported as usize
    } else {
        FALLBACK_PAGE_SIZE
    };

    ((page_size - PAGE_HEADER) / 2) - RECORD_HEADER - KEY_LEN
}

View File

@@ -350,9 +350,6 @@ impl<'db> EntryWriter<'db> {
pub fn commit(&self) -> anyhow::Result<Entry> {
let hash = self.hasher.finalize();
// TODO: get the chunk size from the OS's page size
let chunk_size: usize = 2000;
let mut buffer = File::open(&self.buffer_path)?;
let mut wtxn = self.db.env.write_txn()?;
@@ -363,7 +360,7 @@ impl<'db> EntryWriter<'db> {
let mut chunk_index: u32 = 0;
loop {
let mut chunk = vec![0_u8; chunk_size];
let mut chunk = vec![0_u8; self.db.max_chunk_size];
let bytes_read = buffer.read(&mut chunk)?;

View File

@@ -98,6 +98,7 @@ mod tests {
use crate::*;
use bytes::Bytes;
use pkarr::{mainline::Testnet, Keypair};
use pubky_homeserver::Homeserver;
use reqwest::{Method, StatusCode};
@@ -819,4 +820,35 @@ mod tests {
]
)
}
#[tokio::test]
async fn stream() {
    // TODO: test better streaming API

    // Spin up a local test network with a homeserver and a client attached to it.
    let testnet = Testnet::new(10);
    let homeserver = Homeserver::start_test(&testnet).await.unwrap();
    let client = PubkyClient::test(&testnet);

    // Register a fresh identity on the homeserver.
    let keypair = Keypair::random();
    client
        .signup(&keypair, &homeserver.public_key())
        .await
        .unwrap();

    let location = format!("pubky://{}/pub/foo.txt", keypair.public_key());
    let url = location.as_str();

    // Store a 1 MiB zero-filled payload and read it back unchanged.
    let payload = Bytes::from(vec![0; 1024 * 1024]);
    client.put(url, &payload).await.unwrap();

    let fetched = client.get(url).await.unwrap().unwrap();
    assert_eq!(fetched, payload);

    // After deletion the same URL must no longer resolve to any content.
    client.delete(url).await.unwrap();

    let after_delete = client.get(url).await.unwrap();
    assert_eq!(after_delete, None);
}
}