diff --git a/Cargo.lock b/Cargo.lock
index 1eab3be..68f3ab5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1725,6 +1725,7 @@ dependencies = [
  "futures-util",
  "heed",
  "hex",
+ "libc",
  "pkarr",
  "postcard",
  "pubky-common",
diff --git a/pubky-homeserver/Cargo.toml b/pubky-homeserver/Cargo.toml
index ea55cf2..bb1a908 100644
--- a/pubky-homeserver/Cargo.toml
+++ b/pubky-homeserver/Cargo.toml
@@ -15,6 +15,7 @@ flume = "0.11.0"
 futures-util = "0.3.30"
 heed = "0.20.3"
 hex = "0.4.3"
+libc = "0.2.159"
 pkarr = { workspace = true }
 postcard = { version = "1.0.8", features = ["alloc"] }
 pubky-common = { version = "0.1.0", path = "../pubky-common" }
diff --git a/pubky-homeserver/src/database.rs b/pubky-homeserver/src/database.rs
index 93d6311..10fc3b4 100644
--- a/pubky-homeserver/src/database.rs
+++ b/pubky-homeserver/src/database.rs
@@ -15,6 +15,7 @@ pub struct DB {
     pub(crate) tables: Tables,
     pub(crate) config: Config,
     pub(crate) buffers_dir: PathBuf,
+    pub(crate) max_chunk_size: usize,
 }
 
 impl DB {
@@ -39,8 +40,24 @@ impl DB {
             tables,
             config,
             buffers_dir,
+            max_chunk_size: max_chunk_size(),
         };
 
         Ok(db)
     }
 }
+
+/// calculate optimal chunk size:
+/// - https://lmdb.readthedocs.io/en/release/#storage-efficiency-limits
+/// - https://github.com/lmdbjava/benchmarks/blob/master/results/20160710/README.md#test-2-determine-24816-kb-byte-values
+fn max_chunk_size() -> usize {
+    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
+
+    // - 16 bytes Header per page (LMDB)
+    // - Each page has to contain 2 records
+    // - 8 bytes per record (LMDB)
+    // - 12 bytes key:
+    //   - timestamp : 8 bytes
+    //   - chunk index: 4 bytes
+    ((page_size - 16) / 2) - 8 - 12
+}
diff --git a/pubky-homeserver/src/database/tables/entries.rs b/pubky-homeserver/src/database/tables/entries.rs
index 52ec778..30b1043 100644
--- a/pubky-homeserver/src/database/tables/entries.rs
+++ b/pubky-homeserver/src/database/tables/entries.rs
@@ -350,9 +350,6 @@ impl<'db> EntryWriter<'db> {
     pub fn commit(&self) -> anyhow::Result<Entry> {
         let hash = self.hasher.finalize();
 
-        // TODO: get the chunk size from the OS's page size
-        let chunk_size: usize = 2000;
-
         let mut buffer = File::open(&self.buffer_path)?;
 
         let mut wtxn = self.db.env.write_txn()?;
@@ -363,7 +360,7 @@ impl<'db> EntryWriter<'db> {
         let mut chunk_index: u32 = 0;
 
         loop {
-            let mut chunk = vec![0_u8; chunk_size];
+            let mut chunk = vec![0_u8; self.db.max_chunk_size];
 
             let bytes_read = buffer.read(&mut chunk)?;
 
diff --git a/pubky/src/shared/public.rs b/pubky/src/shared/public.rs
index da5c180..81118f7 100644
--- a/pubky/src/shared/public.rs
+++ b/pubky/src/shared/public.rs
@@ -98,6 +98,7 @@ mod tests {
 
     use crate::*;
 
+    use bytes::Bytes;
     use pkarr::{mainline::Testnet, Keypair};
     use pubky_homeserver::Homeserver;
     use reqwest::{Method, StatusCode};
@@ -819,4 +820,35 @@ mod tests {
             ]
         )
     }
+
+    #[tokio::test]
+    async fn stream() {
+        // TODO: test better streaming API
+
+        let testnet = Testnet::new(10);
+        let server = Homeserver::start_test(&testnet).await.unwrap();
+
+        let client = PubkyClient::test(&testnet);
+
+        let keypair = Keypair::random();
+
+        client.signup(&keypair, &server.public_key()).await.unwrap();
+
+        let url = format!("pubky://{}/pub/foo.txt", keypair.public_key());
+        let url = url.as_str();
+
+        let bytes = Bytes::from(vec![0; 1024 * 1024]);
+
+        client.put(url, &bytes).await.unwrap();
+
+        let response = client.get(url).await.unwrap().unwrap();
+
+        assert_eq!(response, bytes);
+
+        client.delete(url).await.unwrap();
+
+        let response = client.get(url).await.unwrap();
+
+        assert_eq!(response, None);
+    }
 }