From c88c085dec01e80083f25c2a1f3798753abaa371 Mon Sep 17 00:00:00 2001 From: nazeh Date: Tue, 19 Dec 2023 16:54:47 +0300 Subject: [PATCH] wip: update the Node to use redb tables --- Cargo.lock | 213 +++++++++++++++++++++++++++++++++++++++++++- mast/Cargo.toml | 5 +- mast/src/node.rs | 225 +++++++++++++++++++++++++++-------------------- 3 files changed, 348 insertions(+), 95 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bbc8e57..61e1c4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -42,6 +42,18 @@ dependencies = [ "rand", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + [[package]] name = "blake2" version = "0.10.6" @@ -136,6 +148,22 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + [[package]] name = "generic-array" version = "0.14.7" @@ -176,12 +204,19 @@ version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + [[package]] name = "mast" version = "0.1.0" dependencies = [ "blake3", - "bytes", + "redb", + "tempfile", "varu64", ] @@ -250,6 +285,37 @@ dependencies = [ "getrandom", ] +[[package]] +name = "redb" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08837f9a129bde83c51953b8c96cbb3422b940166b730caa954836106eb1dfd2" +dependencies = [ + "libc", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + [[package]] name = "snafu" version = "0.6.10" @@ -299,6 +365,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "thiserror" version = "1.0.50" @@ -352,6 +431,138 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = 
"0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "z32" version = "1.0.2" diff --git a/mast/Cargo.toml b/mast/Cargo.toml index 1ab6f87..06ef672 100644 --- a/mast/Cargo.toml +++ b/mast/Cargo.toml @@ -7,5 +7,8 @@ edition = "2021" [dependencies] blake3 = "1.5.0" -bytes = "1.5.0" +redb = "1.4.0" varu64 = "0.7.0" + +[dev-dependencies] +tempfile = "3.8.1" diff --git a/mast/src/node.rs b/mast/src/node.rs index f1098e1..828624b 100644 --- a/mast/src/node.rs +++ b/mast/src/node.rs @@ -1,26 +1,23 @@ -use crate::storage::memory::MemoryStorage; +use redb::{Database, ReadableTable, Table, TableDefinition, WriteTransaction}; + use crate::{Hash, Hasher, EMPTY_HASH}; -// TODO: make sure that the hash is always in sync. -// TODO: keep track of ref count and sync status in the storage, without adding it to the in memory -// representation. +// TODO: Are we creating too many hashers? +// TODO: are we calculating the rank and hash too often? #[derive(Debug, Clone)] /// In memory reprsentation of treap node. pub(crate) struct Node { - /// The hash of this node, uniquely identifying its key, value, and children. - hash: Hash, - // Key value key: Box<[u8]>, - value: Hash, - - // Rank - rank: Hash, + value: Box<[u8]>, // Children left: Option, right: Option, + + // Metadata that should not be encoded. 
+ ref_count: u64, } #[derive(Debug)] @@ -30,26 +27,42 @@ pub(crate) enum Branch { } impl Node { - pub fn new(key: &[u8], value: Hash) -> Self { - let mut hasher = Hasher::new(); - hasher.update(key); + pub fn from_bytes(bytes: &[u8]) -> Self { + let (size, remaining) = varu64::decode(bytes).unwrap(); + let key = remaining[..size as usize].to_vec().into_boxed_slice(); - let rank = hasher.finalize(); + let (size, remaining) = varu64::decode(&remaining[size as usize..]).unwrap(); + let value = remaining[..size as usize].to_vec().into_boxed_slice(); - let mut node = Self { - hash: EMPTY_HASH, + let left = remaining[size as usize..((size as usize) + 32)] + .try_into() + .map_or(None, |h| Some(Hash::from_bytes(h))); - key: key.into(), + let right = remaining[(size as usize) + 32..((size as usize) + 32 + 32)] + .try_into() + .map_or(None, |h| Some(Hash::from_bytes(h))); + + Node { + key, value, - left: None, - right: None, - rank, - }; + left, + right, - node + ref_count: 0, + } } - // TODO: add from bytes and remember to update its hash. + pub fn new(key: &[u8], value: &[u8]) -> Self { + Self { + key: key.into(), + value: value.into(), + left: None, + right: None, + + ref_count: 0, + } + } + // TODO: remember to update its hash. // === Getters === @@ -57,19 +70,10 @@ impl Node { &self.key } - pub(crate) fn value(&self) -> &Hash { + pub(crate) fn value(&self) -> &[u8] { &self.value } - pub(crate) fn rank(&self) -> &Hash { - &self.rank - } - - /// Returns the hash of the node. - pub(crate) fn hash(&self) -> &Hash { - &self.hash - } - pub(crate) fn left(&self) -> &Option { &self.left } @@ -78,68 +82,103 @@ impl Node { &self.right } + // === Public Methods === + + pub(crate) fn rank(&self) -> Hash { + hash(&self.key) + } + + /// Returns the hash of the node. 
+ pub(crate) fn hash(&self) -> Hash { + hash(&self.canonical_encode()) + } + + pub(crate) fn set_child( + &mut self, + branch: &Branch, + new_child: Option<Hash>, + table: &mut Table<&[u8], (u64, &[u8])>, + ) { + let old_child = match branch { + Branch::Left => self.left, + Branch::Right => self.right, + }; + + // decrement old child's ref count. + decrement_ref_count(old_child, table); + + // increment new child's ref count. + increment_ref_count(new_child, table); + + // set new child + match branch { + Branch::Left => self.left = new_child, + Branch::Right => self.right = new_child, + } + + let encoded = self.canonical_encode(); + table.insert( + hash(&encoded).as_bytes().as_slice(), + (self.ref_count, encoded.as_slice()), + ); + } + // === Private Methods === - pub(crate) fn update_hash(&mut self) -> Hash { - let mut hasher = Hasher::new(); + fn canonical_encode(&self) -> Vec<u8> { + let mut bytes = vec![]; - hasher.update(&self.key); - hasher.update(self.value.as_bytes()); - hasher.update(self.left.unwrap_or(EMPTY_HASH).as_bytes()); - hasher.update(self.right.unwrap_or(EMPTY_HASH).as_bytes()); + encode(&self.key, &mut bytes); + encode(&self.value, &mut bytes); + encode( + &self.left.map(|h| h.as_bytes().to_vec()).unwrap_or_default(), + &mut bytes, + ); + encode( + &self.right.map(|h| h.as_bytes().to_vec()).unwrap_or_default(), + &mut bytes, + ); - self.hash = hasher.finalize(); - self.hash - } - - /// When inserting a node, once we find its instertion point, - /// we give one of its children (depending on the direction), - /// to the current node at the insertion position, and then we - /// replace that child with the updated current node. 
- pub(crate) fn insertion_swap( - &mut self, - direction: Branch, - current_node: &mut Node, - storage: &mut MemoryStorage, - ) { - match direction { - Branch::Left => current_node.set_child(&Branch::Left, *self.right()), - Branch::Right => current_node.set_child(&Branch::Left, *self.left()), - } - - current_node.update(storage); - - match direction { - Branch::Left => self.left = Some(*current_node.hash()), - Branch::Right => self.right = Some(*current_node.hash()), - } - - self.update(storage); - } - - pub(crate) fn set_child(&mut self, branch: &Branch, hash: Option<Hash>) { - // decrement old child's ref count. - - // set children - match branch { - Branch::Left => self.left = hash, - Branch::Right => self.right = hash, - } - - // TODO: increment node's ref count. - } - - pub(crate) fn update(&mut self, storage: &mut MemoryStorage) -> &Hash { - // TODO: save new hash to storage. - // TODO: increment ref count. - // TODO: decrement ref count of old hash! - - // let old_hash = self.hash(); - - self.update_hash(); - - storage.insert_node(self); - - self.hash() + bytes + } +} + +fn encode(bytes: &[u8], out: &mut Vec<u8>) { + varu64::encode_write(bytes.len() as u64, &mut *out).expect("writing to a Vec cannot fail"); + out.extend_from_slice(bytes); +} + +fn hash(bytes: &[u8]) -> Hash { + let mut hasher = Hasher::new(); + hasher.update(bytes); + + hasher.finalize() +} + +fn increment_ref_count(child: Option<Hash>, table: &mut Table<&[u8], (u64, &[u8])>) { + update_ref_count(child, 1, table); +} + +fn decrement_ref_count(child: Option<Hash>, table: &mut Table<&[u8], (u64, &[u8])>) { + update_ref_count(child, -1, table); +} + +fn update_ref_count(child: Option<Hash>, ref_diff: i8, table: &mut Table<&[u8], (u64, &[u8])>) { + if let Some(hash) = child { + let mut existing = table + .get(hash.as_bytes().as_slice()) + .unwrap() + .expect("Child shouldn't be missing!"); + + let (ref_count, bytes) = { + let (r, v) = existing.value(); + (r, v.to_vec()) + }; + drop(existing); + + table.insert( + hash.as_bytes().as_slice(), + (ref_count.saturating_add_signed(ref_diff.into()),
bytes.as_slice()), + ); } }