wip: update the Node to use redb tables

This commit is contained in:
nazeh
2023-12-19 16:54:47 +03:00
parent 44ee478d38
commit c88c085dec
3 changed files with 348 additions and 95 deletions

213
Cargo.lock generated
View File

@@ -42,6 +42,18 @@ dependencies = [
"rand",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "blake2"
version = "0.10.6"
@@ -136,6 +148,22 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "errno"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "fastrand"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "generic-array"
version = "0.14.7"
@@ -176,12 +204,19 @@ version = "0.2.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "mast"
version = "0.1.0"
dependencies = [
"blake3",
"bytes",
"redb",
"tempfile",
"varu64",
]
@@ -250,6 +285,37 @@ dependencies = [
"getrandom",
]
[[package]]
name = "redb"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08837f9a129bde83c51953b8c96cbb3422b940166b730caa954836106eb1dfd2"
dependencies = [
"libc",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags 2.4.1",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
]
[[package]]
name = "snafu"
version = "0.6.10"
@@ -299,6 +365,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5"
dependencies = [
"cfg-if",
"fastrand",
"redox_syscall",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "thiserror"
version = "1.0.50"
@@ -352,6 +431,138 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "z32"
version = "1.0.2"

View File

@@ -7,5 +7,8 @@ edition = "2021"
[dependencies]
blake3 = "1.5.0"
bytes = "1.5.0"
redb = "1.4.0"
varu64 = "0.7.0"
[dev-dependencies]
tempfile = "3.8.1"

View File

@@ -1,26 +1,23 @@
use crate::storage::memory::MemoryStorage;
use redb::{Database, ReadableTable, Table, TableDefinition, WriteTransaction};
use crate::{Hash, Hasher, EMPTY_HASH};
// TODO: make sure that the hash is always in sync.
// TODO: keep track of ref count and sync status in the storage, without adding it to the in memory
// representation.
// TODO: Are we creating too many hashers?
// TODO: are we calculating the rank and hash too often?
#[derive(Debug, Clone)]
/// In-memory representation of a treap node.
pub(crate) struct Node {
/// The hash of this node, uniquely identifying its key, value, and children.
hash: Hash,
// Key value
key: Box<[u8]>,
value: Hash,
// Rank
rank: Hash,
value: Box<[u8]>,
// Children
left: Option<Hash>,
right: Option<Hash>,
// Metadata that should not be encoded.
ref_count: u64,
}
#[derive(Debug)]
@@ -30,26 +27,42 @@ pub(crate) enum Branch {
}
impl Node {
pub fn new(key: &[u8], value: Hash) -> Self {
let mut hasher = Hasher::new();
hasher.update(key);
pub fn from_bytes(bytes: &[u8]) -> Self {
let (size, remaining) = varu64::decode(bytes).unwrap();
let key = remaining[..size as usize].to_vec().into_boxed_slice();
let rank = hasher.finalize();
let (size, remaining) = varu64::decode(&remaining[size as usize..]).unwrap();
let value = remaining[..size as usize].to_vec().into_boxed_slice();
let mut node = Self {
hash: EMPTY_HASH,
let left = remaining[size as usize..((size as usize) + 32)]
.try_into()
.map_or(None, |h| Some(Hash::from_bytes(h)));
key: key.into(),
let right = remaining[(size as usize) + 32..((size as usize) + 32 + 32)]
.try_into()
.map_or(None, |h| Some(Hash::from_bytes(h)));
Node {
key,
value,
left: None,
right: None,
rank,
};
left,
right,
node
ref_count: 0,
}
}
// TODO: add from bytes and remember to update its hash.
pub fn new(key: &[u8], value: &[u8]) -> Self {
Self {
key: key.into(),
value: value.into(),
left: None,
right: None,
ref_count: 0,
}
}
// TODO: remember to update its hash.
// === Getters ===
@@ -57,19 +70,10 @@ impl Node {
&self.key
}
pub(crate) fn value(&self) -> &Hash {
pub(crate) fn value(&self) -> &[u8] {
&self.value
}
pub(crate) fn rank(&self) -> &Hash {
&self.rank
}
/// Returns the hash of the node.
pub(crate) fn hash(&self) -> &Hash {
&self.hash
}
/// Returns a reference to the hash of the left child, which is `None`
/// when this node has no left child.
pub(crate) fn left(&self) -> &Option<Hash> {
&self.left
}
@@ -78,68 +82,103 @@ impl Node {
&self.right
}
// === Public Methods ===
/// Returns the rank of this node, computed on each call as the hash of
/// its key.
pub(crate) fn rank(&self) -> Hash {
    let key: &[u8] = &self.key;
    hash(key)
}
/// Returns the hash of the node.
///
/// The hash is computed on each call from the node's canonical encoding.
pub(crate) fn hash(&self) -> Hash {
    let encoded = self.canonical_encode();
    hash(&encoded)
}
/// Replaces the child on `branch` with `new_child` and stores this node.
///
/// The ref count kept in `table` for the previous child (if any) is
/// decremented and the one for `new_child` (if any) is incremented. The
/// node is then re-encoded and written to `table` under the hash of its new
/// encoding, alongside its own `ref_count`.
///
/// NOTE(review): the entry stored under the node's previous hash is not
/// touched here — confirm whether a caller is expected to clean it up.
///
/// # Panics
///
/// Panics if the re-encoded node cannot be written to `table`. The ref
/// count helpers called here can also panic.
pub(crate) fn set_child(
    &mut self,
    branch: &Branch,
    new_child: Option<Hash>,
    table: &mut Table<&[u8], (u64, &[u8])>,
) {
    let old_child = match branch {
        Branch::Left => self.left,
        Branch::Right => self.right,
    };

    // Decrement the old child's ref count: this node stops pointing at it.
    decrement_ref_count(old_child, table);

    // Increment the new child's ref count.
    increment_ref_count(new_child, table);

    // Set the new child.
    match branch {
        Branch::Left => self.left = new_child,
        Branch::Right => self.right = new_child,
    }

    let encoded = self.canonical_encode();
    let key = hash(&encoded);

    // A failed write would leave the table out of sync with the in-memory
    // node, so surface it instead of silently dropping the `Result`.
    table
        .insert(
            key.as_bytes().as_slice(),
            (self.ref_count, encoded.as_slice()),
        )
        .expect("failed to store the updated node");
}
// === Private Methods ===
pub(crate) fn update_hash(&mut self) -> Hash {
let mut hasher = Hasher::new();
fn canonical_encode(&self) -> Vec<u8> {
let mut bytes = vec![];
hasher.update(&self.key);
hasher.update(self.value.as_bytes());
hasher.update(self.left.unwrap_or(EMPTY_HASH).as_bytes());
hasher.update(self.right.unwrap_or(EMPTY_HASH).as_bytes());
// Key and value, each written through `encode`.
encode(&self.key, &mut bytes);
encode(&self.value, &mut bytes);
// Children: a missing child is passed to `encode` as an empty byte string.
encode(
    &self.left.map(|h| h.as_bytes().to_vec()).unwrap_or_default(),
    &mut bytes,
);
// The fourth field is the RIGHT child. Encoding `self.left` a second time
// here would leave the right child out of the encoding, and therefore out
// of the node hash derived from it.
encode(
    &self.right.map(|h| h.as_bytes().to_vec()).unwrap_or_default(),
    &mut bytes,
);
self.hash = hasher.finalize();
self.hash
}
/// When inserting a node, once we find its insertion point,
/// we give one of its children (depending on the direction),
/// to the current node at the insertion position, and then we
/// replace that child with the updated current node.
pub(crate) fn insertion_swap(
&mut self,
direction: Branch,
current_node: &mut Node,
storage: &mut MemoryStorage,
) {
match direction {
Branch::Left => current_node.set_child(&Branch::Left, *self.right()),
Branch::Right => current_node.set_child(&Branch::Left, *self.left()),
}
current_node.update(storage);
match direction {
Branch::Left => self.left = Some(*current_node.hash()),
Branch::Right => self.right = Some(*current_node.hash()),
}
self.update(storage);
}
pub(crate) fn set_child(&mut self, branch: &Branch, hash: Option<Hash>) {
// decrement old child's ref count.
// set children
match branch {
Branch::Left => self.left = hash,
Branch::Right => self.right = hash,
}
// TODO: increment node's ref count.
}
pub(crate) fn update(&mut self, storage: &mut MemoryStorage) -> &Hash {
// TODO: save new hash to storage.
// TODO: increment ref count.
// TODO: decrement ref count of old hash!
// let old_hash = self.hash();
self.update_hash();
storage.insert_node(self);
self.hash()
bytes
}
}
fn encode(bytes: &[u8], out: &mut Vec<u8>) {
varu64::encode(bytes.len() as u64, out);
out.extend_from_slice(bytes);
}
/// Hashes `bytes` with a fresh `Hasher` and returns the finalized digest.
fn hash(bytes: &[u8]) -> Hash {
    Hasher::new().update(bytes).finalize()
}
/// Requests a `+1` change to the ref count stored in `table` for `child`,
/// by delegating to `update_ref_count`.
fn increment_ref_count(child: Option<Hash>, table: &mut Table<&[u8], (u64, &[u8])>) {
    const DELTA: i8 = 1;
    update_ref_count(child, DELTA, table);
}
/// Requests a `-1` change to the ref count stored in `table` for `child`,
/// by delegating to `update_ref_count`.
fn decrement_ref_count(child: Option<Hash>, table: &mut Table<&[u8], (u64, &[u8])>) {
    const DELTA: i8 = -1;
    update_ref_count(child, DELTA, table);
}
/// Adjusts the ref count stored in `table` for `child` by `ref_diff`.
///
/// Does nothing when `child` is `None`. Otherwise the entry keyed by the
/// child's hash is read, its count is moved by `ref_diff` (positive values
/// add, negative values subtract), and the entry is written back with its
/// encoded bytes unchanged. The count saturates at `0` and `u64::MAX`
/// instead of wrapping.
///
/// # Panics
///
/// Panics if the table lookup fails, if there is no entry for `child`, or
/// if the updated entry cannot be written back.
fn update_ref_count(child: Option<Hash>, ref_diff: i8, table: &mut Table<&[u8], (u64, &[u8])>) {
    if let Some(hash) = child {
        let existing = table
            .get(hash.as_bytes().as_slice())
            .unwrap()
            .expect("Child shouldn't be missing!");

        let (ref_count, bytes) = {
            let (count, encoded) = existing.value();

            // Apply the signed delta without casting a negative `i8` to
            // `u64`, which would turn `-1` into `u64::MAX`.
            let magnitude = u64::from(ref_diff.unsigned_abs());
            let updated = if ref_diff >= 0 {
                count.saturating_add(magnitude)
            } else {
                count.saturating_sub(magnitude)
            };

            (updated, encoded.to_vec())
        };

        // Release the read guard before mutating the table.
        drop(existing);

        table
            .insert(hash.as_bytes().as_slice(), (ref_count, bytes.as_slice()))
            .expect("failed to store the updated ref count");
    }
}