From 582d97d242080d7c655a14dcf0b1fb35809c9c06 Mon Sep 17 00:00:00 2001 From: nazeh Date: Sun, 17 Dec 2023 18:57:52 +0300 Subject: [PATCH] wip: snapshot of the iterative approach with comments --- mast/src/lib.rs | 26 ++- mast/src/mermaid.rs | 57 +++--- mast/src/node.rs | 144 +++++++++++++++ mast/src/storage/memory.rs | 16 +- mast/src/treap.rs | 356 +++++++++++++++++++++++-------------- 5 files changed, 426 insertions(+), 173 deletions(-) create mode 100644 mast/src/node.rs diff --git a/mast/src/lib.rs b/mast/src/lib.rs index ad5e17e..88126f5 100644 --- a/mast/src/lib.rs +++ b/mast/src/lib.rs @@ -1,23 +1,37 @@ #![allow(unused)] mod mermaid; +mod node; mod storage; -mod treap; +pub mod treap; + +pub(crate) use blake3::{Hash, Hasher}; + +pub(crate) use node::Node; +pub(crate) use treap::Treap; + +// TODO: If we are going to use Iroh Bytes, might as well ues this from Iroh basics. +/// The hash for the empty byte range (`b""`). +pub(crate) const EMPTY_HASH: Hash = Hash::from_bytes([ + 175, 19, 73, 185, 245, 249, 161, 166, 160, 64, 77, 234, 54, 220, 201, 73, 155, 203, 37, 201, + 173, 193, 18, 183, 204, 154, 147, 202, 228, 31, 50, 98, +]); #[cfg(test)] mod test { - use super::mermaid; + use super::storage::memory::MemoryStorage; use super::treap::Treap; #[test] fn basic() { - let mut tree = Treap::default(); + let mut storage = MemoryStorage::new(); + let mut tree = Treap::new(&mut storage); - for i in 0..4 { - tree.insert(&[i], b"0"); + for key in ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"].iter() { + tree.insert(key.as_bytes(), b"0"); } dbg!(&tree); - // println!("{}", tree.as_mermaid_graph()) + println!("{}", tree.as_mermaid_graph()) } } diff --git a/mast/src/mermaid.rs b/mast/src/mermaid.rs index b74cde0..f3b973a 100644 --- a/mast/src/mermaid.rs +++ b/mast/src/mermaid.rs @@ -1,40 +1,45 @@ -use crate::treap::{Node, Treap}; +#[cfg(test)] +mod test { + use crate::{Node, Treap}; -impl Treap { - pub fn as_mermaid_graph(&self) -> String { - let mut graph = String::new(); + impl<'a> Treap<'a> { + pub fn as_mermaid_graph(&self) -> String { + let mut graph = String::new(); - graph.push_str("graph TD;\n"); + graph.push_str("graph TD;\n"); - if let Some(root) = self.get_node(self.root) { - self.build_graph_string(&root, &mut graph); + if let Some(root) = &self.root { + self.build_graph_string(&root, &mut graph); + } + + graph } - graph - } + fn build_graph_string(&self, node: &Node, graph: &mut String) { + let key = bytes_to_string(node.key()); + let node_label = format!("{}({})", key, key); - fn build_graph_string(&self, node: &Node, graph: &mut String) { - let key = bytes_to_string(&node.key); - let node_label = format!("{}({}:)", key, key); + graph.push_str(&format!(" {};\n", node_label)); - if let Some(left) = self.get_node(node.left) { - let key = bytes_to_string(&left.key); - let left_label = format!("{}({})", key, key); + if let Some(child) = self.get_node(node.left()) { + let key = bytes_to_string(child.key()); + let child_label = format!("{}({})", key, key); - graph.push_str(&format!(" {} --> {};\n", node_label, left_label)); - self.build_graph_string(&left, graph); - } + graph.push_str(&format!(" {} --> {};\n", node_label, child_label)); + self.build_graph_string(&child, graph); + } - if let Some(right) = self.get_node(node.right) { - let key = bytes_to_string(&right.key); - let right_label = format!("{}({})", key, key); + if let Some(child) = self.get_node(node.right()) { + let key = bytes_to_string(child.key()); + let child_label = format!("{}({})", key, key); - graph.push_str(&format!(" {} --> {};\n", node_label, right_label)); - self.build_graph_string(&right, graph); + graph.push_str(&format!(" {} --> {};\n", node_label, child_label)); + self.build_graph_string(&child, graph); + } } } -} -fn bytes_to_string(bytes: &[u8]) -> String { - bytes.iter().map(|&b| b.to_string()).collect() + fn bytes_to_string(byte: &[u8]) -> String { + String::from_utf8(byte.to_vec()).expect("Invalid utf8 key in test with mermaig graph") + } } diff --git a/mast/src/node.rs b/mast/src/node.rs new file mode 100644 index 0000000..b5c1cdc --- /dev/null +++ b/mast/src/node.rs @@ -0,0 +1,144 @@ +use crate::storage::memory::MemoryStorage; +use crate::{Hash, Hasher, EMPTY_HASH}; + +// TODO: make sure that the hash is always in sync. +// TODO: keep track of ref count and sync status in the storage, without adding it to the in memory +// representation. + +#[derive(Debug, Clone)] +/// In memory reprsentation of treap node. +pub(crate) struct Node { + /// The hash of this node, uniquely identifying its key, value, and children. + hash: Hash, + + // Key value + key: Box<[u8]>, + value: Hash, + + // Rank + rank: Hash, + + // Children + left: Option, + right: Option, +} + +pub(crate) enum Child { + Left, + Right, +} + +impl Node { + // TODO: Convert to Result, since it shouldn't be missing! + pub(crate) fn open(storage: &MemoryStorage, hash: Hash) -> Option { + storage.get_node(&hash) + } + + pub fn new(key: &[u8], value: Hash) -> Self { + let mut hasher = Hasher::new(); + hasher.update(key); + + let rank = hasher.finalize(); + + let mut node = Self { + hash: EMPTY_HASH, + + key: key.into(), + value, + left: None, + right: None, + rank, + }; + + node.update_hash(); + + node + } + + // === Getters === + + pub(crate) fn key(&self) -> &[u8] { + &self.key + } + + pub(crate) fn value(&self) -> &Hash { + &self.value + } + + pub(crate) fn rank(&self) -> &Hash { + &self.rank + } + + /// Returns the hash of the node. + pub(crate) fn hash(&self) -> &Hash { + &self.hash + } + + pub(crate) fn left(&self) -> &Option { + &self.left + } + + pub(crate) fn right(&self) -> &Option { + &self.right + } + + // === Private Methods === + + pub(crate) fn update_hash(&mut self) -> Hash { + let mut hasher = Hasher::new(); + + hasher.update(&self.key); + hasher.update(self.value.as_bytes()); + hasher.update(self.left.unwrap_or(EMPTY_HASH).as_bytes()); + hasher.update(self.right.unwrap_or(EMPTY_HASH).as_bytes()); + + self.hash = hasher.finalize(); + self.hash + } + + // /// Replace a child of this node, and return the old child. + // /// + // /// This method decrements the ref count of the old child, + // /// and incrments the ref count of the new child, + // /// + // /// but it dosn't flush any changes to the storage. + // pub(crate) fn set_child( + // &mut self, + // node: &mut Option, + // child: Child, + // storage: &MemoryStorage, + // ) -> Option { + // // Decrement old child's ref count. + // let mut old_child = match child { + // Child::Left => self.left, + // Child::Right => self.right, + // } + // .and_then(|hash| storage.get_node(&hash)); + // old_child.as_mut().map(|n| n.decrement_ref_count()); + // + // // Increment new child's ref count. + // node.as_mut().map(|n| n.increment_ref_count()); + // + // // swap children + // match child { + // Child::Left => self.left = node.as_mut().map(|n| n.update_hash()), + // Child::Right => self.right = node.as_mut().map(|n| n.update_hash()), + // } + // + // // Update this node's hash. + // self.update_hash(); + // + // old_child + // } + + pub(crate) fn set_child_hash(&mut self, child: Child, hash: Hash) { + // Swap the child. + match child { + Child::Left => self.left = Some(hash), + Child::Right => self.right = Some(hash), + } + + // Update this node's hash, after updating the child. + self.update_hash(); + } +} diff --git a/mast/src/storage/memory.rs b/mast/src/storage/memory.rs index 0b3a1ac..6025e3c 100644 --- a/mast/src/storage/memory.rs +++ b/mast/src/storage/memory.rs @@ -1,10 +1,11 @@ use blake3::Hash; use std::collections::HashMap; -use crate::treap::Node; +use crate::Node; #[derive(Debug)] pub struct MemoryStorage { + roots: HashMap, Node>, nodes: HashMap, blobs: HashMap>, } @@ -12,20 +13,27 @@ pub struct MemoryStorage { impl MemoryStorage { pub(crate) fn new() -> Self { Self { + roots: HashMap::new(), nodes: HashMap::new(), blobs: HashMap::new(), } } + // TODO: return result or something. + + pub(crate) fn insert_root(&mut self, name: &[u8], node: Node) { + self.roots.insert(name.into(), node); + } + pub(crate) fn insert_node(&mut self, node: &Node) { - self.nodes.insert(node.hash(), node.clone()); + self.nodes.insert(*node.hash(), node.clone()); } pub(crate) fn insert_blob(&mut self, hash: Hash, blob: &[u8]) { self.blobs.insert(hash, blob.into()); } - pub(crate) fn get_node(&self, hash: &Hash) -> Option<&Node> { - self.nodes.get(hash) + pub(crate) fn get_node(&self, hash: &Hash) -> Option { + self.nodes.get(hash).cloned() } } diff --git a/mast/src/treap.rs b/mast/src/treap.rs index 490d779..709a5fc 100644 --- a/mast/src/treap.rs +++ b/mast/src/treap.rs @@ -1,89 +1,49 @@ use blake3::{Hash, Hasher}; -use std::cmp::{self, Ordering}; -use std::collections::HashMap; -use std::mem; -use std::ops::Deref; - +use crate::node::Child; use crate::storage::memory::MemoryStorage; +use crate::Node; -const EMPTY_HASH: Hash = Hash::from_bytes([0_u8; 32]); - -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct Node { - pub(crate) key: Box<[u8]>, - pub(crate) value: Hash, - pub(crate) rank: Hash, - pub(crate) left: Hash, - pub(crate) right: Hash, +#[derive(Debug)] +pub struct Treap<'a> { + pub(crate) storage: &'a mut MemoryStorage, + pub(crate) root: Option, } -impl Node { - fn new(key: &[u8], value: Hash) -> Self { - let mut hasher = Hasher::new(); - hasher.update(key); +// TODO: pass a transaction. +fn insert( + node: &mut Node, + root: Option, + storage: MemoryStorage, + changed: &mut Vec, +) -> Node { + let root = root.and_then(|hash| storage.get_node(&hash)); - let rank = hasher.finalize(); + if root.is_none() { + return node.clone(); + } - Self { - key: key.into(), - value, - left: EMPTY_HASH, - right: EMPTY_HASH, - rank, + let mut root = root.unwrap(); + + if node.key() < root.key() { + if insert(node, *root.left(), storage, changed).key() == node.key() { + if node.rank().as_bytes() < root.rank().as_bytes() { + root.set_child_hash(Child::Left, *node.hash()) + } else { + // root.set_child_hash(Child::Left, *node.right()); + node.set_child_hash(Child::Right, *root.hash()); + } } } - /// Returns the hash of the node. - pub fn hash(&self) -> Hash { - let mut hasher = Hasher::new(); - - hasher.update(&self.key); - hasher.update(self.value.as_bytes()); - hasher.update(self.left.as_bytes()); - hasher.update(self.right.as_bytes()); - - hasher.finalize() - } - - fn to_bytes(&self) -> Box<[u8]> { - let mut bytes = vec![]; - - bytes.extend_from_slice(self.value.as_bytes()); - bytes.extend_from_slice(self.left.as_bytes()); - bytes.extend_from_slice(self.right.as_bytes()); - bytes.extend_from_slice(&self.key); - - bytes.into_boxed_slice() - } - - fn from_bytes(bytes: &Box<[u8]>) -> Self { - // TODO: Make sure that bytes is long enough at least >96 bytes. - - let mut node = Self::new( - &bytes[96..], - Hash::from_bytes(bytes[..32].try_into().unwrap()), - ); - - node.left = Hash::from_bytes(bytes[32..64].try_into().unwrap()); - node.right = Hash::from_bytes(bytes[64..96].try_into().unwrap()); - - node - } - - fn set_left(&mut self, left: Hash, storage: &mut MemoryStorage) {} + return root; } -#[derive(Debug)] -pub struct Treap { - pub(crate) root: Hash, - storage: MemoryStorage, -} - -impl Treap { - pub fn new(storage: MemoryStorage) -> Self { +impl<'a> Treap<'a> { + // TODO: add name to open from storage with. + pub fn new(storage: &'a mut MemoryStorage) -> Self { Self { - root: EMPTY_HASH, + root: None, storage, } } @@ -92,66 +52,165 @@ impl Treap { let value = self.insert_blob(value); let mut node = Node::new(key, value); - // TODO: batch inserting updated nodes. + let mut changed: Vec = vec![]; - let new_root = self.insert_impl(&mut node, self.root); - self.root = new_root.hash(); + insert( + &mut node, + Some(self.root.hash()), + self.storage, + &mut changed, + ) } - // Recursive insertion (unzipping) algorithm. + // pub fn insert(&mut self, key: &[u8], value: &[u8]) { + // let value = self.insert_blob(value); + // let mut node = Node::new(key, value); // - // Returns the new root node. - fn insert_impl(&mut self, x: &mut Node, root_hash: Hash) -> Node { - if let Some(mut root) = self.get_node(root_hash) { - if x.key < root.key { - if self.insert_impl(x, root.left).key == x.key { - if x.rank.as_bytes() < root.rank.as_bytes() { - root.left = self.store_node(x); - self.store_node(&root); - } else { - root.left = x.right; - x.right = self.store_node(&root); - - self.store_node(x); - return x.clone(); - } - } - } else { - if self.insert_impl(x, root.right).key == x.key { - if x.rank.as_bytes() < root.rank.as_bytes() { - root.right = self.store_node(x); - - self.store_node(&root); - } else { - root.right = x.left; - x.right = self.store_node(&root); - - self.store_node(x); - - return x.clone(); - } - } - } - - self.store_node(&root); - - return root; - } else { - self.store_node(x); - - return x.clone(); - } - } - - /// Store a node after it has been modified and had a new hash. - fn store_node(&mut self, node: &Node) -> Hash { - // TODO: save the hash somewhere in the Node instead of hashing it again. - - let hash = node.hash(); - self.storage.insert_node(node); - - hash - } + // // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm. + // // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf). + // + // // Let's say we have the following treap: + // // + // // F + // // / \ + // // D P + // // / / \ + // // C H X + // // / / \ \ + // // A G M Y + // // / + // // I + // // + // // We focus on the binary search path for J, in this case [F, P, H, M, I]: + // // + // // F < J + // // \ + // // J < P + // // / + // // H < J + // // \ + // // J < M + // // / + // // I < J + // // + // // First we traverse until we reach the insertion point, in this case H, + // // because J has a higher rank than H, but lower than F and P; + // + // let mut path: Vec = Vec::new(); + // + // let mut current = self.root.clone(); + // + // while let Some(curr) = current { + // if node.rank().as_bytes() > curr.rank().as_bytes() { + // // We reached the insertion point. + // // rank can't be equal, as we are using a secure hashing funciton. + // break; + // } + // + // path.push(curr.clone()); + // + // if node.key() < curr.key() { + // current = self.get_node(curr.left()); + // } else { + // current = self.get_node(curr.right()); + // } + // } + // + // if let Some(mut prev) = path.last_mut() { + // let old = prev.clone(); + // + // // TODO: pass transaction here. + // if node.key() < prev.key() { + // prev.set_child_hash(Child::Left, node.update_hash()) + // } else { + // prev.set_child_hash(Child::Right, node.update_hash()) + // } + // + // self.storage.insert_node(&prev); + // dbg!((old, prev)); + // } else { + // // The insertion point is at the root node, either because the tree is empty, + // // or because the root node has lower rank than the new node. + // + // self.root = Some(node.clone()); + // } + // + // dbg!(&path); + // + // // then Unzip the rest of the path: + // // + // // In the example above these are [H, M] + // // + // // F + // // \ + // // P + // // / + // // J < Insertion point. + // // / connect J to H to the left + // // H < Unzip + // // \\ + // // M + // // // + // // I + // // + // // if let Some(curr) = current { + // // if node.key() < curr.key() { + // // node.set_child_hash(Child::Right, *curr.hash()) + // // } else { + // // node.set_child_hash(Child::Left, *curr.hash()) + // // } + // // } else { + // // // We reached the endo of the searhc path, and inserted a leaf node. + // // return; + // // } + // + // // The unsizipped path should look like: + // // + // // F + // // \ + // // P + // // / + // // J + // // // \\ + // // H M < See how that looks like unzipping? :) + // // \\ + // // I + // // + // + // // if let Some(curr) = current { + // // // We reached the insertion (unzipping point); + // // } else { + // // // We reached the end of the search path, this is equivilant of + // // // J having lower rank than I, so we insert J as a leaf node. + // // + // // // There has to be a node, because we already checked at the beginning + // // // that the tree is not empty. + // // if let Some(current_leaf) = previous { + // // if key < current_leaf.key() { + // // // Insert as a left child. + // // // let old_child = self.update_child(current_leaf, Child::Left, node); + // // } else { + // // // Insert as a right child. + // // let old_child = self.update_child(current_leaf, Child::Right, node); + // // } + // // } + // // } + // + // // So the final tree should look like: + // // + // // F + // // / \ + // // D P + // // / / \ + // // C J X + // // / / \ \ + // // A H M Y + // // / \ + // // G I + // + // // Finally we should commit the changes to the storage. + // // TODO: commit + // } // TODO: Add stream input API. fn insert_blob(&mut self, blob: &[u8]) -> Hash { @@ -159,19 +218,42 @@ impl Treap { hasher.update(blob); let hash = hasher.finalize(); - self.storage.insert_blob(hash, blob.into()); + self.storage.insert_blob(hash, blob); hash } - // TODO: move to storage abstraction. - pub(crate) fn get_node(&self, hash: Hash) -> Option { - self.storage.get_node(&hash).cloned() - } -} + // === Private Methods === -impl Default for Treap { - fn default() -> Self { - Self::new(MemoryStorage::new()) + pub(crate) fn get_node(&self, hash: &Option) -> Option { + hash.and_then(|h| self.storage.get_node(&h)) } + + // /// Replace a child of a node, and return the old child. + // /// + // /// Also decrements the ref_count of the old child, + // /// and incrments the ref_count of the new child, + // /// + // /// but it dosn't flush any changes to the storage yet. + // pub(crate) fn update_child( + // &self, + // node: &mut Node, + // child: Child, + // new_child: Node, + // ) -> Option { + // // Decrement old child's ref count. + // let mut old_child = match child { + // Child::Left => node.left(), + // Child::Right => node.right(), + // } + // .and_then(|hash| self.storage.get_node(&hash)); + // old_child.as_mut().map(|n| n.decrement_ref_count()); + // + // // Increment new child's ref count. + // node.increment_ref_count(); + // + // node.set_child_hash(child, node.hash().clone()); + // + // old_child + // } }