From 60ff54a651b531bb66245dadd4a871f6e9c4c7de Mon Sep 17 00:00:00 2001 From: nazeh Date: Mon, 18 Dec 2023 13:19:46 +0300 Subject: [PATCH] wip: insertion still messed up, but getting closer --- mast/src/lib.rs | 21 +-- mast/src/mermaid.rs | 6 +- mast/src/node.rs | 99 +++++----- mast/src/treap.rs | 444 ++++++++++++++++++++++---------------------- 4 files changed, 280 insertions(+), 290 deletions(-) diff --git a/mast/src/lib.rs b/mast/src/lib.rs index 88126f5..dfadc2d 100644 --- a/mast/src/lib.rs +++ b/mast/src/lib.rs @@ -8,7 +8,7 @@ pub mod treap; pub(crate) use blake3::{Hash, Hasher}; pub(crate) use node::Node; -pub(crate) use treap::Treap; +pub(crate) use treap::HashTreap; // TODO: If we are going to use Iroh Bytes, might as well ues this from Iroh basics. /// The hash for the empty byte range (`b""`). @@ -16,22 +16,3 @@ pub(crate) const EMPTY_HASH: Hash = Hash::from_bytes([ 175, 19, 73, 185, 245, 249, 161, 166, 160, 64, 77, 234, 54, 220, 201, 73, 155, 203, 37, 201, 173, 193, 18, 183, 204, 154, 147, 202, 228, 31, 50, 98, ]); - -#[cfg(test)] -mod test { - use super::storage::memory::MemoryStorage; - use super::treap::Treap; - - #[test] - fn basic() { - let mut storage = MemoryStorage::new(); - let mut tree = Treap::new(&mut storage); - - for key in ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"].iter() { - tree.insert(key.as_bytes(), b"0"); - } - - dbg!(&tree); - println!("{}", tree.as_mermaid_graph()) - } -} diff --git a/mast/src/mermaid.rs b/mast/src/mermaid.rs index f3b973a..ad2e28d 100644 --- a/mast/src/mermaid.rs +++ b/mast/src/mermaid.rs @@ -1,14 +1,14 @@ #[cfg(test)] mod test { - use crate::{Node, Treap}; + use crate::{HashTreap, Node}; - impl<'a> Treap<'a> { + impl<'a> HashTreap<'a> { pub fn as_mermaid_graph(&self) -> String { let mut graph = String::new(); graph.push_str("graph TD;\n"); - if let Some(root) = &self.root { + if let Some(root) = self.get_node(&self.root) { self.build_graph_string(&root, &mut graph); } diff --git a/mast/src/node.rs 
b/mast/src/node.rs index b5c1cdc..f1098e1 100644 --- a/mast/src/node.rs +++ b/mast/src/node.rs @@ -23,17 +23,13 @@ pub(crate) struct Node { right: Option, } -pub(crate) enum Child { +#[derive(Debug)] +pub(crate) enum Branch { Left, Right, } impl Node { - // TODO: Convert to Result, since it shouldn't be missing! - pub(crate) fn open(storage: &MemoryStorage, hash: Hash) -> Option { - storage.get_node(&hash) - } - pub fn new(key: &[u8], value: Hash) -> Self { let mut hasher = Hasher::new(); hasher.update(key); @@ -50,11 +46,11 @@ impl Node { rank, }; - node.update_hash(); - node } + // TODO: add from bytes and remember to update its hash. + // === Getters === pub(crate) fn key(&self) -> &[u8] { @@ -96,49 +92,54 @@ impl Node { self.hash } - // /// Replace a child of this node, and return the old child. - // /// - // /// This method decrements the ref count of the old child, - // /// and incrments the ref count of the new child, - // /// - // /// but it dosn't flush any changes to the storage. - // pub(crate) fn set_child( - // &mut self, - // node: &mut Option, - // child: Child, - // storage: &MemoryStorage, - // ) -> Option { - // // Decrement old child's ref count. - // let mut old_child = match child { - // Child::Left => self.left, - // Child::Right => self.right, - // } - // .and_then(|hash| storage.get_node(&hash)); - // old_child.as_mut().map(|n| n.decrement_ref_count()); - // - // // Increment new child's ref count. - // node.as_mut().map(|n| n.increment_ref_count()); - // - // // swap children - // match child { - // Child::Left => self.left = node.as_mut().map(|n| n.update_hash()), - // Child::Right => self.right = node.as_mut().map(|n| n.update_hash()), - // } - // - // // Update this node's hash. - // self.update_hash(); - // - // old_child - // } - - pub(crate) fn set_child_hash(&mut self, child: Child, hash: Hash) { - // Swap the child. 
- match child { - Child::Left => self.left = Some(hash), - Child::Right => self.right = Some(hash), + /// When inserting a node, once we find its insertion point, + /// we give one of its children (depending on the direction), + /// to the current node at the insertion position, and then we + /// replace that child with the updated current node. + pub(crate) fn insertion_swap( + &mut self, + direction: Branch, + current_node: &mut Node, + storage: &mut MemoryStorage, + ) { + match direction { + Branch::Left => current_node.set_child(&Branch::Left, *self.right()), + Branch::Right => current_node.set_child(&Branch::Left, *self.left()), } - // Update this node's hash, after updating the child. + current_node.update(storage); + + match direction { + Branch::Left => self.left = Some(*current_node.hash()), + Branch::Right => self.right = Some(*current_node.hash()), + } + + self.update(storage); + } + + pub(crate) fn set_child(&mut self, branch: &Branch, hash: Option) { + // decrement old child's ref count. + + // set children + match branch { + Branch::Left => self.left = hash, + Branch::Right => self.right = hash, + } + + // TODO: increment node's ref count. + } + + pub(crate) fn update(&mut self, storage: &mut MemoryStorage) -> &Hash { + // TODO: save new hash to storage. + // TODO: increment ref count. + // TODO: decrement ref count of old hash! + + // let old_hash = self.hash(); + self.update_hash(); + + storage.insert_node(self); + + self.hash() } } diff --git a/mast/src/treap.rs b/mast/src/treap.rs index 709a5fc..826e8c7 100644 --- a/mast/src/treap.rs +++ b/mast/src/treap.rs @@ -1,45 +1,16 @@ use blake3::{Hash, Hasher}; -use crate::node::Child; +use crate::node::Branch; use crate::storage::memory::MemoryStorage; use crate::Node; #[derive(Debug)] -pub struct Treap<'a> { +pub struct HashTreap<'a> { pub(crate) storage: &'a mut MemoryStorage, - pub(crate) root: Option, + pub(crate) root: Option, } -// TODO: pass a transaction. 
-fn insert( - node: &mut Node, - root: Option, - storage: MemoryStorage, - changed: &mut Vec, -) -> Node { - let root = root.and_then(|hash| storage.get_node(&hash)); - - if root.is_none() { - return node.clone(); - } - - let mut root = root.unwrap(); - - if node.key() < root.key() { - if insert(node, *root.left(), storage, changed).key() == node.key() { - if node.rank().as_bytes() < root.rank().as_bytes() { - root.set_child_hash(Child::Left, *node.hash()) - } else { - // root.set_child_hash(Child::Left, *node.right()); - node.set_child_hash(Child::Right, *root.hash()); - } - } - } - - return root; -} - -impl<'a> Treap<'a> { +impl<'a> HashTreap<'a> { // TODO: add name to open from storage with. pub fn new(storage: &'a mut MemoryStorage) -> Self { Self { @@ -49,168 +20,181 @@ impl<'a> Treap<'a> { } pub fn insert(&mut self, key: &[u8], value: &[u8]) { + // TODO: validate key and value length. + let value = self.insert_blob(value); let mut node = Node::new(key, value); - let mut changed: Vec = vec![]; + println!( + "\n New insert {:?}", + String::from_utf8(key.to_vec()).unwrap() + ); - insert( - &mut node, - Some(self.root.hash()), - self.storage, - &mut changed, - ) + if self.root.is_none() { + self.update_root(*node.hash()); + return; + } + + // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm. + // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf). + // The difference here is that in a Hash Treap, we need to update nodes bottom up. + + // Let's say we have the following tree: + // + // F + // / \ + // D P + // / / \ + // C H X + // / / \ \ + // A G M Y + // / + // I + // + // First we mark the binary search path to the leaf, going right if the key is greater than + // the current node's key and vice versa. + // + // F + // \ + // P + // / + // H + // \ + // M + // / + // I + // + + // Path before insertion point. 
(Node, Branch to update) + let mut top_path: Vec<(Node, Branch)> = Vec::new(); + // Subtree of nodes on the path smaller than the inserted key. + let mut left_unzip_path: Vec = Vec::new(); + // Subtree of nodes on the path larger than the inserted key. + let mut right_unzip_path: Vec = Vec::new(); + + let mut next = self.root; + + // Top down traversal of the binary search path. + while let Some(current) = self.get_node(&next) { + let should_zip = node.rank().as_bytes() > current.rank().as_bytes(); + + // Traverse left or right. + if key < current.key() { + next = *current.left(); + + if should_zip { + left_unzip_path.push(current) + } else { + top_path.push((current, Branch::Left)); + } + } else { + next = *current.right(); + + if should_zip { + right_unzip_path.push(current) + } else { + top_path.push((current, Branch::Right)); + } + }; + } + dbg!(( + "Out of the first loop", + &top_path, + &left_unzip_path, + &right_unzip_path + )); + + // === Updating hashes bottom up === + + // We are at the unzipping part of the path. + // + // First do the unzipping bottom up. + // + // H + // \ + // M < current_right + // / + // I < current_left + // + // Into (hopefully you can see the "unzipping"): + // + // left right + // subtree subtree + // + // H | + // \ | + // I | M + + while left_unzip_path.len() > 1 { + let child = left_unzip_path.pop().unwrap(); + let mut parent = left_unzip_path.last_mut().unwrap(); + + parent.set_child(&Branch::Right, Some(*child.hash())); + parent.update(self.storage); + } + + while right_unzip_path.len() > 1 { + let child = right_unzip_path.pop().unwrap(); + let mut parent = right_unzip_path.last_mut().unwrap(); + + parent.set_child(&Branch::Left, Some(*child.hash())); + parent.update(self.storage); + } + + // Done unzipping, join the current_left and current_right to J and update hashes upwards. + // + // J < Insertion point. 
+ // / \ + // H M + // \ + // I + + node.set_child(&Branch::Left, left_unzip_path.first().map(|n| *n.hash())); + node.set_child(&Branch::Right, left_unzip_path.first().map(|n| *n.hash())); + node.update(self.storage); + + // Update the rest of the path upwards with the new hashes. + // So the final tree should look like: + // + // F + // / \ + // D P + // / / \ + // C J X + // / / \ \ + // A H M Y + // / \ + // G I + + if top_path.is_empty() { + // The insertion point is at the root and we are done. + self.update_root(*node.hash()) + } + + let mut previous = node; + + while let Some((mut parent, branch)) = top_path.pop() { + parent.set_child(&branch, Some(*previous.hash())); + parent.update(self.storage); + + previous = parent; + } + + // Update the root pointer. + self.update_root(*previous.hash()) + + // Finally we should commit the changes to the storage. + // TODO: commit } - // pub fn insert(&mut self, key: &[u8], value: &[u8]) { - // let value = self.insert_blob(value); - // let mut node = Node::new(key, value); - // - // // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm. - // // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf). 
- // - // // Let's say we have the following treap: - // // - // // F - // // / \ - // // D P - // // / / \ - // // C H X - // // / / \ \ - // // A G M Y - // // / - // // I - // // - // // We focus on the binary search path for J, in this case [F, P, H, M, I]: - // // - // // F < J - // // \ - // // J < P - // // / - // // H < J - // // \ - // // J < M - // // / - // // I < J - // // - // // First we traverse until we reach the insertion point, in this case H, - // // because J has a higher rank than H, but lower than F and P; - // - // let mut path: Vec = Vec::new(); - // - // let mut current = self.root.clone(); - // - // while let Some(curr) = current { - // if node.rank().as_bytes() > curr.rank().as_bytes() { - // // We reached the insertion point. - // // rank can't be equal, as we are using a secure hashing funciton. - // break; - // } - // - // path.push(curr.clone()); - // - // if node.key() < curr.key() { - // current = self.get_node(curr.left()); - // } else { - // current = self.get_node(curr.right()); - // } - // } - // - // if let Some(mut prev) = path.last_mut() { - // let old = prev.clone(); - // - // // TODO: pass transaction here. - // if node.key() < prev.key() { - // prev.set_child_hash(Child::Left, node.update_hash()) - // } else { - // prev.set_child_hash(Child::Right, node.update_hash()) - // } - // - // self.storage.insert_node(&prev); - // dbg!((old, prev)); - // } else { - // // The insertion point is at the root node, either because the tree is empty, - // // or because the root node has lower rank than the new node. - // - // self.root = Some(node.clone()); - // } - // - // dbg!(&path); - // - // // then Unzip the rest of the path: - // // - // // In the example above these are [H, M] - // // - // // F - // // \ - // // P - // // / - // // J < Insertion point. 
- // // / connect J to H to the left - // // H < Unzip - // // \\ - // // M - // // // - // // I - // // - // // if let Some(curr) = current { - // // if node.key() < curr.key() { - // // node.set_child_hash(Child::Right, *curr.hash()) - // // } else { - // // node.set_child_hash(Child::Left, *curr.hash()) - // // } - // // } else { - // // // We reached the endo of the searhc path, and inserted a leaf node. - // // return; - // // } - // - // // The unsizipped path should look like: - // // - // // F - // // \ - // // P - // // / - // // J - // // // \\ - // // H M < See how that looks like unzipping? :) - // // \\ - // // I - // // - // - // // if let Some(curr) = current { - // // // We reached the insertion (unzipping point); - // // } else { - // // // We reached the end of the search path, this is equivilant of - // // // J having lower rank than I, so we insert J as a leaf node. - // // - // // // There has to be a node, because we already checked at the beginning - // // // that the tree is not empty. - // // if let Some(current_leaf) = previous { - // // if key < current_leaf.key() { - // // // Insert as a left child. - // // // let old_child = self.update_child(current_leaf, Child::Left, node); - // // } else { - // // // Insert as a right child. - // // let old_child = self.update_child(current_leaf, Child::Right, node); - // // } - // // } - // // } - // - // // So the final tree should look like: - // // - // // F - // // / \ - // // D P - // // / / \ - // // C J X - // // / / \ \ - // // A H M Y - // // / \ - // // G I - // - // // Finally we should commit the changes to the storage. - // // TODO: commit - // } + // === Private Methods === + + fn update_root(&mut self, hash: Hash) { + // The tree is empty, the incoming node has to be the root, and we are done. + self.root = Some(hash); + + // TODO: we need to persist the root change too to the storage. + } // TODO: Add stream input API. 
fn insert_blob(&mut self, blob: &[u8]) -> Hash { @@ -223,37 +207,61 @@ impl<'a> Treap<'a> { hash } - // === Private Methods === - pub(crate) fn get_node(&self, hash: &Option) -> Option { hash.and_then(|h| self.storage.get_node(&h)) } - // /// Replace a child of a node, and return the old child. - // /// - // /// Also decrements the ref_count of the old child, - // /// and incrments the ref_count of the new child, - // /// - // /// but it dosn't flush any changes to the storage yet. - // pub(crate) fn update_child( - // &self, - // node: &mut Node, - // child: Child, - // new_child: Node, - // ) -> Option { - // // Decrement old child's ref count. - // let mut old_child = match child { - // Child::Left => node.left(), - // Child::Right => node.right(), - // } - // .and_then(|hash| self.storage.get_node(&hash)); - // old_child.as_mut().map(|n| n.decrement_ref_count()); - // - // // Increment new child's ref count. - // node.increment_ref_count(); - // - // node.set_child_hash(child, node.hash().clone()); - // - // old_child - // } + // === Test Methods === + + #[cfg(test)] + fn verify_ranks(&self) -> bool { + let node = self.get_node(&self.root); + self.check_rank(node) + } + + #[cfg(test)] + fn check_rank(&self, node: Option) -> bool { + match node { + Some(n) => { + let left_check = self.get_node(n.left()).map_or(true, |left| { + n.rank().as_bytes() > left.rank().as_bytes() && self.check_rank(Some(left)) + }); + let right_check = self.get_node(n.right()).map_or(true, |right| { + n.rank().as_bytes() > right.rank().as_bytes() && self.check_rank(Some(right)) + }); + + left_check && right_check + } + None => true, + } + } +} + +#[cfg(test)] +mod test { + use super::HashTreap; + use super::MemoryStorage; + use super::Node; + + #[test] + fn basic() { + let mut storage = MemoryStorage::new(); + let mut treap = HashTreap::new(&mut storage); + + // let mut keys = ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"]; + let mut keys = [ + "D", "N", "P", "X", "F", "Z", "Y", "A", 
"G", "C", "M", "H", "I", "J", + ]; + // let mut keys = ["A", "B", "C"]; + // keys.reverse(); + // keys.reverse(); // Overflowing stack! damn recursion. + + for key in keys.iter() { + treap.insert(key.as_bytes(), b"0"); + } + + assert!(treap.verify_ranks()); + // dbg!(&tree); + println!("{}", treap.as_mermaid_graph()) + } }