wip: insertion still messed up, but getting closer

This commit is contained in:
nazeh
2023-12-18 13:19:46 +03:00
parent 582d97d242
commit 60ff54a651
4 changed files with 280 additions and 290 deletions

View File

@@ -8,7 +8,7 @@ pub mod treap;
pub(crate) use blake3::{Hash, Hasher};
pub(crate) use node::Node;
pub(crate) use treap::Treap;
pub(crate) use treap::HashTreap;
// TODO: If we are going to use Iroh Bytes, might as well use this from Iroh basics.
/// The hash for the empty byte range (`b""`).
@@ -16,22 +16,3 @@ pub(crate) const EMPTY_HASH: Hash = Hash::from_bytes([
175, 19, 73, 185, 245, 249, 161, 166, 160, 64, 77, 234, 54, 220, 201, 73, 155, 203, 37, 201,
173, 193, 18, 183, 204, 154, 147, 202, 228, 31, 50, 98,
]);
#[cfg(test)]
mod test {
    use super::storage::memory::MemoryStorage;
    use super::treap::Treap;

    /// Smoke test: inserts a handful of sorted keys and prints the resulting
    /// tree, both as a debug dump and as a mermaid graph.
    #[test]
    fn basic() {
        let mut storage = MemoryStorage::new();
        let mut tree = Treap::new(&mut storage);

        // Every key gets the same dummy value; only the tree shape matters here.
        ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"]
            .iter()
            .for_each(|key| {
                tree.insert(key.as_bytes(), b"0");
            });

        dbg!(&tree);
        println!("{}", tree.as_mermaid_graph())
    }
}

View File

@@ -1,14 +1,14 @@
#[cfg(test)]
mod test {
use crate::{Node, Treap};
use crate::{HashTreap, Node};
impl<'a> Treap<'a> {
impl<'a> HashTreap<'a> {
pub fn as_mermaid_graph(&self) -> String {
let mut graph = String::new();
graph.push_str("graph TD;\n");
if let Some(root) = &self.root {
if let Some(root) = self.get_node(&self.root) {
self.build_graph_string(&root, &mut graph);
}

View File

@@ -23,17 +23,13 @@ pub(crate) struct Node {
right: Option<Hash>,
}
pub(crate) enum Child {
/// Selects which child slot of a node an operation targets.
#[derive(Debug)]
pub(crate) enum Branch {
/// The node's `left` child.
Left,
/// The node's `right` child.
Right,
}
impl Node {
/// Looks up the node stored under `hash` and returns whatever
/// `storage.get_node` yields for it.
// TODO: Convert to Result, since it shouldn't be missing!
pub(crate) fn open(storage: &MemoryStorage, hash: Hash) -> Option<Self> {
storage.get_node(&hash)
}
pub fn new(key: &[u8], value: Hash) -> Self {
let mut hasher = Hasher::new();
hasher.update(key);
@@ -50,11 +46,11 @@ impl Node {
rank,
};
node.update_hash();
node
}
// TODO: add from bytes and remember to update its hash.
// === Getters ===
pub(crate) fn key(&self) -> &[u8] {
@@ -96,49 +92,54 @@ impl Node {
self.hash
}
// /// Replace a child of this node, and return the old child.
// ///
// /// This method decrements the ref count of the old child,
// /// and increments the ref count of the new child,
// ///
// /// but it doesn't flush any changes to the storage.
// pub(crate) fn set_child(
// &mut self,
// node: &mut Option<Node>,
// child: Child,
// storage: &MemoryStorage,
// ) -> Option<Node> {
// // Decrement old child's ref count.
// let mut old_child = match child {
// Child::Left => self.left,
// Child::Right => self.right,
// }
// .and_then(|hash| storage.get_node(&hash));
// old_child.as_mut().map(|n| n.decrement_ref_count());
//
// // Increment new child's ref count.
// node.as_mut().map(|n| n.increment_ref_count());
//
// // swap children
// match child {
// Child::Left => self.left = node.as_mut().map(|n| n.update_hash()),
// Child::Right => self.right = node.as_mut().map(|n| n.update_hash()),
// }
//
// // Update this node's hash.
// self.update_hash();
//
// old_child
// }
pub(crate) fn set_child_hash(&mut self, child: Child, hash: Hash) {
// Swap the child.
match child {
Child::Left => self.left = Some(hash),
Child::Right => self.right = Some(hash),
/// When inserting a node, once we find its insertion point,
/// we give one of its children (depending on the direction),
/// to the current node at the insertion position, and then we
/// replace that child with the updated current node.
///
/// `self` ends up as the parent of `current_node` on the `direction` side.
/// `current_node` is updated and written to `storage` first, so that `self`
/// records the child's final hash before being updated and written itself.
///
/// NOTE(review): both arms of the first `match` write to the `Left` slot of
/// `current_node`. In the `Branch::Left` arm that hands `self`'s right
/// subtree to a node that becomes `self`'s left child, which looks like it
/// breaks the key ordering -- presumably one arm should target
/// `Branch::Right`. Confirm the intended rotation before relying on this.
pub(crate) fn insertion_swap(
&mut self,
direction: Branch,
current_node: &mut Node,
storage: &mut MemoryStorage,
) {
match direction {
Branch::Left => current_node.set_child(&Branch::Left, *self.right()),
Branch::Right => current_node.set_child(&Branch::Left, *self.left()),
}
// Update the current node's hash (and store it) after changing its child.
current_node.update(storage);
// Replace our own child on the `direction` side with the updated current node.
match direction {
Branch::Left => self.left = Some(*current_node.hash()),
Branch::Right => self.right = Some(*current_node.hash()),
}
// Our child pointer changed, so update and store this node as well.
self.update(storage);
}
/// Points the `branch` side of this node at `hash`; passing `None` detaches
/// the child on that side.
///
/// Only the child field is written here. The node's own hash is not
/// touched and nothing is written to storage.
pub(crate) fn set_child(&mut self, branch: &Branch, hash: Option<Hash>) {
    // TODO: decrement the old child's ref count.

    // Pick the slot first, then overwrite it.
    let slot = match branch {
        Branch::Left => &mut self.left,
        Branch::Right => &mut self.right,
    };
    *slot = hash;

    // TODO: increment the new child's ref count.
}
/// Refreshes this node's hash via `update_hash`, writes the node to
/// `storage`, and returns a reference to the node's hash.
///
/// Call this after changing a child so that the stored node matches the
/// in-memory one.
pub(crate) fn update(&mut self, storage: &mut MemoryStorage) -> &Hash {
// TODO: save new hash to storage.
// TODO: increment ref count.
// TODO: decrement ref count of old hash!
// let old_hash = self.hash();
// Order matters: refresh the hash before the node is handed to storage.
self.update_hash();
storage.insert_node(self);
self.hash()
}
}

View File

@@ -1,45 +1,16 @@
use blake3::{Hash, Hasher};
use crate::node::Child;
use crate::node::Branch;
use crate::storage::memory::MemoryStorage;
use crate::Node;
#[derive(Debug)]
pub struct Treap<'a> {
pub struct HashTreap<'a> {
pub(crate) storage: &'a mut MemoryStorage,
pub(crate) root: Option<Node>,
pub(crate) root: Option<Hash>,
}
// TODO: pass a transaction.
/// Early recursive insertion attempt (work in progress).
///
/// Loads the subtree root stored under `root`; when there is none, returns a
/// clone of `node`. Otherwise, for keys smaller than the root's key, it
/// recurses into the left subtree and, when the recursion hands back `node`
/// itself, links the two nodes according to their ranks.
///
/// NOTE(review): keys greater than or equal to the root's key are not
/// handled (the root is returned unchanged), `changed` is only passed along
/// and never filled, and the modified nodes are not written back to
/// `storage`.
fn insert(
node: &mut Node,
root: Option<Hash>,
storage: MemoryStorage,
changed: &mut Vec<Node>,
) -> Node {
let root = root.and_then(|hash| storage.get_node(&hash));
// Empty subtree: the new node takes this position.
if root.is_none() {
return node.clone();
}
let mut root = root.unwrap();
if node.key() < root.key() {
// Only act when the recursion returned the inserted node itself.
if insert(node, *root.left(), storage, changed).key() == node.key() {
if node.rank().as_bytes() < root.rank().as_bytes() {
// Lower rank: the new node hangs below the root, on its left.
root.set_child_hash(Child::Left, *node.hash())
} else {
// Otherwise the root is attached as the new node's right child.
// root.set_child_hash(Child::Left, *node.right());
node.set_child_hash(Child::Right, *root.hash());
}
}
}
return root;
}
impl<'a> Treap<'a> {
impl<'a> HashTreap<'a> {
// TODO: add name to open from storage with.
pub fn new(storage: &'a mut MemoryStorage) -> Self {
Self {
@@ -49,168 +20,181 @@ impl<'a> Treap<'a> {
}
pub fn insert(&mut self, key: &[u8], value: &[u8]) {
// TODO: validate key and value length.
let value = self.insert_blob(value);
let mut node = Node::new(key, value);
let mut changed: Vec<Node> = vec![];
println!(
"\n New insert {:?}",
String::from_utf8(key.to_vec()).unwrap()
);
insert(
&mut node,
Some(self.root.hash()),
self.storage,
&mut changed,
)
if self.root.is_none() {
self.update_root(*node.hash());
return;
}
// Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
// Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
// The difference here is that in a Hash Treap, we need to update nodes bottom up.
// Let's say we have the following tree:
//
// F
// / \
// D P
// / / \
// C H X
// / / \ \
// A G M Y
// /
// I
//
// First we mark the binary search path to the leaf, going right if the key is greater than
// the current node's key and vice versa.
//
// F
// \
// P
// /
// H
// \
// M
// /
// I
//
// Path before insertion point. (Node, Branch to update)
let mut top_path: Vec<(Node, Branch)> = Vec::new();
// Subtree of nodes on the path smaller than the inserted key.
let mut left_unzip_path: Vec<Node> = Vec::new();
// Subtree of nodes on the path larger than the inserted key.
let mut right_unzip_path: Vec<Node> = Vec::new();
let mut next = self.root;
// Top down traversal of the binary search path.
while let Some(current) = self.get_node(&next) {
let should_zip = node.rank().as_bytes() > current.rank().as_bytes();
// Traverse left or right.
if key < current.key() {
next = *current.left();
if should_zip {
left_unzip_path.push(current)
} else {
top_path.push((current, Branch::Left));
}
} else {
next = *current.right();
if should_zip {
right_unzip_path.push(current)
} else {
top_path.push((current, Branch::Right));
}
};
}
dbg!((
"Out of the first loop",
&top_path,
&left_unzip_path,
&right_unzip_path
));
// === Updating hashes bottom up ===
// We are at the unzipping part of the path.
//
// First do the unzipping bottom up.
//
// H
// \
// M < current_right
// /
// I < current_left
//
// Into (hopefully you can see the "unzipping"):
//
// left right
// subtree subtree
//
// H |
// \ |
// I | M
while left_unzip_path.len() > 1 {
let child = left_unzip_path.pop().unwrap();
let mut parent = left_unzip_path.last_mut().unwrap();
parent.set_child(&Branch::Right, Some(*child.hash()));
parent.update(self.storage);
}
while right_unzip_path.len() > 1 {
let child = right_unzip_path.pop().unwrap();
let mut parent = right_unzip_path.last_mut().unwrap();
parent.set_child(&Branch::Left, Some(*child.hash()));
parent.update(self.storage);
}
// Done unzipping, join the current_left and current_right to J and update hashes upwards.
//
// J < Insertion point.
// / \
// H M
// \
// I
node.set_child(&Branch::Left, left_unzip_path.first().map(|n| *n.hash()));
node.set_child(&Branch::Right, left_unzip_path.first().map(|n| *n.hash()));
node.update(self.storage);
// Update the rest of the path upwards with the new hashes.
// So the final tree should look like:
//
// F
// / \
// D P
// / / \
// C J X
// / / \ \
// A H M Y
// / \
// G I
if top_path.is_empty() {
// The insertion point is at the root and we are done.
self.update_root(*node.hash())
}
let mut previous = node;
while let Some((mut parent, branch)) = top_path.pop() {
parent.set_child(&branch, Some(*previous.hash()));
parent.update(self.storage);
previous = parent;
}
// Update the root pointer.
self.update_root(*previous.hash())
// Finally we should commit the changes to the storage.
// TODO: commit
}
// pub fn insert(&mut self, key: &[u8], value: &[u8]) {
// let value = self.insert_blob(value);
// let mut node = Node::new(key, value);
//
// // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
// // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
//
// // Let's say we have the following treap:
// //
// // F
// // / \
// // D P
// // / / \
// // C H X
// // / / \ \
// // A G M Y
// // /
// // I
// //
// // We focus on the binary search path for J, in this case [F, P, H, M, I]:
// //
// // F < J
// // \
// // J < P
// // /
// // H < J
// // \
// // J < M
// // /
// // I < J
// //
// // First we traverse until we reach the insertion point, in this case H,
// // because J has a higher rank than H, but lower than F and P;
//
// let mut path: Vec<Node> = Vec::new();
//
// let mut current = self.root.clone();
//
// while let Some(curr) = current {
// if node.rank().as_bytes() > curr.rank().as_bytes() {
// // We reached the insertion point.
// // rank can't be equal, as we are using a secure hashing function.
// break;
// }
//
// path.push(curr.clone());
//
// if node.key() < curr.key() {
// current = self.get_node(curr.left());
// } else {
// current = self.get_node(curr.right());
// }
// }
//
// if let Some(mut prev) = path.last_mut() {
// let old = prev.clone();
//
// // TODO: pass transaction here.
// if node.key() < prev.key() {
// prev.set_child_hash(Child::Left, node.update_hash())
// } else {
// prev.set_child_hash(Child::Right, node.update_hash())
// }
//
// self.storage.insert_node(&prev);
// dbg!((old, prev));
// } else {
// // The insertion point is at the root node, either because the tree is empty,
// // or because the root node has lower rank than the new node.
//
// self.root = Some(node.clone());
// }
//
// dbg!(&path);
//
// // then Unzip the rest of the path:
// //
// // In the example above these are [H, M]
// //
// // F
// // \
// // P
// // /
// // J < Insertion point.
// // / connect J to H to the left
// // H < Unzip
// // \\
// // M
// // //
// // I
// //
// // if let Some(curr) = current {
// // if node.key() < curr.key() {
// // node.set_child_hash(Child::Right, *curr.hash())
// // } else {
// // node.set_child_hash(Child::Left, *curr.hash())
// // }
// // } else {
// // // We reached the end of the search path, and inserted a leaf node.
// // return;
// // }
//
// // The unzipped path should look like:
// //
// // F
// // \
// // P
// // /
// // J
// // // \\
// // H M < See how that looks like unzipping? :)
// // \\
// // I
// //
//
// // if let Some(curr) = current {
// // // We reached the insertion (unzipping point);
// // } else {
// // // We reached the end of the search path, this is equivalent to
// // // J having lower rank than I, so we insert J as a leaf node.
// //
// // // There has to be a node, because we already checked at the beginning
// // // that the tree is not empty.
// // if let Some(current_leaf) = previous {
// // if key < current_leaf.key() {
// // // Insert as a left child.
// // // let old_child = self.update_child(current_leaf, Child::Left, node);
// // } else {
// // // Insert as a right child.
// // let old_child = self.update_child(current_leaf, Child::Right, node);
// // }
// // }
// // }
//
// // So the final tree should look like:
// //
// // F
// // / \
// // D P
// // / / \
// // C J X
// // / / \ \
// // A H M Y
// // / \
// // G I
//
// // Finally we should commit the changes to the storage.
// // TODO: commit
// }
// === Private Methods ===
/// Points the treap at the root node identified by `hash`.
///
/// Only the in-memory `root` field is changed.
fn update_root(&mut self, hash: Hash) {
// Used both when the first node becomes the root of an empty tree and
// after an insertion has produced new hashes along the path to the root.
self.root = Some(hash);
// TODO: we need to persist the root change too to the storage.
}
// TODO: Add stream input API.
fn insert_blob(&mut self, blob: &[u8]) -> Hash {
@@ -223,37 +207,61 @@ impl<'a> Treap<'a> {
hash
}
// === Private Methods ===
/// Resolves an optional node hash through the storage.
///
/// Returns `None` when `hash` is `None`; otherwise returns whatever the
/// storage lookup yields for that hash.
pub(crate) fn get_node(&self, hash: &Option<Hash>) -> Option<Node> {
    let hash = hash.as_ref()?;
    self.storage.get_node(hash)
}
// /// Replace a child of a node, and return the old child.
// ///
// /// Also decrements the ref_count of the old child,
// /// and increments the ref_count of the new child,
// ///
// /// but it doesn't flush any changes to the storage yet.
// pub(crate) fn update_child(
// &self,
// node: &mut Node,
// child: Child,
// new_child: Node,
// ) -> Option<Node> {
// // Decrement old child's ref count.
// let mut old_child = match child {
// Child::Left => node.left(),
// Child::Right => node.right(),
// }
// .and_then(|hash| self.storage.get_node(&hash));
// old_child.as_mut().map(|n| n.decrement_ref_count());
//
// // Increment new child's ref count.
// node.increment_ref_count();
//
// node.set_child_hash(child, node.hash().clone());
//
// old_child
// }
// === Test Methods ===
/// Test helper: loads the root node and checks the rank order of the whole
/// tree below it with `check_rank`.
#[cfg(test)]
fn verify_ranks(&self) -> bool {
    self.check_rank(self.get_node(&self.root))
}
/// Test helper: returns `true` when `node` has a strictly higher rank than
/// each of its children, and the same holds recursively for both subtrees.
///
/// A missing node (`None`) counts as valid.
#[cfg(test)]
fn check_rank(&self, node: Option<Node>) -> bool {
    let parent = match node {
        Some(parent) => parent,
        None => return true,
    };

    // A child passes when it is absent, or when the parent outranks it and
    // the child's own subtree passes as well.
    let outranks = |child: Option<Node>| match child {
        Some(child) => {
            parent.rank().as_bytes() > child.rank().as_bytes() && self.check_rank(Some(child))
        }
        None => true,
    };

    // Both sides are always evaluated before combining the results.
    let left_ok = outranks(self.get_node(parent.left()));
    let right_ok = outranks(self.get_node(parent.right()));

    left_ok && right_ok
}
}
#[cfg(test)]
mod test {
    use super::HashTreap;
    use super::MemoryStorage;
    use super::Node;

    /// Inserts a fixed set of unsorted keys, asserts that every parent
    /// outranks its children, and prints the tree as a mermaid graph.
    #[test]
    fn basic() {
        let mut storage = MemoryStorage::new();
        let mut treap = HashTreap::new(&mut storage);

        // Alternative key sets kept around for manual experiments.
        // let mut keys = ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"];
        let mut keys = [
            "D", "N", "P", "X", "F", "Z", "Y", "A", "G", "C", "M", "H", "I", "J",
        ];
        // let mut keys = ["A", "B", "C"];
        // keys.reverse();
        // keys.reverse(); // Overflowing stack! damn recursion.

        // Every key gets the same dummy value; only the tree shape matters.
        keys.iter().for_each(|key| {
            treap.insert(key.as_bytes(), b"0");
        });

        assert!(treap.verify_ranks());
        // dbg!(&tree);
        println!("{}", treap.as_mermaid_graph())
    }
}