mirror of
https://github.com/aljazceru/pubky-core.git
synced 2026-01-19 14:04:23 +01:00
wip: insertion still messed up, but getting closer
This commit is contained in:
@@ -8,7 +8,7 @@ pub mod treap;
|
||||
pub(crate) use blake3::{Hash, Hasher};
|
||||
|
||||
pub(crate) use node::Node;
|
||||
pub(crate) use treap::Treap;
|
||||
pub(crate) use treap::HashTreap;
|
||||
|
||||
// TODO: If we are going to use Iroh Bytes, might as well use this from Iroh basics.
|
||||
/// The hash for the empty byte range (`b""`).
|
||||
@@ -16,22 +16,3 @@ pub(crate) const EMPTY_HASH: Hash = Hash::from_bytes([
|
||||
175, 19, 73, 185, 245, 249, 161, 166, 160, 64, 77, 234, 54, 220, 201, 73, 155, 203, 37, 201,
|
||||
173, 193, 18, 183, 204, 154, 147, 202, 228, 31, 50, 98,
|
||||
]);
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::storage::memory::MemoryStorage;
|
||||
use super::treap::Treap;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let mut storage = MemoryStorage::new();
|
||||
let mut tree = Treap::new(&mut storage);
|
||||
|
||||
for key in ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"].iter() {
|
||||
tree.insert(key.as_bytes(), b"0");
|
||||
}
|
||||
|
||||
dbg!(&tree);
|
||||
println!("{}", tree.as_mermaid_graph())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::{Node, Treap};
|
||||
use crate::{HashTreap, Node};
|
||||
|
||||
impl<'a> Treap<'a> {
|
||||
impl<'a> HashTreap<'a> {
|
||||
pub fn as_mermaid_graph(&self) -> String {
|
||||
let mut graph = String::new();
|
||||
|
||||
graph.push_str("graph TD;\n");
|
||||
|
||||
if let Some(root) = &self.root {
|
||||
if let Some(root) = self.get_node(&self.root) {
|
||||
self.build_graph_string(&root, &mut graph);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,17 +23,13 @@ pub(crate) struct Node {
|
||||
right: Option<Hash>,
|
||||
}
|
||||
|
||||
/// Selects one of the two child slots of a node.
///
/// This is the post-rename form of the former `Child` enum; the stale
/// `enum Child {` header has been dropped so that only one declaration remains.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Branch {
    /// The left child slot.
    Left,
    /// The right child slot.
    Right,
}
|
||||
|
||||
impl Node {
|
||||
// TODO: Convert to Result, since it shouldn't be missing!
|
||||
pub(crate) fn open(storage: &MemoryStorage, hash: Hash) -> Option<Self> {
|
||||
storage.get_node(&hash)
|
||||
}
|
||||
|
||||
pub fn new(key: &[u8], value: Hash) -> Self {
|
||||
let mut hasher = Hasher::new();
|
||||
hasher.update(key);
|
||||
@@ -50,11 +46,11 @@ impl Node {
|
||||
rank,
|
||||
};
|
||||
|
||||
node.update_hash();
|
||||
|
||||
node
|
||||
}
|
||||
|
||||
// TODO: add from bytes and remember to update its hash.
|
||||
|
||||
// === Getters ===
|
||||
|
||||
pub(crate) fn key(&self) -> &[u8] {
|
||||
@@ -96,49 +92,54 @@ impl Node {
|
||||
self.hash
|
||||
}
|
||||
|
||||
// /// Replace a child of this node, and return the old child.
|
||||
// ///
|
||||
// /// This method decrements the ref count of the old child,
|
||||
// /// and incrments the ref count of the new child,
|
||||
// ///
|
||||
// /// but it dosn't flush any changes to the storage.
|
||||
// pub(crate) fn set_child(
|
||||
// &mut self,
|
||||
// node: &mut Option<Node>,
|
||||
// child: Child,
|
||||
// storage: &MemoryStorage,
|
||||
// ) -> Option<Node> {
|
||||
// // Decrement old child's ref count.
|
||||
// let mut old_child = match child {
|
||||
// Child::Left => self.left,
|
||||
// Child::Right => self.right,
|
||||
// }
|
||||
// .and_then(|hash| storage.get_node(&hash));
|
||||
// old_child.as_mut().map(|n| n.decrement_ref_count());
|
||||
//
|
||||
// // Increment new child's ref count.
|
||||
// node.as_mut().map(|n| n.increment_ref_count());
|
||||
//
|
||||
// // swap children
|
||||
// match child {
|
||||
// Child::Left => self.left = node.as_mut().map(|n| n.update_hash()),
|
||||
// Child::Right => self.right = node.as_mut().map(|n| n.update_hash()),
|
||||
// }
|
||||
//
|
||||
// // Update this node's hash.
|
||||
// self.update_hash();
|
||||
//
|
||||
// old_child
|
||||
// }
|
||||
|
||||
pub(crate) fn set_child_hash(&mut self, child: Child, hash: Hash) {
|
||||
// Swap the child.
|
||||
match child {
|
||||
Child::Left => self.left = Some(hash),
|
||||
Child::Right => self.right = Some(hash),
|
||||
/// When inserting a node, once we find its instertion point,
|
||||
/// we give one of its children (depending on the direction),
|
||||
/// to the current node at the insertion position, and then we
|
||||
/// replace that child with the updated current node.
|
||||
pub(crate) fn insertion_swap(
|
||||
&mut self,
|
||||
direction: Branch,
|
||||
current_node: &mut Node,
|
||||
storage: &mut MemoryStorage,
|
||||
) {
|
||||
match direction {
|
||||
Branch::Left => current_node.set_child(&Branch::Left, *self.right()),
|
||||
Branch::Right => current_node.set_child(&Branch::Left, *self.left()),
|
||||
}
|
||||
|
||||
// Update this node's hash, after updating the child.
|
||||
current_node.update(storage);
|
||||
|
||||
match direction {
|
||||
Branch::Left => self.left = Some(*current_node.hash()),
|
||||
Branch::Right => self.right = Some(*current_node.hash()),
|
||||
}
|
||||
|
||||
self.update(storage);
|
||||
}
|
||||
|
||||
pub(crate) fn set_child(&mut self, branch: &Branch, hash: Option<Hash>) {
|
||||
// decrement old child's ref count.
|
||||
|
||||
// set children
|
||||
match branch {
|
||||
Branch::Left => self.left = hash,
|
||||
Branch::Right => self.right = hash,
|
||||
}
|
||||
|
||||
// TODO: increment node's ref count.
|
||||
}
|
||||
|
||||
/// Refreshes this node's hash, hands the node to `storage.insert_node`,
/// and returns a reference to the node's hash.
///
/// Call this after changing a child so that parents can link to the new hash.
pub(crate) fn update(&mut self, storage: &mut MemoryStorage) -> &Hash {
    // Still open:
    // TODO: save new hash to storage.
    // TODO: increment ref count.
    // TODO: decrement ref count of old hash!

    // The hash has to be refreshed before the node is handed to the storage.
    self.update_hash();
    storage.insert_node(self);

    self.hash()
}
|
||||
}
|
||||
|
||||
@@ -1,45 +1,16 @@
|
||||
use blake3::{Hash, Hasher};
|
||||
|
||||
use crate::node::Child;
|
||||
use crate::node::Branch;
|
||||
use crate::storage::memory::MemoryStorage;
|
||||
use crate::Node;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Treap<'a> {
|
||||
pub struct HashTreap<'a> {
|
||||
pub(crate) storage: &'a mut MemoryStorage,
|
||||
pub(crate) root: Option<Node>,
|
||||
pub(crate) root: Option<Hash>,
|
||||
}
|
||||
|
||||
// TODO: pass a transaction.
|
||||
fn insert(
|
||||
node: &mut Node,
|
||||
root: Option<Hash>,
|
||||
storage: MemoryStorage,
|
||||
changed: &mut Vec<Node>,
|
||||
) -> Node {
|
||||
let root = root.and_then(|hash| storage.get_node(&hash));
|
||||
|
||||
if root.is_none() {
|
||||
return node.clone();
|
||||
}
|
||||
|
||||
let mut root = root.unwrap();
|
||||
|
||||
if node.key() < root.key() {
|
||||
if insert(node, *root.left(), storage, changed).key() == node.key() {
|
||||
if node.rank().as_bytes() < root.rank().as_bytes() {
|
||||
root.set_child_hash(Child::Left, *node.hash())
|
||||
} else {
|
||||
// root.set_child_hash(Child::Left, *node.right());
|
||||
node.set_child_hash(Child::Right, *root.hash());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
impl<'a> Treap<'a> {
|
||||
impl<'a> HashTreap<'a> {
|
||||
// TODO: add name to open from storage with.
|
||||
pub fn new(storage: &'a mut MemoryStorage) -> Self {
|
||||
Self {
|
||||
@@ -49,168 +20,181 @@ impl<'a> Treap<'a> {
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: &[u8], value: &[u8]) {
|
||||
// TODO: validate key and value length.
|
||||
|
||||
let value = self.insert_blob(value);
|
||||
let mut node = Node::new(key, value);
|
||||
|
||||
let mut changed: Vec<Node> = vec![];
|
||||
println!(
|
||||
"\n New insert {:?}",
|
||||
String::from_utf8(key.to_vec()).unwrap()
|
||||
);
|
||||
|
||||
insert(
|
||||
&mut node,
|
||||
Some(self.root.hash()),
|
||||
self.storage,
|
||||
&mut changed,
|
||||
)
|
||||
if self.root.is_none() {
|
||||
self.update_root(*node.hash());
|
||||
return;
|
||||
}
|
||||
|
||||
// Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
|
||||
// Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
|
||||
// The difference here is that in a Hash Treap, we need to update nodes bottom up.
|
||||
|
||||
// Let's say we have the following tree:
|
||||
//
|
||||
// F
|
||||
// / \
|
||||
// D P
|
||||
// / / \
|
||||
// C H X
|
||||
// / / \ \
|
||||
// A G M Y
|
||||
// /
|
||||
// I
|
||||
//
|
||||
// First we mark the binary search path to the leaf, going right if the key is greater than
|
||||
// the current node's key and vice versa.
|
||||
//
|
||||
// F
|
||||
// \
|
||||
// P
|
||||
// /
|
||||
// H
|
||||
// \
|
||||
// M
|
||||
// /
|
||||
// I
|
||||
//
|
||||
|
||||
// Path before insertion point. (Node, Branch to update)
|
||||
let mut top_path: Vec<(Node, Branch)> = Vec::new();
|
||||
// Subtree of nodes on the path smaller than the inserted key.
|
||||
let mut left_unzip_path: Vec<Node> = Vec::new();
|
||||
// Subtree of nodes on the path larger than the inserted key.
|
||||
let mut right_unzip_path: Vec<Node> = Vec::new();
|
||||
|
||||
let mut next = self.root;
|
||||
|
||||
// Top down traversal of the binary search path.
|
||||
while let Some(current) = self.get_node(&next) {
|
||||
let should_zip = node.rank().as_bytes() > current.rank().as_bytes();
|
||||
|
||||
// Traverse left or right.
|
||||
if key < current.key() {
|
||||
next = *current.left();
|
||||
|
||||
if should_zip {
|
||||
left_unzip_path.push(current)
|
||||
} else {
|
||||
top_path.push((current, Branch::Left));
|
||||
}
|
||||
} else {
|
||||
next = *current.right();
|
||||
|
||||
if should_zip {
|
||||
right_unzip_path.push(current)
|
||||
} else {
|
||||
top_path.push((current, Branch::Right));
|
||||
}
|
||||
};
|
||||
}
|
||||
dbg!((
|
||||
"Out of the first loop",
|
||||
&top_path,
|
||||
&left_unzip_path,
|
||||
&right_unzip_path
|
||||
));
|
||||
|
||||
// === Updating hashes bottom up ===
|
||||
|
||||
// We are at the unzipping part of the path.
|
||||
//
|
||||
// First do the unzipping bottom up.
|
||||
//
|
||||
// H
|
||||
// \
|
||||
// M < current_right
|
||||
// /
|
||||
// I < current_left
|
||||
//
|
||||
// Into (hopefully you can see the "unzipping"):
|
||||
//
|
||||
// left right
|
||||
// subtree subtree
|
||||
//
|
||||
// H |
|
||||
// \ |
|
||||
// I | M
|
||||
|
||||
while left_unzip_path.len() > 1 {
|
||||
let child = left_unzip_path.pop().unwrap();
|
||||
let mut parent = left_unzip_path.last_mut().unwrap();
|
||||
|
||||
parent.set_child(&Branch::Right, Some(*child.hash()));
|
||||
parent.update(self.storage);
|
||||
}
|
||||
|
||||
while right_unzip_path.len() > 1 {
|
||||
let child = right_unzip_path.pop().unwrap();
|
||||
let mut parent = right_unzip_path.last_mut().unwrap();
|
||||
|
||||
parent.set_child(&Branch::Left, Some(*child.hash()));
|
||||
parent.update(self.storage);
|
||||
}
|
||||
|
||||
// Done unzipping, join the current_left and current_right to J and update hashes upwards.
|
||||
//
|
||||
// J < Insertion point.
|
||||
// / \
|
||||
// H M
|
||||
// \
|
||||
// I
|
||||
|
||||
node.set_child(&Branch::Left, left_unzip_path.first().map(|n| *n.hash()));
|
||||
node.set_child(&Branch::Right, left_unzip_path.first().map(|n| *n.hash()));
|
||||
node.update(self.storage);
|
||||
|
||||
// Update the rest of the path upwards with the new hashes.
|
||||
// So the final tree should look like:
|
||||
//
|
||||
// F
|
||||
// / \
|
||||
// D P
|
||||
// / / \
|
||||
// C J X
|
||||
// / / \ \
|
||||
// A H M Y
|
||||
// / \
|
||||
// G I
|
||||
|
||||
if top_path.is_empty() {
|
||||
// The insertion point is at the root and we are done.
|
||||
self.update_root(*node.hash())
|
||||
}
|
||||
|
||||
let mut previous = node;
|
||||
|
||||
while let Some((mut parent, branch)) = top_path.pop() {
|
||||
parent.set_child(&branch, Some(*previous.hash()));
|
||||
parent.update(self.storage);
|
||||
|
||||
previous = parent;
|
||||
}
|
||||
|
||||
// Update the root pointer.
|
||||
self.update_root(*previous.hash())
|
||||
|
||||
// Finally we should commit the changes to the storage.
|
||||
// TODO: commit
|
||||
}
|
||||
|
||||
// pub fn insert(&mut self, key: &[u8], value: &[u8]) {
|
||||
// let value = self.insert_blob(value);
|
||||
// let mut node = Node::new(key, value);
|
||||
//
|
||||
// // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
|
||||
// // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
|
||||
//
|
||||
// // Let's say we have the following treap:
|
||||
// //
|
||||
// // F
|
||||
// // / \
|
||||
// // D P
|
||||
// // / / \
|
||||
// // C H X
|
||||
// // / / \ \
|
||||
// // A G M Y
|
||||
// // /
|
||||
// // I
|
||||
// //
|
||||
// // We focus on the binary search path for J, in this case [F, P, H, M, I]:
|
||||
// //
|
||||
// // F < J
|
||||
// // \
|
||||
// // J < P
|
||||
// // /
|
||||
// // H < J
|
||||
// // \
|
||||
// // J < M
|
||||
// // /
|
||||
// // I < J
|
||||
// //
|
||||
// // First we traverse until we reach the insertion point, in this case H,
|
||||
// // because J has a higher rank than H, but lower than F and P;
|
||||
//
|
||||
// let mut path: Vec<Node> = Vec::new();
|
||||
//
|
||||
// let mut current = self.root.clone();
|
||||
//
|
||||
// while let Some(curr) = current {
|
||||
// if node.rank().as_bytes() > curr.rank().as_bytes() {
|
||||
// // We reached the insertion point.
|
||||
// // rank can't be equal, as we are using a secure hashing funciton.
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// path.push(curr.clone());
|
||||
//
|
||||
// if node.key() < curr.key() {
|
||||
// current = self.get_node(curr.left());
|
||||
// } else {
|
||||
// current = self.get_node(curr.right());
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if let Some(mut prev) = path.last_mut() {
|
||||
// let old = prev.clone();
|
||||
//
|
||||
// // TODO: pass transaction here.
|
||||
// if node.key() < prev.key() {
|
||||
// prev.set_child_hash(Child::Left, node.update_hash())
|
||||
// } else {
|
||||
// prev.set_child_hash(Child::Right, node.update_hash())
|
||||
// }
|
||||
//
|
||||
// self.storage.insert_node(&prev);
|
||||
// dbg!((old, prev));
|
||||
// } else {
|
||||
// // The insertion point is at the root node, either because the tree is empty,
|
||||
// // or because the root node has lower rank than the new node.
|
||||
//
|
||||
// self.root = Some(node.clone());
|
||||
// }
|
||||
//
|
||||
// dbg!(&path);
|
||||
//
|
||||
// // then Unzip the rest of the path:
|
||||
// //
|
||||
// // In the example above these are [H, M]
|
||||
// //
|
||||
// // F
|
||||
// // \
|
||||
// // P
|
||||
// // /
|
||||
// // J < Insertion point.
|
||||
// // / connect J to H to the left
|
||||
// // H < Unzip
|
||||
// // \\
|
||||
// // M
|
||||
// // //
|
||||
// // I
|
||||
// //
|
||||
// // if let Some(curr) = current {
|
||||
// // if node.key() < curr.key() {
|
||||
// // node.set_child_hash(Child::Right, *curr.hash())
|
||||
// // } else {
|
||||
// // node.set_child_hash(Child::Left, *curr.hash())
|
||||
// // }
|
||||
// // } else {
|
||||
// // // We reached the endo of the searhc path, and inserted a leaf node.
|
||||
// // return;
|
||||
// // }
|
||||
//
|
||||
// // The unsizipped path should look like:
|
||||
// //
|
||||
// // F
|
||||
// // \
|
||||
// // P
|
||||
// // /
|
||||
// // J
|
||||
// // // \\
|
||||
// // H M < See how that looks like unzipping? :)
|
||||
// // \\
|
||||
// // I
|
||||
// //
|
||||
//
|
||||
// // if let Some(curr) = current {
|
||||
// // // We reached the insertion (unzipping point);
|
||||
// // } else {
|
||||
// // // We reached the end of the search path, this is equivilant of
|
||||
// // // J having lower rank than I, so we insert J as a leaf node.
|
||||
// //
|
||||
// // // There has to be a node, because we already checked at the beginning
|
||||
// // // that the tree is not empty.
|
||||
// // if let Some(current_leaf) = previous {
|
||||
// // if key < current_leaf.key() {
|
||||
// // // Insert as a left child.
|
||||
// // // let old_child = self.update_child(current_leaf, Child::Left, node);
|
||||
// // } else {
|
||||
// // // Insert as a right child.
|
||||
// // let old_child = self.update_child(current_leaf, Child::Right, node);
|
||||
// // }
|
||||
// // }
|
||||
// // }
|
||||
//
|
||||
// // So the final tree should look like:
|
||||
// //
|
||||
// // F
|
||||
// // / \
|
||||
// // D P
|
||||
// // / / \
|
||||
// // C J X
|
||||
// // / / \ \
|
||||
// // A H M Y
|
||||
// // / \
|
||||
// // G I
|
||||
//
|
||||
// // Finally we should commit the changes to the storage.
|
||||
// // TODO: commit
|
||||
// }
|
||||
// === Private Methods ===
|
||||
|
||||
/// Makes `hash` the root of the treap.
///
/// Only the in-memory root pointer is replaced.
fn update_root(&mut self, hash: Hash) {
    let _previous_root = self.root.replace(hash);

    // TODO: we need to persist the root change too to the storage.
}
|
||||
|
||||
// TODO: Add stream input API.
|
||||
fn insert_blob(&mut self, blob: &[u8]) -> Hash {
|
||||
@@ -223,37 +207,61 @@ impl<'a> Treap<'a> {
|
||||
hash
|
||||
}
|
||||
|
||||
// === Private Methods ===
|
||||
|
||||
/// Looks up the node stored under `hash`.
///
/// Returns `None` when `hash` is `None` or when the storage lookup
/// returns `None` for that hash.
pub(crate) fn get_node(&self, hash: &Option<Hash>) -> Option<Node> {
    let digest = hash.as_ref()?;

    self.storage.get_node(digest)
}
|
||||
|
||||
// /// Replace a child of a node, and return the old child.
|
||||
// ///
|
||||
// /// Also decrements the ref_count of the old child,
|
||||
// /// and incrments the ref_count of the new child,
|
||||
// ///
|
||||
// /// but it dosn't flush any changes to the storage yet.
|
||||
// pub(crate) fn update_child(
|
||||
// &self,
|
||||
// node: &mut Node,
|
||||
// child: Child,
|
||||
// new_child: Node,
|
||||
// ) -> Option<Node> {
|
||||
// // Decrement old child's ref count.
|
||||
// let mut old_child = match child {
|
||||
// Child::Left => node.left(),
|
||||
// Child::Right => node.right(),
|
||||
// }
|
||||
// .and_then(|hash| self.storage.get_node(&hash));
|
||||
// old_child.as_mut().map(|n| n.decrement_ref_count());
|
||||
//
|
||||
// // Increment new child's ref count.
|
||||
// node.increment_ref_count();
|
||||
//
|
||||
// node.set_child_hash(child, node.hash().clone());
|
||||
//
|
||||
// old_child
|
||||
// }
|
||||
// === Test Methods ===
|
||||
|
||||
/// Test helper: checks the rank ordering of the whole treap, starting at the root.
///
/// Returns `true` when every reachable node passes [`Self::check_rank`]
/// (an empty treap passes).
#[cfg(test)]
fn verify_ranks(&self) -> bool {
    self.check_rank(self.get_node(&self.root))
}
|
||||
|
||||
/// Test helper: recursively checks that `node` has a strictly greater rank
/// than each of its children, and that the same holds in both subtrees.
///
/// A missing node (`None`) counts as valid.
#[cfg(test)]
fn check_rank(&self, node: Option<Node>) -> bool {
    let parent = match node {
        Some(parent) => parent,
        None => return true,
    };
    let parent_rank = *parent.rank().as_bytes();

    // A child passes when it is absent, or when it is outranked by the
    // parent and its own subtree passes as well.
    let subtree_ok = |child: Option<Node>| match child {
        Some(child) => parent_rank > *child.rank().as_bytes() && self.check_rank(Some(child)),
        None => true,
    };

    let left_ok = subtree_ok(self.get_node(parent.left()));
    let right_ok = subtree_ok(self.get_node(parent.right()));

    left_ok && right_ok
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::HashTreap;
    use super::MemoryStorage;
    use super::Node;

    /// Inserts a fixed set of single-letter keys and checks that every parent
    /// outranks its children afterwards.
    #[test]
    fn basic() {
        let mut storage = MemoryStorage::new();
        let mut treap = HashTreap::new(&mut storage);

        // Other key sets that were tried:
        //   ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"]
        //   ["A", "B", "C"]
        // (reversed key order overflowed the stack with the recursive insertion).
        let keys = [
            "D", "N", "P", "X", "F", "Z", "Y", "A", "G", "C", "M", "H", "I", "J",
        ];

        keys.iter()
            .for_each(|key| treap.insert(key.as_bytes(), b"0"));

        assert!(treap.verify_ranks());

        // Print the tree so it can be inspected as a mermaid graph.
        println!("{}", treap.as_mermaid_graph())
    }
}
|
||||
|
||||
Reference in New Issue
Block a user