wip: snapshot of the iterative approach with comments

This commit is contained in:
nazeh
2023-12-17 18:57:52 +03:00
parent 6ff8d1b0f6
commit 582d97d242
5 changed files with 426 additions and 173 deletions

View File

@@ -1,23 +1,37 @@
#![allow(unused)]
mod mermaid;
mod node;
mod storage;
mod treap;
pub mod treap;
pub(crate) use blake3::{Hash, Hasher};
pub(crate) use node::Node;
pub(crate) use treap::Treap;
// TODO: If we are going to use Iroh Bytes, might as well ues this from Iroh basics.
/// The hash for the empty byte range (`b""`).
pub(crate) const EMPTY_HASH: Hash = Hash::from_bytes([
175, 19, 73, 185, 245, 249, 161, 166, 160, 64, 77, 234, 54, 220, 201, 73, 155, 203, 37, 201,
173, 193, 18, 183, 204, 154, 147, 202, 228, 31, 50, 98,
]);
#[cfg(test)]
mod test {
use super::mermaid;
use super::storage::memory::MemoryStorage;
use super::treap::Treap;
#[test]
fn basic() {
let mut tree = Treap::default();
let mut storage = MemoryStorage::new();
let mut tree = Treap::new(&mut storage);
for i in 0..4 {
tree.insert(&[i], b"0");
for key in ["A", "C", "D", "F", "G", "H", "M", "P", "X", "Y"].iter() {
tree.insert(key.as_bytes(), b"0");
}
dbg!(&tree);
// println!("{}", tree.as_mermaid_graph())
println!("{}", tree.as_mermaid_graph())
}
}

View File

@@ -1,40 +1,45 @@
use crate::treap::{Node, Treap};
#[cfg(test)]
mod test {
use crate::{Node, Treap};
impl Treap {
pub fn as_mermaid_graph(&self) -> String {
let mut graph = String::new();
impl<'a> Treap<'a> {
pub fn as_mermaid_graph(&self) -> String {
let mut graph = String::new();
graph.push_str("graph TD;\n");
graph.push_str("graph TD;\n");
if let Some(root) = self.get_node(self.root) {
self.build_graph_string(&root, &mut graph);
if let Some(root) = &self.root {
self.build_graph_string(&root, &mut graph);
}
graph
}
graph
}
fn build_graph_string(&self, node: &Node, graph: &mut String) {
let key = bytes_to_string(node.key());
let node_label = format!("{}({})", key, key);
fn build_graph_string(&self, node: &Node, graph: &mut String) {
let key = bytes_to_string(&node.key);
let node_label = format!("{}({}:)", key, key);
graph.push_str(&format!(" {};\n", node_label));
if let Some(left) = self.get_node(node.left) {
let key = bytes_to_string(&left.key);
let left_label = format!("{}({})", key, key);
if let Some(child) = self.get_node(node.left()) {
let key = bytes_to_string(child.key());
let child_label = format!("{}({})", key, key);
graph.push_str(&format!(" {} --> {};\n", node_label, left_label));
self.build_graph_string(&left, graph);
}
graph.push_str(&format!(" {} --> {};\n", node_label, child_label));
self.build_graph_string(&child, graph);
}
if let Some(right) = self.get_node(node.right) {
let key = bytes_to_string(&right.key);
let right_label = format!("{}({})", key, key);
if let Some(child) = self.get_node(node.right()) {
let key = bytes_to_string(child.key());
let child_label = format!("{}({})", key, key);
graph.push_str(&format!(" {} --> {};\n", node_label, right_label));
self.build_graph_string(&right, graph);
graph.push_str(&format!(" {} --> {};\n", node_label, child_label));
self.build_graph_string(&child, graph);
}
}
}
}
fn bytes_to_string(bytes: &[u8]) -> String {
bytes.iter().map(|&b| b.to_string()).collect()
fn bytes_to_string(byte: &[u8]) -> String {
String::from_utf8(byte.to_vec()).expect("Invalid utf8 key in test with mermaig graph")
}
}

144
mast/src/node.rs Normal file
View File

@@ -0,0 +1,144 @@
use crate::storage::memory::MemoryStorage;
use crate::{Hash, Hasher, EMPTY_HASH};
// TODO: make sure that the hash is always in sync.
// TODO: keep track of ref count and sync status in the storage, without adding it to the in memory
// representation.
#[derive(Debug, Clone)]
/// In memory reprsentation of treap node.
pub(crate) struct Node {
/// The hash of this node, uniquely identifying its key, value, and children.
hash: Hash,
// Key value
key: Box<[u8]>,
value: Hash,
// Rank
rank: Hash,
// Children
left: Option<Hash>,
right: Option<Hash>,
}
pub(crate) enum Child {
Left,
Right,
}
impl Node {
// TODO: Convert to Result, since it shouldn't be missing!
pub(crate) fn open(storage: &MemoryStorage, hash: Hash) -> Option<Self> {
storage.get_node(&hash)
}
pub fn new(key: &[u8], value: Hash) -> Self {
let mut hasher = Hasher::new();
hasher.update(key);
let rank = hasher.finalize();
let mut node = Self {
hash: EMPTY_HASH,
key: key.into(),
value,
left: None,
right: None,
rank,
};
node.update_hash();
node
}
// === Getters ===
pub(crate) fn key(&self) -> &[u8] {
&self.key
}
pub(crate) fn value(&self) -> &Hash {
&self.value
}
pub(crate) fn rank(&self) -> &Hash {
&self.rank
}
/// Returns the hash of the node.
pub(crate) fn hash(&self) -> &Hash {
&self.hash
}
pub(crate) fn left(&self) -> &Option<Hash> {
&self.left
}
pub(crate) fn right(&self) -> &Option<Hash> {
&self.right
}
// === Private Methods ===
pub(crate) fn update_hash(&mut self) -> Hash {
let mut hasher = Hasher::new();
hasher.update(&self.key);
hasher.update(self.value.as_bytes());
hasher.update(self.left.unwrap_or(EMPTY_HASH).as_bytes());
hasher.update(self.right.unwrap_or(EMPTY_HASH).as_bytes());
self.hash = hasher.finalize();
self.hash
}
// /// Replace a child of this node, and return the old child.
// ///
// /// This method decrements the ref count of the old child,
// /// and incrments the ref count of the new child,
// ///
// /// but it dosn't flush any changes to the storage.
// pub(crate) fn set_child(
// &mut self,
// node: &mut Option<Node>,
// child: Child,
// storage: &MemoryStorage,
// ) -> Option<Node> {
// // Decrement old child's ref count.
// let mut old_child = match child {
// Child::Left => self.left,
// Child::Right => self.right,
// }
// .and_then(|hash| storage.get_node(&hash));
// old_child.as_mut().map(|n| n.decrement_ref_count());
//
// // Increment new child's ref count.
// node.as_mut().map(|n| n.increment_ref_count());
//
// // swap children
// match child {
// Child::Left => self.left = node.as_mut().map(|n| n.update_hash()),
// Child::Right => self.right = node.as_mut().map(|n| n.update_hash()),
// }
//
// // Update this node's hash.
// self.update_hash();
//
// old_child
// }
pub(crate) fn set_child_hash(&mut self, child: Child, hash: Hash) {
// Swap the child.
match child {
Child::Left => self.left = Some(hash),
Child::Right => self.right = Some(hash),
}
// Update this node's hash, after updating the child.
self.update_hash();
}
}

View File

@@ -1,10 +1,11 @@
use blake3::Hash;
use std::collections::HashMap;
use crate::treap::Node;
use crate::Node;
#[derive(Debug)]
pub struct MemoryStorage {
roots: HashMap<Box<[u8]>, Node>,
nodes: HashMap<Hash, Node>,
blobs: HashMap<Hash, Box<[u8]>>,
}
@@ -12,20 +13,27 @@ pub struct MemoryStorage {
impl MemoryStorage {
pub(crate) fn new() -> Self {
Self {
roots: HashMap::new(),
nodes: HashMap::new(),
blobs: HashMap::new(),
}
}
// TODO: return result or something.
pub(crate) fn insert_root(&mut self, name: &[u8], node: Node) {
self.roots.insert(name.into(), node);
}
pub(crate) fn insert_node(&mut self, node: &Node) {
self.nodes.insert(node.hash(), node.clone());
self.nodes.insert(*node.hash(), node.clone());
}
pub(crate) fn insert_blob(&mut self, hash: Hash, blob: &[u8]) {
self.blobs.insert(hash, blob.into());
}
pub(crate) fn get_node(&self, hash: &Hash) -> Option<&Node> {
self.nodes.get(hash)
pub(crate) fn get_node(&self, hash: &Hash) -> Option<Node> {
self.nodes.get(hash).cloned()
}
}

View File

@@ -1,89 +1,49 @@
use blake3::{Hash, Hasher};
use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::mem;
use std::ops::Deref;
use crate::node::Child;
use crate::storage::memory::MemoryStorage;
use crate::Node;
const EMPTY_HASH: Hash = Hash::from_bytes([0_u8; 32]);
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Node {
pub(crate) key: Box<[u8]>,
pub(crate) value: Hash,
pub(crate) rank: Hash,
pub(crate) left: Hash,
pub(crate) right: Hash,
#[derive(Debug)]
pub struct Treap<'a> {
pub(crate) storage: &'a mut MemoryStorage,
pub(crate) root: Option<Node>,
}
impl Node {
fn new(key: &[u8], value: Hash) -> Self {
let mut hasher = Hasher::new();
hasher.update(key);
// TODO: pass a transaction.
fn insert(
node: &mut Node,
root: Option<Hash>,
storage: MemoryStorage,
changed: &mut Vec<Node>,
) -> Node {
let root = root.and_then(|hash| storage.get_node(&hash));
let rank = hasher.finalize();
if root.is_none() {
return node.clone();
}
Self {
key: key.into(),
value,
left: EMPTY_HASH,
right: EMPTY_HASH,
rank,
let mut root = root.unwrap();
if node.key() < root.key() {
if insert(node, *root.left(), storage, changed).key() == node.key() {
if node.rank().as_bytes() < root.rank().as_bytes() {
root.set_child_hash(Child::Left, *node.hash())
} else {
// root.set_child_hash(Child::Left, *node.right());
node.set_child_hash(Child::Right, *root.hash());
}
}
}
/// Returns the hash of the node.
pub fn hash(&self) -> Hash {
let mut hasher = Hasher::new();
hasher.update(&self.key);
hasher.update(self.value.as_bytes());
hasher.update(self.left.as_bytes());
hasher.update(self.right.as_bytes());
hasher.finalize()
}
fn to_bytes(&self) -> Box<[u8]> {
let mut bytes = vec![];
bytes.extend_from_slice(self.value.as_bytes());
bytes.extend_from_slice(self.left.as_bytes());
bytes.extend_from_slice(self.right.as_bytes());
bytes.extend_from_slice(&self.key);
bytes.into_boxed_slice()
}
fn from_bytes(bytes: &Box<[u8]>) -> Self {
// TODO: Make sure that bytes is long enough at least >96 bytes.
let mut node = Self::new(
&bytes[96..],
Hash::from_bytes(bytes[..32].try_into().unwrap()),
);
node.left = Hash::from_bytes(bytes[32..64].try_into().unwrap());
node.right = Hash::from_bytes(bytes[64..96].try_into().unwrap());
node
}
fn set_left(&mut self, left: Hash, storage: &mut MemoryStorage) {}
return root;
}
#[derive(Debug)]
pub struct Treap {
pub(crate) root: Hash,
storage: MemoryStorage,
}
impl Treap {
pub fn new(storage: MemoryStorage) -> Self {
impl<'a> Treap<'a> {
// TODO: add name to open from storage with.
pub fn new(storage: &'a mut MemoryStorage) -> Self {
Self {
root: EMPTY_HASH,
root: None,
storage,
}
}
@@ -92,66 +52,165 @@ impl Treap {
let value = self.insert_blob(value);
let mut node = Node::new(key, value);
// TODO: batch inserting updated nodes.
let mut changed: Vec<Node> = vec![];
let new_root = self.insert_impl(&mut node, self.root);
self.root = new_root.hash();
insert(
&mut node,
Some(self.root.hash()),
self.storage,
&mut changed,
)
}
// Recursive insertion (unzipping) algorithm.
// pub fn insert(&mut self, key: &[u8], value: &[u8]) {
// let value = self.insert_blob(value);
// let mut node = Node::new(key, value);
//
// Returns the new root node.
fn insert_impl(&mut self, x: &mut Node, root_hash: Hash) -> Node {
if let Some(mut root) = self.get_node(root_hash) {
if x.key < root.key {
if self.insert_impl(x, root.left).key == x.key {
if x.rank.as_bytes() < root.rank.as_bytes() {
root.left = self.store_node(x);
self.store_node(&root);
} else {
root.left = x.right;
x.right = self.store_node(&root);
self.store_node(x);
return x.clone();
}
}
} else {
if self.insert_impl(x, root.right).key == x.key {
if x.rank.as_bytes() < root.rank.as_bytes() {
root.right = self.store_node(x);
self.store_node(&root);
} else {
root.right = x.left;
x.right = self.store_node(&root);
self.store_node(x);
return x.clone();
}
}
}
self.store_node(&root);
return root;
} else {
self.store_node(x);
return x.clone();
}
}
/// Store a node after it has been modified and had a new hash.
fn store_node(&mut self, node: &Node) -> Hash {
// TODO: save the hash somewhere in the Node instead of hashing it again.
let hash = node.hash();
self.storage.insert_node(node);
hash
}
// // Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
// // Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
//
// // Let's say we have the following treap:
// //
// // F
// // / \
// // D P
// // / / \
// // C H X
// // / / \ \
// // A G M Y
// // /
// // I
// //
// // We focus on the binary search path for J, in this case [F, P, H, M, I]:
// //
// // F < J
// // \
// // J < P
// // /
// // H < J
// // \
// // J < M
// // /
// // I < J
// //
// // First we traverse until we reach the insertion point, in this case H,
// // because J has a higher rank than H, but lower than F and P;
//
// let mut path: Vec<Node> = Vec::new();
//
// let mut current = self.root.clone();
//
// while let Some(curr) = current {
// if node.rank().as_bytes() > curr.rank().as_bytes() {
// // We reached the insertion point.
// // rank can't be equal, as we are using a secure hashing funciton.
// break;
// }
//
// path.push(curr.clone());
//
// if node.key() < curr.key() {
// current = self.get_node(curr.left());
// } else {
// current = self.get_node(curr.right());
// }
// }
//
// if let Some(mut prev) = path.last_mut() {
// let old = prev.clone();
//
// // TODO: pass transaction here.
// if node.key() < prev.key() {
// prev.set_child_hash(Child::Left, node.update_hash())
// } else {
// prev.set_child_hash(Child::Right, node.update_hash())
// }
//
// self.storage.insert_node(&prev);
// dbg!((old, prev));
// } else {
// // The insertion point is at the root node, either because the tree is empty,
// // or because the root node has lower rank than the new node.
//
// self.root = Some(node.clone());
// }
//
// dbg!(&path);
//
// // then Unzip the rest of the path:
// //
// // In the example above these are [H, M]
// //
// // F
// // \
// // P
// // /
// // J < Insertion point.
// // / connect J to H to the left
// // H < Unzip
// // \\
// // M
// // //
// // I
// //
// // if let Some(curr) = current {
// // if node.key() < curr.key() {
// // node.set_child_hash(Child::Right, *curr.hash())
// // } else {
// // node.set_child_hash(Child::Left, *curr.hash())
// // }
// // } else {
// // // We reached the endo of the searhc path, and inserted a leaf node.
// // return;
// // }
//
// // The unsizipped path should look like:
// //
// // F
// // \
// // P
// // /
// // J
// // // \\
// // H M < See how that looks like unzipping? :)
// // \\
// // I
// //
//
// // if let Some(curr) = current {
// // // We reached the insertion (unzipping point);
// // } else {
// // // We reached the end of the search path, this is equivilant of
// // // J having lower rank than I, so we insert J as a leaf node.
// //
// // // There has to be a node, because we already checked at the beginning
// // // that the tree is not empty.
// // if let Some(current_leaf) = previous {
// // if key < current_leaf.key() {
// // // Insert as a left child.
// // // let old_child = self.update_child(current_leaf, Child::Left, node);
// // } else {
// // // Insert as a right child.
// // let old_child = self.update_child(current_leaf, Child::Right, node);
// // }
// // }
// // }
//
// // So the final tree should look like:
// //
// // F
// // / \
// // D P
// // / / \
// // C J X
// // / / \ \
// // A H M Y
// // / \
// // G I
//
// // Finally we should commit the changes to the storage.
// // TODO: commit
// }
// TODO: Add stream input API.
fn insert_blob(&mut self, blob: &[u8]) -> Hash {
@@ -159,19 +218,42 @@ impl Treap {
hasher.update(blob);
let hash = hasher.finalize();
self.storage.insert_blob(hash, blob.into());
self.storage.insert_blob(hash, blob);
hash
}
// TODO: move to storage abstraction.
pub(crate) fn get_node(&self, hash: Hash) -> Option<Node> {
self.storage.get_node(&hash).cloned()
}
}
// === Private Methods ===
impl Default for Treap {
fn default() -> Self {
Self::new(MemoryStorage::new())
pub(crate) fn get_node(&self, hash: &Option<Hash>) -> Option<Node> {
hash.and_then(|h| self.storage.get_node(&h))
}
// /// Replace a child of a node, and return the old child.
// ///
// /// Also decrements the ref_count of the old child,
// /// and incrments the ref_count of the new child,
// ///
// /// but it dosn't flush any changes to the storage yet.
// pub(crate) fn update_child(
// &self,
// node: &mut Node,
// child: Child,
// new_child: Node,
// ) -> Option<Node> {
// // Decrement old child's ref count.
// let mut old_child = match child {
// Child::Left => node.left(),
// Child::Right => node.right(),
// }
// .and_then(|hash| self.storage.get_node(&hash));
// old_child.as_mut().map(|n| n.decrement_ref_count());
//
// // Increment new child's ref count.
// node.increment_ref_count();
//
// node.set_child_hash(child, node.hash().clone());
//
// old_child
// }
}