wip: little refactor to move mermaid code out of the way

This commit is contained in:
nazeh
2023-12-16 22:04:40 +03:00
parent ed88adbf02
commit 6ff8d1b0f6
5 changed files with 255 additions and 222 deletions

View File

@@ -1,229 +1,13 @@
#![allow(unused)]
use blake3::{Hash, Hasher};
use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::mem;
use std::ops::Deref;
/// All-zero hash used as the "no node" marker: `Node::new` assigns it to both
/// child links, `Treap::new` uses it as the initial root, and `Treap::get_node`
/// returns `None` for it without consulting storage.
const EMPTY_HASH: Hash = Hash::from_bytes([0_u8; 32]);
/// A single tree node whose children are referenced by hash rather than by pointer.
#[derive(Debug, Clone, PartialEq)]
struct Node {
/// Key bytes; `Treap::insert_impl` compares these to choose a subtree.
key: Box<[u8]>,
/// Hash of the value blob (the hash returned by `Treap::insert_blob`).
value: Hash,
/// Hash of the key bytes, computed in `Node::new`; compared byte-wise during insertion.
rank: Hash,
/// Hash of the left child node; `EMPTY_HASH` until a child is linked.
left: Hash,
/// Hash of the right child node; `EMPTY_HASH` until a child is linked.
right: Hash,
}
impl Node {
    /// Creates a childless node for `key` that points at the blob hash `value`.
    ///
    /// The rank is the hash of the key bytes, so it is fully determined by the
    /// key. Both child links start out as `EMPTY_HASH`.
    fn new(key: &[u8], value: Hash) -> Self {
        let mut hasher = Hasher::new();
        hasher.update(key);
        let rank = hasher.finalize();

        Self {
            key: key.into(),
            value,
            left: EMPTY_HASH,
            right: EMPTY_HASH,
            rank,
        }
    }

    // TODO: memoize
    /// Returns the hash of this node, computed over `key`, `value`, `left`
    /// and `right` (in that order). The rank is not part of the input.
    fn hash(&self) -> Hash {
        let mut hasher = Hasher::new();
        hasher.update(&self.key);
        hasher.update(self.value.as_bytes());
        hasher.update(self.left.as_bytes());
        hasher.update(self.right.as_bytes());

        hasher.finalize()
    }

    /// Serializes the node as `value (32) || left (32) || right (32) || key`.
    ///
    /// The rank is not written; `from_bytes` recomputes it from the key.
    fn to_bytes(&self) -> Box<[u8]> {
        // Three 32-byte hashes followed by the key: allocate once.
        let mut bytes = Vec::with_capacity(96 + self.key.len());

        bytes.extend_from_slice(self.value.as_bytes());
        bytes.extend_from_slice(self.left.as_bytes());
        bytes.extend_from_slice(self.right.as_bytes());
        bytes.extend_from_slice(&self.key);

        bytes.into_boxed_slice()
    }

    /// Deserializes a node from the layout produced by `to_bytes`.
    ///
    /// The parameter stays `&Box<[u8]>` so the function can still be passed
    /// directly to `Option::map` over the storage lookup in `Treap::get_node`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is shorter than the 96-byte header.
    fn from_bytes(bytes: &Box<[u8]>) -> Self {
        assert!(
            bytes.len() >= 96,
            "serialized node must be at least 96 bytes, got {}",
            bytes.len()
        );

        let (header, key) = bytes.split_at(96);
        let hash_at = |offset: usize| -> Hash {
            let raw: [u8; 32] = header[offset..offset + 32]
                .try_into()
                .expect("header is exactly 96 bytes, so each 32-byte field is in bounds");
            Hash::from_bytes(raw)
        };

        let mut node = Self::new(key, hash_at(0));
        node.left = hash_at(32);
        node.right = hash_at(64);

        node
    }
}
/// A tree of hash-linked `Node`s stored in a hash-keyed map.
#[derive(Debug)]
pub struct Treap {
/// Hash of the current root node; `EMPTY_HASH` while the tree is empty.
root: Hash,
/// Backing store holding both serialized nodes (see `store_node`) and raw
/// value blobs (see `insert_blob`), each keyed by its hash.
storage: HashMap<Hash, Box<[u8]>>,
}
impl Treap {
    /// Creates an empty tree (root is `EMPTY_HASH`) on top of `storage`.
    pub fn new(storage: HashMap<Hash, Box<[u8]>>) -> Self {
        Self {
            root: EMPTY_HASH,
            storage,
        }
    }

    /// Stores `value` as a blob, inserts a node for `key` that points at it,
    /// and updates the root hash.
    pub fn insert(&mut self, key: &[u8], value: &[u8]) {
        let value = self.insert_blob(value);
        let mut node = Node::new(key, value);

        // TODO: batch inserting updated nodes.
        let new_root = self.insert_impl(&mut node, self.root);
        self.root = new_root.hash();
    }

    // Recursive insertion (unzipping) algorithm.
    //
    // Returns the new root node of the subtree that was rooted at `root_hash`.
    //
    // Because children are referenced by hash, every node on the path from
    // the insertion point to the root gets a new hash. The parent therefore
    // has to be re-linked to the returned child even when that child is not
    // `x` itself.
    //
    // NOTE(review): a key equal to an existing key takes the right-hand
    // branch, so it is added as an extra node rather than replacing the
    // existing one. Confirm whether update semantics are wanted.
    fn insert_impl(&mut self, x: &mut Node, root_hash: Hash) -> Node {
        let mut root = match self.get_node(root_hash) {
            Some(node) => node,
            None => {
                // Empty subtree: `x` becomes its root.
                self.store_node(x);
                return x.clone();
            }
        };

        if x.key < root.key {
            let child = self.insert_impl(x, root.left);

            if child.key == x.key {
                if x.rank.as_bytes() < root.rank.as_bytes() {
                    // `x` stays below `root`.
                    root.left = self.store_node(x);
                } else {
                    // `x` outranks `root`: `root` becomes the right child of `x`
                    // and adopts the former right subtree of `x` as its left.
                    root.left = x.right;
                    x.right = self.store_node(&root);
                    self.store_node(x);
                    return x.clone();
                }
            } else {
                // The left subtree changed below its root: pick up its new hash.
                root.left = child.hash();
            }
        } else {
            let child = self.insert_impl(x, root.right);

            if child.key == x.key {
                if x.rank.as_bytes() < root.rank.as_bytes() {
                    root.right = self.store_node(x);
                } else {
                    // Mirror image of the left case: `root` becomes the LEFT
                    // child of `x` and adopts the former left subtree of `x`.
                    root.right = x.left;
                    x.left = self.store_node(&root);
                    self.store_node(x);
                    return x.clone();
                }
            } else {
                // The right subtree changed below its root: pick up its new hash.
                root.right = child.hash();
            }
        }

        self.store_node(&root);
        root
    }

    /// Store a node after it has been modified and had a new hash.
    ///
    /// Returns the hash under which the serialized node was stored.
    fn store_node(&mut self, node: &Node) -> Hash {
        // TODO: save the hash somewhere in the Node instead of hashing it again.
        let hash = node.hash();
        self.storage.insert(hash, node.to_bytes());
        hash
    }

    // TODO: Add stream input API.
    /// Stores `blob` under its own hash and returns that hash.
    fn insert_blob(&mut self, blob: &[u8]) -> Hash {
        let mut hasher = Hasher::new();
        hasher.update(blob);
        let hash = hasher.finalize();

        self.storage.insert(hash, blob.into());
        hash
    }

    // TODO: move to storage abstraction.
    /// Loads and deserializes the node stored under `hash`.
    ///
    /// Returns `None` for `EMPTY_HASH` or when storage has no such entry.
    fn get_node(&self, hash: Hash) -> Option<Node> {
        if hash == EMPTY_HASH {
            return None;
        }

        self.storage.get(&hash).map(Node::from_bytes)
    }

    /// Renders the tree as a Mermaid `graph TD` description.
    fn as_mermaid_graph(&self) -> String {
        let mut graph = String::new();
        graph.push_str("graph TD;\n");

        if let Some(root) = self.get_node(self.root) {
            self.build_graph_string(&root, &mut graph);
        }

        graph
    }

    /// Appends `node`, the edges to its children, and (recursively) the
    /// children's own subtrees to `graph`, left subtree first.
    fn build_graph_string(&self, node: &Node, graph: &mut String) {
        let key = bytes_to_string(&node.key);
        let node_label = format!("{}({}:)", key, key);

        graph.push_str(&format!(" {};\n", node_label));

        if let Some(left) = self.get_node(node.left) {
            let key = bytes_to_string(&left.key);
            let left_label = format!("{}({})", key, key);
            graph.push_str(&format!(" {} --> {};\n", node_label, left_label));
            self.build_graph_string(&left, graph);
        }

        if let Some(right) = self.get_node(node.right) {
            let key = bytes_to_string(&right.key);
            let right_label = format!("{}({})", key, key);
            graph.push_str(&format!(" {} --> {};\n", node_label, right_label));
            self.build_graph_string(&right, graph);
        }
    }
}
impl Default for Treap {
fn default() -> Self {
Self::new(HashMap::new())
}
}
/// Renders `bytes` as decimal numbers joined by `_`, e.g. `[1, 23]` -> `"1_23"`.
///
/// The separator keeps distinct keys distinct: without it `[1, 23]` and
/// `[12, 3]` would both render as `"123"` and collide as graph node ids.
/// A single byte still renders as its plain decimal value, and an empty
/// slice renders as the empty string.
fn bytes_to_string(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|byte| byte.to_string())
        .collect::<Vec<_>>()
        .join("_")
}
mod mermaid;
mod storage;
mod treap;
#[cfg(test)]
mod test {
use super::*;
use super::mermaid;
use super::treap::Treap;
#[test]
fn basic() {
@@ -233,7 +17,7 @@ mod test {
tree.insert(&[i], b"0");
}
dbg!(tree);
dbg!(&tree);
// println!("{}", tree.as_mermaid_graph())
}
}

40
mast/src/mermaid.rs Normal file
View File

@@ -0,0 +1,40 @@
use crate::treap::{Node, Treap};
impl Treap {
    /// Produces a Mermaid `graph TD` description of the tree.
    ///
    /// The output always starts with the `graph TD;` header line; edges are
    /// appended only when a root node can be loaded.
    pub fn as_mermaid_graph(&self) -> String {
        let mut output = String::from("graph TD;\n");

        match self.get_node(self.root) {
            Some(root_node) => self.build_graph_string(&root_node, &mut output),
            None => {}
        }

        output
    }

    /// Appends one edge line per existing child of `node` (left first, then
    /// right), descending into each child right after its edge is written.
    fn build_graph_string(&self, node: &Node, graph: &mut String) {
        let parent_key = bytes_to_string(&node.key);
        let parent_label = format!("{}({}:)", parent_key, parent_key);

        for child_hash in [node.left, node.right] {
            let child = match self.get_node(child_hash) {
                Some(child) => child,
                None => continue,
            };

            let child_key = bytes_to_string(&child.key);
            let child_label = format!("{}({})", child_key, child_key);
            let edge = format!(" {} --> {};\n", parent_label, child_label);

            graph.push_str(&edge);
            self.build_graph_string(&child, graph);
        }
    }
}
/// Renders `bytes` as decimal numbers joined by `_`, e.g. `[1, 23]` -> `"1_23"`.
///
/// The separator keeps distinct keys distinct: without it `[1, 23]` and
/// `[12, 3]` would both render as `"123"` and collide as graph node ids.
/// A single byte still renders as its plain decimal value, and an empty
/// slice renders as the empty string.
fn bytes_to_string(bytes: &[u8]) -> String {
    bytes
        .iter()
        .map(|byte| byte.to_string())
        .collect::<Vec<_>>()
        .join("_")
}

View File

@@ -0,0 +1,31 @@
use blake3::Hash;
use std::collections::HashMap;
use crate::treap::Node;
/// Storage that keeps nodes and blobs in two separate in-process hash maps.
#[derive(Debug)]
pub struct MemoryStorage {
/// Nodes keyed by the value of `Node::hash()` at insertion time.
nodes: HashMap<Hash, Node>,
/// Raw blobs keyed by the hash passed to `insert_blob`.
blobs: HashMap<Hash, Box<[u8]>>,
}
impl MemoryStorage {
    /// Creates a storage with no nodes and no blobs.
    pub(crate) fn new() -> Self {
        let nodes = HashMap::new();
        let blobs = HashMap::new();

        Self { nodes, blobs }
    }

    /// Stores a copy of `node` under its current hash, replacing any entry
    /// already stored under that hash.
    pub(crate) fn insert_node(&mut self, node: &Node) {
        let node_hash = node.hash();
        let owned = node.clone();

        self.nodes.insert(node_hash, owned);
    }

    /// Stores a copy of `blob` under the caller-supplied `hash`.
    pub(crate) fn insert_blob(&mut self, hash: Hash, blob: &[u8]) {
        let owned: Box<[u8]> = blob.into();

        self.blobs.insert(hash, owned);
    }

    /// Looks up the node stored under `hash`, if any.
    pub(crate) fn get_node(&self, hash: &Hash) -> Option<&Node> {
        let found = self.nodes.get(hash);
        found
    }
}

1
mast/src/storage/mod.rs Normal file
View File

@@ -0,0 +1 @@
pub mod memory;

177
mast/src/treap.rs Normal file
View File

@@ -0,0 +1,177 @@
use blake3::{Hash, Hasher};
use std::cmp::{self, Ordering};
use std::collections::HashMap;
use std::mem;
use std::ops::Deref;
use crate::storage::memory::MemoryStorage;
/// All-zero hash used as the "no node" marker: `Node::new` assigns it to both
/// child links and `Treap::new` uses it as the initial root.
const EMPTY_HASH: Hash = Hash::from_bytes([0_u8; 32]);
/// A single tree node whose children are referenced by hash rather than by pointer.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Node {
/// Key bytes; `Treap::insert_impl` compares these to choose a subtree.
pub(crate) key: Box<[u8]>,
/// Hash of the value blob (the hash returned by `Treap::insert_blob`).
pub(crate) value: Hash,
/// Hash of the key bytes, computed in `Node::new`; compared byte-wise during insertion.
pub(crate) rank: Hash,
/// Hash of the left child node; `EMPTY_HASH` until a child is linked.
pub(crate) left: Hash,
/// Hash of the right child node; `EMPTY_HASH` until a child is linked.
pub(crate) right: Hash,
}
impl Node {
    /// Creates a childless node for `key` that points at the blob hash `value`.
    ///
    /// The rank is the hash of the key bytes, so it is fully determined by the
    /// key. Both child links start out as `EMPTY_HASH`.
    fn new(key: &[u8], value: Hash) -> Self {
        let mut hasher = Hasher::new();
        hasher.update(key);
        let rank = hasher.finalize();

        Self {
            key: key.into(),
            value,
            left: EMPTY_HASH,
            right: EMPTY_HASH,
            rank,
        }
    }

    /// Returns the hash of the node.
    ///
    /// The hash covers `key`, `value`, `left` and `right` (in that order);
    /// the rank is not part of the input.
    pub fn hash(&self) -> Hash {
        let mut hasher = Hasher::new();
        hasher.update(&self.key);
        hasher.update(self.value.as_bytes());
        hasher.update(self.left.as_bytes());
        hasher.update(self.right.as_bytes());

        hasher.finalize()
    }

    /// Serializes the node as `value (32) || left (32) || right (32) || key`.
    ///
    /// The rank is not written; `from_bytes` recomputes it from the key.
    fn to_bytes(&self) -> Box<[u8]> {
        // Three 32-byte hashes followed by the key: allocate once.
        let mut bytes = Vec::with_capacity(96 + self.key.len());

        bytes.extend_from_slice(self.value.as_bytes());
        bytes.extend_from_slice(self.left.as_bytes());
        bytes.extend_from_slice(self.right.as_bytes());
        bytes.extend_from_slice(&self.key);

        bytes.into_boxed_slice()
    }

    /// Deserializes a node from the layout produced by `to_bytes`.
    ///
    /// The `&Box<[u8]>` parameter type is kept unchanged so any existing
    /// caller keeps compiling.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is shorter than the 96-byte header.
    fn from_bytes(bytes: &Box<[u8]>) -> Self {
        assert!(
            bytes.len() >= 96,
            "serialized node must be at least 96 bytes, got {}",
            bytes.len()
        );

        let (header, key) = bytes.split_at(96);
        let hash_at = |offset: usize| -> Hash {
            let raw: [u8; 32] = header[offset..offset + 32]
                .try_into()
                .expect("header is exactly 96 bytes, so each 32-byte field is in bounds");
            Hash::from_bytes(raw)
        };

        let mut node = Self::new(key, hash_at(0));
        node.left = hash_at(32);
        node.right = hash_at(64);

        node
    }

    /// Placeholder: the body is empty, so calling this currently changes
    /// neither the node nor `storage`.
    fn set_left(&mut self, left: Hash, storage: &mut MemoryStorage) {}
}
/// A tree of hash-linked `Node`s kept in a `MemoryStorage`.
#[derive(Debug)]
pub struct Treap {
/// Hash of the current root node; `EMPTY_HASH` while the tree is empty.
pub(crate) root: Hash,
/// Backing store for nodes (see `store_node`) and value blobs (see `insert_blob`).
storage: MemoryStorage,
}
impl Treap {
    /// Creates an empty tree (root is `EMPTY_HASH`) on top of `storage`.
    pub fn new(storage: MemoryStorage) -> Self {
        Self {
            root: EMPTY_HASH,
            storage,
        }
    }

    /// Stores `value` as a blob, inserts a node for `key` that points at it,
    /// and updates the root hash.
    pub fn insert(&mut self, key: &[u8], value: &[u8]) {
        let value = self.insert_blob(value);
        let mut node = Node::new(key, value);

        // TODO: batch inserting updated nodes.
        let new_root = self.insert_impl(&mut node, self.root);
        self.root = new_root.hash();
    }

    // Recursive insertion (unzipping) algorithm.
    //
    // Returns the new root node of the subtree that was rooted at `root_hash`.
    //
    // Because children are referenced by hash, every node on the path from
    // the insertion point to the root gets a new hash. The parent therefore
    // has to be re-linked to the returned child even when that child is not
    // `x` itself.
    //
    // NOTE(review): a key equal to an existing key takes the right-hand
    // branch, so it is added as an extra node rather than replacing the
    // existing one. Confirm whether update semantics are wanted.
    fn insert_impl(&mut self, x: &mut Node, root_hash: Hash) -> Node {
        let mut root = match self.get_node(root_hash) {
            Some(node) => node,
            None => {
                // Empty subtree: `x` becomes its root.
                self.store_node(x);
                return x.clone();
            }
        };

        if x.key < root.key {
            let child = self.insert_impl(x, root.left);

            if child.key == x.key {
                if x.rank.as_bytes() < root.rank.as_bytes() {
                    // `x` stays below `root`.
                    root.left = self.store_node(x);
                } else {
                    // `x` outranks `root`: `root` becomes the right child of `x`
                    // and adopts the former right subtree of `x` as its left.
                    root.left = x.right;
                    x.right = self.store_node(&root);
                    self.store_node(x);
                    return x.clone();
                }
            } else {
                // The left subtree changed below its root: pick up its new hash.
                root.left = child.hash();
            }
        } else {
            let child = self.insert_impl(x, root.right);

            if child.key == x.key {
                if x.rank.as_bytes() < root.rank.as_bytes() {
                    root.right = self.store_node(x);
                } else {
                    // Mirror image of the left case: `root` becomes the LEFT
                    // child of `x` and adopts the former left subtree of `x`.
                    root.right = x.left;
                    x.left = self.store_node(&root);
                    self.store_node(x);
                    return x.clone();
                }
            } else {
                // The right subtree changed below its root: pick up its new hash.
                root.right = child.hash();
            }
        }

        self.store_node(&root);
        root
    }

    /// Store a node after it has been modified and had a new hash.
    ///
    /// Returns the hash of the stored node.
    fn store_node(&mut self, node: &Node) -> Hash {
        // TODO: save the hash somewhere in the Node instead of hashing it again.
        let hash = node.hash();
        self.storage.insert_node(node);
        hash
    }

    // TODO: Add stream input API.
    /// Stores `blob` under its own hash and returns that hash.
    fn insert_blob(&mut self, blob: &[u8]) -> Hash {
        let mut hasher = Hasher::new();
        hasher.update(blob);
        let hash = hasher.finalize();

        self.storage.insert_blob(hash, blob);
        hash
    }

    // TODO: move to storage abstraction.
    /// Returns a copy of the node stored under `hash`, or `None` when storage
    /// has no node for that hash.
    pub(crate) fn get_node(&self, hash: Hash) -> Option<Node> {
        self.storage.get_node(&hash).cloned()
    }
}
impl Default for Treap {
fn default() -> Self {
Self::new(MemoryStorage::new())
}
}