diff --git a/mast/src/lib.rs b/mast/src/lib.rs index dbfb5f1..995d8eb 100644 --- a/mast/src/lib.rs +++ b/mast/src/lib.rs @@ -1,10 +1,12 @@ #![allow(unused)] -mod mermaid; mod node; mod operations; pub mod treap; +#[cfg(test)] +mod test; + pub(crate) use blake3::{Hash, Hasher}; pub const HASH_LEN: usize = 32; diff --git a/mast/src/mermaid.rs b/mast/src/mermaid.rs deleted file mode 100644 index 54877df..0000000 --- a/mast/src/mermaid.rs +++ /dev/null @@ -1,55 +0,0 @@ -#[cfg(test)] -mod test { - use crate::node::Node; - use crate::treap::HashTreap; - - impl<'treap> HashTreap<'treap> { - pub fn as_mermaid_graph(&self) -> String { - let mut graph = String::new(); - - graph.push_str("graph TD;\n"); - - if let Some(root) = self.root() { - self.build_graph_string(&root, &mut graph); - } - - graph.push_str(&format!( - " classDef null fill:#1111,stroke-width:1px,color:#fff,stroke-dasharray: 5 5;\n" - )); - - graph - } - - fn build_graph_string(&self, node: &Node, graph: &mut String) { - let key = bytes_to_string(node.key()); - let node_label = format!("{}(({}))", node.hash(), key); - - // graph.push_str(&format!("## START node {}\n", node_label)); - if let Some(child) = self.get_node(node.left()) { - let key = bytes_to_string(child.key()); - let child_label = format!("{}(({}))", child.hash(), key); - - graph.push_str(&format!(" {} --l--> {};\n", node_label, child_label)); - self.build_graph_string(&child, graph); - } else { - graph.push_str(&format!(" {} -.-> {}l((l));\n", node_label, node.hash())); - graph.push_str(&format!(" class {}l null;\n", node.hash())); - } - - if let Some(child) = self.get_node(node.right()) { - let key = bytes_to_string(child.key()); - let child_label = format!("{}(({}))", child.hash(), key); - - graph.push_str(&format!(" {} --r--> {};\n", node_label, child_label)); - self.build_graph_string(&child, graph); - } else { - graph.push_str(&format!(" {} -.-> {}r((r));\n", node_label, node.hash())); - graph.push_str(&format!(" class {}r null;\n", 
node.hash())); - } - } - } - - fn bytes_to_string(byte: &[u8]) -> String { - String::from_utf8(byte.to_vec()).expect("Invalid utf8 key in test with mermaig graph") - } -} diff --git a/mast/src/node.rs b/mast/src/node.rs index 329be72..55947e1 100644 --- a/mast/src/node.rs +++ b/mast/src/node.rs @@ -9,9 +9,9 @@ use crate::{Hash, Hasher, HASH_LEN}; // TODO: remove unused // TODO: remove unwrap -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] /// In memory reprsentation of treap node. -pub(crate) struct Node { +pub struct Node { // Key value key: Box<[u8]>, value: Box<[u8]>, @@ -101,39 +101,39 @@ impl Node { // === Getters === - pub(crate) fn key(&self) -> &[u8] { + pub fn key(&self) -> &[u8] { &self.key } - pub(crate) fn value(&self) -> &[u8] { + pub fn value(&self) -> &[u8] { &self.value } - pub(crate) fn left(&self) -> &Option { + pub fn left(&self) -> &Option { &self.left } - pub(crate) fn right(&self) -> &Option { + pub fn right(&self) -> &Option { &self.right } // === Public Methods === - pub(crate) fn rank(&self) -> Hash { + pub fn rank(&self) -> Hash { hash(&self.key) } /// Returns the hash of the node. 
- pub(crate) fn hash(&self) -> Hash { + pub fn hash(&self) -> Hash { hash(&self.canonical_encode()) } - pub(crate) fn decrement_ref_count(&self, table: &mut Table<&[u8], (u64, &[u8])>) { + // === Private Methods === + + pub fn decrement_ref_count(&self, table: &mut Table<&[u8], (u64, &[u8])>) { self.update_ref_count(table, RefCountDiff::Decrement) } - // === Private Methods === - fn set_child( &mut self, table: &mut Table<&[u8], (u64, &[u8])>, diff --git a/mast/src/operations/insert.rs b/mast/src/operations/insert.rs index abd98af..d2f8e2a 100644 --- a/mast/src/operations/insert.rs +++ b/mast/src/operations/insert.rs @@ -116,10 +116,16 @@ pub fn insert( struct BinarySearchPath { upper_path: Vec<(Node, Branch)>, - exact_match: Option, + existing: Option, unzip_path: Vec<(Node, Branch)>, } +/// Returns the binary search path for a given key in the following form: +/// - `upper_path` is the path with nodes with rank higher than the rank of the key. +/// - `existing` is the node with the exact same key (if any). +/// - `unzip_path` is the path with nodes with rank less than the rank of the key. +/// +/// If a match was found, the `unzip_path` will be empty. 
fn binary_search_path( table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>, root: Option, @@ -129,7 +135,7 @@ fn binary_search_path( let mut result = BinarySearchPath { upper_path: Default::default(), - exact_match: None, + existing: None, unzip_path: Default::default(), }; diff --git a/mast/src/test.rs b/mast/src/test.rs new file mode 100644 index 0000000..8cce6e7 --- /dev/null +++ b/mast/src/test.rs @@ -0,0 +1,306 @@ +use crate::node::Node; +use crate::treap::{HashTreap, NODES_TABLE}; +use crate::Hash; + +use redb::backends::InMemoryBackend; +use redb::{Database, Error, ReadableTable, TableDefinition}; + +#[test] +fn cases() { + let sorted_alphabets = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", + "S", "T", "U", "V", "W", "X", "Y", "Z", + ] + .map(|key| Entry { + key: key.as_bytes().to_vec(), + value: [b"v", key.as_bytes()].concat(), + }); + + let mut reverse_alphabets = sorted_alphabets.clone(); + reverse_alphabets.reverse(); + + let unsorted = ["D", "N", "P", "X", "A", "G", "C", "M", "H", "I", "J"].map(|key| Entry { + key: key.as_bytes().to_vec(), + value: [b"v", key.as_bytes()].concat(), + }); + + let single_entry = ["X"].map(|key| Entry { + key: key.as_bytes().to_vec(), + value: [b"v", key.as_bytes()].concat(), + }); + + let upsert_at_root = [ + ( + Entry { + key: b"X".to_vec(), + value: b"A".to_vec(), + }, + Operation::Insert, + ), + (( + Entry { + key: b"X".to_vec(), + value: b"B".to_vec(), + }, + Operation::Insert, + )), + ]; + + let upsert_deeper = [ + ( + Entry { + key: b"F".to_vec(), + value: b"A".to_vec(), + }, + Operation::Insert, + ), + ( + Entry { + key: b"X".to_vec(), + value: b"A".to_vec(), + }, + Operation::Insert, + ), + (( + Entry { + key: b"X".to_vec(), + value: b"B".to_vec(), + }, + Operation::Insert, + )), + ]; + + let cases = [ + ( + "sorted alphabets", + sorted_alphabets + .clone() + .map(|e| (e, Operation::Insert)) + .to_vec(), + sorted_alphabets.to_vec(), + 
Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), + ), + ( + "reversed alphabets", + sorted_alphabets + .clone() + .map(|e| (e, Operation::Insert)) + .to_vec(), + sorted_alphabets.to_vec(), + Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"), + ), + ( + "unsorted alphabets", + unsorted.clone().map(|e| (e, Operation::Insert)).to_vec(), + unsorted.to_vec(), + Some("0957cc9b87c11cef6d88a95328cfd9043a3d6a99e9ba35ee5c9c47e53fb6d42b"), + ), + ( + "Single insert", + single_entry + .clone() + .map(|e| (e, Operation::Insert)) + .to_vec(), + single_entry.to_vec(), + Some("b3e862d316e6f5caca72c8f91b7a15015b4f7f8f970c2731433aad793f7fe3e6"), + ), + ( + "upsert at root", + upsert_at_root.to_vec(), + upsert_at_root[1..] + .iter() + .map(|(e, _)| e.clone()) + .collect::>(), + Some("2947139081bbcc3816ebd73cb81ac0be5c564df55b88d6dbeb52c5254c1de887"), + ), + ( + "upsert deeper", + upsert_deeper.to_vec(), + upsert_at_root[0..2] + .iter() + .map(|(e, _)| e.clone()) + .collect::>(), + // Some("2947139081bbcc3816ebd73cb81ac0be5c564df55b88d6dbeb52c5254c1de887"), + None, + ), + ]; + + for case in cases { + test(case.0, &case.1, &case.2, case.3); + } +} + +// === Helpers === + +#[derive(Clone)] +enum Operation { + Insert, + Delete, +} + +#[derive(Clone, PartialEq)] +struct Entry { + key: Vec, + value: Vec, +} + +impl std::fmt::Debug for Entry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({:?}, {:?})", self.key, self.value) + } +} + +fn test(name: &str, input: &[(Entry, Operation)], output: &[Entry], root_hash: Option<&str>) { + let inmemory = InMemoryBackend::new(); + let db = Database::builder() + .create_with_backend(inmemory) + .expect("Failed to create DB"); + + let mut treap = HashTreap::new(&db, "test"); + + for (entry, operation) in input { + match operation { + Operation::Insert => treap.insert(&entry.key, &entry.value), + Operation::Delete => todo!(), + } + } + + let collected = treap + .iter() 
+ .map(|n| Entry { + key: n.key().to_vec(), + value: n.value().to_vec(), + }) + .collect::>(); + + let mut sorted = output.to_vec(); + sorted.sort_by(|a, b| a.key.cmp(&b.key)); + + // dbg!(&treap.root_hash()); + println!("{}", into_mermaid_graph(&treap)); + + if root_hash.is_some() { + assert_root(&treap, root_hash.unwrap()); + } + + assert_eq!( + collected, + sorted, + "{}", + format!("Entries do not match at: \"{}\"", name) + ); +} + +/// Verify ranks, and keys order +fn verify(treap: &HashTreap, entries: &[(&[u8], Vec)]) { + verify_ranks(treap); + verify_entries( + treap, + entries + .iter() + .map(|(k, v)| (k.to_vec(), v.to_vec())) + .collect::>(), + ); +} + +/// Verify that every node has higher rank than its children. +fn verify_ranks(treap: &HashTreap) { + assert!( + verify_children_rank(treap, treap.root()), + "Ranks are not sorted correctly" + ) +} + +fn verify_children_rank(treap: &HashTreap, node: Option) -> bool { + match node { + Some(n) => { + let left_check = treap.get_node(n.left()).map_or(true, |left| { + n.rank().as_bytes() > left.rank().as_bytes() + && verify_children_rank(treap, Some(left)) + }); + let right_check = treap.get_node(n.right()).map_or(true, |right| { + n.rank().as_bytes() > right.rank().as_bytes() + && verify_children_rank(treap, Some(right)) + }); + + left_check && right_check + } + None => true, + } +} + +/// Verify that the expected entries are both sorted and present in the treap. 
+fn verify_entries(treap: &HashTreap, entries: Vec<(Vec, Vec)>) { + let collected = treap + .iter() + .map(|n| (n.key().to_vec(), n.value().to_vec())) + .collect::>(); + + let mut sorted = entries.iter().cloned().collect::>(); + sorted.sort_by(|a, b| a.0.cmp(&b.0)); + + assert_eq!(collected, sorted, "Entries do not match"); +} + +fn assert_root(treap: &HashTreap, expected_root_hash: &str) { + let root_hash = treap + .root() + .map(|n| n.hash()) + .expect("Has root hash after insertion"); + + assert_eq!( + root_hash, + Hash::from_hex(expected_root_hash).expect("Invalid hash hex"), + "Root hash is not correct" + ) +} + +// === Visualize the treap to verify the structure === + +fn into_mermaid_graph(treap: &HashTreap) -> String { + let mut graph = String::new(); + + graph.push_str("graph TD;\n"); + + if let Some(root) = treap.root() { + build_graph_string(&treap, &root, &mut graph); + } + + graph.push_str(&format!( + " classDef null fill:#1111,stroke-width:1px,color:#fff,stroke-dasharray: 5 5;\n" + )); + + graph +} + +fn build_graph_string(treap: &HashTreap, node: &Node, graph: &mut String) { + let key = bytes_to_string(node.key()); + let node_label = format!("{}(({}))", node.hash(), key); + + // graph.push_str(&format!("## START node {}\n", node_label)); + if let Some(child) = treap.get_node(node.left()) { + let key = bytes_to_string(child.key()); + let child_label = format!("{}(({}))", child.hash(), key); + + graph.push_str(&format!(" {} --l--> {};\n", node_label, child_label)); + build_graph_string(&treap, &child, graph); + } else { + graph.push_str(&format!(" {} -.-> {}l((l));\n", node_label, node.hash())); + graph.push_str(&format!(" class {}l null;\n", node.hash())); + } + + if let Some(child) = treap.get_node(node.right()) { + let key = bytes_to_string(child.key()); + let child_label = format!("{}(({}))", child.hash(), key); + + graph.push_str(&format!(" {} --r--> {};\n", node_label, child_label)); + build_graph_string(&treap, &child, graph); + } else { + 
graph.push_str(&format!(" {} -.-> {}r((r));\n", node_label, node.hash())); + graph.push_str(&format!(" class {}r null;\n", node.hash())); + } +} + +fn bytes_to_string(byte: &[u8]) -> String { + String::from_utf8(byte.to_vec()).expect("Invalid utf8 key in test with mermaig graph") +} diff --git a/mast/src/treap.rs b/mast/src/treap.rs index e2a2dfc..4e2ef0e 100644 --- a/mast/src/treap.rs +++ b/mast/src/treap.rs @@ -46,28 +46,12 @@ impl<'treap> HashTreap<'treap> { // === Getters === - pub(crate) fn root(&self) -> Option { + /// Returns the root hash of the treap. + pub fn root_hash(&self) -> Option { let read_txn = self.db.begin_read().unwrap(); + let table = read_txn.open_table(ROOTS_TABLE).unwrap(); - let roots_table = read_txn.open_table(ROOTS_TABLE).unwrap(); - let nodes_table = read_txn.open_table(NODES_TABLE).unwrap(); - - self.root_hash(&roots_table) - .and_then(|hash| Node::open(&nodes_table, hash)) - } - - fn root_hash( - &self, - table: &'_ impl ReadableTable<&'static [u8], &'static [u8]>, - ) -> Option { - let existing = table.get(self.name.as_bytes()).unwrap(); - existing.as_ref()?; - - let hash = existing.unwrap(); - - let hash: [u8; HASH_LEN] = hash.value().try_into().expect("Invalid root hash"); - - Some(Hash::from_bytes(hash)) + self.root_hash_inner(&table) } // === Public Methods === @@ -81,7 +65,7 @@ impl<'treap> HashTreap<'treap> { let mut roots_table = write_txn.open_table(ROOTS_TABLE).unwrap(); let mut nodes_table = write_txn.open_table(NODES_TABLE).unwrap(); - let root = self.root_hash(&roots_table); + let root = self.root_hash_inner(&roots_table); let new_root = crate::operations::insert::insert(&mut nodes_table, root, key, value); @@ -92,159 +76,85 @@ impl<'treap> HashTreap<'treap> { write_txn.commit().unwrap(); } + pub fn iter(&self) -> TreapIterator<'_> { + TreapIterator::new(self) + } + // === Private Methods === - // === Test Methods === + pub(crate) fn root(&self) -> Option { + let read_txn = self.db.begin_read().unwrap(); - // TODO: move 
tests and test helper methods to separate module. - // Only keep the public methods here, and probably move it to lib.rs too. + let roots_table = read_txn.open_table(ROOTS_TABLE).unwrap(); + let nodes_table = read_txn.open_table(NODES_TABLE).unwrap(); + + self.root_hash_inner(&roots_table) + .and_then(|hash| Node::open(&nodes_table, hash)) + } + + fn root_hash_inner( + &self, + table: &'_ impl ReadableTable<&'static [u8], &'static [u8]>, + ) -> Option { + let existing = table.get(self.name.as_bytes()).unwrap(); + existing.as_ref()?; + + let hash = existing.unwrap(); + + let hash: [u8; HASH_LEN] = hash.value().try_into().expect("Invalid root hash"); + + Some(Hash::from_bytes(hash)) + } - /// Create a read transaction and get a node from the nodes table. - #[cfg(test)] pub(crate) fn get_node(&self, hash: &Option) -> Option { let read_txn = self.db.begin_read().unwrap(); let table = read_txn.open_table(NODES_TABLE).unwrap(); hash.and_then(|h| Node::open(&table, h)) } +} - #[cfg(test)] - fn verify_ranks(&self) -> bool { - self.check_rank(self.root()) +pub struct TreapIterator<'treap> { + treap: &'treap HashTreap<'treap>, + stack: Vec, +} + +impl<'a> TreapIterator<'a> { + fn new(treap: &'a HashTreap<'a>) -> Self { + let mut iter = TreapIterator { + treap, + stack: Vec::new(), + }; + + if let Some(root) = treap.root() { + iter.push_left(root) + }; + + iter } - #[cfg(test)] - fn check_rank(&self, node: Option) -> bool { - match node { - Some(n) => { - let left_check = self.get_node(n.left()).map_or(true, |left| { - n.rank().as_bytes() > left.rank().as_bytes() && self.check_rank(Some(left)) - }); - let right_check = self.get_node(n.right()).map_or(true, |right| { - n.rank().as_bytes() > right.rank().as_bytes() && self.check_rank(Some(right)) - }); - - left_check && right_check - } - None => true, - } - } - - #[cfg(test)] - fn list_all_nodes(&self) { - // TODO: return all the nodes to verify GC in the test, or verify it here. 
- let read_txn = self.db.begin_read().unwrap(); - let nodes_table = read_txn.open_table(NODES_TABLE).unwrap(); - - let mut iter = nodes_table.iter().unwrap(); - - while let Some(existing) = iter.next() { - let key; - let data; - let existing = existing.unwrap(); - { - key = existing.0.value(); - data = existing.1.value(); - } - - // TODO: iterate over nodes - // println!( - // "HEre is a node key:{:?} ref_count:{:?} node:{:?}", - // Hash::from_bytes(key.try_into().unwrap()), - // data.0, - // Node::open(data) - // ); + fn push_left(&mut self, mut node: Node) { + while let Some(left) = self.treap.get_node(node.left()) { + self.stack.push(node); + node = left; } + self.stack.push(node); } } -#[cfg(test)] -mod test { - use super::HashTreap; - use super::Node; +impl<'a> Iterator for TreapIterator<'a> { + type Item = Node; - use redb::backends::InMemoryBackend; - use redb::{Database, Error, ReadableTable, TableDefinition}; + fn next(&mut self) -> Option { + match self.stack.pop() { + Some(node) => { + if let Some(right) = self.treap.get_node(node.right()) { + self.push_left(right) + } - // TODO: write a good test for GC. 
- - #[test] - fn sorted_insert() { - let file = tempfile::NamedTempFile::new().unwrap(); - let db = Database::create(file.path()).unwrap(); - - let mut treap = HashTreap::new(&db, "test"); - - let mut keys = [ - "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", - "R", "S", "T", "U", "V", "W", "X", "Y", "Z", - ]; - - for key in keys.iter() { - treap.insert(key.as_bytes(), b"0"); + Some(node.clone()) + } + _ => None, } - - assert!(treap.verify_ranks()); - println!("{}", treap.as_mermaid_graph()) - } - - #[test] - fn unsorted_insert() { - let file = tempfile::NamedTempFile::new().unwrap(); - let db = Database::create(file.path()).unwrap(); - - let mut treap = HashTreap::new(&db, "test"); - - let mut keys = ["D", "N", "P", "X", "A", "G", "C", "M", "H", "I", "J"]; - - for key in keys.iter() { - treap.insert(key.as_bytes(), b"0"); - } - - assert!(treap.verify_ranks(), "Ranks are not correct"); - - treap.list_all_nodes(); - - println!("{}", treap.as_mermaid_graph()) - } - - #[test] - fn upsert() { - let file = tempfile::NamedTempFile::new().unwrap(); - let db = Database::create(file.path()).unwrap(); - - let mut treap = HashTreap::new(&db, "test"); - - let mut keys = ["X", "X"]; - - for key in keys.iter() { - treap.insert(key.as_bytes(), b"0"); - } - - assert!(treap.verify_ranks(), "Ranks are not correct"); - - // TODO: check the value. - - println!("{}", treap.as_mermaid_graph()) - } - - #[test] - fn upsert_deeper_than_root() { - let file = tempfile::NamedTempFile::new().unwrap(); - let db = Database::create(file.path()).unwrap(); - - let mut treap = HashTreap::new(&db, "test"); - - let keys = ["F", "X", "X"]; - - for key in keys.iter() { - treap.insert(key.as_bytes(), b"0"); - } - - assert!(treap.verify_ranks(), "Ranks are not correct"); - - // TODO: check the value. - - println!("{}", treap.as_mermaid_graph()) } }