wip: basic cases of remove

This commit is contained in:
nazeh
2023-12-23 22:28:36 +03:00
parent 2f3f6bfc62
commit d86952df16
7 changed files with 487 additions and 222 deletions

View File

@@ -0,0 +1,10 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 491e92aa5f00ba54a3a98d00351fe58ef2b3943a701f80c942349e3b5bb5baca # shrinks to random_entries = [([142], [0], 216), ([142], [0], 29)]
cc 7b8908fd3cf99c905ebf09706be69e33babea514bfc4670b27800e667ed38bbe # shrinks to random_entries = [([223], [0], Insert), ([223], [0], Remove)]
cc 460005103c0d4107d2b993317861e5c237cc6da4d47c44d9e1b6f33bd85f8d51 # shrinks to random_entries = [([120], [0], Insert), ([28], [0], Remove)]
cc 02d5799411cd45dafd4fd3106f577bd0bd3aab86b2edeec47bd4f2a204ec5a86 # shrinks to random_entries = [([23], [0], Insert), ([0], [0], Insert), ([23], [0], Remove)]

View File

@@ -1,9 +1,9 @@
use std::cmp::Ordering;
use crate::node::{hash, Branch, Node};
use blake3::Hash;
use redb::Table;
use super::search::binary_search_path;
use crate::node::{Branch, Node};
// Watch this [video](https://youtu.be/NxRXhBur6Xs?si=GNwaUOfuGwr_tBKI&t=1763) for a good explanation of the unzipping algorithm.
// Also see the Iterative insertion algorithm in the page 12 of the [original paper](https://arxiv.org/pdf/1806.06726.pdf).
// The difference here is that in a Hash Treap, we need to update nodes bottom up.
@@ -50,7 +50,7 @@ use redb::Table;
// Upper path doesn't change much beyond updating the hash of their child in the branch featured in
// this binary search path.
//
// We call the rest of the path `unzipping path` or `split path` and this is where we create two
// We call the rest of the path `unzipping path` or `lower path` and this is where we create two
// new paths (left and right), each contain the nodes with keys smaller than or larger than the
// inserted key respectively.
//
@@ -75,13 +75,32 @@ use redb::Table;
// need to update the hash of the node (according to the new value) and update the hash of its
// parents until we reach the root.
//
// Also note that we need to update the `ref_count` of all the nodes, and delete the nodes with
// `ref_count` of zero.
// After unzipping the lower path, we should get:
//
// F
// \
// P
// /
// J
// / \
// H M
// \
// I
//
// So the end result becomes:
//
// F
// / \
// D P
// / / \
// C J X
// / / \ \
// A H M Y
// / \
// G I
//
// The simplest way to do so is to decrement all the nodes in the search path, and then increment
// all the new nodes (in both the upper and lower paths) before committing the write transaction.
pub fn insert(
pub(crate) fn insert(
nodes_table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>,
root: Option<Node>,
key: &[u8],
@@ -89,22 +108,22 @@ pub fn insert(
) -> Node {
let mut path = binary_search_path(nodes_table, root, key);
let mut unzip_left_root: Option<Hash> = None;
let mut unzip_right_root: Option<Hash> = None;
let mut left_subtree: Option<Hash> = None;
let mut right_subtree: Option<Hash> = None;
// Unzip the lower path to get left and right children of the inserted node.
for (node, branch) in path.unzip_path.iter_mut().rev() {
for (node, branch) in path.lower.iter_mut().rev() {
// Decrement the old version.
node.decrement_ref_count().save(nodes_table);
match branch {
Branch::Right => {
node.set_right_child(unzip_left_root);
unzip_left_root = Some(node.hash());
node.set_right_child(left_subtree);
left_subtree = Some(node.hash());
}
Branch::Left => {
node.set_left_child(unzip_right_root);
unzip_right_root = Some(node.hash());
node.set_left_child(right_subtree);
right_subtree = Some(node.hash());
}
}
@@ -113,40 +132,36 @@ pub fn insert(
let mut root;
if let Some(mut existing) = path.existing {
if existing.value() == value {
if let Some(mut target) = path.target {
if target.value() == value {
// There is really nothing to update. Skip traversing upwards.
return path
.upper_path
.first()
.map(|(n, _)| n.clone())
.unwrap_or(existing);
return path.upper.first().map(|(n, _)| n.clone()).unwrap_or(target);
}
// Decrement the old version.
existing.decrement_ref_count().save(nodes_table);
target.decrement_ref_count().save(nodes_table);
// Else, update the value and rehash the node so that we can update the hashes upwards.
existing
target
.set_value(value)
.increment_ref_count()
.save(nodes_table);
root = existing
root = target
} else {
// Insert the new node.
let mut node = Node::new(key, value);
node.set_left_child(unzip_left_root)
.set_right_child(unzip_right_root)
node.set_left_child(left_subtree)
.set_right_child(right_subtree)
.increment_ref_count()
.save(nodes_table);
root = node
};
let mut upper_path = path.upper_path;
let mut upper_path = path.upper;
// Propagate the new hashes upwards if there are any nodes in the upper_path.
while let Some((mut node, branch)) = upper_path.pop() {
@@ -166,191 +181,130 @@ pub fn insert(
root
}
/// The nodes visited while searching the tree for a key, split by how their rank compares to
/// the rank of the searched key.
#[derive(Debug)]
struct BinarySearchPath {
/// Visited nodes whose rank is greater than the rank of the searched key, in visiting order,
/// each paired with the branch that was followed out of it.
upper_path: Vec<(Node, Branch)>,
/// The node whose key equals the searched key, if one was found.
existing: Option<Node>,
/// Visited nodes whose rank is not greater than the rank of the searched key, in visiting
/// order, each paired with the branch that was followed out of it.
unzip_path: Vec<(Node, Branch)>,
}
/// Walks the tree from `root` towards `key` and records every node visited on the way:
/// - `upper_path` collects the visited nodes whose rank is greater than the rank of `key`.
/// - `existing` is the node whose key is exactly `key` (if any); the walk stops there.
/// - `unzip_path` collects the visited nodes whose rank is not greater than the rank of `key`.
///
/// Every recorded node is paired with the branch that was followed out of it.
/// When a match is found, `unzip_path` is expected to be empty.
fn binary_search_path(
    table: &Table<&'static [u8], (u64, &'static [u8])>,
    root: Option<Node>,
    key: &[u8],
) -> BinarySearchPath {
    let key_rank = hash(key);

    let mut upper_path = Vec::new();
    let mut unzip_path = Vec::new();
    let mut existing = None;

    let mut cursor = root;
    while let Some(node) = cursor {
        let (child, branch) = match key.cmp(node.key()) {
            Ordering::Equal => {
                // Exact match: nothing below this node is part of the search path.
                existing = Some(node);
                break;
            }
            Ordering::Less => (node.left(), Branch::Left),
            Ordering::Greater => (node.right(), Branch::Right),
        };

        // Load the next node before the current one is moved into its path.
        cursor = child.and_then(|n| Node::open(table, n));

        if node.rank().as_bytes() > key_rank.as_bytes() {
            upper_path.push((node, branch));
        } else {
            unzip_path.push((node, branch));
        }
    }

    BinarySearchPath {
        upper_path,
        existing,
        unzip_path,
    }
}
// Insertion tests. Each case replays a list of operations through `test_operations`, optionally
// together with the hex root hash the resulting treap is expected to have.
#[cfg(test)]
mod test {
    use crate::test::{test_operations, Entry};
    use proptest::prelude::*;

    proptest! {
        #[test]
        /// Test that upserting an entry with the same key in different tree shapes results in the
        /// expected structure
        fn test_upsert(random_entries in prop::collection::vec(
            (prop::collection::vec(any::<u8>(), 1), prop::collection::vec(any::<u8>(), 1)),
            1..10,
        )) {
            let operations = random_entries.into_iter().map(|(key, value)| {
                Entry::insert(&key, &value)
            }).collect::<Vec<_>>();

            test_operations(&operations, None);
        }

        #[test]
        fn test_general_insertiong(random_entries in prop::collection::vec(
            (prop::collection::vec(any::<u8>(), 32), prop::collection::vec(any::<u8>(), 32)),
            1..50,
        )) {
            let operations = random_entries.into_iter().map(|(key, value)| {
                Entry::insert(&key, &value)
            }).collect::<Vec<_>>();

            test_operations(&operations, None);
        }
    }

    #[test]
    fn insert_single_entry() {
        let case = ["A"];

        test_operations(
            &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
            Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"),
        )
    }

    #[test]
    fn sorted_alphabets() {
        let case = [
            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q",
            "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
        ];

        test_operations(
            &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
            Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"),
        );
    }

    #[test]
    fn reverse_alphabets() {
        // Same entries as `sorted_alphabets`, inserted in the opposite order; the expected root
        // hash is the same.
        let mut case = [
            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q",
            "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
        ];
        case.reverse();

        test_operations(
            &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
            Some("02af3de6ed6368c5abc16f231a17d1140e7bfec483c8d0aa63af4ef744d29bc3"),
        )
    }

    #[test]
    fn unsorted() {
        let case = ["D", "N", "P", "X", "A", "G", "C", "M", "H", "I", "J"];

        test_operations(
            &case.map(|key| Entry::insert(key.as_bytes(), &[b"v", key.as_bytes()].concat())),
            Some("0957cc9b87c11cef6d88a95328cfd9043a3d6a99e9ba35ee5c9c47e53fb6d42b"),
        )
    }

    #[test]
    fn upsert_at_root() {
        let case = ["X", "X"];

        // Each insertion gets a fresh value ("1", "2", ...) so the second one is an update.
        let mut i = 0;

        test_operations(
            &case.map(|key| {
                i += 1;
                Entry::insert(key.as_bytes(), i.to_string().as_bytes())
            }),
            Some("4538b4de5e58f9be9d54541e69fab8c94c31553a1dec579227ef9b572d1c1dff"),
        )
    }

    #[test]
    fn upsert_deeper() {
        // X has higher rank.
        let case = ["X", "F", "F"];

        let mut i = 0;

        test_operations(
            &case.map(|key| {
                i += 1;
                Entry::insert(key.as_bytes(), i.to_string().as_bytes())
            }),
            Some("c9f7aaefb18ec8569322b9621fc64f430a7389a790e0bf69ec0ad02879d6ce54"),
        )
    }

    #[test]
    fn upsert_root_with_children() {
        // X has higher rank.
        let case = ["F", "X", "X"];

        let mut i = 0;

        test_operations(
            &case.map(|key| {
                i += 1;
                Entry::insert(key.as_bytes(), i.to_string().as_bytes())
            }),
            Some("02e26311f2b55bf6d4a7163399f99e17c975891a05af2f1e09bc969f8bf0f95d"),
        )
    }
}

View File

@@ -1 +1,6 @@
pub mod insert;
pub mod remove;
mod search;
pub(crate) use insert::insert;
pub(crate) use remove::remove;

View File

@@ -0,0 +1,191 @@
use blake3::Hash;
use redb::Table;
use super::search::binary_search_path;
use crate::node::{hash, Branch, Node};
pub(crate) fn remove<'a>(
nodes_table: &'_ mut Table<&'static [u8], (u64, &'static [u8])>,
root: Option<Node>,
key: &[u8],
) -> Option<Node> {
let mut path = binary_search_path(nodes_table, root, key);
// The key doesn't exist, so there is nothing to remove.
let mut root = path.upper.first().map(|(n, _)| n.clone());
dbg!(&path);
if let Some(mut target) = path.target {
// Zipping
target.decrement_ref_count();
target.save(nodes_table);
let mut left_subtree = Vec::new();
let mut right_subtree = Vec::new();
target
.left()
.and_then(|h| Node::open(nodes_table, h))
.map(|n| left_subtree.push(n));
while let Some(next) = left_subtree
.last()
.and_then(|n| n.right().and_then(|h| Node::open(nodes_table, h)))
{
left_subtree.push(next);
}
target
.right()
.and_then(|h| Node::open(nodes_table, h))
.map(|n| right_subtree.push(n));
while let Some(next) = right_subtree
.last()
.and_then(|n| n.left().and_then(|h| Node::open(nodes_table, h)))
{
right_subtree.push(next);
}
let mut i = left_subtree.len().max(right_subtree.len());
let mut last: Option<Node> = None;
while i > 0 {
last = match (left_subtree.get_mut(i - 1), right_subtree.get_mut(i - 1)) {
(Some(left), None) => Some(left.clone()), // Left subtree is deeper
(None, Some(right)) => Some(right.clone()), // Right subtree is deeper
(Some(left), Some(right)) => {
let rank_left = hash(left.key());
let rank_right = hash(right.key());
if hash(left.key()).as_bytes() > hash(right.key()).as_bytes() {
right
// decrement old version
.decrement_ref_count()
.save(nodes_table)
// save new version
.set_left_child(last.map(|n| n.hash()))
.increment_ref_count()
.save(nodes_table);
left
// decrement old version
.decrement_ref_count()
.save(nodes_table)
// save new version
.set_right_child(Some(right.hash()))
.increment_ref_count()
.save(nodes_table);
Some(left.clone())
} else {
left
// decrement old version
.decrement_ref_count()
.save(nodes_table)
// save new version
.set_right_child(last.map(|n| n.hash()))
.increment_ref_count()
.save(nodes_table);
right
// decrement old version
.decrement_ref_count()
.save(nodes_table)
// save new version
.set_left_child(Some(left.hash()))
.increment_ref_count()
.save(nodes_table);
Some(right.clone())
}
}
_ => {
// Should never happen!
None
}
};
i -= 1;
}
// dbg!(&last);
return last;
} else {
// clearly the lower path has the highest node, and it won't be changed.
return path.lower.first().map(|(n, _)| n.clone());
}
if root.is_none() {
root = path.lower.first().map(|(n, _)| n.clone());
}
return root;
}
// Removal tests. Each case replays a list of insert/remove operations through
// `test_operations`; the second argument is the optional expected root hash.
#[cfg(test)]
mod test {
use crate::test::{test_operations, Entry, Operation};
use proptest::prelude::*;
/// Strategy that yields either `Operation::Insert` or `Operation::Remove`.
fn operation_strategy() -> impl Strategy<Value = Operation> {
prop_oneof![
// For cases without data, `Just` is all you need
Just(Operation::Insert),
Just(Operation::Remove),
]
}
proptest! {
// NOTE(review): the `#[test]` attribute below is commented out, so this property is
// currently not run by the test harness.
// #[test]
// Generates 1 to 9 entries with one-byte keys and one-byte values, each tagged with a random
// operation, and replays them without an expected root hash.
fn insert_remove(
random_entries in prop::collection::vec(
(prop::collection::vec(any::<u8>(), 1), prop::collection::vec(any::<u8>(), 1), operation_strategy()),
1..10,
)) {
let operations = random_entries
.into_iter()
.map(|(key, value, op)| (Entry::new(&key, &value), op))
.collect::<Vec<_>>();
test_operations(&operations, None);
}
}
// NOTE(review): the `#[test]` attribute below is commented out, so `empty` is not run.
// It inserts "A" and removes it again; the expected root hash passed here presumably still
// needs to be confirmed for an emptied treap.
// #[test]
fn empty() {
let case = [("A", Operation::Insert), ("A", Operation::Remove)]
.map(|(k, op)| (Entry::new(k.as_bytes(), k.as_bytes()), op));
test_operations(
&case,
Some("78fd7507ef338f1a5816ffd702394999680a9694a85f4b8af77795d9fdd5854d"),
)
}
// Inserts one key and then removes a different key that was never inserted.
#[test]
fn lower_path() {
let case = [Entry::insert(&[120], &[0]), Entry::remove(&[28])];
test_operations(&case, None)
}
// Inserts two keys and then removes the first one that was inserted.
#[test]
fn remove_with_lower() {
let case = [
Entry::insert(&[23], &[0]),
Entry::insert(&[0], &[0]),
Entry::remove(&[23]),
];
test_operations(&case, None)
}
// Inserts one key and then removes a different key that was never inserted.
#[test]
fn remove_with_upper() {
let case = [Entry::insert(&[88], &[0]), Entry::remove(&[0])];
test_operations(&case, None)
}
}

View File

@@ -0,0 +1,62 @@
use redb::Table;
use std::cmp::Ordering;
use crate::node::{hash, Branch, Node};
/// The nodes visited while searching the tree for a key, split by how their rank compares to
/// the rank of the searched key.
#[derive(Debug)]
pub(crate) struct BinarySearchPath {
/// Visited nodes whose rank is greater than the rank of the searched key, in visiting order,
/// each paired with the branch that was followed out of it.
pub upper: Vec<(Node, Branch)>,
/// The node whose key equals the searched key, if one was found.
pub target: Option<Node>,
/// Visited nodes whose rank is not greater than the rank of the searched key, in visiting
/// order, each paired with the branch that was followed out of it.
pub lower: Vec<(Node, Branch)>,
}
/// Walks the tree from `root` towards `key` and records every node visited on the way:
/// - `upper` collects the visited nodes whose rank is greater than the rank of `key`.
/// - `target` is the node whose key is exactly `key` (if any); the walk stops there.
/// - `lower` collects the visited nodes whose rank is not greater than the rank of `key`.
///
/// Every recorded node is paired with the branch that was followed out of it.
/// When a match is found, `lower` is expected to be empty.
pub(crate) fn binary_search_path(
    table: &Table<&'static [u8], (u64, &'static [u8])>,
    root: Option<Node>,
    key: &[u8],
) -> BinarySearchPath {
    let key_rank = hash(key);

    let mut upper = Vec::new();
    let mut lower = Vec::new();
    let mut target = None;

    let mut cursor = root;
    while let Some(node) = cursor {
        let (child, branch) = match key.cmp(node.key()) {
            Ordering::Equal => {
                // Exact match: nothing below this node is part of the search path.
                target = Some(node);
                break;
            }
            Ordering::Less => (node.left(), Branch::Left),
            Ordering::Greater => (node.right(), Branch::Right),
        };

        // Load the next node before the current one is moved into its path.
        cursor = child.and_then(|n| Node::open(table, n));

        if node.rank().as_bytes() > key_rank.as_bytes() {
            upper.push((node, branch));
        } else {
            lower.push((node, branch));
        }
    }

    BinarySearchPath {
        upper,
        target,
        lower,
    }
}

View File

@@ -13,7 +13,7 @@ use redb::Database;
#[derive(Clone, Debug)]
pub enum Operation {
Insert,
Delete,
Remove,
}
#[derive(Clone, PartialEq)]
@@ -23,6 +23,12 @@ pub struct Entry {
}
impl Entry {
/// Builds an entry that owns copies of `key` and `value`.
pub fn new(key: &[u8], value: &[u8]) -> Self {
    let key = key.to_vec();
    let value = value.to_vec();

    Self { key, value }
}
pub fn insert(key: &[u8], value: &[u8]) -> (Self, Operation) {
(
Self {
@@ -32,6 +38,15 @@ impl Entry {
Operation::Insert,
)
}
/// Builds the `(entry, Operation::Remove)` pair for removing `key`.
///
/// The entry owns a copy of `key`; its value is left empty.
pub fn remove(key: &[u8]) -> (Self, Operation) {
    let entry = Self {
        key: key.to_vec(),
        value: b"".to_vec(),
    };

    (entry, Operation::Remove)
}
}
impl std::fmt::Debug for Entry {
@@ -51,12 +66,14 @@ pub fn test_operations(input: &[(Entry, Operation)], root_hash: Option<&str>) {
for (entry, operation) in input {
match operation {
Operation::Insert => treap.insert(&entry.key, &entry.value),
Operation::Delete => todo!(),
Operation::Remove => {
treap.remove(&entry.key);
}
}
}
// Uncomment to see the graph (only if values are utf8)
// println!("{}", into_mermaid_graph(&treap));
// Uncomment to see the graph
println!("{}", into_mermaid_graph(&treap));
let collected = treap
.iter()
@@ -83,7 +100,7 @@ pub fn test_operations(input: &[(Entry, Operation)], root_hash: Option<&str>) {
Operation::Insert => {
btree.insert(&entry.key, &entry.value);
}
Operation::Delete => {
Operation::Remove => {
btree.remove(&entry.key);
}
}

View File

@@ -33,7 +33,6 @@ impl<'treap> HashTreap<'treap> {
// TODO: add name to open from storage with.
pub fn new(db: &'treap Database, name: &'treap str) -> Self {
// Setup tables
let write_tx = db.begin_write().unwrap();
{
let _table = write_tx.open_table(NODES_TABLE).unwrap();
@@ -69,8 +68,7 @@ impl<'treap> HashTreap<'treap> {
.root_hash_inner(&roots_table)
.and_then(|hash| Node::open(&nodes_table, hash));
let new_root =
crate::operations::insert::insert(&mut nodes_table, old_root, key, value);
let new_root = crate::operations::insert(&mut nodes_table, old_root, key, value);
roots_table
.insert(self.name.as_bytes(), new_root.hash().as_bytes().as_slice())
@@ -81,6 +79,34 @@ impl<'treap> HashTreap<'treap> {
write_txn.commit().unwrap();
}
/// Removes `key` from this treap inside a single write transaction.
///
/// The current root is loaded, `crate::operations::remove` is run against the nodes table, and
/// the root recorded under this treap's name is then either replaced by the hash of the new
/// root or deleted when no root is left.
///
/// NOTE(review): the return value is currently always `None`, whether or not the key existed.
///
/// # Panics
///
/// Panics if beginning the transaction, opening a table, updating the roots table, or
/// committing fails.
pub fn remove(&mut self, key: &[u8]) -> Option<&[u8]> {
let write_txn = self.db.begin_write().unwrap();
// The tables are opened in an inner block that ends before the transaction is committed.
{
let mut roots_table = write_txn.open_table(ROOTS_TABLE).unwrap();
let mut nodes_table = write_txn.open_table(NODES_TABLE).unwrap();
// Resolve the stored root hash (if any) into the root node.
let old_root = self
.root_hash_inner(&roots_table)
.and_then(|hash| Node::open(&nodes_table, hash));
let new_root = crate::operations::remove(&mut nodes_table, old_root, key);
if let Some(new_root) = new_root {
// Point this treap's name at the new root hash.
roots_table
.insert(self.name.as_bytes(), new_root.hash().as_bytes().as_slice())
.unwrap();
} else {
// No root left: drop the entry stored under this treap's name.
roots_table.remove(self.name.as_bytes()).unwrap();
}
};
// Finally commit the changes to the storage.
write_txn.commit().unwrap();
None
}
pub fn iter(&self) -> TreapIterator<'_> {
TreapIterator::new(self)
}