Skip to content

Commit

Permalink
Merge branch 'main' into ielashi/remove_bounded_storable
Browse files Browse the repository at this point in the history
  • Loading branch information
ielashi authored Aug 17, 2023
2 parents 798f986 + 7b2f7a6 commit daaec44
Show file tree
Hide file tree
Showing 6 changed files with 344 additions and 151 deletions.
36 changes: 36 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ lazy_static = "1.4.0"
maplit = "1.0.2"
proptest = "1"
tempfile = "3.3.0"
test-strategy = "0.3.1"

[[bench]]
name = "benches"
Expand Down
2 changes: 1 addition & 1 deletion src/btreemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ where
//
// [c] (parent)
// / \
// (child) [a, b] [d, e, f] (left sibling)
// (child) [a, b] [d, e, f] (right sibling)
// /
// [d']
//
Expand Down
163 changes: 13 additions & 150 deletions src/btreemap/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ use crate::{
use std::borrow::{Borrow, Cow};
use std::cell::{Ref, RefCell};

#[cfg(test)]
mod tests;
mod v1;

// The minimum degree to use in the btree.
// This constant is taken from Rust's std implementation of BTreeMap.
const B: usize = 6;
Expand All @@ -20,6 +24,7 @@ const INTERNAL_NODE_TYPE: u8 = 1;
const U32_SIZE: Bytes = Bytes::new(4);

#[derive(Debug, PartialEq, Copy, Clone, Eq)]
#[cfg_attr(test, derive(test_strategy::Arbitrary))]
pub enum NodeType {
Leaf,
Internal,
Expand All @@ -28,18 +33,7 @@ pub enum NodeType {
pub type Entry<K> = (K, Vec<u8>);

/// A node of a B-Tree.
///
/// The node is stored in stable memory with the following layout:
///
/// | NodeHeader | Entries (keys and values) | Children |
///
/// Each node contains up to `CAPACITY` entries, each entry contains:
/// - size of key (4 bytes)
/// - key (`max_key_size` bytes)
/// - size of value (4 bytes)
/// - value (`max_value_size` bytes)
///
/// Each node can contain up to `CAPACITY + 1` children, each child is 8 bytes.
/// See `v1.rs` for more details on the memory layout.
#[derive(Debug)]
pub struct Node<K: Storable + Ord + Clone> {
address: Address,
Expand All @@ -63,15 +57,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
max_key_size: u32,
max_value_size: u32,
) -> Node<K> {
Node {
address,
keys: vec![],
encoded_values: RefCell::default(),
children: vec![],
node_type,
max_key_size,
max_value_size,
}
Node::new_v1(address, node_type, max_key_size, max_value_size)
}

/// Loads a node from memory at the given address.
Expand All @@ -81,128 +67,14 @@ impl<K: Storable + Ord + Clone> Node<K> {
max_key_size: u32,
max_value_size: u32,
) -> Self {
// Load the header.
let header: NodeHeader = read_struct(address, memory);
assert_eq!(&header.magic, MAGIC, "Bad magic.");
assert_eq!(header.version, LAYOUT_VERSION, "Unsupported version.");

// Load the entries.
let mut keys = Vec::with_capacity(header.num_entries as usize);
let mut encoded_values = Vec::with_capacity(header.num_entries as usize);
let mut offset = NodeHeader::size();
let mut buf = Vec::with_capacity(max_key_size.max(max_value_size) as usize);
for _ in 0..header.num_entries {
// Read the key's size.
let key_size = read_u32(memory, address + offset);
offset += U32_SIZE;

// Read the key.
buf.resize(key_size as usize, 0);
memory.read((address + offset).get(), &mut buf);
offset += Bytes::from(max_key_size);
let key = K::from_bytes(Cow::Borrowed(&buf));
keys.push(key);

// Values are loaded lazily. Store a reference and skip loading it.
encoded_values.push(Value::ByRef(offset));
offset += U32_SIZE + Bytes::from(max_value_size);
}

// Load children if this is an internal node.
let mut children = vec![];
if header.node_type == INTERNAL_NODE_TYPE {
// The number of children is equal to the number of entries + 1.
for _ in 0..header.num_entries + 1 {
let child = Address::from(read_u64(memory, address + offset));
offset += Address::size();
children.push(child);
}

assert_eq!(children.len(), keys.len() + 1);
}

Self {
address,
keys,
encoded_values: RefCell::new(encoded_values),
children,
node_type: match header.node_type {
LEAF_NODE_TYPE => NodeType::Leaf,
INTERNAL_NODE_TYPE => NodeType::Internal,
other => unreachable!("Unknown node type {}", other),
},
max_key_size,
max_value_size,
}
// NOTE: new versions of `Node` will be introduced.
Self::load_v1(address, max_key_size, max_value_size, memory)
}

/// Saves the node to memory.
///
/// Panics if the node violates any of the invariants asserted below: a leaf
/// that has children, an internal node whose number of children is not
/// `keys.len() + 1`, a node with neither keys nor children, or keys that are
/// not in strictly increasing order.
pub fn save<M: Memory>(&self, memory: &M) {
// Check that the children are consistent with the node type.
match self.node_type {
NodeType::Leaf => {
assert!(self.children.is_empty());
}
NodeType::Internal => {
assert_eq!(self.children.len(), self.keys.len() + 1);
}
};

// We should never be saving an empty node.
assert!(!self.keys.is_empty() || !self.children.is_empty());

// Assert entries are sorted in strictly increasing order.
assert!(self.keys.windows(2).all(|e| e[0] < e[1]));

// Build the header: magic, layout version, node type and number of entries.
let header = NodeHeader {
magic: *MAGIC,
version: LAYOUT_VERSION,
node_type: match self.node_type {
NodeType::Leaf => LEAF_NODE_TYPE,
NodeType::Internal => INTERNAL_NODE_TYPE,
},
// NOTE(review): `as u16` truncates silently; presumably safe because a
// node is expected to hold at most `CAPACITY` entries. Confirm.
num_entries: self.keys.len() as u16,
};

// The header is written at the node's own address.
write_struct(&header, self.address, memory);

// Entries are written immediately after the header.
let mut offset = NodeHeader::size();

// Load all the values. This is necessary so that we don't overwrite referenced
// values when writing the entries to the node.
for i in 0..self.keys.len() {
self.value(i, memory);
}

// Write the entries.
for (idx, key) in self.keys.iter().enumerate() {
// Write the size of the key.
let key_bytes = key.to_bytes();
write_u32(memory, self.address + offset, key_bytes.len() as u32);
offset += U32_SIZE;

// Write the key.
write(memory, (self.address + offset).get(), key_bytes.borrow());
// Advance by the maximum key size rather than the actual key length, so
// every key occupies a fixed-width slot.
offset += Bytes::from(self.max_key_size);

// Write the size of the value.
let value = self.value(idx, memory);
write_u32(memory, self.address + offset, value.len() as u32);
offset += U32_SIZE;

// Write the value.
write(memory, (self.address + offset).get(), &value);
// As with keys, the value slot is always `max_value_size` bytes wide.
offset += Bytes::from(self.max_value_size);
}

// Write the children
// Each child address is written as its little-endian bytes, one after the other.
for child in self.children.iter() {
write(
memory,
(self.address + offset).get(),
&child.get().to_le_bytes(),
);
offset += Address::size();
}
// NOTE: new versions of `Node` will be introduced.
// The v1 layout's save routine is invoked here.
self.save_v1(memory)
}

/// Returns the address of the node.
Expand Down Expand Up @@ -453,7 +325,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
assert_eq!(b.children.len(), 0);
}

#[allow(dead_code)]
#[cfg(test)]
pub fn entries<M: Memory>(&self, memory: &M) -> Vec<Entry<K>> {
self.keys
.iter()
Expand Down Expand Up @@ -481,16 +353,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
///
/// See the documentation of [`Node`] for the memory layout.
pub fn size(max_key_size: u32, max_value_size: u32) -> Bytes {
// Convert the raw sizes into `Bytes` so they can be combined with the other sizes.
let max_key_size = Bytes::from(max_key_size);
let max_value_size = Bytes::from(max_value_size);

let node_header_size = NodeHeader::size();
// Each entry reserves two `U32_SIZE` length prefixes (one for the key, one
// for the value) plus the maximum key and value sizes.
let entry_size = U32_SIZE + max_key_size + max_value_size + U32_SIZE;
let child_size = Address::size();

// Header, followed by room for `CAPACITY` entries and `CAPACITY + 1` children.
node_header_size
+ Bytes::from(CAPACITY as u64) * entry_size
+ Bytes::from((CAPACITY + 1) as u64) * child_size
// The size computation is delegated to the v1 layout module.
v1::size_v1(max_key_size, max_value_size)
}

/// Returns true if the node is at the minimum required size, false otherwise.
Expand Down
91 changes: 91 additions & 0 deletions src/btreemap/node/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
use super::*;
use proptest::collection::btree_map as pmap;
use proptest::collection::vec as pvec;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::rc::Rc;
use test_strategy::{proptest, Arbitrary};

/// Creates an empty, growable byte buffer behind `Rc<RefCell<..>>`, used by
/// the tests below as the memory that nodes are saved to and loaded from.
fn make_memory() -> Rc<RefCell<Vec<u8>>> {
    // `Default` for `Rc<RefCell<Vec<u8>>>` yields a fresh, empty vector.
    Rc::default()
}

/// Generates arbitrary v1 nodes.
///
/// The generated data consists of the size limits handed to the node, a
/// non-empty sorted set of entries that respect those limits, and the node
/// type. Children are not generated; they are derived from the node type and
/// the number of entries (see `children`).
#[derive(Arbitrary, Debug)]
struct NodeV1Data {
    // Upper bound (in bytes) on the length of every generated key.
    #[strategy(0..1_000u32)]
    max_key_size: u32,
    // Upper bound (in bytes) on the length of every generated value.
    #[strategy(0..1_000u32)]
    max_value_size: u32,

    // NOTE: A BTreeMap is used for creating the entries so that they're in sorted order.
    //
    // The byte ranges are inclusive so that `0xFF` can appear in keys and values,
    // and the size range is inclusive so that completely full nodes (`CAPACITY`
    // entries) are exercised as well.
    #[strategy(
        pmap(
            pvec(0..=u8::MAX, 0..=#max_key_size as usize),
            pvec(0..=u8::MAX, 0..=#max_value_size as usize),
            1..=CAPACITY
        )
    )]
    entries: BTreeMap<Vec<u8>, Vec<u8>>,
    // Whether the generated node is a leaf or an internal node.
    node_type: NodeType,
}

impl NodeV1Data {
    /// Builds a v1 node located at `address` that holds the generated entries
    /// and the children returned by [`NodeV1Data::children`].
    fn get(&self, address: Address) -> Node<Vec<u8>> {
        let mut node = Node::new_v1(
            address,
            self.node_type,
            self.max_key_size,
            self.max_value_size,
        );

        // The map yields its entries in ascending key order, so they are
        // pushed onto the node already sorted.
        for (key, value) in &self.entries {
            node.push_entry((key.clone(), value.clone()));
        }

        // Attach the children that correspond to the node type.
        self.children()
            .into_iter()
            .for_each(|child| node.push_child(child));

        node
    }

    /// Returns the child addresses for the generated node: none for a leaf,
    /// and the addresses `0..=entries.len()` for an internal node.
    fn children(&self) -> Vec<Address> {
        let count = match self.node_type {
            // A leaf node doesn't have any children.
            NodeType::Leaf => 0,
            // An internal node has entries.len() + 1 children.
            NodeType::Internal => self.entries.len() + 1,
        };

        // Consecutive integers stand in for the child addresses.
        (0..count as u64).map(Address::from).collect()
    }
}

/// Round-trip property: a v1 node that is saved and then loaded from the same
/// address exposes the same children and entries it was built with.
#[proptest]
fn saving_and_loading_v1_preserves_data(node_data: NodeV1Data) {
    let memory = make_memory();
    let address = Address::from(0);

    // Build the node from the generated data and persist it.
    node_data.get(address).save_v1(&memory);

    // Read it back using the same size limits it was created with.
    let loaded = Node::load_v1(
        address,
        node_data.max_key_size,
        node_data.max_value_size,
        &memory,
    );

    // The children must match, and the entries must come back in sorted order.
    let expected_children = node_data.children();
    let expected_entries: Vec<_> = node_data.entries.into_iter().collect();

    assert_eq!(loaded.children, expected_children);
    assert_eq!(loaded.entries(&memory), expected_entries);
}
Loading

0 comments on commit daaec44

Please sign in to comment.