From 9cd153e84fb7fd500ee748a5f2ae978524b6e669 Mon Sep 17 00:00:00 2001 From: Declan Kelly Date: Mon, 4 May 2026 20:52:34 -0700 Subject: [PATCH] Update inner node builder to require two children **Description** In a prior commit, I added a builder for creating inner nodes that guaranteed at least a single child. I realized later that I hadn't gone far enough, all inner nodes actually require at least two children! This commit updates the builder to add another type-state, modifies tests to pass this new requirement. **Motivation** In a separate branch I've been experimenting with some optimizations and one of those optimizations had a requirement that the inner node had at least two children. That made me remember the builder stuff and I figured it would be good to actually bring it up the real requirement. **Testing Done** Modified existing tests, no new, all passed. --- benches/node/match_prefix.rs | 4 + benches/node/min_max.rs | 2 + benches/tree/iter.rs | 10 +- src/raw/representation.rs | 102 ++++++++++++++++---- src/raw/representation/inner_node_direct.rs | 1 + src/raw/representation/inner_node_sorted.rs | 4 +- src/raw/representation/pointers.rs | 4 + src/raw/visitor/well_formed.rs | 18 ++-- 8 files changed, 110 insertions(+), 35 deletions(-) diff --git a/benches/node/match_prefix.rs b/benches/node/match_prefix.rs index 176bde18..cb8a26d8 100644 --- a/benches/node/match_prefix.rs +++ b/benches/node/match_prefix.rs @@ -28,9 +28,11 @@ fn bench(c: &mut Criterion) { let p0 = &[0, 0, 0, 0, 0, 0, 0, 0]; let node48_small = InnerNode48::, usize, 16>::builder(p0, p0.len()) .write_child(99, leaf_opaque) + .write_child(100, leaf_opaque) .build(); let node256_small = InnerNodeDirect::, usize, 16>::builder(p0, p0.len()) .write_child(99, leaf_opaque) + .write_child(100, leaf_opaque) .build(); let p1 = &[ @@ -38,9 +40,11 @@ fn bench(c: &mut Criterion) { ]; let node48_large = InnerNode48::, usize, 16>::builder(p1, p1.len()) .write_child(99, leaf_opaque) + .write_child(100, leaf_opaque) .build(); let node256_large = InnerNodeDirect::, usize, 16>::builder(p1, p1.len()) .write_child(99, leaf_opaque) + .write_child(100, leaf_opaque) .build(); macro_rules! generate_benches { diff --git a/benches/node/min_max.rs b/benches/node/min_max.rs index 1bdd3b27..1d14f289 100644 --- a/benches/node/min_max.rs +++ b/benches/node/min_max.rs @@ -14,6 +14,7 @@ fn bench(c: &mut Criterion) { let idx = i * skip; let node = InnerNode48::::builder(&[], 0) .write_child(idx, dangling_opaque) + .write_child(idx.wrapping_add(1), dangling_opaque) .build(); (idx, node) }) @@ -23,6 +24,7 @@ fn bench(c: &mut Criterion) { let idx = i * skip; let node = InnerNodeDirect::::builder(&[], 0) .write_child(idx, dangling_opaque) + .write_child(idx.wrapping_add(1), dangling_opaque) .build(); (idx, node) }) diff --git a/benches/tree/iter.rs b/benches/tree/iter.rs index cb9a87be..e82eff7f 100644 --- a/benches/tree/iter.rs +++ b/benches/tree/iter.rs @@ -13,8 +13,8 @@ fn iter_node>( ) { for (idx, size) in sizes.iter().enumerate() { assert!( - *size > 0, - "size {size} in index {idx} must be greater than zero" + *size >= 2, + "size {size} in index {idx} must be at least two" ); } @@ -28,7 +28,9 @@ fn iter_node>( let mut group = c.benchmark_group(format!("iter_node/{ty}")); for size in sizes { let mut iter = bytes.choose_multiple(&mut rng, *size as usize); - let mut builder = N::builder(&[], 0).write_child(*iter.next().unwrap(), dangling_opaque); + let mut builder = N::builder(&[], 0) + .write_child(*iter.next().unwrap(), dangling_opaque) + .write_child(*iter.next().unwrap(), dangling_opaque); for key in iter { builder = builder.write_child(*key, dangling_opaque); } @@ -46,7 +48,7 @@ fn iter_node>( } fn bench(c: &mut Criterion) { - iter_node::<16, _, InnerNode4>(c, "n4", &[1, 4]); + iter_node::<16, _, InnerNode4>(c, "n4", &[2, 4]); iter_node::<16, _, InnerNode16>(c, "n16", &[5, 12, 16]); iter_node::<16, _, InnerNode48>(c, "n48", &[17, 32, 48]); iter_node::<16, _, InnerNodeDirect>(c, "n256", &[49, 100, 152, 204, 256]); diff --git a/src/raw/representation.rs b/src/raw/representation.rs index 519c4a44..140810e1 100644 --- a/src/raw/representation.rs +++ b/src/raw/representation.rs @@ -557,15 +557,18 @@ pub trait InnerNode: /// Marker type for [`InnerNodeBuilder`]: no children have been added yet. pub struct NoChild; -/// Marker type for [`InnerNodeBuilder`]: at least one child has been added. +/// Marker type for [`InnerNodeBuilder`]: exactly one child has been added. +pub struct HasOneChild; + +/// Marker type for [`InnerNodeBuilder`]: at least two children have been added. pub struct HasChild; -/// Typestate builder for inner nodes that enforces the non-empty invariant. +/// Typestate builder for inner nodes that enforces the two-child minimum +/// invariant. /// /// The only way to call [`build`][InnerNodeBuilder::build] is to first add at -/// least one child via -/// [`write_child`][InnerNodeBuilder::write_child]. This is checked at -/// compile time via the `S` typestate parameter. +/// least two children via [`write_child`][InnerNodeBuilder::write_child]. This +/// is checked at compile time via the `S` typestate parameter. /// /// Obtain a builder via [`InnerNodeCommon::builder`]. #[expect(clippy::type_complexity)] @@ -578,8 +581,27 @@ impl InnerNodeBuilder, { - /// Add the first child, transitioning the builder to the [`HasChild`] state - /// and enabling [`build`][InnerNodeBuilder::build]. + /// Add the first child, transitioning the builder to the [`HasOneChild`] + /// state. + pub fn write_child( + mut self, + key_byte: u8, + child: OpaqueNodePtr, + ) -> InnerNodeBuilder { + self.node.write_child(key_byte, child); + InnerNodeBuilder { + node: self.node, + _state: PhantomData, + } + } +} + +impl InnerNodeBuilder +where + N: InnerNodeCommon, +{ + /// Add the second child, transitioning the builder to the [`HasChild`] + /// state and enabling [`build`][InnerNodeBuilder::build]. pub fn write_child( mut self, key_byte: u8, @@ -598,7 +620,31 @@ impl { /// Add the first child to the node without bounds check or order. /// - /// This function transitions the build to the [`HasChild`] state and + /// This function transitions the builder to the [`HasOneChild`] state. + /// + /// # Safety + /// - This functions assumes that the write is gonna be inbound (i.e the + /// check for a full node is done previously to the call of this function) + pub unsafe fn write_child_unchecked( + mut self, + key_byte: u8, + child: OpaqueNodePtr, + ) -> InnerNodeBuilder, HasOneChild> { + // SAFETY: Covered by function safety requirements + unsafe { self.node.write_child_unchecked(key_byte, child) }; + InnerNodeBuilder { + node: self.node, + _state: PhantomData, + } + } +} + +impl + InnerNodeBuilder, HasOneChild> +{ + /// Add the second child to the node without bounds check or order. + /// + /// This function transitions the builder to the [`HasChild`] state and /// enabling [`build`][InnerNodeBuilder::build]. /// /// # Safety @@ -897,15 +943,19 @@ mod tests { let n4 = InnerNode4::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let n16 = InnerNode4::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let n48 = InnerNode4::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let n256 = InnerNode4::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let n4_ptr = const_addr(&n4 as *const InnerNode4, (), 16>); @@ -934,12 +984,15 @@ mod tests { .map(|leaf| NodePtr::from(leaf).to_opaque()) .collect(); - let mut node = N::builder(&[], 0).write_child(0, leaf_pointers[0]).build(); + let mut node = N::builder(&[], 0) + .write_child(0, leaf_pointers[0]) + .write_child(1, leaf_pointers[1]) + .build(); assert!(!node.is_full()); - for (idx, leaf_pointer) in leaf_pointers[1..].iter().copied().enumerate() { - node.write_child(u8::try_from(idx + 1).unwrap(), leaf_pointer); + for (idx, leaf_pointer) in leaf_pointers[2..].iter().copied().enumerate() { + node.write_child(u8::try_from(idx + 2).unwrap(), leaf_pointer); } for (idx, leaf_pointer) in leaf_pointers.iter().copied().enumerate() { @@ -965,12 +1018,15 @@ mod tests { .map(|leaf| NodePtr::from(leaf).to_opaque()) .collect(); - let mut node = N::builder(&[], 0).write_child(0, leaf_pointers[0]).build(); + let mut node = N::builder(&[], 0) + .write_child(0, leaf_pointers[0]) + .write_child(1, leaf_pointers[1]) + .build(); assert!(!node.is_full()); - for (idx, leaf_pointer) in leaf_pointers[1..].iter().copied().enumerate() { - node.write_child(u8::try_from(idx + 1).unwrap(), leaf_pointer); + for (idx, leaf_pointer) in leaf_pointers[2..].iter().copied().enumerate() { + node.write_child(u8::try_from(idx + 2).unwrap(), leaf_pointer); } for (idx, leaf_pointer) in leaf_pointers.iter().copied().enumerate() { @@ -1005,10 +1061,13 @@ mod tests { .map(|leaf| NodePtr::from(leaf).to_opaque()) .collect(); - let mut node = N::builder(&[], 0).write_child(0, leaf_pointers[0]).build(); + let mut node = N::builder(&[], 0) + .write_child(0, leaf_pointers[0]) + .write_child(1, leaf_pointers[1]) + .build(); - for (idx, leaf_pointer) in leaf_pointers[1..].iter().copied().enumerate() { - node.write_child(u8::try_from(idx + 1).unwrap(), leaf_pointer); + for (idx, leaf_pointer) in leaf_pointers[2..].iter().copied().enumerate() { + node.write_child(u8::try_from(idx + 2).unwrap(), leaf_pointer); } let shrunk_node = node.shrink(); @@ -1039,10 +1098,13 @@ mod tests { .map(|leaf| NodePtr::from(leaf).to_opaque()) .collect(); - let mut node = N::builder(&[], 0).write_child(0, leaf_pointers[0]).build(); + let mut node = N::builder(&[], 0) + .write_child(0, leaf_pointers[0]) + .write_child(1, leaf_pointers[1]) + .build(); - for (idx, leaf_pointer) in leaf_pointers[1..].iter().copied().enumerate() { - node.write_child(u8::try_from(idx + 1).unwrap(), leaf_pointer); + for (idx, leaf_pointer) in leaf_pointers[2..].iter().copied().enumerate() { + node.write_child(u8::try_from(idx + 2).unwrap(), leaf_pointer); } assert_eq!(node.header().num_children(), num_children); diff --git a/src/raw/representation/inner_node_direct.rs b/src/raw/representation/inner_node_direct.rs index 507de5e3..3ffd39cc 100644 --- a/src/raw/representation/inner_node_direct.rs +++ b/src/raw/representation/inner_node_direct.rs @@ -394,6 +394,7 @@ mod tests { let leaf_ptr = NodePtr::from(&mut leaf).to_opaque(); let n = InnerNodeDirect::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); n.grow(); diff --git a/src/raw/representation/inner_node_sorted.rs b/src/raw/representation/inner_node_sorted.rs index 8f12522d..4bd656c8 100644 --- a/src/raw/representation/inner_node_sorted.rs +++ b/src/raw/representation/inner_node_sorted.rs @@ -672,6 +672,7 @@ mod tests { let leaf_ptr = NodePtr::from(&mut leaf).to_opaque(); let n4 = InnerNode4::, (), 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); n4.shrink(); @@ -747,8 +748,9 @@ mod tests { let mut n16 = InnerNode16::, (), 16>::builder(&[], 0) .write_child(0, v[0]) + .write_child(2, v[1]) .build(); - for i in 1..16u8 { + for i in 2..16u8 { n16.write_child(i * 2, v[usize::from(i)]); } diff --git a/src/raw/representation/pointers.rs b/src/raw/representation/pointers.rs index a3a5762e..858de5fe 100644 --- a/src/raw/representation/pointers.rs +++ b/src/raw/representation/pointers.rs @@ -611,15 +611,19 @@ mod tests { let mut n4 = InnerNode4::, usize, 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let mut n16 = InnerNode16::, usize, 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let mut n48 = InnerNode48::, usize, 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let mut n256 = InnerNodeDirect::, usize, 16>::builder(&[], 0) .write_child(0, leaf_ptr) + .write_child(1, leaf_ptr) .build(); let n4_ptr = NodePtr::from(&mut n4).to_opaque(); diff --git a/src/raw/visitor/well_formed.rs b/src/raw/visitor/well_formed.rs index 0762e9b1..82a79bfc 100644 --- a/src/raw/visitor/well_formed.rs +++ b/src/raw/visitor/well_formed.rs @@ -676,40 +676,38 @@ mod tests { let l2_ptr = NodePtr::allocate_node_ptr(l2, &Global); let l3_ptr = NodePtr::allocate_node_ptr(l3, &Global); - // Build n4_left with first child, then allocate + // Build n4_left with both children upfront let n4_left = InnerNode4::builder(&[5, 6], 2) .write_child(1, l1_ptr.to_opaque()) + .write_child(2, l2_ptr.to_opaque()) .build(); let n4_left_ptr = NodePtr::allocate_node_ptr(n4_left, &Global); - // Build n4_right with first child (l3), then allocate; loop child added later + // Build n4_right with l3 and a placeholder; loop child (root) added later let n4_right = InnerNode4::builder(&[7, 8], 2) .write_child(3, l3_ptr.to_opaque()) + .write_child(4, l3_ptr.to_opaque()) .build(); let n4_right_ptr = NodePtr::allocate_node_ptr(n4_right, &Global); - // Build n16 with first child (n4_left), then allocate; n4_right added later + // Build n16 with n4_left and a placeholder; n4_right added later let n16 = InnerNode16::builder(&[1, 2], 2) .write_child(3, n4_left_ptr.to_opaque()) + .write_child(4, n4_left_ptr.to_opaque()) .build(); // construct root early let root = NodePtr::allocate_node_ptr(n16, &Global); - { - let n4_left = unsafe { n4_left_ptr.as_mut() }; - // Add remaining child - n4_left.write_child(2, l2_ptr.to_opaque()); - } - { let n4_right = unsafe { n4_right_ptr.as_mut() }; - // replace normal l4 pointer with loop back to root + // replace placeholder with loop back to root n4_right.write_child(4, root.to_opaque()); } { let n16 = unsafe { root.as_mut() }; + // replace placeholder with actual n4_right child n16.write_child(4, n4_right_ptr.to_opaque()); }