diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 08b0d9ca..f0c26625 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -29,3 +29,4 @@ doc = false [profile.release] rustflags = ["-C", "target-cpu=native", "-Z", "tune-cpu=native"] +debug-assertions = true diff --git a/fuzz/fuzz_targets/tree_map_api.rs b/fuzz/fuzz_targets/tree_map_api.rs index 91cf1fa5..c04df659 100644 --- a/fuzz/fuzz_targets/tree_map_api.rs +++ b/fuzz/fuzz_targets/tree_map_api.rs @@ -1,17 +1,19 @@ #![no_main] #![feature(btree_entry_insert)] +use std::{ + collections::{hash_map::RandomState, BTreeMap}, + hash::BuildHasher, + mem, + ops::Bound, +}; + use blart::{ map::{PrefixEntry, PrefixOccupied}, visitor::WellFormedChecker, TreeMap, }; use libfuzzer_sys::arbitrary::{self, Arbitrary}; -use std::{ - collections::{hash_map::RandomState, BTreeMap}, - hash::BuildHasher, - mem, -}; #[derive(Arbitrary, Debug)] enum EntryAction { @@ -45,6 +47,44 @@ enum RetainKind { Half, } +#[derive(Arbitrary, Debug)] +enum RangeBound { + Unbounded, + Included(Box<[u8]>), + Excluded(Box<[u8]>), +} + +impl RangeBound { + fn as_bound(&self) -> Bound<&[u8]> { + match self { + RangeBound::Unbounded => Bound::Unbounded, + RangeBound::Included(k) => Bound::Included(k.as_ref()), + RangeBound::Excluded(k) => Bound::Excluded(k.as_ref()), + } + } + + fn key_bytes(&self) -> Option<&[u8]> { + match self { + RangeBound::Unbounded => None, + RangeBound::Included(k) | RangeBound::Excluded(k) => Some(k.as_ref()), + } + } +} + +fn range_is_valid(start: &RangeBound, end: &RangeBound) -> bool { + match (start.key_bytes(), end.key_bytes()) { + (None, _) | (_, None) => true, + (Some(s), Some(e)) => { + s < e + || (s == e + && matches!( + (start, end), + (RangeBound::Included(_), RangeBound::Included(_)) + )) + }, + } +} + #[derive(Arbitrary, Debug)] enum Action { Clear, @@ -69,6 +109,7 @@ enum Action { Fuzzy(Box<[u8]>), Prefix(Box<[u8]>), IntoIter { take_front: usize, take_back: usize }, + Range(RangeBound, RangeBound), Retain(RetainKind), SplitOff(Box<[u8]>), } @@ -258,13 +299,31 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { }); }, Action::Fuzzy(key) => { + // TODO: Provide an oracle implementation for fuzzy search (hard) let v: Vec<_> = tree.fuzzy(&key, 3).collect(); std::hint::black_box(v); }, Action::Prefix(key) => { + // TODO: Provide an oracle implementation for prefix search (easy) let v: Vec<_> = tree.prefix(&key).collect(); std::hint::black_box(v); }, + Action::Range(start, end) => { + if range_is_valid(&start, &end) { + let bounds = (start.as_bound(), end.as_bound()); + let tree_result: Vec<_> = tree.range::<[u8], _>(bounds).collect(); + let oracle_result: Vec<_> = oracle.range::<[u8], _>(bounds).collect(); + assert_eq!( + tree_result.len(), + oracle_result.len(), + "range result lengths differ for bounds {bounds:?}" + ); + for ((tk, tv), (ok, ov)) in tree_result.iter().zip(oracle_result.iter()) { + assert_eq!(tk.as_ref() as &[u8], ok.as_ref() as &[u8]); + assert_eq!(tv, ov); + } + } + }, Action::IntoIter { take_front, take_back, diff --git a/src/collections/map/iterators/range.rs b/src/collections/map/iterators/range.rs index 9ec30a60..363aef15 100644 --- a/src/collections/map/iterators/range.rs +++ b/src/collections/map/iterators/range.rs @@ -12,9 +12,8 @@ use crate::{ allocator::{Allocator, Global}, map::{NonEmptyTree, DEFAULT_PREFIX_LEN}, raw::{ - match_concrete_node_ptr, maximum_unchecked, minimum_unchecked, - AttemptOptimisticPrefixMatch, ConcreteNodePtr, InnerNode, LeafNode, NodePtr, OpaqueNodePtr, - PessimisticMismatch, PrefixMatchBehavior, RawIterator, + match_concrete_node_ptr, maximum_unchecked, minimum_unchecked, ConcreteNodePtr, + ExplicitMismatch, InnerNode, LeafNode, NodePtr, OpaqueNodePtr, PrefixMatch, RawIterator, }, AsBytes, TreeMap, }; @@ -124,7 +123,6 @@ pub(crate) unsafe fn find_terminating_node, key_bytes: &[u8], current_depth: &mut usize, - prefix_match_behavior: &mut PrefixMatchBehavior, ) -> ControlFlow, OpaqueNodePtr> where N: InnerNode, @@ -135,22 +133,20 @@ pub(crate) unsafe fn find_terminating_node { - let (full_prefix, _) = inner_node.read_full_prefix(*current_depth); - let upper_bound = (*current_depth + full_prefix.len()).min(key_bytes.len()); - let key_segment = &key_bytes[(*current_depth)..upper_bound]; - - let node_prefix_comparison_to_search_key_segment = full_prefix.cmp(key_segment); - - debug_assert_ne!( - node_prefix_comparison_to_search_key_segment, - Ordering::Equal, - "if there was a mismatch, the prefix must not be equal" - ); + assert!(*current_depth <= key_bytes.len()); + // SAFETY: The condition that "`current_depth` must be less than or equal to + // `key.len()`" is checked via assertion just above. + // TODO: Figure out a better reasoning for this safety condition, I think it + // should be possible just via reasoning through the range lookup + // process (without needing an assert). + let match_prefix = unsafe { inner_node.match_full_prefix(key_bytes, *current_depth) }; + match match_prefix { + Err(ExplicitMismatch { + matched_bytes, + node_prefix_comparison_to_search_key_segment, + .. + }) => { // We report a prefix mismatch if the prefix is longer that the remaining // portion of the key bytes. However, in the context of the // `InnerNodeSearchResultReason` running out of bytes is really a different @@ -169,7 +165,7 @@ pub(crate) unsafe fn find_terminating_node { + Ok(PrefixMatch { matched_bytes }) => { // Since the prefix matched, advance the depth by the size of the prefix *current_depth += matched_bytes; @@ -209,27 +205,20 @@ pub(crate) unsafe fn find_terminating_node unsafe { // SAFETY: The safety requirement is covered by the safety documentation on the // containing function - check_prefix_lookup_child( - inner_ptr, - key_bytes, - &mut current_depth, - &mut prefix_match_behavior, - ) + check_prefix_lookup_child(inner_ptr, key_bytes, &mut current_depth) }, LeafNode(leaf_ptr) => { // SAFETY: The shared reference is bounded to this block and there are no // concurrent modifications, by the safety conditions of this function. let leaf_node = unsafe { leaf_ptr.as_ref() }; - let leaf_key_comparison_to_search_key = - prefix_match_behavior.compare_leaf_key(leaf_node, key_bytes, current_depth); + let leaf_key_comparison_to_search_key = leaf_node.compare_full_key(key_bytes); return TerminatingNodeSearchResult::Leaf { leaf_ptr, @@ -268,11 +257,11 @@ pub(crate) fn validate_range_bounds(start_bound: &Bound<&[u8]>, end_bound: &Boun type KeyByteAndChild = Option<(u8, OpaqueNodePtr)>; /// This function searches a trie for the smallest/largest leaf node that is -/// greater/less than the given bound. +/// greater than or equal (GTE) / less than or equal (LTE) to the given bound. /// /// If `is_start` is true, its looking for the smallest leaf node that is -/// greater than the given bound. If `is_start` is false, we're looking for the -/// greatest leaf node that is smaller than the given bound. +/// GTE than the given bound. If `is_start` is false, we're looking for the +/// greatest leaf node that is LTE than the given bound. /// /// # Safety /// @@ -327,32 +316,117 @@ pub(crate) unsafe fn find_leaf_pointer_for_bound match reason { - InnerNodeSearchResultReason::PrefixMismatch - | InnerNodeSearchResultReason::InsufficientBytes => { - match (is_start, node_prefix_comparison_to_search_key_segment) { - (true, Ordering::Equal | Ordering::Greater) => unsafe { - // SAFETY: Covered by function safety doc - minimum_unchecked(node_ptr) - }, - (true, Ordering::Less) => unsafe { - // SAFETY: Covered by function safety doc - let max_leaf_ptr = maximum_unchecked(node_ptr); - let max_leaf = max_leaf_ptr.as_ref(); - max_leaf.next? - }, - (false, Ordering::Equal | Ordering::Less) => unsafe { - // SAFETY: Covered by function safety doc - maximum_unchecked(node_ptr) - }, - (false, Ordering::Greater) => unsafe { + InnerNodeSearchResultReason::PrefixMismatch => { + // If there was a prefix mismatch while searching, then we must know whether + // the mismatch occurred because the search key was + // greater than OR less than the prefix. + + if matches!( + node_prefix_comparison_to_search_key_segment, + Ordering::Greater + ) { + // If the node prefix was greater than the search key segment, then our + // candidates are going to be around the minimum leaf of this subtree. + + // SAFETY: Covered by function safety doc + let min_leaf_ptr = unsafe { minimum_unchecked(node_ptr) }; + if is_start { + // In the context of a start bound, the search key being + // lexicographically smaller is easy to understand, since it will be + // ordered before the minimum key of the subtree in all cases. + + min_leaf_ptr + } else { + // In the context of a end bound, the search key being + // lexicographically shorted means + // that none of the keys in this subtree are eligible to + // be in range. So we must find the minimum and then go one before + // that. + // SAFETY: Covered by function safety doc - let min_leaf_ptr = minimum_unchecked(node_ptr); + unsafe { + let min_leaf = min_leaf_ptr.as_ref(); + min_leaf.previous? + } + } + } else { + // We assume here that Equal is impossible otherwise + // there would be no mismatch. + // + // If the node prefix was less than the search key segment, then our + // candidates are going to be around the maximum leaf of this subtree. + + // SAFETY: Covered by function safety doc + let max_leaf_ptr = unsafe { maximum_unchecked(node_ptr) }; + if is_start { + // In the case of a start bound, the maximum leaf of this tree is + // going to be smaller than the search key. So we need to get the + // next larger leaf + unsafe { + let max_leaf = max_leaf_ptr.as_ref(); + max_leaf.next? + } + } else { + // In the case of an end bound, the maximum leaf is the largest key + // less than or equal to the search key, so we return it directly. + max_leaf_ptr + } + } + }, + InnerNodeSearchResultReason::InsufficientBytes => { + // If we've run out of bytes while searching, it means that the key was + // shorter than all the keys in the subtree rooted by + // the inner node. In a lexicographic ordering, given an equal prefix, the + // shorter sequence is always ordered before the longer sequence. + + // SAFETY: Covered by function safety doc + let min_leaf_ptr = unsafe { minimum_unchecked(node_ptr) }; + if is_start { + // In the context of a start bound, the search key being + // lexicographically shorter is easy to understand, since it will be + // ordered before the minimum key of the subtree in all cases. + + min_leaf_ptr + } else { + // In the context of a end bound, the search key being lexicographically + // shorted means that none of the keys in this subtree are eligible to + // be in range. So we must find the minimum and then go one before that. + + // SAFETY: Covered by function safety doc + unsafe { let min_leaf = min_leaf_ptr.as_ref(); min_leaf.previous? - }, + } } }, InnerNodeSearchResultReason::MissingChild => { + // If we stopped on an inner node because there was no corresponding child + // to continue the search, then it is possible that the key is anywhere + // within the subtree rooted by the inner node. In this case, we have to + // find the next largest or next smallest child and then find their minimum + // or maximum leaf node accordingly. + // + // There are a couple of baseline cases without considering the `is_start` + // bit yet. Imagine an array of child keys (with implicit pointers): + // + // ```text + // Middle Start End + // |---|---| |---|---| |---|---| + // | 3 | 7 | | 3 | 7 | | 3 | 7 | + // |---|---| |---|---| |---|---| + // ^ ^ ^ + // 5 1 9 + // ``` + // + // Now consider start vs end bound: + // - If we're in the "Middle" case, with `is_start == true`, then we want + // the next largest key and `is_start == false` we want the next smallest + // key. Both are accessible within this subtree, just selecting the + // appropriate child neighbor and then going to the minimum or maximum + // descendant. + // - If we're in the "Start" case, then `is_start == true` is the same as + // the "Middle" case. The `is_start == false` means we need to go the + // portion of the tree just outside of this node, to the "left" (less). fn access_closest_child< const PREFIX_LEN: usize, N: InnerNode, @@ -361,7 +435,7 @@ pub(crate) unsafe fn find_leaf_pointer_for_bound, Bound), is_start: bool, ) -> KeyByteAndChild { - // SAFETY: Covered by function safety doc + // SAFETY: Covered by outer function safety doc let inner = unsafe { inner_ptr.as_ref() }; let mut child_it = inner.range(child_bounds); if is_start { @@ -371,11 +445,14 @@ pub(crate) unsafe fn find_leaf_pointer_for_bound { @@ -386,14 +463,48 @@ pub(crate) unsafe fn find_leaf_pointer_for_bound { + // This is case "End" from above. If we're trying to get the start + // bound, but there is no next sibling, we should go to leaf after + // the maximum leaf of this subtree. + + // SAFETY: Covered by function safety doc + unsafe { + let max_leaf_ptr = maximum_unchecked(node_ptr); + let max_leaf = max_leaf_ptr.as_ref(); + max_leaf.next? + } + }, + (true, Some((_, next_child))) => { + // This is possibly case "Middle" from above. We select the smallest + // leaf of the next larger sibling. + + // SAFETY: Covered by function safety doc + unsafe { minimum_unchecked(next_child) } + }, + (false, None) => { + // This is case "Start" from above. If we're + // trying to get the end bound, but there is no + // previous sibling, we should go the leaf prior + // to the minimum leaf of this subtree. + + // SAFETY: Covered by function safety doc + unsafe { + let min_leaf_ptr = minimum_unchecked(node_ptr); + let min_leaf = min_leaf_ptr.as_ref(); + min_leaf.previous? + } + }, + (false, Some((_, prev_child))) => { + // This is possibly case "Middle" from above. We + // select the largest leaf of the next smaller + // sibling. + + // SAFETY: Covered by function safety doc + unsafe { maximum_unchecked(prev_child) } + }, } }, }, @@ -962,4 +1073,258 @@ mod tests { ] ); } + + #[test] + fn range_inclusive_end_past_all_node_keys() { + // Regression test: when the end bound's key byte at a Node4/Node16 level + // exceeds all child key bytes and no child is found (MissingChild), the + // node's range() method was called with Included(byte > max_key), which + // mapped Last(n) → Included(n) and caused an out-of-bounds panic on the + // n-element initialized slice. + + let mut tree: TreeMap = TreeMap::default(); + tree.try_insert(0x10u8, 1).unwrap(); + tree.try_insert(0x20u8, 2).unwrap(); + tree.try_insert(0x30u8, 3).unwrap(); + + // End-bound first byte 0x50 > max child byte 0x30 at root → MissingChild + // → inner.range((Unbounded, Included(0x50))) was previously OOB + let result: Vec<_> = tree + .range(( + core::ops::Bound::Unbounded, + core::ops::Bound::Included(0x50u8), + )) + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(result, [(0x10u8, 1), (0x20u8, 2), (0x30u8, 3),]); + } + + #[test] + fn range_excluded_start_past_all_node_keys() { + // Regression test: when the start bound's key byte at a Node4/Node16 level + // exceeds all child key bytes and no child is found (MissingChild), the + // node's range() method was called with Excluded(byte > max_key), which + // mapped Last(n) → Excluded(n) and caused an out-of-bounds panic (slice + // start at n+1 on an n-element slice). + + let mut tree: TreeMap = TreeMap::default(); + tree.try_insert(0x10u8, 1).unwrap(); + tree.try_insert(0x20u8, 2).unwrap(); + tree.try_insert(0x30u8, 3).unwrap(); + + // Start-bound first byte 0x50 > max child byte 0x30 at root → MissingChild + // → inner.range((Excluded(0x50), Unbounded)) was previously OOB + let result: Vec<_> = tree + .range(( + core::ops::Bound::Excluded(0x50u8), + core::ops::Bound::Unbounded, + )) + .collect(); + assert!(result.is_empty(), "expected empty range, got {result:?}"); + } + + #[test] + fn range_excluded_empty_end_bound_is_empty() { + // Regression test: Excluded(&[]) as an end bound should yield an empty + // iterator because [] is the lexicographic minimum — no key is less than + // the empty slice. Previously find_leaf_pointer_for_bound hit the + // (false, Equal) arm of PrefixMismatchOrInsufficientBytes and returned + // maximum_unchecked(root), producing all entries instead of none. + let mut tree: TreeMap, u32> = TreeMap::new(); + for i in 0u8..=9 { + tree.try_insert(Box::from([i]), i as u32).unwrap(); + } + + let result: Vec<_> = tree + .range::<[u8], _>((Bound::Unbounded, Bound::Excluded([].as_ref()))) + .collect(); + assert!(result.is_empty(), "expected empty range, got {result:?}"); + } + + #[test] + fn range_regression_missing_child_62c8b0067a82ea8bf3a296d73915e1c0b76359ae() { + // [ + // TryInsertMany( + // [ + // 59, + // ], + // 7, + // ), + // TryInsertMany( + // [ + // 247, + // ], + // 35, + // ), + // Range( + // Included( + // [ + // 59, + // 59, + // 0, + // ], + // ), + // Unbounded, + // ), + // ] + let mut map = TreeMap::, usize>::new(); + + for first_subtree in 0..=7u8 { + map.try_insert(Box::new([59, first_subtree]), map.len()) + .unwrap(); + } + for second_subtree in 0..=35 { + map.try_insert(Box::new([247, second_subtree]), map.len()) + .unwrap(); + } + + let contents = map + .range((Bound::Included(Box::from([59u8, 59, 0])), Bound::Unbounded)) + .map(|(k, _)| k.clone()) + .collect::>(); + + assert_eq!( + contents, + (0..=35u8).map(|v| Box::from([247, v])).collect::>() + ); + } + + #[test] + fn range_regression_prefix_mismatch_b91a7f054453649d83d175c6ca31635e311f03fa() { + // [ + // TryInsertMany( + // [ + // 151, + // 93, + // 151, + // 151, + // 151, + // 151, + // 215, + // 151, + // 255, + // 255, + // 151, + // 255, + // 255, + // 247, + // 0, + // 3, + // 16, + // ], + // 91, + // ), + // Range( + // Included( + // [ + // 255, + // 26, + // ], + // ), + // Unbounded, + // ), + // ] + + let mut map = TreeMap::, usize>::new(); + for k in 0..=91u8 { + map.try_insert( + Box::from([ + 151u8, 93, 151, 151, 151, 151, 215, 151, 255, 255, 151, 255, 255, 247, 0, 3, + 16, k, + ]), + map.len(), + ) + .unwrap(); + } + + let contents = map + .range((Bound::Included(Box::from([255, 26])), Bound::Unbounded)) + // Make it easier to read, only unique part of key is last byte + .map(|(k, _)| k[17]) + .collect::>(); + + // Results should be empty because `255, 26` is greater than the prefix + assert_eq!(contents, &[]); + } + + #[test] + fn range_regression_excluded_0ef360aad5ebf9e3d5968d3ca6ba381ccb749cc8() { + // [ + // TryInsertMany( + // [ + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // 255, + // ], + // 225, + // ), + // Range( + // Excluded( + // [ + // 225, + // 146, + // 225, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 251, + // 232, + // 2, + // ], + // ), + // Unbounded, + // ), + // ] + + let mut map = TreeMap::, usize>::new(); + for k in 0..=64 { + map.try_insert( + Box::from([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, k, + ]), + map.len(), + ) + .unwrap(); + } + + let contents = map + .range(( + Bound::Excluded(Box::from([ + 225, 146, 225, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, + 232, 2, + ])), + Bound::Unbounded, + )) + // Make it easier to read, only unique part of key is last byte + .map(|(k, _)| k[17]) + .collect::>(); + + // The range should contain all elements in the tree because the range start is + // less than all the keys in the tree. + assert_eq!(contents, (0..=64).collect::>()); + } } diff --git a/src/raw/operations/lookup.rs b/src/raw/operations/lookup.rs index 68c833d6..a5b70681 100644 --- a/src/raw/operations/lookup.rs +++ b/src/raw/operations/lookup.rs @@ -1,5 +1,3 @@ -use core::cmp::Ordering; - use crate::{ raw::{ match_concrete_node_ptr, AttemptOptimisticPrefixMatch, ConcreteNodePtr, InnerNode, @@ -101,40 +99,6 @@ impl PrefixMatchBehavior { result } - /// This function will compare the key bytes against the key in the given - /// leaf node. - /// - /// Specifically: - /// - If the current behavior is "optimistic", then the entire leaf key - /// will be compared against the given key bytes - /// - If the current behavior is "pessimistic", then only the key bytes - /// that were not used during the lookup process will be compared against - /// the corresponding leaf key bytes. - /// - /// This is a minor optimization to reduce the amount of work needed - /// confirming that a lookup found the right leaf node. - pub fn compare_leaf_key( - self, - leaf: &LeafNode, - key_bytes: &[u8], - current_depth: usize, - ) -> Ordering { - match self { - PrefixMatchBehavior::Pessimistic => { - let leaf_key_bytes = leaf.key_ref().as_bytes(); - let current_depth = current_depth.min(leaf_key_bytes.len()).min(key_bytes.len()); - // PANIC SAFETY: Since we limit `current_depth` to be the minimum of the lengths - // and the current depth we will at most get an empty slice, it - // should panic. I ran a small test to make sure that `&[1][1..] == &[][..]` and - // does not panic. - let leaf_key_bytes = &leaf_key_bytes[current_depth..]; - let key_bytes = &key_bytes[current_depth..]; - leaf_key_bytes.cmp(key_bytes) - }, - PrefixMatchBehavior::Optimistic => leaf.key_ref().as_bytes().cmp(key_bytes), - } - } - /// This function will test the key bytes against the key in the given leaf /// node. /// diff --git a/src/raw/representation.rs b/src/raw/representation.rs index f45d4ba6..2936b7e1 100644 --- a/src/raw/representation.rs +++ b/src/raw/representation.rs @@ -1,6 +1,7 @@ //! Trie node representation use core::{ + cmp::Ordering, fmt, iter::FusedIterator, ops::{Bound, RangeBounds, RangeInclusive}, @@ -138,6 +139,8 @@ pub struct ExplicitMismatch { pub prefix_byte: u8, /// Pointer to the leaf if the prefix was reconstructed pub leaf_ptr: OptionalLeafPtr, + /// Comparison between the full prefix and the relevant key segment + pub node_prefix_comparison_to_search_key_segment: Ordering, } impl Clone for ExplicitMismatch { @@ -217,7 +220,7 @@ pub unsafe trait InnerNodeCommon: Sized { /// `prefix_len` as the node prefix length. /// /// This is done because when a prefix mismatch happens - /// the length of the mismatch can be grater or equal to + /// the length of the mismatch can be greater or equal to /// prefix size, since we search for the first child of the /// node to recreate the prefix, that's why we don't use /// `prefix.len()` as the node prefix length @@ -381,18 +384,28 @@ pub unsafe trait InnerNodeCommon: Sized { } let (prefix, leaf_ptr) = self.read_full_prefix(current_depth); - let key = &key[current_depth..]; let matched_bytes = prefix .iter() - .zip(key) + .zip(&key[current_depth..]) .take_while(|(a, b)| **a == **b) .count(); if matched_bytes < prefix.len() { + let upper_bound = (current_depth + prefix.len()).min(key.len()); + let key_segment = &key[current_depth..upper_bound]; + let node_prefix_comparison_to_search_key_segment = prefix.cmp(key_segment); + + debug_assert_ne!( + node_prefix_comparison_to_search_key_segment, + Ordering::Equal, + "if there was a mismatch, the prefix must not be equal" + ); + Err(ExplicitMismatch { matched_bytes, prefix_byte: prefix[matched_bytes], leaf_ptr, + node_prefix_comparison_to_search_key_segment, }) } else { Ok(PrefixMatch { matched_bytes }) @@ -425,20 +438,18 @@ pub unsafe trait InnerNodeCommon: Sized { let leaf = unsafe { leaf_ptr.as_ref() }; let leaf = leaf.key_ref().as_bytes(); - unsafe { - // SAFETY: Since we are iterating the key and prefixes, we - // expect that the depth never exceeds the key len. - // Because if this happens we ran out of bytes in the key to match - // and the whole process should be already finished - core::hint::assert_unchecked(current_depth <= leaf.len()); - - // SAFETY: By the construction of the prefix we know that this is inbounds - // since the prefix len guarantees it to us - core::hint::assert_unchecked(current_depth + len <= leaf.len()); - - // SAFETY: This can't overflow since len comes from a u32 - core::hint::assert_unchecked(current_depth <= current_depth + len); - } + assert!( + current_depth <= leaf.len(), + "current_depth [{current_depth}] must not exceed leaf key length [{}]; leaf must \ + be a descendant of this node", + leaf.len() + ); + assert!( + current_depth + len <= leaf.len(), + "current_depth [{current_depth}] + prefix_len [{len}] must not exceed leaf key \ + length [{}]; leaf must be a descendant of this node", + leaf.len() + ); let leaf = &leaf[current_depth..(current_depth + len)]; (leaf, Some(leaf_ptr)) } diff --git a/src/raw/representation/header.rs b/src/raw/representation/header.rs index 93aa24dd..49b10144 100644 --- a/src/raw/representation/header.rs +++ b/src/raw/representation/header.rs @@ -103,16 +103,12 @@ impl Header { let begin = len; let end = begin + self.capped_prefix_len(); - unsafe { - // SAFETY: This function is called when mismatch happened and - // we used the node to match the number of bytes, - // by this we know that len < prefix len, but since we + 1, - // to skip the key byte we have that len <= prefix len - core::hint::assert_unchecked(end <= self.prefix.len()); - - // SAFETY: This is by construction end = begin + len - core::hint::assert_unchecked(begin <= end); - } + assert!( + end <= self.prefix.len(), + "prefix copy range end [{end}] must not exceed prefix array length [{}]; ltrim_by \ + must only be called when prefix_len <= PREFIX_LEN", + self.prefix.len() + ); self.prefix.copy_within(begin..end, 0); } @@ -179,16 +175,12 @@ impl Header { let end = begin + self.capped_prefix_len(); let len = end - begin; - unsafe { - // SAFETY: This function is called a mismatch happened and - // we used the leaf to match the number of matching bytes, - // by this we know that len < prefix len, but since we + 1, - // to skip the key byte we have that len <= prefix len - core::hint::assert_unchecked(end <= leaf_key.len()); - - // SAFETY: This is by construction end = begin + len - core::hint::assert_unchecked(begin <= end); - } + assert!( + end <= leaf_key.len(), + "leaf key slice end [{end}] must not exceed leaf key length [{}]; leaf must be a \ + descendant of this node so its key covers depth+prefix_len bytes", + leaf_key.len() + ); let leaf_key = &leaf_key[begin..end]; self.prefix[..len].copy_from_slice(leaf_key) diff --git a/src/raw/representation/inner_node_sorted.rs b/src/raw/representation/inner_node_sorted.rs index 20ef453c..77f78deb 100644 --- a/src/raw/representation/inner_node_sorted.rs +++ b/src/raw/representation/inner_node_sorted.rs @@ -465,8 +465,20 @@ unsafe impl InnerNodeCommon Bound::Included(idx), - // key = Included(255), bound = Included(Last(4)), output = Included(4) - Bound::Included(WritePoint::Last(idx)) => Bound::Included(idx), + // Last(n) means "insert after all n children". + // - as start bound: key > all children → empty range; Included(idx) on an + // idx-element slice starts at idx (valid empty slice). + // - as end bound: include all n children → last valid index is idx-1. Previously + // Included(idx) was returned, which caused &slice[..=idx] on an idx-element + // slice to panic. + Bound::Included(WritePoint::Last(idx)) => { + if is_start { + Bound::Included(idx) + } else { + idx.checked_sub(1) + .map_or(Bound::Excluded(0), Bound::Included) + } + }, // key = Included(2), bound = Included(Shift(1)), output = Included(1) Bound::Included(WritePoint::Shift(idx)) => { if is_start { @@ -478,8 +490,17 @@ unsafe impl InnerNodeCommon Bound::Excluded(idx), - // key = Excluded(255), bound = Excluded(Last(4)), output = Excluded(4) - Bound::Excluded(WritePoint::Last(idx)) => Bound::Excluded(idx), + // key = Excluded(255), bound = Excluded(Last(4)): + // - as end bound → Excluded(4), meaning &slice[..4] = all children + // - as start bound → key > all children, so result is empty; Excluded(idx-1) + // starts the slice at idx (empty) without OOB. + Bound::Excluded(WritePoint::Last(idx)) => { + if is_start { + idx.checked_sub(1).map_or(Bound::Unbounded, Bound::Excluded) + } else { + Bound::Excluded(idx) + } + }, // key = Excluded(2), bound = Excluded(Shift(1)), output = Excluded(0) Bound::Excluded(WritePoint::Shift(idx)) => { if is_start { @@ -942,6 +963,31 @@ mod tests { check(&node, (Bound::Included(255), Bound::::Unbounded), []); } + #[test] + fn node4_range_inclusive_end_past_max() { + let (node, _, [l1_ptr, l2_ptr, l3_ptr, l4_ptr]) = node4_fixture_empty_edges(); + // keys = [2, 3, 85, 254]; end bound 255 > max key 254 + // Previously: Last(4) → Included(4) → &slice[..=4] on 4-elem slice → panic + let pairs: Vec<_> = node + .range((Bound::::Unbounded, Bound::Included(255))) + .collect(); + assert_eq!( + pairs, + [(2u8, l3_ptr), (3, l1_ptr), (85, l4_ptr), (254, l2_ptr)] + ); + } + + #[test] + fn node4_range_excluded_start_past_max() { + let (node, _, [_l1_ptr, _l2_ptr, _l3_ptr, _l4_ptr]) = node4_fixture_empty_edges(); + // keys = [2, 3, 85, 254]; start bound Excluded(255) > max key 254 + // Previously: Last(4) → Excluded(4) → &slice[5..] on 4-elem slice → panic + let pairs: Vec<_> = node + .range((Bound::Excluded(255u8), Bound::::Unbounded)) + .collect(); + assert_eq!(pairs, []); + } + #[test] #[should_panic = "range start and end are equal and excluded: (80)"] fn node4_range_iterate_out_of_bounds_panic_both_excluded() { @@ -1155,6 +1201,31 @@ mod tests { assert_eq!(pairs, &[(0u8, l3_ptr), (3, l1_ptr)]); } + #[test] + fn node16_range_inclusive_end_past_max() { + let (node, _, [l1_ptr, l2_ptr, l3_ptr, l4_ptr]) = node16_fixture_empty_edges(); + // keys = [2, 3, 85, 254]; end bound 255 > max key 254 + // Previously: Last(4) → Included(4) → &slice[..=4] on 4-elem slice → panic + let pairs: Vec<_> = node + .range((Bound::::Unbounded, Bound::Included(255))) + .collect(); + assert_eq!( + pairs, + [(2u8, l3_ptr), (3, l1_ptr), (85, l4_ptr), (254, l2_ptr)] + ); + } + + #[test] + fn node16_range_excluded_start_past_max() { + let (node, _, [_l1_ptr, _l2_ptr, _l3_ptr, _l4_ptr]) = node16_fixture_empty_edges(); + // keys = [2, 3, 85, 254]; start bound Excluded(255) > max key 254 + // Previously: Last(4) → Excluded(4) → &slice[5..] on 4-elem slice → panic + let pairs: Vec<_> = node + .range((Bound::Excluded(255u8), Bound::::Unbounded)) + .collect(); + assert_eq!(pairs, []); + } + #[test] #[should_panic = "range start and end are equal and excluded: (80)"] fn node16_range_iterate_out_of_bounds_panic_both_excluded() { diff --git a/src/raw/representation/leaf_node.rs b/src/raw/representation/leaf_node.rs index 55ed4d2e..7c8ebdec 100644 --- a/src/raw/representation/leaf_node.rs +++ b/src/raw/representation/leaf_node.rs @@ -1,3 +1,5 @@ +use core::cmp::Ordering; + use crate::{ raw::{Node, NodePtr, NodeType}, AsBytes, @@ -216,6 +218,15 @@ impl LeafNode { self.key.as_bytes().eq(possible_key) } + /// Compare lexicographically the leaf stored key bytes with the given + /// search key bytes. + pub fn compare_full_key(&self, possible_key: &[u8]) -> Ordering + where + K: AsBytes, + { + self.key.as_bytes().cmp(possible_key) + } + /// Check that the key starts with the given slice. pub fn starts_with(&self, key: &[u8]) -> bool where diff --git a/src/raw/visitor/tree_stats.rs b/src/raw/visitor/tree_stats.rs index 88ee9ecc..c4cb2ce3 100644 --- a/src/raw/visitor/tree_stats.rs +++ b/src/raw/visitor/tree_stats.rs @@ -173,12 +173,13 @@ impl Add for LeafStats { } } -/// TODO +/// A mapping from [`NodeType`] to [`InnerNodeStats`] that has a fixed debug +/// ordering. #[derive(Clone, PartialEq, Eq, Default)] pub struct FixedOrderNodeStats(HashMap); impl FixedOrderNodeStats { - /// TODO + /// Lookup the inner node stats for a specific node type. pub fn get(&self, node_type: NodeType) -> Option<&InnerNodeStats> { self.0.get(&node_type) }