Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 76 additions & 30 deletions integer/src/repr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,56 @@ impl Repr {
self
}

/// Slow path for `Clone::clone`: heap-resident values only (|capacity| > 2).
///
/// Allocates directly instead of going through `Buffer::allocate` +
/// `push_slice` + `mem::transmute`, skipping the redundant capacity/length
/// bound checks and the sign-flip dance. Kept `#[inline(never)]` so the
/// inline fast path in `clone` stays small.
#[inline(never)]
fn clone_heap(&self) -> Self {

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function indeed combines buffer allocate + push_slice + transmute into a single function. But there is a Buffer::allocate_exact function existing, I insist to use Buffer's function for allocation, in case in future we plan to improve the allocators used for Buffer. Don't directly call alloc here.

For skipping the sign check, I think it's fair to flatten it out, but it can still be achieved by create the Buffer first, and then access and flip the capacity field if necessary.

debug_assert!(self.capacity.get().unsigned_abs() > 2);
// SAFETY: abs(capacity) > 2 ⇒ the `heap` variant of the union
// is the live one, and `len >= 3`.
unsafe {
let (src_ptr, len) = self.data.heap;
debug_assert!(len >= 3);

// Mirrors `Buffer::default_capacity(len)`. `len <=
// Buffer::MAX_CAPACITY` is invariant for any heap Repr, and
// `default_capacity` is monotone, so `new_cap` is bounded
// and the bound check inside `Buffer::allocate_raw` would
// never fire.
let new_cap = len + len / 8 + 2;
debug_assert!((3..=Buffer::MAX_CAPACITY).contains(&new_cap));

let layout = core::alloc::Layout::from_size_align_unchecked(
new_cap * mem::size_of::<Word>(),
mem::align_of::<Word>(),
);
let new_ptr = alloc::alloc::alloc(layout) as *mut Word;
if new_ptr.is_null() {
crate::error::panic_out_of_memory();
}

ptr::copy_nonoverlapping(src_ptr, new_ptr, len);

// Set the result's signed capacity in one shot so we never
// build a positive Repr only to negate it back.
let signed_cap = if self.capacity.get() < 0 {
-(new_cap as isize)
} else {
new_cap as isize
};
Repr {
data: ReprData {
heap: (new_ptr, len),
},
capacity: NonZeroIsize::new_unchecked(signed_cap),
}
}
}

/// Returns a number representing sign of self.
///
/// * [Self::zero] if the number is zero
Expand All @@ -461,43 +511,34 @@ impl Repr {

// Cloning for Repr is written in a verbose way because it's performance critical.
impl Clone for Repr {
#[inline]
fn clone(&self) -> Self {
let (capacity, sign) = self.sign_capacity();

// SAFETY: see the comments inside the block
let new = unsafe {
// inline the data if the length is less than 3
// SAFETY: we check the capacity before accessing the variants
if capacity <= 2 {
Repr {
data: ReprData {
inline: self.data.inline,
},
// SAFETY: the capacity is from self, which guarantees it to be zero
capacity: NonZeroIsize::new_unchecked(capacity as isize),
}
} else {
let (ptr, len) = self.data.heap;
// SAFETY: len is at least 2 when it's heap allocated (invariant of Repr)
let mut new_buffer = Buffer::allocate(len);
new_buffer.push_slice(slice::from_raw_parts(ptr, len));

// SAFETY: abs(self.capacity) >= 3 => self.data.len >= 3
// so the capacity and len of new_buffer will be both >= 3
mem::transmute::<Buffer, Repr>(new_buffer)
}
};
new.with_sign(sign)
// Inline case (|capacity| <= 2): copy the union and signed capacity
// verbatim, avoiding the `sign_capacity` + `with_sign` round-trip the
// old implementation did. The heap path is split into `clone_heap`.
if self.capacity.get().unsigned_abs() <= 2 {
return Repr {
data: ReprData {
// SAFETY: capacity in {-2,-1,1,2} ⇒ the `inline`
// variant of the union is the live one.
inline: unsafe { self.data.inline },
},
capacity: self.capacity,
};
}
self.clone_heap()
}

#[inline]
fn clone_from(&mut self, src: &Self) {
let (src_cap, src_sign) = src.sign_capacity();
let (cap, _) = self.sign_capacity();
let src_cap_raw = src.capacity.get();

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is a difference before and after your change on this function. The only change that possibly makes a difference is adding the inline attribute. If adding the attribute makes an improvement, then we can just add that.

let cap = self.capacity.get().unsigned_abs();

// SAFETY: see the comments inside the block
unsafe {
// shortcut for inlined data
if src_cap <= 2 {
// Fast path: src is inline. Copy union + signed capacity, with a
// rare dealloc when self was previously heap-resident.
if src_cap_raw.unsigned_abs() <= 2 {
if cap > 2 {
// release the old buffer if necessary
// SAFETY: self.data.heap.0 must be valid pointer if cap > 2
Expand All @@ -507,6 +548,11 @@ impl Clone for Repr {
self.capacity = src.capacity;
return;
}
let src_sign = if src_cap_raw > 0 {
Sign::Positive
} else {
Sign::Negative
};

// SAFETY: we checked that abs(src.capacity) > 2
let (src_ptr, src_len) = src.data.heap;
Expand Down
53 changes: 53 additions & 0 deletions integer/tests/clone.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
mod helper_macros;

#[test]
fn test_clone_inline_and_heap() {
// value (and sign) preserved across clone, for inline and heap reprs
let ubigs = [
ubig!(0),
ubig!(5),
ubig!(0x10000000000000000), // 2^64, two-word inline
ubig!(1) << 200, // heap
(ubig!(1) << 256) - ubig!(1), // heap, all ones
];
for v in &ubigs {
assert_eq!(&v.clone(), v);
}
let ibigs = [
ibig!(0),
ibig!(5),
ibig!(-5),
ibig!(1) << 200,
-(ibig!(1) << 200),
];
for v in &ibigs {
assert_eq!(&v.clone(), v);
}
}

#[test]
fn test_clone_from_transitions() {
// clone_from across every inline/heap combination of (dst, src) — including
// dst heap-allocated with an inline src (dst's buffer must be freed) and
// inline dst with a heap src (must allocate).
let vals = [
ubig!(0),
ubig!(7),
ubig!(0x10000000000000000), // inline cap 2
ubig!(1) << 130, // heap
ubig!(1) << 200, // heap, larger
];
for src in &vals {
for dst0 in &vals {
let mut dst = dst0.clone();
dst.clone_from(src);
assert_eq!(&dst, src, "clone_from {dst0} <- {src}");
}
}
// signed: heap negative -> inline negative -> heap positive
let mut x = -(ibig!(1) << 200);
x.clone_from(&ibig!(-3));
assert_eq!(x, ibig!(-3));
x.clone_from(&(ibig!(1) << 200));
assert_eq!(x, ibig!(1) << 200);
}
Loading