-
Notifications
You must be signed in to change notification settings - Fork 13.7k
Split Vec::dedup_by
into 2 cycles
#92104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1617,7 +1617,29 @@ impl<T, A: Allocator> Vec<T, A> { | |
return; | ||
} | ||
|
||
/* INVARIANT: vec.len() > read >= write > write-1 >= 0 */ | ||
// Check if we ever want to remove anything. | ||
// This allows to use copy_non_overlapping in next cycle. | ||
// And avoids any memory writes if we don't need to remove anything. | ||
let mut possible_remove_idx = 1; | ||
let start = self.as_mut_ptr(); | ||
while possible_remove_idx < len { | ||
let found_duplicate = unsafe { | ||
// SAFETY: possible_remove_idx always in range [1..len) | ||
let prev = start.add(possible_remove_idx - 1); | ||
let current = start.add(possible_remove_idx); | ||
same_bucket(&mut *current, &mut *prev) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I'm following the old code right, it looks like this swapped the ordering -- we used to pass (0, 1), (1, 2), (2, 3), etc., whereas this now passes them as (1, 0), (2, 1), ... -- it seems like we can probably just swap current and prev here? It would be great to add a test for this, too. |
||
}; | ||
if found_duplicate { | ||
break; | ||
} | ||
possible_remove_idx += 1; | ||
} | ||
// Don't need to remove anything. | ||
if possible_remove_idx >= len { | ||
return; | ||
} | ||
|
||
/* INVARIANT: vec.len() > read > write > write-1 >= 0 */ | ||
struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> { | ||
/* Offset of the element we want to check if it is duplicate */ | ||
read: usize, | ||
|
@@ -1663,31 +1685,35 @@ impl<T, A: Allocator> Vec<T, A> { | |
} | ||
} | ||
|
||
let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; | ||
let ptr = gap.vec.as_mut_ptr(); | ||
|
||
/* Drop items while going through Vec, it should be more efficient than | ||
* doing slice partition_dedup + truncate */ | ||
|
||
// Construct gap first and then drop item to avoid memory corruption if `T::drop` panics. | ||
let mut gap = | ||
FillGapOnDrop { read: possible_remove_idx + 1, write: possible_remove_idx, vec: self }; | ||
unsafe { | ||
// SAFETY: we checked that possible_remove_idx < len before. | ||
// If drop panics, `gap` would remove this item without drop. | ||
ptr::drop_in_place(start.add(possible_remove_idx)); | ||
} | ||
|
||
/* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr | ||
* are always in-bounds and read_ptr never aliases prev_ptr */ | ||
unsafe { | ||
while gap.read < len { | ||
let read_ptr = ptr.add(gap.read); | ||
let prev_ptr = ptr.add(gap.write.wrapping_sub(1)); | ||
let read_ptr = start.add(gap.read); | ||
let prev_ptr = start.add(gap.write.wrapping_sub(1)); | ||
|
||
if same_bucket(&mut *read_ptr, &mut *prev_ptr) { | ||
// Increase `gap.read` now since the drop may panic. | ||
gap.read += 1; | ||
/* We have found duplicate, drop it in-place */ | ||
ptr::drop_in_place(read_ptr); | ||
} else { | ||
let write_ptr = ptr.add(gap.write); | ||
let write_ptr = start.add(gap.write); | ||
|
||
/* Because `read_ptr` can be equal to `write_ptr`, we either | ||
* have to use `copy` or conditional `copy_nonoverlapping`. | ||
* Looks like the first option is faster. */ | ||
ptr::copy(read_ptr, write_ptr, 1); | ||
/* read_ptr cannot be equal to write_ptr by `gap` invariant */ | ||
ptr::copy_nonoverlapping(read_ptr, write_ptr, 1); | ||
|
||
/* We have filled that place, so go further */ | ||
gap.write += 1; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.