Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,20 @@ impl<T, A: Allocator> RawTable<T, A> {
self.table.iter()
}

/// Returns an iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may
/// return items that have a hash value different than the one provided. You
/// should always validate the returned values before using them.
///
/// # Safety
///
/// The returned `RawIterHash` is not bound by a lifetime, so it is up to the
/// caller to ensure that the `RawTable` outlives the `RawIterHash` and that
/// the usage rules listed on `RawIterHash` are observed. Because we cannot
/// make the `next` method unsafe on the `RawIterHash` struct, we have to make
/// the `iter_hash` method unsafe.
#[cfg_attr(feature = "inline-more", inline)]
pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<T> {
RawIterHash::new(self, hash)
}

/// Returns an iterator which removes all elements from the table without
/// freeing the memory.
#[cfg_attr(feature = "inline-more", inline)]
Expand Down Expand Up @@ -3996,6 +4010,122 @@ impl<T, A: Allocator> Iterator for RawDrain<'_, T, A> {
impl<T, A: Allocator> ExactSizeIterator for RawDrain<'_, T, A> {}
impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}

/// Iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may return
/// items that have a hash value different than the one provided. You should
/// always validate the returned values before using them.
///
/// For maximum flexibility this iterator is not bound by a lifetime, but you
/// must observe several rules when using it:
/// - You must not free the hash table while iterating (including via growing/shrinking).
/// - It is fine to erase a bucket that has been yielded by the iterator.
/// - Erasing a bucket that has not yet been yielded by the iterator may still
/// result in the iterator yielding that bucket.
/// - It is unspecified whether an element inserted after the iterator was
/// created will be yielded by that iterator.
/// - The order in which the iterator yields buckets is unspecified and may
/// change in the future.
pub struct RawIterHash<T> {
// Type-erased probing state; it yields bucket indices, which `next`
// converts into `Bucket<T>` handles.
inner: RawIterHashInner,
// Records the element type `T` of the yielded buckets without storing a `T`.
_marker: PhantomData<T>,
}

// Element-type-independent state of a `RawIterHash`. Its `Iterator`
// implementation yields bucket indices rather than typed buckets.
struct RawIterHashInner {
// See `RawTableInner`'s corresponding fields for details.
// We can't store a `*const RawTableInner` as it would get
// invalidated by the user calling `&mut` methods on `RawTable`.
// Both fields are copied from the table when the iterator is created.
bucket_mask: usize,
ctrl: NonNull<u8>,

// The top 7 bits of the hash. Control bytes are compared against this value.
h2_hash: u8,

// The sequence of groups to probe in the search.
probe_seq: ProbeSeq,

// The group of control bytes most recently loaded at `probe_seq.pos`.
// Kept so `next` can check it for empty control bytes once `bitmask`
// has been exhausted.
group: Group,

// The elements within the group with a matching h2-hash.
bitmask: BitMaskIter,
}

impl<T> RawIterHash<T> {
    /// Builds the typed iterator for `hash` on top of the type-erased
    /// probing state taken from `table`.
    ///
    /// # Safety
    ///
    /// The result is not tied to the lifetime of `table`; the caller must
    /// keep the table alive (and must not free its storage, including via
    /// growing or shrinking) for as long as the iterator is used.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new<A: Allocator>(table: &RawTable<T, A>, hash: u64) -> Self {
        let inner = RawIterHashInner::new(&table.table, hash);
        Self {
            inner,
            _marker: PhantomData,
        }
    }
}

impl RawIterHashInner {
    /// Captures the probing state needed to walk every bucket of `table`
    /// whose control byte matches the h2 part of `hash`.
    ///
    /// The first group of the probe sequence is loaded eagerly, so the
    /// iterator starts out with that group's candidate matches available.
    ///
    /// # Safety
    ///
    /// A group of control bytes is read from `table` here, and the copied
    /// `ctrl` pointer is read again on later calls to `next`; the caller
    /// must keep the table's storage alive while the iterator is in use.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
        let tag = h2(hash);
        let seq = table.probe_seq(hash);
        let first_group = Group::load(table.ctrl(seq.pos));
        let candidates = first_group.match_byte(tag).into_iter();

        Self {
            bucket_mask: table.bucket_mask,
            ctrl: table.ctrl,
            h2_hash: tag,
            probe_seq: seq,
            group: first_group,
            bitmask: candidates,
        }
    }
}

impl<T> Iterator for RawIterHash<T> {
    type Item = Bucket<T>;

    /// Advances the inner index iterator and turns the next candidate index
    /// into a `Bucket<T>` handle, or returns `None` once probing is done.
    fn next(&mut self) -> Option<Bucket<T>> {
        if let Some(slot) = self.inner.next() {
            debug_assert!(slot <= self.inner.bucket_mask);
            // Can't use `RawTable::bucket` here as we don't have
            // an actual `RawTable` reference to use.
            //
            // SAFETY: `slot` was masked with `bucket_mask` by the inner
            // iterator, and the creator of this iterator promised (see
            // `RawTable::iter_hash`) that the table is still alive.
            Some(unsafe { Bucket::from_base_index(self.inner.ctrl.cast(), slot) })
        } else {
            None
        }
    }
}

// Yields the indices of buckets whose control byte matches `h2_hash`,
// following the probe sequence group by group.
impl Iterator for RawIterHashInner {
type Item = usize;

fn next(&mut self) -> Option<Self::Item> {
unsafe {
loop {
// First hand out any matches still pending in the current group.
if let Some(bit) = self.bitmask.next() {
// `bit` is an offset within the group loaded at `probe_seq.pos`;
// masking keeps the resulting index within `0..=bucket_mask`.
let index = (self.probe_seq.pos + bit) & self.bucket_mask;
return Some(index);
}
// The current group is exhausted. If it contains an empty control
// byte the search stops here. Later calls return `None` again,
// because neither `bitmask` nor `group` has changed.
if likely(self.group.match_empty().any_bit_set()) {
return None;
}
// Otherwise advance to the next group in the probe sequence.
self.probe_seq.move_next(self.bucket_mask);

// Can't use `RawTableInner::ctrl` here as we don't have
// an actual `RawTableInner` reference to use.
let index = self.probe_seq.pos;
debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
let group_ctrl = self.ctrl.as_ptr().add(index);

// Load the new group and recompute its matches for the next iteration.
self.group = Group::load(group_ctrl);
self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
}
}
}
}

pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
pub iter: RawIter<T>,
pub table: &'a mut RawTable<T, A>,
Expand Down
144 changes: 143 additions & 1 deletion src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData};
use crate::{
raw::{
Allocator, Bucket, Global, InsertSlot, RawDrain, RawExtractIf, RawIntoIter, RawIter,
RawTable,
RawIterHash, RawTable,
},
TryReserveError,
};
Expand Down Expand Up @@ -741,6 +741,98 @@ where
}
}

/// An iterator visiting all elements which may match a hash.
/// The iterator element type is `&'a T`.
///
/// This iterator may return elements from the table that have a hash value
/// different than the one provided. You should always validate the returned
/// values before using them.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "nightly")]
/// # fn test() {
/// use hashbrown::{HashTable, DefaultHashBuilder};
/// use std::hash::BuildHasher;
///
/// let mut table = HashTable::new();
/// let hasher = DefaultHashBuilder::default();
/// let hasher = |val: &_| hasher.hash_one(val);
/// table.insert_unique(hasher(&"a"), "a", hasher);
/// table.insert_unique(hasher(&"a"), "b", hasher);
/// table.insert_unique(hasher(&"b"), "c", hasher);
///
/// // Will print "a" and "b" (and possibly "c") in an arbitrary order.
/// for x in table.iter_hash(hasher(&"a")) {
/// println!("{}", x);
/// }
/// # }
/// # fn main() {
/// # #[cfg(feature = "nightly")]
/// # test()
/// # }
/// ```
pub fn iter_hash(&self, hash: u64) -> IterHash<'_, T> {
IterHash {
// SAFETY: the returned `IterHash<'_, T>` borrows `self` for its whole
// lifetime, so the underlying table outlives the raw iterator, which is
// what `RawTable::iter_hash` requires of its caller.
inner: unsafe { self.raw.iter_hash(hash) },
_marker: PhantomData,
}
}

/// A mutable iterator visiting all elements which may match a hash.
/// The iterator element type is `&'a mut T`.
///
/// This iterator may return elements from the table that have a hash value
/// different than the one provided. You should always validate the returned
/// values before using them.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "nightly")]
/// # fn test() {
/// use hashbrown::{HashTable, DefaultHashBuilder};
/// use std::hash::BuildHasher;
///
/// let mut table = HashTable::new();
/// let hasher = DefaultHashBuilder::default();
/// let hasher = |val: &_| hasher.hash_one(val);
/// table.insert_unique(hasher(&1), 2, hasher);
/// table.insert_unique(hasher(&1), 3, hasher);
/// table.insert_unique(hasher(&2), 5, hasher);
///
/// // Update matching values
/// for val in table.iter_hash_mut(hasher(&1)) {
/// *val *= 2;
/// }
///
/// assert_eq!(table.len(), 3);
/// let mut vec: Vec<i32> = Vec::new();
///
/// for val in &table {
/// println!("val: {}", val);
/// vec.push(*val);
/// }
///
/// // The values will contain 4 and 6 and may contain either 5 or 10.
/// assert!(vec.contains(&4));
/// assert!(vec.contains(&6));
///
/// assert_eq!(table.len(), 3);
/// # }
/// # fn main() {
/// # #[cfg(feature = "nightly")]
/// # test()
/// # }
/// ```
pub fn iter_hash_mut(&mut self, hash: u64) -> IterHashMut<'_, T> {
IterHashMut {
// SAFETY: the returned `IterHashMut<'_, T>` holds the exclusive borrow of
// `self` for its whole lifetime, so the underlying table outlives the raw
// iterator, which is what `RawTable::iter_hash` requires of its caller.
// NOTE(review): handing out `&mut T` items also relies on the raw iterator
// never yielding the same bucket twice — confirm against the probing logic.
inner: unsafe { self.raw.iter_hash(hash) },
_marker: PhantomData,
}
}

/// Retains only the elements specified by the predicate.
///
/// In other words, remove all elements `e` such that `f(&e)` returns `false`.
Expand Down Expand Up @@ -1932,6 +2024,56 @@ impl<T> ExactSizeIterator for IterMut<'_, T> {

impl<T> FusedIterator for IterMut<'_, T> {}

/// An iterator over the entries of a `HashTable` that could match a given hash.
/// The iterator element type is `&'a T`.
///
/// This `struct` is created by the [`iter_hash`] method on [`HashTable`]. See its
/// documentation for more.
///
/// [`iter_hash`]: struct.HashTable.html#method.iter_hash
/// [`HashTable`]: struct.HashTable.html
pub struct IterHash<'a, T> {
// Raw, lifetime-free iterator over candidate buckets.
inner: RawIterHash<T>,
// Carries the lifetime `'a` of the yielded `&'a T` items, which the raw
// iterator itself does not track.
_marker: PhantomData<&'a T>,
}

impl<'a, T> Iterator for IterHash<'a, T> {
    type Item = &'a T;

    /// Returns a shared reference to the next candidate element, or `None`
    /// when the raw iterator has no more buckets to offer.
    fn next(&mut self) -> Option<Self::Item> {
        // Spelled out by hand: `Option::map` is avoided because it bloats LLVM IR.
        if let Some(bucket) = self.inner.next() {
            // SAFETY: the bucket was just yielded by the raw iterator, and
            // `'a` is the lifetime carried by this iterator's marker.
            Some(unsafe { bucket.as_ref() })
        } else {
            None
        }
    }
}

/// A mutable iterator over the entries of a `HashTable` that could match a given hash.
/// The iterator element type is `&'a mut T`.
///
/// This `struct` is created by the [`iter_hash_mut`] method on [`HashTable`]. See its
/// documentation for more.
///
/// [`iter_hash_mut`]: struct.HashTable.html#method.iter_hash_mut
/// [`HashTable`]: struct.HashTable.html
pub struct IterHashMut<'a, T> {
// Raw, lifetime-free iterator over candidate buckets.
inner: RawIterHash<T>,
// Carries the lifetime `'a` of the yielded `&'a mut T` items, which the raw
// iterator itself does not track.
_marker: PhantomData<&'a mut T>,
}

impl<'a, T> Iterator for IterHashMut<'a, T> {
    type Item = &'a mut T;

    /// Returns a mutable reference to the next candidate element, or `None`
    /// when the raw iterator has no more buckets to offer.
    fn next(&mut self) -> Option<Self::Item> {
        // Spelled out by hand: `Option::map` is avoided because it bloats LLVM IR.
        if let Some(bucket) = self.inner.next() {
            // SAFETY: the bucket was just yielded by the raw iterator, and
            // `'a` is the lifetime carried by this iterator's marker.
            Some(unsafe { bucket.as_mut() })
        } else {
            None
        }
    }
}

/// An owning iterator over the entries of a `HashTable` in arbitrary order.
/// The iterator element type is `T`.
///
Expand Down
Loading