From 83595f9242ad9e8a7da091f65d450e44e4434f89 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sat, 2 Apr 2022 14:29:41 -0700 Subject: [PATCH 1/2] Fix `array::IntoIter::fold` to use the optimized `Range::fold` It was using `Iterator::by_ref` in the implementation, which ended up pessimizing it enough that, for example, it didn't vectorize when we tried it in the conversation. Demonstration that the codegen test doesn't pass on the current nightly: --- library/core/src/array/iter.rs | 16 +++++- .../core/src/iter/adapters/by_ref_sized.rs | 29 ++++++++++ library/core/tests/array.rs | 32 +++++++++++ src/test/codegen/simd-wide-sum.rs | 54 +++++++++++++++++++ 4 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 src/test/codegen/simd-wide-sum.rs diff --git a/library/core/src/array/iter.rs b/library/core/src/array/iter.rs index e5024c215be9c..baf2f2d6c9714 100644 --- a/library/core/src/array/iter.rs +++ b/library/core/src/array/iter.rs @@ -266,7 +266,7 @@ impl Iterator for IntoIter { Fold: FnMut(Acc, Self::Item) -> Acc, { let data = &mut self.data; - self.alive.by_ref().fold(init, |acc, idx| { + iter::ByRefSized(&mut self.alive).fold(init, |acc, idx| { // SAFETY: idx is obtained by folding over the `alive` range, which implies the // value is currently considered alive but as the range is being consumed each value // we read here will only be read once and then considered dead. @@ -323,6 +323,20 @@ impl DoubleEndedIterator for IntoIter { }) } + #[inline] + fn rfold(mut self, init: Acc, mut rfold: Fold) -> Acc + where + Fold: FnMut(Acc, Self::Item) -> Acc, + { + let data = &mut self.data; + iter::ByRefSized(&mut self.alive).rfold(init, |acc, idx| { + // SAFETY: idx is obtained by folding over the `alive` range, which implies the + // value is currently considered alive but as the range is being consumed each value + // we read here will only be read once and then considered dead. + rfold(acc, unsafe { data.get_unchecked(idx).assume_init_read() }) + }) + } + fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { let len = self.len(); diff --git a/library/core/src/iter/adapters/by_ref_sized.rs b/library/core/src/iter/adapters/by_ref_sized.rs index 0b5e2a89ef3de..890faa5975f3e 100644 --- a/library/core/src/iter/adapters/by_ref_sized.rs +++ b/library/core/src/iter/adapters/by_ref_sized.rs @@ -40,3 +40,32 @@ impl Iterator for ByRefSized<'_, I> { self.0.try_fold(init, f) } } + +impl DoubleEndedIterator for ByRefSized<'_, I> { + fn next_back(&mut self) -> Option { + self.0.next_back() + } + + fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { + self.0.advance_back_by(n) + } + + fn nth_back(&mut self, n: usize) -> Option { + self.0.nth_back(n) + } + + fn rfold(self, init: B, f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + self.0.rfold(init, f) + } + + fn try_rfold(&mut self, init: B, f: F) -> R + where + F: FnMut(B, Self::Item) -> R, + R: Try, + { + self.0.try_rfold(init, f) + } +} diff --git a/library/core/tests/array.rs b/library/core/tests/array.rs index a778779c0fd88..ee7ff012ec1c1 100644 --- a/library/core/tests/array.rs +++ b/library/core/tests/array.rs @@ -668,3 +668,35 @@ fn array_mixed_equality_nans() { assert!(!(mut3 == array3)); assert!(mut3 != array3); } + +#[test] +fn array_into_iter_fold() { + // Strings to help MIRI catch if we double-free or something + let a = ["Aa".to_string(), "Bb".to_string(), "Cc".to_string()]; + let mut s = "s".to_string(); + a.into_iter().for_each(|b| s += &b); + assert_eq!(s, "sAaBbCc"); + + let a = [1, 2, 3, 4, 5, 6]; + let mut it = a.into_iter(); + it.advance_by(1).unwrap(); + it.advance_back_by(2).unwrap(); + let s = it.fold(10, |a, b| 10 * a + b); + assert_eq!(s, 10234); +} + +#[test] +fn array_into_iter_rfold() { + // Strings to help MIRI catch if we double-free or something + let a = ["Aa".to_string(), "Bb".to_string(), "Cc".to_string()]; + let mut s = "s".to_string(); + a.into_iter().rev().for_each(|b| s += &b); + assert_eq!(s, "sCcBbAa"); + + let a = [1, 2, 3, 4, 5, 6]; + let mut it = a.into_iter(); + it.advance_by(1).unwrap(); + it.advance_back_by(2).unwrap(); + let s = it.rfold(10, |a, b| 10 * a + b); + assert_eq!(s, 10432); +} diff --git a/src/test/codegen/simd-wide-sum.rs b/src/test/codegen/simd-wide-sum.rs new file mode 100644 index 0000000000000..fde9b0fcd8ac1 --- /dev/null +++ b/src/test/codegen/simd-wide-sum.rs @@ -0,0 +1,54 @@ +// compile-flags: -C opt-level=3 --edition=2021 +// only-x86_64 +// ignore-debug: the debug assertions get in the way + +#![crate_type = "lib"] +#![feature(portable_simd)] + +use std::simd::Simd; +const N: usize = 8; + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_simd +pub fn wider_reduce_simd(x: Simd) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + let x: Simd = x.cast(); + x.reduce_sum() +} + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_loop +pub fn wider_reduce_loop(x: Simd) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + let mut sum = 0_u16; + for i in 0..N { + sum += u16::from(x[i]); + } + sum +} + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_iter +pub fn wider_reduce_iter(x: Simd) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + x.as_array().iter().copied().map(u16::from).sum() +} + +// This iterator one is the most interesting, as it's the one +// which used to not auto-vectorize due to a suboptimality in the +// `::fold` implementation. + +#[no_mangle] +// CHECK-LABEL: @wider_reduce_into_iter +pub fn wider_reduce_into_iter(x: Simd) -> u16 { + // CHECK: zext <8 x i8> + // CHECK-SAME: to <8 x i16> + // CHECK: call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> + x.to_array().into_iter().map(u16::from).sum() +} From e8fc7ba6a756628b3226e002e3f3e54ddf14fa04 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Fri, 13 May 2022 00:43:15 -0700 Subject: [PATCH 2/2] Slap #[inline] on all the ByRefSized methods, per the8472's suggestion --- library/core/src/iter/adapters/by_ref_sized.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/library/core/src/iter/adapters/by_ref_sized.rs b/library/core/src/iter/adapters/by_ref_sized.rs index 890faa5975f3e..bf2e3e182e2a3 100644 --- a/library/core/src/iter/adapters/by_ref_sized.rs +++ b/library/core/src/iter/adapters/by_ref_sized.rs @@ -9,22 +9,27 @@ pub(crate) struct ByRefSized<'a, I>(pub &'a mut I); impl Iterator for ByRefSized<'_, I> { type Item = I::Item; + #[inline] fn next(&mut self) -> Option { self.0.next() } + #[inline] fn size_hint(&self) -> (usize, Option) { self.0.size_hint() } + #[inline] fn advance_by(&mut self, n: usize) -> Result<(), usize> { self.0.advance_by(n) } + #[inline] fn nth(&mut self, n: usize) -> Option { self.0.nth(n) } + #[inline] fn fold(self, init: B, f: F) -> B where F: FnMut(B, Self::Item) -> B, @@ -32,6 +37,7 @@ impl Iterator for ByRefSized<'_, I> { self.0.fold(init, f) } + #[inline] fn try_fold(&mut self, init: B, f: F) -> R where F: FnMut(B, Self::Item) -> R, @@ -42,18 +48,22 @@ impl Iterator for ByRefSized<'_, I> { } impl DoubleEndedIterator for ByRefSized<'_, I> { + #[inline] fn next_back(&mut self) -> Option { self.0.next_back() } + #[inline] fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { self.0.advance_back_by(n) } + #[inline] fn nth_back(&mut self, n: usize) -> Option { self.0.nth_back(n) } + #[inline] fn rfold(self, init: B, f: F) -> B where F: FnMut(B, Self::Item) -> B, @@ -61,6 +71,7 @@ impl DoubleEndedIterator for ByRefSized<'_, I> { self.0.rfold(init, f) } + #[inline] fn try_rfold(&mut self, init: B, f: F) -> R where F: FnMut(B, Self::Item) -> R,