Skip to content

Commit 8b67cb7

Browse files
committed
Fix expected instructions of AVX512 _mm_movepi8_mask and _mm256_movepi8_mask intrinsics.
vpmovmskb instructions are generated because our test shim functions return values in general-purpose registers, and vpmovb2m plus kmovq would be more expensive.
1 parent c840ddc commit 8b67cb7

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8061,7 +8061,8 @@ pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
80618061
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882)
80628062
#[inline]
80638063
#[target_feature(enable = "avx512bw,avx512vl")]
8064-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
8064+
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
8065+
// using vpmovb2m plus converting the mask register to a standard register.
80658066
pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
80668067
let filter = _mm256_set1_epi8(1 << 7);
80678068
let a = _mm256_and_si256(a, filter);
@@ -8073,7 +8074,8 @@ pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
80738074
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881)
80748075
#[inline]
80758076
#[target_feature(enable = "avx512bw,avx512vl")]
8076-
#[cfg_attr(test, assert_instr(mov))] // should be vpmovb2m but msvc does not generate it
8077+
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
8078+
// using vpmovb2m plus converting the mask register to a standard register.
80778079
pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
80788080
let filter = _mm_set1_epi8(1 << 7);
80798081
let a = _mm_and_si128(a, filter);

0 commit comments

Comments
 (0)