-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[msan] Handle AVX512 VCVTPS2PH #154460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[msan] Handle AVX512 VCVTPS2PH #154460
Changes from all commits
b2ff774
e6d9db0
6bd7941
a5d7363
6ac0c66
6e8d1ea
1b4441b
df8d598
35a4aff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3429,26 +3429,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| return ShadowType; | ||
| } | ||
|
|
||
| /// Doubles the length of a vector shadow (filled with zeros) if necessary to | ||
| /// match the length of the shadow for the instruction. | ||
| /// Doubles the length of a vector shadow (extending with zeros) if necessary | ||
| /// to match the length of the shadow for the instruction. | ||
| /// If scalar types of the vectors are different, it will use the type of the | ||
| /// input vector. | ||
| /// This is more type-safe than CreateShadowCast(). | ||
| Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) { | ||
| IRBuilder<> IRB(&I); | ||
| assert(isa<FixedVectorType>(Shadow->getType())); | ||
| assert(isa<FixedVectorType>(I.getType())); | ||
|
|
||
| Value *FullShadow = getCleanShadow(&I); | ||
| assert(cast<FixedVectorType>(Shadow->getType())->getNumElements() <= | ||
| cast<FixedVectorType>(FullShadow->getType())->getNumElements()); | ||
| assert(cast<FixedVectorType>(Shadow->getType())->getScalarType() == | ||
| cast<FixedVectorType>(FullShadow->getType())->getScalarType()); | ||
| unsigned ShadowNumElems = | ||
| cast<FixedVectorType>(Shadow->getType())->getNumElements(); | ||
| unsigned FullShadowNumElems = | ||
| cast<FixedVectorType>(FullShadow->getType())->getNumElements(); | ||
|
|
||
| if (Shadow->getType() == FullShadow->getType()) { | ||
| assert((ShadowNumElems == FullShadowNumElems) || | ||
| (ShadowNumElems * 2 == FullShadowNumElems)); | ||
|
|
||
| if (ShadowNumElems == FullShadowNumElems) { | ||
| FullShadow = Shadow; | ||
| } else { | ||
| // TODO: generalize beyond 2x? | ||
| SmallVector<int, 32> ShadowMask( | ||
| cast<FixedVectorType>(FullShadow->getType())->getNumElements()); | ||
| SmallVector<int, 32> ShadowMask(FullShadowNumElems); | ||
| std::iota(ShadowMask.begin(), ShadowMask.end(), 0); | ||
|
|
||
| // Append zeros | ||
|
|
@@ -4528,58 +4532,102 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| return isFixedFPVectorTy(V->getType()); | ||
| } | ||
|
|
||
| // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512 | ||
| // (<16 x float> a, <16 x i32> writethru, i16 mask, | ||
| // i32 rounding) | ||
| // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512 | ||
| // (<16 x float> a, <16 x i32> writethru, i16 mask, | ||
| // i32 rounding) | ||
| // | ||
| // Inconveniently, some similar intrinsics have a different operand order: | ||
| // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512 | ||
| // (<16 x float> a, i32 rounding, <16 x i16> writethru, | ||
| // i16 mask) | ||
| // | ||
| // If the return type has more elements than A, the excess elements are | ||
| // zeroed (and the corresponding shadow is initialized). | ||
| // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128 | ||
| // (<4 x float> a, i32 rounding, <8 x i16> writethru, | ||
| // i8 mask) | ||
| // | ||
| // dst[i] = mask[i] ? convert(a[i]) : writethru[i] | ||
| // dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i] | ||
| // where all_or_nothing(x) is fully uninitialized if x has any | ||
| // uninitialized bits | ||
| void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) { | ||
| void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) { | ||
| IRBuilder<> IRB(&I); | ||
|
|
||
| assert(I.arg_size() == 4); | ||
| Value *A = I.getOperand(0); | ||
| Value *WriteThrough = I.getOperand(1); | ||
| Value *Mask = I.getOperand(2); | ||
| Value *RoundingMode = I.getOperand(3); | ||
| Value *WriteThrough; | ||
| Value *Mask; | ||
| Value *RoundingMode; | ||
| if (LastMask) { | ||
| WriteThrough = I.getOperand(2); | ||
| Mask = I.getOperand(3); | ||
| RoundingMode = I.getOperand(1); | ||
| } else { | ||
| WriteThrough = I.getOperand(1); | ||
| Mask = I.getOperand(2); | ||
| RoundingMode = I.getOperand(3); | ||
| } | ||
|
|
||
| assert(isFixedFPVector(A)); | ||
| assert(isFixedIntVector(WriteThrough)); | ||
|
|
||
| unsigned ANumElements = | ||
| cast<FixedVectorType>(A->getType())->getNumElements(); | ||
| assert(ANumElements == | ||
| cast<FixedVectorType>(WriteThrough->getType())->getNumElements()); | ||
| [[maybe_unused]] unsigned WriteThruNumElements = | ||
| cast<FixedVectorType>(WriteThrough->getType())->getNumElements(); | ||
| assert(ANumElements == WriteThruNumElements || | ||
| ANumElements * 2 == WriteThruNumElements); | ||
|
|
||
| assert(Mask->getType()->isIntegerTy()); | ||
| assert(Mask->getType()->getScalarSizeInBits() == ANumElements); | ||
| unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits(); | ||
| assert(ANumElements == MaskNumElements || | ||
| ANumElements * 2 == MaskNumElements); | ||
|
|
||
| assert(WriteThruNumElements == MaskNumElements); | ||
|
|
||
| // Some bits of the mask may be unused, though it's unusual to have partly | ||
| // uninitialized bits. | ||
| insertCheckShadowOf(Mask, &I); | ||
|
|
||
| assert(RoundingMode->getType()->isIntegerTy()); | ||
| // Only four bits of the rounding mode are used, though it's very | ||
| // Only some bits of the rounding mode are used, though it's very | ||
| // unusual to have uninitialized bits there (more commonly, it's a | ||
| // constant). | ||
| insertCheckShadowOf(RoundingMode, &I); | ||
|
|
||
| assert(I.getType() == WriteThrough->getType()); | ||
|
|
||
| Value *AShadow = getShadow(A); | ||
| AShadow = maybeExtendVectorShadowWithZeros(AShadow, I); | ||
|
|
||
| if (ANumElements * 2 == MaskNumElements) { | ||
| // Ensure that the irrelevant bits of the mask are zero, hence selecting | ||
| // from the zeroed shadow instead of the writethrough's shadow. | ||
| Mask = | ||
| IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements), "_ms_mask_trunc"); | ||
| Mask = | ||
| IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements), "_ms_mask_zext"); | ||
| } | ||
|
|
||
| // Convert i16 mask to <16 x i1> | ||
| Mask = IRB.CreateBitCast( | ||
| Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements)); | ||
| Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements), | ||
| "_ms_mask_bitcast"); | ||
|
|
||
| Value *AShadow = getShadow(A); | ||
| /// For scalars: | ||
| /// Since they are converting from floating-point, the output is: | ||
| /// For floating-point to integer conversion, the output is: | ||
| /// - fully uninitialized if *any* bit of the input is uninitialized | ||
| /// - fully initialized if all bits of the input are initialized | ||
| /// We apply the same principle on a per-element basis for vectors. | ||
| AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)), | ||
| getShadowTy(A)); | ||
| /// | ||
| /// We use the scalar width of the return type instead of A's. | ||
| AShadow = IRB.CreateSExt( | ||
| IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())), | ||
| getShadowTy(&I), "_ms_a_shadow"); | ||
|
|
||
| Value *WriteThroughShadow = getShadow(WriteThrough); | ||
| Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow); | ||
| Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow, | ||
| "_ms_writethru_select"); | ||
|
|
||
| setShadow(&I, Shadow); | ||
| setOriginForNaryOp(I); | ||
|
|
@@ -5300,6 +5348,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| case Intrinsic::x86_sse_ldmxcsr: | ||
| handleLdmxcsr(I); | ||
| break; | ||
|
|
||
| // Convert Scalar Double Precision Floating-Point Value | ||
| // to Unsigned Doubleword Integer | ||
| // etc. | ||
| case Intrinsic::x86_avx512_vcvtsd2usi64: | ||
| case Intrinsic::x86_avx512_vcvtsd2usi32: | ||
| case Intrinsic::x86_avx512_vcvtss2usi64: | ||
|
|
@@ -5340,6 +5392,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| break; | ||
| } | ||
|
|
||
| // Convert Packed Double Precision Floating-Point Values | ||
| // to Packed Single Precision Floating-Point Values | ||
| case Intrinsic::x86_sse2_cvtpd2ps: | ||
| case Intrinsic::x86_sse2_cvtps2dq: | ||
| case Intrinsic::x86_sse2_cvtpd2dq: | ||
|
|
@@ -5354,6 +5408,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| break; | ||
| } | ||
|
|
||
| // Convert Single-Precision FP Value to 16-bit FP Value | ||
| // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512 | ||
| // (<16 x float>, i32, <16 x i16>, i16) | ||
| // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128 | ||
| // (<4 x float>, i32, <8 x i16>, i8) | ||
| // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256 | ||
| // (<8 x float>, i32, <8 x i16>, i8) | ||
| case Intrinsic::x86_avx512_mask_vcvtps2ph_512: | ||
| case Intrinsic::x86_avx512_mask_vcvtps2ph_256: | ||
| case Intrinsic::x86_avx512_mask_vcvtps2ph_128: | ||
| handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true); | ||
| break; | ||
|
|
||
| // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical) | ||
| case Intrinsic::x86_avx512_psll_w_512: | ||
| case Intrinsic::x86_avx512_psll_d_512: | ||
| case Intrinsic::x86_avx512_psll_q_512: | ||
|
|
@@ -5920,10 +5988,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| /*trailingVerbatimArgs=*/1); | ||
| break; | ||
|
|
||
| case Intrinsic::x86_avx512_mask_cvtps2dq_512: { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you moved this around to group "convert" stuff, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that was the intent. For simplicity, I've undone the change. (I'll move it around in a future NFC patch.) |
||
| handleAVX512VectorConvertFPToInt(I); | ||
| // Convert Packed Single Precision Floating-Point Values | ||
| // to Packed Signed Doubleword Integer Values | ||
| // | ||
| // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512 | ||
| // (<16 x float>, <16 x i32>, i16, i32) | ||
| case Intrinsic::x86_avx512_mask_cvtps2dq_512: | ||
| handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false); | ||
| break; | ||
| } | ||
|
|
||
| // AVX512 PMOV: Packed MOV, with truncation | ||
| // Precisely handled by applying the same intrinsic to the shadow | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drive-by: should we assert-fail when it is neither the same nor double?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added