132 changes: 102 additions & 30 deletions llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3429,26 +3429,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ShadowType;
}

/// Doubles the length of a vector shadow (filled with zeros) if necessary to
/// match the length of the shadow for the instruction.
/// Doubles the length of a vector shadow (extending with zeros) if necessary
/// to match the length of the shadow for the instruction.
/// If scalar types of the vectors are different, it will use the type of the
/// input vector.
/// This is more type-safe than CreateShadowCast().
Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
IRBuilder<> IRB(&I);
assert(isa<FixedVectorType>(Shadow->getType()));
assert(isa<FixedVectorType>(I.getType()));

Value *FullShadow = getCleanShadow(&I);
assert(cast<FixedVectorType>(Shadow->getType())->getNumElements() <=
cast<FixedVectorType>(FullShadow->getType())->getNumElements());
assert(cast<FixedVectorType>(Shadow->getType())->getScalarType() ==
cast<FixedVectorType>(FullShadow->getType())->getScalarType());
unsigned ShadowNumElems =
cast<FixedVectorType>(Shadow->getType())->getNumElements();
unsigned FullShadowNumElems =
cast<FixedVectorType>(FullShadow->getType())->getNumElements();

if (Shadow->getType() == FullShadow->getType()) {
assert((ShadowNumElems == FullShadowNumElems) ||
(ShadowNumElems * 2 == FullShadowNumElems));

if (ShadowNumElems == FullShadowNumElems) {
FullShadow = Shadow;
} else {
[Review thread]
Contributor: driveby: should we assert fail for not the same or double?
Contributor Author: Added
// TODO: generalize beyond 2x?
SmallVector<int, 32> ShadowMask(
cast<FixedVectorType>(FullShadow->getType())->getNumElements());
SmallVector<int, 32> ShadowMask(FullShadowNumElems);
std::iota(ShadowMask.begin(), ShadowMask.end(), 0);

// Append zeros
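
(A minimal IR-level sketch of the zero-extension this helper performs, assuming for illustration a <8 x i16> shadow widened to a <16 x i16> instruction shadow; %shadow and %wide are hypothetical names. The iota mask indices 8..15 run past the first operand and therefore select from the second, all-zeros operand, so the appended lanes come out as clean shadow.)

    ; Illustrative sketch, not part of the patch.
    %wide = shufflevector <8 x i16> %shadow, <8 x i16> zeroinitializer,
                <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                            i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>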
@@ -4528,58 +4532,102 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return isFixedFPVectorTy(V->getType());
}

// e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
// (<16 x float> a, <16 x i32> writethru, i16 mask,
// i32 rounding)
// e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
// (<16 x float> a, <16 x i32> writethru, i16 mask,
// i32 rounding)
//
// Inconveniently, some similar intrinsics have a different operand order:
// <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
// (<16 x float> a, i32 rounding, <16 x i16> writethru,
// i16 mask)
//
// If the return type has more elements than A, the excess elements are
// zeroed (and the corresponding shadow is initialized).
// <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
// (<4 x float> a, i32 rounding, <8 x i16> writethru,
// i8 mask)
//
// dst[i] = mask[i] ? convert(a[i]) : writethru[i]
// dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
// where all_or_nothing(x) is fully uninitialized if x has any
// uninitialized bits
void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) {
void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
IRBuilder<> IRB(&I);

assert(I.arg_size() == 4);
Value *A = I.getOperand(0);
Value *WriteThrough = I.getOperand(1);
Value *Mask = I.getOperand(2);
Value *RoundingMode = I.getOperand(3);
Value *WriteThrough;
Value *Mask;
Value *RoundingMode;
if (LastMask) {
WriteThrough = I.getOperand(2);
Mask = I.getOperand(3);
RoundingMode = I.getOperand(1);
} else {
WriteThrough = I.getOperand(1);
Mask = I.getOperand(2);
RoundingMode = I.getOperand(3);
}

assert(isFixedFPVector(A));
assert(isFixedIntVector(WriteThrough));

unsigned ANumElements =
cast<FixedVectorType>(A->getType())->getNumElements();
assert(ANumElements ==
cast<FixedVectorType>(WriteThrough->getType())->getNumElements());
[[maybe_unused]] unsigned WriteThruNumElements =
cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
assert(ANumElements == WriteThruNumElements ||
ANumElements * 2 == WriteThruNumElements);

assert(Mask->getType()->isIntegerTy());
assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
assert(ANumElements == MaskNumElements ||
ANumElements * 2 == MaskNumElements);

assert(WriteThruNumElements == MaskNumElements);

// Some bits of the mask may be unused, though it's unusual to have partly
// uninitialized bits.
insertCheckShadowOf(Mask, &I);

assert(RoundingMode->getType()->isIntegerTy());
// Only four bits of the rounding mode are used, though it's very
// Only some bits of the rounding mode are used, though it's very
// unusual to have uninitialized bits there (more commonly, it's a
// constant).
insertCheckShadowOf(RoundingMode, &I);

assert(I.getType() == WriteThrough->getType());

Value *AShadow = getShadow(A);
AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);

if (ANumElements * 2 == MaskNumElements) {
// Ensure that the irrelevant bits of the mask are zero, hence selecting
// from the zeroed shadow instead of the writethrough's shadow.
Mask =
IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements), "_ms_mask_trunc");
Mask =
IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements), "_ms_mask_zext");
}

// Convert i16 mask to <16 x i1>
Mask = IRB.CreateBitCast(
Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements));
Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements),
"_ms_mask_bitcast");

Value *AShadow = getShadow(A);
/// For scalars:
/// Since they are converting from floating-point, the output is:
/// For floating-point to integer conversion, the output is:
/// - fully uninitialized if *any* bit of the input is uninitialized
/// - fully initialized if all bits of the input are initialized
/// We apply the same principle on a per-element basis for vectors.
AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)),
getShadowTy(A));
///
/// We use the scalar width of the return type instead of A's.
AShadow = IRB.CreateSExt(
IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
getShadowTy(&I), "_ms_a_shadow");

Value *WriteThroughShadow = getShadow(WriteThrough);
Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow,
"_ms_writethru_select");

setShadow(&I, Shadow);
setOriginForNaryOp(I);
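
(A hedged sketch of the shadow IR this handler emits for the 512-bit variant, mirroring the CHECK lines in the test diff below; value names are hypothetical. Under the all-or-nothing rule, an input element whose shadow is, say, 0x00000010 — a single uninitialized bit — yields an all-ones 0xFFFF result-shadow element. For the narrower variants such as vcvtps2ph.128, the trunc/zext pair above first forces the mask bits beyond a's element count to a known zero before the bitcast.)

    ; %a_shadow  : <16 x i32>, shadow of the <16 x float> input
    ; %wt_shadow : <16 x i16>, shadow of the writethrough operand
    %mask.vec       = bitcast i16 %mask to <16 x i1>
    %nonzero        = icmp ne <16 x i32> %a_shadow, zeroinitializer
    %all.or.nothing = sext <16 x i1> %nonzero to <16 x i16>
    %out_shadow     = select <16 x i1> %mask.vec, <16 x i16> %all.or.nothing,
                             <16 x i16> %wt_shadow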
Expand Down Expand Up @@ -5300,6 +5348,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_sse_ldmxcsr:
handleLdmxcsr(I);
break;

// Convert Scalar Double Precision Floating-Point Value
// to Unsigned Doubleword Integer
// etc.
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
Expand Down Expand Up @@ -5340,6 +5392,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}

// Convert Packed Double Precision Floating-Point Values
// to Packed Single Precision Floating-Point Values
case Intrinsic::x86_sse2_cvtpd2ps:
case Intrinsic::x86_sse2_cvtps2dq:
case Intrinsic::x86_sse2_cvtpd2dq:
@@ -5354,6 +5408,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}

// Convert Single-Precision FP Value to 16-bit FP Value
// <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
// (<16 x float>, i32, <16 x i16>, i16)
// <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
// (<4 x float>, i32, <8 x i16>, i8)
// <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
// (<8 x float>, i32, <8 x i16>, i8)
case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
break;

// Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
case Intrinsic::x86_avx512_psll_w_512:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
@@ -5920,10 +5988,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*trailingVerbatimArgs=*/1);
break;

case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
[Review thread]
Contributor: you moved this around to group "convert" stuff, right?
thurstond (Contributor Author), Aug 21, 2025: Yes, that was the intent. For simplicity, I've undone the change. (I'll move it around in a future NFC patch.)
handleAVX512VectorConvertFPToInt(I);
// Convert Packed Single Precision Floating-Point Values
// to Packed Signed Doubleword Integer Values
//
// <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
// (<16 x float>, <16 x i32>, i16, i32)
case Intrinsic::x86_avx512_mask_cvtps2dq_512:
handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
break;
}

// AVX512 PMOV: Packed MOV, with truncation
// Precisely handled by applying the same intrinsic to the shadow
57 changes: 26 additions & 31 deletions llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -24,7 +24,6 @@
; - llvm.x86.avx512.mask.rndscale.pd.512, llvm.x86.avx512.mask.rndscale.ps.512, llvm.x86.avx512.mask.rndscale.sd, llvm.x86.avx512.mask.rndscale.ss
; - llvm.x86.avx512.mask.scalef.pd.512, llvm.x86.avx512.mask.scalef.ps.512
; - llvm.x86.avx512.mask.sqrt.sd, llvm.x86.avx512.mask.sqrt.ss
; - llvm.x86.avx512.mask.vcvtps2ph.512
; - llvm.x86.avx512.maskz.fixupimm.pd.512, llvm.x86.avx512.maskz.fixupimm.ps.512, llvm.x86.avx512.maskz.fixupimm.sd, llvm.x86.avx512.maskz.fixupimm.ss
; - llvm.x86.avx512.mul.pd.512, llvm.x86.avx512.mul.ps.512
; - llvm.x86.avx512.permvar.df.512, llvm.x86.avx512.permvar.sf.512
@@ -1903,50 +1902,46 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> splat (i1 true), <16 x i16> [[TMP7]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK: 9:
; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
; CHECK: 10:
; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
; CHECK: 8:
; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
; CHECK-NEXT: [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK: 13:
; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
; CHECK: 14:
; CHECK: 11:
; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
; CHECK: 15:
; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
; CHECK: 12:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
; CHECK: 16:
; CHECK: 13:
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP19]], align 32
; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32
; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP13]], [[TMP20]]
; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
@@ -7451,10 +7446,10 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[TMP5]], <16 x i32> [[TMP2]]
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP10]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
; CHECK: 8:
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2]], i32 10)
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>