Skip to content

Conversation

@thurstond
Copy link
Contributor

Propagate the shadow according to the writemask, instead of using the default strict handler.

Updates the test added in #123980

Propagate the shadow according to the writemask, instead of using the
default strict handler.

Updates the test added in llvm#123980
@llvmbot
Copy link
Member

llvmbot commented Jul 7, 2025

@llvm/pr-subscribers-compiler-rt-sanitizer

@llvm/pr-subscribers-llvm-transforms

Author: Thurston Dang (thurstond)

Changes

Propagate the shadow according to the writemask, instead of using the default strict handler.

Updates the test added in #123980


Full diff: https://github.com/llvm/llvm-project/pull/147377.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+60)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll (+10-25)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ec94dcaa2c051..fceb282e6df10 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4342,6 +4342,61 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+  //                           (<16 x float> a, <16 x i32> writethru, i16 mask,
+  //                           i32 rounding)
+  //
+  // dst[i] = mask[i] ? convert(a[i]) : writethru[i]
+  // dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
+  //    where all_or_nothing(x) is fully uninitialized if x has any
+  //    uninitialized bits
+  void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    assert(I.arg_size() == 4);
+    Value *A = I.getOperand(0);
+    Value *WriteThrough = I.getOperand(1);
+    Value *Mask = I.getOperand(2);
+    [[maybe_unused]] Value *RoundingMode = I.getOperand(3);
+
+    assert(isa<FixedVectorType>(A->getType()));
+    assert(A->getType()->isFPOrFPVectorTy());
+
+    assert(isa<FixedVectorType>(WriteThrough->getType()));
+    assert(WriteThrough->getType()->isIntOrIntVectorTy());
+
+    unsigned ANumElements =
+        cast<FixedVectorType>(A->getType())->getNumElements();
+    assert(ANumElements ==
+           cast<FixedVectorType>(WriteThrough->getType())->getNumElements());
+
+    assert(Mask->getType()->isIntegerTy());
+    assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+
+    assert(RoundingMode->getType()->isIntegerTy());
+
+    assert(I.getType() == WriteThrough->getType());
+
+    // Convert i16 mask to <16 x i1>
+    Mask = IRB.CreateBitCast(
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements));
+
+    Value *AShadow = getShadow(A);
+    /// For scalars:
+    /// Since they are converting from floating-point, the output is:
+    /// - fully uninitialized if *any* bit of the input is uninitialized
+    /// - fully ininitialized if all bits of the input are ininitialized
+    /// We apply the same principle on a per-field basis for vectors.
+    AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)),
+                             getShadowTy(A));
+
+    Value *WriteThroughShadow = getShadow(WriteThrough);
+    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   // Instrument BMI / BMI2 intrinsics.
   // All of these intrinsics are Z = I(X, Y)
   // where the types of all operands and the result match, and are either i32 or
@@ -5318,6 +5373,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleAVXVpermi2var(I);
       break;
 
+    case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
+      handleAVX512VectorConvertFPToInt(I);
+      break;
+    }
+
     case Intrinsic::x86_avx512fp16_mask_add_sh_round:
     case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
     case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 43595dccc35bc..10144509d96a6 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -8152,34 +8152,19 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x
 ; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2dq_512(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
-; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK:       10:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT:    unreachable
-; CHECK:       11:
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[TMP5]], <16 x i32> [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2]], i32 10)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> splat (i1 true), <16 x i32> [[TMP8]], <16 x i32> [[TMP2]]
 ; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP6]], [[TMP9]]
 ; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i32> [[RES2]]
 ;
   %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)

/// Since they are converting from floating-point, the output is:
/// - fully uninitialized if *any* bit of the input is uninitialized
/// - fully ininitialized if all bits of the input are ininitialized
/// We apply the same principle on a per-field basis for vectors.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

per-element?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

assert(Mask->getType()->isIntegerTy());
assert(Mask->getType()->getScalarSizeInBits() == ANumElements);

assert(RoundingMode->getType()->isIntegerTy());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure we really need this assertion? nothing in this function actually relies on this AFAICT

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It prevents misdemeanor misuse of this handler. If "RoundingMode" is not an integer, this handler is probably being called with an intrinsic it wasn't intended to handle.

@thurstond thurstond merged commit 556c846 into llvm:main Jul 7, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 7, 2025

LLVM Buildbot has detected a new failure on builder flang-arm64-windows-msvc running on linaro-armv8-windows-msvc-01 while building llvm at step 7 "test-build-unified-tree-check-flang".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/207/builds/3523

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-flang) failure: test (failure)
******************** TEST 'Flang :: Parser/OpenMP/declare-reduction-operator.f90' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 1
c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe -fc1 -fdebug-unparse -fopenmp C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90 | c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\filecheck.exe --ignore-case C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90
# executed command: 'c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe' -fc1 -fdebug-unparse -fopenmp 'C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90'
# executed command: 'c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\filecheck.exe' --ignore-case 'C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90'
# RUN: at line 2
c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe -fc1 -fdebug-dump-parse-tree -fopenmp C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90 | c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\filecheck.exe --check-prefix="PARSE-TREE" C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90
# executed command: 'c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe' -fc1 -fdebug-dump-parse-tree -fopenmp 'C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90'
# .---command stderr------------
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
# | Stack dump:
# | 0.	Program arguments: c:\\users\\tcwg\\llvm-worker\\flang-arm64-windows-msvc\\build\\bin\\flang.exe -fc1 -fdebug-dump-parse-tree -fopenmp C:\\Users\\tcwg\\llvm-worker\\flang-arm64-windows-msvc\\llvm-project\\flang\\test\\Parser\\OpenMP\\declare-reduction-operator.f90
# | Exception Code: 0xC0000005
# |  #0 0x00007ff6841a21f8 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x1a921f8)
# |  #1 0x00007ff68419fee0 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x1a8fee0)
# |  #2 0x00007ff68419dd74 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x1a8dd74)
# |  #3 0x00007ff6841a3a88 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x1a93a88)
# |  #4 0x00007ff68419d8dc (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x1a8d8dc)
# |  #5 0x00007ff682fbc934 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x8ac934)
# |  #6 0x00007ff683005d94 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x8f5d94)
# |  #7 0x00007ff6830aec50 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x99ec50)
# |  #8 0x00007ff6830051d0 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x8f51d0)
# |  #9 0x00007ff6827a8cdc (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x98cdc)
# | #10 0x00007ff6827becc8 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0xaecc8)
# | #11 0x00007ff6827134e8 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x34e8)
# | #12 0x00007ff6827120a4 (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x20a4)
# | #13 0x00007ff6873d3740 mlir::detail::FallbackTypeIDResolver::registerImplicitTypeID(class llvm::StringRef) (c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\flang.exe+0x4cc3740)
# | #14 0xbd1e7ff6873d37dc
# `-----------------------------
# error: command failed with exit status: 0xc0000005
# executed command: 'c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\filecheck.exe' --check-prefix=PARSE-TREE 'C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90'
# .---command stderr------------
# | FileCheck error: '<stdin>' is empty.
# | FileCheck command line:  c:\users\tcwg\llvm-worker\flang-arm64-windows-msvc\build\bin\filecheck.exe --check-prefix=PARSE-TREE C:\Users\tcwg\llvm-worker\flang-arm64-windows-msvc\llvm-project\flang\test\Parser\OpenMP\declare-reduction-operator.f90
# `-----------------------------
# error: command failed with exit status: 2

--

********************


thurstond added a commit to thurstond/llvm-project that referenced this pull request Jul 9, 2025
The checks were missing in "Add handler for
llvm.x86.avx512.mask.cvtps2dq.512 (llvm#147377)
thurstond added a commit that referenced this pull request Jul 9, 2025
…nt (#147782)

The checks were missing in "Add handler for
llvm.x86.avx512.mask.cvtps2dq.512
(#147377)
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Jul 9, 2025
…onvertFPToInt (#147782)

The checks were missing in "Add handler for
llvm.x86.avx512.mask.cvtps2dq.512
(llvm/llvm-project#147377)
thurstond added a commit to thurstond/llvm-project that referenced this pull request Aug 20, 2025
This extends maybeExtendVectorShadowWithZeros() from 556c846 (llvm#147377)
to handle AVX512 VCVTPS2PH.
thurstond added a commit that referenced this pull request Aug 21, 2025
This extends handleAVX512VectorConvertFPToInt() from
556c846
(#147377) to handle AVX512
VCVTPS2PH.
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 21, 2025
This extends handleAVX512VectorConvertFPToInt() from
556c846
(llvm/llvm-project#147377) to handle AVX512
VCVTPS2PH.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants