@@ -3684,6 +3684,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
36843684 // TODO: Store origin.
36853685 }
36863686
3687+ // Intrinsic::masked_store
3688+ //
3689+ // Note: handleAVXMaskedStore handles AVX/AVX2 variants, though AVX512 masked
3690+ // stores are lowered to Intrinsic::masked_store.
36873691 void handleMaskedStore (IntrinsicInst &I) {
36883692 IRBuilder<> IRB (&I);
36893693 Value *V = I.getArgOperand (0 );
@@ -3714,6 +3718,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
37143718 std::max (Alignment, kMinOriginAlignment ));
37153719 }
37163720
3721+ // Intrinsic::masked_load
3722+ //
3723+ // Note: handleAVXMaskedLoad handles AVX/AVX2 variants, though AVX512 masked
3724+ // loads are lowered to Intrinsic::masked_load.
37173725 void handleMaskedLoad (IntrinsicInst &I) {
37183726 IRBuilder<> IRB (&I);
37193727 Value *Ptr = I.getArgOperand (0 );
@@ -3758,13 +3766,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
37583766 // e.g., void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
37593767 // dst mask src
37603768 //
3761- // Note: it is difficult to combine this function with handleMaskedStore. The
3762- // key challenge is that the LLVM masked intrinsics require a vector of
3763- // booleans, while AVX masked intrinsics use the MSBs of a vector of
3764- // integers. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad mentions that
3765- // the x86 backend does not know how to efficiently convert from a vector of
3766- // booleans back into the AVX mask format; therefore, they (and we) do not
3767- // reduce AVX masked intrinsics into LLVM masked intrinsics.
3769+ // AVX512 masked stores are lowered to Intrinsic::masked_store and are handled
3770+ // by handleMaskedStore.
3771+ //
3772+ // This function handles AVX and AVX2 masked stores; these use the MSBs of a
3773+ // vector of integers, unlike the LLVM masked intrinsics, which require a
3774+ // vector of booleans. X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad
3775+ // mentions that the x86 backend does not know how to efficiently convert
3776+ // from a vector of booleans back into the AVX mask format; therefore, they
3777+ // (and we) do not reduce AVX/AVX2 masked intrinsics into LLVM masked
3778+ // intrinsics.
37683779 void handleAVXMaskedStore (IntrinsicInst &I) {
37693780 IRBuilder<> IRB (&I);
37703781
@@ -3820,6 +3831,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38203831 // Masked-off values are replaced with 0, which conveniently also represents
38213832 // initialized memory.
38223833 //
3834+ // AVX512 masked loads are lowered to Intrinsic::masked_load and are handled
3835+ // by handleMaskedLoad.
3836+ //
38233837 // We do not combine this with handleMaskedLoad; see comment in
38243838 // handleAVXMaskedStore for the rationale.
38253839 //
0 commit comments