@@ -81,17 +81,17 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
8181 Name.startswith (" fma.vfmsubadd." ) || // Added in 7.0
8282 Name.startswith (" fma.vfnmadd." ) || // Added in 7.0
8383 Name.startswith (" fma.vfnmsub." ) || // Added in 7.0
84- Name.startswith (" avx512.mask.vfmadd.p " ) || // Added in 7.0
85- Name.startswith (" avx512.mask.vfnmadd.p " ) || // Added in 7.0
86- Name.startswith (" avx512.mask.vfnmsub.p " ) || // Added in 7.0
87- Name.startswith (" avx512.mask3.vfmadd.p " ) || // Added in 7.0
88- Name.startswith (" avx512.maskz.vfmadd.p " ) || // Added in 7.0
89- Name.startswith (" avx512.mask3.vfmsub.p " ) || // Added in 7.0
90- Name.startswith (" avx512.mask3.vfnmsub.p " ) || // Added in 7.0
91- Name.startswith (" avx512.mask.vfmaddsub.p " ) || // Added in 7.0
92- Name.startswith (" avx512.maskz.vfmaddsub.p " ) || // Added in 7.0
93- Name.startswith (" avx512.mask3.vfmaddsub.p " ) || // Added in 7.0
94- Name.startswith (" avx512.mask3.vfmsubadd.p " ) || // Added in 7.0
84+ Name.startswith (" avx512.mask.vfmadd." ) || // Added in 7.0
85+ Name.startswith (" avx512.mask.vfnmadd." ) || // Added in 7.0
86+ Name.startswith (" avx512.mask.vfnmsub." ) || // Added in 7.0
87+ Name.startswith (" avx512.mask3.vfmadd." ) || // Added in 7.0
88+ Name.startswith (" avx512.maskz.vfmadd." ) || // Added in 7.0
89+ Name.startswith (" avx512.mask3.vfmsub." ) || // Added in 7.0
90+ Name.startswith (" avx512.mask3.vfnmsub." ) || // Added in 7.0
91+ Name.startswith (" avx512.mask.vfmaddsub." ) || // Added in 7.0
92+ Name.startswith (" avx512.maskz.vfmaddsub." ) || // Added in 7.0
93+ Name.startswith (" avx512.mask3.vfmaddsub." ) || // Added in 7.0
94+ Name.startswith (" avx512.mask3.vfmsubadd." ) || // Added in 7.0
9595 Name.startswith (" avx512.mask.shuf.i" ) || // Added in 6.0
9696 Name.startswith (" avx512.mask.shuf.f" ) || // Added in 6.0
9797 Name.startswith (" avx512.kunpck" ) || // added in 6.0
@@ -826,7 +826,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
826826
827827static Value *EmitX86Select (IRBuilder<> &Builder, Value *Mask,
828828 Value *Op0, Value *Op1) {
829- // If the mask is all ones just emit the align operation.
829+ // If the mask is all ones just emit the first operation.
830830 if (const auto *C = dyn_cast<Constant>(Mask))
831831 if (C->isAllOnesValue ())
832832 return Op0;
@@ -835,6 +835,21 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
835835 return Builder.CreateSelect (Mask, Op0, Op1);
836836}
837837
838+ static Value *EmitX86ScalarSelect (IRBuilder<> &Builder, Value *Mask,
839+ Value *Op0, Value *Op1) {
840+ // If the mask is all ones just emit the first operation.
841+ if (const auto *C = dyn_cast<Constant>(Mask))
842+ if (C->isAllOnesValue ())
843+ return Op0;
844+
845+ llvm::VectorType *MaskTy =
846+ llvm::VectorType::get (Builder.getInt1Ty (),
847+ Mask->getType ()->getIntegerBitWidth ());
848+ Mask = Builder.CreateBitCast (Mask, MaskTy);
849+ Mask = Builder.CreateExtractElement (Mask, (uint64_t )0 );
850+ return Builder.CreateSelect (Mask, Op0, Op1);
851+ }
852+
838853// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
839854// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
840855// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
@@ -2806,6 +2821,64 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
28062821
28072822 Rep = Builder.CreateInsertElement (Constant::getNullValue (CI->getType ()),
28082823 Rep, (uint64_t )0 );
2824+ } else if (IsX86 && (Name.startswith (" avx512.mask.vfmadd.s" ) ||
2825+ Name.startswith (" avx512.maskz.vfmadd.s" ) ||
2826+ Name.startswith (" avx512.mask3.vfmadd.s" ) ||
2827+ Name.startswith (" avx512.mask3.vfmsub.s" ) ||
2828+ Name.startswith (" avx512.mask3.vfnmsub.s" ))) {
2829+ bool IsMask3 = Name[11 ] == ' 3' ;
2830+ bool IsMaskZ = Name[11 ] == ' z' ;
2831+ // Drop the "avx512.mask." to make it easier.
2832+ Name = Name.drop_front (IsMask3 || IsMaskZ ? 13 : 12 );
2833+ bool NegMul = Name[2 ] == ' n' ;
2834+ bool NegAcc = NegMul ? Name[4 ] == ' s' : Name[3 ] == ' s' ;
2835+
2836+ Value *A = CI->getArgOperand (0 );
2837+ Value *B = CI->getArgOperand (1 );
2838+ Value *C = CI->getArgOperand (2 );
2839+
2840+ if (NegMul && (IsMask3 || IsMaskZ))
2841+ A = Builder.CreateFNeg (A);
2842+ if (NegMul && !(IsMask3 || IsMaskZ))
2843+ B = Builder.CreateFNeg (B);
2844+ if (NegAcc)
2845+ C = Builder.CreateFNeg (C);
2846+
2847+ A = Builder.CreateExtractElement (A, (uint64_t )0 );
2848+ B = Builder.CreateExtractElement (B, (uint64_t )0 );
2849+ C = Builder.CreateExtractElement (C, (uint64_t )0 );
2850+
2851+ if (!isa<ConstantInt>(CI->getArgOperand (4 )) ||
2852+ cast<ConstantInt>(CI->getArgOperand (4 ))->getZExtValue () != 4 ) {
2853+ Value *Ops[] = { A, B, C, CI->getArgOperand (4 ) };
2854+
2855+ Intrinsic::ID IID;
2856+ if (Name.back () == ' d' )
2857+ IID = Intrinsic::x86_avx512_vfmadd_f64;
2858+ else
2859+ IID = Intrinsic::x86_avx512_vfmadd_f32;
2860+ Function *FMA = Intrinsic::getDeclaration (CI->getModule (), IID);
2861+ Rep = Builder.CreateCall (FMA, Ops);
2862+ } else {
2863+ Function *FMA = Intrinsic::getDeclaration (CI->getModule (),
2864+ Intrinsic::fma,
2865+ A->getType ());
2866+ Rep = Builder.CreateCall (FMA, { A, B, C });
2867+ }
2868+
2869+ Value *PassThru = IsMaskZ ? Constant::getNullValue (Rep->getType ()) :
2870+ IsMask3 ? C : A;
2871+
2872+ // For Mask3 with NegAcc, we need to create a new extractelement that
2873+ // avoids the negation above.
2874+ if (NegAcc && IsMask3)
2875+ PassThru = Builder.CreateExtractElement (CI->getArgOperand (2 ),
2876+ (uint64_t )0 );
2877+
2878+ Rep = EmitX86ScalarSelect (Builder, CI->getArgOperand (3 ),
2879+ Rep, PassThru);
2880+ Rep = Builder.CreateInsertElement (CI->getArgOperand (IsMask3 ? 2 : 0 ),
2881+ Rep, (uint64_t )0 );
28092882 } else if (IsX86 && (Name.startswith (" avx512.mask.vfmadd.p" ) ||
28102883 Name.startswith (" avx512.mask.vfnmadd.p" ) ||
28112884 Name.startswith (" avx512.mask.vfnmsub.p" ) ||
@@ -2820,6 +2893,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
28202893 bool NegMul = Name[2 ] == ' n' ;
28212894 bool NegAcc = NegMul ? Name[4 ] == ' s' : Name[3 ] == ' s' ;
28222895
2896+ Value *A = CI->getArgOperand (0 );
2897+ Value *B = CI->getArgOperand (1 );
2898+ Value *C = CI->getArgOperand (2 );
2899+
2900+ if (NegMul && (IsMask3 || IsMaskZ))
2901+ A = Builder.CreateFNeg (A);
2902+ if (NegMul && !(IsMask3 || IsMaskZ))
2903+ B = Builder.CreateFNeg (B);
2904+ if (NegAcc)
2905+ C = Builder.CreateFNeg (C);
2906+
28232907 if (CI->getNumArgOperands () == 5 &&
28242908 (!isa<ConstantInt>(CI->getArgOperand (4 )) ||
28252909 cast<ConstantInt>(CI->getArgOperand (4 ))->getZExtValue () != 4 )) {
@@ -2830,38 +2914,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
28302914 else
28312915 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
28322916
2833- Value *Ops[] = { CI->getArgOperand (0 ), CI->getArgOperand (1 ),
2834- CI->getArgOperand (2 ), CI->getArgOperand (4 ) };
2835-
2836- if (NegMul) {
2837- if (IsMaskZ || IsMask3)
2838- Ops[0 ] = Builder.CreateFNeg (Ops[0 ]);
2839- else
2840- Ops[1 ] = Builder.CreateFNeg (Ops[1 ]);
2841- }
2842- if (NegAcc)
2843- Ops[2 ] = Builder.CreateFNeg (Ops[2 ]);
2844-
28452917 Rep = Builder.CreateCall (Intrinsic::getDeclaration (F->getParent (), IID),
2846- Ops );
2918+ { A, B, C, CI-> getArgOperand ( 4 ) } );
28472919 } else {
2848-
2849- Value *Ops[] = { CI->getArgOperand (0 ), CI->getArgOperand (1 ),
2850- CI->getArgOperand (2 ) };
2851-
2852- if (NegMul) {
2853- if (IsMaskZ || IsMask3)
2854- Ops[0 ] = Builder.CreateFNeg (Ops[0 ]);
2855- else
2856- Ops[1 ] = Builder.CreateFNeg (Ops[1 ]);
2857- }
2858- if (NegAcc)
2859- Ops[2 ] = Builder.CreateFNeg (Ops[2 ]);
2860-
28612920 Function *FMA = Intrinsic::getDeclaration (CI->getModule (),
28622921 Intrinsic::fma,
2863- Ops[ 0 ] ->getType ());
2864- Rep = Builder.CreateCall (FMA, Ops );
2922+ A ->getType ());
2923+ Rep = Builder.CreateCall (FMA, { A, B, C } );
28652924 }
28662925
28672926 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue (CI->getType ()) :
0 commit comments