diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 19131fbd4102b..8df2fcc5a2c1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     Known.Zero.setLowBits(Known2.countMinTrailingZeros());
     return false;
   }
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H: {
+    KnownBits KnownOp0, KnownOp1;
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+    // Only demand the lower 52-bits of operands 0 / 1 (and all 64-bits of
+    // operand 2).
+    APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
+    if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0,
+                             TLO, Depth + 1))
+      return true;
+
+    if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
+                             TLO, Depth + 1))
+      return true;
+    // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
+    break;
+  }
   }
 
   return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -60068,6 +60086,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Simplify VPMADD52L/VPMADD52H operations.
+static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  MVT VT = N->getSimpleValueType(0);
+  unsigned NumEltBits = VT.getScalarSizeInBits();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
+                               DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
 static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            const X86Subtarget &Subtarget) {
@@ -60705,6 +60736,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::PMULUDQ:     return combinePMULDQ(N, DAG, DCI, Subtarget);
   case X86ISD::VPMADDUBSW:
   case X86ISD::VPMADDWD:    return combineVPMADD(N, DAG, DCI);
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H:   return combineVPMADD52LH(N, DAG, DCI);
   case X86ISD::KSHIFTL:
   case X86ISD::KSHIFTR:     return combineKSHIFT(N, DAG, DCI);
   case ISD::FP16_TO_FP:     return combineFP16_TO_FP(N, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
new file mode 100644
index 0000000000000..004db995ee584
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX
+
+define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test1_vpmadd52l:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test1_vpmadd52l:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+
+  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test2_vpmadd52l:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test2_vpmadd52l:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_vpmadd52l:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_vpmadd52l:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+  %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_bits:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_bits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1
+  %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52l_wrong_op:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52l_wrong_op:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test_vpmadd52h:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test_vpmadd52h:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+
+  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
+  %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
+  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
+  ret <2 x i64> %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
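
Usage sketch (not part of the patch; the function name below is illustrative): with the new SimplifyDemandedBits handling, masking a multiplicand of the 52-bit madd intrinsic to its low 52 bits becomes redundant, so the `and` below is expected to fold away and the whole function should lower to a single vpmadd52luq, as in test1_vpmadd52l above.

; Mirrors test1_vpmadd52l: the mask keeps only the bits the instruction reads
; from its multiplicand operands, so the combine can drop it.
define <2 x i64> @sketch_low52_mask_folds(<2 x i64> %acc, <2 x i64> %a, <2 x i64> %b) {
  %masked = and <2 x i64> %a, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %masked, <2 x i64> %b)
  ret <2 x i64> %madd
}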