From f7b02238dfd71a5520e5d6a0fad7ab9185ae96df Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 27 Aug 2025 04:24:55 +0800 Subject: [PATCH 1/2] [X86][NFC] Add tests for pr155387 --- llvm/test/CodeGen/X86/combine-vpmadd52.ll | 116 ++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 llvm/test/CodeGen/X86/combine-vpmadd52.ll diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll new file mode 100644 index 0000000000000..2ae33fca726a7 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX + +define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test1_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test1_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test2_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test2_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 
+; AVX-NEXT: retq + %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and) + ret <2 x i64> %1 +} + +define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test3_vpmadd52l: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test3_vpmadd52l: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_bits: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_bits: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1 + %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} + +define <2 x 
i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52l_wrong_op: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0 +; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52l_wrong_op: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { +; AVX512-LABEL: test_vpmadd52h: +; AVX512: # %bb.0: +; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 +; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX512-NEXT: retq +; +; AVX-LABEL: test_vpmadd52h: +; AVX: # %bb.0: +; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 +; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 + %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 + %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or) + ret <2 x i64> %1 +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} From b258fd6f8f1e7ab15d0c1f3122f1a424a377ccd4 Mon Sep 17 00:00:00 2001 From: XChy Date: Wed, 27 Aug 2025 04:26:37 +0800 Subject: [PATCH 2/2] [X86] SimplifyDemandedBitsForTargetNode - add handling for VPMADD52L/VPMADD52H --- llvm/lib/Target/X86/X86ISelLowering.cpp | 33 +++++++++++++++++++++++ llvm/test/CodeGen/X86/combine-vpmadd52.ll | 14 ++-------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 19131fbd4102b..8df2fcc5a2c1e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( Known.Zero.setLowBits(Known2.countMinTrailingZeros()); return false; } + case X86ISD::VPMADD52L: + case X86ISD::VPMADD52H: { + KnownBits KnownOp0, KnownOp1; + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + // Operands 0 / 1 only contribute their low 52 bits, so demand just those; + // operand 2 is not simplified here (all 64 of its bits are demanded). + APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52); + if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0, + TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1, + TLO, Depth + 1)) + return true; + // TODO: Compute the known bits for VPMADD52L/VPMADD52H. + break; + } } return TargetLowering::SimplifyDemandedBitsForTargetNode( @@ -60068,6 +60086,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Simplify the operands of VPMADD52L/VPMADD52H via demanded-bits analysis. 
+static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + MVT VT = N->getSimpleValueType(0); + unsigned NumEltBits = VT.getScalarSizeInBits(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits), + DCI)) + return SDValue(N, 0); + // SimplifyDemandedBits returned false, so hand back an empty SDValue. + return SDValue(); +} + static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -60705,6 +60736,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget); case X86ISD::VPMADDUBSW: case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI); + case X86ISD::VPMADD52L: + case X86ISD::VPMADD52H: return combineVPMADD52LH(N, DAG, DCI); case X86ISD::KSHIFTL: case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI); case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget); diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52.ll b/llvm/test/CodeGen/X86/combine-vpmadd52.ll index 2ae33fca726a7..004db995ee584 100644 --- a/llvm/test/CodeGen/X86/combine-vpmadd52.ll +++ b/llvm/test/CodeGen/X86/combine-vpmadd52.ll @@ -5,15 +5,14 @@ define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { ; AVX512-LABEL: test1_vpmadd52l: ; AVX512: # %bb.0: -; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: retq ; ; AVX-LABEL: test1_vpmadd52l: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX-NEXT: retq + %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2) ret <2 x i64> %1 @@ -22,13 +21,11 @@ define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 
define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { ; AVX512-LABEL: test2_vpmadd52l: ; AVX512: # %bb.0: -; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: retq ; ; AVX-LABEL: test2_vpmadd52l: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX-NEXT: retq %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1 @@ -39,15 +36,11 @@ define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { ; AVX512-LABEL: test3_vpmadd52l: ; AVX512: # %bb.0: -; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 -; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: retq ; ; AVX-LABEL: test3_vpmadd52l: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 ; AVX-NEXT: retq %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 @@ -96,17 +89,14 @@ define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64 define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { ; AVX512-LABEL: test_vpmadd52h: ; AVX512: # %bb.0: -; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 -; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2 ; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 ; AVX512-NEXT: retq ; ; AVX-LABEL: test_vpmadd52h: ; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 ; AVX-NEXT: retq + %and = and <2 x i64> 
%x1, splat (i64 4503599627370495) ; (1LL << 52) - 1 %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52 %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)