Skip to content

Commit b258fd6

Browse files
committed
[X86] SimplifyDemandedBitsForTargetNode - add handling for VPMADD52L/VPMADD52H
1 parent f7b0223 commit b258fd6

File tree

2 files changed

+35
-12
lines changed

2 files changed

+35
-12
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
4495744957
Known.Zero.setLowBits(Known2.countMinTrailingZeros());
4495844958
return false;
4495944959
}
44960+
case X86ISD::VPMADD52L:
44961+
case X86ISD::VPMADD52H: {
44962+
KnownBits KnownOp0, KnownOp1;
44963+
SDValue Op0 = Op.getOperand(0);
44964+
SDValue Op1 = Op.getOperand(1);
44965+
// Only demand the lower 52-bits of operands 0 / 1 (and all 64-bits of
44966+
// operand 2).
44967+
APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
44968+
if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0,
44969+
TLO, Depth + 1))
44970+
return true;
44971+
44972+
if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
44973+
TLO, Depth + 1))
44974+
return true;
44975+
// TODO: Compute the known bits for VPMADD52L/VPMADD52H.
44976+
break;
44977+
}
4496044978
}
4496144979

4496244980
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -60068,6 +60086,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
6006860086
return SDValue();
6006960087
}
6007060088

60089+
// Simplify VPMADD52L/VPMADD52H operations.
//
// Runs TLI.SimplifyDemandedBits on result 0 of N with an all-ones demanded
// mask that is as wide as one scalar element of the result type, i.e. every
// bit of the result is treated as demanded. Any narrowing of what is demanded
// from the operands is left to that call (presumably it reaches the
// VPMADD52L/VPMADD52H case in SimplifyDemandedBitsForTargetNode, which only
// demands the low 52 bits of operands 0 and 1 -- confirm against the generic
// TargetLowering dispatch).
//
// Returns SDValue(N, 0) when SimplifyDemandedBits reports a change, and an
// empty SDValue otherwise.
60090+
static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG,
60091+
TargetLowering::DAGCombinerInfo &DCI) {
60092+
MVT VT = N->getSimpleValueType(0);
60093+
// Width of one element of the result type; used as the demanded-mask width.
unsigned NumEltBits = VT.getScalarSizeInBits();
60094+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
60095+
// All result bits are demanded here; only the operands can be simplified.
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
60096+
DCI))
60097+
// NOTE(review): returning N itself presumably tells the combiner the node
// was already updated via DCI -- confirm against the DAGCombiner contract.
return SDValue(N, 0);
60098+
60099+
// Nothing was simplified.
return SDValue();
60100+
}
60101+
6007160102
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
6007260103
TargetLowering::DAGCombinerInfo &DCI,
6007360104
const X86Subtarget &Subtarget) {
@@ -60705,6 +60736,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6070560736
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
6070660737
case X86ISD::VPMADDUBSW:
6070760738
case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
60739+
case X86ISD::VPMADD52L:
60740+
case X86ISD::VPMADD52H: return combineVPMADD52LH(N, DAG, DCI);
6070860741
case X86ISD::KSHIFTL:
6070960742
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
6071060743
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);

llvm/test/CodeGen/X86/combine-vpmadd52.ll

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55
define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
66
; AVX512-LABEL: test1_vpmadd52l:
77
; AVX512: # %bb.0:
8-
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
98
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
109
; AVX512-NEXT: retq
1110
;
1211
; AVX-LABEL: test1_vpmadd52l:
1312
; AVX: # %bb.0:
14-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1513
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
1614
; AVX-NEXT: retq
15+
1716
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
1817
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
1918
ret <2 x i64> %1
@@ -22,13 +21,11 @@ define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
2221
define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
2322
; AVX512-LABEL: test2_vpmadd52l:
2423
; AVX512: # %bb.0:
25-
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
2624
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
2725
; AVX512-NEXT: retq
2826
;
2927
; AVX-LABEL: test2_vpmadd52l:
3028
; AVX: # %bb.0:
31-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
3229
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
3330
; AVX-NEXT: retq
3431
%and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
@@ -39,15 +36,11 @@ define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
3936
define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
4037
; AVX512-LABEL: test3_vpmadd52l:
4138
; AVX512: # %bb.0:
42-
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
43-
; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
4439
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
4540
; AVX512-NEXT: retq
4641
;
4742
; AVX-LABEL: test3_vpmadd52l:
4843
; AVX: # %bb.0:
49-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
50-
; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
5144
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
5245
; AVX-NEXT: retq
5346
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
@@ -96,17 +89,14 @@ define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64
9689
define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
9790
; AVX512-LABEL: test_vpmadd52h:
9891
; AVX512: # %bb.0:
99-
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
100-
; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
10192
; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0
10293
; AVX512-NEXT: retq
10394
;
10495
; AVX-LABEL: test_vpmadd52h:
10596
; AVX: # %bb.0:
106-
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
107-
; AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
10897
; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
10998
; AVX-NEXT: retq
99+
110100
%and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
111101
%or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
112102
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)

0 commit comments

Comments
 (0)