Skip to content

Commit 5a33bc5

Browse files
authored
[X86] Fold vpmadd52h/l for pattern X * 0 + Y --> Y (#156086)
Resolves comment in #155494 (comment)
1 parent 981f25a commit 5a33bc5

File tree

2 files changed

+89
-2
lines changed

2 files changed

+89
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44957,6 +44957,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
4495744957
KnownBits KnownOp0, KnownOp1;
4495844958
SDValue Op0 = Op.getOperand(0);
4495944959
SDValue Op1 = Op.getOperand(1);
44960+
SDValue Op2 = Op.getOperand(2);
4496044961
// Only demand the lower 52-bits of operands 0 / 1 (and all 64-bits of
4496144962
// operand 2).
4496244963
APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
@@ -44967,6 +44968,13 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
4496744968
if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
4496844969
TLO, Depth + 1))
4496944970
return true;
44971+
44972+
// X * 0 + Y --> Y
44973+
// TODO: Handle cases where lower/higher 52 bits of Op0 * Op1 are known
44974+
// zeroes.
44975+
if (KnownOp0.trunc(52).isZero() || KnownOp1.trunc(52).isZero())
44976+
return TLO.CombineTo(Op, Op2);
44977+
4497044978
// TODO: Compute the known bits for VPMADD52L/VPMADD52H.
4497144979
break;
4497244980
}

llvm/test/CodeGen/X86/combine-vpmadd52.ll

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,84 @@ define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
102102
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
103103
ret <2 x i64> %1
104104
}
105-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
106-
; CHECK: {{.*}}
105+
106+
; Test the fold x * 0 + y -> y
107+
define <2 x i64> @test_vpmadd52l_mul_zero(<2 x i64> %x0, <2 x i64> %x1) {
108+
; CHECK-LABEL: test_vpmadd52l_mul_zero:
109+
; CHECK: # %bb.0:
110+
; CHECK-NEXT: retq
111+
112+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1)
113+
ret <2 x i64> %1
114+
}
115+
116+
define <2 x i64> @test_vpmadd52h_mul_zero(<2 x i64> %x0, <2 x i64> %x1) {
117+
; CHECK-LABEL: test_vpmadd52h_mul_zero:
118+
; CHECK: # %bb.0:
119+
; CHECK-NEXT: retq
120+
121+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> %x1)
122+
ret <2 x i64> %1
123+
}
124+
125+
define <2 x i64> @test_vpmadd52l_mul_zero_commuted(<2 x i64> %x0, <2 x i64> %x1) {
126+
; CHECK-LABEL: test_vpmadd52l_mul_zero_commuted:
127+
; CHECK: # %bb.0:
128+
; CHECK-NEXT: retq
129+
130+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 0, i64 0>)
131+
ret <2 x i64> %1
132+
}
133+
134+
define <2 x i64> @test_vpmadd52l_mul_zero_both(<2 x i64> %x0) {
135+
; CHECK-LABEL: test_vpmadd52l_mul_zero_both:
136+
; CHECK: # %bb.0:
137+
; CHECK-NEXT: retq
138+
139+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
140+
ret <2 x i64> %1
141+
}
142+
143+
define <2 x i64> @test_vpmadd52l_mul_zero_in_52bits(<2 x i64> %x0, <2 x i64> %x1) {
144+
; CHECK-LABEL: test_vpmadd52l_mul_zero_in_52bits:
145+
; CHECK: # %bb.0:
146+
; CHECK-NEXT: retq
147+
148+
; mul by (1 << 52)
149+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> splat (i64 4503599627370496), <2 x i64> %x1)
150+
ret <2 x i64> %1
151+
}
152+
153+
define <2 x i64> @test_vpmadd52l_add_zero(<2 x i64> %x0, <2 x i64> %x1) {
154+
; AVX512-LABEL: test_vpmadd52l_add_zero:
155+
; AVX512: # %bb.0:
156+
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
157+
; AVX512-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
158+
; AVX512-NEXT: vmovdqa %xmm2, %xmm0
159+
; AVX512-NEXT: retq
160+
;
161+
; AVX-LABEL: test_vpmadd52l_add_zero:
162+
; AVX: # %bb.0:
163+
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
164+
; AVX-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
165+
; AVX-NEXT: vmovdqa %xmm2, %xmm0
166+
; AVX-NEXT: retq
167+
168+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> <i64 0, i64 0>, <2 x i64> %x0, <2 x i64> %x1)
169+
ret <2 x i64> %1
170+
}
171+
172+
define <2 x i64> @test_vpmadd52l_mul_zero_scalar(<2 x i64> %x0, <2 x i64> %x1) {
173+
; AVX512-LABEL: test_vpmadd52l_mul_zero_scalar:
174+
; AVX512: # %bb.0:
175+
; AVX512-NEXT: vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
176+
; AVX512-NEXT: retq
177+
;
178+
; AVX-LABEL: test_vpmadd52l_mul_zero_scalar:
179+
; AVX: # %bb.0:
180+
; AVX-NEXT: {vex} vpmadd52luq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
181+
; AVX-NEXT: retq
182+
183+
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> <i64 0, i64 123>, <2 x i64> %x1)
184+
ret <2 x i64> %1
185+
}

0 commit comments

Comments
 (0)