Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
80e303c
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 6, 2025
24287f7
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 7, 2025
c8cc2a9
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 8, 2025
1115256
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 8, 2025
08138a2
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 8, 2025
728b37d
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 8, 2025
44609a3
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 9, 2025
2d268fc
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 16, 2025
32041fb
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 16, 2025
4e1af14
Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have suffici…
houngkoungting Aug 17, 2025
c4ea7bd
[DAG] Fold trunc(avg(x,y)) for avgceil/floor u/s nodes if they have s…
houngkoungting Aug 18, 2025
6f84361
Merge branch 'main' into main
RKSimon Aug 18, 2025
3729135
Merge branch 'llvm:main' into main
houngkoungting Sep 1, 2025
f85579b
Merge branch 'llvm:main' into main
houngkoungting Sep 15, 2025
a7bbda8
Merge branch 'llvm:main' into main
houngkoungting Sep 16, 2025
12b64f6
Merge branch 'llvm:main' into main
houngkoungting Sep 17, 2025
fac54ff
[X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::…
houngkoungting Sep 17, 2025
c5100dc
Remove unintended changes to DAGCombiner.cpp
houngkoungting Sep 17, 2025
380155d
Merge branch 'main' into main
houngkoungting Sep 17, 2025
27f0f42
update test case
houngkoungting Sep 18, 2025
efeb740
update test case: knownbits-vpmadd52.ll
houngkoungting Sep 22, 2025
89555f8
Merge branch 'main' into main
RKSimon Sep 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38999,6 +38999,26 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
case X86ISD::VPMADD52L:
case X86ISD::VPMADD52H: {
  // Known bits for the 52-bit multiply-add nodes. Operands 0 and 1 are the
  // multiplicands (only their low 52 bits take part in the multiply) and
  // operand 2 is the value the zero-extended product half is added to.
  assert(Op.getValueType().isVector() &&
         Op.getValueType().getScalarType() == MVT::i64 &&
         "Unexpected VPMADD52 type");
  const bool IsLowHalf = Op.getOpcode() == X86ISD::VPMADD52L;

  // Query all three operands first, then narrow the multiplicands to the
  // 52 bits that are actually multiplied.
  KnownBits Mul0 =
      DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
  KnownBits Mul1 =
      DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
  KnownBits Addend =
      DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
  Mul0 = Mul0.trunc(52);
  Mul1 = Mul1.trunc(52);

  // VPMADD52L keeps the low 52 bits of the product, VPMADD52H the high 52.
  KnownBits Product = IsLowHalf ? KnownBits::mul(Mul0, Mul1)
                                : KnownBits::mulhu(Mul0, Mul1);

  // Widen the 52-bit product half back to the 64-bit lane before adding.
  Known = KnownBits::add(Addend, Product.zext(64));
  return;
}
}

// Handle target shuffles.
Expand Down
109 changes: 109 additions & 0 deletions llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=AVXIFMA

; High-52 path

declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

; High-52: both multiplicands are masked to 25 bits, so their product fits in
; 50 bits and the high 52-bit half selected by vpmadd52h is known zero. The
; accumulator is splat(1), so the call (and the trailing 'and 1') is expected
; to fold to the constant splat(1) with no vpmadd52huq emitted.
define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
; AVX512VL-LABEL: kb52h_128_mask25_and1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
; AVX512VL-NEXT: # xmm0 = mem[0,0]
; AVX512VL-NEXT: retq
;
; AVXIFMA-LABEL: kb52h_128_mask25_and1:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: vmovddup {{.*#+}} xmm0 = [1,1]
; AVXIFMA-NEXT: # xmm0 = mem[0,0]
; AVXIFMA-NEXT: retq
%mx = and <2 x i64> %x, splat (i64 33554431) ; (1<<25)-1
%my = and <2 x i64> %y, splat (i64 33554431) ; (1<<25)-1
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(
<2 x i64> splat (i64 1),
<2 x i64> %mx,
<2 x i64> %my)
%ret = and <2 x i64> %r, splat (i64 1)
ret <2 x i64> %ret
}

; High-52: the multiplicands are masked to 25 and 26 bits, so their product
; fits in 51 bits and the high 52-bit half selected by vpmadd52h is known
; zero. The accumulator is splat(1), so the whole call is expected to fold to
; the constant splat(1) with no vpmadd52huq emitted.
define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
; AVX512VL-LABEL: kb52h_256_mask25x26_acc1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
; AVX512VL-NEXT: retq
;
; AVXIFMA-LABEL: kb52h_256_mask25x26_acc1:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
; AVXIFMA-NEXT: retq
%mx = and <4 x i64> %x, splat (i64 33554431) ; (1<<25)-1
%my = and <4 x i64> %y, splat (i64 67108863) ; (1<<26)-1
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(
<4 x i64> splat (i64 1),
<4 x i64> %mx, <4 x i64> %my)
ret <4 x i64> %r
}

; Low-52 path

declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)

; Low-52: both multiplicands are masked to 26 bits and the product is added to
; an unknown accumulator (%acc). Nothing about the result is constant, so no
; fold is expected: the checks confirm the masks and vpmadd52luq are kept.
define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX512VL-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX512VL-NEXT: vpmadd52luq %xmm1, %xmm0, %xmm2
; AVX512VL-NEXT: vmovdqa %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVXIFMA-LABEL: kb52l_128_mask26x26_add_intrin:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
; AVXIFMA-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVXIFMA-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVXIFMA-NEXT: {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
; AVXIFMA-NEXT: vmovdqa %xmm2, %xmm0
; AVXIFMA-NEXT: retq
%xm = and <2 x i64> %x, splat (i64 67108863) ; (1<<26)-1
%ym = and <2 x i64> %y, splat (i64 67108863) ; (1<<26)-1
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(
<2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
ret <2 x i64> %r
}

; Low-52: the multiplicands are masked to 50 bits and 2 bits (the "50x3" in the
; name refers to the second mask value 3 = (1<<2)-1) and the product is added
; to an unknown accumulator (%acc). Nothing about the result is constant, so
; no fold is expected: the checks confirm the masks and vpmadd52luq are kept.
define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
; AVX512VL-NEXT: vpmadd52luq %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVXIFMA-LABEL: kb52l_256_mask50x3_add_intrin:
; AVXIFMA: # %bb.0:
; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623]
; AVXIFMA-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVXIFMA-NEXT: vpbroadcastq {{.*#+}} ymm3 = [3,3,3,3]
; AVXIFMA-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVXIFMA-NEXT: {vex} vpmadd52luq %ymm1, %ymm0, %ymm2
; AVXIFMA-NEXT: vmovdqa %ymm2, %ymm0
; AVXIFMA-NEXT: retq
%xm = and <4 x i64> %x, splat (i64 1125899906842623) ; (1<<50)-1
%ym = and <4 x i64> %y, splat (i64 3) ; (1<<2)-1
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(
<4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
ret <4 x i64> %r
}