diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0837e1e3feb1d..2feb76e0eb7b4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38999,6 +38999,26 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   }
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H: {
+    assert(Op.getValueType().isVector() &&
+           Op.getValueType().getScalarType() == MVT::i64 &&
+           "Unexpected VPMADD52 type");
+    KnownBits K0 =
+        DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits K1 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    KnownBits KAcc =
+        DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+    K0 = K0.trunc(52);
+    K1 = K1.trunc(52);
+    KnownBits KnownMul = (Op.getOpcode() == X86ISD::VPMADD52L)
+                             ? KnownBits::mul(K0, K1)
+                             : KnownBits::mulhu(K0, K1);
+    KnownMul = KnownMul.zext(64);
+    Known = KnownBits::add(KAcc, KnownMul);
+    return;
+  }
   }
 
   // Handle target shuffles.
diff --git a/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
new file mode 100644
index 0000000000000..0e322fec2c7d9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/knownbits-vpmadd52.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=AVXIFMA
+
+; High-52 path
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+; High-52, 25x25 masked inputs, accumulator = 1, expected constant fold.
+define <2 x i64> @kb52h_128_mask25_and1(<2 x i64> %x, <2 x i64> %y) {
+; AVX512VL-LABEL: kb52h_128_mask25_and1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; AVX512VL-NEXT:    # xmm0 = mem[0,0]
+; AVX512VL-NEXT:    retq
+;
+; AVXIFMA-LABEL: kb52h_128_mask25_and1:
+; AVXIFMA:       # %bb.0:
+; AVXIFMA-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; AVXIFMA-NEXT:    # xmm0 = mem[0,0]
+; AVXIFMA-NEXT:    retq
+  %mx = and <2 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+  %my = and <2 x i64> %y, splat (i64 33554431) ; (1<<25)-1
+  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(
+                        <2 x i64> splat (i64 1),
+                        <2 x i64> %mx,
+                        <2 x i64> %my)
+  %ret = and <2 x i64> %r, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+; High-52, 25x26 masked inputs, accumulator = 1, expected constant fold.
+define <4 x i64> @kb52h_256_mask25x26_acc1(<4 x i64> %x, <4 x i64> %y) {
+; AVX512VL-LABEL: kb52h_256_mask25x26_acc1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVX512VL-NEXT:    retq
+;
+; AVXIFMA-LABEL: kb52h_256_mask25x26_acc1:
+; AVXIFMA:       # %bb.0:
+; AVXIFMA-NEXT:    vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
+; AVXIFMA-NEXT:    retq
+  %mx = and <4 x i64> %x, splat (i64 33554431) ; (1<<25)-1
+  %my = and <4 x i64> %y, splat (i64 67108863) ; (1<<26)-1
+  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(
+                        <4 x i64> splat (i64 1),
+                        <4 x i64> %mx, <4 x i64> %my)
+  ret <4 x i64> %r
+}
+
+; Low-52 path
+
+declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+; Low-52, 26x26 masked inputs, add with accumulator.
+define <2 x i64> @kb52l_128_mask26x26_add_intrin(<2 x i64> %x, <2 x i64> %y, <2 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVX512VL-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpand %xmm3, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVX512VL-NEXT:    vmovdqa %xmm2, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVXIFMA-LABEL: kb52l_128_mask26x26_add_intrin:
+; AVXIFMA:       # %bb.0:
+; AVXIFMA-NEXT:    vpbroadcastq {{.*#+}} xmm3 = [67108863,67108863]
+; AVXIFMA-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; AVXIFMA-NEXT:    vpand %xmm3, %xmm1, %xmm1
+; AVXIFMA-NEXT:    {vex} vpmadd52luq %xmm1, %xmm0, %xmm2
+; AVXIFMA-NEXT:    vmovdqa %xmm2, %xmm0
+; AVXIFMA-NEXT:    retq
+  %xm = and <2 x i64> %x, splat (i64 67108863) ; (1<<26)-1
+  %ym = and <2 x i64> %y, splat (i64 67108863) ; (1<<26)-1
+  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(
+                        <2 x i64> %acc, <2 x i64> %xm, <2 x i64> %ym)
+  ret <2 x i64> %r
+}
+
+; Low-52, 50-bit × 2-bit masked inputs, add with accumulator.
+define <4 x i64> @kb52l_256_mask50x3_add_intrin(<4 x i64> %x, <4 x i64> %y, <4 x i64> %acc) {
+; AVX512VL-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; AVXIFMA-LABEL: kb52l_256_mask50x3_add_intrin:
+; AVXIFMA:       # %bb.0:
+; AVXIFMA-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [1125899906842623,1125899906842623,1125899906842623,1125899906842623]
+; AVXIFMA-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVXIFMA-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [3,3,3,3]
+; AVXIFMA-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVXIFMA-NEXT:    {vex} vpmadd52luq %ymm1, %ymm0, %ymm2
+; AVXIFMA-NEXT:    vmovdqa %ymm2, %ymm0
+; AVXIFMA-NEXT:    retq
+  %xm = and <4 x i64> %x, splat (i64 1125899906842623) ; (1<<50)-1
+  %ym = and <4 x i64> %y, splat (i64 3) ; (1<<2)-1
+  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(
+                        <4 x i64> %acc, <4 x i64> %xm, <4 x i64> %ym)
+  ret <4 x i64> %r
+}
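
Note (not part of the patch): the bit-width reasoning the new known-bits case and the tests rely on can be checked with a small standalone C++ sketch. Each VPMADD52 lane uses only the low 52 bits of the two multiplicands; a 25x25-bit (or 25x26-bit) product fits in 50 (or 51) bits, so the high-52 half is known zero and the VPMADD52H result folds to the accumulator, while the low-52 half only gets an upper bound. The helper name vpmadd52_lane below is illustrative, not an LLVM or intrinsic API.

// Standalone sketch of the per-lane VPMADD52 semantics assumed above.
// Assumptions: plain C++17, unsigned __int128 available (GCC/Clang).
#include <cassert>
#include <cstdint>

// One 64-bit lane: acc + bits [0,52) (low form) or [52,104) (high form)
// of the 104-bit product of the low 52 bits of x and y.
static uint64_t vpmadd52_lane(uint64_t acc, uint64_t x, uint64_t y, bool high) {
  const uint64_t mask52 = (1ULL << 52) - 1;
  unsigned __int128 prod =
      (unsigned __int128)(x & mask52) * (unsigned __int128)(y & mask52);
  uint64_t part = high ? ((uint64_t)(prod >> 52) & mask52)
                       : ((uint64_t)prod & mask52);
  return acc + part; // wraps modulo 2^64, matching the instruction
}

int main() {
  const uint64_t mask25 = (1ULL << 25) - 1;
  // 25x25-bit product < 2^50, so bits [52,104) are zero and the high-52
  // result is exactly the accumulator -- the fold kb52h_128_mask25_and1 expects.
  assert(vpmadd52_lane(1, 0x1234567 & mask25, 0x89abcd & mask25, /*high=*/true) == 1);
  // The low-52 form does not fold: the low bits of the product are generally
  // nonzero, so only a known-bits bound is available (the kb52l tests keep
  // the vpmadd52luq instruction).
  assert(vpmadd52_lane(7, 3, 5, /*high=*/false) == 7 + 15);
  return 0;
}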