Skip to content

Conversation

@SahilPatidar
Copy link
Contributor

Resolve #84905

@SahilPatidar
Copy link
Contributor Author

@RKSimon, please let me know whether the tests make sense to you.

@RKSimon RKSimon changed the title add ISD::ABDU/ISD::ABDS handling #84905 [DAG] computeKnownBits - add ISD::ABDU/ISD::ABDS handling #84905 Apr 10, 2024
@RKSimon
Copy link
Collaborator

RKSimon commented Apr 10, 2024

Please can you add the new tests as the first commit in the patch, and the handling (together with the resulting test changes) as the second commit, so that the diff shows the effect of the change.

@RKSimon RKSimon added the llvm:SelectionDAG SelectionDAGISel as well label Apr 10, 2024
@llvmbot
Copy link
Member

llvmbot commented Apr 10, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Author: None (SahilPatidar)

Changes

Resolve #84905


Full diff: https://github.com/llvm/llvm-project/pull/88253.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+12)
  • (modified) llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll (+153)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1dd0fa49a460f8..a3953c2f29ffeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3397,6 +3397,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known = KnownBits::mulhs(Known, Known2);
     break;
   }
+  case ISD::ABDU: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::abdu(Known, Known2);
+    break;
+  }
+  case ISD::ABDS: {
+    Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known = KnownBits::abds(Known, Known2);
+    break;
+  }
   case ISD::UMUL_LOHI: {
     assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index a13eac9b0a5e65..6d5a8ad0989719 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -291,3 +291,156 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
   %abd = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %lhs, <2 x double> %rhs)
   ret <2 x double> %abd
 }
+
+define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.8h, #15
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    rev64 v0.8h, v0.8h
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %and1 = and <8 x i16> %lhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %and2 = and <8 x i16> %rhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2)
+  %suff = shufflevector <8 x i16> %uabd, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  %res = and <8 x i16> %suff, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #17
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %2 = and <4 x i32> %a1, <i32 65535, i32 65535, i32 65535, i32 65535>
+  %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
+  ret <4 x i32> %5
+}
+
+define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
+  %2 = and <4 x i32> %a1, <i32 32767, i32 32767, i32 32767, i32 32767>
+  %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
+  ret <4 x i32> %5
+}
+
+define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI31_0
+; CHECK-NEXT:    adrp x9, .LCPI31_1
+; CHECK-NEXT:    movi v4.2d, #0x0000ff000000ff
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    sub v0.4s, v4.4s, v0.4s
+; CHECK-NEXT:    sub v1.4s, v4.4s, v1.4s
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v4.16b
+; CHECK-NEXT:    ret
+  %and1 = and <4 x i32> %lhs, <i32 255, i32 -1, i32 -1, i32 255>
+  %and2 = and <4 x i32> %rhs, <i32 255, i32 255, i32 -1, i32 -1>
+  %sub1 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %and1
+  %sub2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %and2
+  %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %sub1, <4 x i32> %sub2)
+  %s = shufflevector <4 x i32> %abd, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+  %4 = and <4 x i32> %s, <i32 255, i32 255, i32 255, i32 255>
+  ret <4 x i32> %4
+}
+
+define <4 x i32> @knownbits_sabd_and_mask_sub(<4 x i16> %a0) {
+; CHECK-LABEL: knownbits_sabd_and_mask_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4h, v0.4h
+; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+  %1 = and <4 x i16> %a0, <i16 255, i16 255, i16 255, i16 255>
+  %2 = sub <4 x i16> zeroinitializer, %a0
+  %3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a0, <4 x i16> %2)
+  %4 = sext <4 x i16> %3 to <4 x i32>
+  ret <4 x i32> %4
+}
+
+define <4 x i32> @knownbits_uabd_and_mask_sub(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_and_mask_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT:    movi v3.2d, #0x0000ff000000ff
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %1 = sub <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
+  %2 = sub <4 x i32> %a1, <i32 255, i32 255, i32 255, i32 255>
+  %3 = and <4 x i32> %1, <i32 255, i32 255, i32 255, i32 255>
+  %4 = and <4 x i32> %2, <i32 255, i32 255, i32 255, i32 255>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %6
+}
+
+define <4 x i32> @knownbits_uabd_and_lshr_mask(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_uabd_and_lshr_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #3, msl #8
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ushr v0.4s, v0.4s, #4
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #4
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    rev64 v0.4s, v0.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 1023, i32 1023, i32 1023, i32 1023>
+  %2 = and <4 x i32> %a1, <i32 1023, i32 1023, i32 1023, i32 1023>
+  %3 = lshr <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
+  %4 = lshr <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %6
+}
+
+define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: knownbits_sabd_and_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI35_0
+; CHECK-NEXT:    adrp x9, .LCPI35_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-NEXT:    ldr q3, [x9, :lo12:.LCPI35_1]
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    shl v0.4s, v0.4s, #4
+; CHECK-NEXT:    shl v1.4s, v1.4s, #4
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-NEXT:    trn2 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %1 = and <4 x i32> %a0, <i32 1023, i32 -1, i32 1023, i32 1023>
+  %2 = and <4 x i32> %a1, <i32 1023, i32 1023, i32 -1, i32 1023>
+  %3 = shl <4 x i32> %1, <i32 4, i32 4, i32 4, i32 4>
+  %4 = shl <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
+  %5 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %3, <4 x i32> %4)
+  %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
+  ret <4 x i32> %6
+}

@SahilPatidar
Copy link
Contributor Author

@RKSimon, done! I split the patch into separate commits and changed some of the test cases.

@RKSimon RKSimon marked this pull request as ready for review April 12, 2024 09:57
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you look at the failing AArch64 test:

Failed Tests (1):
  LLVM :: CodeGen/AArch64/abd-combine.ll

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon merged commit ab037c4 into llvm:main Apr 12, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:AArch64 llvm:SelectionDAG SelectionDAGISel as well

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[DAG] SelectionDAG::computeKnownBits - add ISD::ABDU/ISD::ABDS handling

3 participants