-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[GlobalISel] Add G_SHUFFLE_VECTOR computeKnownBits #139501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: The code is taken from SelectionDAG::computeKnownBits. Full diff: https://github.com/llvm/llvm-project/pull/139501.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 12fe28b29e5c8..21990be21bbf7 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -629,6 +630,33 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(LowBits);
break;
}
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ APInt DemandedLHS, DemandedRHS;
+ // Collect the known bits that are shared by every vector element referenced
+ // by the shuffle.
+ unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
+ if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
+ DemandedElts, DemandedLHS, DemandedRHS))
+ break;
+
+ // Known bits are the values that are shared by every demanded element.
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!DemandedLHS) {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
+ Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ if (!!DemandedRHS) {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
+ Depth + 1);
+ Known = Known.intersectWith(Known2);
+ }
+ break;
+ }
}
LLVM_DEBUG(dumpResult(MI, Known, Depth));
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index c7a423f2e4f8d..56393142726c7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -38,9 +38,9 @@ define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: dup v1.8h, w8
-; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: xtn v1.8b, v1.8h
+; CHECK-GI-NEXT: umull v0.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
entry:
%in = zext i8 %src to i16
@@ -84,9 +84,9 @@ define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xffff
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: dup v1.4s, w8
-; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: xtn v1.4h, v1.4s
+; CHECK-GI-NEXT: umull v0.4s, v1.4h, v0.4h
; CHECK-GI-NEXT: ret
entry:
%in = zext i16 %src to i32
@@ -138,16 +138,9 @@ define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
-; CHECK-GI-NEXT: fmov x9, d0
-; CHECK-GI-NEXT: mov x11, v0.d[1]
-; CHECK-GI-NEXT: fmov x8, d1
-; CHECK-GI-NEXT: mov x10, v1.d[1]
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
entry:
%in = zext i32 %src to i64
@@ -169,16 +162,9 @@ define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov w8, w0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: dup v1.2d, x8
-; CHECK-GI-NEXT: fmov x9, d0
-; CHECK-GI-NEXT: mov x11, v0.d[1]
-; CHECK-GI-NEXT: fmov x8, d1
-; CHECK-GI-NEXT: mov x10, v1.d[1]
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ret
entry:
@@ -240,14 +226,9 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-GI-NEXT: and x8, x0, #0xffff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: dup v1.2d, x8
-; CHECK-GI-NEXT: fmov x8, d1
-; CHECK-GI-NEXT: fmov x9, d0
-; CHECK-GI-NEXT: mov x10, v1.d[1]
-; CHECK-GI-NEXT: mov x11, v0.d[1]
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
+; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
entry:
%in = zext i16 %src to i64
@@ -492,10 +473,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
entry:
%in = zext <8 x i8> %src to <8 x i16>
@@ -546,8 +527,8 @@ define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: trn1 v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ushll v1.8h, v2.8b, #0
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: umull v0.8h, v0.8b, v2.8b
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index fb6575cc0ee83..eee1ec0b37315 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -28,6 +28,7 @@ define void @matrix_mul_unsigned(i32 %N, ptr nocapture %C, ptr nocapture readonl
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: .LBB0_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
@@ -35,10 +36,8 @@ define void @matrix_mul_unsigned(i32 %N, ptr nocapture %C, ptr nocapture readonl
; CHECK-GI-NEXT: ldp d1, d2, [x9]
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
; CHECK-GI-NEXT: add w0, w0, #8
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
; CHECK-GI-NEXT: stp q1, q2, [x9]
; CHECK-GI-NEXT: b.ne .LBB0_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
@@ -478,22 +477,21 @@ define void @larger_umull(ptr nocapture noundef readonly %x, i16 noundef %y, ptr
; CHECK-GI-NEXT: mov x12, x8
; CHECK-GI-NEXT: .LBB4_3: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NEXT: ldp q0, q1, [x11, #-16]
; CHECK-GI-NEXT: and w13, w1, #0xffff
-; CHECK-GI-NEXT: dup v2.4s, w13
+; CHECK-GI-NEXT: ldp q1, q2, [x11, #-16]
+; CHECK-GI-NEXT: dup v0.4s, w13
; CHECK-GI-NEXT: mov x13, x10
; CHECK-GI-NEXT: subs x12, x12, #16
; CHECK-GI-NEXT: add x11, x11, #32
-; CHECK-GI-NEXT: ushll v3.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: mul v3.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: mul v0.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: mul v4.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT: mul v1.4s, v2.4s, v1.4s
-; CHECK-GI-NEXT: stp q3, q0, [x13, #-32]!
-; CHECK-GI-NEXT: stp q4, q1, [x10], #64
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: mov d4, v2.d[1]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v3.4s, v0.4h, v3.4h
+; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v4.4h
+; CHECK-GI-NEXT: stp q1, q3, [x13, #-32]!
+; CHECK-GI-NEXT: stp q2, q0, [x10], #64
; CHECK-GI-NEXT: b.ne .LBB4_3
; CHECK-GI-NEXT: // %bb.4: // %middle.block
; CHECK-GI-NEXT: cmp x8, x9
@@ -775,22 +773,15 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT: mov x8, xzr
; CHECK-GI-NEXT: dup v0.2d, v0.d[1]
-; CHECK-GI-NEXT: mov x9, v0.d[1]
-; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: .LBB6_1: // %loop
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NEXT: ldr d0, [x0]
+; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: fmov x11, d0
-; CHECK-GI-NEXT: mov x12, v0.d[1]
-; CHECK-GI-NEXT: mul x11, x11, x10
-; CHECK-GI-NEXT: mul x12, x12, x9
-; CHECK-GI-NEXT: mov v0.d[0], x11
-; CHECK-GI-NEXT: mov v0.d[1], x12
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #15
-; CHECK-GI-NEXT: str d0, [x0], #32
+; CHECK-GI-NEXT: umull v1.2d, v1.2s, v0.2s
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15
+; CHECK-GI-NEXT: str d1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB6_1
; CHECK-GI-NEXT: // %bb.2: // %exit
; CHECK-GI-NEXT: ret
@@ -917,13 +908,14 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: mov x8, xzr
+; CHECK-GI-NEXT: dup v0.8h, v0.h[0]
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: .LBB8_1: // %loop
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldr d1, [x0]
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: mul v1.8h, v1.8h, v0.h[0]
+; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
; CHECK-GI-NEXT: str d1, [x0], #32
@@ -1046,6 +1038,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0xfffffff8
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: .LBB10_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
@@ -1053,10 +1046,8 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea
; CHECK-GI-NEXT: ldp d1, d2, [x9]
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
; CHECK-GI-NEXT: add w0, w0, #8
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
; CHECK-GI-NEXT: stp q1, q2, [x9]
; CHECK-GI-NEXT: b.ne .LBB10_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
@@ -1135,6 +1126,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0xfffffff0
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: .LBB11_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1
@@ -1143,16 +1135,14 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
; CHECK-GI-NEXT: ldur q2, [x9, #8]
; CHECK-GI-NEXT: add x9, x1, w0, uxtw #2
; CHECK-GI-NEXT: add w0, w0, #16
-; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-GI-NEXT: mul v3.4s, v0.4s, v3.4s
-; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: mul v4.4s, v0.4s, v4.4s
-; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: stp q3, q1, [x9]
-; CHECK-GI-NEXT: stp q4, q2, [x9, #32]!
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: mov d4, v2.d[1]
+; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h
+; CHECK-GI-NEXT: umull v3.4s, v0.4h, v3.4h
+; CHECK-GI-NEXT: umull v4.4s, v0.4h, v4.4h
+; CHECK-GI-NEXT: stp q1, q3, [x9]
+; CHECK-GI-NEXT: stp q2, q4, [x9, #32]!
; CHECK-GI-NEXT: b.ne .LBB11_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 714be46a015f4..951001c84aed0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -1730,11 +1730,11 @@ define <8 x i32> @umull_and_v8i32_dup(<8 x i16> %src1, i32 %src2) {
; CHECK-GI-LABEL: umull_and_v8i32_dup:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
-; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-GI-NEXT: dup v3.4s, w8
-; CHECK-GI-NEXT: mul v0.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: mul v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: dup v1.4s, w8
+; CHECK-GI-NEXT: xtn v1.4h, v1.4s
+; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: umull v1.4s, v2.4h, v1.4h
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <8 x i16> %src1 to <8 x i32>
@@ -1819,23 +1819,11 @@ define <4 x i64> @umull_and_v4i64_dup(<4 x i32> %src1, i64 %src2) {
; CHECK-GI-LABEL: umull_and_v4i64_dup:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and x8, x0, #0xff
-; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT: dup v2.2d, x8
-; CHECK-GI-NEXT: fmov x8, d1
-; CHECK-GI-NEXT: fmov x12, d0
-; CHECK-GI-NEXT: mov x10, v1.d[1]
-; CHECK-GI-NEXT: fmov x9, d2
-; CHECK-GI-NEXT: mov x11, v2.d[1]
-; CHECK-GI-NEXT: mov x13, v0.d[1]
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x12, x9
-; CHECK-GI-NEXT: mul x10, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mul x11, x13, x11
-; CHECK-GI-NEXT: mov v1.d[0], x9
-; CHECK-GI-NEXT: mov v0.d[1], x10
-; CHECK-GI-NEXT: mov v1.d[1], x11
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: dup v1.2d, x8
+; CHECK-GI-NEXT: xtn v1.2s, v1.2d
+; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
+; CHECK-GI-NEXT: umull v1.2d, v2.2s, v1.2s
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <4 x i32> %src1 to <4 x i64>
|
2f3feb7 to
971e6e1
Compare
arsenm
approved these changes
May 13, 2025
The code is taken from SelectionDAG::computeKnownBits.
971e6e1 to
578aab2
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
The code is taken from SelectionDAG::computeKnownBits.