Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 85e3087

Browse files
committed
[AArch64] generate vuzp instead of mov
When a BUILD_VECTOR is created out of a sequence of EXTRACT_VECTOR_ELT nodes whose indices follow one of two specific patterns, either <0, 2, 4, ...> or <1, 3, 5, ...>, replace the BUILD_VECTOR with UZP1 (even pattern) or UZP2 (odd pattern), respectively.

With this patch, LLVM generates the following code for the first function, fun1, in the testcase:

    adrp x8, .LCPI0_0
    ldr q0, [x8, :lo12:.LCPI0_0]
    tbl v0.16b, { v0.16b }, v0.16b
    ext v1.16b, v0.16b, v0.16b, #8
    uzp1 v0.8b, v0.8b, v1.8b
    str d0, [x8]
    ret

Without this patch, LLVM currently generates this code:

    adrp x8, .LCPI0_0
    ldr q0, [x8, :lo12:.LCPI0_0]
    tbl v0.16b, { v0.16b }, v0.16b
    mov v1.16b, v0.16b
    mov v1.b[1], v0.b[2]
    mov v1.b[2], v0.b[4]
    mov v1.b[3], v0.b[6]
    mov v1.b[4], v0.b[8]
    mov v1.b[5], v0.b[10]
    mov v1.b[6], v0.b[12]
    mov v1.b[7], v0.b[14]
    str d1, [x8]
    ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326443 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6d9b2bf commit 85e3087

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6706,16 +6706,20 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
67066706
// select the values we'll be overwriting for the non-constant
67076707
// lanes such that we can directly materialize the vector
67086708
// some other way (MOVI, e.g.), we can be sneaky.
6709+
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
67096710
unsigned NumElts = VT.getVectorNumElements();
67106711
bool isOnlyLowElement = true;
67116712
bool usesOnlyOneValue = true;
67126713
bool usesOnlyOneConstantValue = true;
67136714
bool isConstant = true;
6715+
bool AllLanesExtractElt = true;
67146716
unsigned NumConstantLanes = 0;
67156717
SDValue Value;
67166718
SDValue ConstantValue;
67176719
for (unsigned i = 0; i < NumElts; ++i) {
67186720
SDValue V = Op.getOperand(i);
6721+
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6722+
AllLanesExtractElt = false;
67196723
if (V.isUndef())
67206724
continue;
67216725
if (i > 0)
@@ -6748,6 +6752,61 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
67486752
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
67496753
}
67506754

6755+
if (AllLanesExtractElt) {
6756+
SDNode *Vector = nullptr;
6757+
bool Even = false;
6758+
bool Odd = false;
6759+
// Check whether the extract elements match the Even pattern <0,2,4,...> or
6760+
// the Odd pattern <1,3,5,...>.
6761+
for (unsigned i = 0; i < NumElts; ++i) {
6762+
SDValue V = Op.getOperand(i);
6763+
const SDNode *N = V.getNode();
6764+
if (!isa<ConstantSDNode>(N->getOperand(1)))
6765+
break;
6766+
6767+
// All elements are extracted from the same vector.
6768+
if (!Vector)
6769+
Vector = N->getOperand(0).getNode();
6770+
else if (Vector != N->getOperand(0).getNode()) {
6771+
Odd = false;
6772+
Even = false;
6773+
break;
6774+
}
6775+
6776+
// Extracted values are either at Even indices <0,2,4,...> or at Odd
6777+
// indices <1,3,5,...>.
6778+
uint64_t Val = N->getConstantOperandVal(1);
6779+
if (Val == 2 * i) {
6780+
Even = true;
6781+
continue;
6782+
}
6783+
if (Val - 1 == 2 * i) {
6784+
Odd = true;
6785+
continue;
6786+
}
6787+
6788+
// Something does not match: abort.
6789+
Odd = false;
6790+
Even = false;
6791+
break;
6792+
}
6793+
if (Even || Odd) {
6794+
SDValue LHS =
6795+
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
6796+
DAG.getConstant(0, dl, MVT::i64));
6797+
SDValue RHS =
6798+
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
6799+
DAG.getConstant(NumElts, dl, MVT::i64));
6800+
6801+
if (Even && !Odd)
6802+
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
6803+
RHS);
6804+
if (Odd && !Even)
6805+
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
6806+
RHS);
6807+
}
6808+
}
6809+
67516810
// Use DUP for non-constant splats. For f32 constant splats, reduce to
67526811
// i32 and try again.
67536812
if (usesOnlyOneValue) {
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
2+
3+
; CHECK-LABEL: fun1:
4+
; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
5+
; CHECK-NOT: mov
6+
define i32 @fun1() {
7+
entry:
8+
%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
9+
%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
10+
%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
11+
store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
12+
ret i32 undef
13+
}
14+
15+
; CHECK-LABEL: fun2:
16+
; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
17+
; CHECK-NOT: mov
18+
define i32 @fun2() {
19+
entry:
20+
%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
21+
%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
22+
%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
23+
store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
24+
ret i32 undef
25+
}
26+
27+
; CHECK-LABEL: fun3:
28+
; CHECK-NOT: uzp1
29+
; CHECK: mov
30+
define i32 @fun3() {
31+
entry:
32+
%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
33+
%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 15>
34+
%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
35+
store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
36+
ret i32 undef
37+
}
38+
39+
; CHECK-LABEL: fun4:
40+
; CHECK-NOT: uzp2
41+
; CHECK: mov
42+
define i32 @fun4() {
43+
entry:
44+
%vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
45+
%vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
46+
%scevgep = getelementptr <8 x i8>, <8 x i8>* undef, i64 1
47+
store <8 x i8> %vuzp.i212.1, <8 x i8>* %scevgep, align 1
48+
ret i32 undef
49+
}
50+
51+
declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)

0 commit comments

Comments
 (0)