Skip to content

Commit dda0d62

Browse files
committed
[AArch64] Full reverse shuffles.
A full reverse shuffle of a 128-bit vector needs to be lowered as REV64 followed by EXT. This adds GlobalISel handling for the v8s16 and v16s8 types to match SelectionDAG (SDAG). Other types should be handled by perfect shuffles.
1 parent 342c8db commit dda0d62

File tree

4 files changed

+51
-48
lines changed

4 files changed

+51
-48
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,13 @@ def ext: GICombineRule <
131131
(apply [{ applyEXT(*${root}, ${matchinfo}); }])
132132
>;
133133

134+
// Combine rule for G_SHUFFLE_VECTOR instructions that reverse a whole vector.
// The legality check is delegated to matchFullRev and the rewrite to
// applyFullRev; both receive the root instruction and MRI.
def fullrev: GICombineRule <
  (defs root:$root, shuffle_matchdata:$matchinfo),
  (match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root,
         [{ return matchFullRev(*${root}, MRI); }]),
  (apply [{ applyFullRev(*${root}, MRI); }])
>;
140+
134141
def insertelt_nonconst: GICombineRule <
135142
(defs root:$root, shuffle_matchdata:$matchinfo),
136143
(match (wip_match_opcode G_INSERT_VECTOR_ELT):$root,
@@ -163,7 +170,7 @@ def form_duplane : GICombineRule <
163170
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
164171
>;
165172

166-
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
173+
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
167174
form_duplane, shuf_to_ins]>;
168175

169176
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,28 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
405405
MI.eraseFromParent();
406406
}
407407

408+
/// Decide whether \p MI is a G_SHUFFLE_VECTOR that reverses an entire vector
/// and can therefore be rewritten by applyFullRev.
///
/// The match succeeds only when all of the following hold:
///  - the destination type is a fixed vector of 16 x s8 or 8 x s16,
///  - the first source operand has exactly the destination type, and
///  - the shuffle mask is a reverse mask over all of its elements.
///
/// \param MI  The instruction to inspect; must be a G_SHUFFLE_VECTOR.
/// \param MRI Register info used to look up the operand types.
/// \returns true if the shuffle is a supported full reverse.
bool matchFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

  // Read every operand up front so the operand accessors run unconditionally.
  const Register DstReg = MI.getOperand(0).getReg();
  const LLT ResultTy = MRI.getType(DstReg);
  const Register FirstSrc = MI.getOperand(1).getReg();
  const auto ShufMask = MI.getOperand(3).getShuffleMask();

  // Only the two element layouts named in the commit are handled here.
  const bool IsSupportedTy = ResultTy == LLT::fixed_vector(16, 8) ||
                             ResultTy == LLT::fixed_vector(8, 16);
  if (!IsSupportedTy)
    return false;

  // The source must have the same type as the result.
  if (!(ResultTy == MRI.getType(FirstSrc)))
    return false;

  return ShuffleVectorInst::isReverseMask(ShufMask, ShufMask.size());
}
419+
420+
/// Replace a full-reverse G_SHUFFLE_VECTOR (as accepted by matchFullRev) with
/// a G_REV64 of the first source followed by a G_EXT that takes the G_REV64
/// result as both inputs with an immediate of 8, then erase \p MI.
///
/// \param MI  The shuffle being replaced; it is erased before returning.
/// \param MRI Register info used to obtain the destination vector type.
void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
  MachineIRBuilder Builder(MI);

  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  const LLT VecTy = MRI.getType(DstReg);

  // Emit the immediate first so the generated instruction order is
  // constant, G_REV64, G_EXT.
  auto ExtAmount = Builder.buildConstant(LLT::scalar(32), 8);
  auto Reversed = Builder.buildInstr(AArch64::G_REV64, {VecTy}, {SrcReg});
  Builder.buildInstr(AArch64::G_EXT, {DstReg}, {Reversed, Reversed, ExtAmount});

  MI.eraseFromParent();
}
429+
408430
bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
409431
assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
410432

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -407,11 +407,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
407407
;
408408
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
409409
; CHECK-GI: // %bb.0: // %entry
410-
; CHECK-GI-NEXT: adrp x8, .LCPI13_0
411-
; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
410+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
412411
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
413-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
414-
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
412+
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
413+
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
415414
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
416415
; CHECK-GI-NEXT: ret
417416
entry:
@@ -460,11 +459,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
460459
;
461460
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
462461
; CHECK-GI: // %bb.0: // %entry
463-
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
464-
; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
462+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
465463
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
466-
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
467-
; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
464+
; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
465+
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
468466
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
469467
; CHECK-GI-NEXT: ret
470468
entry:

llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll

Lines changed: 15 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -60,19 +60,11 @@ entry:
6060
}
6161

6262
define <8 x i16> @v8i16(<8 x i16> %a) {
63-
; CHECK-SD-LABEL: v8i16:
64-
; CHECK-SD: // %bb.0: // %entry
65-
; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
66-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
67-
; CHECK-SD-NEXT: ret
68-
;
69-
; CHECK-GI-LABEL: v8i16:
70-
; CHECK-GI: // %bb.0: // %entry
71-
; CHECK-GI-NEXT: adrp x8, .LCPI4_0
72-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
73-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
74-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
75-
; CHECK-GI-NEXT: ret
63+
; CHECK-LABEL: v8i16:
64+
; CHECK: // %bb.0: // %entry
65+
; CHECK-NEXT: rev64 v0.8h, v0.8h
66+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
67+
; CHECK-NEXT: ret
7668
entry:
7769
%V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
7870
ret <8 x i16> %V128
@@ -112,19 +104,11 @@ entry:
112104
}
113105

114106
define <16 x i8> @v16i8(<16 x i8> %a) {
115-
; CHECK-SD-LABEL: v16i8:
116-
; CHECK-SD: // %bb.0: // %entry
117-
; CHECK-SD-NEXT: rev64 v0.16b, v0.16b
118-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
119-
; CHECK-SD-NEXT: ret
120-
;
121-
; CHECK-GI-LABEL: v16i8:
122-
; CHECK-GI: // %bb.0: // %entry
123-
; CHECK-GI-NEXT: adrp x8, .LCPI7_0
124-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
125-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
126-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
127-
; CHECK-GI-NEXT: ret
107+
; CHECK-LABEL: v16i8:
108+
; CHECK: // %bb.0: // %entry
109+
; CHECK-NEXT: rev64 v0.16b, v0.16b
110+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
111+
; CHECK-NEXT: ret
128112
entry:
129113
%V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
130114
ret <16 x i8> %V128
@@ -203,19 +187,11 @@ entry:
203187
}
204188

205189
define <8 x half> @v8f16(<8 x half> %a) {
206-
; CHECK-SD-LABEL: v8f16:
207-
; CHECK-SD: // %bb.0: // %entry
208-
; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
209-
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
210-
; CHECK-SD-NEXT: ret
211-
;
212-
; CHECK-GI-LABEL: v8f16:
213-
; CHECK-GI: // %bb.0: // %entry
214-
; CHECK-GI-NEXT: adrp x8, .LCPI13_0
215-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1
216-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
217-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
218-
; CHECK-GI-NEXT: ret
190+
; CHECK-LABEL: v8f16:
191+
; CHECK: // %bb.0: // %entry
192+
; CHECK-NEXT: rev64 v0.8h, v0.8h
193+
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
194+
; CHECK-NEXT: ret
219195
entry:
220196
%V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
221197
ret <8 x half> %V128

0 commit comments

Comments
 (0)